unstructured-ingest 0.3.14__py3-none-any.whl → 0.3.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of unstructured-ingest might be problematic.

@@ -0,0 +1,142 @@
1
+ import json
2
+ import os
3
+ import time
4
+ from contextlib import contextmanager
5
+ from pathlib import Path
6
+ from uuid import uuid4
7
+
8
+ import pytest
9
+ from databricks.sql import connect
10
+ from databricks.sql.client import Connection as DeltaTableConnection
11
+ from databricks.sql.client import Cursor as DeltaTableCursor
12
+ from pydantic import BaseModel, SecretStr
13
+
14
+ from test.integration.connectors.utils.constants import DESTINATION_TAG, SQL_TAG, env_setup_path
15
+ from test.integration.utils import requires_env
16
+ from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
17
+ from unstructured_ingest.v2.logger import logger
18
+ from unstructured_ingest.v2.processes.connectors.sql.databricks_delta_tables import (
19
+ CONNECTOR_TYPE,
20
+ DatabrickDeltaTablesAccessConfig,
21
+ DatabrickDeltaTablesConnectionConfig,
22
+ DatabrickDeltaTablesUploader,
23
+ DatabrickDeltaTablesUploaderConfig,
24
+ DatabrickDeltaTablesUploadStager,
25
+ )
26
+
27
+ CATALOG = "utic-dev-tech-fixtures"
28
+
29
+
30
+ class EnvData(BaseModel):
31
+ server_hostname: str
32
+ http_path: str
33
+ access_token: SecretStr
34
+
35
+
36
+ def get_env_data() -> EnvData:
37
+ return EnvData(
38
+ server_hostname=os.environ["DATABRICKS_SERVER_HOSTNAME"],
39
+ http_path=os.environ["DATABRICKS_HTTP_PATH"],
40
+ access_token=os.environ["DATABRICKS_ACCESS_TOKEN"],
41
+ )
42
+
43
+
44
+ def get_destination_schema(new_table_name: str) -> str:
45
+ p = Path(env_setup_path / "sql" / "databricks_delta_tables" / "destination" / "schema.sql")
46
+ with p.open() as f:
47
+ data_lines = f.readlines()
48
+ data_lines[0] = data_lines[0].replace("elements", new_table_name)
49
+ data = "".join([line.strip() for line in data_lines])
50
+ return data
51
+
52
+
53
+ @contextmanager
54
+ def get_connection() -> DeltaTableConnection:
55
+ env_data = get_env_data()
56
+ with connect(
57
+ server_hostname=env_data.server_hostname,
58
+ http_path=env_data.http_path,
59
+ access_token=env_data.access_token.get_secret_value(),
60
+ ) as connection:
61
+ yield connection
62
+
63
+
64
+ @contextmanager
65
+ def get_cursor() -> DeltaTableCursor:
66
+ with get_connection() as connection:
67
+ with connection.cursor() as cursor:
68
+ cursor.execute(f"USE CATALOG '{CATALOG}'")
69
+ yield cursor
70
+
71
+
72
+ @pytest.fixture
73
+ def destination_table() -> str:
74
+ random_id = str(uuid4())[:8]
75
+ table_name = f"elements_{random_id}"
76
+ destination_schema = get_destination_schema(new_table_name=table_name)
77
+ with get_cursor() as cursor:
78
+ logger.info(f"creating table: {table_name}")
79
+ cursor.execute(f"DROP TABLE IF EXISTS {table_name}")
80
+ cursor.execute(destination_schema)
81
+
82
+ yield table_name
83
+ with get_cursor() as cursor:
84
+ logger.info(f"dropping table: {table_name}")
85
+ cursor.execute(f"DROP TABLE IF EXISTS {table_name}")
86
+
87
+
88
+ def validate_destination(expected_num_elements: int, table_name: str, retries=30, interval=1):
89
+ with get_cursor() as cursor:
90
+ for i in range(retries):
91
+ cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
92
+ count = cursor.fetchone()[0]
93
+ if count == expected_num_elements:
94
+ break
95
+ logger.info(f"retry attempt {i}: expected {expected_num_elements} != count {count}")
96
+ time.sleep(interval)
97
+ assert (
98
+ count == expected_num_elements
99
+ ), f"dest check failed: got {count}, expected {expected_num_elements}"
100
+
101
+
102
+ @pytest.mark.asyncio
103
+ @pytest.mark.skip("Resources take too long to spin up to run in CI")
104
+ @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, SQL_TAG)
105
+ @requires_env("DATABRICKS_SERVER_HOSTNAME", "DATABRICKS_HTTP_PATH", "DATABRICKS_ACCESS_TOKEN")
106
+ async def test_databricks_delta_tables_destination(
107
+ upload_file: Path, temp_dir: Path, destination_table: str
108
+ ):
109
+ env_data = get_env_data()
110
+ mock_file_data = FileData(
111
+ identifier="mock file data",
112
+ connector_type=CONNECTOR_TYPE,
113
+ source_identifiers=SourceIdentifiers(filename=upload_file.name, fullpath=upload_file.name),
114
+ )
115
+ stager = DatabrickDeltaTablesUploadStager()
116
+ staged_path = stager.run(
117
+ elements_filepath=upload_file,
118
+ file_data=mock_file_data,
119
+ output_dir=temp_dir,
120
+ output_filename=upload_file.name,
121
+ )
122
+
123
+ assert staged_path.suffix == upload_file.suffix
124
+
125
+ uploader = DatabrickDeltaTablesUploader(
126
+ connection_config=DatabrickDeltaTablesConnectionConfig(
127
+ access_config=DatabrickDeltaTablesAccessConfig(
128
+ token=env_data.access_token.get_secret_value()
129
+ ),
130
+ http_path=env_data.http_path,
131
+ server_hostname=env_data.server_hostname,
132
+ ),
133
+ upload_config=DatabrickDeltaTablesUploaderConfig(
134
+ catalog=CATALOG, database="default", table_name=destination_table
135
+ ),
136
+ )
137
+ with staged_path.open("r") as f:
138
+ staged_data = json.load(f)
139
+ expected_num_elements = len(staged_data)
140
+ uploader.precheck()
141
+ uploader.run(path=staged_path, file_data=mock_file_data)
142
+ validate_destination(expected_num_elements=expected_num_elements, table_name=destination_table)
@@ -107,11 +107,15 @@ def pinecone_index() -> Generator[str, None, None]:
107
107
 
108
108
 
109
109
  def validate_pinecone_index(
110
- index_name: str, expected_num_of_vectors: int, retries=30, interval=1
110
+ index_name: str,
111
+ expected_num_of_vectors: int,
112
+ retries=30,
113
+ interval=1,
114
+ namespace: str = "default",
111
115
  ) -> None:
112
116
  # Because there's a delay for the index to catch up to the recent writes, add in a retry
113
117
  pinecone = Pinecone(api_key=get_api_key())
114
- index = pinecone.Index(name=index_name)
118
+ index = pinecone.Index(name=index_name, namespace=namespace)
115
119
  vector_count = -1
116
120
  for i in range(retries):
117
121
  index_stats = index.describe_index_stats()
@@ -133,11 +137,13 @@ def validate_pinecone_index(
133
137
  @pytest.mark.asyncio
134
138
  @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
135
139
  async def test_pinecone_destination(pinecone_index: str, upload_file: Path, temp_dir: Path):
140
+
136
141
  file_data = FileData(
137
142
  source_identifiers=SourceIdentifiers(fullpath=upload_file.name, filename=upload_file.name),
138
143
  connector_type=CONNECTOR_TYPE,
139
144
  identifier="pinecone_mock_id",
140
145
  )
146
+
141
147
  connection_config = PineconeConnectionConfig(
142
148
  index_name=pinecone_index,
143
149
  access_config=PineconeAccessConfig(api_key=get_api_key()),
@@ -224,6 +230,66 @@ async def test_pinecone_destination_large_index(
224
230
  )
225
231
 
226
232
 
233
+ @requires_env(API_KEY)
234
+ @pytest.mark.asyncio
235
+ @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
236
+ async def test_pinecone_destination_namespace(
237
+ pinecone_index: str, upload_file: Path, temp_dir: Path
238
+ ):
239
+ """
240
+ tests namespace functionality of destination connector.
241
+ """
242
+
243
+ # creates a file data structure.
244
+ file_data = FileData(
245
+ source_identifiers=SourceIdentifiers(fullpath=upload_file.name, filename=upload_file.name),
246
+ connector_type=CONNECTOR_TYPE,
247
+ identifier="pinecone_mock_id",
248
+ )
249
+
250
+ connection_config = PineconeConnectionConfig(
251
+ index_name=pinecone_index,
252
+ access_config=PineconeAccessConfig(api_key=get_api_key()),
253
+ )
254
+
255
+ stager_config = PineconeUploadStagerConfig()
256
+
257
+ stager = PineconeUploadStager(upload_stager_config=stager_config)
258
+ new_upload_file = stager.run(
259
+ elements_filepath=upload_file,
260
+ output_dir=temp_dir,
261
+ output_filename=upload_file.name,
262
+ file_data=file_data,
263
+ )
264
+
265
+ # here add namespace defintion
266
+ upload_config = PineconeUploaderConfig()
267
+ namespace_test_name = "user-1"
268
+ upload_config.namespace = namespace_test_name
269
+ uploader = PineconeUploader(connection_config=connection_config, upload_config=upload_config)
270
+ uploader.precheck()
271
+
272
+ uploader.run(path=new_upload_file, file_data=file_data)
273
+ with new_upload_file.open() as f:
274
+ staged_content = json.load(f)
275
+ expected_num_of_vectors = len(staged_content)
276
+ logger.info("validating first upload")
277
+ validate_pinecone_index(
278
+ index_name=pinecone_index,
279
+ expected_num_of_vectors=expected_num_of_vectors,
280
+ namespace=namespace_test_name,
281
+ )
282
+
283
+ # Rerun uploader and make sure no duplicates exist
284
+ uploader.run(path=new_upload_file, file_data=file_data)
285
+ logger.info("validating second upload")
286
+ validate_pinecone_index(
287
+ index_name=pinecone_index,
288
+ expected_num_of_vectors=expected_num_of_vectors,
289
+ namespace=namespace_test_name,
290
+ )
291
+
292
+
227
293
  @requires_env(API_KEY)
228
294
  @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
229
295
  def test_large_metadata(pinecone_index: str, tmp_path: Path, upload_file: Path):
@@ -1 +1 @@
1
- __version__ = "0.3.14" # pragma: no cover
1
+ __version__ = "0.3.15" # pragma: no cover
@@ -2,7 +2,7 @@ import json
2
2
  from abc import ABC
3
3
  from dataclasses import dataclass
4
4
  from pathlib import Path
5
- from typing import Any, TypeVar
5
+ from typing import Any, Optional, TypeVar
6
6
 
7
7
  import ndjson
8
8
  from pydantic import BaseModel
@@ -22,10 +22,10 @@ UploadStagerConfigT = TypeVar("UploadStagerConfigT", bound=UploadStagerConfig)
22
22
  class UploadStager(BaseProcess, ABC):
23
23
  upload_stager_config: UploadStagerConfigT
24
24
 
25
- def write_output(self, output_path: Path, data: list[dict]) -> None:
25
+ def write_output(self, output_path: Path, data: list[dict], indent: Optional[int] = 2) -> None:
26
26
  if output_path.suffix == ".json":
27
27
  with output_path.open("w") as f:
28
- json.dump(data, f, indent=2)
28
+ json.dump(data, f, indent=indent)
29
29
  elif output_path.suffix == ".ndjson":
30
30
  with output_path.open("w") as f:
31
31
  ndjson.dump(data, f)
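
For illustration only: the new optional indent parameter lets a stager subclass switch to compact, single-line JSON without re-implementing the file handling. A minimal sketch, assuming UploadStager is importable from the upload_stager module touched above (CompactJsonStager is a hypothetical name):

from pathlib import Path
from typing import Optional

from unstructured_ingest.v2.interfaces.upload_stager import UploadStager


class CompactJsonStager(UploadStager):
    # Hypothetical subclass: default indent to None so json.dump writes a single line.
    def write_output(self, output_path: Path, data: list[dict], indent: Optional[int] = None) -> None:
        super().write_output(output_path=output_path, data=data, indent=indent)
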
@@ -25,6 +25,8 @@ from .volumes_native import (
25
25
  databricks_native_volumes_destination_entry,
26
26
  databricks_native_volumes_source_entry,
27
27
  )
28
+ from .volumes_table import CONNECTOR_TYPE as VOLUMES_TABLE_CONNECTOR_TYPE
29
+ from .volumes_table import databricks_volumes_delta_tables_destination_entry
28
30
 
29
31
  add_source_entry(source_type=VOLUMES_AWS_CONNECTOR_TYPE, entry=databricks_aws_volumes_source_entry)
30
32
  add_destination_entry(
@@ -50,3 +52,7 @@ add_source_entry(
50
52
  add_destination_entry(
51
53
  destination_type=VOLUMES_AZURE_CONNECTOR_TYPE, entry=databricks_azure_volumes_destination_entry
52
54
  )
55
+ add_destination_entry(
56
+ destination_type=VOLUMES_TABLE_CONNECTOR_TYPE,
57
+ entry=databricks_volumes_delta_tables_destination_entry,
58
+ )
@@ -187,6 +187,11 @@ class DatabricksVolumesUploader(Uploader, ABC):
187
187
  upload_config: DatabricksVolumesUploaderConfig
188
188
  connection_config: DatabricksVolumesConnectionConfig
189
189
 
190
+ def get_output_path(self, file_data: FileData) -> str:
191
+ return os.path.join(
192
+ self.upload_config.path, f"{file_data.source_identifiers.filename}.json"
193
+ )
194
+
190
195
  def precheck(self) -> None:
191
196
  try:
192
197
  assert self.connection_config.get_client().current_user.me().active
@@ -194,9 +199,7 @@ class DatabricksVolumesUploader(Uploader, ABC):
194
199
  raise self.connection_config.wrap_error(e=e)
195
200
 
196
201
  def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
197
- output_path = os.path.join(
198
- self.upload_config.path, f"{file_data.source_identifiers.filename}.json"
199
- )
202
+ output_path = self.get_output_path(file_data=file_data)
200
203
  with open(path, "rb") as elements_file:
201
204
  try:
202
205
  self.connection_config.get_client().files.upload(
@@ -0,0 +1,106 @@
1
+ import json
2
+ import os
3
+ from contextlib import contextmanager
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+ from typing import Any, Generator, Optional
7
+
8
+ from pydantic import Field
9
+
10
+ from unstructured_ingest.v2.interfaces import FileData, Uploader, UploaderConfig
11
+ from unstructured_ingest.v2.logger import logger
12
+ from unstructured_ingest.v2.processes.connector_registry import (
13
+ DestinationRegistryEntry,
14
+ )
15
+ from unstructured_ingest.v2.processes.connectors.databricks.volumes import DatabricksPathMixin
16
+ from unstructured_ingest.v2.processes.connectors.sql.databricks_delta_tables import (
17
+ DatabrickDeltaTablesConnectionConfig,
18
+ DatabrickDeltaTablesUploadStager,
19
+ DatabrickDeltaTablesUploadStagerConfig,
20
+ )
21
+
22
+ CONNECTOR_TYPE = "databricks_volume_delta_tables"
23
+
24
+
25
+ class DatabricksVolumeDeltaTableUploaderConfig(UploaderConfig, DatabricksPathMixin):
26
+ database: str = Field(description="Database name", default="default")
27
+ table_name: str = Field(description="Table name")
28
+
29
+
30
+ @dataclass
31
+ class DatabricksVolumeDeltaTableStager(DatabrickDeltaTablesUploadStager):
32
+ def write_output(self, output_path: Path, data: list[dict], indent: Optional[int] = 2) -> None:
33
+ # To avoid new line issues when migrating from volumes into delta tables, omit indenting
34
+ # and always write it as a json file
35
+ with output_path.with_suffix(".json").open("w") as f:
36
+ json.dump(data, f)
37
+
38
+
39
+ @dataclass
40
+ class DatabricksVolumeDeltaTableUploader(Uploader):
41
+ connection_config: DatabrickDeltaTablesConnectionConfig
42
+ upload_config: DatabricksVolumeDeltaTableUploaderConfig
43
+ connector_type: str = CONNECTOR_TYPE
44
+
45
+ def precheck(self) -> None:
46
+ with self.connection_config.get_cursor() as cursor:
47
+ cursor.execute("SHOW CATALOGS")
48
+ catalogs = [r[0] for r in cursor.fetchall()]
49
+ if self.upload_config.catalog not in catalogs:
50
+ raise ValueError(
51
+ "Catalog {} not found in {}".format(
52
+ self.upload_config.catalog, ", ".join(catalogs)
53
+ )
54
+ )
55
+ cursor.execute(f"USE CATALOG '{self.upload_config.catalog}'")
56
+ cursor.execute("SHOW DATABASES")
57
+ databases = [r[0] for r in cursor.fetchall()]
58
+ if self.upload_config.database not in databases:
59
+ raise ValueError(
60
+ "Database {} not found in {}".format(
61
+ self.upload_config.database, ", ".join(databases)
62
+ )
63
+ )
64
+ cursor.execute("SHOW TABLES")
65
+ table_names = [r[1] for r in cursor.fetchall()]
66
+ if self.upload_config.table_name not in table_names:
67
+ raise ValueError(
68
+ "Table {} not found in {}".format(
69
+ self.upload_config.table_name, ", ".join(table_names)
70
+ )
71
+ )
72
+
73
+ def get_output_path(self, file_data: FileData, suffix: str = ".json") -> str:
74
+ filename = Path(file_data.source_identifiers.filename)
75
+ adjusted_filename = filename if filename.suffix == suffix else f"{filename}{suffix}"
76
+ return os.path.join(self.upload_config.path, f"{adjusted_filename}")
77
+
78
+ @contextmanager
79
+ def get_cursor(self, **connect_kwargs) -> Generator[Any, None, None]:
80
+ with self.connection_config.get_cursor(**connect_kwargs) as cursor:
81
+ cursor.execute(f"USE CATALOG '{self.upload_config.catalog}'")
82
+ yield cursor
83
+
84
+ def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
85
+ with self.get_cursor(staging_allowed_local_path=str(path.parent)) as cursor:
86
+ catalog_path = self.get_output_path(file_data=file_data)
87
+ logger.debug(f"uploading {path.as_posix()} to {catalog_path}")
88
+ cursor.execute(f"PUT '{path.as_posix()}' INTO '{catalog_path}' OVERWRITE")
89
+ logger.debug(
90
+ f"migrating content from {catalog_path} to table {self.upload_config.table_name}"
91
+ )
92
+ with path.open() as f:
93
+ data = json.load(f)
94
+ columns = data[0].keys()
95
+ column_str = ", ".join(columns)
96
+ sql_statment = f"INSERT INTO `{self.upload_config.table_name}` ({column_str}) SELECT {column_str} FROM json.`{catalog_path}`" # noqa: E501
97
+ cursor.execute(sql_statement)
98
+
99
+
100
+ databricks_volumes_delta_tables_destination_entry = DestinationRegistryEntry(
101
+ connection_config=DatabrickDeltaTablesConnectionConfig,
102
+ uploader=DatabricksVolumeDeltaTableUploader,
103
+ uploader_config=DatabricksVolumeDeltaTableUploaderConfig,
104
+ upload_stager=DatabricksVolumeDeltaTableStager,
105
+ upload_stager_config=DatabrickDeltaTablesUploadStagerConfig,
106
+ )
@@ -5,12 +5,10 @@ from typing import TYPE_CHECKING, Any, Optional
5
5
  from pydantic import Field, Secret
6
6
 
7
7
  from unstructured_ingest.error import DestinationConnectionError
8
- from unstructured_ingest.utils.data_prep import (
9
- flatten_dict,
10
- generator_batching_wbytes,
11
- )
8
+ from unstructured_ingest.utils.data_prep import flatten_dict, generator_batching_wbytes
12
9
  from unstructured_ingest.utils.dep_check import requires_dependencies
13
10
  from unstructured_ingest.v2.constants import RECORD_ID_LABEL
11
+ from unstructured_ingest.v2.errors import UserError
14
12
  from unstructured_ingest.v2.interfaces import (
15
13
  AccessConfig,
16
14
  ConnectionConfig,
@@ -63,6 +61,7 @@ class PineconeConnectionConfig(ConnectionConfig):
63
61
  pc = self.get_client()
64
62
 
65
63
  index = pc.Index(name=self.index_name, **index_kwargs)
64
+
66
65
  logger.debug(f"connected to index: {pc.describe_index(self.index_name)}")
67
66
  return index
68
67
 
@@ -182,14 +181,18 @@ class PineconeUploader(Uploader):
182
181
  delete_kwargs = {
183
182
  "filter": {self.upload_config.record_id_key: {"$eq": file_data.identifier}}
184
183
  }
184
+
185
185
  if namespace := self.upload_config.namespace:
186
186
  delete_kwargs["namespace"] = namespace
187
+ try:
188
+ index.delete(**delete_kwargs)
189
+ except UserError as e:
190
+ logger.error(f"failed to delete batch of ids: {delete_kwargs} {e}")
187
191
 
188
- resp = index.delete(**delete_kwargs)
189
192
  logger.debug(
190
193
  f"deleted any content with metadata "
191
194
  f"{self.upload_config.record_id_key}={file_data.identifier} "
192
- f"from pinecone index: {resp}"
195
+ f"from pinecone index: {delete_kwargs}"
193
196
  )
194
197
 
195
198
  def serverless_delete_by_record_id(self, file_data: FileData) -> None:
@@ -203,15 +206,19 @@ class PineconeUploader(Uploader):
203
206
  deleted_ids = 0
204
207
  if namespace := self.upload_config.namespace:
205
208
  list_kwargs["namespace"] = namespace
209
+
206
210
  for ids in index.list(**list_kwargs):
207
211
  deleted_ids += len(ids)
208
212
  delete_kwargs = {"ids": ids}
213
+
209
214
  if namespace := self.upload_config.namespace:
210
- delete_resp = delete_kwargs["namespace"] = namespace
211
- # delete_resp should be an empty dict if there were no errors
212
- if delete_resp:
213
- logger.error(f"failed to delete batch of ids: {delete_resp}")
214
- index.delete(**delete_kwargs)
215
+ delete_kwargs["namespace"] = namespace
216
+
217
+ try:
218
+ index.delete(**delete_kwargs)
219
+ except UserError as e:
220
+ logger.error(f"failed to delete batch of ids: {delete_kwargs} {e}")
221
+
215
222
  logger.info(
216
223
  f"deleted {deleted_ids} records with metadata "
217
224
  f"{self.upload_config.record_id_key}={file_data.identifier} "
@@ -5,6 +5,8 @@ from unstructured_ingest.v2.processes.connector_registry import (
5
5
  add_source_entry,
6
6
  )
7
7
 
8
+ from .databricks_delta_tables import CONNECTOR_TYPE as DATABRICKS_DELTA_TABLES_CONNECTOR_TYPE
9
+ from .databricks_delta_tables import databricks_delta_tables_destination_entry
8
10
  from .postgres import CONNECTOR_TYPE as POSTGRES_CONNECTOR_TYPE
9
11
  from .postgres import postgres_destination_entry, postgres_source_entry
10
12
  from .singlestore import CONNECTOR_TYPE as SINGLESTORE_CONNECTOR_TYPE
@@ -25,3 +27,7 @@ add_destination_entry(destination_type=SNOWFLAKE_CONNECTOR_TYPE, entry=snowflake
25
27
  add_destination_entry(
26
28
  destination_type=SINGLESTORE_CONNECTOR_TYPE, entry=singlestore_destination_entry
27
29
  )
30
+ add_destination_entry(
31
+ destination_type=DATABRICKS_DELTA_TABLES_CONNECTOR_TYPE,
32
+ entry=databricks_delta_tables_destination_entry,
33
+ )
@@ -0,0 +1,213 @@
1
+ import json
2
+ from contextlib import contextmanager
3
+ from dataclasses import dataclass
4
+ from typing import TYPE_CHECKING, Any, Generator, Optional
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ from pydantic import Field, Secret
9
+
10
+ from unstructured_ingest.utils.data_prep import split_dataframe
11
+ from unstructured_ingest.utils.dep_check import requires_dependencies
12
+ from unstructured_ingest.v2.interfaces import FileData
13
+ from unstructured_ingest.v2.logger import logger
14
+ from unstructured_ingest.v2.processes.connector_registry import (
15
+ DestinationRegistryEntry,
16
+ )
17
+ from unstructured_ingest.v2.processes.connectors.sql.sql import (
18
+ SQLAccessConfig,
19
+ SQLConnectionConfig,
20
+ SQLUploader,
21
+ SQLUploaderConfig,
22
+ SQLUploadStager,
23
+ SQLUploadStagerConfig,
24
+ )
25
+
26
+ if TYPE_CHECKING:
27
+ from databricks.sdk.core import oauth_service_principal
28
+ from databricks.sql.client import Connection as DeltaTableConnection
29
+ from databricks.sql.client import Cursor as DeltaTableCursor
30
+
31
+ CONNECTOR_TYPE = "databricks_delta_tables"
32
+
33
+
34
+ class DatabrickDeltaTablesAccessConfig(SQLAccessConfig):
35
+ token: Optional[str] = Field(default=None, description="Databricks Personal Access Token")
36
+ client_id: Optional[str] = Field(default=None, description="Client ID of the OAuth app.")
37
+ client_secret: Optional[str] = Field(
38
+ default=None, description="Client Secret of the OAuth app."
39
+ )
40
+
41
+
42
+ class DatabrickDeltaTablesConnectionConfig(SQLConnectionConfig):
43
+ access_config: Secret[DatabrickDeltaTablesAccessConfig]
44
+ server_hostname: str = Field(description="server hostname connection config value")
45
+ http_path: str = Field(description="http path connection config value")
46
+ user_agent: str = "unstructuredio_oss"
47
+
48
+ @requires_dependencies(["databricks"], extras="databricks-delta-tables")
49
+ def get_credentials_provider(self) -> "oauth_service_principal":
50
+ from databricks.sdk.core import Config, oauth_service_principal
51
+
52
+ host = f"https://{self.server_hostname}"
53
+ access_configs = self.access_config.get_secret_value()
54
+ if (client_id := access_configs.client_id) and (
55
+ client_secret := access_configs.client_secret
56
+ ):
57
+ return oauth_service_principal(
58
+ Config(
59
+ host=host,
60
+ client_id=client_id,
61
+ client_secret=client_secret,
62
+ )
63
+ )
64
+ return False
65
+
66
+ def model_post_init(self, __context: Any) -> None:
67
+ access_config = self.access_config.get_secret_value()
68
+ if access_config.token and access_config.client_secret and access_config.client_id:
69
+ raise ValueError(
70
+ "One one for of auth can be provided, either token or client id and secret"
71
+ )
72
+ if not access_config.token and not (
73
+ access_config.client_secret and access_config.client_id
74
+ ):
75
+ raise ValueError(
76
+ "One form of auth must be provided, either token or client id and secret"
77
+ )
78
+
79
+ @contextmanager
80
+ @requires_dependencies(["databricks"], extras="databricks-delta-tables")
81
+ def get_connection(self, **connect_kwargs) -> Generator["DeltaTableConnection", None, None]:
82
+ from databricks.sql import connect
83
+
84
+ connect_kwargs = connect_kwargs or {}
85
+ connect_kwargs["_user_agent_entry"] = self.user_agent
86
+ connect_kwargs["server_hostname"] = connect_kwargs.get(
87
+ "server_hostname", self.server_hostname
88
+ )
89
+ connect_kwargs["http_path"] = connect_kwargs.get("http_path", self.http_path)
90
+
91
+ if credential_provider := self.get_credentials_provider():
92
+ connect_kwargs["credentials_provider"] = credential_provider
93
+ else:
94
+ connect_kwargs["access_token"] = self.access_config.get_secret_value().token
95
+ with connect(**connect_kwargs) as connection:
96
+ yield connection
97
+
98
+ @contextmanager
99
+ def get_cursor(self, **connect_kwargs) -> Generator["DeltaTableCursor", None, None]:
100
+ with self.get_connection(**connect_kwargs) as connection:
101
+ cursor = connection.cursor()
102
+ yield cursor
103
+
104
+
105
+ class DatabrickDeltaTablesUploadStagerConfig(SQLUploadStagerConfig):
106
+ pass
107
+
108
+
109
+ class DatabrickDeltaTablesUploadStager(SQLUploadStager):
110
+ upload_stager_config: DatabrickDeltaTablesUploadStagerConfig
111
+
112
+
113
+ class DatabrickDeltaTablesUploaderConfig(SQLUploaderConfig):
114
+ catalog: str = Field(description="Name of the catalog in the Databricks Unity Catalog service")
115
+ database: str = Field(description="Database name", default="default")
116
+ table_name: str = Field(description="Table name")
117
+
118
+
119
+ @dataclass
120
+ class DatabrickDeltaTablesUploader(SQLUploader):
121
+ upload_config: DatabrickDeltaTablesUploaderConfig
122
+ connection_config: DatabrickDeltaTablesConnectionConfig
123
+ connector_type: str = CONNECTOR_TYPE
124
+
125
+ @contextmanager
126
+ def get_cursor(self) -> Generator[Any, None, None]:
127
+ with self.connection_config.get_cursor() as cursor:
128
+ cursor.execute(f"USE CATALOG '{self.upload_config.catalog}'")
129
+ yield cursor
130
+
131
+ def precheck(self) -> None:
132
+ with self.connection_config.get_cursor() as cursor:
133
+ cursor.execute("SHOW CATALOGS")
134
+ catalogs = [r[0] for r in cursor.fetchall()]
135
+ if self.upload_config.catalog not in catalogs:
136
+ raise ValueError(
137
+ "Catalog {} not found in {}".format(
138
+ self.upload_config.catalog, ", ".join(catalogs)
139
+ )
140
+ )
141
+ cursor.execute(f"USE CATALOG '{self.upload_config.catalog}'")
142
+ cursor.execute("SHOW DATABASES")
143
+ databases = [r[0] for r in cursor.fetchall()]
144
+ if self.upload_config.database not in databases:
145
+ raise ValueError(
146
+ "Database {} not found in {}".format(
147
+ self.upload_config.database, ", ".join(databases)
148
+ )
149
+ )
150
+ cursor.execute("SHOW TABLES")
151
+ table_names = [r[1] for r in cursor.fetchall()]
152
+ if self.upload_config.table_name not in table_names:
153
+ raise ValueError(
154
+ "Table {} not found in {}".format(
155
+ self.upload_config.table_name, ", ".join(table_names)
156
+ )
157
+ )
158
+
159
+ def create_statement(self, columns: list[str], values: tuple[Any, ...]) -> str:
160
+ values_list = []
161
+ for v in values:
162
+ if isinstance(v, dict):
163
+ values_list.append(json.dumps(v))
164
+ elif isinstance(v, list):
165
+ if v and isinstance(v[0], (int, float)):
166
+ values_list.append("ARRAY({})".format(", ".join([str(val) for val in v])))
167
+ else:
168
+ values_list.append("ARRAY({})".format(", ".join([f"'{val}'" for val in v])))
169
+ else:
170
+ values_list.append(f"'{v}'")
171
+ statement = "INSERT INTO {table_name} ({columns}) VALUES({values})".format(
172
+ table_name=self.upload_config.table_name,
173
+ columns=", ".join(columns),
174
+ values=", ".join(values_list),
175
+ )
176
+ return statement
177
+
178
+ def upload_dataframe(self, df: pd.DataFrame, file_data: FileData) -> None:
179
+ if self.can_delete():
180
+ self.delete_by_record_id(file_data=file_data)
181
+ else:
182
+ logger.warning(
183
+ f"table doesn't contain expected "
184
+ f"record id column "
185
+ f"{self.upload_config.record_id_key}, skipping delete"
186
+ )
187
+ df.replace({np.nan: None}, inplace=True)
188
+ self._fit_to_schema(df=df)
189
+
190
+ columns = list(df.columns)
191
+ logger.info(
192
+ f"writing a total of {len(df)} elements via"
193
+ f" document batches to destination"
194
+ f" table named {self.upload_config.table_name}"
195
+ # f" with batch size {self.upload_config.batch_size}"
196
+ )
197
+ # TODO: currently variable binding not supporting for list types,
198
+ # update once that gets resolved in SDK
199
+ for rows in split_dataframe(df=df, chunk_size=self.upload_config.batch_size):
200
+ with self.get_cursor() as cursor:
201
+ values = self.prepare_data(columns, tuple(rows.itertuples(index=False, name=None)))
202
+ for v in values:
203
+ stmt = self.create_statement(columns=columns, values=v)
204
+ cursor.execute(stmt)
205
+
206
+
207
+ databricks_delta_tables_destination_entry = DestinationRegistryEntry(
208
+ connection_config=DatabrickDeltaTablesConnectionConfig,
209
+ uploader=DatabrickDeltaTablesUploader,
210
+ uploader_config=DatabrickDeltaTablesUploaderConfig,
211
+ upload_stager=DatabrickDeltaTablesUploadStager,
212
+ upload_stager_config=DatabrickDeltaTablesUploadStagerConfig,
213
+ )
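
For orientation, the new destination's pieces can be wired together much as the integration test earlier in this diff does; a sketch with placeholder credentials and names:

from unstructured_ingest.v2.processes.connectors.sql.databricks_delta_tables import (
    DatabrickDeltaTablesAccessConfig,
    DatabrickDeltaTablesConnectionConfig,
    DatabrickDeltaTablesUploader,
    DatabrickDeltaTablesUploaderConfig,
)

uploader = DatabrickDeltaTablesUploader(
    connection_config=DatabrickDeltaTablesConnectionConfig(
        # Either a personal access token or client_id/client_secret, not both.
        access_config=DatabrickDeltaTablesAccessConfig(token="DATABRICKS_PAT"),  # placeholder
        server_hostname="my-workspace.cloud.databricks.com",  # placeholder
        http_path="/sql/1.0/warehouses/abc123",  # placeholder
    ),
    upload_config=DatabrickDeltaTablesUploaderConfig(
        catalog="my_catalog", database="default", table_name="elements"
    ),
)
uploader.precheck()  # checks that the catalog, database, and table exist
# uploader.run(path=staged_json_path, file_data=file_data)  # staged output and FileData as in the test
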
@@ -129,8 +129,13 @@ class SQLIndexer(Indexer, ABC):
129
129
  connection_config: SQLConnectionConfig
130
130
  index_config: SQLIndexerConfig
131
131
 
132
- def _get_doc_ids(self) -> list[str]:
132
+ @contextmanager
133
+ def get_cursor(self) -> Generator[Any, None, None]:
133
134
  with self.connection_config.get_cursor() as cursor:
135
+ yield cursor
136
+
137
+ def _get_doc_ids(self) -> list[str]:
138
+ with self.get_cursor() as cursor:
134
139
  cursor.execute(
135
140
  f"SELECT {self.index_config.id_column} FROM {self.index_config.table_name}"
136
141
  )
@@ -140,7 +145,7 @@ class SQLIndexer(Indexer, ABC):
140
145
 
141
146
  def precheck(self) -> None:
142
147
  try:
143
- with self.connection_config.get_cursor() as cursor:
148
+ with self.get_cursor() as cursor:
144
149
  cursor.execute("SELECT 1;")
145
150
  except Exception as e:
146
151
  logger.error(f"failed to validate connection: {e}", exc_info=True)
@@ -182,6 +187,11 @@ class SQLDownloader(Downloader, ABC):
182
187
  connection_config: SQLConnectionConfig
183
188
  download_config: SQLDownloaderConfig
184
189
 
190
+ @contextmanager
191
+ def get_cursor(self) -> Generator[Any, None, None]:
192
+ with self.connection_config.get_cursor() as cursor:
193
+ yield cursor
194
+
185
195
  @abstractmethod
186
196
  def query_db(self, file_data: SqlBatchFileData) -> tuple[list[tuple], list[str]]:
187
197
  pass
@@ -323,12 +333,17 @@ class SQLUploader(Uploader):
323
333
 
324
334
  def precheck(self) -> None:
325
335
  try:
326
- with self.connection_config.get_cursor() as cursor:
336
+ with self.get_cursor() as cursor:
327
337
  cursor.execute("SELECT 1;")
328
338
  except Exception as e:
329
339
  logger.error(f"failed to validate connection: {e}", exc_info=True)
330
340
  raise DestinationConnectionError(f"failed to validate connection: {e}")
331
341
 
342
+ @contextmanager
343
+ def get_cursor(self) -> Generator[Any, None, None]:
344
+ with self.connection_config.get_cursor() as cursor:
345
+ yield cursor
346
+
332
347
  def prepare_data(
333
348
  self, columns: list[str], data: tuple[tuple[Any, ...], ...]
334
349
  ) -> list[tuple[Any, ...]]:
@@ -346,7 +361,7 @@ class SQLUploader(Uploader):
346
361
  output.append(tuple(parsed))
347
362
  return output
348
363
 
349
- def _fit_to_schema(self, df: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
364
+ def _fit_to_schema(self, df: pd.DataFrame) -> pd.DataFrame:
350
365
  columns = set(df.columns)
351
366
  schema_fields = set(columns)
352
367
  columns_to_drop = columns - schema_fields
@@ -367,6 +382,7 @@ class SQLUploader(Uploader):
367
382
 
368
383
  for column in missing_columns:
369
384
  df[column] = pd.Series()
385
+ return df
370
386
 
371
387
  def upload_dataframe(self, df: pd.DataFrame, file_data: FileData) -> None:
372
388
  if self.can_delete():
@@ -378,7 +394,7 @@ class SQLUploader(Uploader):
378
394
  f"{self.upload_config.record_id_key}, skipping delete"
379
395
  )
380
396
  df.replace({np.nan: None}, inplace=True)
381
- self._fit_to_schema(df=df, columns=self.get_table_columns())
397
+ self._fit_to_schema(df=df)
382
398
 
383
399
  columns = list(df.columns)
384
400
  stmt = "INSERT INTO {table_name} ({columns}) VALUES({values})".format(
@@ -393,7 +409,7 @@ class SQLUploader(Uploader):
393
409
  f" with batch size {self.upload_config.batch_size}"
394
410
  )
395
411
  for rows in split_dataframe(df=df, chunk_size=self.upload_config.batch_size):
396
- with self.connection_config.get_cursor() as cursor:
412
+ with self.get_cursor() as cursor:
397
413
  values = self.prepare_data(columns, tuple(rows.itertuples(index=False, name=None)))
398
414
  # For debugging purposes:
399
415
  # for val in values:
@@ -406,7 +422,7 @@ class SQLUploader(Uploader):
406
422
  cursor.executemany(stmt, values)
407
423
 
408
424
  def get_table_columns(self) -> list[str]:
409
- with self.connection_config.get_cursor() as cursor:
425
+ with self.get_cursor() as cursor:
410
426
  cursor.execute(f"SELECT * from {self.upload_config.table_name}")
411
427
  return [desc[0] for desc in cursor.description]
412
428
 
@@ -420,10 +436,11 @@ class SQLUploader(Uploader):
420
436
  f"from table {self.upload_config.table_name}"
421
437
  )
422
438
  stmt = f"DELETE FROM {self.upload_config.table_name} WHERE {self.upload_config.record_id_key} = {self.values_delimiter}" # noqa: E501
423
- with self.connection_config.get_cursor() as cursor:
439
+ with self.get_cursor() as cursor:
424
440
  cursor.execute(stmt, [file_data.identifier])
425
441
  rowcount = cursor.rowcount
426
- logger.info(f"deleted {rowcount} rows from table {self.upload_config.table_name}")
442
+ if rowcount > 0:
443
+ logger.info(f"deleted {rowcount} rows from table {self.upload_config.table_name}")
427
444
 
428
445
  def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
429
446
  df = pd.DataFrame(data)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unstructured-ingest
3
- Version: 0.3.14
3
+ Version: 0.3.15
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -22,14 +22,14 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Requires-Python: >=3.9.0,<3.14
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE.md
25
- Requires-Dist: python-dateutil
25
+ Requires-Dist: ndjson
26
+ Requires-Dist: pydantic>=2.7
26
27
  Requires-Dist: pandas
27
- Requires-Dist: tqdm
28
28
  Requires-Dist: dataclasses-json
29
- Requires-Dist: opentelemetry-sdk
29
+ Requires-Dist: tqdm
30
30
  Requires-Dist: click
31
- Requires-Dist: ndjson
32
- Requires-Dist: pydantic>=2.7
31
+ Requires-Dist: python-dateutil
32
+ Requires-Dist: opentelemetry-sdk
33
33
  Provides-Extra: airtable
34
34
  Requires-Dist: pyairtable; extra == "airtable"
35
35
  Provides-Extra: astradb
@@ -40,11 +40,11 @@ Requires-Dist: fsspec; extra == "azure"
40
40
  Provides-Extra: azure-ai-search
41
41
  Requires-Dist: azure-search-documents; extra == "azure-ai-search"
42
42
  Provides-Extra: bedrock
43
- Requires-Dist: boto3; extra == "bedrock"
44
43
  Requires-Dist: aioboto3; extra == "bedrock"
44
+ Requires-Dist: boto3; extra == "bedrock"
45
45
  Provides-Extra: biomed
46
- Requires-Dist: requests; extra == "biomed"
47
46
  Requires-Dist: bs4; extra == "biomed"
47
+ Requires-Dist: requests; extra == "biomed"
48
48
  Provides-Extra: box
49
49
  Requires-Dist: boxfs; extra == "box"
50
50
  Requires-Dist: fsspec; extra == "box"
@@ -59,6 +59,8 @@ Provides-Extra: couchbase
59
59
  Requires-Dist: couchbase; extra == "couchbase"
60
60
  Provides-Extra: csv
61
61
  Requires-Dist: unstructured[tsv]; extra == "csv"
62
+ Provides-Extra: databricks-delta-tables
63
+ Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
62
64
  Provides-Extra: databricks-volumes
63
65
  Requires-Dist: databricks-sdk; extra == "databricks-volumes"
64
66
  Provides-Extra: delta-table
@@ -71,8 +73,8 @@ Requires-Dist: unstructured[docx]; extra == "doc"
71
73
  Provides-Extra: docx
72
74
  Requires-Dist: unstructured[docx]; extra == "docx"
73
75
  Provides-Extra: dropbox
74
- Requires-Dist: fsspec; extra == "dropbox"
75
76
  Requires-Dist: dropboxdrivefs; extra == "dropbox"
77
+ Requires-Dist: fsspec; extra == "dropbox"
76
78
  Provides-Extra: duckdb
77
79
  Requires-Dist: duckdb; extra == "duckdb"
78
80
  Provides-Extra: elasticsearch
@@ -82,8 +84,8 @@ Requires-Dist: sentence-transformers; extra == "embed-huggingface"
82
84
  Provides-Extra: embed-mixedbreadai
83
85
  Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
84
86
  Provides-Extra: embed-octoai
85
- Requires-Dist: openai; extra == "embed-octoai"
86
87
  Requires-Dist: tiktoken; extra == "embed-octoai"
88
+ Requires-Dist: openai; extra == "embed-octoai"
87
89
  Provides-Extra: embed-vertexai
88
90
  Requires-Dist: vertexai; extra == "embed-vertexai"
89
91
  Provides-Extra: embed-voyageai
@@ -92,8 +94,8 @@ Provides-Extra: epub
92
94
  Requires-Dist: unstructured[epub]; extra == "epub"
93
95
  Provides-Extra: gcs
94
96
  Requires-Dist: bs4; extra == "gcs"
95
- Requires-Dist: fsspec; extra == "gcs"
96
97
  Requires-Dist: gcsfs; extra == "gcs"
98
+ Requires-Dist: fsspec; extra == "gcs"
97
99
  Provides-Extra: github
98
100
  Requires-Dist: pygithub>1.58.0; extra == "github"
99
101
  Requires-Dist: requests; extra == "github"
@@ -122,22 +124,22 @@ Provides-Extra: msg
122
124
  Requires-Dist: unstructured[msg]; extra == "msg"
123
125
  Provides-Extra: neo4j
124
126
  Requires-Dist: cymple; extra == "neo4j"
125
- Requires-Dist: networkx; extra == "neo4j"
126
127
  Requires-Dist: neo4j; extra == "neo4j"
128
+ Requires-Dist: networkx; extra == "neo4j"
127
129
  Provides-Extra: notion
130
+ Requires-Dist: httpx; extra == "notion"
131
+ Requires-Dist: htmlBuilder; extra == "notion"
128
132
  Requires-Dist: backoff; extra == "notion"
129
133
  Requires-Dist: notion-client; extra == "notion"
130
- Requires-Dist: htmlBuilder; extra == "notion"
131
- Requires-Dist: httpx; extra == "notion"
132
134
  Provides-Extra: odt
133
135
  Requires-Dist: unstructured[odt]; extra == "odt"
134
136
  Provides-Extra: onedrive
135
- Requires-Dist: bs4; extra == "onedrive"
136
137
  Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
138
+ Requires-Dist: bs4; extra == "onedrive"
137
139
  Requires-Dist: msal; extra == "onedrive"
138
140
  Provides-Extra: openai
139
- Requires-Dist: openai; extra == "openai"
140
141
  Requires-Dist: tiktoken; extra == "openai"
142
+ Requires-Dist: openai; extra == "openai"
141
143
  Provides-Extra: opensearch
142
144
  Requires-Dist: opensearch-py; extra == "opensearch"
143
145
  Provides-Extra: org
@@ -168,8 +170,8 @@ Requires-Dist: unstructured[rst]; extra == "rst"
168
170
  Provides-Extra: rtf
169
171
  Requires-Dist: unstructured[rtf]; extra == "rtf"
170
172
  Provides-Extra: s3
171
- Requires-Dist: s3fs; extra == "s3"
172
173
  Requires-Dist: fsspec; extra == "s3"
174
+ Requires-Dist: s3fs; extra == "s3"
173
175
  Provides-Extra: salesforce
174
176
  Requires-Dist: simple-salesforce; extra == "salesforce"
175
177
  Provides-Extra: sftp
@@ -183,16 +185,16 @@ Requires-Dist: singlestoredb; extra == "singlestore"
183
185
  Provides-Extra: slack
184
186
  Requires-Dist: slack-sdk[optional]; extra == "slack"
185
187
  Provides-Extra: snowflake
186
- Requires-Dist: psycopg2-binary; extra == "snowflake"
187
188
  Requires-Dist: snowflake-connector-python; extra == "snowflake"
189
+ Requires-Dist: psycopg2-binary; extra == "snowflake"
188
190
  Provides-Extra: togetherai
189
191
  Requires-Dist: together; extra == "togetherai"
190
192
  Provides-Extra: tsv
191
193
  Requires-Dist: unstructured[tsv]; extra == "tsv"
192
194
  Provides-Extra: vectara
193
- Requires-Dist: requests; extra == "vectara"
194
- Requires-Dist: aiofiles; extra == "vectara"
195
195
  Requires-Dist: httpx; extra == "vectara"
196
+ Requires-Dist: aiofiles; extra == "vectara"
197
+ Requires-Dist: requests; extra == "vectara"
196
198
  Provides-Extra: weaviate
197
199
  Requires-Dist: weaviate-client; extra == "weaviate"
198
200
  Provides-Extra: wikipedia
@@ -16,7 +16,7 @@ test/integration/connectors/test_mongodb.py,sha256=0A6DvF-iTCSZzOefisd_i20j9li8u
16
16
  test/integration/connectors/test_neo4j.py,sha256=r4TRYtTXeeOdcRcfa_gvslhSKvoIWrwN1FRJ5XRoH4k,8456
17
17
  test/integration/connectors/test_notion.py,sha256=ueXyVqYWzP4LuZYe6PauptkXNG6qkoV3srltFOSSKTA,5403
18
18
  test/integration/connectors/test_onedrive.py,sha256=TcMaa5BIp8J6engS4UZ2t19WQP0NNz2rkpBB47m7A3Y,3835
19
- test/integration/connectors/test_pinecone.py,sha256=nzHwftPt-dPX4H5OrAJ6bs9qqOSOcfJL9jVEcWSPAqo,10325
19
+ test/integration/connectors/test_pinecone.py,sha256=acKEu1vnAk0Ht3FhCnGtOEKaj_YlgCzZB7wRU17ehQ0,12407
20
20
  test/integration/connectors/test_qdrant.py,sha256=Yme3ZZ5zIbaZ-yYLUqN2oy0hsrcAfvlleRLYWMSYeSE,8049
21
21
  test/integration/connectors/test_redis.py,sha256=1aKwOb-K4zCxZwHmgW_WzGJwqLntbWTbpGQ-rtUwN9o,4360
22
22
  test/integration/connectors/test_s3.py,sha256=E1dypeag_E3OIfpQWIz3jb7ctRHRD63UtyTrzyvJzpc,7473
@@ -34,6 +34,7 @@ test/integration/connectors/elasticsearch/conftest.py,sha256=-i4_7MkIxSQENz7nuD2
34
34
  test/integration/connectors/elasticsearch/test_elasticsearch.py,sha256=TsSEPsyaTUoEvFBadinrdM0b5C4FoUtEwCv24OUbpO8,12072
35
35
  test/integration/connectors/elasticsearch/test_opensearch.py,sha256=7b7z0GqoBsBqA3IK35N6axmwEMjzJ1l3Fg2WT2c7uqs,11450
36
36
  test/integration/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
+ test/integration/connectors/sql/test_databricks_delta_tables.py,sha256=UjVjw5hVoMSNJoYdoYympYow25gvcDAEHLmUmOJKz7I,5036
37
38
  test/integration/connectors/sql/test_postgres.py,sha256=bGDyzLRpgrXO7nl0U8nF2zSNr6ykUG-w8T4daIqUCG4,6970
38
39
  test/integration/connectors/sql/test_singlestore.py,sha256=XeU2s4Kt_3tGyaDYYKTgYjdOyb8j2dnz4TgSMwFUjWs,6153
39
40
  test/integration/connectors/sql/test_snowflake.py,sha256=LEwsRDoC6-rRiwYsqeo5B9Eo6RYygLLGAUsrtrgI9pM,7494
@@ -96,7 +97,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
96
97
  test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
97
98
  test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
98
99
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
99
- unstructured_ingest/__version__.py,sha256=F6lwOpOsFNj6MPWAGEZBkXIqf1jekdFZ5wZw3drsib8,43
100
+ unstructured_ingest/__version__.py,sha256=31lJzr6gfqqAcVEa6C2kjStzBSJPXWUyP7eRpa8Y7gI,43
100
101
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
101
102
  unstructured_ingest/interfaces.py,sha256=OYVUP0bzBJpT-Lz92BDyz_hLBvyfxkuSwWHhUdnUayA,31493
102
103
  unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
@@ -386,7 +387,7 @@ unstructured_ingest/v2/interfaces/file_data.py,sha256=7MyRlj5dijQsCR6W18wQ8fEgJi
386
387
  unstructured_ingest/v2/interfaces/indexer.py,sha256=gsa1MLhFa82BzD2h4Yb7ons0VxRwKINZOrzvHAahwVU,846
387
388
  unstructured_ingest/v2/interfaces/process.py,sha256=BgglTu5K93FnDDopZKKr_rkK2LTZOguR6kcQjKHjF40,392
388
389
  unstructured_ingest/v2/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
389
- unstructured_ingest/v2/interfaces/upload_stager.py,sha256=HSSq_htv009-5yA8QqIi6rRnkfI1fnDkX5JRom8rNDY,3566
390
+ unstructured_ingest/v2/interfaces/upload_stager.py,sha256=nbMuo_U6Gqn9bDJrAJTCjrZXKMw_G28OZOuNsT23i0k,3608
390
391
  unstructured_ingest/v2/interfaces/uploader.py,sha256=T2oHbN-d4Px1w1oATKKYZA10aUssqytEpiaqBM92r0Q,1600
391
392
  unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
392
393
  unstructured_ingest/v2/pipeline/interfaces.py,sha256=-Y6gPnl-SbNxIx5-dQCmiYSPKUMjivrRlBLIKIUWVeM,8658
@@ -427,19 +428,20 @@ unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=cL0QUQZF_s2brh3nNN
427
428
  unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=HU1IwchTM7Q1kkeIFVe-Lg6gInMItBpgkDkVwuTvkGY,14259
428
429
  unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=d6gC40YmfqBNXxizAt4MO4OOu5BoCZ7SAe1AbNwTP0E,18322
429
430
  unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
430
- unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=cohF7gBj0opSGKXlENSdGfTtyIKMHd1pwu4ydeb7JAY,10605
431
+ unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=bQDCch7OGiQgpWO3n3ncLuQ4XCWqDc7ZWEB-Qrqkss8,10730
431
432
  unstructured_ingest/v2/processes/connectors/redisdb.py,sha256=p0AY4ukBNpwAemV4bWzpScvVbLTVlI3DzsCNUKiBI5M,6757
432
433
  unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
433
434
  unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=Ndn2Wm7RupfjAtlLxxQwJueeE0V8aGMbNVPuFq9nqdQ,19730
434
435
  unstructured_ingest/v2/processes/connectors/slack.py,sha256=Z73VmQ3oUY09KoLEi5OBdQeDt4ONEY_02SglWQc6HXE,9252
435
436
  unstructured_ingest/v2/processes/connectors/utils.py,sha256=8kd0g7lo9NqnpaIkjeO-Ut6erhwUNH_gS9koevpe3WE,878
436
437
  unstructured_ingest/v2/processes/connectors/vectara.py,sha256=BlI_4nkpNR99aYxDd9eusm5LQsVB9EI0r-5Kc1D7pgQ,12255
437
- unstructured_ingest/v2/processes/connectors/databricks/__init__.py,sha256=jO71UTC7bLA_N12CrLWJzh_yZML5gfT7VohxzCpUGWg,1848
438
- unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=MDnTUjFlqOP4rmQA5wkgT2DhwjhFhUwPpUPGSzqCOOE,7577
438
+ unstructured_ingest/v2/processes/connectors/databricks/__init__.py,sha256=Oh8SwTWi66gO8BsNF6vRMoQVuegyBPPCpVozkOHEf3A,2136
439
+ unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=Yj4fIxgGo9Qh1x_6-INdbrPGHCuZElu-QKNfSVtW7F4,7694
439
440
  unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=TA2e_1SIr4VaEI62873eyReCNfgmQ51_2Pko2I04pPM,2747
440
441
  unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py,sha256=cb-EUW0T-linZMkbU6AcKEGWnFHQvhpO5Abtps4P2X0,3532
441
442
  unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=tR8NubkyHw49IpW_42g6w1Koxlm56EPiPf1lB-eoRSI,2783
442
443
  unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py,sha256=dJLD1fueXf8_0AfC4cg0G7siJZVefz68iuEx2Kq7rMs,2890
444
+ unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py,sha256=muj7G2JFO_WwAPub14k0VqDmN3c56t9MA60rM48wal8,4750
443
445
  unstructured_ingest/v2/processes/connectors/duckdb/__init__.py,sha256=5sVvJCWhU-YkjHIwk4W6BZCanFYK5W4xTpWtQ8xzeB4,561
444
446
  unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=XTV9Pox3_xVmI8YVQWC9Bn6PugbPM49kp4Scv1OXFys,2649
445
447
  unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py,sha256=oUHHaLpO2pWW2Lu4Mc-XFjrA0ze97205WQ_xP95ua4M,4296
@@ -538,20 +540,21 @@ unstructured_ingest/v2/processes/connectors/qdrant/cloud.py,sha256=accJ4sNWBVWV-
538
540
  unstructured_ingest/v2/processes/connectors/qdrant/local.py,sha256=cGEyv3Oy6y4BQ4DU8yhJWMpL82QYwBVdPTxxNuV127U,1588
539
541
  unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py,sha256=BHI7HYSdbS05j2vrjyDvLzVG1WfsM8osKeq-lttlybQ,5437
540
542
  unstructured_ingest/v2/processes/connectors/qdrant/server.py,sha256=odvCZWZp8DmRxLXMR7tHhW-c7UQbix1_zpFdfXfCvKI,1613
541
- unstructured_ingest/v2/processes/connectors/sql/__init__.py,sha256=E16CXRBw8fZKTuXIECns5wif_I07oncBHskVxHC4p7w,1448
543
+ unstructured_ingest/v2/processes/connectors/sql/__init__.py,sha256=mxcrncrjeP-C2jqQoTOOpGjV3Bmyfg4efT5lq_c-V1E,1760
544
+ unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py,sha256=s_W6wSvyIXZ9mdAxvgSXFeFSze9E7pwIvc38p1hVDLM,8839
542
545
  unstructured_ingest/v2/processes/connectors/sql/postgres.py,sha256=BATfX1PQGT2kl8jAbdNKXTojYKJxh3pJV9-h3OBnHGo,5124
543
546
  unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256=-2E9dsdNhjAiuzeSBytBbAhljOhvQ8kN8wvlUESvLo8,5465
544
547
  unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=8qCm1XiJmVxy8TSeoxwmQrE2W1x8S8At2ctrS_lJ8-I,7780
545
- unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=FtI5DSMd1QUgoYLn8NAVoETc4qwCbFEwSulqziXyODY,15292
548
+ unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=ZGpeBfiOEzVaSiQxwqJkMC00Eu6TQhsrZKHnOHM0Xug,15667
546
549
  unstructured_ingest/v2/processes/connectors/sql/sqlite.py,sha256=Q5RAqn5Ccw-pbeKZLkiMn5IVw6EemCMukXzLlS7pDhc,5162
547
550
  unstructured_ingest/v2/processes/connectors/weaviate/__init__.py,sha256=NMiwnVWan69KnzVELvaqX34tMhCytIa-C8EDsXVKsEo,856
548
551
  unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-BszZ5S_lQ4JbETNs9Vozgpfm8x9egAmE,6251
549
552
  unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
550
553
  unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
551
554
  unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=X1yv1H_orDQ-J965EMXhR2XaURqe8vovSi9n1fk85B4,10499
552
- unstructured_ingest-0.3.14.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
553
- unstructured_ingest-0.3.14.dist-info/METADATA,sha256=PiIp0oqW-sia84q3v0SXUGy-Oh0fzUZCmZqBogsg7qA,7813
554
- unstructured_ingest-0.3.14.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
555
- unstructured_ingest-0.3.14.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
556
- unstructured_ingest-0.3.14.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
557
- unstructured_ingest-0.3.14.dist-info/RECORD,,
555
+ unstructured_ingest-0.3.15.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
556
+ unstructured_ingest-0.3.15.dist-info/METADATA,sha256=rZFAbiv0HZ-VUWVk4MP2vANZuzsxJLhK2_QWZ5zTjRA,7929
557
+ unstructured_ingest-0.3.15.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
558
+ unstructured_ingest-0.3.15.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
559
+ unstructured_ingest-0.3.15.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
560
+ unstructured_ingest-0.3.15.dist-info/RECORD,,