unstructured-ingest 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- test/integration/chunkers/test_chunkers.py +0 -11
- test/integration/connectors/conftest.py +11 -1
- test/integration/connectors/databricks_tests/test_volumes_native.py +4 -3
- test/integration/connectors/duckdb/conftest.py +14 -0
- test/integration/connectors/duckdb/test_duckdb.py +51 -44
- test/integration/connectors/duckdb/test_motherduck.py +37 -48
- test/integration/connectors/elasticsearch/test_elasticsearch.py +26 -4
- test/integration/connectors/elasticsearch/test_opensearch.py +26 -3
- test/integration/connectors/sql/test_postgres.py +103 -92
- test/integration/connectors/sql/test_singlestore.py +112 -100
- test/integration/connectors/sql/test_snowflake.py +142 -117
- test/integration/connectors/sql/test_sqlite.py +87 -76
- test/integration/connectors/test_astradb.py +62 -1
- test/integration/connectors/test_azure_ai_search.py +25 -3
- test/integration/connectors/test_chroma.py +120 -0
- test/integration/connectors/test_confluence.py +4 -4
- test/integration/connectors/test_delta_table.py +1 -0
- test/integration/connectors/test_kafka.py +6 -6
- test/integration/connectors/test_milvus.py +21 -0
- test/integration/connectors/test_mongodb.py +7 -4
- test/integration/connectors/test_neo4j.py +236 -0
- test/integration/connectors/test_pinecone.py +25 -1
- test/integration/connectors/test_qdrant.py +25 -2
- test/integration/connectors/test_s3.py +9 -6
- test/integration/connectors/utils/docker.py +6 -0
- test/integration/connectors/utils/validation/__init__.py +0 -0
- test/integration/connectors/utils/validation/destination.py +88 -0
- test/integration/connectors/utils/validation/equality.py +75 -0
- test/integration/connectors/utils/{validation.py → validation/source.py} +42 -98
- test/integration/connectors/utils/validation/utils.py +36 -0
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/utils/chunking.py +11 -0
- unstructured_ingest/utils/data_prep.py +36 -0
- unstructured_ingest/v2/interfaces/__init__.py +3 -1
- unstructured_ingest/v2/interfaces/file_data.py +58 -14
- unstructured_ingest/v2/interfaces/upload_stager.py +70 -6
- unstructured_ingest/v2/interfaces/uploader.py +11 -2
- unstructured_ingest/v2/pipeline/steps/chunk.py +2 -1
- unstructured_ingest/v2/pipeline/steps/download.py +5 -4
- unstructured_ingest/v2/pipeline/steps/embed.py +2 -1
- unstructured_ingest/v2/pipeline/steps/filter.py +2 -2
- unstructured_ingest/v2/pipeline/steps/index.py +4 -4
- unstructured_ingest/v2/pipeline/steps/partition.py +3 -2
- unstructured_ingest/v2/pipeline/steps/stage.py +5 -3
- unstructured_ingest/v2/pipeline/steps/uncompress.py +2 -2
- unstructured_ingest/v2/pipeline/steps/upload.py +3 -3
- unstructured_ingest/v2/processes/connectors/__init__.py +3 -0
- unstructured_ingest/v2/processes/connectors/astradb.py +43 -63
- unstructured_ingest/v2/processes/connectors/azure_ai_search.py +16 -40
- unstructured_ingest/v2/processes/connectors/chroma.py +36 -59
- unstructured_ingest/v2/processes/connectors/couchbase.py +92 -93
- unstructured_ingest/v2/processes/connectors/delta_table.py +11 -33
- unstructured_ingest/v2/processes/connectors/duckdb/base.py +26 -26
- unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +29 -20
- unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +37 -44
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +46 -75
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py +12 -35
- unstructured_ingest/v2/processes/connectors/fsspec/box.py +12 -35
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +15 -42
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +33 -29
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +12 -34
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +13 -37
- unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +19 -33
- unstructured_ingest/v2/processes/connectors/gitlab.py +32 -31
- unstructured_ingest/v2/processes/connectors/google_drive.py +32 -29
- unstructured_ingest/v2/processes/connectors/kafka/kafka.py +2 -4
- unstructured_ingest/v2/processes/connectors/kdbai.py +44 -70
- unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +8 -10
- unstructured_ingest/v2/processes/connectors/local.py +13 -2
- unstructured_ingest/v2/processes/connectors/milvus.py +16 -57
- unstructured_ingest/v2/processes/connectors/mongodb.py +99 -108
- unstructured_ingest/v2/processes/connectors/neo4j.py +383 -0
- unstructured_ingest/v2/processes/connectors/onedrive.py +1 -1
- unstructured_ingest/v2/processes/connectors/pinecone.py +3 -33
- unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +32 -41
- unstructured_ingest/v2/processes/connectors/sql/postgres.py +5 -5
- unstructured_ingest/v2/processes/connectors/sql/singlestore.py +5 -5
- unstructured_ingest/v2/processes/connectors/sql/snowflake.py +5 -5
- unstructured_ingest/v2/processes/connectors/sql/sql.py +72 -66
- unstructured_ingest/v2/processes/connectors/sql/sqlite.py +5 -5
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +9 -31
- {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/METADATA +20 -15
- {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/RECORD +87 -79
- {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/top_level.txt +0 -0
unstructured_ingest/v2/processes/connectors/azure_ai_search.py

@@ -1,7 +1,7 @@
 import json
+from contextlib import contextmanager
 from dataclasses import dataclass, field
-from
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Generator

 from pydantic import Field, Secret

@@ -49,29 +49,33 @@ class AzureAISearchConnectionConfig(ConnectionConfig):
     access_config: Secret[AzureAISearchAccessConfig]

     @requires_dependencies(["azure.search", "azure.core"], extras="azure-ai-search")
-
+    @contextmanager
+    def get_search_client(self) -> Generator["SearchClient", None, None]:
         from azure.core.credentials import AzureKeyCredential
         from azure.search.documents import SearchClient

-
+        with SearchClient(
             endpoint=self.endpoint,
             index_name=self.index,
             credential=AzureKeyCredential(
                 self.access_config.get_secret_value().azure_ai_search_key
             ),
-        )
+        ) as client:
+            yield client

     @requires_dependencies(["azure.search", "azure.core"], extras="azure-ai-search")
-
+    @contextmanager
+    def get_search_index_client(self) -> Generator["SearchIndexClient", None, None]:
         from azure.core.credentials import AzureKeyCredential
         from azure.search.documents.indexes import SearchIndexClient

-
+        with SearchIndexClient(
             endpoint=self.endpoint,
             credential=AzureKeyCredential(
                 self.access_config.get_secret_value().azure_ai_search_key
             ),
-        )
+        ) as search_index_client:
+            yield search_index_client


 class AzureAISearchUploadStagerConfig(UploadStagerConfig):
@@ -92,14 +96,13 @@ class AzureAISearchUploadStager(UploadStager):
         default_factory=lambda: AzureAISearchUploadStagerConfig()
     )

-
-    def conform_dict(data: dict, file_data: FileData) -> dict:
+    def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
         """
         updates the dictionary that is from each Element being converted into a dict/json
         into a dictionary that conforms to the schema expected by the
         Azure Cognitive Search index
         """
-
+        data = element_dict.copy()
         data["id"] = get_enhanced_element_id(element_dict=data, file_data=file_data)
         data[RECORD_ID_LABEL] = file_data.identifier

@@ -140,31 +143,6 @@ class AzureAISearchUploadStager(UploadStager):
             data["metadata"]["page_number"] = str(page_number)
         return data

-    def run(
-        self,
-        file_data: FileData,
-        elements_filepath: Path,
-        output_dir: Path,
-        output_filename: str,
-        **kwargs: Any,
-    ) -> Path:
-        with open(elements_filepath) as elements_file:
-            elements_contents = json.load(elements_file)
-
-        conformed_elements = [
-            self.conform_dict(data=element, file_data=file_data) for element in elements_contents
-        ]
-
-        if Path(output_filename).suffix != ".json":
-            output_filename = f"{output_filename}.json"
-        else:
-            output_filename = f"{Path(output_filename).stem}.json"
-        output_path = Path(output_dir) / Path(f"{output_filename}.json")
-        output_path.parent.mkdir(parents=True, exist_ok=True)
-        with open(output_path, "w") as output_file:
-            json.dump(conformed_elements, output_file, indent=2)
-        return output_path
-

 @dataclass
 class AzureAISearchUploader(Uploader):
@@ -270,9 +248,7 @@ class AzureAISearchUploader(Uploader):
             logger.error(f"failed to validate connection: {e}", exc_info=True)
             raise DestinationConnectionError(f"failed to validate connection: {e}")

-    def
-        with path.open("r") as file:
-            elements_dict = json.load(file)
+    def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
         logger.info(
             f"writing document batches to destination"
             f" endpoint at {str(self.connection_config.endpoint)}"
@@ -287,7 +263,7 @@ class AzureAISearchUploader(Uploader):

         batch_size = self.upload_config.batch_size
         with self.connection_config.get_search_client() as search_client:
-            for chunk in batch_generator(
+            for chunk in batch_generator(data, batch_size):
                 self.write_dict(elements_dict=chunk, search_client=search_client)  # noqa: E203

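The hunks above replace the plain client factory methods with `@contextmanager` generators, so callers acquire and release the Azure SDK clients through a `with` block. A minimal sketch of the same idiom, using a stand-in client class instead of the real `azure.search.documents.SearchClient` (all names here are illustrative, not the connector's actual API):

```python
from contextlib import contextmanager
from typing import Generator


class FakeSearchClient:
    """Stand-in for an SDK client that is itself a context manager."""

    def __enter__(self) -> "FakeSearchClient":
        return self

    def __exit__(self, *exc) -> None:
        print("client closed")

    def upload_documents(self, documents: list[dict]) -> None:
        print(f"uploaded {len(documents)} documents")


@contextmanager
def get_search_client() -> Generator[FakeSearchClient, None, None]:
    # Mirrors the pattern in the diff: enter the SDK client here so it is
    # closed automatically when the caller's `with` block exits.
    with FakeSearchClient() as client:
        yield client


if __name__ == "__main__":
    with get_search_client() as search_client:
        search_client.upload_documents([{"id": "1", "text": "hello"}])
```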
unstructured_ingest/v2/processes/connectors/chroma.py

@@ -1,7 +1,5 @@
-import json
 from dataclasses import dataclass, field
 from datetime import date, datetime
-from pathlib import Path
 from typing import TYPE_CHECKING, Annotated, Any, Optional

 from dateutil import parser
@@ -42,7 +40,6 @@ class ChromaAccessConfig(AccessConfig):


 class ChromaConnectionConfig(ConnectionConfig):
-    collection_name: str = Field(description="The name of the Chroma collection to write into.")
     access_config: Secret[ChromaAccessConfig] = Field(
         default=ChromaAccessConfig(), validate_default=True
     )
@@ -62,6 +59,32 @@ class ChromaConnectionConfig(ConnectionConfig):
     )
     connector_type: str = Field(default=CONNECTOR_TYPE, init=False)

+    @requires_dependencies(["chromadb"], extras="chroma")
+    def get_client(self) -> "Client":
+        import chromadb
+
+        access_config = self.access_config.get_secret_value()
+        if path := self.path:
+            return chromadb.PersistentClient(
+                path=path,
+                settings=access_config.settings,
+                tenant=self.tenant,
+                database=self.database,
+            )
+
+        elif (host := self.host) and (port := self.port):
+            return chromadb.HttpClient(
+                host=host,
+                port=str(port),
+                ssl=self.ssl,
+                headers=access_config.headers,
+                settings=access_config.settings,
+                tenant=self.tenant,
+                database=self.database,
+            )
+        else:
+            raise ValueError("Chroma connector requires either path or host and port to be set.")
+

 class ChromaUploadStagerConfig(UploadStagerConfig):
     pass
@@ -82,11 +105,11 @@ class ChromaUploadStager(UploadStager):
             logger.debug(f"date {date_string} string not a timestamp: {e}")
             return parser.parse(date_string)

-
-    def conform_dict(data: dict, file_data: FileData) -> dict:
+    def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
         """
         Prepares dictionary in the format that Chroma requires
         """
+        data = element_dict.copy()
         return {
             "id": get_enhanced_element_id(element_dict=data, file_data=file_data),
             "embedding": data.pop("embeddings", None),
@@ -94,26 +117,9 @@ class ChromaUploadStager(UploadStager):
             "metadata": flatten_dict(data, separator="-", flatten_lists=True, remove_none=True),
         }

-    def run(
-        self,
-        elements_filepath: Path,
-        file_data: FileData,
-        output_dir: Path,
-        output_filename: str,
-        **kwargs: Any,
-    ) -> Path:
-        with open(elements_filepath) as elements_file:
-            elements_contents = json.load(elements_file)
-        conformed_elements = [
-            self.conform_dict(data=element, file_data=file_data) for element in elements_contents
-        ]
-        output_path = Path(output_dir) / Path(f"{output_filename}.json")
-        with open(output_path, "w") as output_file:
-            json.dump(conformed_elements, output_file)
-        return output_path
-

 class ChromaUploaderConfig(UploaderConfig):
+    collection_name: str = Field(description="The name of the Chroma collection to write into.")
     batch_size: int = Field(default=100, description="Number of records per batch")


@@ -125,37 +131,11 @@ class ChromaUploader(Uploader):

     def precheck(self) -> None:
         try:
-            self.
+            self.connection_config.get_client()
         except Exception as e:
             logger.error(f"failed to validate connection: {e}", exc_info=True)
             raise DestinationConnectionError(f"failed to validate connection: {e}")

-    @requires_dependencies(["chromadb"], extras="chroma")
-    def create_client(self) -> "Client":
-        import chromadb
-
-        access_config = self.connection_config.access_config.get_secret_value()
-        if self.connection_config.path:
-            return chromadb.PersistentClient(
-                path=self.connection_config.path,
-                settings=access_config.settings,
-                tenant=self.connection_config.tenant,
-                database=self.connection_config.database,
-            )
-
-        elif self.connection_config.host and self.connection_config.port:
-            return chromadb.HttpClient(
-                host=self.connection_config.host,
-                port=self.connection_config.port,
-                ssl=self.connection_config.ssl,
-                headers=access_config.headers,
-                settings=access_config.settings,
-                tenant=self.connection_config.tenant,
-                database=self.connection_config.database,
-            )
-        else:
-            raise ValueError("Chroma connector requires either path or host and port to be set.")
-
     @DestinationConnectionError.wrap
     def upsert_batch(self, collection, batch):

@@ -189,19 +169,16 @@ class ChromaUploader(Uploader):
         )
         return chroma_dict

-    def
-        with path.open("r") as file:
-            elements_dict = json.load(file)
-
+    def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
         logger.info(
-            f"writing {len(
-            f"collection {self.
+            f"writing {len(data)} objects to destination "
+            f"collection {self.upload_config.collection_name} "
             f"at {self.connection_config.host}",
         )
-        client = self.
+        client = self.connection_config.get_client()

-        collection = client.get_or_create_collection(name=self.
-        for chunk in batch_generator(
+        collection = client.get_or_create_collection(name=self.upload_config.collection_name)
+        for chunk in batch_generator(data, self.upload_config.batch_size):
             self.upsert_batch(collection, self.prepare_chroma_list(chunk))

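As in the Azure connector, the Chroma stager drops its bespoke `run()` file handling and keeps only the per-element `conform_dict` transform, leaving serialization to the shared `UploadStager` base class. A simplified sketch of that division of labor (the stand-in base class below approximates, and does not reproduce, the real `upload_stager.py` interface):

```python
import json
from pathlib import Path
from typing import Any


class SimpleUploadStager:
    """Simplified stand-in for the shared UploadStager base class."""

    def conform_dict(self, element_dict: dict, file_data: Any) -> dict:
        raise NotImplementedError

    def run(
        self,
        elements_filepath: Path,
        output_dir: Path,
        output_filename: str,
        file_data: Any = None,
        **kwargs: Any,
    ) -> Path:
        # Shared file handling: read staged elements, conform each one, write JSON back out.
        with open(elements_filepath) as f:
            elements = json.load(f)
        conformed = [self.conform_dict(e, file_data) for e in elements]
        output_path = Path(output_dir) / f"{Path(output_filename).stem}.json"
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w") as f:
            json.dump(conformed, f, indent=2)
        return output_path


class ChromaLikeStager(SimpleUploadStager):
    # Connector-specific work is now only the per-element transform.
    def conform_dict(self, element_dict: dict, file_data: Any) -> dict:
        data = element_dict.copy()
        return {"id": data.get("element_id"), "document": data.get("text")}


if __name__ == "__main__":
    import tempfile

    workdir = Path(tempfile.mkdtemp())
    elements_file = workdir / "elements.json"
    elements_file.write_text(json.dumps([{"element_id": "abc", "text": "hello"}]))

    staged = ChromaLikeStager().run(elements_file, workdir, "staged-output")
    print(staged.read_text())  # prints the conformed elements as JSON
```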
unstructured_ingest/v2/processes/connectors/couchbase.py

@@ -1,13 +1,12 @@
 import hashlib
-import json
-import sys
 import time
+from contextlib import contextmanager
 from dataclasses import dataclass, field
 from datetime import timedelta
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Generator, List

-from pydantic import Field, Secret
+from pydantic import BaseModel, Field, Secret

 from unstructured_ingest.error import (
     DestinationConnectionError,
@@ -18,6 +17,8 @@ from unstructured_ingest.utils.data_prep import batch_generator, flatten_dict
 from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.v2.interfaces import (
     AccessConfig,
+    BatchFileData,
+    BatchItem,
     ConnectionConfig,
     Downloader,
     DownloaderConfig,
@@ -40,11 +41,20 @@ from unstructured_ingest.v2.processes.connector_registry import (

 if TYPE_CHECKING:
     from couchbase.cluster import Cluster
+    from couchbase.collection import Collection

 CONNECTOR_TYPE = "couchbase"
 SERVER_API_VERSION = "1"


+class CouchbaseAdditionalMetadata(BaseModel):
+    bucket: str
+
+
+class CouchbaseBatchFileData(BatchFileData):
+    additional_metadata: CouchbaseAdditionalMetadata
+
+
 class CouchbaseAccessConfig(AccessConfig):
     password: str = Field(description="The password for the Couchbase server")

@@ -65,7 +75,8 @@ class CouchbaseConnectionConfig(ConnectionConfig):
     access_config: Secret[CouchbaseAccessConfig]

     @requires_dependencies(["couchbase"], extras="couchbase")
-
+    @contextmanager
+    def get_client(self) -> Generator["Cluster", None, None]:
         from couchbase.auth import PasswordAuthenticator
         from couchbase.cluster import Cluster
         from couchbase.options import ClusterOptions
@@ -73,9 +84,14 @@ class CouchbaseConnectionConfig(ConnectionConfig):
         auth = PasswordAuthenticator(self.username, self.access_config.get_secret_value().password)
         options = ClusterOptions(auth)
         options.apply_profile("wan_development")
-        cluster =
-
-
+        cluster = None
+        try:
+            cluster = Cluster(self.connection_string, options)
+            cluster.wait_until_ready(timedelta(seconds=5))
+            yield cluster
+        finally:
+            if cluster:
+                cluster.close()


 class CouchbaseUploadStagerConfig(UploadStagerConfig):
@@ -88,32 +104,16 @@ class CouchbaseUploadStager(UploadStager):
         default_factory=lambda: CouchbaseUploadStagerConfig()
     )

-    def
-
-
-
-
-
-
-
-            elements_contents = json.load(elements_file)
-
-        output_elements = []
-        for element in elements_contents:
-            new_doc = {
-                element["element_id"]: {
-                    "embedding": element.get("embeddings", None),
-                    "text": element.get("text", None),
-                    "metadata": element.get("metadata", None),
-                    "type": element.get("type", None),
-                }
+    def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
+        data = element_dict.copy()
+        return {
+            data["element_id"]: {
+                "embedding": data.get("embeddings", None),
+                "text": data.get("text", None),
+                "metadata": data.get("metadata", None),
+                "type": data.get("type", None),
             }
-
-
-        output_path = Path(output_dir) / Path(f"{output_filename}.json")
-        with open(output_path, "w") as output_file:
-            json.dump(output_elements, output_file)
-        return output_path
+        }


 class CouchbaseUploaderConfig(UploaderConfig):
@@ -128,26 +128,26 @@ class CouchbaseUploader(Uploader):

     def precheck(self) -> None:
         try:
-            self.connection_config.
+            self.connection_config.get_client()
         except Exception as e:
             logger.error(f"Failed to validate connection {e}", exc_info=True)
             raise DestinationConnectionError(f"failed to validate connection: {e}")

-    def
-        with path.open("r") as file:
-            elements_dict = json.load(file)
+    def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
         logger.info(
-            f"writing {len(
+            f"writing {len(data)} objects to destination "
             f"bucket, {self.connection_config.bucket} "
             f"at {self.connection_config.connection_string}",
         )
-
-
-
-
+        with self.connection_config.get_client() as client:
+            bucket = client.bucket(self.connection_config.bucket)
+            scope = bucket.scope(self.connection_config.scope)
+            collection = scope.collection(self.connection_config.collection)

-
-
+            for chunk in batch_generator(data, self.upload_config.batch_size):
+                collection.upsert_multi(
+                    {doc_id: doc for doc in chunk for doc_id, doc in doc.items()}
+                )


 class CouchbaseIndexerConfig(IndexerConfig):
@@ -162,7 +162,7 @@ class CouchbaseIndexer(Indexer):

     def precheck(self) -> None:
         try:
-            self.connection_config.
+            self.connection_config.get_client()
         except Exception as e:
             logger.error(f"Failed to validate connection {e}", exc_info=True)
             raise DestinationConnectionError(f"failed to validate connection: {e}")
@@ -180,41 +180,31 @@ class CouchbaseIndexer(Indexer):
         attempts = 0
         while attempts < max_attempts:
             try:
-
-
-
-
+                with self.connection_config.get_client() as client:
+                    result = client.query(query)
+                    document_ids = [row["id"] for row in result]
+                    return document_ids
             except Exception as e:
                 attempts += 1
                 time.sleep(3)
                 if attempts == max_attempts:
                     raise SourceConnectionError(f"failed to get document ids: {e}")

-    def run(self, **kwargs: Any) -> Generator[
+    def run(self, **kwargs: Any) -> Generator[CouchbaseBatchFileData, None, None]:
         ids = self._get_doc_ids()
-
-        id_batches = [
-            ids[i * self.index_config.batch_size : (i + 1) * self.index_config.batch_size]
-            for i in range(
-                (len(ids) + self.index_config.batch_size - 1) // self.index_config.batch_size
-            )
-        ]
-        for batch in id_batches:
+        for batch in batch_generator(ids, self.index_config.batch_size):
             # Make sure the hash is always a positive number to create identified
-
-            yield FileData(
-                identifier=identified,
+            yield CouchbaseBatchFileData(
                 connector_type=CONNECTOR_TYPE,
-                doc_type="batch",
                 metadata=FileDataSourceMetadata(
                     url=f"{self.connection_config.connection_string}/"
                     f"{self.connection_config.bucket}",
                     date_processed=str(time.time()),
                 ),
-                additional_metadata=
-
-
-
+                additional_metadata=CouchbaseAdditionalMetadata(
+                    bucket=self.connection_config.bucket
+                ),
+                batch_items=[BatchItem(identifier=b) for b in batch],
             )

@@ -251,7 +241,7 @@ class CouchbaseDownloader(Downloader):
         return concatenated_values

     def generate_download_response(
-        self, result: dict, bucket: str, file_data:
+        self, result: dict, bucket: str, file_data: CouchbaseBatchFileData
     ) -> DownloadResponse:
         record_id = result[self.download_config.collection_id]
         filename_id = self.get_identifier(bucket=bucket, record_id=record_id)
@@ -271,44 +261,53 @@ class CouchbaseDownloader(Downloader):
                 exc_info=True,
             )
             raise SourceConnectionNetworkError(f"failed to download file {file_data.identifier}")
-
-
-
-
-
-
-
-
-
-
-
-
-
-                },
-            ),
-        ),
-            path=download_path,
+        cast_file_data = FileData.cast(file_data=file_data)
+        cast_file_data.identifier = filename_id
+        cast_file_data.metadata.date_processed = str(time.time())
+        cast_file_data.metadata.record_locator = {
+            "connection_string": self.connection_config.connection_string,
+            "bucket": bucket,
+            "scope": self.connection_config.scope,
+            "collection": self.connection_config.collection,
+            "document_id": record_id,
+        }
+        return super().generate_download_response(
+            file_data=cast_file_data,
+            download_path=download_path,
         )

     def run(self, file_data: FileData, **kwargs: Any) -> download_responses:
-
-
+        couchbase_file_data = CouchbaseBatchFileData.cast(file_data=file_data)
+        bucket_name: str = couchbase_file_data.additional_metadata.bucket
+        ids: list[str] = [item.identifier for item in couchbase_file_data.batch_items]

-
-
-
-
+        with self.connection_config.get_client() as client:
+            bucket = client.bucket(bucket_name)
+            scope = bucket.scope(self.connection_config.scope)
+            collection = scope.collection(self.connection_config.collection)

-
-
+            download_resp = self.process_all_doc_ids(ids, collection, bucket_name, file_data)
+            return list(download_resp)

-    def process_doc_id(
+    def process_doc_id(
+        self,
+        doc_id: str,
+        collection: "Collection",
+        bucket_name: str,
+        file_data: CouchbaseBatchFileData,
+    ):
         result = collection.get(doc_id)
         return self.generate_download_response(
             result=result.content_as[dict], bucket=bucket_name, file_data=file_data
         )

-    def process_all_doc_ids(
+    def process_all_doc_ids(
+        self,
+        ids: list[str],
+        collection: "Collection",
+        bucket_name: str,
+        file_data: CouchbaseBatchFileData,
+    ):
         for doc_id in ids:
             yield self.process_doc_id(doc_id, collection, bucket_name, file_data)

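The indexer above swaps its hand-rolled slicing for `batch_generator` from `unstructured_ingest.utils.data_prep` and yields typed `CouchbaseBatchFileData` records. A plausible reimplementation of just the batching behavior the diff relies on (not the library's actual code):

```python
from typing import Any, Generator


def batch_generator(items: list[Any], batch_size: int = 100) -> Generator[list[Any], None, None]:
    """Yield successive fixed-size slices of a list; the final slice may be shorter."""
    for start in range(0, len(items), batch_size):
        yield items[start : start + batch_size]


if __name__ == "__main__":
    doc_ids = [f"doc-{i}" for i in range(7)]
    for batch in batch_generator(doc_ids, batch_size=3):
        print(batch)  # three batches: sizes 3, 3, and 1
```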
unstructured_ingest/v2/processes/connectors/delta_table.py

@@ -11,6 +11,7 @@ import pandas as pd
 from pydantic import Field, Secret

 from unstructured_ingest.error import DestinationConnectionError
+from unstructured_ingest.utils.data_prep import get_data_df
 from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.utils.table import convert_to_pandas_dataframe
 from unstructured_ingest.v2.interfaces import (
@@ -28,6 +29,7 @@ from unstructured_ingest.v2.processes.connector_registry import DestinationRegis
 CONNECTOR_TYPE = "delta_table"


+@requires_dependencies(["deltalake"], extras="delta-table")
 def write_deltalake_with_error_handling(queue, **kwargs):
     from deltalake.writer import write_deltalake

@@ -136,39 +138,7 @@ class DeltaTableUploader(Uploader):
             logger.error(f"failed to validate connection: {e}", exc_info=True)
             raise DestinationConnectionError(f"failed to validate connection: {e}")

-    def
-        logger.debug(f"uploading content from {len(csv_paths)} csv files")
-        df = pd.concat((pd.read_csv(path) for path in csv_paths), ignore_index=True)
-        return df
-
-    def process_json(self, json_paths: list[Path]) -> pd.DataFrame:
-        logger.debug(f"uploading content from {len(json_paths)} json files")
-        all_records = []
-        for p in json_paths:
-            with open(p) as json_file:
-                all_records.extend(json.load(json_file))
-
-        return pd.DataFrame(data=all_records)
-
-    def process_parquet(self, parquet_paths: list[Path]) -> pd.DataFrame:
-        logger.debug(f"uploading content from {len(parquet_paths)} parquet files")
-        df = pd.concat((pd.read_parquet(path) for path in parquet_paths), ignore_index=True)
-        return df
-
-    def read_dataframe(self, path: Path) -> pd.DataFrame:
-        if path.suffix == ".csv":
-            return self.process_csv(csv_paths=[path])
-        elif path.suffix == ".json":
-            return self.process_json(json_paths=[path])
-        elif path.suffix == ".parquet":
-            return self.process_parquet(parquet_paths=[path])
-        else:
-            raise ValueError(f"Unsupported file type, must be parquet, json or csv file: {path}")
-
-    @requires_dependencies(["deltalake"], extras="delta-table")
-    def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
-
-        df = self.read_dataframe(path)
+    def upload_dataframe(self, df: pd.DataFrame, file_data: FileData) -> None:
         updated_upload_path = os.path.join(
             self.connection_config.table_uri, file_data.source_identifiers.relative_path
         )
@@ -203,6 +173,14 @@ class DeltaTableUploader(Uploader):
             logger.error(f"Exception occurred in write_deltalake: {error_message}")
             raise RuntimeError(f"Error in write_deltalake: {error_message}")

+    def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
+        df = pd.DataFrame(data=data)
+        self.upload_dataframe(df=df, file_data=file_data)
+
+    def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
+        df = get_data_df(path)
+        self.upload_dataframe(df=df, file_data=file_data)
+

 delta_table_destination_entry = DestinationRegistryEntry(
     connection_config=DeltaTableConnectionConfig,