unstructured-ingest 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff shows the changes between package versions as published to their public registries and is provided for informational purposes only.

Potentially problematic release: this version of unstructured-ingest might be problematic.
Files changed (55)
  1. test/integration/connectors/elasticsearch/__init__.py +0 -0
  2. test/integration/connectors/elasticsearch/conftest.py +34 -0
  3. test/integration/connectors/elasticsearch/test_elasticsearch.py +308 -0
  4. test/integration/connectors/elasticsearch/test_opensearch.py +302 -0
  5. test/integration/connectors/sql/test_postgres.py +10 -4
  6. test/integration/connectors/sql/test_singlestore.py +8 -4
  7. test/integration/connectors/sql/test_snowflake.py +10 -6
  8. test/integration/connectors/sql/test_sqlite.py +4 -4
  9. test/integration/connectors/test_astradb.py +50 -3
  10. test/integration/connectors/test_delta_table.py +46 -0
  11. test/integration/connectors/test_kafka.py +40 -6
  12. test/integration/connectors/test_lancedb.py +210 -0
  13. test/integration/connectors/test_milvus.py +141 -0
  14. test/integration/connectors/test_mongodb.py +332 -0
  15. test/integration/connectors/test_pinecone.py +53 -1
  16. test/integration/connectors/utils/docker.py +81 -15
  17. test/integration/connectors/utils/validation.py +10 -0
  18. test/integration/connectors/weaviate/__init__.py +0 -0
  19. test/integration/connectors/weaviate/conftest.py +15 -0
  20. test/integration/connectors/weaviate/test_local.py +131 -0
  21. unstructured_ingest/__version__.py +1 -1
  22. unstructured_ingest/pipeline/reformat/embedding.py +1 -1
  23. unstructured_ingest/utils/data_prep.py +9 -1
  24. unstructured_ingest/v2/processes/connectors/__init__.py +3 -16
  25. unstructured_ingest/v2/processes/connectors/astradb.py +2 -2
  26. unstructured_ingest/v2/processes/connectors/azure_ai_search.py +4 -0
  27. unstructured_ingest/v2/processes/connectors/delta_table.py +20 -4
  28. unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py +19 -0
  29. unstructured_ingest/v2/processes/connectors/{elasticsearch.py → elasticsearch/elasticsearch.py} +92 -46
  30. unstructured_ingest/v2/processes/connectors/{opensearch.py → elasticsearch/opensearch.py} +1 -1
  31. unstructured_ingest/v2/processes/connectors/google_drive.py +1 -1
  32. unstructured_ingest/v2/processes/connectors/kafka/kafka.py +6 -0
  33. unstructured_ingest/v2/processes/connectors/lancedb/__init__.py +17 -0
  34. unstructured_ingest/v2/processes/connectors/lancedb/aws.py +43 -0
  35. unstructured_ingest/v2/processes/connectors/lancedb/azure.py +43 -0
  36. unstructured_ingest/v2/processes/connectors/lancedb/gcp.py +44 -0
  37. unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +161 -0
  38. unstructured_ingest/v2/processes/connectors/lancedb/local.py +44 -0
  39. unstructured_ingest/v2/processes/connectors/milvus.py +72 -27
  40. unstructured_ingest/v2/processes/connectors/mongodb.py +122 -111
  41. unstructured_ingest/v2/processes/connectors/pinecone.py +24 -7
  42. unstructured_ingest/v2/processes/connectors/sql/sql.py +97 -26
  43. unstructured_ingest/v2/processes/connectors/weaviate/__init__.py +25 -0
  44. unstructured_ingest/v2/processes/connectors/weaviate/cloud.py +164 -0
  45. unstructured_ingest/v2/processes/connectors/weaviate/embedded.py +90 -0
  46. unstructured_ingest/v2/processes/connectors/weaviate/local.py +73 -0
  47. unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +299 -0
  48. {unstructured_ingest-0.3.0.dist-info → unstructured_ingest-0.3.2.dist-info}/METADATA +19 -19
  49. {unstructured_ingest-0.3.0.dist-info → unstructured_ingest-0.3.2.dist-info}/RECORD +54 -33
  50. unstructured_ingest/v2/processes/connectors/weaviate.py +0 -242
  51. /test/integration/connectors/{test_azure_cog_search.py → test_azure_ai_search.py} +0 -0
  52. {unstructured_ingest-0.3.0.dist-info → unstructured_ingest-0.3.2.dist-info}/LICENSE.md +0 -0
  53. {unstructured_ingest-0.3.0.dist-info → unstructured_ingest-0.3.2.dist-info}/WHEEL +0 -0
  54. {unstructured_ingest-0.3.0.dist-info → unstructured_ingest-0.3.2.dist-info}/entry_points.txt +0 -0
  55. {unstructured_ingest-0.3.0.dist-info → unstructured_ingest-0.3.2.dist-info}/top_level.txt +0 -0
unstructured_ingest/v2/processes/connectors/mongodb.py

@@ -1,6 +1,7 @@
 import json
 import sys
-from dataclasses import dataclass, field
+from contextlib import contextmanager
+from dataclasses import dataclass, replace
 from datetime import datetime
 from pathlib import Path
 from time import time
@@ -12,6 +13,7 @@ from unstructured_ingest.__version__ import __version__ as unstructured_version
 from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError
 from unstructured_ingest.utils.data_prep import batch_generator, flatten_dict
 from unstructured_ingest.utils.dep_check import requires_dependencies
+from unstructured_ingest.v2.constants import RECORD_ID_LABEL
 from unstructured_ingest.v2.interfaces import (
     AccessConfig,
     ConnectionConfig,
@@ -24,8 +26,6 @@ from unstructured_ingest.v2.interfaces import (
     SourceIdentifiers,
     Uploader,
     UploaderConfig,
-    UploadStager,
-    UploadStagerConfig,
     download_responses,
 )
 from unstructured_ingest.v2.logger import logger
@@ -36,6 +36,7 @@ from unstructured_ingest.v2.processes.connector_registry import (
 
 if TYPE_CHECKING:
     from pymongo import MongoClient
+    from pymongo.collection import Collection
 
 CONNECTOR_TYPE = "mongodb"
 SERVER_API_VERSION = "1"
@@ -54,18 +55,37 @@ class MongoDBConnectionConfig(ConnectionConfig):
         description="hostname or IP address or Unix domain socket path of a single mongod or "
         "mongos instance to connect to, or a list of hostnames",
     )
-    database: Optional[str] = Field(default=None, description="database name to connect to")
-    collection: Optional[str] = Field(default=None, description="collection name to connect to")
     port: int = Field(default=27017)
     connector_type: str = Field(default=CONNECTOR_TYPE, init=False)
 
+    @contextmanager
+    @requires_dependencies(["pymongo"], extras="mongodb")
+    def get_client(self) -> Generator["MongoClient", None, None]:
+        from pymongo import MongoClient
+        from pymongo.driver_info import DriverInfo
+        from pymongo.server_api import ServerApi
 
-class MongoDBUploadStagerConfig(UploadStagerConfig):
-    pass
+        access_config = self.access_config.get_secret_value()
+        if uri := access_config.uri:
+            client_kwargs = {
+                "host": uri,
+                "server_api": ServerApi(version=SERVER_API_VERSION),
+                "driver": DriverInfo(name="unstructured", version=unstructured_version),
+            }
+        else:
+            client_kwargs = {
+                "host": self.host,
+                "port": self.port,
+                "server_api": ServerApi(version=SERVER_API_VERSION),
+            }
+        with MongoClient(**client_kwargs) as client:
+            yield client
 
 
 class MongoDBIndexerConfig(IndexerConfig):
     batch_size: int = Field(default=100, description="Number of records per batch")
+    database: Optional[str] = Field(default=None, description="database name to connect to")
+    collection: Optional[str] = Field(default=None, description="collection name to connect to")
 
 
 class MongoDBDownloaderConfig(DownloaderConfig):
@@ -81,42 +101,38 @@ class MongoDBIndexer(Indexer):
     def precheck(self) -> None:
         """Validates the connection to the MongoDB server."""
         try:
-            client = self.create_client()
-            client.admin.command("ping")
+            with self.connection_config.get_client() as client:
+                client.admin.command("ping")
+                database_names = client.list_database_names()
+                database_name = self.index_config.database
+                if database_name not in database_names:
+                    raise DestinationConnectionError(
+                        "database {} does not exist: {}".format(
+                            database_name, ", ".join(database_names)
+                        )
+                    )
+                database = client[database_name]
+                collection_names = database.list_collection_names()
+                collection_name = self.index_config.collection
+                if collection_name not in collection_names:
+                    raise SourceConnectionError(
+                        "collection {} does not exist: {}".format(
+                            collection_name, ", ".join(collection_names)
+                        )
+                    )
         except Exception as e:
             logger.error(f"Failed to validate connection: {e}", exc_info=True)
             raise SourceConnectionError(f"Failed to validate connection: {e}")
 
-    @requires_dependencies(["pymongo"], extras="mongodb")
-    def create_client(self) -> "MongoClient":
-        from pymongo import MongoClient
-        from pymongo.driver_info import DriverInfo
-        from pymongo.server_api import ServerApi
-
-        access_config = self.connection_config.access_config.get_secret_value()
-
-        if access_config.uri:
-            return MongoClient(
-                access_config.uri,
-                server_api=ServerApi(version=SERVER_API_VERSION),
-                driver=DriverInfo(name="unstructured", version=unstructured_version),
-            )
-        else:
-            return MongoClient(
-                host=self.connection_config.host,
-                port=self.connection_config.port,
-                server_api=ServerApi(version=SERVER_API_VERSION),
-            )
-
     def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
         """Generates FileData objects for each document in the MongoDB collection."""
-        client = self.create_client()
-        database = client[self.connection_config.database]
-        collection = database[self.connection_config.collection]
+        with self.connection_config.get_client() as client:
+            database = client[self.index_config.database]
+            collection = database[self.index_config.collection]
 
-        # Get list of document IDs
-        ids = collection.distinct("_id")
-        batch_size = self.index_config.batch_size if self.index_config else 100
+            # Get list of document IDs
+            ids = collection.distinct("_id")
+            batch_size = self.index_config.batch_size if self.index_config else 100
 
         for id_batch in batch_generator(ids, batch_size=batch_size):
             # Make sure the hash is always a positive number to create identifier
@@ -125,8 +141,8 @@ class MongoDBIndexer(Indexer):
             metadata = FileDataSourceMetadata(
                 date_processed=str(time()),
                 record_locator={
-                    "database": self.connection_config.database,
-                    "collection": self.connection_config.collection,
+                    "database": self.index_config.database,
+                    "collection": self.index_config.collection,
                 },
             )
 
@@ -177,8 +193,8 @@ class MongoDBDownloader(Downloader):
         from bson.objectid import ObjectId
 
         client = self.create_client()
-        database = client[self.connection_config.database]
-        collection = database[self.connection_config.collection]
+        database = client[file_data.metadata.record_locator["database"]]
+        collection = database[file_data.metadata.record_locator["collection"]]
 
         ids = file_data.additional_metadata.get("ids", [])
         if not ids:
@@ -222,14 +238,12 @@ class MongoDBDownloader(Downloader):
            concatenated_values = "\n".join(str(value) for value in flattened_dict.values())
 
            # Create a FileData object for each document with source_identifiers
-            individual_file_data = FileData(
-                identifier=str(doc_id),
-                connector_type=self.connector_type,
-                source_identifiers=SourceIdentifiers(
-                    filename=str(doc_id),
-                    fullpath=str(doc_id),
-                    rel_path=str(doc_id),
-                ),
+            individual_file_data = replace(file_data)
+            individual_file_data.identifier = str(doc_id)
+            individual_file_data.source_identifiers = SourceIdentifiers(
+                filename=str(doc_id),
+                fullpath=str(doc_id),
+                rel_path=str(doc_id),
            )
 
            # Determine the download path
@@ -247,15 +261,8 @@ class MongoDBDownloader(Downloader):
            individual_file_data.local_download_path = str(download_path)
 
            # Update metadata
-            individual_file_data.metadata = FileDataSourceMetadata(
-                date_created=date_created,  # Include date_created here
-                date_processed=str(time()),
-                record_locator={
-                    "database": self.connection_config.database,
-                    "collection": self.connection_config.collection,
-                    "document_id": str(doc_id),
-                },
-            )
+            individual_file_data.metadata.record_locator["document_id"] = str(doc_id)
+            individual_file_data.metadata.date_created = date_created
 
            download_response = self.generate_download_response(
                file_data=individual_file_data, download_path=download_path
@@ -265,31 +272,14 @@
        return download_responses
 
 
-@dataclass
-class MongoDBUploadStager(UploadStager):
-    upload_stager_config: MongoDBUploadStagerConfig = field(
-        default_factory=lambda: MongoDBUploadStagerConfig()
-    )
-
-    def run(
-        self,
-        elements_filepath: Path,
-        file_data: FileData,
-        output_dir: Path,
-        output_filename: str,
-        **kwargs: Any,
-    ) -> Path:
-        with open(elements_filepath) as elements_file:
-            elements_contents = json.load(elements_file)
-
-        output_path = Path(output_dir) / Path(f"{output_filename}.json")
-        with open(output_path, "w") as output_file:
-            json.dump(elements_contents, output_file)
-        return output_path
-
-
 class MongoDBUploaderConfig(UploaderConfig):
     batch_size: int = Field(default=100, description="Number of records per batch")
+    database: Optional[str] = Field(default=None, description="database name to connect to")
+    collection: Optional[str] = Field(default=None, description="collection name to connect to")
+    record_id_key: str = Field(
+        default=RECORD_ID_LABEL,
+        description="searchable key to find entries for the same record on previous runs",
+    )
 
 
 @dataclass
@@ -300,55 +290,76 @@ class MongoDBUploader(Uploader):
 
     def precheck(self) -> None:
         try:
-            client = self.create_client()
-            client.admin.command("ping")
+            with self.connection_config.get_client() as client:
+                client.admin.command("ping")
+                database_names = client.list_database_names()
+                database_name = self.upload_config.database
+                if database_name not in database_names:
+                    raise DestinationConnectionError(
+                        "database {} does not exist: {}".format(
+                            database_name, ", ".join(database_names)
+                        )
+                    )
+                database = client[database_name]
+                collection_names = database.list_collection_names()
+                collection_name = self.upload_config.collection
+                if collection_name not in collection_names:
+                    raise SourceConnectionError(
+                        "collection {} does not exist: {}".format(
+                            collection_name, ", ".join(collection_names)
+                        )
+                    )
         except Exception as e:
             logger.error(f"failed to validate connection: {e}", exc_info=True)
             raise DestinationConnectionError(f"failed to validate connection: {e}")
 
-    @requires_dependencies(["pymongo"], extras="mongodb")
-    def create_client(self) -> "MongoClient":
-        from pymongo import MongoClient
-        from pymongo.driver_info import DriverInfo
-        from pymongo.server_api import ServerApi
-
-        access_config = self.connection_config.access_config.get_secret_value()
-
-        if access_config.uri:
-            return MongoClient(
-                access_config.uri,
-                server_api=ServerApi(version=SERVER_API_VERSION),
-                driver=DriverInfo(name="unstructured", version=unstructured_version),
-            )
-        else:
-            return MongoClient(
-                host=self.connection_config.host,
-                port=self.connection_config.port,
-                server_api=ServerApi(version=SERVER_API_VERSION),
-            )
+    def can_delete(self, collection: "Collection") -> bool:
+        indexed_keys = []
+        for index in collection.list_indexes():
+            key_bson = index["key"]
+            indexed_keys.extend(key_bson.keys())
+        return self.upload_config.record_id_key in indexed_keys
+
+    def delete_by_record_id(self, collection: "Collection", file_data: FileData) -> None:
+        logger.debug(
+            f"deleting any content with metadata "
+            f"{self.upload_config.record_id_key}={file_data.identifier} "
+            f"from collection: {collection.name}"
+        )
+        query = {self.upload_config.record_id_key: file_data.identifier}
+        delete_results = collection.delete_many(filter=query)
+        logger.info(
+            f"deleted {delete_results.deleted_count} records from collection {collection.name}"
+        )
 
     def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
         with path.open("r") as file:
             elements_dict = json.load(file)
         logger.info(
             f"writing {len(elements_dict)} objects to destination "
-            f"db, {self.connection_config.database}, "
-            f"collection {self.connection_config.collection} "
+            f"db, {self.upload_config.database}, "
+            f"collection {self.upload_config.collection} "
            f"at {self.connection_config.host}",
        )
-        client = self.create_client()
-        db = client[self.connection_config.database]
-        collection = db[self.connection_config.collection]
-        for chunk in batch_generator(elements_dict, self.upload_config.batch_size):
-            collection.insert_many(chunk)
+        # This would typically live in the stager but since no other manipulation
+        # is done, setting the record id field in the uploader
+        for element in elements_dict:
+            element[self.upload_config.record_id_key] = file_data.identifier
+        with self.connection_config.get_client() as client:
+            db = client[self.upload_config.database]
+            collection = db[self.upload_config.collection]
+            if self.can_delete(collection=collection):
+                self.delete_by_record_id(file_data=file_data, collection=collection)
+            else:
+                logger.warning("criteria for deleting previous content not met, skipping")
+            for chunk in batch_generator(elements_dict, self.upload_config.batch_size):
+                collection.insert_many(chunk)
 
 
 mongodb_destination_entry = DestinationRegistryEntry(
     connection_config=MongoDBConnectionConfig,
     uploader=MongoDBUploader,
     uploader_config=MongoDBUploaderConfig,
-    upload_stager=MongoDBUploadStager,
-    upload_stager_config=MongoDBUploadStagerConfig,
 )
 
 mongodb_source_entry = SourceRegistryEntry(
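
In 0.3.2 the MongoDB connector moves the database and collection settings from the connection config onto the indexer and uploader configs, replaces the duplicated create_client() helpers with a single context-managed get_client(), and makes re-runs idempotent by stamping every uploaded element with a record-id field and deleting previously written documents first. Below is a minimal standalone sketch of that pattern using pymongo directly; the database, collection, and field names are illustrative and not taken from the release.

from contextlib import contextmanager
from typing import Generator, Iterable

from pymongo import MongoClient
from pymongo.collection import Collection


@contextmanager
def get_client(host: str = "localhost", port: int = 27017) -> Generator[MongoClient, None, None]:
    # MongoClient supports the context-manager protocol, so the connection is
    # closed automatically when the block exits.
    with MongoClient(host=host, port=port) as client:
        yield client


def upsert_record(
    collection: Collection,
    record_id: str,
    elements: Iterable[dict],
    record_id_key: str = "record_id",
) -> None:
    # Stamp every element with the record id so a later run can find and replace it.
    docs = [{**element, record_id_key: record_id} for element in elements]
    # Only delete when the record-id field is indexed; otherwise skip, since an
    # unindexed delete_many filter could be expensive on a large collection.
    indexed_keys = [key for index in collection.list_indexes() for key in index["key"].keys()]
    if record_id_key in indexed_keys:
        deleted = collection.delete_many({record_id_key: record_id}).deleted_count
        print(f"deleted {deleted} stale documents for {record_id}")
    collection.insert_many(docs)


if __name__ == "__main__":
    with get_client() as client:
        coll = client["ingest_demo"]["elements"]
        coll.create_index("record_id")
        upsert_record(coll, "doc-123", [{"text": "hello"}, {"text": "world"}])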
unstructured_ingest/v2/processes/connectors/pinecone.py

@@ -30,6 +30,7 @@ if TYPE_CHECKING:
 CONNECTOR_TYPE = "pinecone"
 MAX_PAYLOAD_SIZE = 2 * 1024 * 1024  # 2MB
 MAX_POOL_THREADS = 100
+MAX_METADATA_BYTES = 40960  # 40KB https://docs.pinecone.io/reference/quotas-and-limits#hard-limits
 
 
 class PineconeAccessConfig(AccessConfig):
@@ -103,6 +104,10 @@ class PineconeUploaderConfig(UploaderConfig):
         default=None,
         description="The namespace to write to. If not specified, the default namespace is used",
     )
+    record_id_key: str = Field(
+        default=RECORD_ID_LABEL,
+        description="searchable key to find entries for the same record on previous runs",
+    )
 
 
 @dataclass
@@ -133,6 +138,13 @@ class PineconeUploadStager(UploadStager):
             remove_none=True,
         )
         metadata[RECORD_ID_LABEL] = file_data.identifier
+        metadata_size_bytes = len(json.dumps(metadata).encode())
+        if metadata_size_bytes > MAX_METADATA_BYTES:
+            logger.info(
+                f"Metadata size is {metadata_size_bytes} bytes, which exceeds the limit of"
+                f" {MAX_METADATA_BYTES} bytes per vector. Dropping the metadata."
+            )
+            metadata = {}
 
         return {
             "id": str(uuid.uuid4()),
@@ -183,23 +195,28 @@ class PineconeUploader(Uploader):
 
     def pod_delete_by_record_id(self, file_data: FileData) -> None:
         logger.debug(
-            f"deleting any content with metadata {RECORD_ID_LABEL}={file_data.identifier} "
+            f"deleting any content with metadata "
+            f"{self.upload_config.record_id_key}={file_data.identifier} "
             f"from pinecone pod index"
         )
         index = self.connection_config.get_index(pool_threads=MAX_POOL_THREADS)
-        delete_kwargs = {"filter": {RECORD_ID_LABEL: {"$eq": file_data.identifier}}}
+        delete_kwargs = {
+            "filter": {self.upload_config.record_id_key: {"$eq": file_data.identifier}}
+        }
         if namespace := self.upload_config.namespace:
             delete_kwargs["namespace"] = namespace
 
         resp = index.delete(**delete_kwargs)
         logger.debug(
-            f"deleted any content with metadata {RECORD_ID_LABEL}={file_data.identifier} "
+            f"deleted any content with metadata "
+            f"{self.upload_config.record_id_key}={file_data.identifier} "
             f"from pinecone index: {resp}"
         )
 
     def serverless_delete_by_record_id(self, file_data: FileData) -> None:
         logger.debug(
-            f"deleting any content with metadata {RECORD_ID_LABEL}={file_data.identifier} "
+            f"deleting any content with metadata "
+            f"{self.upload_config.record_id_key}={file_data.identifier} "
             f"from pinecone serverless index"
         )
         index = self.connection_config.get_index(pool_threads=MAX_POOL_THREADS)
@@ -209,7 +226,7 @@ class PineconeUploader(Uploader):
             return
         dimension = index_stats["dimension"]
         query_params = {
-            "filter": {RECORD_ID_LABEL: {"$eq": file_data.identifier}},
+            "filter": {self.upload_config.record_id_key: {"$eq": file_data.identifier}},
             "vector": [0] * dimension,
             "top_k": total_vectors,
         }
@@ -226,7 +243,8 @@ class PineconeUploader(Uploader):
             delete_params["namespace"] = namespace
         index.delete(**delete_params)
         logger.debug(
-            f"deleted any content with metadata {RECORD_ID_LABEL}={file_data.identifier} "
+            f"deleted any content with metadata "
+            f"{self.upload_config.record_id_key}={file_data.identifier} "
             f"from pinecone index"
         )
 
@@ -269,7 +287,6 @@ class PineconeUploader(Uploader):
             f"writing a total of {len(elements_dict)} elements via"
             f" document batches to destination"
             f" index named {self.connection_config.index_name}"
-            f" with batch size {self.upload_config.batch_size}"
         )
         # Determine if serverless or pod based index
         pinecone_client = self.connection_config.get_client()
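
The Pinecone changes make the record-id metadata key configurable (record_id_key, defaulting to RECORD_ID_LABEL) and guard against Pinecone's documented 40 KB per-vector metadata limit by dropping oversized metadata instead of failing the upsert. Here is a self-contained sketch of that guard and of the delete filter it enables; the helper names are illustrative and do not come from the library.

import json
import logging

logger = logging.getLogger(__name__)

# Pinecone's documented hard limit on metadata per vector (40 KB).
MAX_METADATA_BYTES = 40960


def prepare_vector(
    vector_id: str,
    values: list[float],
    metadata: dict,
    record_id_key: str,
    record_id: str,
) -> dict:
    # Stamp the metadata with the configurable record-id key so a later run can
    # locate vectors that belong to the same source record.
    metadata = {**metadata, record_id_key: record_id}
    size = len(json.dumps(metadata).encode())
    if size > MAX_METADATA_BYTES:
        # Mirrors the 0.3.2 behaviour: oversized metadata is dropped entirely
        # rather than aborting the upload of the vector itself.
        logger.info("metadata is %d bytes (limit %d); dropping it", size, MAX_METADATA_BYTES)
        metadata = {}
    return {"id": vector_id, "values": values, "metadata": metadata}


def delete_filter(record_id_key: str, record_id: str) -> dict:
    # Metadata filter used with an index delete/query call to target prior entries.
    return {record_id_key: {"$eq": record_id}}


if __name__ == "__main__":
    vec = prepare_vector("v1", [0.1, 0.2], {"filename": "a.pdf"}, "record_id", "doc-123")
    print(vec, delete_filter("record_id", "doc-123"))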
unstructured_ingest/v2/processes/connectors/sql/sql.py

@@ -16,6 +16,8 @@ from dateutil import parser
 from pydantic import Field, Secret
 
 from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError
+from unstructured_ingest.utils.data_prep import split_dataframe
+from unstructured_ingest.v2.constants import RECORD_ID_LABEL
 from unstructured_ingest.v2.interfaces import (
     AccessConfig,
     ConnectionConfig,
@@ -236,35 +238,25 @@ class SQLUploadStagerConfig(UploadStagerConfig):
 class SQLUploadStager(UploadStager):
     upload_stager_config: SQLUploadStagerConfig = field(default_factory=SQLUploadStagerConfig)
 
-    def run(
-        self,
-        elements_filepath: Path,
-        file_data: FileData,
-        output_dir: Path,
-        output_filename: str,
-        **kwargs: Any,
-    ) -> Path:
-        with open(elements_filepath) as elements_file:
-            elements_contents: list[dict] = json.load(elements_file)
-        output_path = Path(output_dir) / Path(f"{output_filename}.json")
-        output_path.parent.mkdir(parents=True, exist_ok=True)
-
+    @staticmethod
+    def conform_dict(data: dict, file_data: FileData) -> pd.DataFrame:
+        working_data = data.copy()
         output = []
-        for data in elements_contents:
-            metadata: dict[str, Any] = data.pop("metadata", {})
+        for element in working_data:
+            metadata: dict[str, Any] = element.pop("metadata", {})
             data_source = metadata.pop("data_source", {})
             coordinates = metadata.pop("coordinates", {})
 
-            data.update(metadata)
-            data.update(data_source)
-            data.update(coordinates)
+            element.update(metadata)
+            element.update(data_source)
+            element.update(coordinates)
 
-            data["id"] = str(uuid.uuid4())
+            element["id"] = str(uuid.uuid4())
 
             # remove extraneous, not supported columns
-            data = {k: v for k, v in data.items() if k in _COLUMNS}
-
-            output.append(data)
+            element = {k: v for k, v in element.items() if k in _COLUMNS}
+            element[RECORD_ID_LABEL] = file_data.identifier
+            output.append(element)
 
         df = pd.DataFrame.from_dict(output)
         for column in filter(lambda x: x in df.columns, _DATE_COLUMNS):
@@ -281,6 +273,26 @@ class SQLUploadStager(UploadStager):
             ("version", "page_number", "regex_metadata"),
         ):
             df[column] = df[column].apply(str)
+        return df
+
+    def run(
+        self,
+        elements_filepath: Path,
+        file_data: FileData,
+        output_dir: Path,
+        output_filename: str,
+        **kwargs: Any,
+    ) -> Path:
+        with open(elements_filepath) as elements_file:
+            elements_contents: list[dict] = json.load(elements_file)
+
+        df = self.conform_dict(data=elements_contents, file_data=file_data)
+        if Path(output_filename).suffix != ".json":
+            output_filename = f"{output_filename}.json"
+        else:
+            output_filename = f"{Path(output_filename).stem}.json"
+        output_path = Path(output_dir) / Path(f"{output_filename}")
+        output_path.parent.mkdir(parents=True, exist_ok=True)
 
         with output_path.open("w") as output_file:
             df.to_json(output_file, orient="records", lines=True)
@@ -290,6 +302,10 @@ class SQLUploadStager(UploadStager):
 class SQLUploaderConfig(UploaderConfig):
     batch_size: int = Field(default=50, description="Number of records per batch")
     table_name: str = Field(default="elements", description="which table to upload contents to")
+    record_id_key: str = Field(
+        default=RECORD_ID_LABEL,
+        description="searchable key to find entries for the same record on previous runs",
+    )
 
 
 @dataclass
@@ -323,18 +339,45 @@ class SQLUploader(Uploader):
             output.append(tuple(parsed))
         return output
 
+    def _fit_to_schema(self, df: pd.DataFrame, columns: list[str]) -> pd.DataFrame:
+        columns = set(df.columns)
+        schema_fields = set(columns)
+        columns_to_drop = columns - schema_fields
+        missing_columns = schema_fields - columns
+
+        if columns_to_drop:
+            logger.warning(
+                "Following columns will be dropped to match the table's schema: "
+                f"{', '.join(columns_to_drop)}"
+            )
+        if missing_columns:
+            logger.info(
+                "Following null filled columns will be added to match the table's schema:"
+                f" {', '.join(missing_columns)} "
+            )
+
+        df = df.drop(columns=columns_to_drop)
+
+        for column in missing_columns:
+            df[column] = pd.Series()
+
     def upload_contents(self, path: Path) -> None:
         df = pd.read_json(path, orient="records", lines=True)
         df.replace({np.nan: None}, inplace=True)
+        self._fit_to_schema(df=df, columns=self.get_table_columns())
 
         columns = list(df.columns)
         stmt = f"INSERT INTO {self.upload_config.table_name} ({','.join(columns)}) VALUES({','.join([self.values_delimiter for x in columns])})"  # noqa E501
-
-        for rows in pd.read_json(
-            path, orient="records", lines=True, chunksize=self.upload_config.batch_size
-        ):
+        logger.info(
+            f"writing a total of {len(df)} elements via"
+            f" document batches to destination"
+            f" table named {self.upload_config.table_name}"
+            f" with batch size {self.upload_config.batch_size}"
+        )
+        for rows in split_dataframe(df=df, chunk_size=self.upload_config.batch_size):
            with self.connection_config.get_cursor() as cursor:
                values = self.prepare_data(columns, tuple(rows.itertuples(index=False, name=None)))
+                # For debugging purposes:
                # for val in values:
                # try:
                # cursor.execute(stmt, val)
@@ -343,5 +386,33 @@ class SQLUploader(Uploader):
                # print(f"failed to write {len(columns)}, {len(val)}: {stmt} -> {val}")
                cursor.executemany(stmt, values)
 
+    def get_table_columns(self) -> list[str]:
+        with self.connection_config.get_cursor() as cursor:
+            cursor.execute(f"SELECT * from {self.upload_config.table_name}")
+            return [desc[0] for desc in cursor.description]
+
+    def can_delete(self) -> bool:
+        return self.upload_config.record_id_key in self.get_table_columns()
+
+    def delete_by_record_id(self, file_data: FileData) -> None:
+        logger.debug(
+            f"deleting any content with data "
+            f"{self.upload_config.record_id_key}={file_data.identifier} "
+            f"from table {self.upload_config.table_name}"
+        )
+        stmt = f"DELETE FROM {self.upload_config.table_name} WHERE {self.upload_config.record_id_key} = {self.values_delimiter}"  # noqa: E501
+        with self.connection_config.get_cursor() as cursor:
+            cursor.execute(stmt, [file_data.identifier])
+            rowcount = cursor.rowcount
+            logger.info(f"deleted {rowcount} rows from table {self.upload_config.table_name}")
+
     def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
+        if self.can_delete():
+            self.delete_by_record_id(file_data=file_data)
+        else:
+            logger.warning(
+                f"table doesn't contain expected "
+                f"record id column "
+                f"{self.upload_config.record_id_key}, skipping delete"
+            )
         self.upload_contents(path=path)
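
The SQL uploader now stamps each staged row with RECORD_ID_LABEL, deletes prior rows for the same record when the destination table has that column, fits the staged dataframe to the table's schema (dropping unknown columns and adding null-filled missing ones), and batches inserts with the new split_dataframe helper instead of re-reading the JSON file. Below is a hedged sketch of that fit-and-batch flow against a DB-API connection, using sqlite3 for the demo; the helper names here are illustrative, not the library's.

import sqlite3
from typing import Iterator

import pandas as pd


def split_dataframe(df: pd.DataFrame, chunk_size: int) -> Iterator[pd.DataFrame]:
    # Yield successive row slices of at most chunk_size rows.
    for start in range(0, len(df), chunk_size):
        yield df.iloc[start:start + chunk_size]


def fit_to_schema(df: pd.DataFrame, table_columns: list[str]) -> pd.DataFrame:
    # Drop columns the table does not have and add null-filled columns it expects.
    df_columns, schema_fields = set(df.columns), set(table_columns)
    df = df.drop(columns=list(df_columns - schema_fields))
    for missing in schema_fields - df_columns:
        df[missing] = None
    return df


def upload(df: pd.DataFrame, table: str, conn: sqlite3.Connection, batch_size: int = 50) -> None:
    cursor = conn.cursor()
    cursor.execute(f"SELECT * FROM {table} LIMIT 0")  # cheap way to read the column names
    df = fit_to_schema(df, [desc[0] for desc in cursor.description])
    columns = list(df.columns)
    stmt = f"INSERT INTO {table} ({','.join(columns)}) VALUES ({','.join('?' for _ in columns)})"
    for chunk in split_dataframe(df, batch_size):
        cursor.executemany(stmt, list(chunk.itertuples(index=False, name=None)))
    conn.commit()


if __name__ == "__main__":
    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE elements (id TEXT, text TEXT, record_id TEXT)")
    upload(pd.DataFrame([{"id": "1", "text": "hello", "extra": "dropped"}]), "elements", conn)
    print(conn.execute("SELECT * FROM elements").fetchall())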
unstructured_ingest/v2/processes/connectors/weaviate/__init__.py

@@ -0,0 +1,25 @@
+from __future__ import annotations
+
+from unstructured_ingest.v2.processes.connector_registry import (
+    add_destination_entry,
+)
+
+from .cloud import CONNECTOR_TYPE as CLOUD_WEAVIATE_CONNECTOR_TYPE
+from .cloud import weaviate_cloud_destination_entry
+from .embedded import CONNECTOR_TYPE as EMBEDDED_WEAVIATE_CONNECTOR_TYPE
+from .embedded import weaviate_embedded_destination_entry
+from .local import CONNECTOR_TYPE as LOCAL_WEAVIATE_CONNECTOR_TYPE
+from .local import weaviate_local_destination_entry
+from .weaviate import CONNECTOR_TYPE as WEAVIATE_CONNECTOR_TYPE
+from .weaviate import weaviate_destination_entry
+
+add_destination_entry(
+    destination_type=LOCAL_WEAVIATE_CONNECTOR_TYPE, entry=weaviate_local_destination_entry
+)
+add_destination_entry(
+    destination_type=CLOUD_WEAVIATE_CONNECTOR_TYPE, entry=weaviate_cloud_destination_entry
+)
+add_destination_entry(
+    destination_type=EMBEDDED_WEAVIATE_CONNECTOR_TYPE, entry=weaviate_embedded_destination_entry
+)
+add_destination_entry(destination_type=WEAVIATE_CONNECTOR_TYPE, entry=weaviate_destination_entry)
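
The Weaviate destination was split from the single weaviate.py module (removed in file 50) into a package whose cloud, embedded, local, and legacy weaviate variants are each registered at import time through the connector registry. A toy sketch of that registration pattern in isolation; DestinationEntry and the type names below are stand-ins, not the library's actual classes.

from dataclasses import dataclass


@dataclass
class DestinationEntry:
    # Stand-in for the library's DestinationRegistryEntry: just enough to show the pattern.
    connection_config: type
    uploader: type


_DESTINATION_REGISTRY: dict[str, DestinationEntry] = {}


def add_destination_entry(destination_type: str, entry: DestinationEntry) -> None:
    # Registering the same connector type twice is almost certainly a bug, so fail loudly.
    if destination_type in _DESTINATION_REGISTRY:
        raise ValueError(f"destination {destination_type!r} already registered")
    _DESTINATION_REGISTRY[destination_type] = entry


# Each variant module would call this when the package's __init__ imports it,
# which is why importing the package is enough to make every variant
# discoverable by its connector type name.
add_destination_entry("weaviate-local", DestinationEntry(connection_config=dict, uploader=object))
add_destination_entry("weaviate-cloud", DestinationEntry(connection_config=dict, uploader=object))
print(sorted(_DESTINATION_REGISTRY))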