unstructured-ingest 1.0.1__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of unstructured-ingest might be problematic.

unstructured_ingest/__version__.py
@@ -1 +1 @@
-__version__ = "1.0.1"  # pragma: no cover
+__version__ = "1.0.4"  # pragma: no cover

unstructured_ingest/data_types/file_data.py
@@ -1,6 +1,6 @@
 import json
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any, Optional, Union
 from uuid import NAMESPACE_DNS, uuid5

 from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator
@@ -29,7 +29,7 @@ class FileDataSourceMetadata(BaseModel):
     date_created: Optional[str] = None
     date_modified: Optional[str] = None
     date_processed: Optional[str] = None
-    permissions_data: Optional[list[dict[str, Any]]] = None
+    permissions_data: Union[list[dict[str, Any]], dict[str, Any], None] = None
     filesize_bytes: Optional[int] = None


unstructured_ingest/processes/connectors/astradb.py
@@ -1,5 +1,7 @@
+import asyncio
 import csv
 import hashlib
+import os
 import re
 from dataclasses import dataclass, field
 from pathlib import Path
@@ -8,7 +10,6 @@ from typing import TYPE_CHECKING, Any, Generator, Optional

 from pydantic import BaseModel, Field, Secret

-from unstructured_ingest import __name__ as integration_name
 from unstructured_ingest.__version__ import __version__ as integration_version
 from unstructured_ingest.data_types.file_data import (
     BatchFileData,
@@ -83,10 +84,8 @@ class AstraDBConnectionConfig(ConnectionConfig):

         # Create a client object to interact with the Astra DB
         # caller_name/version for Astra DB tracking
-        return AstraDBClient(
-            caller_name=integration_name,
-            caller_version=integration_version,
-        )
+        user_agent = os.getenv("UNSTRUCTURED_USER_AGENT", "unstructuredio_oss")
+        return AstraDBClient(callers=[(user_agent, integration_version)])


 def get_astra_db(
@@ -141,7 +140,7 @@ async def get_async_astra_collection(
     )

     # Get async collection from AsyncDatabase
-    async_astra_db_collection = await async_astra_db.get_collection(name=collection_name)
+    async_astra_db_collection = async_astra_db.get_collection(name=collection_name)
     return async_astra_db_collection


@@ -360,13 +359,22 @@ class AstraDBUploader(Uploader):
     upload_config: AstraDBUploaderConfig
     connector_type: str = CONNECTOR_TYPE

+    def is_async(self) -> bool:
+        return True
+
     def init(self, **kwargs: Any) -> None:
         self.create_destination(**kwargs)

+    @requires_dependencies(["astrapy"], extras="astradb")
     def precheck(self) -> None:
         try:
             if self.upload_config.collection_name:
-                self.get_collection(collection_name=self.upload_config.collection_name).options()
+                collection = get_astra_collection(
+                    connection_config=self.connection_config,
+                    collection_name=self.upload_config.collection_name,
+                    keyspace=self.upload_config.keyspace,
+                )
+                collection.options()
             else:
                 # check for db connection only if collection name is not provided
                 get_astra_db(
@@ -377,17 +385,7 @@ class AstraDBUploader(Uploader):
             logger.error(f"Failed to validate connection {e}", exc_info=True)
             raise DestinationConnectionError(f"failed to validate connection: {e}")

-    @requires_dependencies(["astrapy"], extras="astradb")
-    def get_collection(self, collection_name: Optional[str] = None) -> "AstraDBCollection":
-        return get_astra_collection(
-            connection_config=self.connection_config,
-            collection_name=collection_name or self.upload_config.collection_name,
-            keyspace=self.upload_config.keyspace,
-        )
-
     def _collection_exists(self, collection_name: str):
-        from astrapy.exceptions import CollectionNotFoundException
-
         collection = get_astra_collection(
             connection_config=self.connection_config,
             collection_name=collection_name,
@@ -397,8 +395,10 @@ class AstraDBUploader(Uploader):
         try:
             collection.options()
             return True
-        except CollectionNotFoundException:
-            return False
+        except RuntimeError as e:
+            if "not found" in str(e):
+                return False
+            raise DestinationConnectionError(f"failed to check if astra collection exists : {e}")
         except Exception as e:
             logger.error(f"failed to check if astra collection exists : {e}")
             raise DestinationConnectionError(f"failed to check if astra collection exists : {e}")
@@ -422,6 +422,8 @@ class AstraDBUploader(Uploader):
         self.upload_config.collection_name = collection_name

         if not self._collection_exists(collection_name):
+            from astrapy.info import CollectionDefinition
+
             astra_db = get_astra_db(
                 connection_config=self.connection_config, keyspace=self.upload_config.keyspace
             )
@@ -429,44 +431,56 @@ class AstraDBUploader(Uploader):
                 f"creating default astra collection '{collection_name}' with dimension "
                 f"{vector_length} and metric {similarity_metric}"
             )
-            astra_db.create_collection(
-                collection_name,
-                dimension=vector_length,
-                metric=similarity_metric,
+            definition = (
+                CollectionDefinition.builder()
+                .set_vector_dimension(dimension=vector_length)
+                .set_vector_metric(similarity_metric)
+                .build()
             )
+            (astra_db.create_collection(collection_name, definition=definition),)
             return True
         logger.debug(f"collection with name '{collection_name}' already exists, skipping creation")
         return False

-    def delete_by_record_id(self, collection: "AstraDBCollection", file_data: FileData):
+    async def delete_by_record_id(self, collection: "AstraDBAsyncCollection", file_data: FileData):
         logger.debug(
             f"deleting records from collection {collection.name} "
             f"with {self.upload_config.record_id_key} "
             f"set to {file_data.identifier}"
         )
         delete_filter = {self.upload_config.record_id_key: {"$eq": file_data.identifier}}
-        delete_resp = collection.delete_many(filter=delete_filter)
+        delete_resp = await collection.delete_many(filter=delete_filter)
         logger.debug(
             f"deleted {delete_resp.deleted_count} records from collection {collection.name}"
         )

-    def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
+    async def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
         logger.info(
             f"writing {len(data)} objects to destination "
             f"collection {self.upload_config.collection_name}"
         )

         astra_db_batch_size = self.upload_config.batch_size
-        collection = self.get_collection()
-
-        self.delete_by_record_id(collection=collection, file_data=file_data)
+        async_astra_collection = await get_async_astra_collection(
+            connection_config=self.connection_config,
+            collection_name=self.upload_config.collection_name,
+            keyspace=self.upload_config.keyspace,
+        )

-        for chunk in batch_generator(data, astra_db_batch_size):
-            collection.insert_many(chunk)
+        await self.delete_by_record_id(collection=async_astra_collection, file_data=file_data)
+        await asyncio.gather(
+            *[
+                async_astra_collection.insert_many(chunk)
+                for chunk in batch_generator(data, astra_db_batch_size)
+            ]
+        )

-    def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
+    async def run_async(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
         data = get_json_data(path=path)
-        self.run_data(data=data, file_data=file_data, **kwargs)
+        await self.run_data(data=data, file_data=file_data)
+
+    def run(self, **kwargs: Any) -> Any:
+        raise NotImplementedError("Use astradb run_async instead")


 astra_db_source_entry = SourceRegistryEntry(
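
Note: the uploader now deletes any previously ingested records for the file and then inserts all batches concurrently. A minimal standalone sketch of that gather-over-batches pattern (the collection object and batch size here are illustrative, not the package's API):

import asyncio


def batch_generator(items, batch_size):
    # yield fixed-size chunks, mirroring the uploader's batching
    for i in range(0, len(items), batch_size):
        yield items[i : i + batch_size]


async def write_batches(collection, records, batch_size=20):
    # submit every chunk at once and await them together instead of looping sequentially
    await asyncio.gather(
        *[collection.insert_many(chunk) for chunk in batch_generator(records, batch_size)]
    )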

unstructured_ingest/processes/connectors/confluence.py
@@ -1,7 +1,8 @@
+from collections import OrderedDict
 from contextlib import contextmanager
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import TYPE_CHECKING, Generator, List, Optional
+from typing import TYPE_CHECKING, Generator, List, Optional, Tuple

 from pydantic import Field, Secret

@@ -135,35 +136,46 @@ class ConfluenceIndexer(Indexer):
             logger.error(f"Failed to connect to Confluence: {e}", exc_info=True)
             raise SourceConnectionError(f"Failed to connect to Confluence: {e}")

-    def _get_space_ids(self) -> List[str]:
+    def _get_space_ids_and_keys(self) -> List[Tuple[str, int]]:
+        """
+        Get a list of space IDs and keys from Confluence.
+
+        Example space ID (numerical): 98503
+        Example space key (str): "SD"
+        """
         spaces = self.index_config.spaces
         if spaces:
-            return spaces
+            with self.connection_config.get_client() as client:
+                space_ids_and_keys = []
+                for space_key in spaces:
+                    space = client.get_space(space_key)
+                    space_ids_and_keys.append((space_key, space["id"]))
+                return space_ids_and_keys
         else:
             with self.connection_config.get_client() as client:
                 all_spaces = client.get_all_spaces(limit=self.index_config.max_num_of_spaces)
-                space_ids = [space["key"] for space in all_spaces["results"]]
-                return space_ids
+                space_ids_and_keys = [(space["key"], space["id"]) for space in all_spaces["results"]]
+                return space_ids_and_keys

-    def _get_docs_ids_within_one_space(self, space_id: str) -> List[dict]:
+    def _get_docs_ids_within_one_space(self, space_key: str) -> List[dict]:
         with self.connection_config.get_client() as client:
             pages = client.get_all_pages_from_space(
-                space=space_id,
+                space=space_key,
                 start=0,
                 limit=self.index_config.max_num_of_docs_from_each_space,
                 expand=None,
-                content_type="page",
+                content_type="page",  # blogpost and comment types not currently supported
                 status=None,
             )
-            doc_ids = [{"space_id": space_id, "doc_id": page["id"]} for page in pages]
+            doc_ids = [{"space_id": space_key, "doc_id": page["id"]} for page in pages]
             return doc_ids

     def run(self) -> Generator[FileData, None, None]:
         from time import time

-        space_ids = self._get_space_ids()
-        for space_id in space_ids:
-            doc_ids = self._get_docs_ids_within_one_space(space_id)
+        space_ids_and_keys = self._get_space_ids_and_keys()
+        for space_key, space_id in space_ids_and_keys:
+            doc_ids = self._get_docs_ids_within_one_space(space_key)
             for doc in doc_ids:
                 doc_id = doc["doc_id"]
                 # Build metadata
@@ -171,18 +183,19 @@ class ConfluenceIndexer(Indexer):
                     date_processed=str(time()),
                     url=f"{self.connection_config.url}/pages/{doc_id}",
                     record_locator={
-                        "space_id": space_id,
+                        "space_id": space_key,
                         "document_id": doc_id,
                     },
                 )
                 additional_metadata = {
-                    "space_id": space_id,
+                    "space_key": space_key,
+                    "space_id": space_id,  # diff from record_locator space_id (which is space_key)
                     "document_id": doc_id,
                 }

                 # Construct relative path and filename
                 filename = f"{doc_id}.html"
-                relative_path = str(Path(space_id) / filename)
+                relative_path = str(Path(space_key) / filename)

                 source_identifiers = SourceIdentifiers(
                     filename=filename,
@@ -201,7 +214,9 @@


 class ConfluenceDownloaderConfig(DownloaderConfig, HtmlMixin):
-    pass
+    max_num_metadata_permissions: int = Field(
+        250, description="Approximate maximum number of permissions included in metadata"
+    )


 @dataclass
@@ -209,6 +224,8 @@ class ConfluenceDownloader(Downloader):
     connection_config: ConfluenceConnectionConfig
     download_config: ConfluenceDownloaderConfig = field(default_factory=ConfluenceDownloaderConfig)
     connector_type: str = CONNECTOR_TYPE
+    _permissions_cache: dict = field(default_factory=OrderedDict)
+    _permissions_cache_max_size: int = 5

     def download_embedded_files(
         self, session, html: str, current_file_data: FileData
@@ -233,6 +250,145 @@ class ConfluenceDownloader(Downloader):
             session=session,
         )

+    def parse_permissions(self, doc_permissions: dict, space_permissions: list) -> dict[str, dict]:
+        """
+        Parses document and space permissions to determine final user/group roles.
+
+        :param doc_permissions: dict containing document-level restrictions
+            - doc_permissions type in Confluence: ContentRestrictionArray
+        :param space_permissions: list of space-level permission assignments
+            - space_permissions type in Confluence: list of SpacePermissionAssignment
+        :return: dict with operation as keys and each maps to dict with "users" and "groups"
+
+        Get document permissions. If they exist, they will override space level permissions.
+        Otherwise, apply relevant space permissions (read, administer, delete)
+        """
+
+        # Separate flags to track if view or edit is restricted at the page level
+        page_view_restricted = bool(
+            doc_permissions.get("read", {}).get("restrictions", {}).get("user", {}).get("results")
+            or doc_permissions.get("read", {})
+            .get("restrictions", {})
+            .get("group", {})
+            .get("results")
+        )
+
+        page_edit_restricted = bool(
+            doc_permissions.get("update", {}).get("restrictions", {}).get("user", {}).get("results")
+            or doc_permissions.get("update", {})
+            .get("restrictions", {})
+            .get("group", {})
+            .get("results")
+        )
+
+        permissions_by_role = {
+            "read": {"users": set(), "groups": set()},
+            "update": {"users": set(), "groups": set()},
+            "delete": {"users": set(), "groups": set()},
+        }
+
+        total_permissions = 0
+
+        for action, permissions in doc_permissions.items():
+            restrictions_dict = permissions.get("restrictions", {})
+
+            for entity_type, entity_data in restrictions_dict.items():
+                for entity in entity_data.get("results"):
+                    entity_id = entity["accountId"] if entity_type == "user" else entity["id"]
+                    permissions_by_role[action][f"{entity_type}s"].add(entity_id)
+                    total_permissions += 1
+                    # edit permission implies view permission
+                    if action == "update":
+                        permissions_by_role["read"][f"{entity_type}s"].add(entity_id)
+                        # total_permissions += 1
+                        # ^ omitting to not double count an entity.
+                        # may result in a higher total count than max_num_metadata_permissions
+
+        for space_perm in space_permissions:
+            if total_permissions < self.download_config.max_num_metadata_permissions:
+                space_operation = space_perm["operation"]["key"]
+                space_target_type = space_perm["operation"]["targetType"]
+                space_entity_id = space_perm["principal"]["id"]
+                space_entity_type = space_perm["principal"]["type"]
+
+                # Apply space-level view permissions if no page restrictions exist
+                if (
+                    space_target_type == "space"
+                    and space_operation == "read"
+                    and not page_view_restricted
+                ):
+                    permissions_by_role["read"][f"{space_entity_type}s"].add(space_entity_id)
+                    total_permissions += 1
+
+                # Administer permission includes view + edit. Apply if not page restricted
+                elif space_target_type == "space" and space_operation == "administer":
+                    if not page_view_restricted:
+                        permissions_by_role["read"][f"{space_entity_type}s"].add(space_entity_id)
+                        total_permissions += 1
+                    if not page_edit_restricted:
+                        permissions_by_role["update"][f"{space_entity_type}s"].add(
+                            space_entity_id
+                        )
+                        # total_permissions += 1
+                        # ^ omitting to not double count an entity.
+                        # may result in a higher total count than max_num_metadata_permissions

+                # Add the "delete page" space permissions if there are other page permissions
+                elif (
+                    space_target_type == "page"
+                    and space_operation == "delete"
+                    and space_entity_id in permissions_by_role["read"][f"{space_entity_type}s"]
+                ):
+                    permissions_by_role["delete"][f"{space_entity_type}s"].add(space_entity_id)
+                    total_permissions += 1
+
+        # turn sets into sorted lists for consistency and json serialization
+        for role_dict in permissions_by_role.values():
+            for key in role_dict:
+                role_dict[key] = sorted(role_dict[key])
+
+        return permissions_by_role
+
+    def _get_permissions_for_space(self, space_id: int) -> Optional[List[dict]]:
+        if space_id in self._permissions_cache:
+            self._permissions_cache.move_to_end(space_id)  # mark recent use
+            return self._permissions_cache[space_id]
+        else:
+            with self.connection_config.get_client() as client:
+                try:
+                    # TODO limit the total number of results being called.
+                    # not yet implemented because this client call doesn't allow for filtering for
+                    # certain operations, so adding a limit here would result in too little data.
+                    space_permissions = []
+                    space_permissions_result = client.get(f"/api/v2/spaces/{space_id}/permissions")
+                    space_permissions.extend(space_permissions_result["results"])
+                    if space_permissions_result["_links"].get("next"):  # pagination
+                        while space_permissions_result.get("next"):
+                            space_permissions_result = client.get(space_permissions_result["next"])
+                            space_permissions.extend(space_permissions_result["results"])

+                    if len(self._permissions_cache) >= self._permissions_cache_max_size:
+                        self._permissions_cache.popitem(last=False)  # LRU/FIFO eviction
+                    self._permissions_cache[space_id] = space_permissions
+
+                    return space_permissions
+                except Exception as e:
+                    logger.debug(f"Could not retrieve permissions for space {space_id}: {e}")
+                    return None
+
+    def _parse_permissions_for_doc(self, doc_id: str, space_permissions: list) -> Optional[dict]:
+        with self.connection_config.get_client() as client:
+            try:
+                doc_permissions = client.get_all_restrictions_for_content(content_id=doc_id)
+                parsed_permissions_dict = self.parse_permissions(doc_permissions, space_permissions)
+
+            except Exception as e:
+                # skip writing any permission metadata
+                logger.debug(f"Could not retrieve permissions for doc {doc_id}: {e}")
+                return None
+
+        return parsed_permissions_dict
+
     def run(self, file_data: FileData, **kwargs) -> download_responses:
         from bs4 import BeautifulSoup

@@ -268,6 +424,14 @@ class ConfluenceDownloader(Downloader):
             soup = BeautifulSoup(content, "html.parser")
             f.write(soup.prettify())

+        # Get document permissions and update metadata
+        space_id = file_data.additional_metadata["space_id"]
+        space_perm = self._get_permissions_for_space(space_id)  # must be the id, NOT the space key
+        if space_perm:
+            combined_doc_permissions = self._parse_permissions_for_doc(doc_id, space_perm)
+            if combined_doc_permissions:
+                file_data.metadata.permissions_data = combined_doc_permissions
+
         # Update file_data with metadata
         file_data.metadata.date_created = page["history"]["createdDate"]
         file_data.metadata.date_modified = page["version"]["when"]
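
Note: parse_permissions returns a dict keyed by operation, each mapping to sorted lists of user and group IDs, and that dict is what the downloader writes into file_data.metadata.permissions_data. An illustrative (made-up) example of the resulting shape:

permissions_data = {
    "read": {"users": ["712020:user-a", "712020:user-b"], "groups": ["confluence-users"]},
    "update": {"users": ["712020:user-a"], "groups": []},
    "delete": {"users": [], "groups": []},
}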

unstructured_ingest/processes/connectors/google_drive.py
@@ -391,6 +391,7 @@ class GoogleDriveIndexer(Indexer):
     ) -> list[FileData]:
         root_info = self.get_root_info(files_client=files_client, object_id=object_id)
         if not self.is_dir(root_info):
+            root_info["permissions"] = self.extract_permissions(root_info.get("permissions"))
             data = [self.map_file_data(root_info)]
         else:
             file_contents = self.get_paginated_results(
@@ -400,11 +401,49 @@ class GoogleDriveIndexer(Indexer):
                 recursive=recursive,
                 previous_path=root_info["name"],
             )
-            data = [self.map_file_data(f=f) for f in file_contents]
+            data = []
+            for f in file_contents:
+                f["permissions"] = self.extract_permissions(f.get("permissions"))
+                data.append(self.map_file_data(f=f))
         for d in data:
             d.metadata.record_locator["drive_id"]: object_id
         return data

+    def extract_permissions(self, permissions: list[dict]) -> dict:
+        if not permissions:
+            return {}
+
+        # https://developers.google.com/workspace/drive/api/guides/ref-roles
+        role_mapping = {
+            "owner": ["read", "update", "delete"],
+            "organizer": ["read", "update", "delete"],
+            "fileOrganizer": ["read", "update"],
+            "writer": ["read", "update"],
+            "commenter": ["read"],
+            "reader": ["read"],
+        }
+
+        normalized_permissions = {
+            "read": {"users": set(), "groups": set()},
+            "update": {"users": set(), "groups": set()},
+            "delete": {"users": set(), "groups": set()},
+        }
+
+        for item in permissions:
+            # https://developers.google.com/workspace/drive/api/reference/rest/v3/permissions
+            # ignore permissions for "anyone" and "domain"
+            if item["type"] in ["user", "group"]:
+                type_key = item["type"] + "s"
+                for operation in role_mapping[item["role"]]:
+                    normalized_permissions[operation][type_key].add(item["id"])
+
+        # turn sets into sorted lists for consistency and json serialization
+        for role_dict in normalized_permissions.values():
+            for key in role_dict:
+                role_dict[key] = sorted(role_dict[key])
+
+        return normalized_permissions
+
     def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
         with self.connection_config.get_client() as client:
             for f in self.get_files(
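
Note: extract_permissions applies the Drive role mapping to user and group entries only; "anyone" and "domain" entries are skipped. A hypothetical input and the normalized output it would produce (IDs are made up):

drive_permissions = [
    {"id": "user-123", "type": "user", "role": "writer"},
    {"id": "group-456", "type": "group", "role": "reader"},
    {"id": "anyoneWithLink", "type": "anyone", "role": "reader"},  # ignored: not user/group
]

# extract_permissions(drive_permissions) would yield:
# {
#     "read": {"users": ["user-123"], "groups": ["group-456"]},
#     "update": {"users": ["user-123"], "groups": []},
#     "delete": {"users": [], "groups": []},
# }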

unstructured_ingest-1.0.4.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: unstructured_ingest
-Version: 1.0.1
+Version: 1.0.4
 Summary: Local ETL data pipeline to get data RAG ready
 Author-email: Unstructured Technologies <devops@unstructuredai.io>
 License-Expression: Apache-2.0
@@ -28,7 +28,7 @@ Provides-Extra: airtable
 Requires-Dist: pandas; extra == 'airtable'
 Requires-Dist: pyairtable; extra == 'airtable'
 Provides-Extra: astradb
-Requires-Dist: astrapy; extra == 'astradb'
+Requires-Dist: astrapy>2.0.0; extra == 'astradb'
 Provides-Extra: azure
 Requires-Dist: adlfs; extra == 'azure'
 Requires-Dist: fsspec; extra == 'azure'

unstructured_ingest-1.0.4.dist-info/RECORD
@@ -1,5 +1,5 @@
 unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
-unstructured_ingest/__version__.py,sha256=Bkcw0TdkF4pWY_01piNW3D1XaG9Q-r4aIMSbnIeStCE,42
+unstructured_ingest/__version__.py,sha256=MizK8W2VY6aXUudG1jVogTj7GJ2uwduw5iryFPwi0tM,42
 unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
 unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
 unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
@@ -19,7 +19,7 @@ unstructured_ingest/cli/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5N
 unstructured_ingest/cli/utils/click.py,sha256=1_eJgrwS2DFBl1jZPLsj1vgVgR7agFBIEBe4A_n7mH4,7827
 unstructured_ingest/cli/utils/model_conversion.py,sha256=hMjAfOVvO1RXTDsw26mmersdncvddkb_rP9JTEgVVCw,7649
 unstructured_ingest/data_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-unstructured_ingest/data_types/file_data.py,sha256=J0RQa7YXhhxiLVzhPbF5Hl2nzSpxLFK9vrP6RTBWlSg,3833
+unstructured_ingest/data_types/file_data.py,sha256=E-09hkI4ms4yj-g_aQPIrnm0kbiZLwukCnbwp6OpobQ,3859
 unstructured_ingest/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unstructured_ingest/embed/azure_openai.py,sha256=_-I-nwd-wdCiKkSdYBL4UKrTZ2UPWsM_0T69fcObs_I,1707
 unstructured_ingest/embed/bedrock.py,sha256=t58V_QQjWPO62CTuP0aLFMDisPeXpxG2xSFGUhN-JvI,7726
@@ -62,16 +62,16 @@ unstructured_ingest/processes/partitioner.py,sha256=Kn_BSFYvOkwo8fqThw_cOpgD0Um-
 unstructured_ingest/processes/uncompress.py,sha256=o9JL3Bza4KPUTmrB39-v_5SuK_fYwhwFAhjQi2Pm8h8,2426
 unstructured_ingest/processes/connectors/__init__.py,sha256=cR4ZH2dpPod7QR6OsgMx8X9kpFcEc1TVfQndUNoKGzI,6812
 unstructured_ingest/processes/connectors/airtable.py,sha256=smx5qBSUKwM8V6Xcc7ikrf8hYQUQ94YrB1L0WVeRDv0,9024
-unstructured_ingest/processes/connectors/astradb.py,sha256=ONt8vHv5h8B6goGba9l0YPS0y5EnSAoowtfq92-E-RY,18307
+unstructured_ingest/processes/connectors/astradb.py,sha256=Ob9wQgDxa6BXDPZBOqooNKQgvjIZcMwIe4fW3VlI7h8,18929
 unstructured_ingest/processes/connectors/azure_ai_search.py,sha256=szhSRXzUHk0DE2hGFfjGc_jNFzlUwiRlCtIkuu7tmnk,11524
 unstructured_ingest/processes/connectors/chroma.py,sha256=q5_Fu4xb6_W_NyrPxVa3-jVwZLqVdlBNlR4dFvbd7l0,7235
-unstructured_ingest/processes/connectors/confluence.py,sha256=BbZ-Ecdcn92X8dHQ0egEJtBoX16gM0-zMcBLdn-wQsM,12090
+unstructured_ingest/processes/connectors/confluence.py,sha256=7uRgmpX3NcVzA2V7VcngzjMQ69pS0J2wu6cbMp7AFA0,20739
 unstructured_ingest/processes/connectors/couchbase.py,sha256=KCHoYDNya9B05NIB5D78zXoizFyfpJRepcYBe1nLSOs,12298
 unstructured_ingest/processes/connectors/delta_table.py,sha256=2DFox_Vzoopt_D3Jy3rCjrrTGMutG2INIrwCeoIohRY,7340
 unstructured_ingest/processes/connectors/discord.py,sha256=6yEJ_agfKUqsV43wFsbMkcd8lcLJC0uqbo4izjdZ3rU,5294
 unstructured_ingest/processes/connectors/github.py,sha256=smHCz6jOH1p_hW2S25bYunBBj_pYjz8HTw6wkzaJz_A,7765
 unstructured_ingest/processes/connectors/gitlab.py,sha256=6h1CdqznJmzeWxGfXrFLdNdT23PExGnUMMX7usK_4Kk,10013
-unstructured_ingest/processes/connectors/google_drive.py,sha256=CqUwtK4NhKhNfozsunVzFUsKMYBEgRS1eci2pIZLnJE,20055
+unstructured_ingest/processes/connectors/google_drive.py,sha256=mcplAPbQ_A_MIsIXWc7K0YtEXMIMmluefsrzddJQNFw,21674
 unstructured_ingest/processes/connectors/jira.py,sha256=eG8yTn8ZVEz7rBJ-ha8i_d9hEh6VALN6QJT_vbYvbL0,17142
 unstructured_ingest/processes/connectors/kdbai.py,sha256=XhxYpKSAoFPBsDQWwNuLX03DCxOVr7yquj9VYM55Rtc,5174
 unstructured_ingest/processes/connectors/local.py,sha256=LluTLKv4g7FbJb4A6vuSxI9VhzKZuuQUpDS-cVNAQ2g,7426
@@ -230,8 +230,8 @@ unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01q
 unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
 unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
 unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
-unstructured_ingest-1.0.1.dist-info/METADATA,sha256=k_kEG2BSsnNaIyDSJWiciUW0Z-HDiPF_flO6kLjn8QI,8713
-unstructured_ingest-1.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-unstructured_ingest-1.0.1.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
-unstructured_ingest-1.0.1.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
-unstructured_ingest-1.0.1.dist-info/RECORD,,
+unstructured_ingest-1.0.4.dist-info/METADATA,sha256=ZrV3WL4OOzjU53IKTL59o3dr5UIDRrSGewp-tFGfSF8,8719
+unstructured_ingest-1.0.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+unstructured_ingest-1.0.4.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
+unstructured_ingest-1.0.4.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
+unstructured_ingest-1.0.4.dist-info/RECORD,,