PyPI - unstructured-ingest - Versions diffs - 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl - Mend

unstructured-ingest 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (14) hide show

unstructured_ingest/__version__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.7.0" # pragma: no cover
1	+ __version__ = "0.7.2" # pragma: no cover

unstructured_ingest/logger.py CHANGED Viewed

@@ -1,99 +1,8 @@
-import ast
-import json
 import logging
-import typing as t
 logger = logging.getLogger("unstructured_ingest")
-def default_is_data_sensitive(k: str, v: t.Any) -> bool:
-    sensitive_fields = [
-        "account_name",
-        "client_id",
-    ]
-    sensitive_triggers = ["key", "cred", "token", "password", "oauth", "secret"]
-    return (
-        v
-        and any([s in k.lower() for s in sensitive_triggers])  # noqa: C419
-        or k.lower() in sensitive_fields
-    )
-def hide_sensitive_fields(
-    data: dict, is_sensitive_fn: t.Callable[[str, t.Any], bool] = default_is_data_sensitive
-) -> dict:
-    """
-    Will recursively look through every k, v pair in this dict and any nested ones and run
-    is_sensitive_fn to dynamically redact the value of the k, v pair. Will also check if
-    any string value can be parsed as valid json and process that dict as well and replace
-    the original string with the json.dumps() version of the redacted dict.
-    """
-    new_data = data.copy()
-    for k, v in new_data.items():
-        if is_sensitive_fn(k, v):
-            new_data[k] = "*******"
-        if isinstance(v, dict):
-            new_data[k] = hide_sensitive_fields(v)
-        if isinstance(v, str):
-            # Need to take into account strings generated via json.dumps() or simply printing a dict
-            try:
-                json_data = json.loads(v)
-                if isinstance(json_data, dict):
-                    updated_data = hide_sensitive_fields(json_data)
-                    new_data[k] = json.dumps(updated_data)
-            except json.JSONDecodeError:
-                pass
-    return new_data
-def redact_jsons(s: str) -> str:
-    """
-    Takes in a generic string and pulls out all valid json content. Leverages
-    hide_sensitive_fields() to redact any sensitive information and replaces the
-    original json with the new redacted format. There can be any number of valid
-    jsons in a generic string and this will work. Having extra '{' without a
-    closing '}' will cause this to break though. i.e '{ text, {"a": 3}'.
-    """
-    chars = list(s)
-    if "{" not in chars:
-        return s
-    i = 0
-    jsons = []
-    i = 0
-    while i < len(chars):
-        char = chars[i]
-        if char == "{":
-            stack = [char]
-            current = [char]
-            while len(stack) != 0 and i < len(chars):
-                i += 1
-                char = chars[i]
-                current.append(char)
-                if char == "{":
-                    stack.append(char)
-                if char == "}":
-                    stack.pop(-1)
-            jsons.append("".join(current))
-            continue
-        i += 1
-    for j in jsons:
-        try:
-            formatted_j = json.dumps(json.loads(j))
-        except json.JSONDecodeError:
-            formatted_j = json.dumps(ast.literal_eval(j))
-        hidden_j = json.dumps(hide_sensitive_fields(json.loads(formatted_j)))
-        s = s.replace(j, hidden_j)
-    return s
-class SensitiveFormatter(logging.Formatter):
-    def format(self, record):
-        s = super().format(record=record)
-        return redact_jsons(s)
 def remove_root_handlers(logger: logging.Logger) -> None:
     # NOTE(robinson): in some environments such as Google Colab, there is a root handler
     # that doesn't not mask secrets, meaning sensitive info such as api keys appear in logs.
@@ -106,7 +15,7 @@ def remove_root_handlers(logger: logging.Logger) -> None:
 def ingest_log_streaming_init(level: int) -> None:
     handler = logging.StreamHandler()
     handler.name = "ingest_log_handler"
-    formatter = SensitiveFormatter("%(asctime)s %(processName)-10s %(levelname)-8s %(message)s")
+    formatter = logging.Formatter("%(asctime)s %(processName)-10s %(levelname)-8s %(message)s")
     handler.setFormatter(formatter)
     # Only want to add the handler once
@@ -122,7 +31,7 @@ def make_default_logger(level: int) -> logging.Logger:
     logger = logging.getLogger("unstructured_ingest")
     handler = logging.StreamHandler()
     handler.name = "ingest_log_handler"
-    formatter = SensitiveFormatter("%(asctime)s %(processName)-10s %(levelname)-8s %(message)s")
+    formatter = logging.Formatter("%(asctime)s %(processName)-10s %(levelname)-8s %(message)s")
     handler.setFormatter(formatter)
     logger.addHandler(handler)
     logger.setLevel(level)

unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql ADDED Viewed

@@ -0,0 +1,10 @@
+CREATE TABLE elements (
+    id STRING NOT NULL PRIMARY KEY,
+    record_id STRING NOT NULL,
+    element_id STRING NOT NULL,
+    text STRING,
+    embeddings ARRAY<FLOAT>,
+    type STRING,
+    metadata VARIANT
+);

unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json ADDED Viewed

@@ -0,0 +1,23 @@
+{
+    "properties": [
+        {
+            "dataType": [
+                "text"
+            ],
+            "indexFilterable": true,
+            "indexSearchable": true,
+            "name": "record_id",
+            "tokenization": "word"
+        },
+        {
+            "dataType": [
+                "text"
+            ],
+            "indexFilterable": true,
+            "indexSearchable": true,
+            "name": "text",
+            "tokenization": "word"
+        }
+    ],
+    "vectorizer": "none"
+}

unstructured_ingest/processes/connectors/onedrive.py CHANGED Viewed

@@ -53,11 +53,14 @@ MAX_BYTES_SIZE = 512_000_000
 class OnedriveAccessConfig(AccessConfig):
     client_cred: str = Field(description="Microsoft App client secret")
+    password: Optional[str] = Field(description="Service account password", default=None)
 class OnedriveConnectionConfig(ConnectionConfig):
     client_id: str = Field(description="Microsoft app client ID")
-    user_pname: str = Field(description="User principal name, usually is your Azure AD email.")
+    user_pname: str = Field(
+        description="User principal name or service account, usually your Azure AD email."
+    )
     tenant: str = Field(
         repr=False, description="ID or domain name associated with your Azure AD instance"
     )
@@ -74,25 +77,50 @@ class OnedriveConnectionConfig(ConnectionConfig):
         drive = client.users[self.user_pname].drive
         return drive
-    @requires_dependencies(["msal"], extras="onedrive")
+    @requires_dependencies(["msal", "requests"], extras="onedrive")
     def get_token(self):
         from msal import ConfidentialClientApplication
+        from requests import post
+        if self.access_config.get_secret_value().password:
+            url = f"https://login.microsoftonline.com/{self.tenant}/oauth2/v2.0/token"
+            headers = {"Content-Type": "application/x-www-form-urlencoded"}
+            data = {
+                "grant_type": "password",
+                "username": self.user_pname,
+                "password": self.access_config.get_secret_value().password,
+                "client_id": self.client_id,
+                "client_secret": self.access_config.get_secret_value().client_cred,
+                "scope": "https://graph.microsoft.com/.default",
+            }
+            response = post(url, headers=headers, data=data)
+            if response.status_code == 200:
+                return response.json()
+            else:
+                raise SourceConnectionError(
+                    f"Oauth2 authentication failed with {response.status_code}: {response.text}"
+                )
-        try:
-            app = ConfidentialClientApplication(
-                authority=f"{self.authority_url}/{self.tenant}",
-                client_id=self.client_id,
-                client_credential=self.access_config.get_secret_value().client_cred,
-            )
-            token = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
-        except ValueError as exc:
-            logger.error("Couldn't set up credentials for OneDrive")
-            raise exc
-        if "error" in token:
-            raise SourceConnectionNetworkError(
-                "failed to fetch token, {}: {}".format(token["error"], token["error_description"])
-            )
-        return token
+        else:
+            try:
+                app = ConfidentialClientApplication(
+                    authority=f"{self.authority_url}/{self.tenant}",
+                    client_id=self.client_id,
+                    client_credential=self.access_config.get_secret_value().client_cred,
+                )
+                token = app.acquire_token_for_client(
+                    scopes=["https://graph.microsoft.com/.default"]
+                )
+            except ValueError as exc:
+                logger.error("Couldn't set up credentials.")
+                raise exc
+            if "error" in token:
+                raise SourceConnectionNetworkError(
+                    "failed to fetch token, {}: {}".format(
+                        token["error"], token["error_description"]
+                    )
+                )
+            return token
     @requires_dependencies(["office365"], extras="onedrive")
     def get_client(self) -> "GraphClient":

unstructured_ingest/processes/connectors/sharepoint.py CHANGED Viewed

@@ -100,7 +100,7 @@ class SharepointDownloader(OnedriveDownloader):
     connector_type: str = CONNECTOR_TYPE
     @SourceConnectionNetworkError.wrap
-    @requires_dependencies(["office365"], extras="onedrive")
+    @requires_dependencies(["office365"], extras="sharepoint")
     def _fetch_file(self, file_data: FileData) -> DriveItem:
         from office365.runtime.client_request_exception import ClientRequestException

unstructured_ingest/utils/string_and_date_utils.py CHANGED Viewed

@@ -1,14 +1,14 @@
 import json
 import re
-import typing as t
 from datetime import datetime
+from typing import Any, Union
 from dateutil import parser
 from unstructured_ingest.logger import logger
-def json_to_dict(json_string: str) -> t.Union[str, t.Dict[str, t.Any]]:
+def json_to_dict(json_string: str) -> Union[str, dict[str, Any]]:
     """Helper function attempts to deserialize json string to a dictionary."""
     try:
         return json.loads(json_string)
@@ -24,7 +24,7 @@ def json_to_dict(json_string: str) -> t.Union[str, t.Dict[str, t.Any]]:
     return json_string
-def ensure_isoformat_datetime(timestamp: t.Union[datetime, str]) -> str:
+def ensure_isoformat_datetime(timestamp: Union[datetime, str]) -> str:
     """
     Ensures that the input value is converted to an ISO format datetime string.
     Handles both datetime objects and strings.

{unstructured_ingest-0.7.0.dist-info → unstructured_ingest-0.7.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: unstructured-ingest
-Version: 0.7.0
+Version: 0.7.2
 Summary: A library that prepares raw documents for downstream ML tasks.
 Home-page: https://github.com/Unstructured-IO/unstructured-ingest
 Author: Unstructured Technologies
@@ -22,11 +22,11 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0,<3.14
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
+Requires-Dist: tqdm
+Requires-Dist: opentelemetry-sdk
 Requires-Dist: click
 Requires-Dist: python-dateutil
 Requires-Dist: pydantic>=2.7
-Requires-Dist: opentelemetry-sdk
-Requires-Dist: tqdm
 Requires-Dist: dataclasses_json
 Requires-Dist: numpy
 Requires-Dist: pandas
@@ -103,8 +103,8 @@ Requires-Dist: astrapy; extra == "astradb"
 Requires-Dist: numpy; extra == "astradb"
 Requires-Dist: pandas; extra == "astradb"
 Provides-Extra: azure
-Requires-Dist: fsspec; extra == "azure"
 Requires-Dist: adlfs; extra == "azure"
+Requires-Dist: fsspec; extra == "azure"
 Requires-Dist: numpy; extra == "azure"
 Requires-Dist: pandas; extra == "azure"
 Provides-Extra: azure-ai-search
@@ -112,8 +112,8 @@ Requires-Dist: azure-search-documents; extra == "azure-ai-search"
 Requires-Dist: numpy; extra == "azure-ai-search"
 Requires-Dist: pandas; extra == "azure-ai-search"
 Provides-Extra: biomed
-Requires-Dist: bs4; extra == "biomed"
 Requires-Dist: requests; extra == "biomed"
+Requires-Dist: bs4; extra == "biomed"
 Requires-Dist: numpy; extra == "biomed"
 Requires-Dist: pandas; extra == "biomed"
 Provides-Extra: box
@@ -139,8 +139,8 @@ Requires-Dist: couchbase; extra == "couchbase"
 Requires-Dist: numpy; extra == "couchbase"
 Requires-Dist: pandas; extra == "couchbase"
 Provides-Extra: delta-table
-Requires-Dist: boto3; extra == "delta-table"
 Requires-Dist: deltalake; extra == "delta-table"
+Requires-Dist: boto3; extra == "delta-table"
 Requires-Dist: numpy; extra == "delta-table"
 Requires-Dist: pandas; extra == "delta-table"
 Provides-Extra: discord
@@ -148,8 +148,8 @@ Requires-Dist: discord.py; extra == "discord"
 Requires-Dist: numpy; extra == "discord"
 Requires-Dist: pandas; extra == "discord"
 Provides-Extra: dropbox
-Requires-Dist: fsspec; extra == "dropbox"
 Requires-Dist: dropboxdrivefs; extra == "dropbox"
+Requires-Dist: fsspec; extra == "dropbox"
 Requires-Dist: numpy; extra == "dropbox"
 Requires-Dist: pandas; extra == "dropbox"
 Provides-Extra: duckdb
@@ -161,14 +161,14 @@ Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
 Requires-Dist: numpy; extra == "elasticsearch"
 Requires-Dist: pandas; extra == "elasticsearch"
 Provides-Extra: gcs
-Requires-Dist: fsspec; extra == "gcs"
-Requires-Dist: bs4; extra == "gcs"
 Requires-Dist: gcsfs; extra == "gcs"
+Requires-Dist: bs4; extra == "gcs"
+Requires-Dist: fsspec; extra == "gcs"
 Requires-Dist: numpy; extra == "gcs"
 Requires-Dist: pandas; extra == "gcs"
 Provides-Extra: github
-Requires-Dist: pygithub>1.58.0; extra == "github"
 Requires-Dist: requests; extra == "github"
+Requires-Dist: pygithub>1.58.0; extra == "github"
 Requires-Dist: numpy; extra == "github"
 Requires-Dist: pandas; extra == "github"
 Provides-Extra: gitlab
@@ -180,15 +180,15 @@ Requires-Dist: google-api-python-client; extra == "google-drive"
 Requires-Dist: numpy; extra == "google-drive"
 Requires-Dist: pandas; extra == "google-drive"
 Provides-Extra: hubspot
-Requires-Dist: hubspot-api-client; extra == "hubspot"
 Requires-Dist: urllib3; extra == "hubspot"
+Requires-Dist: hubspot-api-client; extra == "hubspot"
 Requires-Dist: numpy; extra == "hubspot"
 Requires-Dist: pandas; extra == "hubspot"
 Provides-Extra: ibm-watsonx-s3
-Requires-Dist: pyarrow; extra == "ibm-watsonx-s3"
-Requires-Dist: httpx; extra == "ibm-watsonx-s3"
 Requires-Dist: tenacity; extra == "ibm-watsonx-s3"
+Requires-Dist: httpx; extra == "ibm-watsonx-s3"
 Requires-Dist: pyiceberg; extra == "ibm-watsonx-s3"
+Requires-Dist: pyarrow; extra == "ibm-watsonx-s3"
 Requires-Dist: numpy; extra == "ibm-watsonx-s3"
 Requires-Dist: pandas; extra == "ibm-watsonx-s3"
 Provides-Extra: jira
@@ -216,22 +216,22 @@ Requires-Dist: pymongo; extra == "mongodb"
 Requires-Dist: numpy; extra == "mongodb"
 Requires-Dist: pandas; extra == "mongodb"
 Provides-Extra: neo4j
+Requires-Dist: neo4j-rust-ext; extra == "neo4j"
 Requires-Dist: networkx; extra == "neo4j"
 Requires-Dist: cymple; extra == "neo4j"
-Requires-Dist: neo4j-rust-ext; extra == "neo4j"
 Requires-Dist: numpy; extra == "neo4j"
 Requires-Dist: pandas; extra == "neo4j"
 Provides-Extra: notion
-Requires-Dist: htmlBuilder; extra == "notion"
-Requires-Dist: httpx; extra == "notion"
 Requires-Dist: notion-client; extra == "notion"
+Requires-Dist: httpx; extra == "notion"
 Requires-Dist: backoff; extra == "notion"
+Requires-Dist: htmlBuilder; extra == "notion"
 Requires-Dist: numpy; extra == "notion"
 Requires-Dist: pandas; extra == "notion"
 Provides-Extra: onedrive
-Requires-Dist: bs4; extra == "onedrive"
-Requires-Dist: msal; extra == "onedrive"
+Requires-Dist: requests; extra == "onedrive"
 Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
+Requires-Dist: msal; extra == "onedrive"
 Requires-Dist: numpy; extra == "onedrive"
 Requires-Dist: pandas; extra == "onedrive"
 Provides-Extra: opensearch
@@ -239,8 +239,8 @@ Requires-Dist: opensearch-py; extra == "opensearch"
 Requires-Dist: numpy; extra == "opensearch"
 Requires-Dist: pandas; extra == "opensearch"
 Provides-Extra: outlook
-Requires-Dist: msal; extra == "outlook"
 Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
+Requires-Dist: msal; extra == "outlook"
 Requires-Dist: numpy; extra == "outlook"
 Requires-Dist: pandas; extra == "outlook"
 Provides-Extra: pinecone
@@ -264,13 +264,14 @@ Requires-Dist: redis; extra == "redis"
 Requires-Dist: numpy; extra == "redis"
 Requires-Dist: pandas; extra == "redis"
 Provides-Extra: s3
-Requires-Dist: fsspec; extra == "s3"
 Requires-Dist: s3fs; extra == "s3"
+Requires-Dist: fsspec; extra == "s3"
 Requires-Dist: numpy; extra == "s3"
 Requires-Dist: pandas; extra == "s3"
 Provides-Extra: sharepoint
-Requires-Dist: msal; extra == "sharepoint"
+Requires-Dist: requests; extra == "sharepoint"
 Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
+Requires-Dist: msal; extra == "sharepoint"
 Requires-Dist: numpy; extra == "sharepoint"
 Requires-Dist: pandas; extra == "sharepoint"
 Provides-Extra: salesforce
@@ -278,8 +279,8 @@ Requires-Dist: simple-salesforce; extra == "salesforce"
 Requires-Dist: numpy; extra == "salesforce"
 Requires-Dist: pandas; extra == "salesforce"
 Provides-Extra: sftp
-Requires-Dist: fsspec; extra == "sftp"
 Requires-Dist: paramiko; extra == "sftp"
+Requires-Dist: fsspec; extra == "sftp"
 Requires-Dist: numpy; extra == "sftp"
 Requires-Dist: pandas; extra == "sftp"
 Provides-Extra: slack
@@ -312,21 +313,21 @@ Requires-Dist: singlestoredb; extra == "singlestore"
 Requires-Dist: numpy; extra == "singlestore"
 Requires-Dist: pandas; extra == "singlestore"
 Provides-Extra: vectara
-Requires-Dist: httpx; extra == "vectara"
 Requires-Dist: requests; extra == "vectara"
+Requires-Dist: httpx; extra == "vectara"
 Requires-Dist: aiofiles; extra == "vectara"
 Requires-Dist: numpy; extra == "vectara"
 Requires-Dist: pandas; extra == "vectara"
 Provides-Extra: vastdb
 Requires-Dist: ibis; extra == "vastdb"
-Requires-Dist: pyarrow; extra == "vastdb"
 Requires-Dist: vastdb; extra == "vastdb"
+Requires-Dist: pyarrow; extra == "vastdb"
 Requires-Dist: numpy; extra == "vastdb"
 Requires-Dist: pandas; extra == "vastdb"
 Provides-Extra: zendesk
+Requires-Dist: aiofiles; extra == "zendesk"
 Requires-Dist: bs4; extra == "zendesk"
 Requires-Dist: httpx; extra == "zendesk"
-Requires-Dist: aiofiles; extra == "zendesk"
 Requires-Dist: numpy; extra == "zendesk"
 Requires-Dist: pandas; extra == "zendesk"
 Provides-Extra: embed-huggingface
@@ -334,8 +335,8 @@ Requires-Dist: sentence-transformers; extra == "embed-huggingface"
 Requires-Dist: numpy; extra == "embed-huggingface"
 Requires-Dist: pandas; extra == "embed-huggingface"
 Provides-Extra: embed-octoai
-Requires-Dist: tiktoken; extra == "embed-octoai"
 Requires-Dist: openai; extra == "embed-octoai"
+Requires-Dist: tiktoken; extra == "embed-octoai"
 Requires-Dist: numpy; extra == "embed-octoai"
 Requires-Dist: pandas; extra == "embed-octoai"
 Provides-Extra: embed-vertexai
@@ -351,13 +352,13 @@ Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
 Requires-Dist: numpy; extra == "embed-mixedbreadai"
 Requires-Dist: pandas; extra == "embed-mixedbreadai"
 Provides-Extra: openai
-Requires-Dist: tiktoken; extra == "openai"
 Requires-Dist: openai; extra == "openai"
+Requires-Dist: tiktoken; extra == "openai"
 Requires-Dist: numpy; extra == "openai"
 Requires-Dist: pandas; extra == "openai"
 Provides-Extra: bedrock
-Requires-Dist: boto3; extra == "bedrock"
 Requires-Dist: aioboto3; extra == "bedrock"
+Requires-Dist: boto3; extra == "bedrock"
 Requires-Dist: numpy; extra == "bedrock"
 Requires-Dist: pandas; extra == "bedrock"
 Provides-Extra: togetherai

{unstructured_ingest-0.7.0.dist-info → unstructured_ingest-0.7.2.dist-info}/RECORD RENAMED Viewed

@@ -101,7 +101,6 @@ test/unit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/test_error.py,sha256=RflmngCdFNKOLXVfLnUdNfY3Mfg3k7DTEzfIl0B-syU,840
 test/unit/test_html.py,sha256=ubsck9pVOnPDFL0P8TZkko_46MIaFLlSNQcsgFDgYoE,4496
 test/unit/test_interfaces.py,sha256=Gv3WMJsw_3xPLy3nI3dIcJuLa2WvKYszSjI_W9XLtVM,787
-test/unit/test_logger.py,sha256=0SKndXE_VRd8XmUHkrj7zuBQHZscXx3ZQllMEOvtF9Y,2380
 test/unit/test_utils.py,sha256=xeSM02zOChSOO3dzDOVAEiQme1rQ8drjnJF93S3BFmk,7247
 test/unit/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/chunkers/test_chunkers.py,sha256=wRxbSj7P1FwRGDyVcARkm8CQSVCBCro3nTe54UoUBzc,1769
@@ -134,10 +133,10 @@ test/unit/partitioners/test_partitioner.py,sha256=eJoUDbiKtweyU1WYfsY5KqVqoPjbx1
 test/unit/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
 unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
-unstructured_ingest/__version__.py,sha256=bNFs6PBmjE3W8Yh7mMRTUVysaeHwLGXC2y6p_RXYa08,42
+unstructured_ingest/__version__.py,sha256=7O8GlC09PP-XuUDOj6bhRUtbOuUgpBT2COw4AjU1kk0,42
 unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
 unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
-unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
+unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
 unstructured_ingest/main.py,sha256=82G_7eG4PNhc_xIqj4Y_sFbDV9VI-nwSfsfJQMzovMk,169
 unstructured_ingest/otel.py,sha256=NsUqOolA0gt69eFhZLABjVpcKoM9aus-AbxIKqWqPTc,4127
 unstructured_ingest/unstructured_api.py,sha256=hWUXUhGtyfi2OcDR-BriHJyT4jJywf4zfG1qpSCf9Bo,5002
@@ -212,16 +211,18 @@ unstructured_ingest/processes/connectors/local.py,sha256=LluTLKv4g7FbJb4A6vuSxI9
 unstructured_ingest/processes/connectors/milvus.py,sha256=Jr9cul7By03tGAPFnFBoqncnNWwbhKd-qbmkuqnin8U,8908
 unstructured_ingest/processes/connectors/mongodb.py,sha256=1g_5bfbS6lah3nsOXqLAanR3zNYJ47_Njw_uV-uj3_U,14324
 unstructured_ingest/processes/connectors/neo4j.py,sha256=eAM2XWSLA5caKJmbcd7ctn2TapreIJEXRoHoxT1OZwA,18718
-unstructured_ingest/processes/connectors/onedrive.py,sha256=4uin_BBgxNUrcjob4txQn2NTFEhWXuWY9uLeQJ4lRbs,18047
+unstructured_ingest/processes/connectors/onedrive.py,sha256=VBkKlbJgR7uKlKTnjNybAw6ZawLKflDPpy2uVvgWYWw,19296
 unstructured_ingest/processes/connectors/outlook.py,sha256=FfHV9OfajGbj5VQZccqHsSyYJ0f6a4CLGQJi1s9UJjo,9294
 unstructured_ingest/processes/connectors/pinecone.py,sha256=TG-1hVfOsKFepxPfy2MCwEVBEZF4msg8lfNQZBpo35Y,13980
 unstructured_ingest/processes/connectors/redisdb.py,sha256=5LX6KtuNCzqjHqnJPw0zdKLE0iLx7Dk5RN9e_KT-up4,6975
 unstructured_ingest/processes/connectors/salesforce.py,sha256=a2Erx5pXbxKIj--oJWTGk2TeOcdmipuxgleazbD62o4,11664
-unstructured_ingest/processes/connectors/sharepoint.py,sha256=E_oKMz3rdn2HumlDUnDyVVmQLSBLB_3gqI48O6oZfTc,4825
+unstructured_ingest/processes/connectors/sharepoint.py,sha256=PowaqMzWr-VCW1rnwcAeRhHyE55kJ9J9FCVlrmtzN0E,4827
 unstructured_ingest/processes/connectors/slack.py,sha256=e4ntATdht_olAPsco1DKwlrOkpKLyDznPO1NJmsr0A8,9243
 unstructured_ingest/processes/connectors/utils.py,sha256=TAd0hb1f291N-q7-TUe6JKSCGkhqDyo7Ij8zmliBZUc,2071
 unstructured_ingest/processes/connectors/vectara.py,sha256=frKJkc7ffstQhXD9-HkAGoQAofGkl6AsnKJhGcl8LgA,12294
 unstructured_ingest/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql,sha256=dUZZDNkyvQXKqoAThRz3ek7zaUE2l_LAQimlG5WZhH4,211
+unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json,sha256=SJlIO0kXxy866tWQ8bEzvwLwflsoUMIS-OKlxMvHIuE,504
 unstructured_ingest/processes/connectors/databricks/__init__.py,sha256=RtKAPyNtXh6fzEsOQ08pA0-vC1uMr3KqYG6cqiBoo70,2133
 unstructured_ingest/processes/connectors/databricks/volumes.py,sha256=OWQrne9-5hPzc-kxGa2P53M3DoksDzMDyjLhQyihdCo,8020
 unstructured_ingest/processes/connectors/databricks/volumes_aws.py,sha256=RP9rq2sfysygiqzXj6eX0CXeZpxk65xmrz7HZnWRQWA,2961
@@ -360,11 +361,11 @@ unstructured_ingest/utils/dep_check.py,sha256=SXXcUna2H0RtxA6j1S2NGkvQa9JP2DujWh
 unstructured_ingest/utils/html.py,sha256=0WduP8tI5S3nHFQi6XHNPHgsIC9j3iWwyIayX9gDLiE,6386
 unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01qAbElH0,1201
 unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
-unstructured_ingest/utils/string_and_date_utils.py,sha256=QBj8HXZGvDZQSULLOQwJ8tb3r2aYrTBQ71rkiV6gZdI,2519
+unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
 unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
-unstructured_ingest-0.7.0.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
-unstructured_ingest-0.7.0.dist-info/METADATA,sha256=Ovs5cLsehNodHgRKxapQvTYZatKSYKWkbeXwK_iSXQo,14998
-unstructured_ingest-0.7.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-unstructured_ingest-0.7.0.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
-unstructured_ingest-0.7.0.dist-info/top_level.txt,sha256=85vUyT6fV2A5eCEM3M3FPRUUI9vZOVK1xVZt7eo1oV8,34
-unstructured_ingest-0.7.0.dist-info/RECORD,,
+unstructured_ingest-0.7.2.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
+unstructured_ingest-0.7.2.dist-info/METADATA,sha256=BjJRt_WKMPbiOWOxGZPs3Q9ZmwHRkPfF0FbWT7X7lA4,15050
+unstructured_ingest-0.7.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+unstructured_ingest-0.7.2.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
+unstructured_ingest-0.7.2.dist-info/top_level.txt,sha256=85vUyT6fV2A5eCEM3M3FPRUUI9vZOVK1xVZt7eo1oV8,34
+unstructured_ingest-0.7.2.dist-info/RECORD,,

test/unit/test_logger.py DELETED Viewed

@@ -1,78 +0,0 @@
-import json
-import pytest
-from unstructured_ingest.logger import (
-    default_is_data_sensitive,
-    hide_sensitive_fields,
-    redact_jsons,
-)
-@pytest.mark.parametrize(
-    ("key", "value", "is_sensitive"),
-    [
-        ("username", "john_smith", False),
-        ("password", "13?H%", True),
-        ("token", "123", True),
-        ("AWS_CREDENTIAL", "aws_credential", True),
-        ("AWS_KEY", None, False),
-    ],
-)
-def test_default_is_sensitive(key, value, is_sensitive):
-    assert default_is_data_sensitive(key, value) == is_sensitive
-def test_hide_sensitive_fields():
-    d = {
-        "username": "john_smith",
-        "password": "13?H%",
-        "inner": {
-            "token": "123",
-            "AWS_KEY": None,
-            "inner_j_string": json.dumps(
-                {"account_name": "secret name", "client_id": 123, "timestamp": 123}
-            ),
-        },
-    }
-    redacted_d = hide_sensitive_fields(d)
-    expected_d = {
-        "password": "*******",
-        "username": "john_smith",
-        "inner": {
-            "token": "*******",
-            "AWS_KEY": None,
-            "inner_j_string": json.dumps(
-                {"account_name": "*******", "client_id": "*******", "timestamp": 123}
-            ),
-        },
-    }
-    assert redacted_d == expected_d
-def test_redact_jsons():
-    d1 = {
-        "username": "john_smith",
-        "password": "13?H%",
-        "inner": {
-            "token": "123",
-            "AWS_KEY": None,
-            "inner_j_string": json.dumps(
-                {"account_name": "secret name", "client_id": 123, "timestamp": 123}
-            ),
-        },
-    }
-    d2 = {"username": "tim67", "update_time": 456}
-    d3 = {"account_name": "top secret", "host": "http://localhost:8888"}
-    sensitive_string = f"Some topic secret info ({json.dumps(d1)} regarding {d2} and {d3})"
-    expected_string = (
-        'Some topic secret info ({"username": "john_smith", "password": "*******", '
-        '"inner": {"token": "*******", "AWS_KEY": null, "inner_j_string": '
-        '"{\\"account_name\\": \\"*******\\", \\"client_id\\": \\"*******\\", '
-        '\\"timestamp\\": 123}"}} regarding {"username": "tim67", "update_time": 456} '
-        'and {"account_name": "*******", "host": "http://localhost:8888"})'
-    )
-    redacted_string = redact_jsons(sensitive_string)
-    assert redacted_string == expected_string

{unstructured_ingest-0.7.0.dist-info → unstructured_ingest-0.7.2.dist-info}/LICENSE.md RENAMED Viewed

File without changes

{unstructured_ingest-0.7.0.dist-info → unstructured_ingest-0.7.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{unstructured_ingest-0.7.0.dist-info → unstructured_ingest-0.7.2.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{unstructured_ingest-0.7.0.dist-info → unstructured_ingest-0.7.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

unstructured-ingest 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl

Potentially problematic release.

unstructured-ingest 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl