PyPI - unstructured-ingest - Versions diffs - 0.7.1__py3-none-any.whl → 0.7.2__py3-none-any.whl - Mend

unstructured-ingest 0.7.1__py3-none-any.whl → 0.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (12) hide show

unstructured_ingest/__version__.py CHANGED Viewed

@@ -1 +1 @@
-__version__ = "0.7.1"  # pragma: no cover
+__version__ = "0.7.2"  # pragma: no cover

unstructured_ingest/logger.py CHANGED Viewed

@@ -1,99 +1,8 @@
-import ast
-import json
 import logging
-import typing as t
 logger = logging.getLogger("unstructured_ingest")
-def default_is_data_sensitive(k: str, v: t.Any) -> bool:
-    sensitive_fields = [
-        "account_name",
-        "client_id",
-    ]
-    sensitive_triggers = ["key", "cred", "token", "password", "oauth", "secret"]
-    return (
-        v
-        and any([s in k.lower() for s in sensitive_triggers])  # noqa: C419
-        or k.lower() in sensitive_fields
-    )
-def hide_sensitive_fields(
-    data: dict, is_sensitive_fn: t.Callable[[str, t.Any], bool] = default_is_data_sensitive
-) -> dict:
-    """
-    Will recursively look through every k, v pair in this dict and any nested ones and run
-    is_sensitive_fn to dynamically redact the value of the k, v pair. Will also check if
-    any string value can be parsed as valid json and process that dict as well and replace
-    the original string with the json.dumps() version of the redacted dict.
-    """
-    new_data = data.copy()
-    for k, v in new_data.items():
-        if is_sensitive_fn(k, v):
-            new_data[k] = "*******"
-        if isinstance(v, dict):
-            new_data[k] = hide_sensitive_fields(v)
-        if isinstance(v, str):
-            # Need to take into account strings generated via json.dumps() or simply printing a dict
-            try:
-                json_data = json.loads(v)
-                if isinstance(json_data, dict):
-                    updated_data = hide_sensitive_fields(json_data)
-                    new_data[k] = json.dumps(updated_data)
-            except json.JSONDecodeError:
-                pass
-    return new_data
-def redact_jsons(s: str) -> str:
-    """
-    Takes in a generic string and pulls out all valid json content. Leverages
-    hide_sensitive_fields() to redact any sensitive information and replaces the
-    original json with the new redacted format. There can be any number of valid
-    jsons in a generic string and this will work. Having extra '{' without a
-    closing '}' will cause this to break though. i.e '{ text, {"a": 3}'.
-    """
-    chars = list(s)
-    if "{" not in chars:
-        return s
-    i = 0
-    jsons = []
-    i = 0
-    while i < len(chars):
-        char = chars[i]
-        if char == "{":
-            stack = [char]
-            current = [char]
-            while len(stack) != 0 and i < len(chars):
-                i += 1
-                char = chars[i]
-                current.append(char)
-                if char == "{":
-                    stack.append(char)
-                if char == "}":
-                    stack.pop(-1)
-            jsons.append("".join(current))
-            continue
-        i += 1
-    for j in jsons:
-        try:
-            formatted_j = json.dumps(json.loads(j))
-        except json.JSONDecodeError:
-            formatted_j = json.dumps(ast.literal_eval(j))
-        hidden_j = json.dumps(hide_sensitive_fields(json.loads(formatted_j)))
-        s = s.replace(j, hidden_j)
-    return s
-class SensitiveFormatter(logging.Formatter):
-    def format(self, record):
-        s = super().format(record=record)
-        return redact_jsons(s)
 def remove_root_handlers(logger: logging.Logger) -> None:
     # NOTE(robinson): in some environments such as Google Colab, there is a root handler
     # that doesn't not mask secrets, meaning sensitive info such as api keys appear in logs.
@@ -106,7 +15,7 @@ def remove_root_handlers(logger: logging.Logger) -> None:
 def ingest_log_streaming_init(level: int) -> None:
     handler = logging.StreamHandler()
     handler.name = "ingest_log_handler"
-    formatter = SensitiveFormatter("%(asctime)s %(processName)-10s %(levelname)-8s %(message)s")
+    formatter = logging.Formatter("%(asctime)s %(processName)-10s %(levelname)-8s %(message)s")
     handler.setFormatter(formatter)
     # Only want to add the handler once
@@ -122,7 +31,7 @@ def make_default_logger(level: int) -> logging.Logger:
     logger = logging.getLogger("unstructured_ingest")
     handler = logging.StreamHandler()
     handler.name = "ingest_log_handler"
-    formatter = SensitiveFormatter("%(asctime)s %(processName)-10s %(levelname)-8s %(message)s")
+    formatter = logging.Formatter("%(asctime)s %(processName)-10s %(levelname)-8s %(message)s")
     handler.setFormatter(formatter)
     logger.addHandler(handler)
     logger.setLevel(level)

unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql ADDED Viewed

@@ -0,0 +1,10 @@
+CREATE TABLE elements (
+    id STRING NOT NULL PRIMARY KEY,
+    record_id STRING NOT NULL,
+    element_id STRING NOT NULL,
+    text STRING,
+    embeddings ARRAY<FLOAT>,
+    type STRING,
+    metadata VARIANT
+);

unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json ADDED Viewed

@@ -0,0 +1,23 @@
+{
+    "properties": [
+        {
+            "dataType": [
+                "text"
+            ],
+            "indexFilterable": true,
+            "indexSearchable": true,
+            "name": "record_id",
+            "tokenization": "word"
+        },
+        {
+            "dataType": [
+                "text"
+            ],
+            "indexFilterable": true,
+            "indexSearchable": true,
+            "name": "text",
+            "tokenization": "word"
+        }
+    ],
+    "vectorizer": "none"
+}

unstructured_ingest/utils/string_and_date_utils.py CHANGED Viewed

@@ -1,14 +1,14 @@
 import json
 import re
-import typing as t
 from datetime import datetime
+from typing import Any, Union
 from dateutil import parser
 from unstructured_ingest.logger import logger
-def json_to_dict(json_string: str) -> t.Union[str, t.Dict[str, t.Any]]:
+def json_to_dict(json_string: str) -> Union[str, dict[str, Any]]:
     """Helper function attempts to deserialize json string to a dictionary."""
     try:
         return json.loads(json_string)
@@ -24,7 +24,7 @@ def json_to_dict(json_string: str) -> t.Union[str, t.Dict[str, t.Any]]:
     return json_string
-def ensure_isoformat_datetime(timestamp: t.Union[datetime, str]) -> str:
+def ensure_isoformat_datetime(timestamp: Union[datetime, str]) -> str:
     """
     Ensures that the input value is converted to an ISO format datetime string.
     Handles both datetime objects and strings.

{unstructured_ingest-0.7.1.dist-info → unstructured_ingest-0.7.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: unstructured-ingest
-Version: 0.7.1
+Version: 0.7.2
 Summary: A library that prepares raw documents for downstream ML tasks.
 Home-page: https://github.com/Unstructured-IO/unstructured-ingest
 Author: Unstructured Technologies
@@ -22,349 +22,349 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0,<3.14
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
+Requires-Dist: tqdm
 Requires-Dist: opentelemetry-sdk
-Requires-Dist: dataclasses_json
 Requires-Dist: click
-Requires-Dist: tqdm
-Requires-Dist: pydantic>=2.7
 Requires-Dist: python-dateutil
-Requires-Dist: pandas
+Requires-Dist: pydantic>=2.7
+Requires-Dist: dataclasses_json
 Requires-Dist: numpy
+Requires-Dist: pandas
 Provides-Extra: remote
 Requires-Dist: unstructured-client>=0.30.0; extra == "remote"
-Requires-Dist: pandas; extra == "remote"
 Requires-Dist: numpy; extra == "remote"
+Requires-Dist: pandas; extra == "remote"
 Provides-Extra: csv
 Requires-Dist: unstructured[tsv]; extra == "csv"
-Requires-Dist: pandas; extra == "csv"
 Requires-Dist: numpy; extra == "csv"
+Requires-Dist: pandas; extra == "csv"
 Provides-Extra: doc
 Requires-Dist: unstructured[docx]; extra == "doc"
-Requires-Dist: pandas; extra == "doc"
 Requires-Dist: numpy; extra == "doc"
+Requires-Dist: pandas; extra == "doc"
 Provides-Extra: docx
 Requires-Dist: unstructured[docx]; extra == "docx"
-Requires-Dist: pandas; extra == "docx"
 Requires-Dist: numpy; extra == "docx"
+Requires-Dist: pandas; extra == "docx"
 Provides-Extra: epub
 Requires-Dist: unstructured[epub]; extra == "epub"
-Requires-Dist: pandas; extra == "epub"
 Requires-Dist: numpy; extra == "epub"
+Requires-Dist: pandas; extra == "epub"
 Provides-Extra: md
 Requires-Dist: unstructured[md]; extra == "md"
-Requires-Dist: pandas; extra == "md"
 Requires-Dist: numpy; extra == "md"
+Requires-Dist: pandas; extra == "md"
 Provides-Extra: msg
 Requires-Dist: unstructured[msg]; extra == "msg"
-Requires-Dist: pandas; extra == "msg"
 Requires-Dist: numpy; extra == "msg"
+Requires-Dist: pandas; extra == "msg"
 Provides-Extra: odt
 Requires-Dist: unstructured[odt]; extra == "odt"
-Requires-Dist: pandas; extra == "odt"
 Requires-Dist: numpy; extra == "odt"
+Requires-Dist: pandas; extra == "odt"
 Provides-Extra: org
 Requires-Dist: unstructured[org]; extra == "org"
-Requires-Dist: pandas; extra == "org"
 Requires-Dist: numpy; extra == "org"
+Requires-Dist: pandas; extra == "org"
 Provides-Extra: pdf
 Requires-Dist: unstructured[pdf]; extra == "pdf"
-Requires-Dist: pandas; extra == "pdf"
 Requires-Dist: numpy; extra == "pdf"
+Requires-Dist: pandas; extra == "pdf"
 Provides-Extra: ppt
 Requires-Dist: unstructured[pptx]; extra == "ppt"
-Requires-Dist: pandas; extra == "ppt"
 Requires-Dist: numpy; extra == "ppt"
+Requires-Dist: pandas; extra == "ppt"
 Provides-Extra: pptx
 Requires-Dist: unstructured[pptx]; extra == "pptx"
-Requires-Dist: pandas; extra == "pptx"
 Requires-Dist: numpy; extra == "pptx"
+Requires-Dist: pandas; extra == "pptx"
 Provides-Extra: rtf
 Requires-Dist: unstructured[rtf]; extra == "rtf"
-Requires-Dist: pandas; extra == "rtf"
 Requires-Dist: numpy; extra == "rtf"
+Requires-Dist: pandas; extra == "rtf"
 Provides-Extra: rst
 Requires-Dist: unstructured[rst]; extra == "rst"
-Requires-Dist: pandas; extra == "rst"
 Requires-Dist: numpy; extra == "rst"
+Requires-Dist: pandas; extra == "rst"
 Provides-Extra: tsv
 Requires-Dist: unstructured[tsv]; extra == "tsv"
-Requires-Dist: pandas; extra == "tsv"
 Requires-Dist: numpy; extra == "tsv"
+Requires-Dist: pandas; extra == "tsv"
 Provides-Extra: xlsx
 Requires-Dist: unstructured[xlsx]; extra == "xlsx"
-Requires-Dist: pandas; extra == "xlsx"
 Requires-Dist: numpy; extra == "xlsx"
+Requires-Dist: pandas; extra == "xlsx"
 Provides-Extra: airtable
 Requires-Dist: pyairtable; extra == "airtable"
-Requires-Dist: pandas; extra == "airtable"
 Requires-Dist: numpy; extra == "airtable"
+Requires-Dist: pandas; extra == "airtable"
 Provides-Extra: astradb
 Requires-Dist: astrapy; extra == "astradb"
-Requires-Dist: pandas; extra == "astradb"
 Requires-Dist: numpy; extra == "astradb"
+Requires-Dist: pandas; extra == "astradb"
 Provides-Extra: azure
-Requires-Dist: fsspec; extra == "azure"
 Requires-Dist: adlfs; extra == "azure"
-Requires-Dist: pandas; extra == "azure"
+Requires-Dist: fsspec; extra == "azure"
 Requires-Dist: numpy; extra == "azure"
+Requires-Dist: pandas; extra == "azure"
 Provides-Extra: azure-ai-search
 Requires-Dist: azure-search-documents; extra == "azure-ai-search"
-Requires-Dist: pandas; extra == "azure-ai-search"
 Requires-Dist: numpy; extra == "azure-ai-search"
+Requires-Dist: pandas; extra == "azure-ai-search"
 Provides-Extra: biomed
-Requires-Dist: bs4; extra == "biomed"
 Requires-Dist: requests; extra == "biomed"
-Requires-Dist: pandas; extra == "biomed"
+Requires-Dist: bs4; extra == "biomed"
 Requires-Dist: numpy; extra == "biomed"
+Requires-Dist: pandas; extra == "biomed"
 Provides-Extra: box
-Requires-Dist: fsspec; extra == "box"
 Requires-Dist: boxfs; extra == "box"
-Requires-Dist: pandas; extra == "box"
+Requires-Dist: fsspec; extra == "box"
 Requires-Dist: numpy; extra == "box"
+Requires-Dist: pandas; extra == "box"
 Provides-Extra: chroma
 Requires-Dist: chromadb; extra == "chroma"
-Requires-Dist: pandas; extra == "chroma"
 Requires-Dist: numpy; extra == "chroma"
+Requires-Dist: pandas; extra == "chroma"
 Provides-Extra: clarifai
 Requires-Dist: clarifai; extra == "clarifai"
-Requires-Dist: pandas; extra == "clarifai"
 Requires-Dist: numpy; extra == "clarifai"
+Requires-Dist: pandas; extra == "clarifai"
 Provides-Extra: confluence
-Requires-Dist: requests; extra == "confluence"
 Requires-Dist: atlassian-python-api; extra == "confluence"
-Requires-Dist: pandas; extra == "confluence"
+Requires-Dist: requests; extra == "confluence"
 Requires-Dist: numpy; extra == "confluence"
+Requires-Dist: pandas; extra == "confluence"
 Provides-Extra: couchbase
 Requires-Dist: couchbase; extra == "couchbase"
-Requires-Dist: pandas; extra == "couchbase"
 Requires-Dist: numpy; extra == "couchbase"
+Requires-Dist: pandas; extra == "couchbase"
 Provides-Extra: delta-table
 Requires-Dist: deltalake; extra == "delta-table"
 Requires-Dist: boto3; extra == "delta-table"
-Requires-Dist: pandas; extra == "delta-table"
 Requires-Dist: numpy; extra == "delta-table"
+Requires-Dist: pandas; extra == "delta-table"
 Provides-Extra: discord
 Requires-Dist: discord.py; extra == "discord"
-Requires-Dist: pandas; extra == "discord"
 Requires-Dist: numpy; extra == "discord"
+Requires-Dist: pandas; extra == "discord"
 Provides-Extra: dropbox
 Requires-Dist: dropboxdrivefs; extra == "dropbox"
 Requires-Dist: fsspec; extra == "dropbox"
-Requires-Dist: pandas; extra == "dropbox"
 Requires-Dist: numpy; extra == "dropbox"
+Requires-Dist: pandas; extra == "dropbox"
 Provides-Extra: duckdb
 Requires-Dist: duckdb; extra == "duckdb"
-Requires-Dist: pandas; extra == "duckdb"
 Requires-Dist: numpy; extra == "duckdb"
+Requires-Dist: pandas; extra == "duckdb"
 Provides-Extra: elasticsearch
 Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
-Requires-Dist: pandas; extra == "elasticsearch"
 Requires-Dist: numpy; extra == "elasticsearch"
+Requires-Dist: pandas; extra == "elasticsearch"
 Provides-Extra: gcs
+Requires-Dist: gcsfs; extra == "gcs"
 Requires-Dist: bs4; extra == "gcs"
 Requires-Dist: fsspec; extra == "gcs"
-Requires-Dist: gcsfs; extra == "gcs"
-Requires-Dist: pandas; extra == "gcs"
 Requires-Dist: numpy; extra == "gcs"
+Requires-Dist: pandas; extra == "gcs"
 Provides-Extra: github
 Requires-Dist: requests; extra == "github"
 Requires-Dist: pygithub>1.58.0; extra == "github"
-Requires-Dist: pandas; extra == "github"
 Requires-Dist: numpy; extra == "github"
+Requires-Dist: pandas; extra == "github"
 Provides-Extra: gitlab
 Requires-Dist: python-gitlab; extra == "gitlab"
-Requires-Dist: pandas; extra == "gitlab"
 Requires-Dist: numpy; extra == "gitlab"
+Requires-Dist: pandas; extra == "gitlab"
 Provides-Extra: google-drive
 Requires-Dist: google-api-python-client; extra == "google-drive"
-Requires-Dist: pandas; extra == "google-drive"
 Requires-Dist: numpy; extra == "google-drive"
+Requires-Dist: pandas; extra == "google-drive"
 Provides-Extra: hubspot
-Requires-Dist: hubspot-api-client; extra == "hubspot"
 Requires-Dist: urllib3; extra == "hubspot"
-Requires-Dist: pandas; extra == "hubspot"
+Requires-Dist: hubspot-api-client; extra == "hubspot"
 Requires-Dist: numpy; extra == "hubspot"
+Requires-Dist: pandas; extra == "hubspot"
 Provides-Extra: ibm-watsonx-s3
 Requires-Dist: tenacity; extra == "ibm-watsonx-s3"
 Requires-Dist: httpx; extra == "ibm-watsonx-s3"
-Requires-Dist: pyarrow; extra == "ibm-watsonx-s3"
 Requires-Dist: pyiceberg; extra == "ibm-watsonx-s3"
-Requires-Dist: pandas; extra == "ibm-watsonx-s3"
+Requires-Dist: pyarrow; extra == "ibm-watsonx-s3"
 Requires-Dist: numpy; extra == "ibm-watsonx-s3"
+Requires-Dist: pandas; extra == "ibm-watsonx-s3"
 Provides-Extra: jira
 Requires-Dist: atlassian-python-api; extra == "jira"
-Requires-Dist: pandas; extra == "jira"
 Requires-Dist: numpy; extra == "jira"
+Requires-Dist: pandas; extra == "jira"
 Provides-Extra: kafka
 Requires-Dist: confluent-kafka; extra == "kafka"
-Requires-Dist: pandas; extra == "kafka"
 Requires-Dist: numpy; extra == "kafka"
+Requires-Dist: pandas; extra == "kafka"
 Provides-Extra: kdbai
 Requires-Dist: kdbai-client>=1.4.0; extra == "kdbai"
-Requires-Dist: pandas; extra == "kdbai"
 Requires-Dist: numpy; extra == "kdbai"
+Requires-Dist: pandas; extra == "kdbai"
 Provides-Extra: lancedb
 Requires-Dist: lancedb; extra == "lancedb"
-Requires-Dist: pandas; extra == "lancedb"
 Requires-Dist: numpy; extra == "lancedb"
+Requires-Dist: pandas; extra == "lancedb"
 Provides-Extra: milvus
 Requires-Dist: pymilvus; extra == "milvus"
-Requires-Dist: pandas; extra == "milvus"
 Requires-Dist: numpy; extra == "milvus"
+Requires-Dist: pandas; extra == "milvus"
 Provides-Extra: mongodb
 Requires-Dist: pymongo; extra == "mongodb"
-Requires-Dist: pandas; extra == "mongodb"
 Requires-Dist: numpy; extra == "mongodb"
+Requires-Dist: pandas; extra == "mongodb"
 Provides-Extra: neo4j
-Requires-Dist: cymple; extra == "neo4j"
 Requires-Dist: neo4j-rust-ext; extra == "neo4j"
 Requires-Dist: networkx; extra == "neo4j"
-Requires-Dist: pandas; extra == "neo4j"
+Requires-Dist: cymple; extra == "neo4j"
 Requires-Dist: numpy; extra == "neo4j"
+Requires-Dist: pandas; extra == "neo4j"
 Provides-Extra: notion
 Requires-Dist: notion-client; extra == "notion"
-Requires-Dist: backoff; extra == "notion"
 Requires-Dist: httpx; extra == "notion"
+Requires-Dist: backoff; extra == "notion"
 Requires-Dist: htmlBuilder; extra == "notion"
-Requires-Dist: pandas; extra == "notion"
 Requires-Dist: numpy; extra == "notion"
+Requires-Dist: pandas; extra == "notion"
 Provides-Extra: onedrive
-Requires-Dist: msal; extra == "onedrive"
-Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
 Requires-Dist: requests; extra == "onedrive"
-Requires-Dist: pandas; extra == "onedrive"
+Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
+Requires-Dist: msal; extra == "onedrive"
 Requires-Dist: numpy; extra == "onedrive"
+Requires-Dist: pandas; extra == "onedrive"
 Provides-Extra: opensearch
 Requires-Dist: opensearch-py; extra == "opensearch"
-Requires-Dist: pandas; extra == "opensearch"
 Requires-Dist: numpy; extra == "opensearch"
+Requires-Dist: pandas; extra == "opensearch"
 Provides-Extra: outlook
-Requires-Dist: msal; extra == "outlook"
 Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
-Requires-Dist: pandas; extra == "outlook"
+Requires-Dist: msal; extra == "outlook"
 Requires-Dist: numpy; extra == "outlook"
+Requires-Dist: pandas; extra == "outlook"
 Provides-Extra: pinecone
 Requires-Dist: pinecone-client>=3.7.1; extra == "pinecone"
-Requires-Dist: pandas; extra == "pinecone"
 Requires-Dist: numpy; extra == "pinecone"
+Requires-Dist: pandas; extra == "pinecone"
 Provides-Extra: postgres
 Requires-Dist: psycopg2-binary; extra == "postgres"
-Requires-Dist: pandas; extra == "postgres"
 Requires-Dist: numpy; extra == "postgres"
+Requires-Dist: pandas; extra == "postgres"
 Provides-Extra: qdrant
 Requires-Dist: qdrant-client; extra == "qdrant"
-Requires-Dist: pandas; extra == "qdrant"
 Requires-Dist: numpy; extra == "qdrant"
+Requires-Dist: pandas; extra == "qdrant"
 Provides-Extra: reddit
 Requires-Dist: praw; extra == "reddit"
-Requires-Dist: pandas; extra == "reddit"
 Requires-Dist: numpy; extra == "reddit"
+Requires-Dist: pandas; extra == "reddit"
 Provides-Extra: redis
 Requires-Dist: redis; extra == "redis"
-Requires-Dist: pandas; extra == "redis"
 Requires-Dist: numpy; extra == "redis"
+Requires-Dist: pandas; extra == "redis"
 Provides-Extra: s3
-Requires-Dist: fsspec; extra == "s3"
 Requires-Dist: s3fs; extra == "s3"
-Requires-Dist: pandas; extra == "s3"
+Requires-Dist: fsspec; extra == "s3"
 Requires-Dist: numpy; extra == "s3"
+Requires-Dist: pandas; extra == "s3"
 Provides-Extra: sharepoint
-Requires-Dist: msal; extra == "sharepoint"
-Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
 Requires-Dist: requests; extra == "sharepoint"
-Requires-Dist: pandas; extra == "sharepoint"
+Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
+Requires-Dist: msal; extra == "sharepoint"
 Requires-Dist: numpy; extra == "sharepoint"
+Requires-Dist: pandas; extra == "sharepoint"
 Provides-Extra: salesforce
 Requires-Dist: simple-salesforce; extra == "salesforce"
-Requires-Dist: pandas; extra == "salesforce"
 Requires-Dist: numpy; extra == "salesforce"
+Requires-Dist: pandas; extra == "salesforce"
 Provides-Extra: sftp
 Requires-Dist: paramiko; extra == "sftp"
 Requires-Dist: fsspec; extra == "sftp"
-Requires-Dist: pandas; extra == "sftp"
 Requires-Dist: numpy; extra == "sftp"
+Requires-Dist: pandas; extra == "sftp"
 Provides-Extra: slack
 Requires-Dist: slack_sdk[optional]; extra == "slack"
-Requires-Dist: pandas; extra == "slack"
 Requires-Dist: numpy; extra == "slack"
+Requires-Dist: pandas; extra == "slack"
 Provides-Extra: snowflake
 Requires-Dist: snowflake-connector-python; extra == "snowflake"
 Requires-Dist: psycopg2-binary; extra == "snowflake"
-Requires-Dist: pandas; extra == "snowflake"
 Requires-Dist: numpy; extra == "snowflake"
+Requires-Dist: pandas; extra == "snowflake"
 Provides-Extra: wikipedia
 Requires-Dist: wikipedia; extra == "wikipedia"
-Requires-Dist: pandas; extra == "wikipedia"
 Requires-Dist: numpy; extra == "wikipedia"
+Requires-Dist: pandas; extra == "wikipedia"
 Provides-Extra: weaviate
 Requires-Dist: weaviate-client; extra == "weaviate"
-Requires-Dist: pandas; extra == "weaviate"
 Requires-Dist: numpy; extra == "weaviate"
+Requires-Dist: pandas; extra == "weaviate"
 Provides-Extra: databricks-volumes
 Requires-Dist: databricks-sdk; extra == "databricks-volumes"
-Requires-Dist: pandas; extra == "databricks-volumes"
 Requires-Dist: numpy; extra == "databricks-volumes"
+Requires-Dist: pandas; extra == "databricks-volumes"
 Provides-Extra: databricks-delta-tables
 Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
-Requires-Dist: pandas; extra == "databricks-delta-tables"
 Requires-Dist: numpy; extra == "databricks-delta-tables"
+Requires-Dist: pandas; extra == "databricks-delta-tables"
 Provides-Extra: singlestore
 Requires-Dist: singlestoredb; extra == "singlestore"
-Requires-Dist: pandas; extra == "singlestore"
 Requires-Dist: numpy; extra == "singlestore"
+Requires-Dist: pandas; extra == "singlestore"
 Provides-Extra: vectara
 Requires-Dist: requests; extra == "vectara"
 Requires-Dist: httpx; extra == "vectara"
 Requires-Dist: aiofiles; extra == "vectara"
-Requires-Dist: pandas; extra == "vectara"
 Requires-Dist: numpy; extra == "vectara"
+Requires-Dist: pandas; extra == "vectara"
 Provides-Extra: vastdb
 Requires-Dist: ibis; extra == "vastdb"
 Requires-Dist: vastdb; extra == "vastdb"
 Requires-Dist: pyarrow; extra == "vastdb"
-Requires-Dist: pandas; extra == "vastdb"
 Requires-Dist: numpy; extra == "vastdb"
+Requires-Dist: pandas; extra == "vastdb"
 Provides-Extra: zendesk
+Requires-Dist: aiofiles; extra == "zendesk"
 Requires-Dist: bs4; extra == "zendesk"
 Requires-Dist: httpx; extra == "zendesk"
-Requires-Dist: aiofiles; extra == "zendesk"
-Requires-Dist: pandas; extra == "zendesk"
 Requires-Dist: numpy; extra == "zendesk"
+Requires-Dist: pandas; extra == "zendesk"
 Provides-Extra: embed-huggingface
 Requires-Dist: sentence-transformers; extra == "embed-huggingface"
-Requires-Dist: pandas; extra == "embed-huggingface"
 Requires-Dist: numpy; extra == "embed-huggingface"
+Requires-Dist: pandas; extra == "embed-huggingface"
 Provides-Extra: embed-octoai
-Requires-Dist: tiktoken; extra == "embed-octoai"
 Requires-Dist: openai; extra == "embed-octoai"
-Requires-Dist: pandas; extra == "embed-octoai"
+Requires-Dist: tiktoken; extra == "embed-octoai"
 Requires-Dist: numpy; extra == "embed-octoai"
+Requires-Dist: pandas; extra == "embed-octoai"
 Provides-Extra: embed-vertexai
 Requires-Dist: vertexai; extra == "embed-vertexai"
-Requires-Dist: pandas; extra == "embed-vertexai"
 Requires-Dist: numpy; extra == "embed-vertexai"
+Requires-Dist: pandas; extra == "embed-vertexai"
 Provides-Extra: embed-voyageai
 Requires-Dist: voyageai; extra == "embed-voyageai"
-Requires-Dist: pandas; extra == "embed-voyageai"
 Requires-Dist: numpy; extra == "embed-voyageai"
+Requires-Dist: pandas; extra == "embed-voyageai"
 Provides-Extra: embed-mixedbreadai
 Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
-Requires-Dist: pandas; extra == "embed-mixedbreadai"
 Requires-Dist: numpy; extra == "embed-mixedbreadai"
+Requires-Dist: pandas; extra == "embed-mixedbreadai"
 Provides-Extra: openai
-Requires-Dist: tiktoken; extra == "openai"
 Requires-Dist: openai; extra == "openai"
-Requires-Dist: pandas; extra == "openai"
+Requires-Dist: tiktoken; extra == "openai"
 Requires-Dist: numpy; extra == "openai"
+Requires-Dist: pandas; extra == "openai"
 Provides-Extra: bedrock
-Requires-Dist: boto3; extra == "bedrock"
 Requires-Dist: aioboto3; extra == "bedrock"
-Requires-Dist: pandas; extra == "bedrock"
+Requires-Dist: boto3; extra == "bedrock"
 Requires-Dist: numpy; extra == "bedrock"
+Requires-Dist: pandas; extra == "bedrock"
 Provides-Extra: togetherai
 Requires-Dist: together; extra == "togetherai"
-Requires-Dist: pandas; extra == "togetherai"
 Requires-Dist: numpy; extra == "togetherai"
+Requires-Dist: pandas; extra == "togetherai"
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier

{unstructured_ingest-0.7.1.dist-info → unstructured_ingest-0.7.2.dist-info}/RECORD RENAMED Viewed

@@ -101,7 +101,6 @@ test/unit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/test_error.py,sha256=RflmngCdFNKOLXVfLnUdNfY3Mfg3k7DTEzfIl0B-syU,840
 test/unit/test_html.py,sha256=ubsck9pVOnPDFL0P8TZkko_46MIaFLlSNQcsgFDgYoE,4496
 test/unit/test_interfaces.py,sha256=Gv3WMJsw_3xPLy3nI3dIcJuLa2WvKYszSjI_W9XLtVM,787
-test/unit/test_logger.py,sha256=0SKndXE_VRd8XmUHkrj7zuBQHZscXx3ZQllMEOvtF9Y,2380
 test/unit/test_utils.py,sha256=xeSM02zOChSOO3dzDOVAEiQme1rQ8drjnJF93S3BFmk,7247
 test/unit/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/chunkers/test_chunkers.py,sha256=wRxbSj7P1FwRGDyVcARkm8CQSVCBCro3nTe54UoUBzc,1769
@@ -134,10 +133,10 @@ test/unit/partitioners/test_partitioner.py,sha256=eJoUDbiKtweyU1WYfsY5KqVqoPjbx1
 test/unit/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
 unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
-unstructured_ingest/__version__.py,sha256=YP5dlQlaTHZ-KOck8o_UzdjIFae7iENB5d3AMIKlZ3M,42
+unstructured_ingest/__version__.py,sha256=7O8GlC09PP-XuUDOj6bhRUtbOuUgpBT2COw4AjU1kk0,42
 unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
 unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
-unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
+unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
 unstructured_ingest/main.py,sha256=82G_7eG4PNhc_xIqj4Y_sFbDV9VI-nwSfsfJQMzovMk,169
 unstructured_ingest/otel.py,sha256=NsUqOolA0gt69eFhZLABjVpcKoM9aus-AbxIKqWqPTc,4127
 unstructured_ingest/unstructured_api.py,sha256=hWUXUhGtyfi2OcDR-BriHJyT4jJywf4zfG1qpSCf9Bo,5002
@@ -222,6 +221,8 @@ unstructured_ingest/processes/connectors/slack.py,sha256=e4ntATdht_olAPsco1DKwlr
 unstructured_ingest/processes/connectors/utils.py,sha256=TAd0hb1f291N-q7-TUe6JKSCGkhqDyo7Ij8zmliBZUc,2071
 unstructured_ingest/processes/connectors/vectara.py,sha256=frKJkc7ffstQhXD9-HkAGoQAofGkl6AsnKJhGcl8LgA,12294
 unstructured_ingest/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql,sha256=dUZZDNkyvQXKqoAThRz3ek7zaUE2l_LAQimlG5WZhH4,211
+unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json,sha256=SJlIO0kXxy866tWQ8bEzvwLwflsoUMIS-OKlxMvHIuE,504
 unstructured_ingest/processes/connectors/databricks/__init__.py,sha256=RtKAPyNtXh6fzEsOQ08pA0-vC1uMr3KqYG6cqiBoo70,2133
 unstructured_ingest/processes/connectors/databricks/volumes.py,sha256=OWQrne9-5hPzc-kxGa2P53M3DoksDzMDyjLhQyihdCo,8020
 unstructured_ingest/processes/connectors/databricks/volumes_aws.py,sha256=RP9rq2sfysygiqzXj6eX0CXeZpxk65xmrz7HZnWRQWA,2961
@@ -360,11 +361,11 @@ unstructured_ingest/utils/dep_check.py,sha256=SXXcUna2H0RtxA6j1S2NGkvQa9JP2DujWh
 unstructured_ingest/utils/html.py,sha256=0WduP8tI5S3nHFQi6XHNPHgsIC9j3iWwyIayX9gDLiE,6386
 unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01qAbElH0,1201
 unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
-unstructured_ingest/utils/string_and_date_utils.py,sha256=QBj8HXZGvDZQSULLOQwJ8tb3r2aYrTBQ71rkiV6gZdI,2519
+unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
 unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
-unstructured_ingest-0.7.1.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
-unstructured_ingest-0.7.1.dist-info/METADATA,sha256=Ex_8EkItZzbGEoRJwR7Fqm_t0aajIZLVdtzwL7XBsQw,15050
-unstructured_ingest-0.7.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-unstructured_ingest-0.7.1.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
-unstructured_ingest-0.7.1.dist-info/top_level.txt,sha256=85vUyT6fV2A5eCEM3M3FPRUUI9vZOVK1xVZt7eo1oV8,34
-unstructured_ingest-0.7.1.dist-info/RECORD,,
+unstructured_ingest-0.7.2.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
+unstructured_ingest-0.7.2.dist-info/METADATA,sha256=BjJRt_WKMPbiOWOxGZPs3Q9ZmwHRkPfF0FbWT7X7lA4,15050
+unstructured_ingest-0.7.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+unstructured_ingest-0.7.2.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
+unstructured_ingest-0.7.2.dist-info/top_level.txt,sha256=85vUyT6fV2A5eCEM3M3FPRUUI9vZOVK1xVZt7eo1oV8,34
+unstructured_ingest-0.7.2.dist-info/RECORD,,

test/unit/test_logger.py DELETED Viewed

@@ -1,78 +0,0 @@
-import json
-import pytest
-from unstructured_ingest.logger import (
-    default_is_data_sensitive,
-    hide_sensitive_fields,
-    redact_jsons,
-)
-@pytest.mark.parametrize(
-    ("key", "value", "is_sensitive"),
-    [
-        ("username", "john_smith", False),
-        ("password", "13?H%", True),
-        ("token", "123", True),
-        ("AWS_CREDENTIAL", "aws_credential", True),
-        ("AWS_KEY", None, False),
-    ],
-)
-def test_default_is_sensitive(key, value, is_sensitive):
-    assert default_is_data_sensitive(key, value) == is_sensitive
-def test_hide_sensitive_fields():
-    d = {
-        "username": "john_smith",
-        "password": "13?H%",
-        "inner": {
-            "token": "123",
-            "AWS_KEY": None,
-            "inner_j_string": json.dumps(
-                {"account_name": "secret name", "client_id": 123, "timestamp": 123}
-            ),
-        },
-    }
-    redacted_d = hide_sensitive_fields(d)
-    expected_d = {
-        "password": "*******",
-        "username": "john_smith",
-        "inner": {
-            "token": "*******",
-            "AWS_KEY": None,
-            "inner_j_string": json.dumps(
-                {"account_name": "*******", "client_id": "*******", "timestamp": 123}
-            ),
-        },
-    }
-    assert redacted_d == expected_d
-def test_redact_jsons():
-    d1 = {
-        "username": "john_smith",
-        "password": "13?H%",
-        "inner": {
-            "token": "123",
-            "AWS_KEY": None,
-            "inner_j_string": json.dumps(
-                {"account_name": "secret name", "client_id": 123, "timestamp": 123}
-            ),
-        },
-    }
-    d2 = {"username": "tim67", "update_time": 456}
-    d3 = {"account_name": "top secret", "host": "http://localhost:8888"}
-    sensitive_string = f"Some topic secret info ({json.dumps(d1)} regarding {d2} and {d3})"
-    expected_string = (
-        'Some topic secret info ({"username": "john_smith", "password": "*******", '
-        '"inner": {"token": "*******", "AWS_KEY": null, "inner_j_string": '
-        '"{\\"account_name\\": \\"*******\\", \\"client_id\\": \\"*******\\", '
-        '\\"timestamp\\": 123}"}} regarding {"username": "tim67", "update_time": 456} '
-        'and {"account_name": "*******", "host": "http://localhost:8888"})'
-    )
-    redacted_string = redact_jsons(sensitive_string)
-    assert redacted_string == expected_string

{unstructured_ingest-0.7.1.dist-info → unstructured_ingest-0.7.2.dist-info}/LICENSE.md RENAMED Viewed

File without changes

{unstructured_ingest-0.7.1.dist-info → unstructured_ingest-0.7.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{unstructured_ingest-0.7.1.dist-info → unstructured_ingest-0.7.2.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{unstructured_ingest-0.7.1.dist-info → unstructured_ingest-0.7.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

unstructured-ingest 0.7.1__py3-none-any.whl → 0.7.2__py3-none-any.whl

Potentially problematic release.

unstructured-ingest 0.7.1__py3-none-any.whl → 0.7.2__py3-none-any.whl