unstructured-ingest 0.5.17__py3-none-any.whl → 0.5.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/integration/connectors/test_redis.py +36 -12
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/utils/data_prep.py +10 -0
- unstructured_ingest/v2/processes/connectors/delta_table.py +2 -5
- unstructured_ingest/v2/processes/connectors/neo4j.py +3 -6
- unstructured_ingest/v2/processes/connectors/onedrive.py +6 -0
- unstructured_ingest/v2/processes/connectors/redisdb.py +7 -6
- {unstructured_ingest-0.5.17.dist-info → unstructured_ingest-0.5.19.dist-info}/METADATA +17 -17
- {unstructured_ingest-0.5.17.dist-info → unstructured_ingest-0.5.19.dist-info}/RECORD +13 -13
- {unstructured_ingest-0.5.17.dist-info → unstructured_ingest-0.5.19.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.5.17.dist-info → unstructured_ingest-0.5.19.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.5.17.dist-info → unstructured_ingest-0.5.19.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.5.17.dist-info → unstructured_ingest-0.5.19.dist-info}/top_level.txt +0 -0
|
@@ -23,20 +23,22 @@ from unstructured_ingest.v2.processes.connectors.redisdb import (
|
|
|
23
23
|
)
|
|
24
24
|
|
|
25
25
|
|
|
26
|
-
async def delete_record(client: Redis, element_id: str) -> None:
|
|
27
|
-
|
|
26
|
+
async def delete_record(client: Redis, element_id: str, key_prefix: str) -> None:
|
|
27
|
+
key_with_prefix = f"{key_prefix}{element_id}"
|
|
28
|
+
await client.delete(key_with_prefix)
|
|
28
29
|
|
|
29
30
|
|
|
30
|
-
async def validate_upload(client: Redis, first_element: dict):
|
|
31
|
+
async def validate_upload(client: Redis, first_element: dict, key_prefix: str) -> None:
|
|
31
32
|
element_id = first_element["element_id"]
|
|
33
|
+
key_with_prefix = f"{key_prefix}{element_id}"
|
|
32
34
|
expected_text = first_element["text"]
|
|
33
35
|
expected_embeddings = first_element["embeddings"]
|
|
34
36
|
async with client.pipeline(transaction=True) as pipe:
|
|
35
37
|
try:
|
|
36
|
-
response = await pipe.json().get(element_id, "$").execute()
|
|
38
|
+
response = await pipe.json().get(key_with_prefix, "$").execute()
|
|
37
39
|
response = response[0][0]
|
|
38
40
|
except redis_exceptions.ResponseError:
|
|
39
|
-
response = await pipe.get(element_id).execute()
|
|
41
|
+
response = await pipe.get(key_with_prefix).execute()
|
|
40
42
|
response = json.loads(response[0])
|
|
41
43
|
|
|
42
44
|
embedding_similarity = np.linalg.norm(
|
|
@@ -53,6 +55,7 @@ async def redis_destination_test(
|
|
|
53
55
|
upload_file: Path,
|
|
54
56
|
tmp_path: Path,
|
|
55
57
|
connection_kwargs: dict,
|
|
58
|
+
uploader_config: dict,
|
|
56
59
|
uri: Optional[str] = None,
|
|
57
60
|
password: Optional[str] = None,
|
|
58
61
|
):
|
|
@@ -60,8 +63,9 @@ async def redis_destination_test(
|
|
|
60
63
|
connection_config=RedisConnectionConfig(
|
|
61
64
|
**connection_kwargs, access_config=RedisAccessConfig(uri=uri, password=password)
|
|
62
65
|
),
|
|
63
|
-
upload_config=RedisUploaderConfig(batch_size=10),
|
|
66
|
+
upload_config=RedisUploaderConfig(batch_size=10, **uploader_config),
|
|
64
67
|
)
|
|
68
|
+
key_prefix = uploader.upload_config.key_prefix
|
|
65
69
|
|
|
66
70
|
file_data = FileData(
|
|
67
71
|
source_identifiers=SourceIdentifiers(fullpath=upload_file.name, filename=upload_file.name),
|
|
@@ -78,20 +82,32 @@ async def redis_destination_test(
|
|
|
78
82
|
|
|
79
83
|
if uri:
|
|
80
84
|
async with from_url(uri) as client:
|
|
81
|
-
await validate_upload(
|
|
85
|
+
await validate_upload(
|
|
86
|
+
client=client,
|
|
87
|
+
first_element=first_element,
|
|
88
|
+
key_prefix=key_prefix,
|
|
89
|
+
)
|
|
82
90
|
else:
|
|
83
91
|
async with Redis(**connection_kwargs, password=password) as client:
|
|
84
|
-
await validate_upload(
|
|
92
|
+
await validate_upload(
|
|
93
|
+
client=client,
|
|
94
|
+
first_element=first_element,
|
|
95
|
+
key_prefix=key_prefix,
|
|
96
|
+
)
|
|
85
97
|
except Exception as e:
|
|
86
98
|
raise e
|
|
87
99
|
finally:
|
|
88
100
|
if uri:
|
|
89
101
|
async with from_url(uri) as client:
|
|
90
|
-
tasks = [delete_record(client, element["element_id"]) for element in elements]
|
|
102
|
+
tasks = [
|
|
103
|
+
delete_record(client, element["element_id"], key_prefix) for element in elements
|
|
104
|
+
]
|
|
91
105
|
await asyncio.gather(*tasks)
|
|
92
106
|
else:
|
|
93
107
|
async with Redis(**connection_kwargs, password=password) as client:
|
|
94
|
-
tasks = [delete_record(client, element["element_id"]) for element in elements]
|
|
108
|
+
tasks = [
|
|
109
|
+
delete_record(client, element["element_id"], key_prefix) for element in elements
|
|
110
|
+
]
|
|
95
111
|
await asyncio.gather(*tasks)
|
|
96
112
|
|
|
97
113
|
|
|
@@ -105,8 +121,13 @@ async def test_redis_destination_azure_with_password(upload_file: Path, tmp_path
|
|
|
105
121
|
"db": 0,
|
|
106
122
|
"ssl": True,
|
|
107
123
|
}
|
|
124
|
+
uploader_config = {
|
|
125
|
+
"key_prefix": "test_ingest:",
|
|
126
|
+
}
|
|
108
127
|
redis_pw = os.environ["AZURE_REDIS_INGEST_TEST_PASSWORD"]
|
|
109
|
-
await redis_destination_test(upload_file, tmp_path, connection_kwargs, password=redis_pw)
|
|
128
|
+
await redis_destination_test(
|
|
129
|
+
upload_file, tmp_path, connection_kwargs, uploader_config, password=redis_pw
|
|
130
|
+
)
|
|
110
131
|
|
|
111
132
|
|
|
112
133
|
@pytest.mark.asyncio
|
|
@@ -114,6 +135,9 @@ async def test_redis_destination_azure_with_password(upload_file: Path, tmp_path
|
|
|
114
135
|
@requires_env("AZURE_REDIS_INGEST_TEST_PASSWORD")
|
|
115
136
|
async def test_redis_destination_azure_with_uri(upload_file: Path, tmp_path: Path):
|
|
116
137
|
connection_kwargs = {}
|
|
138
|
+
uploader_config = {
|
|
139
|
+
"key_prefix": "test_ingest:",
|
|
140
|
+
}
|
|
117
141
|
redis_pw = os.environ["AZURE_REDIS_INGEST_TEST_PASSWORD"]
|
|
118
142
|
uri = f"rediss://:{redis_pw}@utic-dashboard-dev.redis.cache.windows.net:6380/0"
|
|
119
|
-
await redis_destination_test(upload_file, tmp_path, connection_kwargs, uri=uri)
|
|
143
|
+
await redis_destination_test(upload_file, tmp_path, connection_kwargs, uploader_config, uri=uri)
|
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.5.17" # pragma: no cover
|
|
1
|
+
__version__ = "0.5.19" # pragma: no cover
|
|
@@ -192,6 +192,16 @@ def get_data(path: Union[Path, str]) -> list[dict]:
|
|
|
192
192
|
logger.warning(f"failed to read {path} as parquet: {e}")
|
|
193
193
|
|
|
194
194
|
|
|
195
|
+
def get_json_data(path: Path) -> list[dict]:
|
|
196
|
+
with path.open() as f:
|
|
197
|
+
if path.suffix == ".json":
|
|
198
|
+
return json.load(f)
|
|
199
|
+
elif path.suffix == ".ndjson":
|
|
200
|
+
return ndjson.load(f)
|
|
201
|
+
else:
|
|
202
|
+
raise ValueError(f"Unsupported file type: {path}")
|
|
203
|
+
|
|
204
|
+
|
|
195
205
|
def get_data_df(path: Path) -> pd.DataFrame:
|
|
196
206
|
with path.open() as f:
|
|
197
207
|
if path.suffix == ".json":
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import json
|
|
2
1
|
import os
|
|
3
2
|
import traceback
|
|
4
3
|
from dataclasses import dataclass, field
|
|
@@ -11,7 +10,7 @@ import pandas as pd
|
|
|
11
10
|
from pydantic import Field, Secret
|
|
12
11
|
|
|
13
12
|
from unstructured_ingest.error import DestinationConnectionError
|
|
14
|
-
from unstructured_ingest.utils.data_prep import get_data_df
|
|
13
|
+
from unstructured_ingest.utils.data_prep import get_data_df, get_json_data
|
|
15
14
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
16
15
|
from unstructured_ingest.utils.table import convert_to_pandas_dataframe
|
|
17
16
|
from unstructured_ingest.v2.interfaces import (
|
|
@@ -86,9 +85,7 @@ class DeltaTableUploadStager(UploadStager):
|
|
|
86
85
|
output_filename: str,
|
|
87
86
|
**kwargs: Any,
|
|
88
87
|
) -> Path:
|
|
89
|
-
|
|
90
|
-
elements_contents = json.load(elements_file)
|
|
91
|
-
|
|
88
|
+
elements_contents = get_json_data(elements_filepath)
|
|
92
89
|
output_path = Path(output_dir) / Path(f"{output_filename}.parquet")
|
|
93
90
|
|
|
94
91
|
df = convert_to_pandas_dataframe(elements_dict=elements_contents)
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
|
-
import json
|
|
5
4
|
import uuid
|
|
6
5
|
from collections import defaultdict
|
|
7
6
|
from contextlib import asynccontextmanager
|
|
@@ -14,7 +13,7 @@ from pydantic import BaseModel, ConfigDict, Field, Secret, field_validator
|
|
|
14
13
|
|
|
15
14
|
from unstructured_ingest.error import DestinationConnectionError
|
|
16
15
|
from unstructured_ingest.logger import logger
|
|
17
|
-
from unstructured_ingest.utils.data_prep import batch_generator
|
|
16
|
+
from unstructured_ingest.utils.data_prep import batch_generator, get_json_data
|
|
18
17
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
19
18
|
from unstructured_ingest.v2.interfaces import (
|
|
20
19
|
AccessConfig,
|
|
@@ -97,8 +96,7 @@ class Neo4jUploadStager(UploadStager):
|
|
|
97
96
|
output_filename: str,
|
|
98
97
|
**kwargs: Any,
|
|
99
98
|
) -> Path:
|
|
100
|
-
|
|
101
|
-
elements = json.load(file)
|
|
99
|
+
elements = get_json_data(elements_filepath)
|
|
102
100
|
|
|
103
101
|
nx_graph = self._create_lexical_graph(
|
|
104
102
|
elements, self._create_document_node(file_data=file_data)
|
|
@@ -294,8 +292,7 @@ class Neo4jUploader(Uploader):
|
|
|
294
292
|
return True
|
|
295
293
|
|
|
296
294
|
async def run_async(self, path: Path, file_data: FileData, **kwargs) -> None: # type: ignore
|
|
297
|
-
|
|
298
|
-
staged_data = json.load(file)
|
|
295
|
+
staged_data = get_json_data(path)
|
|
299
296
|
|
|
300
297
|
graph_data = _GraphData.model_validate(staged_data)
|
|
301
298
|
async with self.connection_config.get_client() as client:
|
|
@@ -35,6 +35,10 @@ from unstructured_ingest.v2.processes.connector_registry import (
|
|
|
35
35
|
DestinationRegistryEntry,
|
|
36
36
|
SourceRegistryEntry,
|
|
37
37
|
)
|
|
38
|
+
from unstructured_ingest.v2.processes.utils.blob_storage import (
|
|
39
|
+
BlobStoreUploadStager,
|
|
40
|
+
BlobStoreUploadStagerConfig,
|
|
41
|
+
)
|
|
38
42
|
|
|
39
43
|
if TYPE_CHECKING:
|
|
40
44
|
from office365.graph_client import GraphClient
|
|
@@ -428,4 +432,6 @@ onedrive_destination_entry = DestinationRegistryEntry(
|
|
|
428
432
|
connection_config=OnedriveConnectionConfig,
|
|
429
433
|
uploader=OnedriveUploader,
|
|
430
434
|
uploader_config=OnedriveUploaderConfig,
|
|
435
|
+
upload_stager_config=BlobStoreUploadStagerConfig,
|
|
436
|
+
upload_stager=BlobStoreUploadStager,
|
|
431
437
|
)
|
|
@@ -110,6 +110,7 @@ class RedisConnectionConfig(ConnectionConfig):
|
|
|
110
110
|
|
|
111
111
|
class RedisUploaderConfig(UploaderConfig):
|
|
112
112
|
batch_size: int = Field(default=100, description="Number of records per batch")
|
|
113
|
+
key_prefix: str = Field(default="", description="Prefix for Redis keys")
|
|
113
114
|
|
|
114
115
|
|
|
115
116
|
@dataclass
|
|
@@ -145,11 +146,11 @@ class RedisUploader(Uploader):
|
|
|
145
146
|
async with self.connection_config.create_async_client() as async_client:
|
|
146
147
|
async with async_client.pipeline(transaction=True) as pipe:
|
|
147
148
|
for element in batch:
|
|
148
|
-
|
|
149
|
+
key_with_prefix = f"{self.upload_config.key_prefix}{element['element_id']}"
|
|
149
150
|
if redis_stack:
|
|
150
|
-
pipe.json().set(element["element_id"], "$", element)
|
|
151
|
+
pipe.json().set(key_with_prefix, "$", element)
|
|
151
152
|
else:
|
|
152
|
-
pipe.set(element["element_id"], json.dumps(element))
|
|
153
|
+
pipe.set(key_with_prefix, json.dumps(element))
|
|
153
154
|
await pipe.execute()
|
|
154
155
|
|
|
155
156
|
@requires_dependencies(["redis"], extras="redis")
|
|
@@ -159,16 +160,16 @@ class RedisUploader(Uploader):
|
|
|
159
160
|
redis_stack = True
|
|
160
161
|
async with self.connection_config.create_async_client() as async_client:
|
|
161
162
|
async with async_client.pipeline(transaction=True) as pipe:
|
|
162
|
-
|
|
163
|
+
key_with_prefix = f"{self.upload_config.key_prefix}{element['element_id']}"
|
|
163
164
|
try:
|
|
164
165
|
# Redis with stack extension supports JSON type
|
|
165
|
-
await pipe.json().set(element["element_id"], "$", element).execute()
|
|
166
|
+
await pipe.json().set(key_with_prefix, "$", element).execute()
|
|
166
167
|
except redis_exceptions.ResponseError as e:
|
|
167
168
|
message = str(e)
|
|
168
169
|
if "unknown command `JSON.SET`" in message:
|
|
169
170
|
# if this error occurs, Redis server doesn't support JSON type,
|
|
170
171
|
# so save as string type instead
|
|
171
|
-
await pipe.set(element["element_id"], json.dumps(element)).execute()
|
|
172
|
+
await pipe.set(key_with_prefix, json.dumps(element)).execute()
|
|
172
173
|
redis_stack = False
|
|
173
174
|
else:
|
|
174
175
|
raise e
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: unstructured-ingest
|
|
3
|
-
Version: 0.5.17
|
|
3
|
+
Version: 0.5.19
|
|
4
4
|
Summary: A library that prepares raw documents for downstream ML tasks.
|
|
5
5
|
Home-page: https://github.com/Unstructured-IO/unstructured-ingest
|
|
6
6
|
Author: Unstructured Technologies
|
|
@@ -23,10 +23,10 @@ Requires-Python: >=3.9.0,<3.14
|
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
License-File: LICENSE.md
|
|
25
25
|
Requires-Dist: python-dateutil
|
|
26
|
+
Requires-Dist: dataclasses_json
|
|
26
27
|
Requires-Dist: opentelemetry-sdk
|
|
27
28
|
Requires-Dist: pandas
|
|
28
29
|
Requires-Dist: click
|
|
29
|
-
Requires-Dist: dataclasses_json
|
|
30
30
|
Requires-Dist: pydantic>=2.7
|
|
31
31
|
Requires-Dist: tqdm
|
|
32
32
|
Provides-Extra: remote
|
|
@@ -71,35 +71,35 @@ Requires-Dist: adlfs; extra == "azure"
|
|
|
71
71
|
Provides-Extra: azure-ai-search
|
|
72
72
|
Requires-Dist: azure-search-documents; extra == "azure-ai-search"
|
|
73
73
|
Provides-Extra: biomed
|
|
74
|
-
Requires-Dist: bs4; extra == "biomed"
|
|
75
74
|
Requires-Dist: requests; extra == "biomed"
|
|
75
|
+
Requires-Dist: bs4; extra == "biomed"
|
|
76
76
|
Provides-Extra: box
|
|
77
|
-
Requires-Dist: boxfs; extra == "box"
|
|
78
77
|
Requires-Dist: fsspec; extra == "box"
|
|
78
|
+
Requires-Dist: boxfs; extra == "box"
|
|
79
79
|
Provides-Extra: chroma
|
|
80
80
|
Requires-Dist: chromadb; extra == "chroma"
|
|
81
81
|
Provides-Extra: clarifai
|
|
82
82
|
Requires-Dist: clarifai; extra == "clarifai"
|
|
83
83
|
Provides-Extra: confluence
|
|
84
|
-
Requires-Dist: atlassian-python-api; extra == "confluence"
|
|
85
84
|
Requires-Dist: requests; extra == "confluence"
|
|
85
|
+
Requires-Dist: atlassian-python-api; extra == "confluence"
|
|
86
86
|
Provides-Extra: couchbase
|
|
87
87
|
Requires-Dist: couchbase; extra == "couchbase"
|
|
88
88
|
Provides-Extra: delta-table
|
|
89
|
-
Requires-Dist: boto3; extra == "delta-table"
|
|
90
89
|
Requires-Dist: deltalake; extra == "delta-table"
|
|
90
|
+
Requires-Dist: boto3; extra == "delta-table"
|
|
91
91
|
Provides-Extra: discord
|
|
92
92
|
Requires-Dist: discord.py; extra == "discord"
|
|
93
93
|
Provides-Extra: dropbox
|
|
94
|
-
Requires-Dist: dropboxdrivefs; extra == "dropbox"
|
|
95
94
|
Requires-Dist: fsspec; extra == "dropbox"
|
|
95
|
+
Requires-Dist: dropboxdrivefs; extra == "dropbox"
|
|
96
96
|
Provides-Extra: duckdb
|
|
97
97
|
Requires-Dist: duckdb; extra == "duckdb"
|
|
98
98
|
Provides-Extra: elasticsearch
|
|
99
99
|
Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
|
|
100
100
|
Provides-Extra: gcs
|
|
101
|
-
Requires-Dist: bs4; extra == "gcs"
|
|
102
101
|
Requires-Dist: gcsfs; extra == "gcs"
|
|
102
|
+
Requires-Dist: bs4; extra == "gcs"
|
|
103
103
|
Requires-Dist: fsspec; extra == "gcs"
|
|
104
104
|
Provides-Extra: github
|
|
105
105
|
Requires-Dist: requests; extra == "github"
|
|
@@ -128,10 +128,10 @@ Requires-Dist: networkx; extra == "neo4j"
|
|
|
128
128
|
Requires-Dist: neo4j-rust-ext; extra == "neo4j"
|
|
129
129
|
Requires-Dist: cymple; extra == "neo4j"
|
|
130
130
|
Provides-Extra: notion
|
|
131
|
-
Requires-Dist: htmlBuilder; extra == "notion"
|
|
132
|
-
Requires-Dist: notion-client; extra == "notion"
|
|
133
|
-
Requires-Dist: httpx; extra == "notion"
|
|
134
131
|
Requires-Dist: backoff; extra == "notion"
|
|
132
|
+
Requires-Dist: httpx; extra == "notion"
|
|
133
|
+
Requires-Dist: notion-client; extra == "notion"
|
|
134
|
+
Requires-Dist: htmlBuilder; extra == "notion"
|
|
135
135
|
Provides-Extra: onedrive
|
|
136
136
|
Requires-Dist: bs4; extra == "onedrive"
|
|
137
137
|
Requires-Dist: msal; extra == "onedrive"
|
|
@@ -152,21 +152,21 @@ Requires-Dist: praw; extra == "reddit"
|
|
|
152
152
|
Provides-Extra: redis
|
|
153
153
|
Requires-Dist: redis; extra == "redis"
|
|
154
154
|
Provides-Extra: s3
|
|
155
|
-
Requires-Dist: fsspec; extra == "s3"
|
|
156
155
|
Requires-Dist: s3fs; extra == "s3"
|
|
156
|
+
Requires-Dist: fsspec; extra == "s3"
|
|
157
157
|
Provides-Extra: sharepoint
|
|
158
158
|
Requires-Dist: msal; extra == "sharepoint"
|
|
159
159
|
Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
|
|
160
160
|
Provides-Extra: salesforce
|
|
161
161
|
Requires-Dist: simple-salesforce; extra == "salesforce"
|
|
162
162
|
Provides-Extra: sftp
|
|
163
|
-
Requires-Dist: paramiko; extra == "sftp"
|
|
164
163
|
Requires-Dist: fsspec; extra == "sftp"
|
|
164
|
+
Requires-Dist: paramiko; extra == "sftp"
|
|
165
165
|
Provides-Extra: slack
|
|
166
166
|
Requires-Dist: slack_sdk[optional]; extra == "slack"
|
|
167
167
|
Provides-Extra: snowflake
|
|
168
|
-
Requires-Dist: snowflake-connector-python; extra == "snowflake"
|
|
169
168
|
Requires-Dist: psycopg2-binary; extra == "snowflake"
|
|
169
|
+
Requires-Dist: snowflake-connector-python; extra == "snowflake"
|
|
170
170
|
Provides-Extra: wikipedia
|
|
171
171
|
Requires-Dist: wikipedia; extra == "wikipedia"
|
|
172
172
|
Provides-Extra: weaviate
|
|
@@ -178,16 +178,16 @@ Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
|
|
|
178
178
|
Provides-Extra: singlestore
|
|
179
179
|
Requires-Dist: singlestoredb; extra == "singlestore"
|
|
180
180
|
Provides-Extra: vectara
|
|
181
|
-
Requires-Dist: httpx; extra == "vectara"
|
|
182
181
|
Requires-Dist: requests; extra == "vectara"
|
|
182
|
+
Requires-Dist: httpx; extra == "vectara"
|
|
183
183
|
Requires-Dist: aiofiles; extra == "vectara"
|
|
184
184
|
Provides-Extra: vastdb
|
|
185
185
|
Requires-Dist: pyarrow; extra == "vastdb"
|
|
186
186
|
Requires-Dist: vastdb; extra == "vastdb"
|
|
187
187
|
Requires-Dist: ibis; extra == "vastdb"
|
|
188
188
|
Provides-Extra: zendesk
|
|
189
|
-
Requires-Dist: httpx; extra == "zendesk"
|
|
190
189
|
Requires-Dist: bs4; extra == "zendesk"
|
|
190
|
+
Requires-Dist: httpx; extra == "zendesk"
|
|
191
191
|
Requires-Dist: aiofiles; extra == "zendesk"
|
|
192
192
|
Provides-Extra: embed-huggingface
|
|
193
193
|
Requires-Dist: sentence-transformers; extra == "embed-huggingface"
|
|
@@ -204,8 +204,8 @@ Provides-Extra: openai
|
|
|
204
204
|
Requires-Dist: openai; extra == "openai"
|
|
205
205
|
Requires-Dist: tiktoken; extra == "openai"
|
|
206
206
|
Provides-Extra: bedrock
|
|
207
|
-
Requires-Dist: aioboto3; extra == "bedrock"
|
|
208
207
|
Requires-Dist: boto3; extra == "bedrock"
|
|
208
|
+
Requires-Dist: aioboto3; extra == "bedrock"
|
|
209
209
|
Provides-Extra: togetherai
|
|
210
210
|
Requires-Dist: together; extra == "togetherai"
|
|
211
211
|
Dynamic: author
|
|
@@ -21,7 +21,7 @@ test/integration/connectors/test_notion.py,sha256=ueXyVqYWzP4LuZYe6PauptkXNG6qko
|
|
|
21
21
|
test/integration/connectors/test_onedrive.py,sha256=iwiDK0kWCfQbIEPnWUzzAA5PiCsHcmFZSxEcIZy_6cc,5229
|
|
22
22
|
test/integration/connectors/test_pinecone.py,sha256=9FC0frer7gtDzk5A6OhGsV8S4ggYfa5ReEO9t7L3Am0,13649
|
|
23
23
|
test/integration/connectors/test_qdrant.py,sha256=Yme3ZZ5zIbaZ-yYLUqN2oy0hsrcAfvlleRLYWMSYeSE,8049
|
|
24
|
-
test/integration/connectors/test_redis.py,sha256=
|
|
24
|
+
test/integration/connectors/test_redis.py,sha256=YXWWw4m40ZmLrf3eJ85hhT7WSJnri_GY1ieixIicYlI,5102
|
|
25
25
|
test/integration/connectors/test_s3.py,sha256=E1dypeag_E3OIfpQWIz3jb7ctRHRD63UtyTrzyvJzpc,7473
|
|
26
26
|
test/integration/connectors/test_sharepoint.py,sha256=weGby5YD6se7R7KLEq96hxUZYPzwoqZqXXTPhtQWZsQ,7646
|
|
27
27
|
test/integration/connectors/test_vectara.py,sha256=4kKOOTGUjeZw2jKRcgVDI7ifbRPRZfjjVO4d_7H5C6I,8710
|
|
@@ -111,7 +111,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
|
|
|
111
111
|
test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
112
112
|
test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
|
|
113
113
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
114
|
-
unstructured_ingest/__version__.py,sha256=
|
|
114
|
+
unstructured_ingest/__version__.py,sha256=YeBJuoTNGo0rz_5lKoO5e3ooyBOI71QLt4fdSp1KO_c,43
|
|
115
115
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
116
116
|
unstructured_ingest/interfaces.py,sha256=7DOnDpGvUNlCoFR7UPRGmOarqH5sFtuUOO5vf8X3oTM,31489
|
|
117
117
|
unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
|
|
@@ -370,7 +370,7 @@ unstructured_ingest/runner/writers/fsspec/s3.py,sha256=kHJq2O3864QBd_tL2SKb0mdyw
|
|
|
370
370
|
unstructured_ingest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
371
371
|
unstructured_ingest/utils/chunking.py,sha256=9b3sXMA6L8RW5xAkKQbwdtVudGLAcj_sgT6Grh5tyYM,1870
|
|
372
372
|
unstructured_ingest/utils/compression.py,sha256=NNiY-2S2Gf3at7zC1PYxMijaEza9vVSzRn5mdFf6mHo,4434
|
|
373
|
-
unstructured_ingest/utils/data_prep.py,sha256=
|
|
373
|
+
unstructured_ingest/utils/data_prep.py,sha256=MfID_7SPZHeZztlNTSXIzilaWvv1mdfCcLlhqpGLYNg,7557
|
|
374
374
|
unstructured_ingest/utils/dep_check.py,sha256=SXXcUna2H0RtxA6j1S2NGkvQa9JP2DujWhmyBa7776Y,2400
|
|
375
375
|
unstructured_ingest/utils/google_filetype.py,sha256=YVspEkiiBrRUSGVeVbsavvLvTmizdy2e6TsjigXTSRU,468
|
|
376
376
|
unstructured_ingest/utils/html.py,sha256=DGRDMqGbwH8RiF94Qh6NiqVkbbjZfe1h26dIehC-X7M,6340
|
|
@@ -433,7 +433,7 @@ unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=ngPDpU0oZ6
|
|
|
433
433
|
unstructured_ingest/v2/processes/connectors/chroma.py,sha256=VHCnM56qNXuHzovJihrNfJnZbWLJShOe8j12PJFrbL0,7219
|
|
434
434
|
unstructured_ingest/v2/processes/connectors/confluence.py,sha256=gSs4-AxL0gfeWdJfP7JfCrQSQNLoJRkvHquKK9RJvpQ,12043
|
|
435
435
|
unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=i7vuNKsUkN93JRVmg4--MO0ZgbjvhIqt46oYqk9zFSQ,12250
|
|
436
|
-
unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=
|
|
436
|
+
unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=bfEGiepJLOS9TxK-bMkjTTjHLHUc0q7qUzIYdwkLDMs,7104
|
|
437
437
|
unstructured_ingest/v2/processes/connectors/discord.py,sha256=-e4-cBK4TnHkknK1qIb86AIVMy81lBgC288_iLpTzM8,5246
|
|
438
438
|
unstructured_ingest/v2/processes/connectors/gitlab.py,sha256=ufE65Z8q_tC4oppGg5BsGXwSaL7RbEXcaagJQYsylNo,9984
|
|
439
439
|
unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=QzcHNelUbnubsDtanFIgDCRzmYTuP-GjJ_g9y8fButE,19623
|
|
@@ -442,11 +442,11 @@ unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=VRDAiou_7oWOIAgQTdOG
|
|
|
442
442
|
unstructured_ingest/v2/processes/connectors/local.py,sha256=FWPRjjUsnQjyZMChuZGuMU04AB5X0sFEOcAXhx1r9sk,7381
|
|
443
443
|
unstructured_ingest/v2/processes/connectors/milvus.py,sha256=wmcu9NVy3gYlQGT25inN5w_QrhFoL8-hRq0pJFSNw8g,8866
|
|
444
444
|
unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=cL0QUQZF_s2brh3nNNeAywXVpaIiND4b5JTAFlYjLjw,14273
|
|
445
|
-
unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=
|
|
446
|
-
unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=
|
|
445
|
+
unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=I-eDLAlThHKKFQfkZpQL8CLFBDy5krWgTQANLgMTwTk,18679
|
|
446
|
+
unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=5rg7t40gKxDHNcuJrJHmVzJ9uM7Ct4RBOvFsfwdGc5c,18002
|
|
447
447
|
unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
|
|
448
448
|
unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=O9lC4mZ9V_exg9apiCJSWHsgkuYDSEOlI6CaUS5ZB7c,13961
|
|
449
|
-
unstructured_ingest/v2/processes/connectors/redisdb.py,sha256=
|
|
449
|
+
unstructured_ingest/v2/processes/connectors/redisdb.py,sha256=0h105_MpOO4-uydiyHgM4TvduSkAMAr931KFANcKW8Y,6936
|
|
450
450
|
unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
|
|
451
451
|
unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=2T9Bm1H_ALwHhG_YP7vsuUUW-mUg61zcaae3aa9BnN4,4827
|
|
452
452
|
unstructured_ingest/v2/processes/connectors/slack.py,sha256=Z73VmQ3oUY09KoLEi5OBdQeDt4ONEY_02SglWQc6HXE,9252
|
|
@@ -577,9 +577,9 @@ unstructured_ingest/v2/processes/connectors/zendesk/client.py,sha256=DDAYQB7catK
|
|
|
577
577
|
unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py,sha256=R8SXYkRhVUoWEHdGCt2CzcTxxuFundw_0GlGZ34YmbM,8987
|
|
578
578
|
unstructured_ingest/v2/processes/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
579
579
|
unstructured_ingest/v2/processes/utils/blob_storage.py,sha256=EWvK4HRYubr9i1UyMhv5cU9u0UzVkCDC_BIm4Uxab7Y,964
|
|
580
|
-
unstructured_ingest-0.5.
|
|
581
|
-
unstructured_ingest-0.5.
|
|
582
|
-
unstructured_ingest-0.5.
|
|
583
|
-
unstructured_ingest-0.5.
|
|
584
|
-
unstructured_ingest-0.5.
|
|
585
|
-
unstructured_ingest-0.5.
|
|
580
|
+
unstructured_ingest-0.5.19.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
581
|
+
unstructured_ingest-0.5.19.dist-info/METADATA,sha256=6veKDuElp9klfZfEzZIFIwPCchckH6Mf04qCc0ogN7M,8465
|
|
582
|
+
unstructured_ingest-0.5.19.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
583
|
+
unstructured_ingest-0.5.19.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
584
|
+
unstructured_ingest-0.5.19.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
|
|
585
|
+
unstructured_ingest-0.5.19.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-0.5.17.dist-info → unstructured_ingest-0.5.19.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|