unstructured-ingest 0.5.11__py3-none-any.whl → 0.5.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of unstructured-ingest might be problematic.
- test/integration/connectors/test_chroma.py +18 -0
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/v2/processes/connectors/neo4j.py +52 -21
- unstructured_ingest/v2/processes/connectors/utils.py +5 -2
- unstructured_ingest/v2/processes/connectors/zendesk/client.py +20 -2
- unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py +8 -2
- {unstructured_ingest-0.5.11.dist-info → unstructured_ingest-0.5.13.dist-info}/METADATA +23 -19
- {unstructured_ingest-0.5.11.dist-info → unstructured_ingest-0.5.13.dist-info}/RECORD +12 -12
- {unstructured_ingest-0.5.11.dist-info → unstructured_ingest-0.5.13.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.5.11.dist-info → unstructured_ingest-0.5.13.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.5.11.dist-info → unstructured_ingest-0.5.13.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.5.11.dist-info → unstructured_ingest-0.5.13.dist-info}/top_level.txt +0 -0
test/integration/connectors/test_chroma.py CHANGED

@@ -1,3 +1,19 @@
+# add this back in when figure out why it's failing since NOTHING changed when it started failing
+
+# ==================================== ERRORS ====================================
+# _________ ERROR collecting test/integration/connectors/test_chroma.py __________
+# ImportError while importing test module '/home/runner/work/unstructured-ingest/
+# unstructured-ingest/test/integration/connectors/test_chroma.py'.
+# Hint: make sure your test modules/packages have valid Python names.
+# Traceback:
+# /opt/hostedtoolcache/Python/3.10.16/x64/lib/python3.10/importlib/__init__.py:126: in import_module
+#     return _bootstrap._gcd_import(name[level:], package, level)
+# test/integration/connectors/test_chroma.py:4: in <module>
+#     import chromadb
+# E   ModuleNotFoundError: No module named 'chromadb'
+
+
+"""
 import json
 from pathlib import Path

@@ -116,3 +132,5 @@ def test_chroma_stager(
         stager=stager,
         tmp_dir=tmp_path,
     )
+
+"""
unstructured_ingest/__version__.py CHANGED

@@ -1 +1 @@
-__version__ = "0.5.11"  # pragma: no cover
+__version__ = "0.5.13"  # pragma: no cover
unstructured_ingest/v2/processes/connectors/neo4j.py CHANGED

@@ -111,6 +111,28 @@ class Neo4jUploadStager(UploadStager):

         return output_filepath

+    def _add_entities(self, element: dict, graph: "Graph", element_node: _Node) -> None:
+        entities = element.get("metadata", {}).get("entities", [])
+        if not entities:
+            return None
+        if not isinstance(entities, list):
+            return None
+
+        for entity in entities:
+            if not isinstance(entity, dict):
+                continue
+            if "entity" not in entity or "type" not in entity:
+                continue
+            entity_node = _Node(
+                labels=[Label.ENTITY], properties={"id": entity["entity"]}, id_=entity["entity"]
+            )
+            graph.add_edge(
+                entity_node,
+                _Node(labels=[Label.ENTITY], properties={"id": entity["type"]}, id_=entity["type"]),
+                relationship=Relationship.ENTITY_TYPE,
+            )
+            graph.add_edge(element_node, entity_node, relationship=Relationship.HAS_ENTITY)
+
     def _create_lexical_graph(self, elements: list[dict], document_node: _Node) -> "Graph":
         import networkx as nx

@@ -129,25 +151,23 @@ class Neo4jUploadStager(UploadStager):
             previous_node = element_node
             graph.add_edge(element_node, document_node, relationship=Relationship.PART_OF_DOCUMENT)

+            self._add_entities(element, graph, element_node)
+
             if self._is_chunk(element):
-
-                self._create_element_node(origin_element)
-
-
-
-
-
-
-
-
-
-
-
-                    for origin_element_node in origin_element_nodes
-                ],
-                relationship=Relationship.PART_OF_DOCUMENT,
-            )
+                for origin_element in format_and_truncate_orig_elements(element, include_text=True):
+                    origin_element_node = self._create_element_node(origin_element)
+
+                    graph.add_edge(
+                        origin_element_node,
+                        element_node,
+                        relationship=Relationship.PART_OF_CHUNK,
+                    )
+                    graph.add_edge(
+                        origin_element_node,
+                        document_node,
+                        relationship=Relationship.PART_OF_DOCUMENT,
+                    )
+                    self._add_entities(origin_element, graph, origin_element_node)

         return graph

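The `_add_entities` hook added above links every staged element to the entities found in its metadata, and each entity to its type. The following is a minimal, self-contained sketch of the resulting graph shape using plain networkx with string node ids; it does not use the connector's `_Node`, `Label`, or `Relationship` helpers, and all names in it are illustrative.

import networkx as nx


def add_entities(graph: nx.MultiDiGraph, element_id: str, entities: list[dict]) -> None:
    """Mirror the stager's behavior: skip malformed entries, then wire up the edges."""
    for entity in entities:
        if not isinstance(entity, dict) or "entity" not in entity or "type" not in entity:
            continue
        entity_id, type_id = entity["entity"], entity["type"]
        graph.add_node(entity_id, label="Entity")
        graph.add_node(type_id, label="Entity")
        # entity -> its type, e.g. "Ada Lovelace" -[ENTITY_TYPE]-> "PERSON"
        graph.add_edge(entity_id, type_id, relationship="ENTITY_TYPE")
        # element -> entity, e.g. an element or chunk node -[HAS_ENTITY]-> "Ada Lovelace"
        graph.add_edge(element_id, entity_id, relationship="HAS_ENTITY")


graph = nx.MultiDiGraph()
graph.add_node("element-1", label="UnstructuredElement")
add_entities(graph, "element-1", [{"entity": "Ada Lovelace", "type": "PERSON"}])
print(list(graph.edges(data=True)))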
@@ -231,6 +251,7 @@ class Label(Enum):
     UNSTRUCTURED_ELEMENT = "UnstructuredElement"
     CHUNK = "Chunk"
     DOCUMENT = "Document"
+    ENTITY = "Entity"


 class Relationship(Enum):
@@ -238,6 +259,8 @@ class Relationship(Enum):
     PART_OF_CHUNK = "PART_OF_CHUNK"
     NEXT_CHUNK = "NEXT_CHUNK"
     NEXT_ELEMENT = "NEXT_ELEMENT"
+    ENTITY_TYPE = "ENTITY_TYPE"
+    HAS_ENTITY = "HAS_ENTITY"


 class Neo4jUploaderConfig(UploaderConfig):
@@ -304,13 +327,16 @@ class Neo4jUploader(Uploader):
     async def _create_vector_index(
         self, client: AsyncDriver, dimensions: int, similarity_function: SimilarityFunction
     ) -> None:
+        import neo4j.exceptions
+
         label = Label.CHUNK
         logger.info(
             f"Creating index on nodes labeled '{label.value}' if it does not already exist."
         )
         index_name = f"{label.value.lower()}_vector"
-        await client.execute_query(
-            f"""
+        try:
+            await client.execute_query(
+                f"""
             CREATE VECTOR INDEX {index_name} IF NOT EXISTS
             FOR (n:{label.value}) ON n.embedding
             OPTIONS {{indexConfig: {{
@@ -318,7 +344,12 @@ class Neo4jUploader(Uploader):
                 `vector.dimensions`: {dimensions}}}
             }}
             """
-        )
+            )
+        except neo4j.exceptions.ClientError as e:
+            if e.code == "Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists":
+                logger.info(f"Index on nodes labeled '{label.value}' already exists.")
+            else:
+                raise

     async def _delete_old_data_if_exists(self, file_data: FileData, client: AsyncDriver) -> None:
         logger.info(f"Deleting old data for the record '{file_data.identifier}' (if present).")
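The `_create_vector_index` change above defers the `neo4j.exceptions` import and treats the `Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists` error code as a benign outcome. A rough standalone sketch of the same pattern with the neo4j Python driver follows; the URI, credentials, index name, and dimensions are placeholders, not the connector's actual configuration.

import neo4j
import neo4j.exceptions


async def ensure_chunk_vector_index(uri: str, auth: tuple[str, str], dimensions: int) -> None:
    """Create the vector index if possible; tolerate an equivalent pre-existing index."""
    async with neo4j.AsyncGraphDatabase.driver(uri, auth=auth) as driver:
        try:
            await driver.execute_query(
                f"""
                CREATE VECTOR INDEX chunk_vector IF NOT EXISTS
                FOR (n:Chunk) ON n.embedding
                OPTIONS {{indexConfig: {{`vector.dimensions`: {dimensions}}}}}
                """
            )
        except neo4j.exceptions.ClientError as e:
            # If an equivalent schema rule already exists, treat index creation as already done.
            if e.code != "Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists":
                raise


# Example call (requires a running Neo4j instance):
# asyncio.run(ensure_chunk_vector_index("neo4j://localhost:7687", ("neo4j", "password"), 1536))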
unstructured_ingest/v2/processes/connectors/utils.py CHANGED

@@ -31,7 +31,9 @@ def conform_string_to_dict(value: Any) -> dict:
     raise ValidationError(f"Input could not be mapped to a valid dict: {value}")


-def format_and_truncate_orig_elements(element: dict) -> list[dict[str, Any]]:
+def format_and_truncate_orig_elements(
+    element: dict, include_text: bool = False
+) -> list[dict[str, Any]]:
     """
     This function is used to format and truncate the orig_elements field in the metadata.
     This is used to remove the text field and other larger fields from the orig_elements
@@ -42,7 +44,8 @@ def format_and_truncate_orig_elements(element: dict) -> list[dict[str, Any]]:
     orig_elements = []
     if raw_orig_elements is not None:
         for element in elements_from_base64_gzipped_json(raw_orig_elements):
-            element.pop("text", None)
+            if not include_text:
+                element.pop("text", None)
             for prop in (
                 "image_base64",
                 "text_as_html",
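The new `include_text` flag lets a caller (the Neo4j stager passes `include_text=True`) keep each original element's text while other large fields are still stripped. Below is a simplified stand-in showing only the flag's effect; the real helper also decodes the base64-gzipped `orig_elements` payload and removes additional fields.

def truncate_orig_element(element: dict, include_text: bool = False) -> dict:
    """Drop large fields from a decoded orig_elements entry; keep text only on request."""
    trimmed = dict(element)
    if not include_text:
        trimmed.pop("text", None)
    for prop in ("image_base64", "text_as_html"):
        trimmed.pop(prop, None)
    return trimmed


origin = {"type": "NarrativeText", "text": "full text", "text_as_html": "<p>full text</p>"}
print(truncate_orig_element(origin))                     # {'type': 'NarrativeText'}
print(truncate_orig_element(origin, include_text=True))  # keeps 'text', drops the HTML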
unstructured_ingest/v2/processes/connectors/zendesk/client.py CHANGED

@@ -2,8 +2,7 @@ import base64
 from dataclasses import dataclass
 from typing import Dict, List

-import httpx
-
+from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.v2.errors import ProviderError, RateLimitError, UserAuthError, UserError
 from unstructured_ingest.v2.logger import logger

@@ -42,7 +41,10 @@ class ZendeskArticle:


 class ZendeskClient:

+    @requires_dependencies(["httpx"], extras="zendesk")
     def __init__(self, token: str, subdomain: str, email: str):
+        import httpx
+
         # should be okay to be blocking.
         url_to_check = f"https://{subdomain}.zendesk.com/api/v2/groups.json"
         auth = f"{email}/token", token
@@ -57,7 +59,10 @@ class ZendeskClient:
         self._email = email
         self._auth = auth

+    @requires_dependencies(["httpx"], extras="zendesk")
     def wrap_error(self, e: Exception) -> Exception:
+        import httpx
+
         if not isinstance(e, httpx.HTTPStatusError):
             logger.error(f"unhandled exception from Zendesk client: {e}", exc_info=True)
             return e
@@ -88,10 +93,12 @@ class ZendeskClient:
         logger.error(f"unhandled http status error from Zendesk client: {e}", exc_info=True)
         return e

+    @requires_dependencies(["httpx"], extras="zendesk")
     async def get_articles_async(self) -> List[ZendeskArticle]:
         """
         Retrieves article content from Zendesk asynchronously.
         """
+        import httpx

         articles: List[ZendeskArticle] = []

@@ -117,7 +124,10 @@ class ZendeskClient:
         ]
         return articles

+    @requires_dependencies(["httpx"], extras="zendesk")
     async def get_comments_async(self, ticket_id: int) -> List["Comment"]:
+        import httpx
+
         comments_url = f"https://{self._subdomain}.zendesk.com/api/v2/tickets/{ticket_id}/comments"

         try:
@@ -138,7 +148,9 @@ class ZendeskClient:
             for entry in response.json()["comments"]
         ]

+    @requires_dependencies(["httpx"], extras="zendesk")
     def get_users(self) -> List[dict]:
+        import httpx

         users: List[dict] = []

@@ -154,7 +166,10 @@ class ZendeskClient:

         return users

+    @requires_dependencies(["httpx"], extras="zendesk")
     async def get_tickets_async(self) -> List["ZendeskTicket"]:
+        import httpx
+
         tickets: List["ZendeskTicket"] = []
         tickets_url = f"https://{self._subdomain}.zendesk.com/api/v2/tickets"

@@ -179,10 +194,13 @@ class ZendeskClient:

         return tickets

+    @requires_dependencies(["httpx"], extras="zendesk")
     async def get_article_attachments_async(self, article_id: str):
         """
         Handles article attachments such as images and stores them as UTF-8 encoded bytes.
         """
+        import httpx
+
         article_attachment_url = (
             f"https://{self._subdomain}.zendesk.com/api/v2/help_center/"
             f"articles/{article_id}/attachments"
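The pattern applied throughout client.py above is to decorate each method with `requires_dependencies(["httpx"], extras="zendesk")` and move the `import httpx` into the method body, so the module can be imported without the optional dependency installed. A rough sketch of the idea follows, using a simplified, hypothetical `requires_optional` decorator rather than the real `unstructured_ingest.utils.dep_check.requires_dependencies`.

import functools
import importlib.util


def requires_optional(modules: list[str], extras: str):
    """Simplified stand-in: fail with an install hint if an optional module is missing."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            missing = [m for m in modules if importlib.util.find_spec(m) is None]
            if missing:
                raise ImportError(
                    f"{func.__name__} requires {missing}; "
                    f'install with: pip install "unstructured-ingest[{extras}]"'
                )
            return func(*args, **kwargs)
        return wrapper
    return decorator


@requires_optional(["httpx"], extras="zendesk")
def make_zendesk_client(subdomain: str, email: str, token: str):
    import httpx  # deferred: only evaluated after the dependency check passes

    return httpx.Client(
        base_url=f"https://{subdomain}.zendesk.com",
        auth=(f"{email}/token", token),
    )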
unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py CHANGED

@@ -7,11 +7,10 @@ from pathlib import Path
 from time import time
 from typing import Any, AsyncGenerator, List, Literal

-import aiofiles
-import bs4
 from pydantic import BaseModel, Field, Secret

 from unstructured_ingest.utils.data_prep import batch_generator
+from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.utils.html import HtmlMixin
 from unstructured_ingest.v2.errors import UserAuthError
 from unstructured_ingest.v2.interfaces import (
@@ -290,6 +289,7 @@ class ZendeskDownloader(Downloader):
             session=session,
         )

+    @requires_dependencies(["bs4", "aiofiles"], extras="zendesk")
     async def handle_articles_async(
         self, client: ZendeskClient, batch_file_data: ZendeskBatchFileData
     ):
@@ -297,6 +297,9 @@ class ZendeskDownloader(Downloader):
         Processes the article information, downloads the attachments for each article,
         and updates the content accordingly.
         """
+        import aiofiles
+        import bs4
+
         # Determine the download path
         download_path = self.get_download_path(batch_file_data)

@@ -327,12 +330,15 @@ class ZendeskDownloader(Downloader):
             file_data=batch_file_data, download_path=download_path
         )

+    @requires_dependencies(["aiofiles"], extras="zendesk")
     async def handle_tickets_async(
         self, client: ZendeskClient, batch_file_data: ZendeskBatchFileData
     ) -> DownloadResponse:
         """
         Processes a batch of tickets asynchronously, writing their details and comments to a file.
         """
+        import aiofiles
+
         # Determine the download path
         download_path = self.get_download_path(batch_file_data)
         if download_path is None:
{unstructured_ingest-0.5.11.dist-info → unstructured_ingest-0.5.13.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: unstructured-ingest
-Version: 0.5.11
+Version: 0.5.13
 Summary: A library that prepares raw documents for downstream ML tasks.
 Home-page: https://github.com/Unstructured-IO/unstructured-ingest
 Author: Unstructured Technologies
@@ -22,13 +22,13 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0,<3.14
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
-Requires-Dist:
-Requires-Dist:
+Requires-Dist: python-dateutil
+Requires-Dist: dataclasses_json
 Requires-Dist: pandas
 Requires-Dist: pydantic>=2.7
+Requires-Dist: click
 Requires-Dist: opentelemetry-sdk
-Requires-Dist:
-Requires-Dist: dataclasses_json
+Requires-Dist: tqdm
 Provides-Extra: remote
 Requires-Dist: unstructured-client>=0.30.0; extra == "remote"
 Provides-Extra: csv
@@ -66,16 +66,16 @@ Requires-Dist: pyairtable; extra == "airtable"
 Provides-Extra: astradb
 Requires-Dist: astrapy; extra == "astradb"
 Provides-Extra: azure
-Requires-Dist: fsspec; extra == "azure"
 Requires-Dist: adlfs; extra == "azure"
+Requires-Dist: fsspec; extra == "azure"
 Provides-Extra: azure-ai-search
 Requires-Dist: azure-search-documents; extra == "azure-ai-search"
 Provides-Extra: biomed
-Requires-Dist: bs4; extra == "biomed"
 Requires-Dist: requests; extra == "biomed"
+Requires-Dist: bs4; extra == "biomed"
 Provides-Extra: box
-Requires-Dist: fsspec; extra == "box"
 Requires-Dist: boxfs; extra == "box"
+Requires-Dist: fsspec; extra == "box"
 Provides-Extra: chroma
 Requires-Dist: chromadb; extra == "chroma"
 Provides-Extra: clarifai
@@ -86,8 +86,8 @@ Requires-Dist: atlassian-python-api; extra == "confluence"
 Provides-Extra: couchbase
 Requires-Dist: couchbase; extra == "couchbase"
 Provides-Extra: delta-table
-Requires-Dist: boto3; extra == "delta-table"
 Requires-Dist: deltalake; extra == "delta-table"
+Requires-Dist: boto3; extra == "delta-table"
 Provides-Extra: discord
 Requires-Dist: discord.py; extra == "discord"
 Provides-Extra: dropbox
@@ -98,9 +98,9 @@ Requires-Dist: duckdb; extra == "duckdb"
 Provides-Extra: elasticsearch
 Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
 Provides-Extra: gcs
+Requires-Dist: gcsfs; extra == "gcs"
 Requires-Dist: bs4; extra == "gcs"
 Requires-Dist: fsspec; extra == "gcs"
-Requires-Dist: gcsfs; extra == "gcs"
 Provides-Extra: github
 Requires-Dist: requests; extra == "github"
 Requires-Dist: pygithub>1.58.0; extra == "github"
@@ -124,18 +124,18 @@ Requires-Dist: pymilvus; extra == "milvus"
 Provides-Extra: mongodb
 Requires-Dist: pymongo; extra == "mongodb"
 Provides-Extra: neo4j
-Requires-Dist: networkx; extra == "neo4j"
-Requires-Dist: neo4j-rust-ext; extra == "neo4j"
 Requires-Dist: cymple; extra == "neo4j"
+Requires-Dist: neo4j-rust-ext; extra == "neo4j"
+Requires-Dist: networkx; extra == "neo4j"
 Provides-Extra: notion
-Requires-Dist: htmlBuilder; extra == "notion"
-Requires-Dist: httpx; extra == "notion"
 Requires-Dist: notion-client; extra == "notion"
+Requires-Dist: htmlBuilder; extra == "notion"
 Requires-Dist: backoff; extra == "notion"
+Requires-Dist: httpx; extra == "notion"
 Provides-Extra: onedrive
 Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
-Requires-Dist: bs4; extra == "onedrive"
 Requires-Dist: msal; extra == "onedrive"
+Requires-Dist: bs4; extra == "onedrive"
 Provides-Extra: opensearch
 Requires-Dist: opensearch-py; extra == "opensearch"
 Provides-Extra: outlook
@@ -160,8 +160,8 @@ Requires-Dist: msal; extra == "sharepoint"
 Provides-Extra: salesforce
 Requires-Dist: simple-salesforce; extra == "salesforce"
 Provides-Extra: sftp
-Requires-Dist: fsspec; extra == "sftp"
 Requires-Dist: paramiko; extra == "sftp"
+Requires-Dist: fsspec; extra == "sftp"
 Provides-Extra: slack
 Requires-Dist: slack_sdk[optional]; extra == "slack"
 Provides-Extra: snowflake
@@ -178,13 +178,17 @@ Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
 Provides-Extra: singlestore
 Requires-Dist: singlestoredb; extra == "singlestore"
 Provides-Extra: vectara
-Requires-Dist: httpx; extra == "vectara"
 Requires-Dist: requests; extra == "vectara"
 Requires-Dist: aiofiles; extra == "vectara"
+Requires-Dist: httpx; extra == "vectara"
 Provides-Extra: vastdb
-Requires-Dist: vastdb; extra == "vastdb"
 Requires-Dist: ibis; extra == "vastdb"
+Requires-Dist: vastdb; extra == "vastdb"
 Requires-Dist: pyarrow; extra == "vastdb"
+Provides-Extra: zendesk
+Requires-Dist: bs4; extra == "zendesk"
+Requires-Dist: aiofiles; extra == "zendesk"
+Requires-Dist: httpx; extra == "zendesk"
 Provides-Extra: embed-huggingface
 Requires-Dist: sentence-transformers; extra == "embed-huggingface"
 Provides-Extra: embed-octoai
@@ -200,8 +204,8 @@ Provides-Extra: openai
 Requires-Dist: tiktoken; extra == "openai"
 Requires-Dist: openai; extra == "openai"
 Provides-Extra: bedrock
-Requires-Dist: aioboto3; extra == "bedrock"
 Requires-Dist: boto3; extra == "bedrock"
+Requires-Dist: aioboto3; extra == "bedrock"
 Provides-Extra: togetherai
 Requires-Dist: together; extra == "togetherai"
 Dynamic: author
{unstructured_ingest-0.5.11.dist-info → unstructured_ingest-0.5.13.dist-info}/RECORD CHANGED

@@ -7,7 +7,7 @@ test/integration/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMp
 test/integration/connectors/conftest.py,sha256=vYs4WDlCuieAwwErkJxCk4a1lGvr3qpeiAm-YaDznSo,1018
 test/integration/connectors/test_astradb.py,sha256=c9Lk0dvJVVdzHcokvsc4XMNJ4SIO1k2vGtT5py0cFVM,9753
 test/integration/connectors/test_azure_ai_search.py,sha256=MxFwk84vI_HT4taQTGrNpJ8ewGPqHSGrx626j8hC_Pw,9695
-test/integration/connectors/test_chroma.py,sha256=
+test/integration/connectors/test_chroma.py,sha256=1uGHbZXkXKGb8wl3p7c9G-L1MViUe283Hw5u3dg8OgI,4532
 test/integration/connectors/test_confluence.py,sha256=Ju0gRQbD2g9l9iRf2HDZKi7RyPnBGtFRWcGpsqhO3F8,3588
 test/integration/connectors/test_delta_table.py,sha256=4qm2Arfc9Eb7SOZOnOlLF-vNpHy6Eqvr5Q45svfX1PY,6911
 test/integration/connectors/test_dropbox.py,sha256=jzpZ6wawLa4sC1BVoHWZJ3cHjL4DWWUEX5ee7bXUOOM,4945
@@ -111,7 +111,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
 test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
 unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
-unstructured_ingest/__version__.py,sha256=
+unstructured_ingest/__version__.py,sha256=SoXJvWNbP_dJpjjbYHn-IaGnsULa9X8yicnEFO_W3yI,43
 unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
 unstructured_ingest/interfaces.py,sha256=7DOnDpGvUNlCoFR7UPRGmOarqH5sFtuUOO5vf8X3oTM,31489
 unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
@@ -442,7 +442,7 @@ unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=VRDAiou_7oWOIAgQTdOG
 unstructured_ingest/v2/processes/connectors/local.py,sha256=ZvWTj6ZYkwnvQMNFsZWoaQyp9zp0WVqAywMaHJ2kcAc,7153
 unstructured_ingest/v2/processes/connectors/milvus.py,sha256=wmcu9NVy3gYlQGT25inN5w_QrhFoL8-hRq0pJFSNw8g,8866
 unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=cL0QUQZF_s2brh3nNNeAywXVpaIiND4b5JTAFlYjLjw,14273
-unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=
+unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=vxf6Xuh-OMS09Y-mIF0PIwrFauqRtoI7vjeLBXsFwTk,18744
 unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=EM9fq67RsiudZvZbi6nDXkS-i6W0xLvbkNvD0G-Ni5E,17779
 unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
 unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=O9lC4mZ9V_exg9apiCJSWHsgkuYDSEOlI6CaUS5ZB7c,13961
@@ -450,7 +450,7 @@ unstructured_ingest/v2/processes/connectors/redisdb.py,sha256=p0AY4ukBNpwAemV4bW
 unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
 unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=2T9Bm1H_ALwHhG_YP7vsuUUW-mUg61zcaae3aa9BnN4,4827
 unstructured_ingest/v2/processes/connectors/slack.py,sha256=Z73VmQ3oUY09KoLEi5OBdQeDt4ONEY_02SglWQc6HXE,9252
-unstructured_ingest/v2/processes/connectors/utils.py,sha256=
+unstructured_ingest/v2/processes/connectors/utils.py,sha256=TAd0hb1f291N-q7-TUe6JKSCGkhqDyo7Ij8zmliBZUc,2071
 unstructured_ingest/v2/processes/connectors/vectara.py,sha256=BlI_4nkpNR99aYxDd9eusm5LQsVB9EI0r-5Kc1D7pgQ,12255
 unstructured_ingest/v2/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json,sha256=SJlIO0kXxy866tWQ8bEzvwLwflsoUMIS-OKlxMvHIuE,504
@@ -573,11 +573,11 @@ unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8Stu
 unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
 unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=UZ_s8dnVNx9BWFG2fPah4VbQbgEDF4nP78bQeU3jg08,12821
 unstructured_ingest/v2/processes/connectors/zendesk/__init__.py,sha256=XMNocKJ3FHDfy36p_KHhH7ALi0-ji6NhGuQNCV2E4vY,699
-unstructured_ingest/v2/processes/connectors/zendesk/client.py,sha256=
-unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py,sha256=
-unstructured_ingest-0.5.
-unstructured_ingest-0.5.
-unstructured_ingest-0.5.
-unstructured_ingest-0.5.
-unstructured_ingest-0.5.
-unstructured_ingest-0.5.
+unstructured_ingest/v2/processes/connectors/zendesk/client.py,sha256=MNyI6SUuxZHf_6zONoC6jR2f9BvyTYoMyGKDOhl4kgs,7897
+unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py,sha256=vQHZa5YYiDPXXPRAPMnPXhh0QzXeiBVx_YIWskZBQIc,15465
+unstructured_ingest-0.5.13.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
+unstructured_ingest-0.5.13.dist-info/METADATA,sha256=K95xEzr8Tq75w90-ivlwvfFhRkNRTPnNmtIiRXDXhjs,8465
+unstructured_ingest-0.5.13.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+unstructured_ingest-0.5.13.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
+unstructured_ingest-0.5.13.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
+unstructured_ingest-0.5.13.dist-info/RECORD,,
{unstructured_ingest-0.5.11.dist-info → unstructured_ingest-0.5.13.dist-info}/LICENSE.md RENAMED
File without changes
{unstructured_ingest-0.5.11.dist-info → unstructured_ingest-0.5.13.dist-info}/WHEEL RENAMED
File without changes
{unstructured_ingest-0.5.11.dist-info → unstructured_ingest-0.5.13.dist-info}/entry_points.txt RENAMED
File without changes
{unstructured_ingest-0.5.11.dist-info → unstructured_ingest-0.5.13.dist-info}/top_level.txt RENAMED
File without changes