unstructured-ingest 0.5.10__py3-none-any.whl → 0.5.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: the registry that published this release flags it as potentially problematic.
- test/integration/connectors/test_astradb.py +21 -0
- test/integration/connectors/test_zendesk.py +142 -0
- test/integration/connectors/utils/validation/destination.py +2 -1
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/v2/interfaces/downloader.py +2 -3
- unstructured_ingest/v2/processes/connectors/astradb.py +36 -28
- unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +31 -0
- unstructured_ingest/v2/processes/connectors/zendesk/client.py +225 -0
- unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py +419 -0
- {unstructured_ingest-0.5.10.dist-info → unstructured_ingest-0.5.11.dist-info}/METADATA +19 -19
- {unstructured_ingest-0.5.10.dist-info → unstructured_ingest-0.5.11.dist-info}/RECORD +15 -11
- {unstructured_ingest-0.5.10.dist-info → unstructured_ingest-0.5.11.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.5.10.dist-info → unstructured_ingest-0.5.11.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.5.10.dist-info → unstructured_ingest-0.5.11.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.5.10.dist-info → unstructured_ingest-0.5.11.dist-info}/top_level.txt +0 -0

test/integration/connectors/test_astradb.py

@@ -31,6 +31,7 @@ from unstructured_ingest.v2.processes.connectors.astradb import (
     AstraDBUploader,
     AstraDBUploaderConfig,
     AstraDBUploadStager,
+    AstraDBUploadStagerConfig,
     DestinationConnectionError,
     SourceConnectionError,
 )
@@ -258,3 +259,23 @@ def test_astra_stager(
         stager=stager,
         tmp_dir=tmp_path,
     )
+
+
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
+@pytest.mark.parametrize("upload_file_str", ["upload_file_ndjson", "upload_file"])
+def test_astra_stager_flatten_metadata(
+    request: TopRequest,
+    upload_file_str: str,
+    tmp_path: Path,
+):
+    stager_config = AstraDBUploadStagerConfig(flatten_metadata=True)
+    upload_file: Path = request.getfixturevalue(upload_file_str)
+    stager = AstraDBUploadStager(upload_stager_config=stager_config)
+    stager_validation(
+        configs=StagerValidationConfigs(
+            test_id=CONNECTOR_TYPE, expected_count=22, expected_folder="stager_flatten_metadata"
+        ),
+        input_file=upload_file,
+        stager=stager,
+        tmp_dir=tmp_path,
+    )

test/integration/connectors/test_zendesk.py (new file)

@@ -0,0 +1,142 @@
+import os
+from pathlib import Path
+from typing import Optional
+
+import pytest
+
+from test.integration.connectors.utils.constants import SOURCE_TAG, UNCATEGORIZED_TAG
+from test.integration.connectors.utils.validation.source import (
+    SourceValidationConfigs,
+    source_connector_validation,
+)
+from test.integration.utils import requires_env
+from unstructured_ingest.v2.errors import UserAuthError
+from unstructured_ingest.v2.processes.connectors.zendesk import (
+    CONNECTOR_TYPE,
+    ZendeskAccessConfig,
+    ZendeskConnectionConfig,
+    ZendeskDownloader,
+    ZendeskDownloaderConfig,
+    ZendeskIndexer,
+    ZendeskIndexerConfig,
+)
+
+
+async def zendesk_source_test(
+    tmp_path: Path,
+    token: Optional[str] = None,
+    email: Optional[str] = None,
+    subdomain: Optional[str] = None,
+):
+
+    access_config = ZendeskAccessConfig(api_token=token)
+    connection_config = ZendeskConnectionConfig(
+        subdomain=subdomain, email=email, access_config=access_config
+    )
+
+    index_config = ZendeskIndexerConfig(batch_size=2, item_type="tickets")
+
+    indexer = ZendeskIndexer(
+        connection_config=connection_config,
+        index_config=index_config,
+        connector_type=CONNECTOR_TYPE,
+    )
+
+    # handle downloader.
+    download_config = ZendeskDownloaderConfig(download_dir=tmp_path)
+
+    downloader = ZendeskDownloader(
+        connection_config=connection_config,
+        download_config=download_config,
+        connector_type=CONNECTOR_TYPE,
+    )
+
+    # Run the source connector validation
+    await source_connector_validation(
+        indexer=indexer,
+        downloader=downloader,
+        configs=SourceValidationConfigs(
+            test_id="zendesk-tickets",
+            expected_num_files=4,
+            validate_file_data=False,
+            validate_downloaded_files=True,
+        ),
+    )
+
+
+async def zendesk_source_articles_test(
+    tmp_path: Path,
+    token: Optional[str] = None,
+    email: Optional[str] = None,
+    subdomain: Optional[str] = None,
+):
+
+    access_config = ZendeskAccessConfig(api_token=token)
+    connection_config = ZendeskConnectionConfig(
+        subdomain=subdomain, email=email, access_config=access_config
+    )
+
+    index_config = ZendeskIndexerConfig(batch_size=2, item_type="articles")
+
+    indexer = ZendeskIndexer(
+        connection_config=connection_config,
+        index_config=index_config,
+        connector_type=CONNECTOR_TYPE,
+    )
+
+    # handle downloader.
+    download_config = ZendeskDownloaderConfig(download_dir=tmp_path, extract_images=True)
+
+    downloader = ZendeskDownloader(
+        connection_config=connection_config,
+        download_config=download_config,
+        connector_type=CONNECTOR_TYPE,
+    )
+
+    # Run the source connector validation
+    await source_connector_validation(
+        indexer=indexer,
+        downloader=downloader,
+        configs=SourceValidationConfigs(
+            test_id="zendesk-articles",
+            expected_num_files=4,
+            validate_file_data=False,
+            validate_downloaded_files=True,
+        ),
+    )
+
+
+@pytest.mark.asyncio
+@pytest.mark.tags(SOURCE_TAG, CONNECTOR_TYPE, UNCATEGORIZED_TAG)
+@requires_env("ZENDESK_TOKEN")
+async def test_zendesk_source(temp_dir):
+    await zendesk_source_test(
+        tmp_path=temp_dir,
+        token=os.environ["ZENDESK_TOKEN"],
+        email="test@unstructured.io",
+        subdomain="unstructuredhelp",
+    )
+
+
+@pytest.mark.asyncio
+@pytest.mark.tags(SOURCE_TAG, CONNECTOR_TYPE, UNCATEGORIZED_TAG)
+@requires_env("ZENDESK_TOKEN")
+async def test_zendesk_source_articles(temp_dir):
+    await zendesk_source_articles_test(
+        tmp_path=temp_dir,
+        token=os.environ["ZENDESK_TOKEN"],
+        email="test@unstructured.io",
+        subdomain="unstructuredhelp",
+    )
+
+
+@pytest.mark.asyncio
+@pytest.mark.tags(SOURCE_TAG, CONNECTOR_TYPE, UNCATEGORIZED_TAG)
+async def test_zendesk_source_articles_fail(temp_dir):
+    with pytest.raises(expected_exception=UserAuthError):
+        await zendesk_source_articles_test(
+            tmp_path=temp_dir,
+            token="FORCE_FAIL_TOKEN",
+            email="test@unstructured.io",
+            subdomain="unstructuredhelp",
+        )
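
These live tests are skipped unless a ZENDESK_TOKEN environment variable is present (the requires_env decorator handles the gating). For orientation, a minimal sketch of driving the ticket helper directly outside pytest; it assumes the test module is importable from the repo root and that the token is a real API credential:

import asyncio
import os
import tempfile
from pathlib import Path

from test.integration.connectors.test_zendesk import zendesk_source_test

# Runs the same indexer -> downloader -> validation flow as the pytest case.
asyncio.run(
    zendesk_source_test(
        tmp_path=Path(tempfile.mkdtemp()),
        token=os.environ["ZENDESK_TOKEN"],
        email="test@unstructured.io",
        subdomain="unstructuredhelp",
    )
)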

test/integration/connectors/utils/validation/destination.py

@@ -9,9 +9,10 @@ from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers, Uploa
 
 class StagerValidationConfigs(ValidationConfig):
     expected_count: int
+    expected_folder: str = "stager"
 
     def stager_output_dir(self) -> Path:
-        dir = self.test_output_dir() / "stager"
+        dir = self.test_output_dir() / self.expected_folder
         dir.mkdir(exist_ok=True, parents=True)
         return dir
 
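
The new expected_folder field makes the fixture directory configurable: stager validation previously always compared against the stager/ folder, and the flatten-metadata test above points it at a separate folder so the two expected outputs do not collide. A minimal sketch, reusing values from that test:

from test.integration.connectors.utils.validation.destination import StagerValidationConfigs

configs = StagerValidationConfigs(
    test_id="astradb",
    expected_count=22,
    expected_folder="stager_flatten_metadata",  # default remains "stager"
)
# configs.stager_output_dir() now resolves to
# <test_output_dir>/stager_flatten_metadata instead of <test_output_dir>/stager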

unstructured_ingest/__version__.py

@@ -1 +1 @@
-__version__ = "0.5.10"  # pragma: no cover
+__version__ = "0.5.11"  # pragma: no cover

unstructured_ingest/v2/interfaces/downloader.py

@@ -1,5 +1,5 @@
 import os
-from abc import ABC, abstractmethod
+from abc import ABC
 from pathlib import Path
 from typing import Any, Optional, TypedDict, TypeVar, Union
 
@@ -81,9 +81,8 @@ class Downloader(BaseProcess, BaseConnector, ABC):
     def is_async(self) -> bool:
         return True
 
-    @abstractmethod
     def run(self, file_data: FileData, **kwargs: Any) -> download_responses:
-        pass
+        raise NotImplementedError()
 
     async def run_async(self, file_data: FileData, **kwargs: Any) -> download_responses:
         return self.run(file_data=file_data, **kwargs)
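
Dropping @abstractmethod moves the failure from class-definition time to call time: a purely async downloader no longer needs a dummy sync run() to satisfy the ABC, and the inherited run() raises NotImplementedError only if the sync path is actually invoked. A minimal sketch with a hypothetical connector class:

from dataclasses import dataclass
from typing import Any

from unstructured_ingest.v2.interfaces import Downloader

@dataclass
class ExampleAsyncDownloader(Downloader):  # hypothetical, for illustration
    def is_async(self) -> bool:
        return True

    async def run_async(self, file_data, **kwargs: Any):
        # real download logic goes here; the inherited sync run() now
        # raises NotImplementedError instead of blocking instantiation
        ...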

unstructured_ingest/v2/processes/connectors/astradb.py

@@ -144,10 +144,6 @@ async def get_async_astra_collection(
     return async_astra_db_collection
 
 
-class AstraDBUploadStagerConfig(UploadStagerConfig):
-    pass
-
-
 class AstraDBIndexerConfig(IndexerConfig):
     collection_name: str = Field(
         description="The name of the Astra DB collection. "
@@ -158,30 +154,6 @@ class AstraDBIndexerConfig(IndexerConfig):
     batch_size: int = Field(default=20, description="Number of records per batch")
 
 
-class AstraDBDownloaderConfig(DownloaderConfig):
-    fields: list[str] = field(default_factory=list)
-
-
-class AstraDBUploaderConfig(UploaderConfig):
-    collection_name: Optional[str] = Field(
-        description="The name of the Astra DB collection. "
-        "Note that the collection name must only include letters, "
-        "numbers, and underscores.",
-        default=None,
-    )
-    keyspace: Optional[str] = Field(default=None, description="The Astra DB connection keyspace.")
-    requested_indexing_policy: Optional[dict[str, Any]] = Field(
-        default=None,
-        description="The indexing policy to use for the collection.",
-        examples=['{"deny": ["metadata"]}'],
-    )
-    batch_size: int = Field(default=20, description="Number of records per batch")
-    record_id_key: str = Field(
-        default=RECORD_ID_LABEL,
-        description="searchable key to find entries for the same record on previous runs",
-    )
-
-
 @dataclass
 class AstraDBIndexer(Indexer):
     connection_config: AstraDBConnectionConfig
@@ -239,6 +211,10 @@ class AstraDBIndexer(Indexer):
             yield fd
 
 
+class AstraDBDownloaderConfig(DownloaderConfig):
+    fields: list[str] = field(default_factory=list)
+
+
 @dataclass
 class AstraDBDownloader(Downloader):
     connection_config: AstraDBConnectionConfig
@@ -315,6 +291,12 @@ class AstraDBDownloader(Downloader):
         return download_responses
 
 
+class AstraDBUploadStagerConfig(UploadStagerConfig):
+    flatten_metadata: Optional[bool] = Field(
+        default=False, description="Move metadata to top level of the record."
+    )
+
+
 @dataclass
 class AstraDBUploadStager(UploadStager):
     upload_stager_config: AstraDBUploadStagerConfig = field(
@@ -336,6 +318,12 @@ class AstraDBUploadStager(UploadStager):
 
     def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
         self.truncate_dict_elements(element_dict)
+        if self.upload_stager_config.flatten_metadata:
+            # move metadata to top level so it isn't nested in metadata column
+            metadata = element_dict.pop("metadata", None)
+            if metadata:
+                element_dict.update(metadata)
+
         return {
             "$vector": element_dict.pop("embeddings", None),
             "content": element_dict.pop("text", None),
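
The flatten_metadata option changes the shape of each staged record: rather than carrying the element's nested metadata dict into the record's metadata column, its keys are promoted to the top level before the $vector/content record is assembled. An illustrative sketch of the transformation (the element values here are invented); the remaining astradb.py hunk continues below:

element = {
    "text": "Hello world",
    "embeddings": [0.1, 0.2, 0.3],
    "metadata": {"filename": "doc.pdf", "page_number": 1},
}

# With flatten_metadata=True, conform_dict first does the equivalent of:
metadata = element.pop("metadata", None)
if metadata:
    element.update(metadata)

# element now carries "filename" and "page_number" as top-level keys:
# {"text": "Hello world", "embeddings": [0.1, 0.2, 0.3],
#  "filename": "doc.pdf", "page_number": 1}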
@@ -344,6 +332,26 @@ class AstraDBUploadStager(UploadStager):
         }
 
 
+class AstraDBUploaderConfig(UploaderConfig):
+    collection_name: Optional[str] = Field(
+        description="The name of the Astra DB collection. "
+        "Note that the collection name must only include letters, "
+        "numbers, and underscores.",
+        default=None,
+    )
+    keyspace: Optional[str] = Field(default=None, description="The Astra DB connection keyspace.")
+    requested_indexing_policy: Optional[dict[str, Any]] = Field(
+        default=None,
+        description="The indexing policy to use for the collection.",
+        examples=['{"deny": ["metadata"]}'],
+    )
+    batch_size: int = Field(default=20, description="Number of records per batch")
+    record_id_key: str = Field(
+        default=RECORD_ID_LABEL,
+        description="searchable key to find entries for the same record on previous runs",
+    )
+
+
 @dataclass
 class AstraDBUploader(Uploader):
     connection_config: AstraDBConnectionConfig

unstructured_ingest/v2/processes/connectors/zendesk/__init__.py (new file)

@@ -0,0 +1,31 @@
+from unstructured_ingest.v2.processes.connector_registry import (
+    add_source_entry,
+)
+
+from .zendesk import (
+    CONNECTOR_TYPE,
+    ZendeskAccessConfig,
+    ZendeskClient,
+    ZendeskConnectionConfig,
+    ZendeskDownloader,
+    ZendeskDownloaderConfig,
+    ZendeskIndexer,
+    ZendeskIndexerConfig,
+    ZendeskTicket,
+    zendesk_source_entry,
+)
+
+__all__ = [
+    "add_source_entry",
+    "zendesk_source_entry",
+    "ZendeskAccessConfig",
+    "ZendeskClient",
+    "ZendeskConnectionConfig",
+    "ZendeskDownloader",
+    "ZendeskDownloaderConfig",
+    "ZendeskIndexer",
+    "ZendeskIndexerConfig",
+    "ZendeskTicket",
+]
+
+add_source_entry(source_type=CONNECTOR_TYPE, entry=zendesk_source_entry)
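
Registration happens as an import side effect: the module-level add_source_entry call publishes the entry under the "zendesk" key in the v2 source registry, so simply importing the subpackage makes the connector discoverable:

# Importing the subpackage is enough to register the "zendesk" source.
import unstructured_ingest.v2.processes.connectors.zendesk  # noqa: F401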

unstructured_ingest/v2/processes/connectors/zendesk/client.py (new file)

@@ -0,0 +1,225 @@
+import base64
+from dataclasses import dataclass
+from typing import Dict, List
+
+import httpx
+
+from unstructured_ingest.v2.errors import ProviderError, RateLimitError, UserAuthError, UserError
+from unstructured_ingest.v2.logger import logger
+
+
+@dataclass
+class Comment:
+    id: int
+    author_id: str
+    body: str
+    parent_ticket_id: str
+    metadata: dict
+
+
+@dataclass
+class ZendeskTicket:
+    id: int
+    subject: str
+    description: str
+    generated_ts: int
+    metadata: dict
+
+    def __lt__(self, other):
+        return int(self.id) < int(other.id)
+
+
+@dataclass
+class ZendeskArticle:
+    id: int
+    author_id: str
+    title: str
+    content: str
+
+    def __lt__(self, other):
+        return int(self.id) < int(other.id)
+
+
+class ZendeskClient:
+
+    def __init__(self, token: str, subdomain: str, email: str):
+        # should be okay to be blocking.
+        url_to_check = f"https://{subdomain}.zendesk.com/api/v2/groups.json"
+        auth = f"{email}/token", token
+
+        try:
+            _ = httpx.get(url_to_check, auth=auth)
+        except Exception as e:
+            raise self.wrap_error(e=e)
+
+        self._token = token
+        self._subdomain = subdomain
+        self._email = email
+        self._auth = auth
+
+    def wrap_error(self, e: Exception) -> Exception:
+        if not isinstance(e, httpx.HTTPStatusError):
+            logger.error(f"unhandled exception from Zendesk client: {e}", exc_info=True)
+            return e
+        url = e.request.url
+        response_code = e.response.status_code
+        if response_code == 401:
+            logger.error(
+                f"Failed to connect via auth,"
+                f"{url} using zendesk response, status code {response_code}"
+            )
+            return UserAuthError(e)
+        if response_code == 429:
+            logger.error(
+                f"Failed to connect via rate limits"
+                f"{url} using zendesk response, status code {response_code}"
+            )
+            return RateLimitError(e)
+        if 400 <= response_code < 500:
+            logger.error(
+                f"Failed to connect to {url} using zendesk response, status code {response_code}"
+            )
+            return UserError(e)
+        if response_code > 500:
+            logger.error(
+                f"Failed to connect to {url} using zendesk response, status code {response_code}"
+            )
+            return ProviderError(e)
+        logger.error(f"unhandled http status error from Zendesk client: {e}", exc_info=True)
+        return e
+
+    async def get_articles_async(self) -> List[ZendeskArticle]:
+        """
+        Retrieves article content from Zendesk asynchronously.
+        """
+
+        articles: List[ZendeskArticle] = []
+
+        article_url = f"https://{self._subdomain}.zendesk.com/api/v2/help_center/articles.json"
+
+        try:
+            async with httpx.AsyncClient() as client:
+                response = await client.get(article_url, auth=self._auth)
+                response.raise_for_status()
+        except Exception as e:
+            raise self.wrap_error(e=e)
+
+        articles_in_response: List[dict] = response.json()["articles"]
+
+        articles = [
+            ZendeskArticle(
+                id=int(entry["id"]),
+                author_id=str(entry["author_id"]),
+                title=str(entry["title"]),
+                content=entry["body"],
+            )
+            for entry in articles_in_response
+        ]
+        return articles
+
+    async def get_comments_async(self, ticket_id: int) -> List["Comment"]:
+        comments_url = f"https://{self._subdomain}.zendesk.com/api/v2/tickets/{ticket_id}/comments"
+
+        try:
+            async with httpx.AsyncClient() as client:
+                response = await client.get(comments_url, auth=self._auth)
+                response.raise_for_status()
+        except Exception as e:
+            raise self.wrap_error(e=e)
+
+        return [
+            Comment(
+                id=int(entry["id"]),
+                author_id=entry["author_id"],
+                body=entry["body"],
+                metadata=entry,
+                parent_ticket_id=ticket_id,
+            )
+            for entry in response.json()["comments"]
+        ]
+
+    def get_users(self) -> List[dict]:
+
+        users: List[dict] = []
+
+        users_url = f"https://{self._subdomain}.zendesk.com/api/v2/users"
+        try:
+            response = httpx.get(users_url, auth=self._auth)
+            response.raise_for_status()
+        except Exception as e:
+            raise self.wrap_error(e=e)
+
+        users_in_response: List[dict] = response.json()["users"]
+        users = users_in_response
+
+        return users
+
+    async def get_tickets_async(self) -> List["ZendeskTicket"]:
+        tickets: List["ZendeskTicket"] = []
+        tickets_url = f"https://{self._subdomain}.zendesk.com/api/v2/tickets"
+
+        try:
+            async with httpx.AsyncClient() as client:
+                response = await client.get(tickets_url, auth=self._auth)
+                response.raise_for_status()
+        except Exception as e:
+            raise self.wrap_error(e=e)
+
+        tickets_in_response: List[dict] = response.json()["tickets"]
+
+        for entry in tickets_in_response:
+            ticket = ZendeskTicket(
+                id=int(entry["id"]),
+                subject=entry["subject"],
+                description=entry["description"],
+                generated_ts=entry["generated_timestamp"],
+                metadata=entry,
+            )
+            tickets.append(ticket)
+
+        return tickets
+
+    async def get_article_attachments_async(self, article_id: str):
+        """
+        Handles article attachments such as images and stores them as UTF-8 encoded bytes.
+        """
+        article_attachment_url = (
+            f"https://{self._subdomain}.zendesk.com/api/v2/help_center/"
+            f"articles/{article_id}/attachments"
+        )
+
+        try:
+            async with httpx.AsyncClient() as client:
+                response = await client.get(article_attachment_url, auth=self._auth)
+                response.raise_for_status()
+        except Exception as e:
+            raise self.wrap_error(e=e)
+
+        attachments_in_response: List[Dict] = response.json().get("article_attachments", [])
+        attachments = []
+
+        for attachment in attachments_in_response:
+            attachment_data = {
+                "id": attachment["id"],
+                "file_name": attachment["file_name"],
+                "content_type": attachment["content_type"],
+                "size": attachment["size"],
+                "url": attachment["url"],
+                "content_url": attachment["content_url"],
+            }
+
+            try:
+                async with httpx.AsyncClient() as client:
+                    download_response = await client.get(attachment["content_url"], auth=self._auth)
+                    download_response.raise_for_status()
+            except Exception as e:
+                raise self.wrap_error(e=e)
+
+            encoded_content = base64.b64encode(download_response.content).decode("utf-8")
+            attachment_data["encoded_content"] = (
+                f"data:{attachment_data['content_type']};base64,{encoded_content}"
+            )
+
+            attachments.append(attachment_data)
+
+        return attachments
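
wrap_error maps httpx status failures onto the ingest error taxonomy: 401 becomes UserAuthError, 429 becomes RateLimitError, any other 4xx becomes UserError, and codes above 500 become ProviderError; as written, an exact 500 matches none of the branches and falls through to the generic "unhandled" return. A sketch of the classification using hand-built httpx objects (constructed here purely for illustration, since instantiating ZendeskClient performs a live request):

import httpx

req = httpx.Request("GET", "https://example.zendesk.com/api/v2/tickets")
resp = httpx.Response(status_code=429, request=req)
err = httpx.HTTPStatusError("rate limited", request=req, response=resp)

# client.wrap_error(err) would return RateLimitError(err) for this 429;
# swapping status_code to 401 yields UserAuthError, 404 yields UserError,
# and 503 yields ProviderError.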

unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py (new file)

@@ -0,0 +1,419 @@
+from __future__ import annotations
+
+import datetime
+import hashlib
+from dataclasses import dataclass
+from pathlib import Path
+from time import time
+from typing import Any, AsyncGenerator, List, Literal
+
+import aiofiles
+import bs4
+from pydantic import BaseModel, Field, Secret
+
+from unstructured_ingest.utils.data_prep import batch_generator
+from unstructured_ingest.utils.html import HtmlMixin
+from unstructured_ingest.v2.errors import UserAuthError
+from unstructured_ingest.v2.interfaces import (
+    AccessConfig,
+    BatchFileData,
+    BatchItem,
+    ConnectionConfig,
+    Downloader,
+    DownloaderConfig,
+    DownloadResponse,
+    FileData,
+    FileDataSourceMetadata,
+    Indexer,
+    IndexerConfig,
+    SourceIdentifiers,
+)
+from unstructured_ingest.v2.logger import logger
+from unstructured_ingest.v2.processes.connector_registry import SourceRegistryEntry
+
+from .client import ZendeskArticle, ZendeskClient, ZendeskTicket
+
+CONNECTOR_TYPE = "zendesk"
+
+
+class ZendeskAdditionalMetadata(BaseModel):
+    item_type: str
+    leading_id: str  # is the same as id just being verbose.
+    tail_id: str  # last id in the batch.
+
+
+class ZendeskFileDataSourceMetadata(FileDataSourceMetadata):
+    """
+    inherits metadata object as tickets and articles
+    are treated in single batch, we need to denote indices ticket/article
+    as the source metadata.
+    """
+
+
+class ZendeskBatchFileData(BatchFileData):
+    additional_metadata: ZendeskAdditionalMetadata
+
+
+class ZendeskAccessConfig(AccessConfig):
+    api_token: str = Field(
+        description="API token for zendesk generated under Apps and Integrations"
+    )
+
+
+class ZendeskBatchItemTicket(BatchItem):
+    subject: str
+    description: str
+    item_type: str = "tickets"  # placeholder for downloader
+
+
+class ZendeskBatchItemArticle(BatchItem):
+    title: str
+    author_id: str
+    title: str
+    content: str
+
+
+class ZendeskConnectionConfig(ConnectionConfig):
+    subdomain: str = Field(description="Subdomain for zendesk site, <sub-domain>.company.com")
+    email: str = Field(description="Email for zendesk site registered at the subdomain")
+    access_config: Secret[ZendeskAccessConfig]
+
+    async def get_client_async(self) -> ZendeskClient:
+        """Provides an async manager for ZendeskClient."""
+        access_config = self.access_config.get_secret_value()
+
+        client = ZendeskClient(
+            email=self.email, subdomain=self.subdomain, token=access_config.api_token
+        )
+        return client
+
+    def get_client(self) -> ZendeskClient:
+
+        access_config = self.access_config.get_secret_value()
+
+        client = ZendeskClient(
+            email=self.email, subdomain=self.subdomain, token=access_config.api_token
+        )
+        return client
+
+
+class ZendeskIndexerConfig(IndexerConfig):
+    batch_size: int = Field(
+        default=2,
+        description="Number of tickets or articles.",
+    )
+    item_type: Literal["tickets", "articles", "all"] = Field(
+        default="tickets",
+        description="Type of item from zendesk to parse, can only be `tickets` or `articles`.",
+    )
+
+
+@dataclass
+class ZendeskIndexer(Indexer):
+    connection_config: ZendeskConnectionConfig
+    index_config: ZendeskIndexerConfig
+    connector_type: str = CONNECTOR_TYPE
+
+    def precheck(self) -> None:
+        """Validates connection to Zendesk API."""
+        try:
+            client = self.connection_config.get_client()
+            if not client.get_users():
+                subdomain_endpoint = f"{self.connection_config.subdomain}.zendesk.com"
+                raise UserAuthError(f"Users do not exist in subdomain {subdomain_endpoint}")
+        except UserAuthError as e:
+            logger.error(f"Source connection error: {e}", exc_info=True)
+            raise
+        except Exception as e:
+            logger.error(f"Failed to validate connection to Zendesk: {e}", exc_info=True)
+            raise UserAuthError(f"Failed to validate connection: {e}")
+
+    def is_async(self) -> bool:
+        return True
+
+    async def _list_articles_async(self) -> List[ZendeskArticle]:
+        client = await self.connection_config.get_client_async()
+        return await client.get_articles_async()
+
+    async def _list_tickets_async(self) -> List[ZendeskTicket]:
+        client = await self.connection_config.get_client_async()
+        return await client.get_tickets_async()
+
+    def _generate_fullpath(self, identifier: str) -> Path:
+        return Path(hashlib.sha256(identifier.encode("utf-8")).hexdigest()[:16] + ".txt")
+
+    async def handle_articles_async(
+        self, articles: List[ZendeskArticle], batch_size: int
+    ) -> AsyncGenerator[ZendeskBatchFileData, None]:
+        """Parses articles from a list and yields FileData objects asynchronously in batches."""
+        for article_batch in batch_generator(articles, batch_size=batch_size):
+
+            article_batch = sorted(article_batch)
+
+            additional_metadata = ZendeskAdditionalMetadata(
+                item_type="articles",
+                leading_id=str(article_batch[0].id),
+                tail_id=str(article_batch[-1].id),
+            )
+
+            metadata = ZendeskFileDataSourceMetadata(
+                date_processed=str(time()),
+                record_locator={
+                    "id": str(article_batch[0].id),
+                    "item_type": "articles",
+                },
+            )
+
+            batch_items: List[ZendeskBatchItemArticle] = [
+                ZendeskBatchItemArticle(
+                    identifier=str(article.id),
+                    author_id=str(article.author_id),
+                    title=str(article.title),
+                    content=str(article.content),
+                )
+                for article in article_batch
+            ]
+
+            full_path = self._generate_fullpath(str(article_batch[0].id))
+            full_path = Path(str(full_path).replace(".txt", ".html"))
+
+            source_identifiers = SourceIdentifiers(filename=full_path.name, fullpath=str(full_path))
+
+            batched_file_data = ZendeskBatchFileData(
+                identifier=str(article_batch[0].id),
+                connector_type=self.connector_type,
+                metadata=metadata,
+                batch_items=batch_items,
+                additional_metadata=additional_metadata,
+                source_identifiers=source_identifiers,
+            )
+
+            yield batched_file_data
+
+    async def handle_tickets_async(
+        self, tickets: List[ZendeskTicket], batch_size: int
+    ) -> AsyncGenerator[ZendeskBatchFileData, None]:
+        """Parses tickets from a list and yields FileData objects asynchronously in batches."""
+        for ticket_batch in batch_generator(tickets, batch_size=batch_size):
+
+            sorted_batch = sorted(ticket_batch)
+
+            additional_metadata = ZendeskAdditionalMetadata(
+                item_type="tickets",
+                leading_id=str(sorted_batch[0].id),
+                tail_id=str(sorted_batch[-1].id),
+            )
+
+            metadata = ZendeskFileDataSourceMetadata(
+                date_processed=str(time()),
+                record_locator={
+                    "id": str(sorted_batch[0].id),
+                    "item_type": "tickets",
+                },
+            )
+
+            batch_items: List[ZendeskBatchItemTicket] = [
+                ZendeskBatchItemTicket(
+                    identifier=str(ticket.id),
+                    subject=str(ticket.subject),
+                    description=str(ticket.description),
+                )
+                for ticket in sorted_batch
+            ]
+
+            full_path = self._generate_fullpath(str(sorted_batch[0].id))
+            source_identifiers = SourceIdentifiers(filename=full_path.name, fullpath=str(full_path))
+
+            batched_file_data = ZendeskBatchFileData(
+                connector_type=self.connector_type,
+                metadata=metadata,
+                batch_items=batch_items,
+                additional_metadata=additional_metadata,
+                source_identifiers=source_identifiers,
+            )
+
+            yield batched_file_data
+
+    async def run_async(self, **kwargs: Any) -> AsyncGenerator[FileData, None]:
+        """Determines item type and processes accordingly asynchronously."""
+        item_type = self.index_config.item_type
+        batch_size = self.index_config.batch_size
+
+        if item_type == "articles":
+            articles = await self._list_articles_async()
+            async for file_data in self.handle_articles_async(
+                articles, batch_size
+            ):  # Using async version
+                yield file_data
+
+        elif item_type == "tickets":
+            tickets = await self._list_tickets_async()
+            async for file_data in self.handle_tickets_async(
+                tickets, batch_size
+            ):  # Using async version
+                yield file_data
+
+
+class ZendeskDownloaderConfig(DownloaderConfig, HtmlMixin):
+    pass
+
+
+@dataclass
+class ZendeskDownloader(Downloader):
+    download_config: ZendeskDownloaderConfig
+    connection_config: ZendeskConnectionConfig
+    connector_type: str = CONNECTOR_TYPE
+
+    def is_async(self) -> bool:
+        return True
+
+    def download_embedded_files(
+        self, session, html: str, current_file_data: FileData
+    ) -> list[DownloadResponse]:
+        if not self.download_config.extract_files:
+            return []
+        url = current_file_data.metadata.url
+        if url is None:
+            logger.warning(
+                f"""Missing URL for file: {current_file_data.source_identifiers.filename}.
+                Skipping file extraction."""
+            )
+            return []
+        filepath = current_file_data.source_identifiers.relative_path
+        download_path = Path(self.download_dir) / filepath
+        download_dir = download_path.with_suffix("")
+        return self.download_config.extract_embedded_files(
+            url=url,
+            download_dir=download_dir,
+            original_filedata=current_file_data,
+            html=html,
+            session=session,
+        )
+
+    async def handle_articles_async(
+        self, client: ZendeskClient, batch_file_data: ZendeskBatchFileData
+    ):
+        """
+        Processes the article information, downloads the attachments for each article,
+        and updates the content accordingly.
+        """
+        # Determine the download path
+        download_path = self.get_download_path(batch_file_data)
+
+        if download_path is None:
+            raise ValueError("Download path could not be determined")
+
+        download_path.parent.mkdir(parents=True, exist_ok=True)
+
+        async with aiofiles.open(download_path, "a", encoding="utf8") as f:
+            for article in batch_file_data.batch_items:
+                html_data_str = article.content
+                soup = bs4.BeautifulSoup(html_data_str, "html.parser")
+
+                if self.download_config.extract_images:
+                    # Get article attachments asynchronously
+                    image_data_decoded: List = await client.get_article_attachments_async(
+                        article_id=article.identifier
+                    )
+                    img_tags = soup.find_all("img")
+
+                    # Ensure we don't exceed the available images
+                    for img_tag, img_data in zip(img_tags, image_data_decoded):
+                        img_tag["src"] = img_data.get("encoded_content", "")
+
+                await f.write(soup.prettify())
+
+        return super().generate_download_response(
+            file_data=batch_file_data, download_path=download_path
+        )
+
+    async def handle_tickets_async(
+        self, client: ZendeskClient, batch_file_data: ZendeskBatchFileData
+    ) -> DownloadResponse:
+        """
+        Processes a batch of tickets asynchronously, writing their details and comments to a file.
+        """
+        # Determine the download path
+        download_path = self.get_download_path(batch_file_data)
+        if download_path is None:
+            raise ValueError("Download path could not be determined")
+
+        download_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Process each ticket in the batch
+        async with aiofiles.open(download_path, "a", encoding="utf8") as f:
+            for batch_item in batch_file_data.batch_items:
+                ticket_identifier = batch_item.identifier
+                first_date = None
+                comments: List[dict] = []
+
+                # Fetch comments asynchronously
+                comments_list = await client.get_comments_async(ticket_id=int(ticket_identifier))
+
+                for comment in comments_list:  # Iterate over the resolved list
+                    date_created = (
+                        comment.metadata["created_at"].isoformat()
+                        if isinstance(comment.metadata["created_at"], datetime.datetime)
+                        else str(comment.metadata["created_at"])
+                    )
+
+                    if first_date is None:
+                        first_date = date_created
+
+                    comments.append(
+                        {
+                            "comment_id": comment.id,
+                            "author_id": comment.author_id,
+                            "body": comment.body,
+                            "date_created": date_created,
+                        }
+                    )
+
+                # Write ticket details to file
+                content = (
+                    "\nticket\n"
+                    f"{batch_item.identifier}\n"
+                    f"{batch_file_data.metadata.record_locator.get('subject', '')}\n"
+                    f"{batch_file_data.metadata.record_locator.get('description', '')}\n"
+                    f"{first_date}\n"
+                )
+
+                # Append comments
+                for comment in comments:
+                    content += (
+                        "comment\n"
+                        f"{comment.get('comment_id', '')}\n"
+                        f"{comment.get('author_id', '')}\n"
+                        f"{comment.get('body', '')}\n"
+                        f"{comment.get('date_created', '')}\n"
+                    )
+
+                await f.write(content)
+
+        return super().generate_download_response(
+            file_data=batch_file_data, download_path=download_path
+        )
+
+    async def run_async(self, file_data: ZendeskBatchFileData, **kwargs: Any) -> DownloadResponse:
+
+        zendesk_filedata: FileData = FileData.cast(file_data=file_data)
+
+        client = await self.connection_config.get_client_async()
+        item_type = zendesk_filedata.metadata.record_locator["item_type"]
+
+        if item_type == "articles":
+            return await self.handle_articles_async(client, file_data)
+        elif item_type == "tickets":
+            return await self.handle_tickets_async(client, file_data)
+        else:
+            raise RuntimeError(f"Item type {item_type} cannot be handled by the downloader")
+
+
+# create entry
+zendesk_source_entry = SourceRegistryEntry(
+    connection_config=ZendeskConnectionConfig,
+    indexer_config=ZendeskIndexerConfig,
+    indexer=ZendeskIndexer,
+    downloader=ZendeskDownloader,
+    downloader_config=ZendeskDownloaderConfig,
+)
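
Taken together, the new module wires up a complete source connector: the indexer batches ticket or article IDs into ZendeskBatchFileData, and the downloader resolves each batch into a local .txt file (tickets plus comments) or .html file (articles, optionally with images inlined as data: URIs). A hand-wiring sketch that mirrors the integration test; every value below is a placeholder:

from pathlib import Path

from unstructured_ingest.v2.processes.connectors.zendesk import (
    ZendeskAccessConfig,
    ZendeskConnectionConfig,
    ZendeskDownloader,
    ZendeskDownloaderConfig,
    ZendeskIndexer,
    ZendeskIndexerConfig,
)

connection = ZendeskConnectionConfig(
    subdomain="example",  # resolves to example.zendesk.com
    email="agent@example.com",
    access_config=ZendeskAccessConfig(api_token="REPLACE_ME"),
)
indexer = ZendeskIndexer(
    connection_config=connection,
    index_config=ZendeskIndexerConfig(item_type="articles", batch_size=2),
)
downloader = ZendeskDownloader(
    connection_config=connection,
    download_config=ZendeskDownloaderConfig(download_dir=Path("/tmp/zendesk")),
)
# indexer.run_async() yields ZendeskBatchFileData batches; each batch is then
# passed to downloader.run_async(), which writes the files described above.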

{unstructured_ingest-0.5.10.dist-info → unstructured_ingest-0.5.11.dist-info}/METADATA

Apart from the version bump, the METADATA changes below only reorder Requires-Dist lines; the dependency set itself is unchanged.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: unstructured-ingest
-Version: 0.5.10
+Version: 0.5.11
 Summary: A library that prepares raw documents for downstream ML tasks.
 Home-page: https://github.com/Unstructured-IO/unstructured-ingest
 Author: Unstructured Technologies
@@ -23,12 +23,12 @@ Requires-Python: >=3.9.0,<3.14
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
 Requires-Dist: tqdm
-Requires-Dist: dataclasses_json
-Requires-Dist: pydantic>=2.7
-Requires-Dist: pandas
 Requires-Dist: click
+Requires-Dist: pandas
+Requires-Dist: pydantic>=2.7
 Requires-Dist: opentelemetry-sdk
 Requires-Dist: python-dateutil
+Requires-Dist: dataclasses_json
 Provides-Extra: remote
 Requires-Dist: unstructured-client>=0.30.0; extra == "remote"
 Provides-Extra: csv
@@ -66,13 +66,13 @@ Requires-Dist: pyairtable; extra == "airtable"
 Provides-Extra: astradb
 Requires-Dist: astrapy; extra == "astradb"
 Provides-Extra: azure
-Requires-Dist: adlfs; extra == "azure"
 Requires-Dist: fsspec; extra == "azure"
+Requires-Dist: adlfs; extra == "azure"
 Provides-Extra: azure-ai-search
 Requires-Dist: azure-search-documents; extra == "azure-ai-search"
 Provides-Extra: biomed
-Requires-Dist: requests; extra == "biomed"
 Requires-Dist: bs4; extra == "biomed"
+Requires-Dist: requests; extra == "biomed"
 Provides-Extra: box
 Requires-Dist: fsspec; extra == "box"
 Requires-Dist: boxfs; extra == "box"
@@ -91,19 +91,19 @@ Requires-Dist: deltalake; extra == "delta-table"
 Provides-Extra: discord
 Requires-Dist: discord.py; extra == "discord"
 Provides-Extra: dropbox
-Requires-Dist: fsspec; extra == "dropbox"
 Requires-Dist: dropboxdrivefs; extra == "dropbox"
+Requires-Dist: fsspec; extra == "dropbox"
 Provides-Extra: duckdb
 Requires-Dist: duckdb; extra == "duckdb"
 Provides-Extra: elasticsearch
 Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
 Provides-Extra: gcs
-Requires-Dist: fsspec; extra == "gcs"
 Requires-Dist: bs4; extra == "gcs"
+Requires-Dist: fsspec; extra == "gcs"
 Requires-Dist: gcsfs; extra == "gcs"
 Provides-Extra: github
-Requires-Dist: pygithub>1.58.0; extra == "github"
 Requires-Dist: requests; extra == "github"
+Requires-Dist: pygithub>1.58.0; extra == "github"
 Provides-Extra: gitlab
 Requires-Dist: python-gitlab; extra == "gitlab"
 Provides-Extra: google-drive
@@ -124,18 +124,18 @@ Requires-Dist: pymilvus; extra == "milvus"
 Provides-Extra: mongodb
 Requires-Dist: pymongo; extra == "mongodb"
 Provides-Extra: neo4j
-Requires-Dist: cymple; extra == "neo4j"
-Requires-Dist: neo4j-rust-ext; extra == "neo4j"
 Requires-Dist: networkx; extra == "neo4j"
+Requires-Dist: neo4j-rust-ext; extra == "neo4j"
+Requires-Dist: cymple; extra == "neo4j"
 Provides-Extra: notion
+Requires-Dist: htmlBuilder; extra == "notion"
+Requires-Dist: httpx; extra == "notion"
 Requires-Dist: notion-client; extra == "notion"
 Requires-Dist: backoff; extra == "notion"
-Requires-Dist: httpx; extra == "notion"
-Requires-Dist: htmlBuilder; extra == "notion"
 Provides-Extra: onedrive
 Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
-Requires-Dist: msal; extra == "onedrive"
 Requires-Dist: bs4; extra == "onedrive"
+Requires-Dist: msal; extra == "onedrive"
 Provides-Extra: opensearch
 Requires-Dist: opensearch-py; extra == "opensearch"
 Provides-Extra: outlook
@@ -178,18 +178,18 @@ Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
 Provides-Extra: singlestore
 Requires-Dist: singlestoredb; extra == "singlestore"
 Provides-Extra: vectara
+Requires-Dist: httpx; extra == "vectara"
 Requires-Dist: requests; extra == "vectara"
 Requires-Dist: aiofiles; extra == "vectara"
-Requires-Dist: httpx; extra == "vectara"
 Provides-Extra: vastdb
+Requires-Dist: vastdb; extra == "vastdb"
 Requires-Dist: ibis; extra == "vastdb"
 Requires-Dist: pyarrow; extra == "vastdb"
-Requires-Dist: vastdb; extra == "vastdb"
 Provides-Extra: embed-huggingface
 Requires-Dist: sentence-transformers; extra == "embed-huggingface"
 Provides-Extra: embed-octoai
-Requires-Dist: openai; extra == "embed-octoai"
 Requires-Dist: tiktoken; extra == "embed-octoai"
+Requires-Dist: openai; extra == "embed-octoai"
 Provides-Extra: embed-vertexai
 Requires-Dist: vertexai; extra == "embed-vertexai"
 Provides-Extra: embed-voyageai
@@ -197,11 +197,11 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
 Provides-Extra: embed-mixedbreadai
 Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
 Provides-Extra: openai
-Requires-Dist: openai; extra == "openai"
 Requires-Dist: tiktoken; extra == "openai"
+Requires-Dist: openai; extra == "openai"
 Provides-Extra: bedrock
-Requires-Dist: boto3; extra == "bedrock"
 Requires-Dist: aioboto3; extra == "bedrock"
+Requires-Dist: boto3; extra == "bedrock"
 Provides-Extra: togetherai
 Requires-Dist: together; extra == "togetherai"
 Dynamic: author

{unstructured_ingest-0.5.10.dist-info → unstructured_ingest-0.5.11.dist-info}/RECORD

@@ -5,7 +5,7 @@ test/integration/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJW
 test/integration/chunkers/test_chunkers.py,sha256=USkltQN_mVVCxI0FkJsrS1gnLXlVr-fvsc0tPaK2sWI,1062
 test/integration/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/integration/connectors/conftest.py,sha256=vYs4WDlCuieAwwErkJxCk4a1lGvr3qpeiAm-YaDznSo,1018
-test/integration/connectors/test_astradb.py,sha256=
+test/integration/connectors/test_astradb.py,sha256=c9Lk0dvJVVdzHcokvsc4XMNJ4SIO1k2vGtT5py0cFVM,9753
 test/integration/connectors/test_azure_ai_search.py,sha256=MxFwk84vI_HT4taQTGrNpJ8ewGPqHSGrx626j8hC_Pw,9695
 test/integration/connectors/test_chroma.py,sha256=NuQv0PWPM0_LQfdPeUd6IYKqaKKXWmVaHGWjq5aBfOY,3721
 test/integration/connectors/test_confluence.py,sha256=Ju0gRQbD2g9l9iRf2HDZKi7RyPnBGtFRWcGpsqhO3F8,3588
@@ -25,6 +25,7 @@ test/integration/connectors/test_redis.py,sha256=1aKwOb-K4zCxZwHmgW_WzGJwqLntbWT
 test/integration/connectors/test_s3.py,sha256=E1dypeag_E3OIfpQWIz3jb7ctRHRD63UtyTrzyvJzpc,7473
 test/integration/connectors/test_sharepoint.py,sha256=weGby5YD6se7R7KLEq96hxUZYPzwoqZqXXTPhtQWZsQ,7646
 test/integration/connectors/test_vectara.py,sha256=4kKOOTGUjeZw2jKRcgVDI7ifbRPRZfjjVO4d_7H5C6I,8710
+test/integration/connectors/test_zendesk.py,sha256=6Xsxxav9b1NBp_zd66S_sE4Nn5iO6Et4a5zgGR2-Y04,4159
 test/integration/connectors/databricks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/integration/connectors/databricks/test_volumes_native.py,sha256=KqiapQAV0s_Zv0CO8BwYoiCk30dwrSZzuigUWNRIem0,9559
 test/integration/connectors/discord/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -49,7 +50,7 @@ test/integration/connectors/utils/constants.py,sha256=JhTk6YNw7JVpkk-Pl8zn2YYkEx
 test/integration/connectors/utils/docker.py,sha256=4g1STiSbYN5qcmDTXyPxVJgwx97O6wk7n-DJ-zgzgag,4971
 test/integration/connectors/utils/docker_compose.py,sha256=GVTB6Cel05c0VQ2n4AwkQQx_cBfz13ZTs1HpbaYipNU,2223
 test/integration/connectors/utils/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-test/integration/connectors/utils/validation/destination.py,sha256=
+test/integration/connectors/utils/validation/destination.py,sha256=HUdwpvszGOuGnKZFawGdxRNptbbJDIghyi-roQjhEs4,2697
 test/integration/connectors/utils/validation/equality.py,sha256=R6d_1c-Si5518WJcBcshF_wBRnywnZ0ORQ-NL0xNmGo,2602
 test/integration/connectors/utils/validation/source.py,sha256=xnAZI26ILdeMhgrWAGrU2N2fqK58YNGkfyUhJekZ0Ho,13541
 test/integration/connectors/utils/validation/utils.py,sha256=xYYvAbqP6_lZyH09_JjB4w2Sf8aQPvDVT5vZTs05ILs,1428
@@ -110,7 +111,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
 test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
 unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
-unstructured_ingest/__version__.py,sha256=
+unstructured_ingest/__version__.py,sha256=jn_Macoo3VuCWr-9TnO28WPJsWO8fYuvd5mexbdfL3c,43
 unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
 unstructured_ingest/interfaces.py,sha256=7DOnDpGvUNlCoFR7UPRGmOarqH5sFtuUOO5vf8X3oTM,31489
 unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
@@ -397,7 +398,7 @@ unstructured_ingest/v2/cli/utils/click.py,sha256=1_eJgrwS2DFBl1jZPLsj1vgVgR7agFB
 unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=7eEIkk1KU51-ZNiIfI1KRxlwITNW1xl1YxMAG8BcTk0,7604
 unstructured_ingest/v2/interfaces/__init__.py,sha256=Xp7-345QpM6MG7V7G4ZrVERjADAUBiPAY88PKaMRyqY,1005
 unstructured_ingest/v2/interfaces/connector.py,sha256=qUFFJ3qgDMenTCZMtVRjq1DIwsVak6pxNjQOH2eVkMw,1623
-unstructured_ingest/v2/interfaces/downloader.py,sha256=
+unstructured_ingest/v2/interfaces/downloader.py,sha256=Qi_wISgUACZKEPu5p1kUaG3uiCXcr3zWg9z9uRDwoOk,2927
 unstructured_ingest/v2/interfaces/file_data.py,sha256=7MyRlj5dijQsCR6W18wQ8fEgJigGKwoOYc10g9A6PSo,3834
 unstructured_ingest/v2/interfaces/indexer.py,sha256=i0oftyifXefxfKa4a3sCfSwkzWGSPE6EvC9sg6fwZgk,833
 unstructured_ingest/v2/interfaces/process.py,sha256=S3A_9gkwwGC-iQxvnpj3Er6IJAjAT5npzpSgxuFAzUM,449
@@ -427,7 +428,7 @@ unstructured_ingest/v2/processes/partitioner.py,sha256=HxopDSbovLh_1epeGeVtuWEX7
 unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
 unstructured_ingest/v2/processes/connectors/__init__.py,sha256=rkEQVVgcHoY3jwgW_5PH_NzdXIEwtBLs9Dk4VzmTZMA,6387
 unstructured_ingest/v2/processes/connectors/airtable.py,sha256=eeZJe-bBNxt5Sa-XEFCdcGeJCguJU5WN2Mv9kLp5dVQ,8917
-unstructured_ingest/v2/processes/connectors/astradb.py,sha256=
+unstructured_ingest/v2/processes/connectors/astradb.py,sha256=E6fB4anCd_gtSzVUsZ5pDrfdxs5AWERQM_NEfeenfEs,18202
 unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=ngPDpU0oZ6m5sxIlB6u5ebQpqCS_SJ-_amCC1KQ03EQ,11529
 unstructured_ingest/v2/processes/connectors/chroma.py,sha256=VHCnM56qNXuHzovJihrNfJnZbWLJShOe8j12PJFrbL0,7219
 unstructured_ingest/v2/processes/connectors/confluence.py,sha256=wTZewdbmCHaQuEJZ7Wf0NBOo8fS_n1I0DDwlhN96woE,11243
@@ -571,9 +572,12 @@ unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-
 unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
 unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
 unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=UZ_s8dnVNx9BWFG2fPah4VbQbgEDF4nP78bQeU3jg08,12821
-unstructured_ingest
-unstructured_ingest
-unstructured_ingest
-unstructured_ingest-0.5.
-unstructured_ingest-0.5.
-unstructured_ingest-0.5.
+unstructured_ingest/v2/processes/connectors/zendesk/__init__.py,sha256=XMNocKJ3FHDfy36p_KHhH7ALi0-ji6NhGuQNCV2E4vY,699
+unstructured_ingest/v2/processes/connectors/zendesk/client.py,sha256=wK2x5t2h0qXSwCYgli8Zegg8bujdSrgnmiTO-bu7nN4,7297
+unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py,sha256=97yikyb6EQ70pjU2ZXpYnJeC55vkeXaEXlawx5qS6Oo,15228
+unstructured_ingest-0.5.11.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
+unstructured_ingest-0.5.11.dist-info/METADATA,sha256=5HEW821YxrURJHOb7OxOa8AggarvDctXU0V8p2z1gws,8317
+unstructured_ingest-0.5.11.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+unstructured_ingest-0.5.11.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
+unstructured_ingest-0.5.11.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
+unstructured_ingest-0.5.11.dist-info/RECORD,,

{unstructured_ingest-0.5.10.dist-info → unstructured_ingest-0.5.11.dist-info}/LICENSE.md, WHEEL, entry_points.txt, top_level.txt: renamed for the new version; file contents unchanged.