unstructured-ingest 0.5.15__py3-none-any.whl → 0.5.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- test/integration/connectors/test_zendesk.py +31 -53
- test/integration/connectors/utils/validation/source.py +5 -3
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/v2/pipeline/steps/download.py +3 -3
- unstructured_ingest/v2/pipeline/steps/index.py +4 -4
- unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py +1 -1
- unstructured_ingest/v2/processes/connectors/zendesk/client.py +221 -156
- unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py +83 -274
- {unstructured_ingest-0.5.15.dist-info → unstructured_ingest-0.5.16.dist-info}/METADATA +19 -19
- {unstructured_ingest-0.5.15.dist-info → unstructured_ingest-0.5.16.dist-info}/RECORD +14 -14
- {unstructured_ingest-0.5.15.dist-info → unstructured_ingest-0.5.16.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.5.15.dist-info → unstructured_ingest-0.5.16.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.5.15.dist-info → unstructured_ingest-0.5.16.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.5.15.dist-info → unstructured_ingest-0.5.16.dist-info}/top_level.txt +0 -0
unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py
CHANGED

@@ -1,22 +1,17 @@
  from __future__ import annotations

- import datetime
  import hashlib
  from dataclasses import dataclass
  from pathlib import Path
  from time import time
- from typing import Any, AsyncGenerator,
+ from typing import Any, AsyncGenerator, Literal, Union

  from pydantic import BaseModel, Field, Secret

- from unstructured_ingest.utils.data_prep import batch_generator
  from unstructured_ingest.utils.dep_check import requires_dependencies
  from unstructured_ingest.utils.html import HtmlMixin
- from unstructured_ingest.v2.errors import UserAuthError
  from unstructured_ingest.v2.interfaces import (
      AccessConfig,
-     BatchFileData,
-     BatchItem,
      ConnectionConfig,
      Downloader,
      DownloaderConfig,
@@ -36,20 +31,11 @@ CONNECTOR_TYPE = "zendesk"


  class ZendeskAdditionalMetadata(BaseModel):
-     item_type:
-
-     tail_id: str  # last id in the batch.
+     item_type: Literal["ticket", "article"]
+     content: Union[ZendeskTicket, ZendeskArticle]


- class
-     """
-     inherits metadata object as tickets and articles
-     are treated in single batch, we need to denote indices ticket/article
-     as the source metadata.
-     """
-
-
- class ZendeskBatchFileData(BatchFileData):
+ class ZendeskFileData(FileData):
      additional_metadata: ZendeskAdditionalMetadata

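The model change above replaces the batch-oriented metadata (leading/tail ids per batch) with one record per item: item_type discriminates the payload and content embeds the full ticket or article on the FileData itself. A minimal, self-contained sketch of the same pattern; the Ticket and Article models below are hypothetical stand-ins, not the real ZendeskTicket/ZendeskArticle defined in client.py:

from typing import Literal, Union

from pydantic import BaseModel


class Ticket(BaseModel):
    # hypothetical stand-in for ZendeskTicket (client.py)
    id: int
    subject: str


class Article(BaseModel):
    # hypothetical stand-in for ZendeskArticle (client.py)
    id: int
    title: str


class AdditionalMetadata(BaseModel):
    # same shape as ZendeskAdditionalMetadata above: a discriminator plus
    # the full embedded record
    item_type: Literal["ticket", "article"]
    content: Union[Ticket, Article]


meta = AdditionalMetadata(item_type="ticket", content=Ticket(id=42, subject="Printer on fire"))
print(meta.model_dump())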
@@ -59,48 +45,21 @@ class ZendeskAccessConfig(AccessConfig):
      )


- class ZendeskBatchItemTicket(BatchItem):
-     subject: str
-     description: str
-     item_type: str = "tickets"  # placeholder for downloader
-
-
- class ZendeskBatchItemArticle(BatchItem):
-     title: str
-     author_id: str
-     title: str
-     content: str
-
-
  class ZendeskConnectionConfig(ConnectionConfig):
      subdomain: str = Field(description="Subdomain for zendesk site, <sub-domain>.company.com")
      email: str = Field(description="Email for zendesk site registered at the subdomain")
      access_config: Secret[ZendeskAccessConfig]

-     async def get_client_async(self) -> ZendeskClient:
-         """Provides an async manager for ZendeskClient."""
-         access_config = self.access_config.get_secret_value()
-
-         client = ZendeskClient(
-             email=self.email, subdomain=self.subdomain, token=access_config.api_token
-         )
-         return client
-
      def get_client(self) -> ZendeskClient:

          access_config = self.access_config.get_secret_value()

-         client = ZendeskClient(
+         return ZendeskClient(
              email=self.email, subdomain=self.subdomain, token=access_config.api_token
          )
-         return client


  class ZendeskIndexerConfig(IndexerConfig):
-     batch_size: int = Field(
-         default=2,
-         description="Number of tickets or articles.",
-     )
      item_type: Literal["tickets", "articles", "all"] = Field(
          default="tickets",
          description="Type of item from zendesk to parse, can only be `tickets` or `articles`.",
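With the separate async client factory and the batch_size knob gone, configuration reduces to the three connection fields plus item_type. A rough sketch of building these configs directly, assuming the module path implied by the file listing above and the api_token field referenced in get_client(); in practice the CLI or pipeline constructs these for you:

from unstructured_ingest.v2.processes.connectors.zendesk.zendesk import (
    ZendeskAccessConfig,
    ZendeskConnectionConfig,
    ZendeskIndexerConfig,
)

connection_config = ZendeskConnectionConfig(
    subdomain="mycompany",  # resolves to mycompany.zendesk.com
    email="agent@mycompany.com",
    access_config=ZendeskAccessConfig(api_token="<zendesk-api-token>"),
)

# item_type replaces the removed batch_size option: "tickets", "articles", or "all"
index_config = ZendeskIndexerConfig(item_type="all")

client = connection_config.get_client()  # plain constructor call; no async factory anymore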
@@ -115,142 +74,76 @@ class ZendeskIndexer(Indexer):

      def precheck(self) -> None:
          """Validates connection to Zendesk API."""
-         try:
-             client = self.connection_config.get_client()
-             if not client.get_users():
-                 subdomain_endpoint = f"{self.connection_config.subdomain}.zendesk.com"
-                 raise UserAuthError(f"Users do not exist in subdomain {subdomain_endpoint}")
-         except UserAuthError as e:
-             logger.error(f"Source connection error: {e}", exc_info=True)
-             raise
-         except Exception as e:
-             logger.error(f"Failed to validate connection to Zendesk: {e}", exc_info=True)
-             raise UserAuthError(f"Failed to validate connection: {e}")
+         self.connection_config.get_client()

      def is_async(self) -> bool:
          return True

-     async def _list_articles_async(self) -> List[ZendeskArticle]:
-         client = await self.connection_config.get_client_async()
-         return await client.get_articles_async()
-
-     async def _list_tickets_async(self) -> List[ZendeskTicket]:
-         client = await self.connection_config.get_client_async()
-         return await client.get_tickets_async()
-
      def _generate_fullpath(self, identifier: str) -> Path:
          return Path(hashlib.sha256(identifier.encode("utf-8")).hexdigest()[:16] + ".txt")

-     async def
-         self
-
-
-         for article_batch in batch_generator(articles, batch_size=batch_size):
-
-             article_batch = sorted(article_batch)
-
-             additional_metadata = ZendeskAdditionalMetadata(
-                 item_type="articles",
-                 leading_id=str(article_batch[0].id),
-                 tail_id=str(article_batch[-1].id),
-             )
-
-             metadata = ZendeskFileDataSourceMetadata(
-                 date_processed=str(time()),
-                 record_locator={
-                     "id": str(article_batch[0].id),
-                     "item_type": "articles",
-                 },
-             )
-
-             batch_items: List[ZendeskBatchItemArticle] = [
-                 ZendeskBatchItemArticle(
-                     identifier=str(article.id),
-                     author_id=str(article.author_id),
-                     title=str(article.title),
-                     content=str(article.content),
-                 )
-                 for article in article_batch
-             ]
-
-             full_path = self._generate_fullpath(str(article_batch[0].id))
-             full_path = Path(str(full_path).replace(".txt", ".html"))
-
-             source_identifiers = SourceIdentifiers(filename=full_path.name, fullpath=str(full_path))
-
-             batched_file_data = ZendeskBatchFileData(
-                 identifier=str(article_batch[0].id),
-                 connector_type=self.connector_type,
-                 metadata=metadata,
-                 batch_items=batch_items,
-                 additional_metadata=additional_metadata,
-                 source_identifiers=source_identifiers,
-             )
-
-             yield batched_file_data
-
-     async def handle_tickets_async(
-         self, tickets: List[ZendeskTicket], batch_size: int
-     ) -> AsyncGenerator[ZendeskBatchFileData, None]:
-         """Parses tickets from a list and yields FileData objects asynchronously in batches."""
-         for ticket_batch in batch_generator(tickets, batch_size=batch_size):
-
-             sorted_batch = sorted(ticket_batch)
-
-             additional_metadata = ZendeskAdditionalMetadata(
-                 item_type="tickets",
-                 leading_id=str(sorted_batch[0].id),
-                 tail_id=str(sorted_batch[-1].id),
-             )
-
-             metadata = ZendeskFileDataSourceMetadata(
-                 date_processed=str(time()),
-                 record_locator={
-                     "id": str(sorted_batch[0].id),
-                     "item_type": "tickets",
-                 },
-             )
-
-             batch_items: List[ZendeskBatchItemTicket] = [
-                 ZendeskBatchItemTicket(
+     async def get_tickets(self) -> AsyncGenerator[ZendeskFileData, None]:
+         async with self.connection_config.get_client() as client:
+             async for ticket in client.get_tickets():
+                 yield ZendeskFileData(
                      identifier=str(ticket.id),
-
-
+                     connector_type=self.connector_type,
+                     source_identifiers=SourceIdentifiers(
+                         filename=f"{ticket.id}.txt", fullpath=f"tickets/{ticket.id}.txt"
+                     ),
+                     additional_metadata=ZendeskAdditionalMetadata(
+                         item_type="ticket", content=ticket
+                     ),
+                     metadata=FileDataSourceMetadata(
+                         url=str(ticket.url) if ticket.url else None,
+                         date_created=ticket.created_at.isoformat() if ticket.created_at else None,
+                         date_modified=ticket.updated_at.isoformat() if ticket.updated_at else None,
+                         date_processed=str(time()),
+                     ),
                  )
-                 for ticket in sorted_batch
-             ]
-
-             full_path = self._generate_fullpath(str(sorted_batch[0].id))
-             source_identifiers = SourceIdentifiers(filename=full_path.name, fullpath=str(full_path))
-
-             batched_file_data = ZendeskBatchFileData(
-                 connector_type=self.connector_type,
-                 metadata=metadata,
-                 batch_items=batch_items,
-                 additional_metadata=additional_metadata,
-                 source_identifiers=source_identifiers,
-             )

-
+     async def get_articles(self) -> AsyncGenerator[ZendeskFileData, None]:
+         async with self.connection_config.get_client() as client:
+             async for article in client.get_articles():
+                 yield ZendeskFileData(
+                     identifier=str(article.id),
+                     connector_type=self.connector_type,
+                     source_identifiers=SourceIdentifiers(
+                         filename=f"{article.id}.html", fullpath=f"articles/{article.id}.html"
+                     ),
+                     additional_metadata=ZendeskAdditionalMetadata(
+                         item_type="article", content=article
+                     ),
+                     metadata=FileDataSourceMetadata(
+                         url=str(article.url) if article.url else None,
+                         date_created=article.created_at.isoformat() if article.created_at else None,
+                         date_modified=(
+                             article.updated_at.isoformat() if article.updated_at else None
+                         ),
+                         date_processed=str(time()),
+                     ),
+                 )

-     async def run_async(self, **kwargs: Any) -> AsyncGenerator[
+     async def run_async(self, **kwargs: Any) -> AsyncGenerator[ZendeskFileData, None]:
          """Determines item type and processes accordingly asynchronously."""
          item_type = self.index_config.item_type
-         batch_size = self.index_config.batch_size

          if item_type == "articles":
-
-
-                 articles, batch_size
-             ):  # Using async version
-                 yield file_data
+             async for article_file_data in self.get_articles():
+                 yield article_file_data

          elif item_type == "tickets":
-
-
-
-
-
+             async for ticket_file_data in self.get_tickets():
+                 yield ticket_file_data
+
+         elif item_type == "all":
+             async for article_file_data in self.get_articles():
+                 yield article_file_data
+             async for ticket_file_data in self.get_tickets():
+                 yield ticket_file_data
+
+         else:
+             raise ValueError(f"Item type {item_type} is not supported by the indexer")


  class ZendeskDownloaderConfig(DownloaderConfig, HtmlMixin):
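run_async is now just a fan-out over the two per-item generators above, so every yielded object is a complete, self-describing ZendeskFileData. Consuming the indexer outside the pipeline would look roughly like the sketch below; illustrative only, since the indexer is normally driven by the v2 pipeline's index step:

import asyncio


async def list_zendesk_items(indexer) -> None:
    # each item carries its own payload in additional_metadata.content,
    # so no second lookup is needed to know what was indexed
    async for file_data in indexer.run_async():
        print(
            file_data.additional_metadata.item_type,
            file_data.identifier,
            file_data.source_identifiers.fullpath,
        )


# asyncio.run(list_zendesk_items(indexer))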
@@ -289,130 +182,46 @@ class ZendeskDownloader(Downloader):
              session=session,
          )

-     @requires_dependencies(["
-     async def
-         self, client: ZendeskClient, batch_file_data: ZendeskBatchFileData
-     ):
-         """
-         Processes the article information, downloads the attachments for each article,
-         and updates the content accordingly.
-         """
+     @requires_dependencies(["aiofiles", "bs4"], extras="zendesk")
+     async def download_article(self, article: ZendeskArticle, download_path: Path) -> None:
          import aiofiles
          import bs4

-
-
-
-         if download_path is None:
-             raise ValueError("Download path could not be determined")
-
-         download_path.parent.mkdir(parents=True, exist_ok=True)
-
-         async with aiofiles.open(download_path, "a", encoding="utf8") as f:
-             for article in batch_file_data.batch_items:
-                 html_data_str = article.content
-                 soup = bs4.BeautifulSoup(html_data_str, "html.parser")
-
-                 if self.download_config.extract_images:
-                     # Get article attachments asynchronously
-                     image_data_decoded: List = await client.get_article_attachments_async(
-                         article_id=article.identifier
-                     )
-                     img_tags = soup.find_all("img")
-
-                     # Ensure we don't exceed the available images
-                     for img_tag, img_data in zip(img_tags, image_data_decoded):
-                         img_tag["src"] = img_data.get("encoded_content", "")
-
+         article_html = article.as_html()
+         soup = bs4.BeautifulSoup(article_html, "html.parser")
+         async with aiofiles.open(download_path, "w", encoding="utf8") as f:
              await f.write(soup.prettify())

-         return super().generate_download_response(
-             file_data=batch_file_data, download_path=download_path
-         )
-
      @requires_dependencies(["aiofiles"], extras="zendesk")
-     async def
-         self, client: ZendeskClient, batch_file_data: ZendeskBatchFileData
-     ) -> DownloadResponse:
-         """
-         Processes a batch of tickets asynchronously, writing their details and comments to a file.
-         """
+     async def download_ticket(self, ticket: ZendeskTicket, download_path: Path) -> None:
          import aiofiles

-
-
-
-
-
-         download_path.parent.mkdir(parents=True, exist_ok=True)
-
-         # Process each ticket in the batch
-         async with aiofiles.open(download_path, "a", encoding="utf8") as f:
-             for batch_item in batch_file_data.batch_items:
-                 ticket_identifier = batch_item.identifier
-                 first_date = None
-                 comments: List[dict] = []
-
-                 # Fetch comments asynchronously
-                 comments_list = await client.get_comments_async(ticket_id=int(ticket_identifier))
-
-                 for comment in comments_list:  # Iterate over the resolved list
-                     date_created = (
-                         comment.metadata["created_at"].isoformat()
-                         if isinstance(comment.metadata["created_at"], datetime.datetime)
-                         else str(comment.metadata["created_at"])
-                     )
-
-                     if first_date is None:
-                         first_date = date_created
-
-                     comments.append(
-                         {
-                             "comment_id": comment.id,
-                             "author_id": comment.author_id,
-                             "body": comment.body,
-                             "date_created": date_created,
-                         }
-                     )
-
-                 # Write ticket details to file
-                 content = (
-                     "\nticket\n"
-                     f"{batch_item.identifier}\n"
-                     f"{batch_file_data.metadata.record_locator.get('subject', '')}\n"
-                     f"{batch_file_data.metadata.record_locator.get('description', '')}\n"
-                     f"{first_date}\n"
-                 )
-
-                 # Append comments
+         async with aiofiles.open(download_path, "w", encoding="utf8") as f:
+             await f.write(ticket.as_text())
+             async with self.connection_config.get_client() as client:
+                 comments = [comment async for comment in client.get_comments(ticket_id=ticket.id)]
                  for comment in comments:
-
-                         "comment\n"
-                         f"{comment.get('comment_id', '')}\n"
-                         f"{comment.get('author_id', '')}\n"
-                         f"{comment.get('body', '')}\n"
-                         f"{comment.get('date_created', '')}\n"
-                     )
-
-                 await f.write(content)
+                     await f.write(comment.as_text())

-         return super().generate_download_response(
-             file_data=batch_file_data, download_path=download_path
-         )
+     async def run_async(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:

-
+         zendesk_filedata = ZendeskFileData.cast(file_data=file_data)

-
-
-
-         item_type = zendesk_filedata.metadata.record_locator["item_type"]
+         item_type = zendesk_filedata.additional_metadata.item_type
+         download_path = self.get_download_path(file_data=zendesk_filedata)
+         download_path.parent.mkdir(parents=True, exist_ok=True)

-         if item_type == "
-
-
-
+         if item_type == "article":
+             article = ZendeskArticle.model_validate(zendesk_filedata.additional_metadata.content)
+             await self.download_article(article=article, download_path=download_path)
+         elif item_type == "ticket":
+             ticket = ZendeskTicket.model_validate(zendesk_filedata.additional_metadata.content)
+             await self.download_ticket(ticket=ticket, download_path=download_path)
          else:
              raise RuntimeError(f"Item type {item_type} cannot be handled by the downloader")
+         return super().generate_download_response(
+             file_data=zendesk_filedata, download_path=download_path
+         )


  # create entry
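End to end, the downloader no longer calls the Zendesk API for articles at all: the indexer already embedded the article body, and only tickets trigger an extra get_comments call. A sketch of driving both stages manually, assuming indexer and downloader instances built from the configs shown earlier; articles land as <id>.html and tickets as <id>.txt per the source_identifiers set by the indexer:

import asyncio


async def ingest_zendesk(indexer, downloader) -> None:
    # index each ticket/article, then let the downloader write it to disk
    async for file_data in indexer.run_async():
        await downloader.run_async(file_data=file_data)


# asyncio.run(ingest_zendesk(indexer, downloader))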
{unstructured_ingest-0.5.15.dist-info → unstructured_ingest-0.5.16.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: unstructured-ingest
- Version: 0.5.15
+ Version: 0.5.16
  Summary: A library that prepares raw documents for downstream ML tasks.
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
  Author: Unstructured Technologies

@@ -23,12 +23,12 @@ Requires-Python: >=3.9.0,<3.14
  Description-Content-Type: text/markdown
  License-File: LICENSE.md
  Requires-Dist: pandas
+ Requires-Dist: opentelemetry-sdk
  Requires-Dist: tqdm
+ Requires-Dist: dataclasses_json
  Requires-Dist: pydantic>=2.7
  Requires-Dist: python-dateutil
  Requires-Dist: click
- Requires-Dist: opentelemetry-sdk
- Requires-Dist: dataclasses_json
  Provides-Extra: remote
  Requires-Dist: unstructured-client>=0.30.0; extra == "remote"
  Provides-Extra: csv

@@ -71,11 +71,11 @@ Requires-Dist: fsspec; extra == "azure"
  Provides-Extra: azure-ai-search
  Requires-Dist: azure-search-documents; extra == "azure-ai-search"
  Provides-Extra: biomed
- Requires-Dist: bs4; extra == "biomed"
  Requires-Dist: requests; extra == "biomed"
+ Requires-Dist: bs4; extra == "biomed"
  Provides-Extra: box
- Requires-Dist: fsspec; extra == "box"
  Requires-Dist: boxfs; extra == "box"
+ Requires-Dist: fsspec; extra == "box"
  Provides-Extra: chroma
  Requires-Dist: chromadb; extra == "chroma"
  Provides-Extra: clarifai

@@ -91,19 +91,19 @@ Requires-Dist: boto3; extra == "delta-table"
  Provides-Extra: discord
  Requires-Dist: discord.py; extra == "discord"
  Provides-Extra: dropbox
- Requires-Dist: dropboxdrivefs; extra == "dropbox"
  Requires-Dist: fsspec; extra == "dropbox"
+ Requires-Dist: dropboxdrivefs; extra == "dropbox"
  Provides-Extra: duckdb
  Requires-Dist: duckdb; extra == "duckdb"
  Provides-Extra: elasticsearch
  Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
  Provides-Extra: gcs
- Requires-Dist: gcsfs; extra == "gcs"
- Requires-Dist: bs4; extra == "gcs"
  Requires-Dist: fsspec; extra == "gcs"
+ Requires-Dist: bs4; extra == "gcs"
+ Requires-Dist: gcsfs; extra == "gcs"
  Provides-Extra: github
- Requires-Dist: pygithub>1.58.0; extra == "github"
  Requires-Dist: requests; extra == "github"
+ Requires-Dist: pygithub>1.58.0; extra == "github"
  Provides-Extra: gitlab
  Requires-Dist: python-gitlab; extra == "gitlab"
  Provides-Extra: google-drive

@@ -124,8 +124,8 @@ Requires-Dist: pymilvus; extra == "milvus"
  Provides-Extra: mongodb
  Requires-Dist: pymongo; extra == "mongodb"
  Provides-Extra: neo4j
- Requires-Dist: cymple; extra == "neo4j"
  Requires-Dist: neo4j-rust-ext; extra == "neo4j"
+ Requires-Dist: cymple; extra == "neo4j"
  Requires-Dist: networkx; extra == "neo4j"
  Provides-Extra: notion
  Requires-Dist: httpx; extra == "notion"

@@ -133,14 +133,14 @@ Requires-Dist: htmlBuilder; extra == "notion"
  Requires-Dist: notion-client; extra == "notion"
  Requires-Dist: backoff; extra == "notion"
  Provides-Extra: onedrive
+ Requires-Dist: msal; extra == "onedrive"
  Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
  Requires-Dist: bs4; extra == "onedrive"
- Requires-Dist: msal; extra == "onedrive"
  Provides-Extra: opensearch
  Requires-Dist: opensearch-py; extra == "opensearch"
  Provides-Extra: outlook
- Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
  Requires-Dist: msal; extra == "outlook"
+ Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
  Provides-Extra: pinecone
  Requires-Dist: pinecone-client>=3.7.1; extra == "pinecone"
  Provides-Extra: postgres

@@ -155,13 +155,13 @@ Provides-Extra: s3
  Requires-Dist: s3fs; extra == "s3"
  Requires-Dist: fsspec; extra == "s3"
  Provides-Extra: sharepoint
- Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
  Requires-Dist: msal; extra == "sharepoint"
+ Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
  Provides-Extra: salesforce
  Requires-Dist: simple-salesforce; extra == "salesforce"
  Provides-Extra: sftp
- Requires-Dist: paramiko; extra == "sftp"
  Requires-Dist: fsspec; extra == "sftp"
+ Requires-Dist: paramiko; extra == "sftp"
  Provides-Extra: slack
  Requires-Dist: slack_sdk[optional]; extra == "slack"
  Provides-Extra: snowflake

@@ -178,12 +178,12 @@ Requires-Dist: databricks-sql-connector; extra == "databricks-delta-tables"
  Provides-Extra: singlestore
  Requires-Dist: singlestoredb; extra == "singlestore"
  Provides-Extra: vectara
- Requires-Dist: httpx; extra == "vectara"
- Requires-Dist: requests; extra == "vectara"
  Requires-Dist: aiofiles; extra == "vectara"
+ Requires-Dist: requests; extra == "vectara"
+ Requires-Dist: httpx; extra == "vectara"
  Provides-Extra: vastdb
- Requires-Dist: vastdb; extra == "vastdb"
  Requires-Dist: ibis; extra == "vastdb"
+ Requires-Dist: vastdb; extra == "vastdb"
  Requires-Dist: pyarrow; extra == "vastdb"
  Provides-Extra: zendesk
  Requires-Dist: httpx; extra == "zendesk"

@@ -192,8 +192,8 @@ Requires-Dist: aiofiles; extra == "zendesk"
  Provides-Extra: embed-huggingface
  Requires-Dist: sentence-transformers; extra == "embed-huggingface"
  Provides-Extra: embed-octoai
- Requires-Dist: openai; extra == "embed-octoai"
  Requires-Dist: tiktoken; extra == "embed-octoai"
+ Requires-Dist: openai; extra == "embed-octoai"
  Provides-Extra: embed-vertexai
  Requires-Dist: vertexai; extra == "embed-vertexai"
  Provides-Extra: embed-voyageai

@@ -201,8 +201,8 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
  Provides-Extra: embed-mixedbreadai
  Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
  Provides-Extra: openai
- Requires-Dist: openai; extra == "openai"
  Requires-Dist: tiktoken; extra == "openai"
+ Requires-Dist: openai; extra == "openai"
  Provides-Extra: bedrock
  Requires-Dist: aioboto3; extra == "bedrock"
  Requires-Dist: boto3; extra == "bedrock"
{unstructured_ingest-0.5.15.dist-info → unstructured_ingest-0.5.16.dist-info}/RECORD
CHANGED

@@ -25,7 +25,7 @@ test/integration/connectors/test_redis.py,sha256=1aKwOb-K4zCxZwHmgW_WzGJwqLntbWT
  test/integration/connectors/test_s3.py,sha256=E1dypeag_E3OIfpQWIz3jb7ctRHRD63UtyTrzyvJzpc,7473
  test/integration/connectors/test_sharepoint.py,sha256=weGby5YD6se7R7KLEq96hxUZYPzwoqZqXXTPhtQWZsQ,7646
  test/integration/connectors/test_vectara.py,sha256=4kKOOTGUjeZw2jKRcgVDI7ifbRPRZfjjVO4d_7H5C6I,8710
- test/integration/connectors/test_zendesk.py,sha256=
+ test/integration/connectors/test_zendesk.py,sha256=nMBVNlEQr1uvmI1fzUC1bmoa2doXnYp5n4bMJS2FN-o,3727
  test/integration/connectors/databricks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  test/integration/connectors/databricks/test_volumes_native.py,sha256=KqiapQAV0s_Zv0CO8BwYoiCk30dwrSZzuigUWNRIem0,9559
  test/integration/connectors/discord/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

@@ -52,7 +52,7 @@ test/integration/connectors/utils/docker_compose.py,sha256=GVTB6Cel05c0VQ2n4AwkQ
  test/integration/connectors/utils/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  test/integration/connectors/utils/validation/destination.py,sha256=HUdwpvszGOuGnKZFawGdxRNptbbJDIghyi-roQjhEs4,2697
  test/integration/connectors/utils/validation/equality.py,sha256=R6d_1c-Si5518WJcBcshF_wBRnywnZ0ORQ-NL0xNmGo,2602
- test/integration/connectors/utils/validation/source.py,sha256=
+ test/integration/connectors/utils/validation/source.py,sha256=tIZHrLONlU6nfrTesC_tymSxYOkZyIyM4UuTtvqyjr8,13652
  test/integration/connectors/utils/validation/utils.py,sha256=xYYvAbqP6_lZyH09_JjB4w2Sf8aQPvDVT5vZTs05ILs,1428
  test/integration/connectors/weaviate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  test/integration/connectors/weaviate/conftest.py,sha256=6Q6QdrLJmGHowRFSmoVSzup2EX6qASfS2Z5tqlpTm9M,387

@@ -111,7 +111,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
  test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
- unstructured_ingest/__version__.py,sha256=
+ unstructured_ingest/__version__.py,sha256=EgX3pL6NG5u1RONYNW1ysr-stCliU2U7MUb-vn-absY,43
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
  unstructured_ingest/interfaces.py,sha256=7DOnDpGvUNlCoFR7UPRGmOarqH5sFtuUOO5vf8X3oTM,31489
  unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479

@@ -411,10 +411,10 @@ unstructured_ingest/v2/pipeline/otel.py,sha256=K3pQvWVgWzyOWMKCBUofsH7wTZPJ0Ysw5
  unstructured_ingest/v2/pipeline/pipeline.py,sha256=m3m9F9wZsCEhsFK_0WZv5_ENl2M42VHBV6Vc39t90v8,16842
  unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=s2BY2v1cs_ImTsPrg8J-92k-fV73b61nDiSy4p9736k,3223
- unstructured_ingest/v2/pipeline/steps/download.py,sha256=
+ unstructured_ingest/v2/pipeline/steps/download.py,sha256=umfKzObfbhQe3iFLWlLW3T9zB-Uj1FGOE-OkQ0MkGmY,8260
  unstructured_ingest/v2/pipeline/steps/embed.py,sha256=HPQgEWvVrpThUD1FB9k7XNiARXkd6rb4lnpxTGmEQxI,3201
  unstructured_ingest/v2/pipeline/steps/filter.py,sha256=pju7knTSbB2ll1jC9DPePRDnHlOlvEcU1-sjk6xYGGc,1211
- unstructured_ingest/v2/pipeline/steps/index.py,sha256=
+ unstructured_ingest/v2/pipeline/steps/index.py,sha256=JrPIwMV3S-t2pPwJ00OfOoULgYzvMY1Q-HFgXXpP4H4,3563
  unstructured_ingest/v2/pipeline/steps/partition.py,sha256=yE4HFFyORhnzH25PoJG6MNquMXqpzAznyf9NoZYBV5E,3284
  unstructured_ingest/v2/pipeline/steps/stage.py,sha256=VR8SLUJdVva61aieVKyxUHzupTCQbQeaMA0CKu4Fx7o,2347
  unstructured_ingest/v2/pipeline/steps/uncompress.py,sha256=p2nPFGbcpivPAZO5jDogTfn0iaL5bCFsgBNMejxVbzE,1768

@@ -460,7 +460,7 @@ unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=h6q
  unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py,sha256=gjICJJwhDHBLt_L-LrMlvJ3DL1DYtwFpyMLb_zYvOIg,3755
  unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=Uss3XPPaq1AsqJOEy4RJgBJw2-bTjrXH2PgtVNYd2w0,3006
  unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py,sha256=g1qYnIrML4TjN7rmC0MGrD5JzAprb6SymBHlEdOumz0,3113
- unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py,sha256=
+ unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py,sha256=FZhjrMYBr_je6mWYp7MUUvyKR9YwGD2HiNljeT7U5ws,5044
  unstructured_ingest/v2/processes/connectors/duckdb/__init__.py,sha256=5sVvJCWhU-YkjHIwk4W6BZCanFYK5W4xTpWtQ8xzeB4,561
  unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=IHaY1mWuidt6GDEJhB1c_orwmjeyXuRCVJ88djYDciM,2793
  unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py,sha256=oUHHaLpO2pWW2Lu4Mc-XFjrA0ze97205WQ_xP95ua4M,4296

@@ -573,13 +573,13 @@ unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8Stu
  unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
  unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=UZ_s8dnVNx9BWFG2fPah4VbQbgEDF4nP78bQeU3jg08,12821
  unstructured_ingest/v2/processes/connectors/zendesk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- unstructured_ingest/v2/processes/connectors/zendesk/client.py,sha256=
- unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py,sha256=
+ unstructured_ingest/v2/processes/connectors/zendesk/client.py,sha256=DDAYQB7catKfyGKxB5sfTwbOxrDj_NfWxrN372vA5Gc,11955
+ unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py,sha256=R8SXYkRhVUoWEHdGCt2CzcTxxuFundw_0GlGZ34YmbM,8987
  unstructured_ingest/v2/processes/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  unstructured_ingest/v2/processes/utils/blob_storage.py,sha256=EWvK4HRYubr9i1UyMhv5cU9u0UzVkCDC_BIm4Uxab7Y,964
- unstructured_ingest-0.5.
- unstructured_ingest-0.5.
- unstructured_ingest-0.5.
- unstructured_ingest-0.5.
- unstructured_ingest-0.5.
- unstructured_ingest-0.5.
+ unstructured_ingest-0.5.16.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
+ unstructured_ingest-0.5.16.dist-info/METADATA,sha256=Sm1qizGZbPHlXmzpcPhDuIsO-uWO-mrpfQZhovwhTQI,8465
+ unstructured_ingest-0.5.16.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ unstructured_ingest-0.5.16.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
+ unstructured_ingest-0.5.16.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
+ unstructured_ingest-0.5.16.dist-info/RECORD,,
{unstructured_ingest-0.5.15.dist-info → unstructured_ingest-0.5.16.dist-info}/LICENSE.md
RENAMED
File without changes

{unstructured_ingest-0.5.15.dist-info → unstructured_ingest-0.5.16.dist-info}/WHEEL
RENAMED
File without changes

{unstructured_ingest-0.5.15.dist-info → unstructured_ingest-0.5.16.dist-info}/entry_points.txt
RENAMED
File without changes

{unstructured_ingest-0.5.15.dist-info → unstructured_ingest-0.5.16.dist-info}/top_level.txt
RENAMED
File without changes