unstructured-ingest 0.5.9__py3-none-any.whl → 0.5.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (28) hide show
  1. test/integration/connectors/test_astradb.py +21 -0
  2. test/integration/connectors/test_dropbox.py +151 -0
  3. test/integration/connectors/test_jira.py +67 -0
  4. test/integration/connectors/test_zendesk.py +142 -0
  5. test/integration/connectors/utils/validation/destination.py +2 -1
  6. test/unit/test_utils.py +27 -0
  7. test/unit/v2/connectors/test_jira.py +401 -0
  8. unstructured_ingest/__version__.py +1 -1
  9. unstructured_ingest/embed/openai.py +4 -3
  10. unstructured_ingest/utils/string_and_date_utils.py +25 -0
  11. unstructured_ingest/v2/interfaces/downloader.py +2 -3
  12. unstructured_ingest/v2/processes/connectors/__init__.py +4 -0
  13. unstructured_ingest/v2/processes/connectors/astradb.py +36 -28
  14. unstructured_ingest/v2/processes/connectors/confluence.py +2 -2
  15. unstructured_ingest/v2/processes/connectors/delta_table.py +2 -0
  16. unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +78 -15
  17. unstructured_ingest/v2/processes/connectors/jira.py +453 -0
  18. unstructured_ingest/v2/processes/connectors/zendesk/__init__.py +31 -0
  19. unstructured_ingest/v2/processes/connectors/zendesk/client.py +225 -0
  20. unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py +419 -0
  21. unstructured_ingest/v2/processes/partitioner.py +2 -5
  22. unstructured_ingest/v2/unstructured_api.py +7 -0
  23. {unstructured_ingest-0.5.9.dist-info → unstructured_ingest-0.5.11.dist-info}/METADATA +26 -26
  24. {unstructured_ingest-0.5.9.dist-info → unstructured_ingest-0.5.11.dist-info}/RECORD +28 -20
  25. {unstructured_ingest-0.5.9.dist-info → unstructured_ingest-0.5.11.dist-info}/LICENSE.md +0 -0
  26. {unstructured_ingest-0.5.9.dist-info → unstructured_ingest-0.5.11.dist-info}/WHEEL +0 -0
  27. {unstructured_ingest-0.5.9.dist-info → unstructured_ingest-0.5.11.dist-info}/entry_points.txt +0 -0
  28. {unstructured_ingest-0.5.9.dist-info → unstructured_ingest-0.5.11.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
- from contextlib import contextmanager
4
3
  from dataclasses import dataclass, field
5
4
  from time import time
6
- from typing import TYPE_CHECKING, Generator, Optional
5
+ from typing import TYPE_CHECKING, Any, Optional
7
6
 
8
7
  from pydantic import Field, Secret
9
8
 
@@ -34,7 +33,7 @@ from unstructured_ingest.v2.processes.connectors.fsspec.fsspec import (
34
33
  )
35
34
 
36
35
  if TYPE_CHECKING:
37
- from dropboxdrivefs import DropboxDriveFileSystem
36
+ pass
38
37
 
39
38
  CONNECTOR_TYPE = "dropbox"
40
39
 
@@ -46,32 +45,95 @@ class DropboxIndexerConfig(FsspecIndexerConfig):
46
45
 
47
46
 
48
47
class DropboxAccessConfig(FsspecAccessConfig):
    """Credentials for Dropbox: either a short-lived access token, or a
    refresh token plus app key/secret from which an access token is minted
    (see DropboxConnectionConfig.get_access_config)."""

    # Short-lived (4h) token that needs to be generated anew each time it expires.
    token: Optional[str] = Field(
        default=None, description="Dropbox access token."
    )
    app_key: Optional[str] = Field(default=None, description="Dropbox app key.")
    app_secret: Optional[str] = Field(default=None, description="Dropbox app secret.")
    # Long-lived token that doesn't expire; exchanged for short-lived access tokens.
    refresh_token: Optional[str] = Field(
        default=None, description="Dropbox refresh token."
    )
50
56
 
51
57
 
52
58
  class DropboxConnectionConfig(FsspecConnectionConfig):
53
- supported_protocols: list[str] = field(default_factory=lambda: ["dropbox"], init=False)
54
59
  access_config: Secret[DropboxAccessConfig] = Field(
55
60
  default=DropboxAccessConfig(), validate_default=True
56
61
  )
57
- connector_type: str = Field(default=CONNECTOR_TYPE, init=False)
58
-
59
- @requires_dependencies(["dropboxdrivefs", "fsspec"], extras="dropbox")
60
- @contextmanager
61
- def get_client(self, protocol: str) -> Generator["DropboxDriveFileSystem", None, None]:
62
- with super().get_client(protocol=protocol) as client:
63
- yield client
62
+ connector_type: str = Field(default=CONNECTOR_TYPE)
63
+
64
    @requires_dependencies(["dropbox"])
    def get_dropbox_access_token_from_refresh(
        self,
        refresh_token: str,
        app_key: str,
        app_secret: str,
    ) -> str:
        """
        Uses the Dropbox Python SDK to exchange a long-lived refresh token for an access token.

        Args:
            refresh_token: long-lived OAuth2 refresh token.
            app_key: Dropbox app key.
            app_secret: Dropbox app secret.

        Returns:
            A short-lived OAuth2 access token string.
        """
        import dropbox

        dbx = dropbox.Dropbox(
            oauth2_access_token=None,
            oauth2_refresh_token=refresh_token,
            app_key=app_key,
            app_secret=app_secret,
        )

        # This call fetches a new short-lived token and auto-updates dbx._oauth2_access_token
        dbx.check_and_refresh_access_token()
        # NOTE(review): _oauth2_access_token is a private SDK attribute; confirm it
        # remains available across dropbox SDK upgrades.
        short_lived_token = dbx._oauth2_access_token  # Private attr, but standard usage
        return short_lived_token
87
+
88
    def get_access_config(self) -> dict[str, Any]:
        """
        Overrides the parent FsspecConnectionConfig.get_access_config() to ensure
        that we always provide an access token if refresh credentials exist.

        Raises:
            ValueError: if a fresh token could not be minted, or if neither a
                token nor refresh credentials were supplied.
        """
        base_conf = super().get_access_config()

        refresh_token = base_conf.get("refresh_token")
        app_key = base_conf.get("app_key")
        app_secret = base_conf.get("app_secret")

        # Standard scenario - a refresh token and app credentials were provided,
        # which we use to retrieve a short-lived access token.
        if refresh_token and app_key and app_secret:
            logger.debug("Attempting to generate access token from refresh token...")
            new_token = self.get_dropbox_access_token_from_refresh(
                refresh_token=refresh_token,
                app_key=app_key,
                app_secret=app_secret,
            )
            if not new_token:
                raise ValueError(
                    "Unable to retrieve an access token from Dropbox. "
                    "Please check that your refresh token, app key, and secret are valid."
                )
            base_conf["token"] = new_token
        elif not base_conf.get("token"):  # we might already have an access token from outside
            # We have neither an existing short-lived token nor refresh credentials.
            raise ValueError(
                "No valid token or refresh_token with app credentials was found. "
                "Please check that your refresh token, app key, and secret are valid "
                "or provide a valid short-lived token"
            )

        return base_conf
123
+
124
+ @requires_dependencies(["dropbox"])
65
125
  def wrap_error(self, e: Exception) -> Exception:
66
126
  from dropbox.exceptions import AuthError, HttpError, RateLimitError
67
127
 
68
128
  if not isinstance(e, HttpError):
69
- logger.error(f"unhandled exception from dropbox ({type(e)}): {e}", exc_info=True)
129
+ logger.error(f"Unhandled Dropbox exception: {repr(e)}", exc_info=True)
70
130
  return e
131
+
71
132
  if isinstance(e, AuthError):
72
133
  raise UserAuthError(e.error)
73
- if isinstance(e, RateLimitError):
134
+ elif isinstance(e, RateLimitError):
74
135
  return CustomRateLimitError(e.error)
136
+
75
137
  status_code = e.status_code
76
138
  if 400 <= status_code < 500:
77
139
  if body := getattr(e, "body", None):
@@ -83,7 +145,8 @@ class DropboxConnectionConfig(FsspecConnectionConfig):
83
145
  return ProviderError(body)
84
146
  else:
85
147
  return ProviderError(e.body)
86
- logger.error(f"unhandled exception from dropbox ({type(e)}): {e}", exc_info=True)
148
+
149
+ logger.error(f"Unhandled Dropbox HttpError: {repr(e)}", exc_info=True)
87
150
  return e
88
151
 
89
152
 
@@ -0,0 +1,453 @@
1
+ import math
2
+ from collections import abc
3
+ from contextlib import contextmanager
4
+ from dataclasses import dataclass, field
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Union
7
+
8
+ from pydantic import Field, Secret
9
+
10
+ from unstructured_ingest.error import SourceConnectionError
11
+ from unstructured_ingest.utils.dep_check import requires_dependencies
12
+ from unstructured_ingest.v2.interfaces import (
13
+ AccessConfig,
14
+ ConnectionConfig,
15
+ Downloader,
16
+ DownloaderConfig,
17
+ DownloadResponse,
18
+ FileData,
19
+ FileDataSourceMetadata,
20
+ Indexer,
21
+ IndexerConfig,
22
+ SourceIdentifiers,
23
+ )
24
+ from unstructured_ingest.v2.logger import logger
25
+ from unstructured_ingest.v2.processes.connector_registry import (
26
+ SourceRegistryEntry,
27
+ )
28
+
29
+ if TYPE_CHECKING:
30
+ from atlassian import Jira
31
+
32
CONNECTOR_TYPE = "jira"

# Separators used when flattening issue fields into the downloaded text document:
# c_sep joins values within one line, r_sep separates lines/sections.
DEFAULT_C_SEP = " " * 5
DEFAULT_R_SEP = "\n"
36
+
37
+
38
+ @dataclass
39
+ class JiraIssueMetadata:
40
+ id: str
41
+ key: str
42
+ board_id: Optional[str] = None
43
+
44
+ @property
45
+ def project_id(self) -> str:
46
+ return self.key.split("-")[0]
47
+
48
+ def to_dict(self) -> Dict[str, Union[str, None]]:
49
+ return {
50
+ "id": self.id,
51
+ "key": self.key,
52
+ "board_id": self.board_id,
53
+ "project_id": self.project_id,
54
+ }
55
+
56
+
57
+ class FieldGetter(dict):
58
+ def __getitem__(self, key):
59
+ value = super().__getitem__(key) if key in self else None
60
+ if value is None:
61
+ value = FieldGetter({})
62
+ return value
63
+
64
+
65
+ def nested_object_to_field_getter(obj: dict) -> Union[FieldGetter, dict]:
66
+ if isinstance(obj, abc.Mapping):
67
+ new_object = {}
68
+ for k, v in obj.items():
69
+ if isinstance(v, abc.Mapping):
70
+ new_object[k] = FieldGetter(nested_object_to_field_getter(v))
71
+ else:
72
+ new_object[k] = v
73
+ return FieldGetter(new_object)
74
+ else:
75
+ return obj
76
+
77
+
78
def issues_fetcher_wrapper(func, results_key="results", number_of_issues_to_fetch: int = 100):
    """
    A decorator function that wraps around a function to fetch issues from Jira API in a paginated
    manner. This is required because the Jira API has a limit of 100 issues per request.

    Args:
        func (callable): The function to be wrapped. This function should accept `limit` and `start`
            as keyword arguments.
        results_key (str, optional): The key in the response dictionary that contains the list of
            results. Defaults to "results".
        number_of_issues_to_fetch (int, optional): The total number of issues to fetch. Defaults to
            100.

    Returns:
        list: A list of all fetched issues, capped at `number_of_issues_to_fetch`.

    Raises:
        KeyError: If the response dictionary does not contain the specified `results_key`.
        TypeError: If the response type from the Jira API is neither list nor dict.
    """

    def wrapper(*args, **kwargs) -> list:
        # Guard against a zero/negative request count, which previously caused
        # a ZeroDivisionError in the iteration-count computation.
        if number_of_issues_to_fetch <= 0:
            return []

        page_size = min(100, number_of_issues_to_fetch)
        kwargs["start"] = kwargs.get("start", 0)

        all_results = []
        num_iterations = math.ceil(number_of_issues_to_fetch / page_size)

        for _ in range(num_iterations):
            remaining = number_of_issues_to_fetch - len(all_results)
            if remaining <= 0:
                break
            # Shrink the final page so we never request (or return) more
            # issues than were asked for.
            kwargs["limit"] = min(page_size, remaining)
            response = func(*args, **kwargs)
            if isinstance(response, list):
                all_results += response
            elif isinstance(response, dict):
                if results_key not in response:
                    raise KeyError(f'Response object is missing "{results_key}" key.')
                all_results += response[results_key]
            else:
                raise TypeError(
                    f"""Unexpected response type from Jira API.
                Response type has to be either list or dict, got: {type(response).__name__}."""
                )
            kwargs["start"] += kwargs["limit"]

        # Defensive cap in case a page returned more rows than requested.
        return all_results[:number_of_issues_to_fetch]

    return wrapper
124
+
125
+
126
class JiraAccessConfig(AccessConfig):
    """Secret credentials for Jira. Exactly one scheme is expected:
    password (basic/cloud auth, together with a username) or token (PAT);
    the combination is validated by JiraConnectionConfig.model_post_init."""

    # Used together with JiraConnectionConfig.username for basic auth.
    password: Optional[str] = Field(
        description="Jira password or Cloud API token",
        default=None,
    )
    # Personal Access Token auth; mutually exclusive with password.
    token: Optional[str] = Field(
        description="Jira Personal Access Token",
        default=None,
    )
135
+
136
+
137
class JiraConnectionConfig(ConnectionConfig):
    """Connection settings for a Jira instance.

    Supports basic auth (username + password/API token) and PAT auth (token);
    Jira Cloud requires basic auth. Auth combinations are validated eagerly
    at model construction time via model_post_init.
    """

    url: str = Field(description="URL of the Jira instance")
    username: Optional[str] = Field(
        description="Username or email for authentication",
        default=None,
    )
    cloud: bool = Field(description="Authenticate to Jira Cloud", default=False)
    access_config: Secret[JiraAccessConfig] = Field(description="Access configuration for Jira")

    def model_post_init(self, __context):
        # Validate that exactly one supported auth scheme was configured;
        # fail fast with actionable messages rather than at first API call.
        access_configs = self.access_config.get_secret_value()
        basic_auth = self.username and access_configs.password
        pat_auth = access_configs.token
        if self.cloud and not basic_auth:
            raise ValueError(
                "cloud authentication requires username and API token (--password), "
                "see: https://atlassian-python-api.readthedocs.io/"
            )
        if basic_auth and pat_auth:
            raise ValueError(
                "both password and token provided, only one allowed, "
                "see: https://atlassian-python-api.readthedocs.io/"
            )
        if not (basic_auth or pat_auth):
            raise ValueError(
                "no form of auth provided, see: https://atlassian-python-api.readthedocs.io/"
            )

    @requires_dependencies(["atlassian"], extras="jira")
    @contextmanager
    def get_client(self) -> Generator["Jira", None, None]:
        """Yield an authenticated atlassian.Jira client, closed on context exit."""
        from atlassian import Jira

        access_configs = self.access_config.get_secret_value()
        with Jira(
            url=self.url,
            username=self.username,
            password=access_configs.password,
            token=access_configs.token,
            cloud=self.cloud,
        ) as client:
            yield client
179
+
180
+
181
class JiraIndexerConfig(IndexerConfig):
    """Optional scoping for indexing: any combination of projects, boards and
    individual issues may be given; when none are set, all projects are ingested."""

    projects: Optional[List[str]] = Field(None, description="List of project keys")
    boards: Optional[List[str]] = Field(None, description="List of board IDs")
    issues: Optional[List[str]] = Field(None, description="List of issue keys or IDs")
185
+
186
+
187
@dataclass
class JiraIndexer(Indexer):
    """Enumerates Jira issues (scoped by projects/boards/explicit issues) and
    yields one FileData per unique issue for downstream download."""

    connection_config: JiraConnectionConfig
    index_config: JiraIndexerConfig
    connector_type: str = CONNECTOR_TYPE

    def precheck(self) -> None:
        """Verify connectivity and that the user may browse projects.

        Raises:
            SourceConnectionError: if the Jira API cannot be reached.
            ValueError: if the user lacks the BROWSE_PROJECTS permission.
        """
        try:
            with self.connection_config.get_client() as client:
                response = client.get_permissions("BROWSE_PROJECTS")
                permitted = response["permissions"]["BROWSE_PROJECTS"]["havePermission"]
        except Exception as e:
            logger.error(f"Failed to connect to Jira: {e}", exc_info=True)
            raise SourceConnectionError(f"Failed to connect to Jira: {e}")
        if not permitted:
            raise ValueError(
                """The provided user is not permitted to browse projects
                from the given Jira organization URL.
                Try checking username, password, token and the url arguments.""",
            )
        logger.info("Connection to Jira successful.")

    def _get_issues_within_single_project(self, project_key: str) -> List[JiraIssueMetadata]:
        """Fetch id/key metadata for every issue in one project (paginated)."""
        with self.connection_config.get_client() as client:
            number_of_issues_to_fetch = client.get_project_issues_count(project=project_key)
            # The API may return either a raw count or a dict with a "total" key.
            if isinstance(number_of_issues_to_fetch, dict):
                if "total" not in number_of_issues_to_fetch:
                    raise KeyError('Response object is missing "total" key.')
                number_of_issues_to_fetch = number_of_issues_to_fetch["total"]
            if not number_of_issues_to_fetch:
                logger.warning(f"No issues found in project: {project_key}. Skipping!")
                return []
            get_project_issues = issues_fetcher_wrapper(
                client.get_all_project_issues,
                results_key="issues",
                number_of_issues_to_fetch=number_of_issues_to_fetch,
            )
            issues = get_project_issues(project=project_key, fields=["key", "id"])
            logger.debug(f"Found {len(issues)} issues in project: {project_key}")
            return [JiraIssueMetadata(id=issue["id"], key=issue["key"]) for issue in issues]

    def _get_issues_within_projects(self) -> List[JiraIssueMetadata]:
        """Fetch issues for configured projects; all projects when nothing is scoped."""
        project_keys = self.index_config.projects
        if not project_keys:
            # for when a component list is provided, without any projects
            if self.index_config.boards or self.index_config.issues:
                return []
            # for when no components are provided. all projects will be ingested
            else:
                with self.connection_config.get_client() as client:
                    project_keys = [project["key"] for project in client.projects()]
        return [
            issue
            for project_key in project_keys
            for issue in self._get_issues_within_single_project(project_key)
        ]

    def _get_issues_within_single_board(self, board_id: str) -> List[JiraIssueMetadata]:
        """Fetch id/key metadata for issues on one board.

        NOTE(review): no number_of_issues_to_fetch is passed, so the fetcher's
        default of 100 applies — boards with more issues appear truncated;
        confirm whether this is intended.
        """
        with self.connection_config.get_client() as client:
            get_board_issues = issues_fetcher_wrapper(
                client.get_issues_for_board,
                results_key="issues",
            )
            issues = get_board_issues(board_id=board_id, fields=["key", "id"], jql=None)
            logger.debug(f"Found {len(issues)} issues in board: {board_id}")
            return [
                JiraIssueMetadata(id=issue["id"], key=issue["key"], board_id=board_id)
                for issue in issues
            ]

    def _get_issues_within_boards(self) -> List[JiraIssueMetadata]:
        """Fetch issues for every configured board (empty when none configured)."""
        if not self.index_config.boards:
            return []
        return [
            issue
            for board_id in self.index_config.boards
            for issue in self._get_issues_within_single_board(board_id)
        ]

    def _get_issues(self) -> List[JiraIssueMetadata]:
        """Fetch metadata for explicitly configured issue keys/IDs."""
        with self.connection_config.get_client() as client:
            issues = [
                client.get_issue(issue_id_or_key=issue_key, fields=["key", "id"])
                for issue_key in self.index_config.issues or []
            ]
        return [JiraIssueMetadata(id=issue["id"], key=issue["key"]) for issue in issues]

    def get_issues(self) -> List[JiraIssueMetadata]:
        """Collect issues from boards, projects and explicit keys, deduplicated by id."""
        issues = [
            *self._get_issues_within_boards(),
            *self._get_issues_within_projects(),
            *self._get_issues(),
        ]
        # Select unique issues by issue 'id'.
        # Since boards issues are fetched first,
        # if there are duplicates, the board issues will be kept,
        # in order to keep issue 'board_id' information.
        seen = set()
        unique_issues: List[JiraIssueMetadata] = []
        for issue in issues:
            if issue.id not in seen:
                unique_issues.append(issue)
                seen.add(issue.id)
        return unique_issues

    def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
        """Yield one FileData per unique issue, laid out as <project_id>/<issue_id>.txt."""
        from time import time

        issues = self.get_issues()
        for issue in issues:
            # Build metadata
            metadata = FileDataSourceMetadata(
                date_processed=str(time()),
                record_locator=issue.to_dict(),
            )

            # Construct relative path and filename
            filename = f"{issue.id}.txt"
            relative_path = str(Path(issue.project_id) / filename)

            source_identifiers = SourceIdentifiers(
                filename=filename,
                fullpath=relative_path,
                rel_path=relative_path,
            )

            file_data = FileData(
                identifier=issue.id,
                connector_type=self.connector_type,
                metadata=metadata,
                additional_metadata=issue.to_dict(),
                source_identifiers=source_identifiers,
            )
            yield file_data
321
+
322
+
323
class JiraDownloaderConfig(DownloaderConfig):
    """No Jira-specific download options; inherits everything from DownloaderConfig."""
325
+
326
+
327
@dataclass
class JiraDownloader(Downloader):
    """Downloads a single Jira issue and writes it to disk as a templated
    text document for partitioning.

    The issue's "fields" mapping is read through FieldGetter wrappers, so
    missing nested fields resolve to empty mappings instead of raising.
    """

    connection_config: JiraConnectionConfig
    download_config: JiraDownloaderConfig = field(default_factory=JiraDownloaderConfig)
    connector_type: str = CONNECTOR_TYPE

    def _get_id_fields_for_issue(
        self, issue: dict, c_sep: str = DEFAULT_C_SEP, r_sep: str = DEFAULT_R_SEP
    ) -> str:
        # Header line with the issue's numeric id and human-readable key.
        issue_id, key = issue["id"], issue["key"]
        return f"IssueID_IssueKey:{issue_id}{c_sep}{key}{r_sep}"

    def _get_project_fields_for_issue(
        self, issue: dict, c_sep: str = DEFAULT_C_SEP, r_sep: str = DEFAULT_R_SEP
    ) -> str:
        # Project line; empty string when the parsed fields carry no project.
        if "project" in issue:
            return (
                f"ProjectID_Key:{issue['project']['key']}{c_sep}{issue['project']['name']}{r_sep}"
            )
        else:
            return ""

    def _get_dropdown_fields_for_issue(
        self, issue: dict, c_sep: str = DEFAULT_C_SEP, r_sep: str = DEFAULT_R_SEP
    ) -> str:
        # Renders type/status/priority/assignee/reporter/labels/components.
        # NOTE(review): assumes the Jira Cloud field shapes (e.g.
        # assignee.accountId, reporter.emailAddress) — confirm for server/DC.
        return f"""
        IssueType:{issue["issuetype"]["name"]}
        {r_sep}
        Status:{issue["status"]["name"]}
        {r_sep}
        Priority:{issue["priority"]}
        {r_sep}
        AssigneeID_Name:{issue["assignee"]["accountId"]}{c_sep}{issue["assignee"]["displayName"]}
        {r_sep}
        ReporterAdr_Name:{issue["reporter"]["emailAddress"]}{c_sep}{issue["reporter"]["displayName"]}
        {r_sep}
        Labels:{c_sep.join(issue["labels"])}
        {r_sep}
        Components:{c_sep.join([component["name"] for component in issue["components"]])}
        {r_sep}
        """

    def _get_subtasks_for_issue(self, issue: dict) -> str:
        # Placeholder: subtasks are not currently rendered.
        return ""

    def _get_text_fields_for_issue(
        self, issue: dict, c_sep: str = DEFAULT_C_SEP, r_sep: str = DEFAULT_R_SEP
    ) -> str:
        # Summary, description, and attachment URLs.
        return f"""
        {issue["summary"]}
        {r_sep}
        {issue["description"]}
        {r_sep}
        {c_sep.join([attachment["self"] for attachment in issue["attachment"]])}
        {r_sep}
        """

    def _get_comments_for_issue(
        self, issue: dict, c_sep: str = DEFAULT_C_SEP, r_sep: str = DEFAULT_R_SEP
    ) -> str:
        # Joins all rendered comments with the column separator.
        return c_sep.join(
            [self._get_fields_for_comment(comment) for comment in issue["comment"]["comments"]],
        )

    def _get_fields_for_comment(
        self, comment, c_sep: str = DEFAULT_C_SEP, r_sep: str = DEFAULT_R_SEP
    ) -> str:
        # Single comment: author display name + body.
        return f"{comment['author']['displayName']}{c_sep}{comment['body']}{r_sep}"

    def form_templated_string(
        self,
        issue: dict,
        parsed_fields: Union[FieldGetter, dict],
        c_sep: str = "|||",
        r_sep: str = "\n\n\n",
    ) -> str:
        """Forms a template string via parsing the fields from the API response object on the issue
        The template string will be saved to the disk, and then will be processed by partition."""
        return r_sep.join(
            [
                self._get_id_fields_for_issue(issue),
                self._get_project_fields_for_issue(parsed_fields),
                self._get_dropdown_fields_for_issue(parsed_fields),
                self._get_subtasks_for_issue(parsed_fields),
                self._get_comments_for_issue(parsed_fields),
                self._get_text_fields_for_issue(parsed_fields),
            ],
        )

    def update_file_data(self, file_data: FileData, issue: dict) -> None:
        """Copy created/updated timestamps and a display name onto the FileData."""
        file_data.metadata.date_created = issue["fields"]["created"]
        file_data.metadata.date_modified = issue["fields"]["updated"]
        file_data.display_name = issue["fields"]["project"]["name"]

    def get_issue(self, issue_key: str) -> dict:
        """Fetch the full issue payload for *issue_key*.

        Raises:
            SourceConnectionError: if the API call fails for any reason.
        """
        try:
            with self.connection_config.get_client() as client:
                return client.issue(key=issue_key)
        except Exception as e:
            logger.error(f"Failed to fetch issue with key: {issue_key}: {e}", exc_info=True)
            raise SourceConnectionError(f"Failed to fetch issue with key: {issue_key}: {e}")

    def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
        """Fetch the issue named in file_data, render it, and write it to the download path."""
        issue_key = file_data.additional_metadata.get("key")
        if not issue_key:
            raise ValueError("Issue key not found in metadata.")
        issue = self.get_issue(issue_key)
        parsed_fields = nested_object_to_field_getter(issue["fields"])
        issue_str = self.form_templated_string(issue, parsed_fields)

        download_path = self.get_download_path(file_data)
        if download_path is None:
            raise ValueError("File data is missing source identifiers data.")
        download_path.parent.mkdir(parents=True, exist_ok=True)
        with open(download_path, "w") as f:
            f.write(issue_str)
        self.update_file_data(file_data, issue)
        return self.generate_download_response(file_data=file_data, download_path=download_path)
445
+
446
+
447
# Registry entry wiring the Jira connector into the source-connector framework.
jira_source_entry = SourceRegistryEntry(
    connection_config=JiraConnectionConfig,
    indexer_config=JiraIndexerConfig,
    indexer=JiraIndexer,
    downloader_config=JiraDownloaderConfig,
    downloader=JiraDownloader,
)
@@ -0,0 +1,31 @@
1
"""Zendesk source connector package.

Re-exports the public connector API and registers the connector with the
source registry as a side effect of importing this package.
"""

from unstructured_ingest.v2.processes.connector_registry import (
    add_source_entry,
)

from .zendesk import (
    CONNECTOR_TYPE,
    ZendeskAccessConfig,
    ZendeskClient,
    ZendeskConnectionConfig,
    ZendeskDownloader,
    ZendeskDownloaderConfig,
    ZendeskIndexer,
    ZendeskIndexerConfig,
    ZendeskTicket,
    zendesk_source_entry,
)

__all__ = [
    "add_source_entry",
    "zendesk_source_entry",
    "ZendeskAccessConfig",
    "ZendeskClient",
    "ZendeskConnectionConfig",
    "ZendeskDownloader",
    "ZendeskDownloaderConfig",
    "ZendeskIndexer",
    "ZendeskIndexerConfig",
    "ZendeskTicket",
]

# Registering at import time makes the connector available as soon as the
# package is imported anywhere in the framework.
add_source_entry(source_type=CONNECTOR_TYPE, entry=zendesk_source_entry)