unstructured-ingest 1.2.9__py3-none-any.whl → 1.2.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of unstructured-ingest might be problematic.

Files changed (59)
  1. unstructured_ingest/__version__.py +1 -1
  2. unstructured_ingest/embed/bedrock.py +1 -1
  3. unstructured_ingest/embed/octoai.py +1 -1
  4. unstructured_ingest/embed/openai.py +1 -1
  5. unstructured_ingest/embed/togetherai.py +4 -4
  6. unstructured_ingest/embed/vertexai.py +1 -1
  7. unstructured_ingest/embed/voyageai.py +2 -2
  8. unstructured_ingest/error.py +113 -6
  9. unstructured_ingest/errors_v2.py +139 -8
  10. unstructured_ingest/interfaces/downloader.py +2 -2
  11. unstructured_ingest/processes/connectors/airtable.py +1 -0
  12. unstructured_ingest/processes/connectors/azure_ai_search.py +1 -1
  13. unstructured_ingest/processes/connectors/chroma.py +2 -2
  14. unstructured_ingest/processes/connectors/confluence.py +6 -2
  15. unstructured_ingest/processes/connectors/databricks/volumes.py +1 -1
  16. unstructured_ingest/processes/connectors/databricks/volumes_table.py +1 -0
  17. unstructured_ingest/processes/connectors/delta_table.py +3 -3
  18. unstructured_ingest/processes/connectors/discord.py +3 -3
  19. unstructured_ingest/processes/connectors/duckdb/duckdb.py +1 -1
  20. unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +3 -2
  21. unstructured_ingest/processes/connectors/fsspec/azure.py +1 -1
  22. unstructured_ingest/processes/connectors/fsspec/box.py +1 -1
  23. unstructured_ingest/processes/connectors/fsspec/dropbox.py +3 -2
  24. unstructured_ingest/processes/connectors/fsspec/fsspec.py +8 -10
  25. unstructured_ingest/processes/connectors/fsspec/gcs.py +2 -2
  26. unstructured_ingest/processes/connectors/fsspec/s3.py +1 -1
  27. unstructured_ingest/processes/connectors/github.py +8 -3
  28. unstructured_ingest/processes/connectors/gitlab.py +1 -1
  29. unstructured_ingest/processes/connectors/google_drive.py +2 -4
  30. unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +12 -10
  31. unstructured_ingest/processes/connectors/jira.py +1 -1
  32. unstructured_ingest/processes/connectors/kafka/kafka.py +5 -5
  33. unstructured_ingest/processes/connectors/local.py +2 -1
  34. unstructured_ingest/processes/connectors/milvus.py +6 -1
  35. unstructured_ingest/processes/connectors/mongodb.py +6 -1
  36. unstructured_ingest/processes/connectors/neo4j.py +6 -2
  37. unstructured_ingest/processes/connectors/notion/client.py +14 -14
  38. unstructured_ingest/processes/connectors/notion/connector.py +1 -1
  39. unstructured_ingest/processes/connectors/onedrive.py +2 -1
  40. unstructured_ingest/processes/connectors/outlook.py +1 -1
  41. unstructured_ingest/processes/connectors/pinecone.py +8 -6
  42. unstructured_ingest/processes/connectors/redisdb.py +2 -2
  43. unstructured_ingest/processes/connectors/salesforce.py +6 -6
  44. unstructured_ingest/processes/connectors/sharepoint.py +5 -2
  45. unstructured_ingest/processes/connectors/slack.py +1 -1
  46. unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +1 -0
  47. unstructured_ingest/processes/connectors/sql/sqlite.py +1 -0
  48. unstructured_ingest/processes/connectors/vectara.py +1 -1
  49. unstructured_ingest/processes/connectors/weaviate/cloud.py +1 -0
  50. unstructured_ingest/processes/connectors/weaviate/weaviate.py +1 -1
  51. unstructured_ingest/processes/connectors/zendesk/client.py +8 -2
  52. unstructured_ingest/processes/connectors/zendesk/zendesk.py +4 -1
  53. unstructured_ingest/processes/partitioner.py +1 -1
  54. unstructured_ingest/unstructured_api.py +1 -1
  55. {unstructured_ingest-1.2.9.dist-info → unstructured_ingest-1.2.11.dist-info}/METADATA +1 -1
  56. {unstructured_ingest-1.2.9.dist-info → unstructured_ingest-1.2.11.dist-info}/RECORD +59 -59
  57. {unstructured_ingest-1.2.9.dist-info → unstructured_ingest-1.2.11.dist-info}/WHEEL +0 -0
  58. {unstructured_ingest-1.2.9.dist-info → unstructured_ingest-1.2.11.dist-info}/entry_points.txt +0 -0
  59. {unstructured_ingest-1.2.9.dist-info → unstructured_ingest-1.2.11.dist-info}/licenses/LICENSE.md +0 -0
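
The dominant change across these files is error-handling consolidation: connectors stop importing from unstructured_ingest.errors_v2 and stop raising raw third-party or builtin exceptions, importing everything from unstructured_ingest.error instead, including package-level replacements for builtins such as ValueError, KeyError, FileExistsError, and TimeoutError. The module itself is not shown in this diff; a minimal sketch of the hierarchy its imports imply (class names from the hunks below, structure assumed) might look like:

import builtins

# Hypothetical sketch -- the real unstructured_ingest.error module is not
# shown in this diff. Class names come from the import hunks below; the
# hierarchy is an assumption.
class UnstructuredIngestError(Exception):
    """Base type for every error the ingest pipeline raises."""

class UserError(UnstructuredIngestError):
    """The caller supplied bad input or configuration."""

class UserAuthError(UserError):
    """Credentials were missing, expired, or rejected."""

class ProviderError(UnstructuredIngestError):
    """The upstream service failed on its side."""

class RateLimitError(ProviderError):
    """The upstream service throttled the request."""

# Shadowing builtins keeps `raise ValueError(...)` idiomatic inside connectors
# while remaining catchable as UnstructuredIngestError.
class ValueError(UnstructuredIngestError, builtins.ValueError):
    pass

With a shape like this, a single pipeline-level `except UnstructuredIngestError` catches every connector failure uniformly, which appears to be the motivation for replacing the bare `raise` and `return e` fall-throughs in the hunks below.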
unstructured_ingest/processes/connectors/github.py

@@ -12,7 +12,12 @@ from unstructured_ingest.data_types.file_data import (
     FileDataSourceMetadata,
     SourceIdentifiers,
 )
-from unstructured_ingest.errors_v2 import ProviderError, UserAuthError, UserError
+from unstructured_ingest.error import (
+    ProviderError,
+    UnstructuredIngestError,
+    UserAuthError,
+    UserError,
+)
 from unstructured_ingest.interfaces import (
     AccessConfig,
     ConnectionConfig,
@@ -85,7 +90,7 @@ class GithubConnectionConfig(ConnectionConfig):
         if status_code > 500:
             return ProviderError(e.response.text)
         logger.debug(f"unhandled http error: {e}")
-        return e
+        return UnstructuredIngestError(str(e))
 
     @requires_dependencies(["requests"], extras="github")
     def wrap_error(self, e: Exception) -> Exception:
@@ -97,7 +102,7 @@ class GithubConnectionConfig(ConnectionConfig):
         if isinstance(e, HTTPError):
             return self.wrap_http_error(e=e)
         logger.debug(f"unhandled error: {e}")
-        return e
+        return UnstructuredIngestError(str(e))
 
 
 class GithubIndexerConfig(IndexerConfig):
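
The github.py hunks show the pattern repeated in most connectors below: the wrapper previously fell through by returning the raw exception, so callers could receive arbitrary types; now every path returns a package-defined error. A sketch of the normalized wrapper as a free function (only the `status_code > 500` branch, the debug log, and the fall-through are confirmed by the hunk above; the 401/4xx branches are illustrative assumptions):

# Free-function sketch; in the package this is a method on the connection config.
def wrap_http_error(e) -> Exception:  # e: requests.HTTPError
    status_code = e.response.status_code
    if status_code == 401:
        return UserAuthError(e.response.text)  # rejected credentials (assumed branch)
    if 400 <= status_code < 500:
        return UserError(e.response.text)  # caller-side mistake (assumed branch)
    if status_code > 500:
        return ProviderError(e.response.text)  # upstream failure (from the hunk)
    logger.debug(f"unhandled http error: {e}")
    return UnstructuredIngestError(str(e))  # never the raw exception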
unstructured_ingest/processes/connectors/gitlab.py

@@ -13,7 +13,7 @@ from unstructured_ingest.data_types.file_data import (
     FileDataSourceMetadata,
     SourceIdentifiers,
 )
-from unstructured_ingest.error import SourceConnectionError
+from unstructured_ingest.error import SourceConnectionError, ValueError
 from unstructured_ingest.interfaces import (
     AccessConfig,
     ConnectionConfig,
unstructured_ingest/processes/connectors/google_drive.py

@@ -13,9 +13,7 @@ from unstructured_ingest.data_types.file_data import (
     FileDataSourceMetadata,
     SourceIdentifiers,
 )
-from unstructured_ingest.error import (
-    SourceConnectionError,
-)
+from unstructured_ingest.error import SourceConnectionError, UserAuthError, ValueError
 from unstructured_ingest.interfaces import (
     AccessConfig,
     ConnectionConfig,
@@ -113,7 +111,7 @@ class GoogleDriveConnectionConfig(ConnectionConfig):
         except HttpError as exc:
             raise ValueError(f"{exc.reason}")
         except exceptions.DefaultCredentialsError:
-            raise ValueError("The provided API key is invalid.")
+            raise UserAuthError("The provided API key is invalid.")
 
 
 class GoogleDriveIndexerConfig(IndexerConfig):
unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py

@@ -8,7 +8,13 @@ from typing import TYPE_CHECKING, Any, Generator, Optional, Tuple
 from pydantic import Field, Secret
 
 from unstructured_ingest.data_types.file_data import FileData
-from unstructured_ingest.errors_v2 import ProviderError, UserAuthError, UserError
+from unstructured_ingest.error import (
+    DestinationConnectionError,
+    IcebergCommitFailedException,
+    ProviderError,
+    UserAuthError,
+    UserError,
+)
 from unstructured_ingest.interfaces import (
     AccessConfig,
     ConnectionConfig,
@@ -40,10 +46,6 @@ DEFAULT_ICEBERG_URI_PATH = "/mds/iceberg"
 DEFAULT_ICEBERG_CATALOG_TYPE = "rest"
 
 
-class IcebergCommitFailedException(Exception):
-    """Failed to commit changes to the iceberg table."""
-
-
 class IbmWatsonxAccessConfig(AccessConfig):
     iam_api_key: str = Field(description="IBM IAM API Key")
     access_key_id: str = Field(description="Cloud Object Storage HMAC Access Key ID")
@@ -292,16 +294,16 @@ class IbmWatsonxUploader(SQLUploader):
             except CommitFailedException as e:
                 table.refresh()
                 logger.debug(e)
-                raise IcebergCommitFailedException(e)
-            except RESTError:
-                raise
+                raise IcebergCommitFailedException(str(e))
+            except RESTError as e:
+                raise DestinationConnectionError(str(e))
             except Exception as e:
                 raise ProviderError(f"Failed to upload data to table: {e}")
 
         try:
             return _upload_data_table(table, data_table, file_data)
-        except RESTError:
-            raise
+        except RESTError as e:
+            raise DestinationConnectionError(str(e))
         except ProviderError:
             raise
         except Exception as e:
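
The watsonx uploader now separates three failure modes: an Iceberg optimistic-concurrency conflict (CommitFailedException, surfaced as IcebergCommitFailedException after table.refresh()), catalog transport failures (RESTError, now a DestinationConnectionError rather than a bare re-raise), and everything else (ProviderError). One plausible reading is that a caller retries commits on IcebergCommitFailedException; a hedged sketch of such a retry loop (the loop, its bound, and the entry point are assumptions, not shown in the diff):

# Assumed caller-side retry; the diff confirms only the exception mapping.
# table.refresh() has already run when IcebergCommitFailedException is raised,
# so the next attempt commits against the latest table snapshot.
MAX_COMMIT_RETRIES = 3  # illustrative bound

for attempt in range(MAX_COMMIT_RETRIES):
    try:
        upload_data(table, data_table, file_data)  # hypothetical entry point
        break
    except IcebergCommitFailedException:
        if attempt == MAX_COMMIT_RETRIES - 1:
            raise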
unstructured_ingest/processes/connectors/jira.py

@@ -12,7 +12,7 @@ from unstructured_ingest.data_types.file_data import (
     FileDataSourceMetadata,
     SourceIdentifiers,
 )
-from unstructured_ingest.error import SourceConnectionError
+from unstructured_ingest.error import SourceConnectionError, ValueError
 from unstructured_ingest.interfaces import (
     AccessConfig,
     ConnectionConfig,
unstructured_ingest/processes/connectors/kafka/kafka.py

@@ -17,6 +17,8 @@ from unstructured_ingest.error import (
     DestinationConnectionError,
     SourceConnectionError,
     SourceConnectionNetworkError,
+    UnstructuredIngestError,
+    ValueError,
 )
 from unstructured_ingest.interfaces import (
     AccessConfig,
@@ -101,7 +103,7 @@ class KafkaIndexer(Indexer, ABC):
 
     @requires_dependencies(["confluent_kafka"], extras="kafka")
     def generate_messages(self) -> Generator[Any, None, None]:
-        from confluent_kafka import KafkaError, KafkaException
+        from confluent_kafka import KafkaError
 
         messages_consumed = 0
         max_empty_polls = 10
@@ -122,7 +124,7 @@ class KafkaIndexer(Indexer, ABC):
                     )
                     break
                 else:
-                    raise KafkaException(msg.error())
+                    raise UnstructuredIngestError(msg.error())
             try:
                 empty_polls = 0
                 messages_consumed += 1
@@ -246,8 +248,6 @@ class KafkaUploader(Uploader, ABC):
             raise DestinationConnectionError(f"failed to validate connection: {e}")
 
     def produce_batch(self, elements: list[dict]) -> None:
-        from confluent_kafka.error import KafkaException
-
         producer = self.connection_config.get_producer()
         failed_producer = False
 
@@ -268,7 +268,7 @@ class KafkaUploader(Uploader, ABC):
             logger.debug(f"another iteration of kafka producer flush. Queue length: {producer_len}")
             producer.flush(timeout=self.upload_config.timeout)
         if failed_producer:
-            raise KafkaException("failed to produce all messages in batch")
+            raise UnstructuredIngestError("failed to produce all kafka messages in batch")
 
     def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
         for element_batch in batch_generator(data, batch_size=self.upload_config.batch_size):
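
Both kafka.py hunks remove the last uses of confluent_kafka's KafkaException from control flow; delivery failures flip a flag during produce and are reported once, after the flush loop, as a single UnstructuredIngestError. A minimal sketch of that accumulate-then-raise idiom, assuming confluent_kafka's standard on_delivery callback (topic name, broker config, and payloads are illustrative):

import json
from confluent_kafka import Producer

from unstructured_ingest.error import UnstructuredIngestError

topic = "ingest-elements"       # illustrative
elements = [{"text": "hello"}]  # illustrative payloads
failures: list = []

def on_delivery(err, msg):
    # Called by confluent_kafka per message; plays the role of the hunk's
    # failed_producer flag.
    if err is not None:
        failures.append(err)

producer = Producer({"bootstrap.servers": "localhost:9092"})  # assumed config
for element in elements:
    producer.produce(topic, value=json.dumps(element), on_delivery=on_delivery)
while len(producer) > 0:  # outstanding queue length, as in the flush loop above
    producer.flush(timeout=5)
if failures:
    raise UnstructuredIngestError("failed to produce all kafka messages in batch")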
unstructured_ingest/processes/connectors/local.py

@@ -13,6 +13,7 @@ from unstructured_ingest.data_types.file_data import (
     FileDataSourceMetadata,
     SourceIdentifiers,
 )
+from unstructured_ingest.error import FileExistsError
 from unstructured_ingest.interfaces import (
     AccessConfig,
     ConnectionConfig,
@@ -168,7 +169,7 @@ class LocalUploaderConfig(UploaderConfig):
 
     def __post_init__(self):
         if self.output_path.exists() and self.output_path.is_file():
-            raise ValueError("output path already exists as a file")
+            raise FileExistsError(f"output path {self.output_path} already exists as a file")
 
 
 @dataclass
unstructured_ingest/processes/connectors/milvus.py

@@ -7,7 +7,12 @@ from dateutil import parser
 from pydantic import Field, Secret
 
 from unstructured_ingest.data_types.file_data import FileData
-from unstructured_ingest.error import DestinationConnectionError, WriteError
+from unstructured_ingest.error import (
+    DestinationConnectionError,
+    KeyError,
+    ValueError,
+    WriteError,
+)
 from unstructured_ingest.interfaces import (
     AccessConfig,
     ConnectionConfig,
unstructured_ingest/processes/connectors/mongodb.py

@@ -14,7 +14,12 @@ from unstructured_ingest.data_types.file_data import (
     FileDataSourceMetadata,
     SourceIdentifiers,
 )
-from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError
+from unstructured_ingest.error import (
+    ConnectionError,
+    DestinationConnectionError,
+    SourceConnectionError,
+    ValueError,
+)
 from unstructured_ingest.interfaces import (
     AccessConfig,
     ConnectionConfig,
unstructured_ingest/processes/connectors/neo4j.py

@@ -13,7 +13,11 @@ from pydantic import BaseModel, ConfigDict, Field, Secret, ValidationError, field_validator
 
 from unstructured_ingest.data_types.entities import EntitiesData, Entity, EntityRelationship
 from unstructured_ingest.data_types.file_data import FileData
-from unstructured_ingest.error import DestinationConnectionError
+from unstructured_ingest.error import (
+    DestinationConnectionError,
+    UnstructuredIngestError,
+    ValueError,
+)
 from unstructured_ingest.interfaces import (
     AccessConfig,
     ConnectionConfig,
@@ -374,7 +378,7 @@ class Neo4jUploader(Uploader):
         if e.code == "Neo.ClientError.Schema.EquivalentSchemaRuleAlreadyExists":
             logger.info(f"Index on nodes labeled '{label.value}' already exists.")
         else:
-            raise
+            raise UnstructuredIngestError(str(e))
 
     async def _delete_old_data_if_exists(self, file_data: FileData, client: AsyncDriver) -> None:
         logger.info(f"Deleting old data for the record '{file_data.identifier}' (if present).")
unstructured_ingest/processes/connectors/notion/client.py

@@ -8,8 +8,8 @@ from notion_client.api_endpoints import BlocksEndpoint as NotionBlocksEndpoint
 from notion_client.api_endpoints import DatabasesEndpoint as NotionDatabasesEndpoint
 from notion_client.api_endpoints import Endpoint
 from notion_client.api_endpoints import PagesEndpoint as NotionPagesEndpoint
-from notion_client.errors import HTTPResponseError, RequestTimeoutError
 
+from unstructured_ingest.error import SourceConnectionError, TimeoutError
 from unstructured_ingest.processes.connectors.notion.ingest_backoff import RetryHandler
 from unstructured_ingest.processes.connectors.notion.ingest_backoff.types import RetryStrategyConfig
 from unstructured_ingest.processes.connectors.notion.types.block import Block
@@ -132,8 +132,8 @@ class DatabasesEndpoint(NotionDatabasesEndpoint):
                 else (self.parent.client.send(request))
             )  # type: ignore
             return response.status_code
-        except httpx.TimeoutException:
-            raise RequestTimeoutError()
+        except httpx.TimeoutException as e:
+            raise TimeoutError(str(e))
 
     def query(self, database_id: str, **kwargs: Any) -> Tuple[List[Page], dict]:
         """Get a list of [Pages](https://developers.notion.com/reference/page) contained in the database.
@@ -236,8 +236,8 @@ class PagesEndpoint(NotionPagesEndpoint):
                 else (self.parent.client.send(request))
             )  # type: ignore
             return response.status_code
-        except httpx.TimeoutException:
-            raise RequestTimeoutError()
+        except httpx.TimeoutException as e:
+            raise TimeoutError(str(e))
 
 
 class Client(NotionClient):
@@ -266,9 +266,9 @@ class AsyncBlocksChildrenEndpoint(NotionBlocksChildrenEndpoint):
             )
             response.raise_for_status()
         except httpx.HTTPStatusError as e:
-            raise HTTPResponseError(f"Failed to list blocks: {str(e)}")
-        except httpx.TimeoutException:
-            raise RequestTimeoutError()
+            raise SourceConnectionError(f"Failed to list blocks: {str(e)}")
+        except httpx.TimeoutException as e:
+            raise TimeoutError(str(e))
 
         resp = response.json()
         child_blocks = [Block.from_dict(data=b) for b in resp.pop("results", [])]
@@ -307,9 +307,9 @@ class AsyncDatabasesEndpoint(NotionDatabasesEndpoint):
             )
             response.raise_for_status()
         except httpx.HTTPStatusError as e:
-            raise HTTPResponseError(f"Failed to retrieve database: {str(e)}")
-        except httpx.TimeoutException:
-            raise RequestTimeoutError()
+            raise SourceConnectionError(f"Failed to retrieve database: {str(e)}")
+        except httpx.TimeoutException as e:
+            raise TimeoutError(str(e))
 
         return Database.from_dict(data=response.json())
 
@@ -322,9 +322,9 @@ class AsyncDatabasesEndpoint(NotionDatabasesEndpoint):
             )
             response.raise_for_status()
         except httpx.HTTPStatusError as e:
-            raise HTTPResponseError(f"Failed to query database: {str(e)}")
-        except httpx.TimeoutException:
-            raise RequestTimeoutError()
+            raise SourceConnectionError(f"Failed to query database: {str(e)}")
+        except httpx.TimeoutException as e:
+            raise TimeoutError(str(e))
 
         resp = response.json()
         pages = [Page.from_dict(data=p) for p in resp.pop("results", [])]
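
Every endpoint override in notion/client.py now translates httpx failures identically: httpx.TimeoutException becomes the package's TimeoutError and httpx.HTTPStatusError becomes SourceConnectionError, so nothing from notion_client.errors leaks to callers. The mapping is mechanical enough to factor into a helper; a hypothetical sketch (the connector repeats it inline in each endpoint instead):

import httpx

from unstructured_ingest.error import SourceConnectionError, TimeoutError

def raise_for_notion_error(e: Exception, context: str) -> None:
    # Hypothetical helper, not in the package; mirrors the inline mapping above.
    if isinstance(e, httpx.TimeoutException):
        raise TimeoutError(str(e))
    if isinstance(e, httpx.HTTPStatusError):
        raise SourceConnectionError(f"{context}: {e}")
    raise e  # anything else propagates unchanged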
unstructured_ingest/processes/connectors/notion/connector.py

@@ -9,7 +9,7 @@ from unstructured_ingest.data_types.file_data import (
     FileDataSourceMetadata,
     SourceIdentifiers,
 )
-from unstructured_ingest.error import SourceConnectionError
+from unstructured_ingest.error import SourceConnectionError, ValueError
 from unstructured_ingest.interfaces import (
     AccessConfig,
     ConnectionConfig,
unstructured_ingest/processes/connectors/onedrive.py

@@ -19,8 +19,9 @@ from unstructured_ingest.error import (
     DestinationConnectionError,
     SourceConnectionError,
     SourceConnectionNetworkError,
+    UserAuthError,
+    ValueError,
 )
-from unstructured_ingest.errors_v2 import UserAuthError
 from unstructured_ingest.interfaces import (
     AccessConfig,
     ConnectionConfig,
unstructured_ingest/processes/connectors/outlook.py

@@ -12,7 +12,7 @@ from unstructured_ingest.data_types.file_data import (
     FileDataSourceMetadata,
     SourceIdentifiers,
 )
-from unstructured_ingest.error import SourceConnectionError
+from unstructured_ingest.error import SourceConnectionError, ValueError
 from unstructured_ingest.interfaces import (
     AccessConfig,
     ConnectionConfig,
unstructured_ingest/processes/connectors/pinecone.py

@@ -7,8 +7,12 @@ from typing import TYPE_CHECKING, Any, Literal, Optional
 from pydantic import Field, Secret
 
 from unstructured_ingest.data_types.file_data import FileData
-from unstructured_ingest.error import DestinationConnectionError
-from unstructured_ingest.errors_v2 import UserError
+from unstructured_ingest.error import (
+    DestinationConnectionError,
+    NotFoundError,
+    UnstructuredIngestError,
+    UserError,
+)
 from unstructured_ingest.interfaces import (
     AccessConfig,
     ConnectionConfig,
@@ -220,9 +224,7 @@ class PineconeUploader(VectorDBUploader):
             if self.connection_config.index_name and not self.index_exists(
                 self.connection_config.index_name
             ):
-                raise DestinationConnectionError(
-                    f"index {self.connection_config.index_name} does not exist"
-                )
+                raise NotFoundError(f"index {self.connection_config.index_name} does not exist")
         except Exception as e:
             logger.error(f"failed to validate connection: {e}", exc_info=True)
             raise DestinationConnectionError(f"failed to validate connection: {e}")
@@ -364,7 +366,7 @@ class PineconeUploader(VectorDBUploader):
         try:
             results = [async_result.get() for async_result in async_results]
         except PineconeApiException as api_error:
-            raise DestinationConnectionError(f"http error: {api_error}") from api_error
+            raise UnstructuredIngestError(f"http error: {api_error}") from api_error
         logger.debug(f"results: {results}")
 
     def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
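
The pinecone.py upload path collects pool-style async results and now reports PineconeApiException as a generic UnstructuredIngestError, reserving DestinationConnectionError for precheck failures. A sketch of the parallel-upsert idiom that `async_results` implies (the index handle, batching, and import path are assumptions based on recent pinecone clients, where upsert(..., async_req=True) returns results exposing .get()):

from pinecone.exceptions import PineconeApiException  # import path per recent clients

# `index` and `chunks` are assumed: an Index handle and pre-batched vectors.
async_results = [
    index.upsert(vectors=chunk, async_req=True)  # returns a pool-style result
    for chunk in chunks
]
try:
    results = [r.get() for r in async_results]  # block until every batch lands
except PineconeApiException as api_error:
    raise UnstructuredIngestError(f"http error: {api_error}") from api_error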
unstructured_ingest/processes/connectors/redisdb.py

@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Any, AsyncGenerator, Generator, Optional
 from pydantic import Field, Secret, model_validator
 
 from unstructured_ingest.data_types.file_data import FileData
-from unstructured_ingest.error import DestinationConnectionError
+from unstructured_ingest.error import DestinationConnectionError, ResponseError, ValueError
 from unstructured_ingest.interfaces import (
     AccessConfig,
     ConnectionConfig,
@@ -203,7 +203,7 @@ class RedisUploader(Uploader):
                 await pipe.set(key_with_prefix, json.dumps(element)).execute()
                 redis_stack = False
             else:
-                raise redis_exceptions.ResponseError(message) from e
+                raise ResponseError(message) from e
         return redis_stack
unstructured_ingest/processes/connectors/salesforce.py

@@ -25,7 +25,12 @@ from unstructured_ingest.data_types.file_data import (
     FileDataSourceMetadata,
     SourceIdentifiers,
 )
-from unstructured_ingest.error import SourceConnectionError, SourceConnectionNetworkError
+from unstructured_ingest.error import (
+    MissingCategoryError,
+    SourceConnectionError,
+    SourceConnectionNetworkError,
+    ValueError,
+)
 from unstructured_ingest.interfaces import (
     AccessConfig,
     ConnectionConfig,
@@ -41,11 +46,6 @@ from unstructured_ingest.processes.connector_registry import (
 )
 from unstructured_ingest.utils.dep_check import requires_dependencies
 
-
-class MissingCategoryError(Exception):
-    """There are no categories with that name."""
-
-
 CONNECTOR_TYPE = "salesforce"
 
 if TYPE_CHECKING:
unstructured_ingest/processes/connectors/sharepoint.py

@@ -10,10 +10,13 @@ from unstructured_ingest.data_types.file_data import (
     FileData,
 )
 from unstructured_ingest.error import (
+    NotFoundError,
     SourceConnectionError,
     SourceConnectionNetworkError,
+    UserAuthError,
+    UserError,
+    ValueError,
 )
-from unstructured_ingest.errors_v2 import UserAuthError, UserError
 from unstructured_ingest.logger import logger
 from unstructured_ingest.processes.connector_registry import (
     SourceRegistryEntry,
@@ -239,7 +242,7 @@ class SharepointDownloader(OnedriveDownloader):
         file = site_drive_item.get_by_path(server_relative_path).get().execute_query()
 
         if not file:
-            raise FileNotFoundError(f"file not found: {server_relative_path}")
+            raise NotFoundError(f"file not found: {server_relative_path}")
         return file
unstructured_ingest/processes/connectors/slack.py

@@ -13,7 +13,7 @@ from unstructured_ingest.data_types.file_data import (
     FileDataSourceMetadata,
     SourceIdentifiers,
 )
-from unstructured_ingest.error import SourceConnectionError
+from unstructured_ingest.error import SourceConnectionError, ValueError
 from unstructured_ingest.interfaces import (
     AccessConfig,
     ConnectionConfig,
unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py

@@ -8,6 +8,7 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
 from pydantic import Field, Secret
 
 from unstructured_ingest.data_types.file_data import FileData
+from unstructured_ingest.error import ValueError
 from unstructured_ingest.logger import logger
 from unstructured_ingest.processes.connector_registry import (
     DestinationRegistryEntry,
unstructured_ingest/processes/connectors/sql/sqlite.py

@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Any, Generator
 from pydantic import Field, Secret, model_validator
 
 from unstructured_ingest.data_types.file_data import FileData
+from unstructured_ingest.error import ValueError
 from unstructured_ingest.logger import logger
 from unstructured_ingest.processes.connector_registry import (
     DestinationRegistryEntry,
unstructured_ingest/processes/connectors/vectara.py

@@ -9,7 +9,7 @@ from typing import Any, Dict, Mapping, Optional
 from pydantic import Field, Secret
 
 from unstructured_ingest.data_types.file_data import FileData
-from unstructured_ingest.error import DestinationConnectionError
+from unstructured_ingest.error import DestinationConnectionError, ValueError
 from unstructured_ingest.interfaces import (
     AccessConfig,
     ConnectionConfig,
unstructured_ingest/processes/connectors/weaviate/cloud.py

@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
 
 from pydantic import Field, Secret
 
+from unstructured_ingest.error import ValueError
 from unstructured_ingest.processes.connector_registry import DestinationRegistryEntry
 from unstructured_ingest.processes.connectors.weaviate.weaviate import (
     WeaviateAccessConfig,
unstructured_ingest/processes/connectors/weaviate/weaviate.py

@@ -11,7 +11,7 @@ from dateutil import parser
 from pydantic import Field, Secret
 
 from unstructured_ingest.data_types.file_data import FileData
-from unstructured_ingest.error import DestinationConnectionError, WriteError
+from unstructured_ingest.error import DestinationConnectionError, ValueError, WriteError
 from unstructured_ingest.interfaces import (
     AccessConfig,
     ConnectionConfig,
unstructured_ingest/processes/connectors/zendesk/client.py

@@ -4,7 +4,13 @@ from typing import TYPE_CHECKING, Any, AsyncGenerator, Literal, Optional, Union
 
 from pydantic import BaseModel, Field, HttpUrl
 
-from unstructured_ingest.errors_v2 import ProviderError, RateLimitError, UserAuthError, UserError
+from unstructured_ingest.error import (
+    ProviderError,
+    RateLimitError,
+    UnstructuredIngestError,
+    UserAuthError,
+    UserError,
+)
 from unstructured_ingest.logger import logger
 from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.utils.string_and_date_utils import fix_unescaped_unicode
@@ -211,7 +217,7 @@ class ZendeskClient:
 
         if not isinstance(e, httpx.HTTPStatusError):
             logger.error(f"unhandled exception from Zendesk client: {e}", exc_info=True)
-            return e
+            return UnstructuredIngestError(str(e))
         url = e.request.url
         response_code = e.response.status_code
         if response_code == 401:
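
ZendeskClient's translator now guarantees a package-defined type on every path: non-HTTP failures become UnstructuredIngestError, and HTTP status codes map to the classes imported above. Only the 401 check is visible in the hunk; a hedged sketch of how the remaining codes might map, consistent with those imports:

def translate_status(response_code: int, url) -> Exception:
    # Only the 401 branch is confirmed by the hunk; the other mappings are
    # assumptions consistent with the imported error classes.
    if response_code == 401:
        return UserAuthError(f"unauthorized: {url}")
    if response_code == 429:
        return RateLimitError(f"rate limited: {url}")
    if 400 <= response_code < 500:
        return UserError(f"client error {response_code}: {url}")
    return ProviderError(f"server error {response_code}: {url}")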
unstructured_ingest/processes/connectors/zendesk/zendesk.py

@@ -13,6 +13,7 @@ from unstructured_ingest.data_types.file_data import (
     FileDataSourceMetadata,
     SourceIdentifiers,
 )
+from unstructured_ingest.error import SourceConnectionError, ValueError
 from unstructured_ingest.interfaces import (
     AccessConfig,
     ConnectionConfig,
@@ -222,7 +223,9 @@ class ZendeskDownloader(Downloader):
             ticket = ZendeskTicket.model_validate(zendesk_filedata.additional_metadata.content)
             await self.download_ticket(ticket=ticket, download_path=download_path)
         else:
-            raise RuntimeError(f"Item type {item_type} cannot be handled by the downloader")
+            raise SourceConnectionError(
+                f"Item type {item_type} cannot be handled by the downloader"
+            )
         return super().generate_download_response(
             file_data=zendesk_filedata, download_path=download_path
         )
unstructured_ingest/processes/partitioner.py

@@ -5,7 +5,7 @@ from typing import Any, Optional
 
 from pydantic import BaseModel, Field, SecretStr
 
-from unstructured_ingest.errors_v2 import UserError
+from unstructured_ingest.error import UserError
 from unstructured_ingest.interfaces.process import BaseProcess
 from unstructured_ingest.logger import logger
 from unstructured_ingest.unstructured_api import call_api_async
unstructured_ingest/unstructured_api.py

@@ -2,7 +2,7 @@ from dataclasses import fields
 from pathlib import Path
 from typing import TYPE_CHECKING, Optional
 
-from unstructured_ingest.errors_v2 import ProviderError, QuotaError, UserAuthError, UserError
+from unstructured_ingest.error import ProviderError, QuotaError, UserAuthError, UserError
 from unstructured_ingest.logger import logger
 
 if TYPE_CHECKING:
{unstructured_ingest-1.2.9.dist-info → unstructured_ingest-1.2.11.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: unstructured_ingest
-Version: 1.2.9
+Version: 1.2.11
 Summary: Local ETL data pipeline to get data RAG ready
 Author-email: Unstructured Technologies <devops@unstructuredai.io>
 License-Expression: Apache-2.0