unstructured-ingest 1.2.8__py3-none-any.whl → 1.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic; see the package registry's advisory for more details.

Files changed (59)
  1. unstructured_ingest/__version__.py +1 -1
  2. unstructured_ingest/embed/bedrock.py +1 -1
  3. unstructured_ingest/embed/octoai.py +1 -1
  4. unstructured_ingest/embed/openai.py +1 -1
  5. unstructured_ingest/embed/togetherai.py +4 -4
  6. unstructured_ingest/embed/vertexai.py +1 -1
  7. unstructured_ingest/embed/voyageai.py +2 -2
  8. unstructured_ingest/error.py +113 -6
  9. unstructured_ingest/interfaces/downloader.py +2 -2
  10. unstructured_ingest/processes/connectors/airtable.py +1 -0
  11. unstructured_ingest/processes/connectors/azure_ai_search.py +1 -1
  12. unstructured_ingest/processes/connectors/chroma.py +2 -2
  13. unstructured_ingest/processes/connectors/confluence.py +6 -2
  14. unstructured_ingest/processes/connectors/databricks/volumes.py +1 -1
  15. unstructured_ingest/processes/connectors/databricks/volumes_table.py +1 -0
  16. unstructured_ingest/processes/connectors/delta_table.py +3 -3
  17. unstructured_ingest/processes/connectors/discord.py +3 -3
  18. unstructured_ingest/processes/connectors/duckdb/duckdb.py +1 -1
  19. unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +3 -2
  20. unstructured_ingest/processes/connectors/fsspec/azure.py +1 -1
  21. unstructured_ingest/processes/connectors/fsspec/box.py +1 -1
  22. unstructured_ingest/processes/connectors/fsspec/dropbox.py +3 -2
  23. unstructured_ingest/processes/connectors/fsspec/fsspec.py +8 -10
  24. unstructured_ingest/processes/connectors/fsspec/gcs.py +2 -2
  25. unstructured_ingest/processes/connectors/fsspec/s3.py +4 -1
  26. unstructured_ingest/processes/connectors/github.py +8 -3
  27. unstructured_ingest/processes/connectors/gitlab.py +1 -1
  28. unstructured_ingest/processes/connectors/google_drive.py +2 -4
  29. unstructured_ingest/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +12 -10
  30. unstructured_ingest/processes/connectors/jira.py +1 -1
  31. unstructured_ingest/processes/connectors/kafka/kafka.py +5 -5
  32. unstructured_ingest/processes/connectors/local.py +2 -1
  33. unstructured_ingest/processes/connectors/milvus.py +6 -1
  34. unstructured_ingest/processes/connectors/mongodb.py +6 -1
  35. unstructured_ingest/processes/connectors/neo4j.py +6 -2
  36. unstructured_ingest/processes/connectors/notion/client.py +14 -14
  37. unstructured_ingest/processes/connectors/notion/connector.py +1 -1
  38. unstructured_ingest/processes/connectors/onedrive.py +2 -1
  39. unstructured_ingest/processes/connectors/outlook.py +1 -1
  40. unstructured_ingest/processes/connectors/pinecone.py +8 -6
  41. unstructured_ingest/processes/connectors/redisdb.py +2 -2
  42. unstructured_ingest/processes/connectors/salesforce.py +6 -6
  43. unstructured_ingest/processes/connectors/sharepoint.py +5 -2
  44. unstructured_ingest/processes/connectors/slack.py +1 -1
  45. unstructured_ingest/processes/connectors/sql/databricks_delta_tables.py +1 -0
  46. unstructured_ingest/processes/connectors/sql/sqlite.py +1 -0
  47. unstructured_ingest/processes/connectors/vectara.py +1 -1
  48. unstructured_ingest/processes/connectors/weaviate/cloud.py +1 -0
  49. unstructured_ingest/processes/connectors/weaviate/weaviate.py +1 -1
  50. unstructured_ingest/processes/connectors/zendesk/client.py +8 -2
  51. unstructured_ingest/processes/connectors/zendesk/zendesk.py +4 -1
  52. unstructured_ingest/processes/partitioner.py +1 -1
  53. unstructured_ingest/unstructured_api.py +1 -1
  54. {unstructured_ingest-1.2.8.dist-info → unstructured_ingest-1.2.10.dist-info}/METADATA +1 -1
  55. {unstructured_ingest-1.2.8.dist-info → unstructured_ingest-1.2.10.dist-info}/RECORD +58 -59
  56. unstructured_ingest/errors_v2.py +0 -25
  57. {unstructured_ingest-1.2.8.dist-info → unstructured_ingest-1.2.10.dist-info}/WHEEL +0 -0
  58. {unstructured_ingest-1.2.8.dist-info → unstructured_ingest-1.2.10.dist-info}/entry_points.txt +0 -0
  59. {unstructured_ingest-1.2.8.dist-info → unstructured_ingest-1.2.10.dist-info}/licenses/LICENSE.md +0 -0
@@ -1 +1 @@
1
- __version__ = "1.2.8" # pragma: no cover
1
+ __version__ = "1.2.10" # pragma: no cover
@@ -13,7 +13,7 @@ from unstructured_ingest.embed.interfaces import (
13
13
  BaseEmbeddingEncoder,
14
14
  EmbeddingConfig,
15
15
  )
16
- from unstructured_ingest.errors_v2 import (
16
+ from unstructured_ingest.error import (
17
17
  ProviderError,
18
18
  RateLimitError,
19
19
  UserAuthError,
@@ -8,7 +8,7 @@ from unstructured_ingest.embed.interfaces import (
8
8
  BaseEmbeddingEncoder,
9
9
  EmbeddingConfig,
10
10
  )
11
- from unstructured_ingest.errors_v2 import (
11
+ from unstructured_ingest.error import (
12
12
  ProviderError,
13
13
  QuotaError,
14
14
  RateLimitError,
@@ -8,7 +8,7 @@ from unstructured_ingest.embed.interfaces import (
8
8
  BaseEmbeddingEncoder,
9
9
  EmbeddingConfig,
10
10
  )
11
- from unstructured_ingest.errors_v2 import (
11
+ from unstructured_ingest.error import (
12
12
  ProviderError,
13
13
  QuotaError,
14
14
  RateLimitError,
@@ -8,13 +8,13 @@ from unstructured_ingest.embed.interfaces import (
8
8
  BaseEmbeddingEncoder,
9
9
  EmbeddingConfig,
10
10
  )
11
- from unstructured_ingest.errors_v2 import (
11
+ from unstructured_ingest.error import (
12
12
  ProviderError,
13
13
  UserAuthError,
14
14
  UserError,
15
15
  is_internal_error,
16
16
  )
17
- from unstructured_ingest.errors_v2 import (
17
+ from unstructured_ingest.error import (
18
18
  RateLimitError as CustomRateLimitError,
19
19
  )
20
20
  from unstructured_ingest.logger import logger
@@ -46,8 +46,8 @@ class TogetherAIEmbeddingConfig(EmbeddingConfig):
46
46
  return UserAuthError(message)
47
47
  if isinstance(e, RateLimitError):
48
48
  return CustomRateLimitError(message)
49
-
50
- status_code = getattr(e, 'status_code', None)
49
+
50
+ status_code = getattr(e, "status_code", None)
51
51
  if status_code is not None:
52
52
  if 400 <= status_code < 500:
53
53
  return UserError(message)
@@ -13,7 +13,7 @@ from unstructured_ingest.embed.interfaces import (
13
13
  BaseEmbeddingEncoder,
14
14
  EmbeddingConfig,
15
15
  )
16
- from unstructured_ingest.errors_v2 import UserAuthError, is_internal_error
16
+ from unstructured_ingest.error import UserAuthError, is_internal_error
17
17
  from unstructured_ingest.utils.dep_check import requires_dependencies
18
18
 
19
19
  if TYPE_CHECKING:
@@ -8,8 +8,8 @@ from unstructured_ingest.embed.interfaces import (
8
8
  BaseEmbeddingEncoder,
9
9
  EmbeddingConfig,
10
10
  )
11
- from unstructured_ingest.errors_v2 import ProviderError, UserAuthError, UserError, is_internal_error
12
- from unstructured_ingest.errors_v2 import (
11
+ from unstructured_ingest.error import ProviderError, UserAuthError, UserError, is_internal_error
12
+ from unstructured_ingest.error import (
13
13
  RateLimitError as CustomRateLimitError,
14
14
  )
15
15
  from unstructured_ingest.logger import logger
@@ -1,9 +1,11 @@
1
1
  from abc import ABC
2
2
  from functools import wraps
3
+ from typing import Optional
3
4
 
4
5
 
5
- class CustomError(Exception, ABC):
6
+ class UnstructuredIngestError(Exception, ABC):
6
7
  error_string: str
8
+ status_code: Optional[int] = None
7
9
 
8
10
  @classmethod
9
11
  def wrap(cls, f):
@@ -25,25 +27,130 @@ class CustomError(Exception, ABC):
25
27
  return wrapper
26
28
 
27
29
 
28
- class SourceConnectionError(CustomError):
30
+ class ConnectionError(UnstructuredIngestError):
31
+ error_string = "Connection error: {}"
32
+ status_code: Optional[int] = 400
33
+
34
+
35
+ class SourceConnectionError(ConnectionError):
29
36
  error_string = "Error in getting data from upstream data source: {}"
37
+ status_code: Optional[int] = 400
30
38
 
31
39
 
32
40
  class SourceConnectionNetworkError(SourceConnectionError):
33
41
  error_string = "Error in connecting to upstream data source: {}"
42
+ status_code: Optional[int] = 400
34
43
 
35
44
 
36
- class DestinationConnectionError(CustomError):
45
+ class DestinationConnectionError(ConnectionError):
37
46
  error_string = "Error in connecting to downstream data source: {}"
47
+ status_code: Optional[int] = 400
38
48
 
39
49
 
40
- class EmbeddingEncoderConnectionError(CustomError):
50
+ class EmbeddingEncoderConnectionError(ConnectionError):
41
51
  error_string = "Error in connecting to the embedding model provider: {}"
52
+ status_code: Optional[int] = 400
53
+
54
+
55
+ class UserError(UnstructuredIngestError):
56
+ error_string = "User error: {}"
57
+ status_code: Optional[int] = 401
58
+
59
+
60
+ class UserAuthError(UserError):
61
+ error_string = "User authentication error: {}"
62
+ status_code: Optional[int] = 401
63
+
64
+
65
+ class RateLimitError(UserError):
66
+ error_string = "Rate limit error: {}"
67
+ status_code: Optional[int] = 429
68
+
42
69
 
70
+ class NotFoundError(UnstructuredIngestError):
71
+ error_string = "Not found error: {}"
72
+ status_code: Optional[int] = 404
43
73
 
44
- class WriteError(CustomError):
74
+
75
+ class TimeoutError(UnstructuredIngestError):
76
+ error_string = "Timeout error: {}"
77
+ status_code: Optional[int] = 408
78
+
79
+
80
+ class ResponseError(UnstructuredIngestError):
81
+ error_string = "Response error: {}"
82
+ status_code: Optional[int] = 400
83
+
84
+
85
+ class WriteError(UnstructuredIngestError):
45
86
  error_string = "Error in writing to downstream data source: {}"
87
+ status_code: Optional[int] = 400
88
+
89
+
90
+ class ProviderError(UnstructuredIngestError):
91
+ error_string = "Provider error: {}"
92
+ status_code: Optional[int] = 500
93
+
94
+
95
+ class ValueError(UnstructuredIngestError):
96
+ error_string = "Value error: {}"
46
97
 
47
98
 
48
- class PartitionError(CustomError):
99
+ class PartitionError(UnstructuredIngestError):
49
100
  error_string = "Error in partitioning content: {}"
101
+
102
+
103
+ class QuotaError(UserError):
104
+ error_string = "Quota error: {}"
105
+
106
+
107
+ class MissingCategoryError(UnstructuredIngestError):
108
+ error_string = "Missing category error: {}"
109
+
110
+
111
+ class ValidationError(UnstructuredIngestError):
112
+ error_string = "Validation error: {}"
113
+
114
+
115
+ class KeyError(UnstructuredIngestError):
116
+ error_string = "Key error: {}"
117
+
118
+
119
+ class FileExistsError(UnstructuredIngestError):
120
+ error_string = "File exists error: {}"
121
+
122
+
123
+ class TypeError(UnstructuredIngestError):
124
+ error_string = "Type error: {}"
125
+
126
+
127
+ class IcebergCommitFailedException(UnstructuredIngestError):
128
+ error_string = "Failed to commit changes to the iceberg table"
129
+
130
+
131
+ recognized_errors = [
132
+ UserError,
133
+ UserAuthError,
134
+ RateLimitError,
135
+ QuotaError,
136
+ ProviderError,
137
+ NotFoundError,
138
+ TypeError,
139
+ ValueError,
140
+ FileExistsError,
141
+ TimeoutError,
142
+ KeyError,
143
+ ResponseError,
144
+ ValidationError,
145
+ PartitionError,
146
+ WriteError,
147
+ ConnectionError,
148
+ SourceConnectionError,
149
+ SourceConnectionNetworkError,
150
+ DestinationConnectionError,
151
+ EmbeddingEncoderConnectionError,
152
+ ]
153
+
154
+
155
+ def is_internal_error(e: Exception) -> bool:
156
+ return any(isinstance(e, recognized_error) for recognized_error in recognized_errors)
@@ -36,11 +36,11 @@ class Downloader(BaseProcess, BaseConnector, ABC):
36
36
  def get_download_path(self, file_data: FileData) -> Optional[Path]:
37
37
  if not file_data.source_identifiers:
38
38
  return None
39
-
39
+
40
40
  rel_path = file_data.source_identifiers.relative_path
41
41
  if not rel_path:
42
42
  return None
43
-
43
+
44
44
  rel_path = rel_path[1:] if rel_path.startswith("/") else rel_path
45
45
  return self.download_dir / Path(rel_path)
46
46
 
@@ -6,6 +6,7 @@ from uuid import NAMESPACE_DNS, uuid5
6
6
  from pydantic import BaseModel, Field, Secret, field_validator
7
7
 
8
8
  from unstructured_ingest.data_types.file_data import FileData, SourceIdentifiers
9
+ from unstructured_ingest.error import ValueError
9
10
  from unstructured_ingest.interfaces import (
10
11
  AccessConfig,
11
12
  ConnectionConfig,
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Any, Generator
6
6
  from pydantic import Field, Secret
7
7
 
8
8
  from unstructured_ingest.data_types.file_data import FileData
9
- from unstructured_ingest.error import DestinationConnectionError, WriteError
9
+ from unstructured_ingest.error import DestinationConnectionError, ValueError, WriteError
10
10
  from unstructured_ingest.interfaces import (
11
11
  AccessConfig,
12
12
  ConnectionConfig,
@@ -7,7 +7,7 @@ from pydantic import Field, Secret
7
7
  from pydantic.functional_validators import BeforeValidator
8
8
 
9
9
  from unstructured_ingest.data_types.file_data import FileData
10
- from unstructured_ingest.error import DestinationConnectionError
10
+ from unstructured_ingest.error import DestinationConnectionError, ValueError
11
11
  from unstructured_ingest.interfaces import (
12
12
  AccessConfig,
13
13
  ConnectionConfig,
@@ -151,7 +151,7 @@ class ChromaUploader(Uploader):
151
151
  metadatas=batch["metadatas"],
152
152
  )
153
153
  except Exception as e:
154
- raise ValueError(f"chroma error: {e}") from e
154
+ raise DestinationConnectionError(f"chroma error: {e}") from e
155
155
 
156
156
  @staticmethod
157
157
  def prepare_chroma_list(chunk: tuple[dict[str, Any]]) -> dict[str, list[Any]]:
@@ -11,8 +11,12 @@ from unstructured_ingest.data_types.file_data import (
11
11
  FileDataSourceMetadata,
12
12
  SourceIdentifiers,
13
13
  )
14
- from unstructured_ingest.error import SourceConnectionError
15
- from unstructured_ingest.errors_v2 import UserAuthError, UserError
14
+ from unstructured_ingest.error import (
15
+ SourceConnectionError,
16
+ UserAuthError,
17
+ UserError,
18
+ ValueError,
19
+ )
16
20
  from unstructured_ingest.interfaces import (
17
21
  AccessConfig,
18
22
  ConnectionConfig,
@@ -12,7 +12,7 @@ from unstructured_ingest.data_types.file_data import (
12
12
  FileDataSourceMetadata,
13
13
  SourceIdentifiers,
14
14
  )
15
- from unstructured_ingest.errors_v2 import (
15
+ from unstructured_ingest.error import (
16
16
  ProviderError,
17
17
  RateLimitError,
18
18
  UserAuthError,
@@ -8,6 +8,7 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
8
8
  from pydantic import Field
9
9
 
10
10
  from unstructured_ingest.data_types.file_data import FileData
11
+ from unstructured_ingest.error import ValueError
11
12
  from unstructured_ingest.interfaces import (
12
13
  Uploader,
13
14
  UploaderConfig,
@@ -9,7 +9,7 @@ from urllib.parse import urlparse
9
9
  from pydantic import Field, Secret
10
10
 
11
11
  from unstructured_ingest.data_types.file_data import FileData
12
- from unstructured_ingest.error import DestinationConnectionError
12
+ from unstructured_ingest.error import DestinationConnectionError, ValueError
13
13
  from unstructured_ingest.interfaces import (
14
14
  AccessConfig,
15
15
  ConnectionConfig,
@@ -254,12 +254,12 @@ class DeltaTableUploader(Uploader):
254
254
  if not queue.empty():
255
255
  error_message = queue.get()
256
256
  logger.error("Exception occurred in write_deltalake: %s", error_message)
257
- raise RuntimeError(f"Error in write_deltalake: {error_message}")
257
+ raise DestinationConnectionError(f"Error in write_deltalake: {error_message}")
258
258
 
259
259
  # If the subprocess terminated abnormally but produced no traceback (e.g., SIGABRT),
260
260
  # still raise a helpful error for callers.
261
261
  if not current_process().daemon and writer.exitcode != 0:
262
- raise RuntimeError(
262
+ raise DestinationConnectionError(
263
263
  f"write_deltalake subprocess exited with code {writer.exitcode}"
264
264
  )
265
265
 
@@ -9,7 +9,7 @@ from unstructured_ingest.data_types.file_data import (
9
9
  FileDataSourceMetadata,
10
10
  SourceIdentifiers,
11
11
  )
12
- from unstructured_ingest.error import SourceConnectionError
12
+ from unstructured_ingest.error import UserAuthError, ValueError
13
13
  from unstructured_ingest.interfaces import (
14
14
  AccessConfig,
15
15
  ConnectionConfig,
@@ -70,9 +70,9 @@ class DiscordIndexer(Indexer):
70
70
 
71
71
  def precheck(self) -> None:
72
72
  if not self.connection_config.access_config.get_secret_value().token:
73
- raise SourceConnectionError("Discord token is missing")
73
+ raise UserAuthError("Discord token is missing")
74
74
  if not self.index_config.channels:
75
- raise SourceConnectionError("No channels provided")
75
+ raise ValueError("No channels provided")
76
76
 
77
77
  def get_channel_file_data(self, channel_id: str) -> Optional[FileData]:
78
78
  # Fetch channel metadata
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
6
6
  from pydantic import Field, Secret
7
7
 
8
8
  from unstructured_ingest.data_types.file_data import FileData
9
- from unstructured_ingest.error import DestinationConnectionError
9
+ from unstructured_ingest.error import DestinationConnectionError, ValueError
10
10
  from unstructured_ingest.interfaces import (
11
11
  AccessConfig,
12
12
  ConnectionConfig,
@@ -19,6 +19,7 @@ from unstructured_ingest.error import (
19
19
  DestinationConnectionError,
20
20
  SourceConnectionError,
21
21
  SourceConnectionNetworkError,
22
+ UnstructuredIngestError,
22
23
  )
23
24
  from unstructured_ingest.interfaces import (
24
25
  AccessConfig,
@@ -440,10 +441,10 @@ class ElasticsearchUploader(Uploader):
440
441
  logger.error(
441
442
  f"Batch upload failed - {e} - with following errors: {sanitized_errors}"
442
443
  )
443
- raise e
444
+ raise DestinationConnectionError(str(e))
444
445
  except Exception as e:
445
446
  logger.error(f"Batch upload failed - {e}")
446
- raise e
447
+ raise UnstructuredIngestError(str(e))
447
448
 
448
449
  def _sanitize_bulk_index_error(self, error: dict[str, dict]) -> dict:
449
450
  """Remove data uploaded to index from the log, leave only error information.
@@ -8,7 +8,7 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
8
8
  from pydantic import Field, Secret
9
9
 
10
10
  from unstructured_ingest.data_types.file_data import FileDataSourceMetadata
11
- from unstructured_ingest.errors_v2 import ProviderError, UserAuthError, UserError
11
+ from unstructured_ingest.error import ProviderError, UserAuthError, UserError, ValueError
12
12
  from unstructured_ingest.logger import logger
13
13
  from unstructured_ingest.processes.connector_registry import (
14
14
  DestinationRegistryEntry,
@@ -10,7 +10,7 @@ from pydantic import Field, Secret
10
10
  from pydantic.functional_validators import BeforeValidator
11
11
 
12
12
  from unstructured_ingest.data_types.file_data import FileDataSourceMetadata
13
- from unstructured_ingest.errors_v2 import ProviderError, UserAuthError, UserError
13
+ from unstructured_ingest.error import ProviderError, UserAuthError, UserError
14
14
  from unstructured_ingest.logger import logger
15
15
  from unstructured_ingest.processes.connector_registry import (
16
16
  DestinationRegistryEntry,
@@ -7,12 +7,13 @@ from typing import TYPE_CHECKING, Any, Optional
7
7
  from pydantic import Field, Secret
8
8
 
9
9
  from unstructured_ingest.data_types.file_data import FileDataSourceMetadata
10
- from unstructured_ingest.errors_v2 import (
10
+ from unstructured_ingest.error import (
11
11
  ProviderError,
12
12
  UserAuthError,
13
13
  UserError,
14
+ ValueError,
14
15
  )
15
- from unstructured_ingest.errors_v2 import (
16
+ from unstructured_ingest.error import (
16
17
  RateLimitError as CustomRateLimitError,
17
18
  )
18
19
  from unstructured_ingest.logger import logger
@@ -17,6 +17,7 @@ from unstructured_ingest.data_types.file_data import (
17
17
  FileDataSourceMetadata,
18
18
  SourceIdentifiers,
19
19
  )
20
+ from unstructured_ingest.error import TypeError, ValueError
20
21
  from unstructured_ingest.interfaces import (
21
22
  AccessConfig,
22
23
  ConnectionConfig,
@@ -265,29 +266,26 @@ FsspecDownloaderConfigT = TypeVar("FsspecDownloaderConfigT", bound=FsspecDownloa
265
266
  @dataclass
266
267
  class FsspecDownloader(Downloader):
267
268
  TEMP_DIR_PREFIX = "unstructured_"
268
-
269
+
269
270
  protocol: str
270
271
  connection_config: FsspecConnectionConfigT
271
272
  connector_type: str = CONNECTOR_TYPE
272
273
  download_config: Optional[FsspecDownloaderConfigT] = field(
273
274
  default_factory=lambda: FsspecDownloaderConfig()
274
275
  )
275
-
276
+
276
277
  def get_download_path(self, file_data: FileData) -> Optional[Path]:
277
278
  has_source_identifiers = file_data.source_identifiers is not None
278
279
  has_filename = has_source_identifiers and file_data.source_identifiers.filename
279
-
280
+
280
281
  if not (has_source_identifiers and has_filename):
281
282
  return None
282
-
283
+
283
284
  filename = file_data.source_identifiers.filename
284
-
285
+
285
286
  mkdir_concurrent_safe(self.download_dir)
286
-
287
- temp_dir = tempfile.mkdtemp(
288
- prefix=self.TEMP_DIR_PREFIX,
289
- dir=self.download_dir
290
- )
287
+
288
+ temp_dir = tempfile.mkdtemp(prefix=self.TEMP_DIR_PREFIX, dir=self.download_dir)
291
289
  return Path(temp_dir) / filename
292
290
 
293
291
  def is_async(self) -> bool:
@@ -10,7 +10,7 @@ from dateutil import parser
10
10
  from pydantic import Field, Secret
11
11
 
12
12
  from unstructured_ingest.data_types.file_data import FileDataSourceMetadata
13
- from unstructured_ingest.errors_v2 import ProviderError, UserError
13
+ from unstructured_ingest.error import ProviderError, UserError, ValueError
14
14
  from unstructured_ingest.logger import logger
15
15
  from unstructured_ingest.processes.connector_registry import (
16
16
  DestinationRegistryEntry,
@@ -125,7 +125,7 @@ class GcsConnectionConfig(FsspecConnectionConfig):
125
125
  raise UserError(message)
126
126
  if http_error_code >= 500:
127
127
  raise ProviderError(message)
128
- logger.error(f"unhandled exception from gcs ({type(e)}): {e}", exc_info=True)
128
+ logger.error(f"({type(e)} from gcs): {e}", exc_info=True)
129
129
  return e
130
130
 
131
131
 
@@ -10,7 +10,7 @@ from pydantic import Field, Secret
10
10
  from unstructured_ingest.data_types.file_data import (
11
11
  FileDataSourceMetadata,
12
12
  )
13
- from unstructured_ingest.errors_v2 import ProviderError, UserAuthError, UserError
13
+ from unstructured_ingest.error import ProviderError, UserAuthError, UserError
14
14
  from unstructured_ingest.logger import logger
15
15
  from unstructured_ingest.processes.connector_registry import (
16
16
  DestinationRegistryEntry,
@@ -118,6 +118,9 @@ class S3ConnectionConfig(FsspecConnectionConfig):
118
118
  if self.endpoint_url:
119
119
  access_configs["endpoint_url"] = self.endpoint_url
120
120
 
121
+ # This allows s3fs to properly follow AWS region redirects
122
+ access_configs["cache_regions"] = True
123
+
121
124
  return access_configs
122
125
 
123
126
  @requires_dependencies(["s3fs", "fsspec"], extras="s3")
@@ -12,7 +12,12 @@ from unstructured_ingest.data_types.file_data import (
12
12
  FileDataSourceMetadata,
13
13
  SourceIdentifiers,
14
14
  )
15
- from unstructured_ingest.errors_v2 import ProviderError, UserAuthError, UserError
15
+ from unstructured_ingest.error import (
16
+ ProviderError,
17
+ UnstructuredIngestError,
18
+ UserAuthError,
19
+ UserError,
20
+ )
16
21
  from unstructured_ingest.interfaces import (
17
22
  AccessConfig,
18
23
  ConnectionConfig,
@@ -85,7 +90,7 @@ class GithubConnectionConfig(ConnectionConfig):
85
90
  if status_code > 500:
86
91
  return ProviderError(e.response.text)
87
92
  logger.debug(f"unhandled http error: {e}")
88
- return e
93
+ return UnstructuredIngestError(str(e))
89
94
 
90
95
  @requires_dependencies(["requests"], extras="github")
91
96
  def wrap_error(self, e: Exception) -> Exception:
@@ -97,7 +102,7 @@ class GithubConnectionConfig(ConnectionConfig):
97
102
  if isinstance(e, HTTPError):
98
103
  return self.wrap_http_error(e=e)
99
104
  logger.debug(f"unhandled error: {e}")
100
- return e
105
+ return UnstructuredIngestError(str(e))
101
106
 
102
107
 
103
108
  class GithubIndexerConfig(IndexerConfig):
@@ -13,7 +13,7 @@ from unstructured_ingest.data_types.file_data import (
13
13
  FileDataSourceMetadata,
14
14
  SourceIdentifiers,
15
15
  )
16
- from unstructured_ingest.error import SourceConnectionError
16
+ from unstructured_ingest.error import SourceConnectionError, ValueError
17
17
  from unstructured_ingest.interfaces import (
18
18
  AccessConfig,
19
19
  ConnectionConfig,
@@ -13,9 +13,7 @@ from unstructured_ingest.data_types.file_data import (
13
13
  FileDataSourceMetadata,
14
14
  SourceIdentifiers,
15
15
  )
16
- from unstructured_ingest.error import (
17
- SourceConnectionError,
18
- )
16
+ from unstructured_ingest.error import SourceConnectionError, UserAuthError, ValueError
19
17
  from unstructured_ingest.interfaces import (
20
18
  AccessConfig,
21
19
  ConnectionConfig,
@@ -113,7 +111,7 @@ class GoogleDriveConnectionConfig(ConnectionConfig):
113
111
  except HttpError as exc:
114
112
  raise ValueError(f"{exc.reason}")
115
113
  except exceptions.DefaultCredentialsError:
116
- raise ValueError("The provided API key is invalid.")
114
+ raise UserAuthError("The provided API key is invalid.")
117
115
 
118
116
 
119
117
  class GoogleDriveIndexerConfig(IndexerConfig):
@@ -8,7 +8,13 @@ from typing import TYPE_CHECKING, Any, Generator, Optional, Tuple
8
8
  from pydantic import Field, Secret
9
9
 
10
10
  from unstructured_ingest.data_types.file_data import FileData
11
- from unstructured_ingest.errors_v2 import ProviderError, UserAuthError, UserError
11
+ from unstructured_ingest.error import (
12
+ DestinationConnectionError,
13
+ IcebergCommitFailedException,
14
+ ProviderError,
15
+ UserAuthError,
16
+ UserError,
17
+ )
12
18
  from unstructured_ingest.interfaces import (
13
19
  AccessConfig,
14
20
  ConnectionConfig,
@@ -40,10 +46,6 @@ DEFAULT_ICEBERG_URI_PATH = "/mds/iceberg"
40
46
  DEFAULT_ICEBERG_CATALOG_TYPE = "rest"
41
47
 
42
48
 
43
- class IcebergCommitFailedException(Exception):
44
- """Failed to commit changes to the iceberg table."""
45
-
46
-
47
49
  class IbmWatsonxAccessConfig(AccessConfig):
48
50
  iam_api_key: str = Field(description="IBM IAM API Key")
49
51
  access_key_id: str = Field(description="Cloud Object Storage HMAC Access Key ID")
@@ -292,16 +294,16 @@ class IbmWatsonxUploader(SQLUploader):
292
294
  except CommitFailedException as e:
293
295
  table.refresh()
294
296
  logger.debug(e)
295
- raise IcebergCommitFailedException(e)
296
- except RESTError:
297
- raise
297
+ raise IcebergCommitFailedException(str(e))
298
+ except RESTError as e:
299
+ raise DestinationConnectionError(str(e))
298
300
  except Exception as e:
299
301
  raise ProviderError(f"Failed to upload data to table: {e}")
300
302
 
301
303
  try:
302
304
  return _upload_data_table(table, data_table, file_data)
303
- except RESTError:
304
- raise
305
+ except RESTError as e:
306
+ raise DestinationConnectionError(str(e))
305
307
  except ProviderError:
306
308
  raise
307
309
  except Exception as e:
@@ -12,7 +12,7 @@ from unstructured_ingest.data_types.file_data import (
12
12
  FileDataSourceMetadata,
13
13
  SourceIdentifiers,
14
14
  )
15
- from unstructured_ingest.error import SourceConnectionError
15
+ from unstructured_ingest.error import SourceConnectionError, ValueError
16
16
  from unstructured_ingest.interfaces import (
17
17
  AccessConfig,
18
18
  ConnectionConfig,