unstructured-ingest: unstructured_ingest-1.2.2-py3-none-any.whl → unstructured_ingest-1.2.4-py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

@@ -1 +1 @@
1
- __version__ = "1.2.2" # pragma: no cover
1
+ __version__ = "1.2.4" # pragma: no cover
@@ -8,10 +8,15 @@ from unstructured_ingest.embed.interfaces import (
8
8
  BaseEmbeddingEncoder,
9
9
  EmbeddingConfig,
10
10
  )
11
+ from unstructured_ingest.errors_v2 import (
12
+ ProviderError,
13
+ UserAuthError,
14
+ UserError,
15
+ is_internal_error,
16
+ )
11
17
  from unstructured_ingest.errors_v2 import (
12
18
  RateLimitError as CustomRateLimitError,
13
19
  )
14
- from unstructured_ingest.errors_v2 import UserAuthError, UserError, is_internal_error
15
20
  from unstructured_ingest.logger import logger
16
21
  from unstructured_ingest.utils.dep_check import requires_dependencies
17
22
 
@@ -34,14 +39,22 @@ class TogetherAIEmbeddingConfig(EmbeddingConfig):
34
39
  from together.error import AuthenticationError, RateLimitError, TogetherException
35
40
 
36
41
  if not isinstance(e, TogetherException):
37
- logger.error(f"unhandled exception from openai: {e}", exc_info=True)
42
+ logger.error(f"unhandled exception from together: {e}", exc_info=True)
38
43
  return e
39
44
  message = e.args[0]
40
45
  if isinstance(e, AuthenticationError):
41
46
  return UserAuthError(message)
42
47
  if isinstance(e, RateLimitError):
43
48
  return CustomRateLimitError(message)
44
- return UserError(message)
49
+
50
+ status_code = getattr(e, 'status_code', None)
51
+ if status_code is not None:
52
+ if 400 <= status_code < 500:
53
+ return UserError(message)
54
+ if status_code >= 500:
55
+ return ProviderError(message)
56
+ logger.error(f"unhandled exception from together: {e}", exc_info=True)
57
+ return e
45
58
 
46
59
  def run_precheck(self) -> None:
47
60
  client = self.get_client()
@@ -1,4 +1,5 @@
1
1
  import contextlib
2
+ import os
2
3
  from contextlib import contextmanager
3
4
  from dataclasses import dataclass, field
4
5
  from time import time
@@ -33,7 +34,7 @@ from unstructured_ingest.utils.dep_check import requires_dependencies
33
34
 
34
35
  CONNECTOR_TYPE = "s3"
35
36
 
36
- # https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html#object-key-guidelines-avoid-characters
37
+ # https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html#object-key-guidelines-avoid-characters # noqa
37
38
  CHARACTERS_TO_AVOID = ["\\", "{", "^", "}", "%", "`", "]", '"', ">", "[", "~", "<", "#", "|"]
38
39
 
39
40
  if TYPE_CHECKING:
@@ -56,6 +57,13 @@ class S3AccessConfig(FsspecAccessConfig):
56
57
  token: Optional[str] = Field(
57
58
  default=None, description="If not anonymous, use this security token, if specified."
58
59
  )
60
+ ambient_credentials: bool = Field(
61
+ default=False,
62
+ description="Explicitly allow using ambient AWS credentials from .aws folder, "
63
+ "environment variables, or IAM roles. Requires ALLOW_AMBIENT_CREDENTIALS_S3 environment "
64
+ "variable to also be set to 'true' (case insensitive) for security. When False (default), "
65
+ "only explicit credentials or anonymous access are allowed.",
66
+ )
59
67
 
60
68
 
61
69
  class S3ConnectionConfig(FsspecConnectionConfig):
@@ -72,14 +80,48 @@ class S3ConnectionConfig(FsspecConnectionConfig):
72
80
  connector_type: str = Field(default=CONNECTOR_TYPE, init=False)
73
81
 
74
82
  def get_access_config(self) -> dict[str, Any]:
75
- access_configs: dict[str, Any] = {"anon": self.anonymous}
83
+ access_config = self.access_config.get_secret_value()
84
+ has_explicit_credentials = bool(
85
+ access_config.key or access_config.secret or access_config.token
86
+ )
87
+
88
+ access_configs: dict[str, Any]
89
+
90
+ if has_explicit_credentials:
91
+ access_configs = {"anon": False}
92
+ # Avoid injecting None by filtering out k,v pairs where the value is None
93
+ access_configs.update(
94
+ {
95
+ k: v
96
+ for k, v in access_config.model_dump().items()
97
+ if v is not None and k != "ambient_credentials"
98
+ }
99
+ )
100
+ elif access_config.ambient_credentials:
101
+ if os.getenv("ALLOW_AMBIENT_CREDENTIALS_S3", "").lower() == "true":
102
+ logger.info(
103
+ "Using ambient AWS credentials (environment variables, .aws folder, IAM roles)"
104
+ )
105
+ access_configs = {"anon": False}
106
+ # Don't pass explicit credentials, let s3fs/boto3 auto-detect
107
+ else:
108
+ # Field allows but environment doesn't - raise error for security
109
+ raise UserAuthError(
110
+ "Ambient credentials requested (ambient_credentials=True) but "
111
+ "ALLOW_AMBIENT_CREDENTIALS_S3 environment variable is not set to 'true'. "
112
+ )
113
+ elif self.anonymous:
114
+ access_configs = {"anon": True}
115
+ else:
116
+ # User set anonymous=False but provided no credentials and no ambient permission
117
+ raise UserAuthError(
118
+ "No authentication method specified. anonymous=False but no explicit credentials "
119
+ "provided and ambient_credentials=False."
120
+ )
121
+
76
122
  if self.endpoint_url:
77
123
  access_configs["endpoint_url"] = self.endpoint_url
78
124
 
79
- # Avoid injecting None by filtering out k,v pairs where the value is None
80
- access_configs.update(
81
- {k: v for k, v in self.access_config.get_secret_value().model_dump().items() if v}
82
- )
83
125
  return access_configs
84
126
 
85
127
  @requires_dependencies(["s3fs", "fsspec"], extras="s3")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: unstructured_ingest
3
- Version: 1.2.2
3
+ Version: 1.2.4
4
4
  Summary: Local ETL data pipeline to get data RAG ready
5
5
  Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
6
  License-Expression: Apache-2.0
@@ -1,5 +1,5 @@
1
1
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
2
- unstructured_ingest/__version__.py,sha256=HPy7TMxiKrkQS-Rrw57HuZN3ZHBCTvYH8fjgFH1cXxs,41
2
+ unstructured_ingest/__version__.py,sha256=yGezyMCk7VVJRmF3nAJ5bz5NlFjU1AdeOFzTWe3uic8,42
3
3
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
4
4
  unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
5
5
  unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
@@ -29,7 +29,7 @@ unstructured_ingest/embed/interfaces.py,sha256=VCrCSJiEfIxKB4NL4AHgKb-0vB_SEekb4
29
29
  unstructured_ingest/embed/mixedbreadai.py,sha256=uKTqzoi4M_WeYZu-qc_TSxwJONOESzxVbBLUbD1Wbns,3922
30
30
  unstructured_ingest/embed/octoai.py,sha256=yZuD7R4mEKS4Jjyae_IrNWogMPOFFS8gW5oUllj3ROU,4540
31
31
  unstructured_ingest/embed/openai.py,sha256=09I5BIrb-iGsv92LOV46-F7oZ7j1JnJIOQFARNKVq3k,5029
32
- unstructured_ingest/embed/togetherai.py,sha256=ykaveEUBxBGBzRlmWc9utCFQuUWHdbW4F9KAb-uBAJM,3630
32
+ unstructured_ingest/embed/togetherai.py,sha256=KJQ6pf2ICoLgCgFGTBg0CYNDP-szAiKgOot8fajh2fY,3993
33
33
  unstructured_ingest/embed/vertexai.py,sha256=DphvPhiYdXTMrQxJCd-64vMs4iVdLY_BphHqz3n5HfM,3758
34
34
  unstructured_ingest/embed/voyageai.py,sha256=EOrYzaoXOZ6C4fNkMlCgb8KA8rdfgVXN3USMFpnn0Bs,4698
35
35
  unstructured_ingest/interfaces/__init__.py,sha256=QIkWqjsq9INTa89gPuXlMlQL4s3y5TqLmPkuVuTyXcs,795
@@ -111,7 +111,7 @@ unstructured_ingest/processes/connectors/fsspec/box.py,sha256=1gLS7xR2vbjgKBrQ4Z
111
111
  unstructured_ingest/processes/connectors/fsspec/dropbox.py,sha256=HwwKjQmjM7yFk9Esh_F20xDisRPXGUkFduzaasByRDE,8355
112
112
  unstructured_ingest/processes/connectors/fsspec/fsspec.py,sha256=yIvaII_uQ6ANibyj9aysM6c7fg5vUuL2eccLb51LhWk,18497
113
113
  unstructured_ingest/processes/connectors/fsspec/gcs.py,sha256=ouxISCKpZTAj3T6pWGYbASu93wytJjl5WSICvQcrgfE,7172
114
- unstructured_ingest/processes/connectors/fsspec/s3.py,sha256=P5nd3hamhLFO3l5nV3lMuIxHtb_rZYFP4F6q_py3xpc,7492
114
+ unstructured_ingest/processes/connectors/fsspec/s3.py,sha256=UX1kZR_1RtMhhTxGfkXFRrNwFxDXgog8RG36qsqzZE0,9415
115
115
  unstructured_ingest/processes/connectors/fsspec/sftp.py,sha256=pR_a2SgLjt8ffNkariHrPB1E0HVSTj5h3pt7KxTU3TI,6371
116
116
  unstructured_ingest/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
117
117
  unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py,sha256=kf0UpgdAY2KK1R1FbAB6GEBBAIOeYQ8cZIr3bp660qM,374
@@ -235,8 +235,8 @@ unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3r
235
235
  unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
236
236
  unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
237
237
  unstructured_ingest/utils/tls.py,sha256=Ra8Mii1F4VqErRreg76PBI0eAqPBC009l0sSHa8FdnA,448
238
- unstructured_ingest-1.2.2.dist-info/METADATA,sha256=kLg62BHEhhU0BK_73Qc0XqsKtrf5XN3pzD40eGXW3xM,8826
239
- unstructured_ingest-1.2.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
240
- unstructured_ingest-1.2.2.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
241
- unstructured_ingest-1.2.2.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
242
- unstructured_ingest-1.2.2.dist-info/RECORD,,
238
+ unstructured_ingest-1.2.4.dist-info/METADATA,sha256=Fx-9Pq4cPql6ijyfDpH97FqG5ZFy1cLPPXa54RYEDyU,8826
239
+ unstructured_ingest-1.2.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
240
+ unstructured_ingest-1.2.4.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
241
+ unstructured_ingest-1.2.4.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
242
+ unstructured_ingest-1.2.4.dist-info/RECORD,,