unstructured-ingest: 1.2.2 (py3-none-any.whl) → 1.2.3 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. (The original page links to more details here; the link target is not preserved in this text.)

@@ -1 +1 @@
1
- __version__ = "1.2.2" # pragma: no cover
1
+ __version__ = "1.2.3" # pragma: no cover
@@ -1,4 +1,5 @@
1
1
  import contextlib
2
+ import os
2
3
  from contextlib import contextmanager
3
4
  from dataclasses import dataclass, field
4
5
  from time import time
@@ -33,7 +34,7 @@ from unstructured_ingest.utils.dep_check import requires_dependencies
33
34
 
34
35
  CONNECTOR_TYPE = "s3"
35
36
 
36
- # https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html#object-key-guidelines-avoid-characters
37
+ # https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html#object-key-guidelines-avoid-characters # noqa
37
38
  CHARACTERS_TO_AVOID = ["\\", "{", "^", "}", "%", "`", "]", '"', ">", "[", "~", "<", "#", "|"]
38
39
 
39
40
  if TYPE_CHECKING:
@@ -56,6 +57,13 @@ class S3AccessConfig(FsspecAccessConfig):
56
57
  token: Optional[str] = Field(
57
58
  default=None, description="If not anonymous, use this security token, if specified."
58
59
  )
60
+ ambient_credentials: bool = Field(
61
+ default=False,
62
+ description="Explicitly allow using ambient AWS credentials from .aws folder, "
63
+ "environment variables, or IAM roles. Requires ALLOW_AMBIENT_CREDENTIALS_S3 environment "
64
+ "variable to also be set to 'true' (case insensitive) for security. When False (default), "
65
+ "only explicit credentials or anonymous access are allowed.",
66
+ )
59
67
 
60
68
 
61
69
  class S3ConnectionConfig(FsspecConnectionConfig):
@@ -72,14 +80,48 @@ class S3ConnectionConfig(FsspecConnectionConfig):
72
80
  connector_type: str = Field(default=CONNECTOR_TYPE, init=False)
73
81
 
74
82
  def get_access_config(self) -> dict[str, Any]:
75
- access_configs: dict[str, Any] = {"anon": self.anonymous}
83
+ access_config = self.access_config.get_secret_value()
84
+ has_explicit_credentials = bool(
85
+ access_config.key or access_config.secret or access_config.token
86
+ )
87
+
88
+ access_configs: dict[str, Any]
89
+
90
+ if has_explicit_credentials:
91
+ access_configs = {"anon": False}
92
+ # Avoid injecting None by filtering out k,v pairs where the value is None
93
+ access_configs.update(
94
+ {
95
+ k: v
96
+ for k, v in access_config.model_dump().items()
97
+ if v is not None and k != "ambient_credentials"
98
+ }
99
+ )
100
+ elif access_config.ambient_credentials:
101
+ if os.getenv("ALLOW_AMBIENT_CREDENTIALS_S3", "").lower() == "true":
102
+ logger.info(
103
+ "Using ambient AWS credentials (environment variables, .aws folder, IAM roles)"
104
+ )
105
+ access_configs = {"anon": False}
106
+ # Don't pass explicit credentials, let s3fs/boto3 auto-detect
107
+ else:
108
+ # Field allows but environment doesn't - raise error for security
109
+ raise UserAuthError(
110
+ "Ambient credentials requested (ambient_credentials=True) but "
111
+ "ALLOW_AMBIENT_CREDENTIALS_S3 environment variable is not set to 'true'. "
112
+ )
113
+ elif self.anonymous:
114
+ access_configs = {"anon": True}
115
+ else:
116
+ # User set anonymous=False but provided no credentials and no ambient permission
117
+ raise UserAuthError(
118
+ "No authentication method specified. anonymous=False but no explicit credentials "
119
+ "provided and ambient_credentials=False."
120
+ )
121
+
76
122
  if self.endpoint_url:
77
123
  access_configs["endpoint_url"] = self.endpoint_url
78
124
 
79
- # Avoid injecting None by filtering out k,v pairs where the value is None
80
- access_configs.update(
81
- {k: v for k, v in self.access_config.get_secret_value().model_dump().items() if v}
82
- )
83
125
  return access_configs
84
126
 
85
127
  @requires_dependencies(["s3fs", "fsspec"], extras="s3")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: unstructured_ingest
3
- Version: 1.2.2
3
+ Version: 1.2.3
4
4
  Summary: Local ETL data pipeline to get data RAG ready
5
5
  Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
6
  License-Expression: Apache-2.0
@@ -1,5 +1,5 @@
1
1
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
2
- unstructured_ingest/__version__.py,sha256=HPy7TMxiKrkQS-Rrw57HuZN3ZHBCTvYH8fjgFH1cXxs,41
2
+ unstructured_ingest/__version__.py,sha256=rNRejJU1rpN1Dw-S8aXOaPrt3Fh403mDtzJwDpLdB_Q,42
3
3
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
4
4
  unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
5
5
  unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
@@ -111,7 +111,7 @@ unstructured_ingest/processes/connectors/fsspec/box.py,sha256=1gLS7xR2vbjgKBrQ4Z
111
111
  unstructured_ingest/processes/connectors/fsspec/dropbox.py,sha256=HwwKjQmjM7yFk9Esh_F20xDisRPXGUkFduzaasByRDE,8355
112
112
  unstructured_ingest/processes/connectors/fsspec/fsspec.py,sha256=yIvaII_uQ6ANibyj9aysM6c7fg5vUuL2eccLb51LhWk,18497
113
113
  unstructured_ingest/processes/connectors/fsspec/gcs.py,sha256=ouxISCKpZTAj3T6pWGYbASu93wytJjl5WSICvQcrgfE,7172
114
- unstructured_ingest/processes/connectors/fsspec/s3.py,sha256=P5nd3hamhLFO3l5nV3lMuIxHtb_rZYFP4F6q_py3xpc,7492
114
+ unstructured_ingest/processes/connectors/fsspec/s3.py,sha256=UX1kZR_1RtMhhTxGfkXFRrNwFxDXgog8RG36qsqzZE0,9415
115
115
  unstructured_ingest/processes/connectors/fsspec/sftp.py,sha256=pR_a2SgLjt8ffNkariHrPB1E0HVSTj5h3pt7KxTU3TI,6371
116
116
  unstructured_ingest/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
117
117
  unstructured_ingest/processes/connectors/ibm_watsonx/__init__.py,sha256=kf0UpgdAY2KK1R1FbAB6GEBBAIOeYQ8cZIr3bp660qM,374
@@ -235,8 +235,8 @@ unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3r
235
235
  unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
236
236
  unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
237
237
  unstructured_ingest/utils/tls.py,sha256=Ra8Mii1F4VqErRreg76PBI0eAqPBC009l0sSHa8FdnA,448
238
- unstructured_ingest-1.2.2.dist-info/METADATA,sha256=kLg62BHEhhU0BK_73Qc0XqsKtrf5XN3pzD40eGXW3xM,8826
239
- unstructured_ingest-1.2.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
240
- unstructured_ingest-1.2.2.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
241
- unstructured_ingest-1.2.2.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
242
- unstructured_ingest-1.2.2.dist-info/RECORD,,
238
+ unstructured_ingest-1.2.3.dist-info/METADATA,sha256=pOJRa3rcHmixBKaIZGlem8vVYSpctM4jagYxyiprnn8,8826
239
+ unstructured_ingest-1.2.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
240
+ unstructured_ingest-1.2.3.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
241
+ unstructured_ingest-1.2.3.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
242
+ unstructured_ingest-1.2.3.dist-info/RECORD,,