x2s3 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
x2s3/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .client import ProxyClient
2
+ from .settings import Settings
3
+
4
+ __all__ = ['Settings', 'ProxyClient']
x2s3/app.py ADDED
@@ -0,0 +1,309 @@
1
+ import os
2
+ import sys
3
+ from typing import Optional
4
+
5
+ from loguru import logger
6
+ from fastapi import FastAPI, HTTPException, Request, Query
7
+ from fastapi.responses import JSONResponse, FileResponse, PlainTextResponse
8
+ from fastapi.staticfiles import StaticFiles
9
+ from fastapi.templating import Jinja2Templates
10
+ from fastapi.middleware.cors import CORSMiddleware
11
+ from fastapi.exceptions import RequestValidationError
12
+ from starlette.exceptions import HTTPException as StarletteHTTPException
13
+
14
+ from x2s3.utils import *
15
+ from x2s3 import client_registry
16
+ from x2s3.settings import get_settings, Target
17
+
18
def create_app(settings):
    """Build and configure the FastAPI application for the proxy.

    Args:
        settings: Either a settings object or a zero-argument callable that
            returns one. A callable is invoked inside the startup handler,
            so the configuration is read when the server starts rather than
            when this function is called.

    Returns:
        The configured FastAPI application. ``app.settings`` and
        ``app.clients`` are populated by the startup handler.
    """
    # ``Response`` is needed for the XML bucket index below but is not among
    # this module's explicit imports. Importing it here removes any reliance
    # on the wildcard import from ``x2s3.utils`` happening to provide it.
    from fastapi.responses import Response

    app = FastAPI()
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["GET","HEAD"],
        allow_headers=["*"],
        expose_headers=["Range", "Content-Range"],
    )
    app.mount("/static", StaticFiles(directory="static"), name="static")
    templates = Jinja2Templates(directory="templates")


    @app.exception_handler(StarletteHTTPException)
    async def http_exception_handler(request, exc):
        """Render HTTP exceptions as a JSON ``{"error": ...}`` body."""
        return JSONResponse({"error":str(exc.detail)}, status_code=exc.status_code)


    @app.exception_handler(RequestValidationError)
    async def validation_exception_handler(request, exc):
        """Render request validation failures as a 400 JSON error."""
        return JSONResponse({"error":str(exc)}, status_code=400)


    @app.on_event("startup")
    async def startup_event():
        """ Runs once when the service is first starting.
            Reads the configuration and sets up the proxy clients.
        """
        if callable(settings):
            app.settings = settings()
        else:
            app.settings = settings

        # Configure logging
        logger.remove()
        logger.add(sys.stderr, level=app.settings.log_level)

        logger.trace("Available protocols:")
        for proto in client_registry.available_protocols():
            logger.trace(f"- {proto}")

        app.clients = {}

        # Add local path client if configured
        if app.settings.local_path:
            local_target = Target(
                name=app.settings.local_name,
                client='file',
                options={
                    'path': str(app.settings.local_path),
                }
            )
            app.settings.targets += [local_target]

        # Configure targets. Clients are keyed by the lower-cased target name.
        for target_name in app.settings.get_target_map():
            target_key = target_name.lower()
            target_config = app.settings.get_target_config(target_key)
            proxy_kwargs = {
                'target_name': target_name,
            }

            client = client_registry.client(target_config.client,
                                            proxy_kwargs, **target_config.options)

            if target_key in app.clients:
                logger.warning(f"Overriding target key: {target_key}")

            app.clients[target_key] = client
            logger.debug(f"Configured target {target_name}")

        logger.info(f"Server ready with {len(app.clients)} targets")


    def get_client(target_name):
        """Return the client registered for ``target_name`` (case-insensitive), or None."""
        return app.clients.get(target_name.lower())


    def get_target(request, path):
        """Work out which target a request addresses.

        With ``virtual_buckets`` enabled and a base URL configured, the
        target is the first label of whatever remains of the request
        hostname after the base URL's host is stripped. Otherwise the target
        is the first component of ``path``.

        Returns:
            A ``(target_name, target_path, is_virtual)`` tuple.
        """
        target_path = path
        base_url = app.settings.base_url

        logger.trace(f"base_url: {base_url}")
        logger.trace(f"request.url.hostname: {request.url.hostname}")

        subdomain = None
        if app.settings.virtual_buckets:
            if base_url:
                # The hostname can be None when the request URL carries no
                # host; treat that as "no subdomain" instead of crashing.
                hostname = request.url.hostname or ''
                subdomain = hostname.removesuffix(base_url.host).removesuffix('.')
            else:
                logger.warning("virtual_buckets enabled but no base URL is configured")

        if subdomain:
            # Target is given in the subdomain
            is_virtual = True
            target_name = subdomain.split('.')[0]
        else:
            # Target is encoded as the first element in the path
            is_virtual = False
            ts = target_path.removeprefix('/').split('/', maxsplit=1)
            logger.trace(f"target path components: {ts}")
            # str.split always yields at least one element; a single element
            # means we are at the root of the proxy or of a target.
            target_name = ts[0]
            target_path = ts[1] if len(ts) == 2 else ''

        logger.trace(f"target_name={target_name}, target_path={target_path}, is_virtual={is_virtual}")
        return target_name, target_path, is_virtual


    async def browse_bucket(request: Request,
                            target_name: str,
                            prefix: str,
                            continuation_token: str = None,
                            max_keys: int = 10,
                            is_virtual: bool = False):
        """Render an HTML listing of one "directory" level of a target.

        Lists objects under ``prefix`` with ``/`` as the delimiter, parses
        the XML the client returns and feeds it to the ``browse.html``
        template. A non-200 response from the client is returned unchanged.

        Raises:
            HTTPException: 404 if the target is not configured, 500 if no
                client is registered for it.
        """
        target_config = app.settings.get_target_config(target_name)
        if not target_config:
            raise HTTPException(status_code=404, detail="Target bucket not found")

        client = get_client(target_name)
        if client is None:
            raise HTTPException(status_code=500, detail="Client for target bucket not found")

        response = await client.list_objects_v2(continuation_token, '/', None,
                                                False, max_keys, prefix, None)

        if response.status_code != 200:
            # Return error response
            return response

        xml = response.body.decode("utf-8")
        root = parse_xml(xml)

        common_prefixes = []
        for cp in root.findall('CommonPrefixes'):
            common_prefixes += [dir_path(e.text) for e in cp.iter('Prefix')]

        contents = []
        for c in root.findall('Contents'):
            key_elem = c.find('Key')
            # Skip entries without a key and the entry for the prefix itself
            if key_elem is None or key_elem.text == prefix:
                continue

            content = {'key': key_elem.text}

            size_elem = c.find('Size')
            if size_elem is not None and size_elem.text:
                num_bytes = int(size_elem.text)
                content['size'] = humanize_bytes(num_bytes)

            lm_elem = c.find('LastModified')
            if lm_elem is not None and lm_elem.text:
                content['lastmod'] = format_isoformat_as_local(lm_elem.text)

            contents.append(content)

        next_token = None
        truncated_elem = root.find('IsTruncated')
        if truncated_elem is not None and truncated_elem.text=='true':
            next_ct_elem = root.find('NextContinuationToken')
            # A truncated listing without a token element would otherwise
            # raise AttributeError on ``None.text``.
            if next_ct_elem is not None:
                next_token = next_ct_elem.text

        target_prefix = '' if is_virtual else '/'+target_name
        parent_prefix = dir_path(os.path.dirname(prefix.rstrip('/')))

        return templates.TemplateResponse("browse.html", {
            "request": request,
            "prefix": prefix,
            "index_url": app.settings.base_url or '/',
            "target_prefix": target_prefix,
            "common_prefixes": common_prefixes,
            "contents": contents,
            "parent_prefix": parent_prefix,
            "remove_prefix": remove_prefix,
            "continuation_token": next_token
        })


    @app.get('/favicon.ico', include_in_schema=False)
    async def favicon():
        """Serve the static favicon."""
        return FileResponse('static/favicon.ico')


    @app.get('/robots.txt', response_class=PlainTextResponse)
    def robots():
        """Serve a robots.txt that disallows all crawling."""
        return """User-agent: *\nDisallow: /"""


    @app.get("/{path:path}")
    async def target_dispatcher(request: Request,
                                path: str,
                                list_type: int = Query(None, alias="list-type"),
                                continuation_token: Optional[str] = Query(None, alias="continuation-token"),
                                delimiter: Optional[str] = Query(None, alias="delimiter"),
                                encoding_type: Optional[str] = Query(None, alias="encoding-type"),
                                fetch_owner: Optional[bool] = Query(None, alias="fetch-owner"),
                                max_keys: Optional[int] = Query(1000, alias="max-keys"),
                                prefix: Optional[str] = Query(None, alias="prefix"),
                                start_after: Optional[str] = Query(None, alias="start-after")):
        """Dispatch a GET request to the index, a listing, the browser UI or an object.

        Raises:
            HTTPException: 500 if no client is registered for the target,
                400 if ``list-type`` is given with a value other than 2.
        """
        target_name, target_path, is_virtual = get_target(request, path)
        logger.debug(f"target_name={target_name}, target_path={target_path}, is_virtual={is_virtual}")

        if not target_name or (is_virtual and target_name=='www'):
            # Return target index
            bucket_list = { target: f"/{target}/" for target in app.settings.get_browseable_targets()}
            if app.settings.ui:
                return templates.TemplateResponse("index.html", {"request": request, "links": bucket_list})
            else:
                xml = get_bucket_list_xml(bucket_list)
                return Response(content=xml, status_code=200, media_type="application/xml")

        target_config = app.settings.get_target_config(target_name)
        if not target_config:
            return get_nosuchbucket_response(target_name)

        client = get_client(target_name)
        if client is None:
            raise HTTPException(status_code=500, detail="Client for target bucket not found")

        if 'acl' in request.query_params:
            return get_read_access_acl()

        if list_type:
            if not target_path:
                if list_type == 2:
                    return await client.list_objects_v2(
                        continuation_token, delimiter, encoding_type,
                        fetch_owner, max_keys, prefix, start_after)
                else:
                    raise HTTPException(status_code=400, detail="Invalid list type")
            else:
                # A list-type parameter on an object path is ignored
                range_header = request.headers.get("range")
                return await client.get_object(target_path, range_header)

        if not target_path or target_path.endswith("/"):
            if app.settings.ui:
                return await browse_bucket(request, target_name, target_path,
                                           continuation_token=continuation_token,
                                           max_keys=100,
                                           is_virtual=is_virtual)
            else:
                return get_nosuchbucket_response(target_name)
        else:
            range_header = request.headers.get("range")
            return await client.get_object(target_path, range_header)


    # The leading slash matches the GET route above, so ``path`` is captured
    # in the same form (without a leading '/') for both methods.
    @app.head("/{path:path}")
    async def head_object(request: Request, path: str):
        """Handle HEAD requests by delegating to the target client's ``head_object``."""
        target_name, target_path, _ = get_target(request, path)
        if not target_name:
            return get_nosuchbucket_response('')

        try:
            target_config = app.settings.get_target_config(target_name)
            if not target_config:
                return get_nosuchbucket_response(target_name)

            client = get_client(target_name)
            if client is None:
                raise HTTPException(status_code=500, detail="Client for target bucket not found")

            return await client.head_object(target_path)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that task
            # cancellation and interpreter shutdown are not swallowed.
            logger.opt(exception=sys.exc_info()).info("Error requesting head")
            return JSONResponse({"error":"Error requesting HEAD"}, status_code=500)

    return app
303
+
304
+
305
# Module-level application instance. The ``get_settings`` callable itself is
# passed (not its result), so create_app's startup handler invokes it when the
# server starts.
app = create_app(get_settings)

if __name__ == "__main__":
    # Run the app with uvicorn when this module is executed as a script.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
x2s3/client.py ADDED
@@ -0,0 +1,34 @@
1
from typing import Optional


class ProxyClient:
    """ Interface for a client that implements an S3-like interface
        to key-value access against some backend service.

        Note that this interface does not try to encode the entire S3 API.
        We only care about the bare-bones functionality that is required
        for viewers like Neuroglancer, N5 Viewer, Vizarr, etc.

        Subclasses must override every method. The base implementations
        raise NotImplementedError so that a missing override fails loudly
        instead of silently returning None to the web framework.
    """

    async def head_object(self, key: str):
        """
        Basic interface for AWS S3's HeadObject API.
        https://docs.aws.amazon.com/AmazonS3/latest/API/API_HeadObject.html

        Args:
            key: Object key, relative to the target.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(
            f"{type(self).__name__} does not implement head_object")

    async def get_object(self, key: str, range_header: Optional[str] = None):
        """
        Basic interface for AWS S3's GetObject API.
        https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObject.html

        Args:
            key: Object key, relative to the target.
            range_header: Value of the request's ``Range`` header, if any.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(
            f"{type(self).__name__} does not implement get_object")

    async def list_objects_v2(self,
                              continuation_token: Optional[str],
                              delimiter: Optional[str],
                              encoding_type: Optional[str],
                              fetch_owner: Optional[bool],
                              max_keys: Optional[int],
                              prefix: Optional[str],
                              start_after: Optional[str]):
        """
        Basic interface for AWS S3's ListObjectsV2 API.
        https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html

        The parameters mirror the ListObjectsV2 query parameters of the same
        names; any of them may be None when the caller did not supply it.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(
            f"{type(self).__name__} does not implement list_objects_v2")
34
+
x2s3/client_aioboto.py ADDED
@@ -0,0 +1,292 @@
1
+ import os
2
+ import sys
3
+ import typing
4
+ from typing_extensions import override
5
+
6
+ from loguru import logger
7
+ from starlette.background import BackgroundTask
8
+ import botocore
9
+ from botocore.exceptions import NoCredentialsError, PartialCredentialsError
10
+ from aiobotocore.session import get_session
11
+ from aiobotocore.config import AioConfig
12
+ from fastapi.responses import Response, StreamingResponse, JSONResponse
13
+
14
+ from x2s3.utils import *
15
+ from x2s3.client import ProxyClient
16
+
17
def handle_s3_exception(e, key=None):
    """ Handle various cases of generic errors from the boto AWS API.

    Args:
        e: The exception raised while talking to the S3 endpoint.
        key: The user-facing key (or prefix) the request was about; passed
            to the NoSuchKey response.

    Returns:
        A response object describing the error. This function never raises
        for a malformed ``ClientError`` payload; missing fields fall back to
        ``'Unknown'`` / status 500.
    """
    if isinstance(e, (NoCredentialsError, PartialCredentialsError)):
        # Log the exception we were handed rather than whatever happens to be
        # in sys.exc_info(), so this also works outside an ``except`` block.
        logger.opt(exception=e).error("AWS credentials not configured properly")
        # Missing credentials are a server-side configuration fault, so report
        # 500 rather than 408 (Request Timeout).
        return JSONResponse({"error":"AWS credentials not configured properly"}, status_code=500)

    if isinstance(e, botocore.exceptions.ReadTimeoutError):
        return JSONResponse({"error":"Upstream endpoint timed out"}, status_code=408)

    if isinstance(e, botocore.exceptions.ClientError):
        error = e.response.get('Error', {})
        error_code = error.get('Code', 'Unknown')
        if error_code == "NoSuchKey":
            return get_nosuchkey_response(key)

        status_code = e.response.get('ResponseMetadata', {}).get('HTTPStatusCode', 500)
        message = error.get('Message', 'Unknown')
        resource = error.get('Resource', 'Unknown')
        return get_error_response(status_code, error_code, message, resource)

    logger.opt(exception=e).error("Error communicating with AWS S3")
    return JSONResponse({"error":"Error communicating with AWS S3"}, status_code=500)
38
+
39
+
40
class AiobotoProxyClient(ProxyClient):
    """ Proxy client that serves a target from an S3 bucket via aiobotocore.

        An optional configured prefix confines the target to one part of
        the bucket: it is prepended to every key sent to S3 and stripped
        from keys returned in listings.
    """

    def __init__(self, proxy_kwargs, **kwargs):
        """
        Args:
            proxy_kwargs: Proxy-level options; must contain ``target_name``.
            **kwargs: Target options. ``bucket`` is required. ``prefix`` and
                ``endpoint`` are optional. If ``access_key_path`` is given,
                ``secret_key_path`` is required too and both files are read
                for credentials; otherwise requests are made unsigned.

        Raises:
            KeyError: If a required option is missing.
            OSError: If a credentials file cannot be read.
        """
        self.proxy_kwargs = proxy_kwargs or {}
        self.target_name = self.proxy_kwargs['target_name']
        self.bucket_name = kwargs['bucket']
        self.bucket_prefix = kwargs.get('prefix')

        self.anonymous = True
        access_key, secret_key = '', ''

        if 'access_key_path' in kwargs:
            self.anonymous = False
            access_key_path = kwargs['access_key_path']
            secret_key_path = kwargs['secret_key_path']

            with open(access_key_path, 'r') as ak_file:
                access_key = ak_file.read().strip()

            with open(secret_key_path, 'r') as sk_file:
                secret_key = sk_file.read().strip()

        self.client_kwargs = {
            'aws_access_key_id': access_key,
            'aws_secret_access_key': secret_key,
        }

        if 'endpoint' in kwargs:
            self.client_kwargs['endpoint_url'] = kwargs.get('endpoint')


    def get_client_creator(self):
        """ Return a new S3 client context manager (unsigned when anonymous). """
        session = get_session()
        conf = AioConfig(signature_version=botocore.UNSIGNED) if self.anonymous else AioConfig()
        return session.create_client('s3', config=conf, **self.client_kwargs)


    def _real_key(self, key):
        """ Prepend the configured bucket prefix to a user-supplied key.

            S3 keys always use '/' as the separator, so the join is done on
            strings rather than with ``os.path.join``. The latter discards
            the prefix when ``key`` starts with '/', which would let a
            request reach outside the configured prefix, and it uses the
            platform's separator.
        """
        if not self.bucket_prefix:
            return key
        if not key:
            return self.bucket_prefix
        base = self.bucket_prefix
        if not base.endswith('/'):
            base += '/'
        return base + key


    @override
    async def head_object(self, key: str):
        """ Return a body-less response carrying the object's metadata headers.

            Any failure is converted to an error response by
            ``handle_s3_exception``.
        """
        real_key = self._real_key(key)

        async with self.get_client_creator() as client:
            try:
                s3_res = await client.head_object(Bucket=self.bucket_name, Key=real_key)
                headers = {
                    "ETag": s3_res.get("ETag"),
                    "Accept-Ranges": "bytes",
                    "Content-Length": str(s3_res.get("ContentLength")),
                    "Last-Modified": s3_res.get("LastModified").strftime("%a, %d %b %Y %H:%M:%S GMT"),
                }

                content_type = guess_content_type(real_key)
                headers['Content-Type'] = content_type

                return Response(headers=headers)
            except Exception as e:
                return handle_s3_exception(e, key)


    @override
    async def get_object(self, key: str, range_header: str = None):
        """ Return a streaming response for the object.

            The S3 request itself is made later, when the returned
            ``S3Stream`` is sent; this method only prepares headers.
        """
        real_key = self._real_key(key)

        # Last '/'-separated component of the key
        filename = real_key.rsplit('/', 1)[-1]
        content_type = guess_content_type(filename)

        headers = {
            'Accept-Ranges': "bytes",
            'Content-Type': content_type,
        }

        # Offer unrecognised content as a download
        if content_type=='application/octet-stream':
            headers['Content-Disposition'] = f'attachment; filename="{filename}"'

        try:
            return S3Stream(
                self.get_client_creator,
                headers=headers,
                media_type=content_type,
                bucket=self.bucket_name,
                key=key,
                real_key=real_key,
                range_header=range_header,
            )
        except Exception as e:
            return handle_s3_exception(e, key)


    @override
    async def list_objects_v2(self,
                              continuation_token: typing.Optional[str],
                              delimiter: typing.Optional[str],
                              encoding_type: typing.Optional[str],
                              fetch_owner: typing.Optional[bool],
                              max_keys: typing.Optional[int],
                              prefix: typing.Optional[str],
                              start_after: typing.Optional[str]):
        """ List objects and return the result as an XML response.

            Parameters that are None are omitted from the S3 request. Keys
            and common prefixes in the result have the configured bucket
            prefix removed. Any failure is converted to an error response by
            ``handle_s3_exception``.
        """
        # prefix user-supplied prefix with configured prefix
        real_prefix = self._real_key(prefix)

        # ensure the prefix ends with a slash
        if real_prefix and not real_prefix.endswith('/'):
            real_prefix += '/'

        async with self.get_client_creator() as client:
            try:
                params = {
                    "Bucket": self.bucket_name,
                    "ContinuationToken": continuation_token,
                    "Delimiter": delimiter,
                    "EncodingType": encoding_type,
                    "FetchOwner": fetch_owner,
                    "MaxKeys": max_keys,
                    "Prefix": real_prefix,
                    "StartAfter": start_after
                }
                # Remove any None values because boto3 doesn't like those
                params = {k: v for k, v in params.items() if v is not None}

                response = await client.list_objects_v2(**params)
                next_token = remove_prefix(self.bucket_prefix, response.get("NextContinuationToken", ""))
                is_truncated = "true" if response.get("IsTruncated", False) else "false"

                contents = [
                    {
                        'Key': remove_prefix(self.bucket_prefix, obj["Key"]),
                        'LastModified': obj["LastModified"].isoformat(),
                        'ETag': obj.get("ETag"),
                        'Size': obj.get("Size"),
                        'StorageClass': obj.get("StorageClass")
                    }
                    for obj in response.get("Contents", [])
                ]

                common_prefixes = [
                    remove_prefix(self.bucket_prefix, cp["Prefix"])
                    for cp in response.get("CommonPrefixes", [])
                ]

                # Echo the caller's own (un-prefixed) parameters back
                kwargs = {
                    'Name': self.target_name,
                    'Prefix': prefix,
                    'Delimiter': delimiter,
                    'MaxKeys': max_keys,
                    'EncodingType': encoding_type,
                    'KeyCount': response.get("KeyCount", 0),
                    'IsTruncated': is_truncated,
                    'ContinuationToken': continuation_token,
                    'NextContinuationToken': next_token,
                    'StartAfter': start_after
                }

                xml = get_list_xml(contents, common_prefixes, **kwargs)
                return Response(content=xml, media_type="application/xml")

            except Exception as e:
                return handle_s3_exception(e, key=prefix)
204
+
205
+
206
+ # Adapted from https://stackoverflow.com/questions/69617252/response-file-stream-from-s3-fastapi
207
class S3Stream(StreamingResponse):
    """ Stream the result of GetObject.

        The S3 request is issued inside ``stream_response``, i.e. when the
        response is being sent, so the status code and the range/length
        headers are taken from the S3 reply.
    """
    def __init__(
            self,
            client_creator: typing.Callable,
            content: typing.Any = None,
            status_code: int = 200,
            headers: dict = None,
            media_type: str = None,
            background: BackgroundTask = None,
            bucket: str = None,
            key: str = None,
            real_key: str = None,
            range_header: str = None
        ):
        """
        Args:
            client_creator: Zero-argument callable returning an async
                context manager that yields an S3 client.
            content, status_code, headers, media_type, background: Passed
                through to ``StreamingResponse``.
            bucket: Name of the bucket to read from.
            key: Key as requested by the user; used in error responses.
            real_key: Key actually requested from S3.
            range_header: Optional ``Range`` header value to forward.
        """
        super().__init__(content, status_code, headers, media_type, background)
        self.client_creator = client_creator
        self.bucket = bucket
        self.key = key
        self.real_key = real_key
        self.range_header = range_header

    async def stream_response(self, send) -> None:
        """ Fetch the object from S3 and relay it to the ASGI ``send`` callable.

            If the request fails before any header was sent, an error
            response built by ``handle_s3_exception`` is sent instead. If it
            fails after the response has started, the error is logged and
            re-raised, because a second response cannot be started.
        """

        async def send_response(r):
            # Send a complete, non-streamed response
            await send({
                "type": "http.response.start",
                "status": r.status_code,
                "headers": r.raw_headers,
            })
            await send({
                "type": "http.response.body",
                "body": r.body,
                "more_body": False,
            })

        async with self.client_creator() as client:
            response_started = False
            try:
                # Get the object with the range specified in headers
                get_object_params = {
                    "Bucket": self.bucket,
                    "Key": self.real_key,
                }
                if self.range_header:
                    get_object_params["Range"] = self.range_header

                result = await client.get_object(**get_object_params)
                body = result["Body"]

                # Entering the body's context ensures the underlying HTTP
                # response is released even if streaming is interrupted.
                # The body object itself (not the value bound by ``as``) is
                # iterated for chunks.
                async with body:
                    res_headers = result["ResponseMetadata"]["HTTPHeaders"]

                    # Determine if this is a Range result
                    if "content-range" in res_headers:
                        self.status_code = 206  # Partial Content
                        self.raw_headers.append((b"content-range",
                            res_headers["content-range"].encode('utf-8')))

                    if "content-length" in res_headers:
                        self.raw_headers.append((b"content-length",
                            res_headers["content-length"].encode('utf-8')))

                    await send({
                        "type": "http.response.start",
                        "status": self.status_code,
                        "headers": self.raw_headers,
                    })
                    response_started = True

                    async for chunk in body:

                        if not isinstance(chunk, bytes):
                            chunk = chunk.encode(self.charset)

                        await send({
                            "type": "http.response.body",
                            "body": chunk,
                            "more_body": True
                        })

                    await send({
                        "type": "http.response.body",
                        "body": b"",
                        "more_body": False})

            except Exception as e:
                if response_started:
                    # Headers are already on the wire; starting another
                    # response would violate the ASGI protocol.
                    logger.opt(exception=e).error("Error while streaming S3 object")
                    raise
                r = handle_s3_exception(e, self.key)
                await send_response(r)