swift: swift-2.23.3-py3-none-any.whl → swift-2.35.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (206)
  1. swift/__init__.py +29 -50
  2. swift/account/auditor.py +21 -118
  3. swift/account/backend.py +33 -28
  4. swift/account/reaper.py +37 -28
  5. swift/account/replicator.py +22 -0
  6. swift/account/server.py +60 -26
  7. swift/account/utils.py +28 -11
  8. swift-2.23.3.data/scripts/swift-account-audit → swift/cli/account_audit.py +23 -13
  9. swift-2.23.3.data/scripts/swift-config → swift/cli/config.py +2 -2
  10. swift/cli/container_deleter.py +5 -11
  11. swift-2.23.3.data/scripts/swift-dispersion-populate → swift/cli/dispersion_populate.py +8 -7
  12. swift/cli/dispersion_report.py +10 -9
  13. swift-2.23.3.data/scripts/swift-drive-audit → swift/cli/drive_audit.py +63 -21
  14. swift/cli/form_signature.py +3 -7
  15. swift-2.23.3.data/scripts/swift-get-nodes → swift/cli/get_nodes.py +8 -2
  16. swift/cli/info.py +154 -14
  17. swift/cli/manage_shard_ranges.py +705 -37
  18. swift-2.23.3.data/scripts/swift-oldies → swift/cli/oldies.py +25 -14
  19. swift-2.23.3.data/scripts/swift-orphans → swift/cli/orphans.py +7 -3
  20. swift/cli/recon.py +196 -67
  21. swift-2.23.3.data/scripts/swift-recon-cron → swift/cli/recon_cron.py +17 -20
  22. swift-2.23.3.data/scripts/swift-reconciler-enqueue → swift/cli/reconciler_enqueue.py +2 -3
  23. swift/cli/relinker.py +807 -126
  24. swift/cli/reload.py +135 -0
  25. swift/cli/ringbuilder.py +217 -20
  26. swift/cli/ringcomposer.py +0 -1
  27. swift/cli/shard-info.py +4 -3
  28. swift/common/base_storage_server.py +9 -20
  29. swift/common/bufferedhttp.py +48 -74
  30. swift/common/constraints.py +20 -15
  31. swift/common/container_sync_realms.py +9 -11
  32. swift/common/daemon.py +25 -8
  33. swift/common/db.py +195 -128
  34. swift/common/db_auditor.py +168 -0
  35. swift/common/db_replicator.py +95 -55
  36. swift/common/digest.py +141 -0
  37. swift/common/direct_client.py +144 -33
  38. swift/common/error_limiter.py +93 -0
  39. swift/common/exceptions.py +25 -1
  40. swift/common/header_key_dict.py +2 -9
  41. swift/common/http_protocol.py +373 -0
  42. swift/common/internal_client.py +129 -59
  43. swift/common/linkat.py +3 -4
  44. swift/common/manager.py +284 -67
  45. swift/common/memcached.py +390 -145
  46. swift/common/middleware/__init__.py +4 -0
  47. swift/common/middleware/account_quotas.py +211 -46
  48. swift/common/middleware/acl.py +3 -8
  49. swift/common/middleware/backend_ratelimit.py +230 -0
  50. swift/common/middleware/bulk.py +22 -34
  51. swift/common/middleware/catch_errors.py +1 -3
  52. swift/common/middleware/cname_lookup.py +6 -11
  53. swift/common/middleware/container_quotas.py +1 -1
  54. swift/common/middleware/container_sync.py +39 -17
  55. swift/common/middleware/copy.py +12 -0
  56. swift/common/middleware/crossdomain.py +22 -9
  57. swift/common/middleware/crypto/__init__.py +2 -1
  58. swift/common/middleware/crypto/crypto_utils.py +11 -15
  59. swift/common/middleware/crypto/decrypter.py +28 -11
  60. swift/common/middleware/crypto/encrypter.py +12 -17
  61. swift/common/middleware/crypto/keymaster.py +8 -15
  62. swift/common/middleware/crypto/kms_keymaster.py +2 -1
  63. swift/common/middleware/dlo.py +15 -11
  64. swift/common/middleware/domain_remap.py +5 -4
  65. swift/common/middleware/etag_quoter.py +128 -0
  66. swift/common/middleware/formpost.py +73 -70
  67. swift/common/middleware/gatekeeper.py +8 -1
  68. swift/common/middleware/keystoneauth.py +33 -3
  69. swift/common/middleware/list_endpoints.py +4 -4
  70. swift/common/middleware/listing_formats.py +85 -49
  71. swift/common/middleware/memcache.py +4 -95
  72. swift/common/middleware/name_check.py +3 -2
  73. swift/common/middleware/proxy_logging.py +160 -92
  74. swift/common/middleware/ratelimit.py +17 -10
  75. swift/common/middleware/read_only.py +6 -4
  76. swift/common/middleware/recon.py +59 -22
  77. swift/common/middleware/s3api/acl_handlers.py +25 -3
  78. swift/common/middleware/s3api/acl_utils.py +6 -1
  79. swift/common/middleware/s3api/controllers/__init__.py +6 -0
  80. swift/common/middleware/s3api/controllers/acl.py +3 -2
  81. swift/common/middleware/s3api/controllers/bucket.py +242 -137
  82. swift/common/middleware/s3api/controllers/logging.py +2 -2
  83. swift/common/middleware/s3api/controllers/multi_delete.py +43 -20
  84. swift/common/middleware/s3api/controllers/multi_upload.py +219 -133
  85. swift/common/middleware/s3api/controllers/obj.py +112 -8
  86. swift/common/middleware/s3api/controllers/object_lock.py +44 -0
  87. swift/common/middleware/s3api/controllers/s3_acl.py +2 -2
  88. swift/common/middleware/s3api/controllers/tagging.py +57 -0
  89. swift/common/middleware/s3api/controllers/versioning.py +36 -7
  90. swift/common/middleware/s3api/etree.py +22 -9
  91. swift/common/middleware/s3api/exception.py +0 -4
  92. swift/common/middleware/s3api/s3api.py +113 -41
  93. swift/common/middleware/s3api/s3request.py +384 -218
  94. swift/common/middleware/s3api/s3response.py +126 -23
  95. swift/common/middleware/s3api/s3token.py +16 -17
  96. swift/common/middleware/s3api/schema/delete.rng +1 -1
  97. swift/common/middleware/s3api/subresource.py +7 -10
  98. swift/common/middleware/s3api/utils.py +27 -10
  99. swift/common/middleware/slo.py +665 -358
  100. swift/common/middleware/staticweb.py +64 -37
  101. swift/common/middleware/symlink.py +51 -18
  102. swift/common/middleware/tempauth.py +76 -58
  103. swift/common/middleware/tempurl.py +191 -173
  104. swift/common/middleware/versioned_writes/__init__.py +51 -0
  105. swift/common/middleware/{versioned_writes.py → versioned_writes/legacy.py} +27 -26
  106. swift/common/middleware/versioned_writes/object_versioning.py +1482 -0
  107. swift/common/middleware/x_profile/exceptions.py +1 -4
  108. swift/common/middleware/x_profile/html_viewer.py +18 -19
  109. swift/common/middleware/x_profile/profile_model.py +1 -2
  110. swift/common/middleware/xprofile.py +10 -10
  111. swift-2.23.3.data/scripts/swift-container-server → swift/common/recon.py +13 -8
  112. swift/common/registry.py +147 -0
  113. swift/common/request_helpers.py +324 -57
  114. swift/common/ring/builder.py +67 -25
  115. swift/common/ring/composite_builder.py +1 -1
  116. swift/common/ring/ring.py +177 -51
  117. swift/common/ring/utils.py +1 -1
  118. swift/common/splice.py +10 -6
  119. swift/common/statsd_client.py +205 -0
  120. swift/common/storage_policy.py +49 -44
  121. swift/common/swob.py +86 -102
  122. swift/common/{utils.py → utils/__init__.py} +2163 -2772
  123. swift/common/utils/base.py +131 -0
  124. swift/common/utils/config.py +433 -0
  125. swift/common/utils/ipaddrs.py +256 -0
  126. swift/common/utils/libc.py +345 -0
  127. swift/common/utils/logs.py +859 -0
  128. swift/common/utils/timestamp.py +412 -0
  129. swift/common/wsgi.py +553 -535
  130. swift/container/auditor.py +14 -100
  131. swift/container/backend.py +490 -231
  132. swift/container/reconciler.py +126 -37
  133. swift/container/replicator.py +96 -22
  134. swift/container/server.py +358 -165
  135. swift/container/sharder.py +1540 -684
  136. swift/container/sync.py +94 -88
  137. swift/container/updater.py +53 -32
  138. swift/obj/auditor.py +153 -35
  139. swift/obj/diskfile.py +466 -217
  140. swift/obj/expirer.py +406 -124
  141. swift/obj/mem_diskfile.py +7 -4
  142. swift/obj/mem_server.py +1 -0
  143. swift/obj/reconstructor.py +523 -262
  144. swift/obj/replicator.py +249 -188
  145. swift/obj/server.py +207 -122
  146. swift/obj/ssync_receiver.py +145 -85
  147. swift/obj/ssync_sender.py +113 -54
  148. swift/obj/updater.py +652 -139
  149. swift/obj/watchers/__init__.py +0 -0
  150. swift/obj/watchers/dark_data.py +213 -0
  151. swift/proxy/controllers/account.py +11 -11
  152. swift/proxy/controllers/base.py +848 -604
  153. swift/proxy/controllers/container.py +433 -92
  154. swift/proxy/controllers/info.py +3 -2
  155. swift/proxy/controllers/obj.py +1000 -489
  156. swift/proxy/server.py +185 -112
  157. {swift-2.23.3.dist-info → swift-2.35.0.dist-info}/AUTHORS +58 -11
  158. {swift-2.23.3.dist-info → swift-2.35.0.dist-info}/METADATA +51 -56
  159. swift-2.35.0.dist-info/RECORD +201 -0
  160. {swift-2.23.3.dist-info → swift-2.35.0.dist-info}/WHEEL +1 -1
  161. {swift-2.23.3.dist-info → swift-2.35.0.dist-info}/entry_points.txt +43 -0
  162. swift-2.35.0.dist-info/pbr.json +1 -0
  163. swift/locale/de/LC_MESSAGES/swift.po +0 -1216
  164. swift/locale/en_GB/LC_MESSAGES/swift.po +0 -1207
  165. swift/locale/es/LC_MESSAGES/swift.po +0 -1085
  166. swift/locale/fr/LC_MESSAGES/swift.po +0 -909
  167. swift/locale/it/LC_MESSAGES/swift.po +0 -894
  168. swift/locale/ja/LC_MESSAGES/swift.po +0 -965
  169. swift/locale/ko_KR/LC_MESSAGES/swift.po +0 -964
  170. swift/locale/pt_BR/LC_MESSAGES/swift.po +0 -881
  171. swift/locale/ru/LC_MESSAGES/swift.po +0 -891
  172. swift/locale/tr_TR/LC_MESSAGES/swift.po +0 -832
  173. swift/locale/zh_CN/LC_MESSAGES/swift.po +0 -833
  174. swift/locale/zh_TW/LC_MESSAGES/swift.po +0 -838
  175. swift-2.23.3.data/scripts/swift-account-auditor +0 -23
  176. swift-2.23.3.data/scripts/swift-account-info +0 -51
  177. swift-2.23.3.data/scripts/swift-account-reaper +0 -23
  178. swift-2.23.3.data/scripts/swift-account-replicator +0 -34
  179. swift-2.23.3.data/scripts/swift-account-server +0 -23
  180. swift-2.23.3.data/scripts/swift-container-auditor +0 -23
  181. swift-2.23.3.data/scripts/swift-container-info +0 -55
  182. swift-2.23.3.data/scripts/swift-container-reconciler +0 -21
  183. swift-2.23.3.data/scripts/swift-container-replicator +0 -34
  184. swift-2.23.3.data/scripts/swift-container-sharder +0 -37
  185. swift-2.23.3.data/scripts/swift-container-sync +0 -23
  186. swift-2.23.3.data/scripts/swift-container-updater +0 -23
  187. swift-2.23.3.data/scripts/swift-dispersion-report +0 -24
  188. swift-2.23.3.data/scripts/swift-form-signature +0 -20
  189. swift-2.23.3.data/scripts/swift-init +0 -119
  190. swift-2.23.3.data/scripts/swift-object-auditor +0 -29
  191. swift-2.23.3.data/scripts/swift-object-expirer +0 -33
  192. swift-2.23.3.data/scripts/swift-object-info +0 -60
  193. swift-2.23.3.data/scripts/swift-object-reconstructor +0 -33
  194. swift-2.23.3.data/scripts/swift-object-relinker +0 -41
  195. swift-2.23.3.data/scripts/swift-object-replicator +0 -37
  196. swift-2.23.3.data/scripts/swift-object-server +0 -27
  197. swift-2.23.3.data/scripts/swift-object-updater +0 -23
  198. swift-2.23.3.data/scripts/swift-proxy-server +0 -23
  199. swift-2.23.3.data/scripts/swift-recon +0 -24
  200. swift-2.23.3.data/scripts/swift-ring-builder +0 -24
  201. swift-2.23.3.data/scripts/swift-ring-builder-analyzer +0 -22
  202. swift-2.23.3.data/scripts/swift-ring-composer +0 -22
  203. swift-2.23.3.dist-info/RECORD +0 -220
  204. swift-2.23.3.dist-info/pbr.json +0 -1
  205. {swift-2.23.3.dist-info → swift-2.35.0.dist-info}/LICENSE +0 -0
  206. {swift-2.23.3.dist-info → swift-2.35.0.dist-info}/top_level.txt +0 -0
@@ -24,28 +24,26 @@
24
24
  # These shenanigans are to ensure all related objects can be garbage
25
25
  # collected. We've seen objects hang around forever otherwise.
26
26
 
27
- from six.moves.urllib.parse import quote
27
+ from urllib.parse import quote
28
28
 
29
- import os
30
29
  import time
31
30
  import json
32
31
  import functools
33
32
  import inspect
34
33
  import itertools
35
34
  import operator
35
+ import random
36
36
  from copy import deepcopy
37
- from sys import exc_info
38
- from swift import gettext_ as _
39
37
 
40
- from eventlet import sleep
41
38
  from eventlet.timeout import Timeout
42
- import six
43
39
 
40
+ from swift.common.memcached import MemcacheConnectionError
44
41
  from swift.common.wsgi import make_pre_authed_env, make_pre_authed_request
45
- from swift.common.utils import Timestamp, config_true_value, \
42
+ from swift.common.utils import Timestamp, WatchdogTimeout, config_true_value, \
46
43
  public, split_path, list_from_csv, GreenthreadSafeIterator, \
47
- GreenAsyncPile, quorum_size, parse_content_type, close_if_possible, \
48
- document_iters_to_http_response_body, ShardRange, find_shard_range
44
+ GreenAsyncPile, quorum_size, parse_content_type, drain_and_close, \
45
+ document_iters_to_http_response_body, cache_from_env, \
46
+ CooperativeIterator, NamespaceBoundList, Namespace, ClosingMapper
49
47
  from swift.common.bufferedhttp import http_connect
50
48
  from swift.common import constraints
51
49
  from swift.common.exceptions import ChunkReadTimeout, ChunkWriteTimeout, \
@@ -54,20 +52,23 @@ from swift.common.header_key_dict import HeaderKeyDict
54
52
  from swift.common.http import is_informational, is_success, is_redirection, \
55
53
  is_server_error, HTTP_OK, HTTP_PARTIAL_CONTENT, HTTP_MULTIPLE_CHOICES, \
56
54
  HTTP_BAD_REQUEST, HTTP_NOT_FOUND, HTTP_SERVICE_UNAVAILABLE, \
57
- HTTP_INSUFFICIENT_STORAGE, HTTP_UNAUTHORIZED, HTTP_CONTINUE, HTTP_GONE
55
+ HTTP_UNAUTHORIZED, HTTP_CONTINUE, HTTP_GONE, \
56
+ HTTP_REQUESTED_RANGE_NOT_SATISFIABLE
58
57
  from swift.common.swob import Request, Response, Range, \
59
58
  HTTPException, HTTPRequestedRangeNotSatisfiable, HTTPServiceUnavailable, \
60
- status_map, wsgi_to_str, str_to_wsgi, wsgi_quote
59
+ status_map, wsgi_to_str, str_to_wsgi, wsgi_quote, wsgi_unquote, \
60
+ normalize_etag
61
61
  from swift.common.request_helpers import strip_sys_meta_prefix, \
62
62
  strip_user_meta_prefix, is_user_meta, is_sys_meta, is_sys_or_user_meta, \
63
63
  http_response_to_document_iters, is_object_transient_sysmeta, \
64
- strip_object_transient_sysmeta_prefix
64
+ strip_object_transient_sysmeta_prefix, get_ip_port, get_user_meta_prefix, \
65
+ get_sys_meta_prefix, is_use_replication_network
65
66
  from swift.common.storage_policy import POLICIES
66
67
 
67
-
68
68
  DEFAULT_RECHECK_ACCOUNT_EXISTENCE = 60 # seconds
69
69
  DEFAULT_RECHECK_CONTAINER_EXISTENCE = 60 # seconds
70
70
  DEFAULT_RECHECK_UPDATING_SHARD_RANGES = 3600 # seconds
71
+ DEFAULT_RECHECK_LISTING_SHARD_RANGES = 600 # seconds
71
72
 
72
73
 
73
74
  def update_headers(response, headers):
@@ -88,19 +89,6 @@ def update_headers(response, headers):
88
89
  response.headers[name] = value
89
90
 
90
91
 
91
- def source_key(resp):
92
- """
93
- Provide the timestamp of the swift http response as a floating
94
- point value. Used as a sort key.
95
-
96
- :param resp: bufferedhttp response object
97
- """
98
- return Timestamp(resp.getheader('x-backend-data-timestamp') or
99
- resp.getheader('x-backend-timestamp') or
100
- resp.getheader('x-put-timestamp') or
101
- resp.getheader('x-timestamp') or 0)
102
-
103
-
104
92
  def delay_denial(func):
105
93
  """
106
94
  Decorator to declare which methods should have any swift.authorize call
@@ -179,6 +167,7 @@ def headers_to_container_info(headers, status_int=HTTP_OK):
179
167
  'status': status_int,
180
168
  'read_acl': headers.get('x-container-read'),
181
169
  'write_acl': headers.get('x-container-write'),
170
+ 'sync_to': headers.get('x-container-sync-to'),
182
171
  'sync_key': headers.get('x-container-sync-key'),
183
172
  'object_count': headers.get('x-container-object-count'),
184
173
  'bytes': headers.get('x-container-bytes-used'),
@@ -192,9 +181,100 @@ def headers_to_container_info(headers, status_int=HTTP_OK):
192
181
  'meta': meta,
193
182
  'sysmeta': sysmeta,
194
183
  'sharding_state': headers.get('x-backend-sharding-state', 'unsharded'),
184
+ # the 'internal' format version of timestamps is cached since the
185
+ # normal format can be derived from this when required
186
+ 'created_at': headers.get('x-backend-timestamp'),
187
+ 'put_timestamp': headers.get('x-backend-put-timestamp'),
188
+ 'delete_timestamp': headers.get('x-backend-delete-timestamp'),
189
+ 'status_changed_at': headers.get('x-backend-status-changed-at'),
195
190
  }
196
191
 
197
192
 
193
+ def headers_from_container_info(info):
194
+ """
195
+ Construct a HeaderKeyDict from a container info dict.
196
+
197
+ :param info: a dict of container metadata
198
+ :returns: a HeaderKeyDict or None if info is None or any required headers
199
+ could not be constructed
200
+ """
201
+ if not info:
202
+ return None
203
+
204
+ required = (
205
+ ('x-backend-timestamp', 'created_at'),
206
+ ('x-backend-put-timestamp', 'put_timestamp'),
207
+ ('x-backend-delete-timestamp', 'delete_timestamp'),
208
+ ('x-backend-status-changed-at', 'status_changed_at'),
209
+ ('x-backend-storage-policy-index', 'storage_policy'),
210
+ ('x-container-object-count', 'object_count'),
211
+ ('x-container-bytes-used', 'bytes'),
212
+ ('x-backend-sharding-state', 'sharding_state'),
213
+ )
214
+ required_normal_format_timestamps = (
215
+ ('x-timestamp', 'created_at'),
216
+ ('x-put-timestamp', 'put_timestamp'),
217
+ )
218
+ optional = (
219
+ ('x-container-read', 'read_acl'),
220
+ ('x-container-write', 'write_acl'),
221
+ ('x-container-sync-key', 'sync_key'),
222
+ ('x-container-sync-to', 'sync_to'),
223
+ ('x-versions-location', 'versions'),
224
+ )
225
+ cors_optional = (
226
+ ('access-control-allow-origin', 'allow_origin'),
227
+ ('access-control-expose-headers', 'expose_headers'),
228
+ ('access-control-max-age', 'max_age')
229
+ )
230
+
231
+ def lookup(info, key):
232
+ # raises KeyError or ValueError
233
+ val = info[key]
234
+ if val is None:
235
+ raise ValueError
236
+ return val
237
+
238
+ # note: required headers may be missing from info for example during
239
+ # upgrade when stale info is still in cache
240
+ headers = HeaderKeyDict()
241
+ for hdr, key in required:
242
+ try:
243
+ headers[hdr] = lookup(info, key)
244
+ except (KeyError, ValueError):
245
+ return None
246
+
247
+ for hdr, key in required_normal_format_timestamps:
248
+ try:
249
+ headers[hdr] = Timestamp(lookup(info, key)).normal
250
+ except (KeyError, ValueError):
251
+ return None
252
+
253
+ for hdr, key in optional:
254
+ try:
255
+ headers[hdr] = lookup(info, key)
256
+ except (KeyError, ValueError):
257
+ pass
258
+
259
+ policy_index = info.get('storage_policy')
260
+ headers['x-storage-policy'] = POLICIES[int(policy_index)].name
261
+ prefix = get_user_meta_prefix('container')
262
+ headers.update(
263
+ (prefix + k, v)
264
+ for k, v in info.get('meta', {}).items())
265
+ for hdr, key in cors_optional:
266
+ try:
267
+ headers[prefix + hdr] = lookup(info.get('cors'), key)
268
+ except (KeyError, ValueError):
269
+ pass
270
+ prefix = get_sys_meta_prefix('container')
271
+ headers.update(
272
+ (prefix + k, v)
273
+ for k, v in info.get('sysmeta', {}).items())
274
+
275
+ return headers
276
+
277
+
198
278
  def headers_to_object_info(headers, status_int=HTTP_OK):
199
279
  """
200
280
  Construct a cacheable dict of object info based on response headers.
@@ -282,6 +362,10 @@ def cors_validation(func):
282
362
  resp.headers['Access-Control-Allow-Origin'] = '*'
283
363
  else:
284
364
  resp.headers['Access-Control-Allow-Origin'] = req_origin
365
+ if 'Vary' in resp.headers:
366
+ resp.headers['Vary'] += ', Origin'
367
+ else:
368
+ resp.headers['Vary'] = 'Origin'
285
369
 
286
370
  return resp
287
371
  else:
@@ -319,11 +403,43 @@ def get_object_info(env, app, path=None, swift_source=None):
319
403
  return info
320
404
 
321
405
 
322
- def get_container_info(env, app, swift_source=None):
406
+ def _record_ac_info_cache_metrics(
407
+ app, cache_state, container=None, resp=None):
408
+ """
409
+ Record a single cache operation by account or container lookup into its
410
+ corresponding metrics.
411
+
412
+ :param app: the application object
413
+ :param cache_state: the state of this cache operation, includes
414
+ infocache_hit, memcache hit, miss, error, skip, force_skip
415
+ and disabled.
416
+ :param container: the container name
417
+ :param resp: the response from either backend or cache hit.
418
+ """
419
+ try:
420
+ proxy_app = app._pipeline_final_app
421
+ except AttributeError:
422
+ logger = None
423
+ else:
424
+ logger = proxy_app.logger
425
+ server_type = 'container' if container else 'account'
426
+ if logger:
427
+ record_cache_op_metrics(logger, server_type, 'info', cache_state, resp)
428
+
429
+
430
+ def get_container_info(env, app, swift_source=None, cache_only=False):
323
431
  """
324
432
  Get the info structure for a container, based on env and app.
325
433
  This is useful to middlewares.
326
434
 
435
+ :param env: the environment used by the current request
436
+ :param app: the application object
437
+ :param swift_source: Used to mark the request as originating out of
438
+ middleware. Will be logged in proxy logs.
439
+ :param cache_only: If true, indicates that caller doesn't want to HEAD the
440
+ backend container when cache miss.
441
+ :returns: the object info
442
+
327
443
  .. note::
328
444
 
329
445
  This call bypasses auth. Success does not imply that the request has
@@ -331,13 +447,28 @@ def get_container_info(env, app, swift_source=None):
331
447
  """
332
448
  (version, wsgi_account, wsgi_container, unused) = \
333
449
  split_path(env['PATH_INFO'], 3, 4, True)
450
+
451
+ if not constraints.valid_api_version(version):
452
+ # Not a valid Swift request; return 0 like we do
453
+ # if there's an account failure
454
+ return headers_to_container_info({}, 0)
455
+
334
456
  account = wsgi_to_str(wsgi_account)
335
457
  container = wsgi_to_str(wsgi_container)
336
458
 
459
+ # Try to cut through all the layers to the proxy app
460
+ # (while also preserving logging)
461
+ try:
462
+ logged_app = app._pipeline_request_logging_app
463
+ proxy_app = app._pipeline_final_app
464
+ except AttributeError:
465
+ logged_app = proxy_app = app
337
466
  # Check in environment cache and in memcache (in that order)
338
- info = _get_info_from_caches(app, env, account, container)
467
+ info, cache_state = _get_info_from_caches(
468
+ proxy_app, env, account, container)
339
469
 
340
- if not info:
470
+ resp = None
471
+ if not info and not cache_only:
341
472
  # Cache miss; go HEAD the container and populate the caches
342
473
  env.setdefault('swift.infocache', {})
343
474
  # Before checking the container, make sure the account exists.
@@ -347,29 +478,35 @@ def get_container_info(env, app, swift_source=None):
347
478
  # account is successful whether the account actually has .db files
348
479
  # on disk or not.
349
480
  is_autocreate_account = account.startswith(
350
- getattr(app, 'auto_create_account_prefix', '.'))
481
+ constraints.AUTO_CREATE_ACCOUNT_PREFIX)
351
482
  if not is_autocreate_account:
352
- account_info = get_account_info(env, app, swift_source)
483
+ account_info = get_account_info(env, logged_app, swift_source)
353
484
  if not account_info or not is_success(account_info['status']):
485
+ _record_ac_info_cache_metrics(
486
+ logged_app, cache_state, container)
354
487
  return headers_to_container_info({}, 0)
355
488
 
356
489
  req = _prepare_pre_auth_info_request(
357
490
  env, ("/%s/%s/%s" % (version, wsgi_account, wsgi_container)),
358
491
  (swift_source or 'GET_CONTAINER_INFO'))
359
- resp = req.get_response(app)
360
- close_if_possible(resp.app_iter)
492
+ # *Always* allow reserved names for get-info requests -- it's on the
493
+ # caller to keep the result private-ish
494
+ req.headers['X-Backend-Allow-Reserved-Names'] = 'true'
495
+ resp = req.get_response(logged_app)
496
+ drain_and_close(resp)
361
497
  # Check in infocache to see if the proxy (or anyone else) already
362
498
  # populated the cache for us. If they did, just use what's there.
363
499
  #
364
500
  # See similar comment in get_account_info() for justification.
365
501
  info = _get_info_from_infocache(env, account, container)
366
502
  if info is None:
367
- info = set_info_cache(app, env, account, container, resp)
503
+ info = set_info_cache(env, account, container, resp)
368
504
 
369
505
  if info:
370
506
  info = deepcopy(info) # avoid mutating what's in swift.infocache
371
507
  else:
372
- info = headers_to_container_info({}, 503)
508
+ status_int = 0 if cache_only else 503
509
+ info = headers_to_container_info({}, status_int)
373
510
 
374
511
  # Old data format in memcache immediately after a Swift upgrade; clean
375
512
  # it up so consumers of get_container_info() aren't exposed to it.
@@ -385,6 +522,18 @@ def get_container_info(env, app, swift_source=None):
385
522
  if info.get('sharding_state') is None:
386
523
  info['sharding_state'] = 'unsharded'
387
524
 
525
+ versions_cont = info.get('sysmeta', {}).get('versions-container', '')
526
+ if versions_cont:
527
+ versions_cont = wsgi_unquote(str_to_wsgi(
528
+ versions_cont)).split('/')[0]
529
+ versions_req = _prepare_pre_auth_info_request(
530
+ env, ("/%s/%s/%s" % (version, wsgi_account, versions_cont)),
531
+ (swift_source or 'GET_CONTAINER_INFO'))
532
+ versions_req.headers['X-Backend-Allow-Reserved-Names'] = 'true'
533
+ versions_info = get_container_info(versions_req.environ, app)
534
+ info['bytes'] = info['bytes'] + versions_info['bytes']
535
+
536
+ _record_ac_info_cache_metrics(logged_app, cache_state, container, resp)
388
537
  return info
389
538
 
390
539
 
@@ -401,19 +550,34 @@ def get_account_info(env, app, swift_source=None):
401
550
  :raises ValueError: when path doesn't contain an account
402
551
  """
403
552
  (version, wsgi_account, _junk) = split_path(env['PATH_INFO'], 2, 3, True)
553
+
554
+ if not constraints.valid_api_version(version):
555
+ return headers_to_account_info({}, 0)
556
+
404
557
  account = wsgi_to_str(wsgi_account)
405
558
 
559
+ # Try to cut through all the layers to the proxy app
560
+ # (while also preserving logging)
561
+ try:
562
+ app = app._pipeline_request_logging_app
563
+ except AttributeError:
564
+ pass
406
565
  # Check in environment cache and in memcache (in that order)
407
- info = _get_info_from_caches(app, env, account)
566
+ info, cache_state = _get_info_from_caches(app, env, account)
408
567
 
409
568
  # Cache miss; go HEAD the account and populate the caches
410
- if not info:
569
+ if info:
570
+ resp = None
571
+ else:
411
572
  env.setdefault('swift.infocache', {})
412
573
  req = _prepare_pre_auth_info_request(
413
574
  env, "/%s/%s" % (version, wsgi_account),
414
575
  (swift_source or 'GET_ACCOUNT_INFO'))
576
+ # *Always* allow reserved names for get-info requests -- it's on the
577
+ # caller to keep the result private-ish
578
+ req.headers['X-Backend-Allow-Reserved-Names'] = 'true'
415
579
  resp = req.get_response(app)
416
- close_if_possible(resp.app_iter)
580
+ drain_and_close(resp)
417
581
  # Check in infocache to see if the proxy (or anyone else) already
418
582
  # populated the cache for us. If they did, just use what's there.
419
583
  #
@@ -430,7 +594,7 @@ def get_account_info(env, app, swift_source=None):
430
594
  # memcache would defeat the purpose.
431
595
  info = _get_info_from_infocache(env, account)
432
596
  if info is None:
433
- info = set_info_cache(app, env, account, None, resp)
597
+ info = set_info_cache(env, account, None, resp)
434
598
 
435
599
  if info:
436
600
  info = info.copy() # avoid mutating what's in swift.infocache
@@ -443,6 +607,7 @@ def get_account_info(env, app, swift_source=None):
443
607
  else:
444
608
  info[field] = int(info[field])
445
609
 
610
+ _record_ac_info_cache_metrics(app, cache_state, container=None, resp=resp)
446
611
  return info
447
612
 
448
613
 
@@ -459,16 +624,10 @@ def get_cache_key(account, container=None, obj=None, shard=None):
459
624
  with obj)
460
625
  :returns: a (native) string cache_key
461
626
  """
462
- if six.PY2:
463
- def to_native(s):
464
- if s is None or isinstance(s, str):
465
- return s
466
- return s.encode('utf8')
467
- else:
468
- def to_native(s):
469
- if s is None or isinstance(s, str):
470
- return s
471
- return s.decode('utf8', 'surrogateescape')
627
+ def to_native(s):
628
+ if s is None or isinstance(s, str):
629
+ return s
630
+ return s.decode('utf8', 'surrogateescape')
472
631
 
473
632
  account = to_native(account)
474
633
  container = to_native(container)
@@ -479,7 +638,7 @@ def get_cache_key(account, container=None, obj=None, shard=None):
479
638
  raise ValueError('Shard cache key requires account and container')
480
639
  if obj:
481
640
  raise ValueError('Shard cache key cannot have obj')
482
- cache_key = 'shard-%s/%s/%s' % (shard, account, container)
641
+ cache_key = 'shard-%s-v2/%s/%s' % (shard, account, container)
483
642
  elif obj:
484
643
  if not (account and container):
485
644
  raise ValueError('Object cache key requires account and container')
@@ -497,11 +656,11 @@ def get_cache_key(account, container=None, obj=None, shard=None):
497
656
  return cache_key
498
657
 
499
658
 
500
- def set_info_cache(app, env, account, container, resp):
659
+ def set_info_cache(env, account, container, resp):
501
660
  """
502
661
  Cache info in both memcache and env.
503
662
 
504
- :param app: the application object
663
+ :param env: the WSGI request environment
505
664
  :param account: the unquoted account name
506
665
  :param container: the unquoted container name or None
507
666
  :param resp: the response received or None if info cache should be cleared
@@ -509,31 +668,27 @@ def set_info_cache(app, env, account, container, resp):
509
668
  :returns: the info that was placed into the cache, or None if the
510
669
  request status was not in (404, 410, 2xx).
511
670
  """
671
+ cache_key = get_cache_key(account, container)
512
672
  infocache = env.setdefault('swift.infocache', {})
673
+ memcache = cache_from_env(env, True)
674
+ if resp is None:
675
+ clear_info_cache(env, account, container)
676
+ return
513
677
 
514
- cache_time = None
515
- if container and resp:
678
+ if container:
516
679
  cache_time = int(resp.headers.get(
517
680
  'X-Backend-Recheck-Container-Existence',
518
681
  DEFAULT_RECHECK_CONTAINER_EXISTENCE))
519
- elif resp:
682
+ else:
520
683
  cache_time = int(resp.headers.get(
521
684
  'X-Backend-Recheck-Account-Existence',
522
685
  DEFAULT_RECHECK_ACCOUNT_EXISTENCE))
523
- cache_key = get_cache_key(account, container)
524
686
 
525
- if resp:
526
- if resp.status_int in (HTTP_NOT_FOUND, HTTP_GONE):
527
- cache_time *= 0.1
528
- elif not is_success(resp.status_int):
529
- cache_time = None
530
-
531
- # Next actually set both memcache and the env cache
532
- memcache = getattr(app, 'memcache', None) or env.get('swift.cache')
533
- if cache_time is None:
534
- infocache.pop(cache_key, None)
535
- if memcache:
536
- memcache.delete(cache_key)
687
+ if resp.status_int in (HTTP_NOT_FOUND, HTTP_GONE):
688
+ cache_time *= 0.1
689
+ elif not is_success(resp.status_int):
690
+ # If we got a response, it was unsuccessful, and it wasn't an
691
+ # "authoritative" failure, bail without touching caches.
537
692
  return
538
693
 
539
694
  if container:
@@ -574,16 +729,23 @@ def set_object_info_cache(app, env, account, container, obj, resp):
574
729
  return info
575
730
 
576
731
 
577
- def clear_info_cache(app, env, account, container=None):
732
+ def clear_info_cache(env, account, container=None, shard=None):
578
733
  """
579
734
  Clear the cached info in both memcache and env
580
735
 
581
- :param app: the application object
582
- :param env: the WSGI environment
736
+ :param env: the WSGI request environment
583
737
  :param account: the account name
584
- :param container: the containr name or None if setting info for containers
738
+ :param container: the container name if clearing info for containers, or
739
+ None
740
+ :param shard: the sharding state if clearing info for container shard
741
+ ranges, or None
585
742
  """
586
- set_info_cache(app, env, account, container, None)
743
+ cache_key = get_cache_key(account, container, shard=shard)
744
+ infocache = env.setdefault('swift.infocache', {})
745
+ memcache = cache_from_env(env, True)
746
+ infocache.pop(cache_key, None)
747
+ if memcache:
748
+ memcache.delete(cache_key)
587
749
 
588
750
 
589
751
  def _get_info_from_infocache(env, account, container=None):
@@ -603,6 +765,40 @@ def _get_info_from_infocache(env, account, container=None):
603
765
  return None
604
766
 
605
767
 
768
def record_cache_op_metrics(
        logger, server_type, op_type, cache_state, resp=None):
    """
    Increment the one metric that corresponds to a single cache operation.

    The metric name is built from the lower-cased ``server_type``, the
    ``op_type`` and the ``cache_state``; for anything other than a cache hit
    the backend response status is appended when a response is given.

    :param logger: the metrics logger; its ``increment`` method is called
        exactly once.
    :param server_type: 'account' or 'container'
    :param op_type: the name of the operation type, for example
        'shard_listing' or 'shard_updating'.
    :param cache_state: the state of this cache operation. For
        'infocache_hit' or a memcache 'hit' the lookup succeeded and ``resp``
        is expected to be None; for all other states, such as memcache
        'miss' or 'skip', which go to the backend, a valid ``resp`` is
        expected.
    :param resp: the response from the backend for all cases except cache
        hits.
    """
    server_type = server_type.lower()
    if cache_state == 'infocache_hit':
        metric = '%s.%s.infocache.hit' % (server_type, op_type)
    elif cache_state == 'hit':
        # memcache hits.
        metric = '%s.%s.cache.hit' % (server_type, op_type)
    elif resp:
        # cache_state is memcache miss, error, skip, force_skip or disabled,
        # and the backend was consulted.
        metric = '%s.%s.cache.%s.%d' % (
            server_type, op_type, cache_state, resp.status_int)
    else:
        # In some situations the backend is not looked up after a cache
        # miss, so there is no status to record.
        metric = '%s.%s.cache.%s' % (server_type, op_type, cache_state)
    logger.increment(metric)
800
+
801
+
606
802
  def _get_info_from_memcache(app, env, account, container=None):
607
803
  """
608
804
  Get cached account or container information from memcache
@@ -612,38 +808,37 @@ def _get_info_from_memcache(app, env, account, container=None):
612
808
  :param account: the account name
613
809
  :param container: the container name
614
810
 
615
- :returns: a dictionary of cached info on cache hit, None on miss. Also
616
- returns None if memcache is not in use.
811
+ :returns: a tuple of two values, the first is a dictionary of cached info
812
+ on cache hit, None on miss or if memcache is not in use; the second is
813
+ cache state.
617
814
  """
815
+ memcache = cache_from_env(env, True)
816
+ if not memcache:
817
+ return None, 'disabled'
818
+
819
+ try:
820
+ proxy_app = app._pipeline_final_app
821
+ except AttributeError:
822
+ # Only the middleware entry-points get a reference to the
823
+ # proxy-server app; if a middleware composes itself as multiple
824
+ # filters, we'll just have to choose a reasonable default
825
+ skip_chance = 0.0
826
+ else:
827
+ if container:
828
+ skip_chance = proxy_app.container_existence_skip_cache
829
+ else:
830
+ skip_chance = proxy_app.account_existence_skip_cache
831
+
618
832
  cache_key = get_cache_key(account, container)
619
- memcache = getattr(app, 'memcache', None) or env.get('swift.cache')
620
- if memcache:
833
+ if skip_chance and random.random() < skip_chance:
834
+ info = None
835
+ cache_state = 'skip'
836
+ else:
621
837
  info = memcache.get(cache_key)
622
- if info and six.PY2:
623
- # Get back to native strings
624
- new_info = {}
625
- for key in info:
626
- new_key = key.encode("utf-8") if isinstance(
627
- key, six.text_type) else key
628
- if isinstance(info[key], six.text_type):
629
- new_info[new_key] = info[key].encode("utf-8")
630
- elif isinstance(info[key], dict):
631
- new_info[new_key] = {}
632
- for subkey, value in info[key].items():
633
- new_subkey = subkey.encode("utf-8") if isinstance(
634
- subkey, six.text_type) else subkey
635
- if isinstance(value, six.text_type):
636
- new_info[new_key][new_subkey] = \
637
- value.encode("utf-8")
638
- else:
639
- new_info[new_key][new_subkey] = value
640
- else:
641
- new_info[new_key] = info[key]
642
- info = new_info
643
- if info:
644
- env.setdefault('swift.infocache', {})[cache_key] = info
645
- return info
646
- return None
838
+ cache_state = 'hit' if info else 'miss'
839
+ if info:
840
+ env.setdefault('swift.infocache', {})[cache_key] = info
841
+ return info, cache_state
647
842
 
648
843
 
649
844
  def _get_info_from_caches(app, env, account, container=None):
@@ -653,13 +848,81 @@ def _get_info_from_caches(app, env, account, container=None):
653
848
 
654
849
  :param app: the application object
655
850
  :param env: the environment used by the current request
656
- :returns: the cached info or None if not cached
851
+ :returns: a tuple of (the cached info or None if not cached, cache state)
657
852
  """
658
853
 
659
854
  info = _get_info_from_infocache(env, account, container)
660
- if info is None:
661
- info = _get_info_from_memcache(app, env, account, container)
662
- return info
855
+ if info:
856
+ cache_state = 'infocache_hit'
857
+ else:
858
+ info, cache_state = _get_info_from_memcache(
859
+ app, env, account, container)
860
+ return info, cache_state
861
+
862
+
863
def get_namespaces_from_cache(req, cache_key, skip_chance):
    """
    Look up cached namespaces, first in infocache and then in memcache.

    A non-empty value found in memcache is wrapped in a
    :class:`swift.common.utils.NamespaceBoundList` and stored in the
    request's infocache before being returned.

    :param req: a :class:`swift.common.swob.Request` object.
    :param cache_key: the cache key for both infocache and memcache.
    :param skip_chance: the probability of skipping the memcache look-up.
    :return: a tuple of (value, cache state). Value is an instance of
        :class:`swift.common.utils.NamespaceBoundList` if a non-empty list is
        found in infocache or memcache. Otherwise value is ``None``, for
        example if the memcache look-up was skipped, raised
        ``MemcacheConnectionError``, or found no value or an empty list.
        Cache state is one of ``'infocache_hit'``, ``'disabled'``,
        ``'skip'``, ``'error'``, ``'miss'`` or ``'hit'``.
    """
    # the per-request infocache takes precedence over memcache
    infocache = req.environ.setdefault('swift.infocache', {})
    cached = infocache.get(cache_key)
    if cached:
        return cached, 'infocache_hit'

    memcache = cache_from_env(req.environ, True)
    if not memcache:
        return None, 'disabled'
    if skip_chance and random.random() < skip_chance:
        return None, 'skip'

    try:
        bounds = memcache.get(cache_key, raise_on_error=True)
    except MemcacheConnectionError:
        return None, 'error'
    if not bounds:
        return None, 'miss'

    # remember the memcache result for the remainder of this request
    ns_bound_list = NamespaceBoundList(bounds)
    infocache[cache_key] = ns_bound_list
    return ns_bound_list, 'hit'
900
+
901
+
902
def set_namespaces_in_cache(req, cache_key, ns_bound_list, time):
    """
    Store a list of namespace bounds in infocache and memcache.

    The infocache entry is always written. Memcache is only written when
    ``cache_from_env`` returns a truthy cache object and ``ns_bound_list``
    is truthy.

    :param req: a :class:`swift.common.swob.Request` object.
    :param cache_key: the cache key for both infocache and memcache.
    :param ns_bound_list: a :class:`swift.common.utils.NamespaceBoundList`.
    :param time: how long the namespaces should remain in memcache.
    :return: the cache_state: ``'set'`` if memcache was written,
        ``'set_error'`` if the write raised ``MemcacheConnectionError``,
        otherwise ``'disabled'``.
    """
    req.environ.setdefault('swift.infocache', {})[cache_key] = ns_bound_list
    memcache = cache_from_env(req.environ, True)
    if not (memcache and ns_bound_list):
        return 'disabled'
    try:
        memcache.set(cache_key, ns_bound_list.bounds, time=time,
                     raise_on_error=True)
    except MemcacheConnectionError:
        return 'set_error'
    return 'set'
663
926
 
664
927
 
665
928
  def _prepare_pre_auth_info_request(env, path, swift_source):
@@ -739,6 +1002,9 @@ def _get_object_info(app, env, account, container, obj, swift_source=None):
739
1002
  # Not in cache, let's try the object servers
740
1003
  path = '/v1/%s/%s/%s' % (account, container, obj)
741
1004
  req = _prepare_pre_auth_info_request(env, path, swift_source)
1005
+ # *Always* allow reserved names for get-info requests -- it's on the
1006
+ # caller to keep the result private-ish
1007
+ req.headers['X-Backend-Allow-Reserved-Names'] = 'true'
742
1008
  resp = req.get_response(app)
743
1009
  # Unlike get_account_info() and get_container_info(), we don't save
744
1010
  # things in memcache, so we can store the info without network traffic,
@@ -791,6 +1057,33 @@ def bytes_to_skip(record_size, range_start):
791
1057
  return (record_size - (range_start % record_size)) % record_size
792
1058
 
793
1059
 
1060
def is_good_source(status, server_type):
    """
    Indicates whether or not the request made to the backend found
    what it was looking for.

    :param status: the status code of the response from the backend.
    :param server_type: the type of server: 'Account', 'Container' or 'Object'.
    :returns: True if the response status code is acceptable, False if not.
    """
    if (server_type == 'Object' and
            status == HTTP_REQUESTED_RANGE_NOT_SATISFIABLE):
        # for object servers only, a "requested range not satisfiable"
        # status is also treated as acceptable
        return True
    return is_success(status) or is_redirection(status)
1073
+
1074
+
1075
def is_useful_response(resp, node):
    """
    Decide whether a backend response should be taken into account.

    :param resp: the backend response, or a falsy value if there was none;
        its ``status`` attribute and ``getheader`` method are consulted.
    :param node: a dict describing the node that returned the response; the
        presence of a ``handoff_index`` key marks it as a handoff.
    :returns: False if there is no response, or if the response is a 404
        from a handoff node without an ``x-backend-timestamp`` header; True
        otherwise.
    """
    if not resp:
        return False
    # a 404 from a handoff is not considered authoritative unless it has an
    # x-backend-timestamp that indicates that there is a tombstone
    unauthoritative_404 = (
        'handoff_index' in node
        and resp.status == 404
        and resp.getheader('x-backend-timestamp') is None)
    return not unauthoritative_404
1085
+
1086
+
794
1087
  class ByteCountEnforcer(object):
795
1088
  """
796
1089
  Enforces that successive calls to file_like.read() give at least
@@ -822,44 +1115,131 @@ class ByteCountEnforcer(object):
822
1115
  return chunk
823
1116
 
824
1117
 
825
- class ResumingGetter(object):
826
- def __init__(self, app, req, server_type, node_iter, partition, path,
827
- backend_headers, concurrency=1, client_chunk_size=None,
828
- newest=None, header_provider=None):
1118
class GetterSource(object):
    """
    Bundles a GET response with the node it came from and the proxy app.

    :param app: a proxy app.
    :param resp: an instance of ``HTTPResponse``.
    :param node: a dict describing the node from which the response was
        returned.
    """
    __slots__ = ('app', 'resp', 'node', '_parts_iter')

    def __init__(self, app, resp, node):
        self.app, self.resp, self.node = app, resp, node
        # built on first access of ``parts_iter``
        self._parts_iter = None

    @property
    def timestamp(self):
        """
        Provide the timestamp of the swift http response, taken from the
        first timestamp header that has a non-empty value, or zero if none
        does. Used as a sort key.

        :return: an instance of ``utils.Timestamp``
        """
        # headers are consulted in order of preference
        for name in ('x-backend-data-timestamp', 'x-backend-timestamp',
                     'x-put-timestamp', 'x-timestamp'):
            value = self.resp.getheader(name)
            if value:
                return Timestamp(value)
        return Timestamp(0)

    @property
    def parts_iter(self):
        # the response body parts iter is only created if and when the
        # source is actually read, and only if there is a response
        if not self.resp or self._parts_iter:
            return self._parts_iter
        self._parts_iter = http_response_to_document_iters(
            self.resp, read_chunk_size=self.app.object_chunk_size)
        return self._parts_iter

    def close(self):
        # Close-out the connection as best as possible.
        close_swift_conn(self.resp)
1160
+
1161
+
1162
+ class GetterBase(object):
1163
+ """
1164
+ This base class provides helper methods for handling GET requests to
1165
+ backend servers.
1166
+
1167
+ :param app: a proxy app.
1168
+ :param req: an instance of ``swob.Request``.
1169
+ :param node_iter: an iterator yielding nodes.
1170
+ :param partition: partition.
1171
+ :param policy: the policy instance, or None if Account or Container.
1172
+ :param path: path for the request.
1173
+ :param backend_headers: a dict of headers to be sent with backend requests.
1174
+ :param node_timeout: the timeout value for backend requests.
1175
+ :param resource_type: a string description of the type of resource being
1176
+ accessed; ``resource type`` is used in logs and isn't necessarily the
1177
+ server type.
1178
+ :param logger: a logger instance.
1179
+ """
1180
+ def __init__(self, app, req, node_iter, partition, policy,
1181
+ path, backend_headers, node_timeout, resource_type,
1182
+ logger=None):
829
1183
  self.app = app
1184
+ self.req = req
830
1185
  self.node_iter = node_iter
831
- self.server_type = server_type
832
1186
  self.partition = partition
1187
+ self.policy = policy
833
1188
  self.path = path
834
1189
  self.backend_headers = backend_headers
835
- self.client_chunk_size = client_chunk_size
836
- self.skip_bytes = 0
1190
+ # resource type is used in logs and isn't necessarily the server type
1191
+ self.resource_type = resource_type
1192
+ self.node_timeout = node_timeout
1193
+ self.logger = logger or app.logger
837
1194
  self.bytes_used_from_backend = 0
838
- self.used_nodes = []
839
- self.used_source_etag = ''
840
- self.concurrency = concurrency
841
- self.node = None
842
- self.header_provider = header_provider
843
- self.latest_404_timestamp = Timestamp(0)
844
-
845
- # stuff from request
846
- self.req_method = req.method
847
- self.req_path = req.path
848
- self.req_query_string = req.query_string
849
- if newest is None:
850
- self.newest = config_true_value(req.headers.get('x-newest', 'f'))
851
- else:
852
- self.newest = newest
1195
+ self.source = None
853
1196
 
854
- # populated when finding source
855
- self.statuses = []
856
- self.reasons = []
857
- self.bodies = []
858
- self.source_headers = []
859
- self.sources = []
1197
+ def _find_source(self):
1198
+ """
1199
+ Look for a suitable new source and if one is found then set
1200
+ ``self.source``.
860
1201
 
861
- # populated from response headers
862
- self.start_byte = self.end_byte = self.length = None
1202
+ :return: ``True`` if ``self.source`` has been updated, ``False``
1203
+ otherwise.
1204
+ """
1205
+ # Subclasses must implement this method, but _replace_source should be
1206
+ # called to get a source installed
1207
+ raise NotImplementedError()
1208
+
1209
+ def _replace_source(self, err_msg=''):
1210
+ if self.source:
1211
+ self.app.error_occurred(self.source.node, err_msg)
1212
+ self.source.close()
1213
+ return self._find_source()
1214
+
1215
+ def _get_next_response_part(self):
1216
+ # return the next part of the response body; there may only be one part
1217
+ # unless it's a multipart/byteranges response
1218
+ while True:
1219
+ # the loop here is to resume if trying to parse
1220
+ # multipart/byteranges response raises a ChunkReadTimeout
1221
+ # and resets the source_parts_iter
1222
+ try:
1223
+ with WatchdogTimeout(self.app.watchdog, self.node_timeout,
1224
+ ChunkReadTimeout):
1225
+ # If we don't have a multipart/byteranges response,
1226
+ # but just a 200 or a single-range 206, then this
1227
+ # performs no IO, and either just returns source or
1228
+ # raises StopIteration.
1229
+ # Otherwise, this call to next() performs IO when
1230
+ # we have a multipart/byteranges response, as it
1231
+ # will read the MIME boundary and part headers. In this
1232
+ # case, ChunkReadTimeout may also be raised.
1233
+ # If StopIteration is raised, it escapes and is
1234
+ # handled elsewhere.
1235
+ start_byte, end_byte, length, headers, part = next(
1236
+ self.source.parts_iter)
1237
+ return (start_byte, end_byte, length, headers, part)
1238
+ except ChunkReadTimeout:
1239
+ if not self._replace_source(
1240
+ 'Trying to read next part of %s multi-part GET '
1241
+ '(retrying)' % self.resource_type):
1242
+ raise
863
1243
 
864
1244
  def fast_forward(self, num_bytes):
865
1245
  """
@@ -873,6 +1253,9 @@ class ResumingGetter(object):
873
1253
  > end of range + 1
874
1254
  :raises RangeAlreadyComplete: if begin + num_bytes == end of range + 1
875
1255
  """
1256
+ self.backend_headers.pop(
1257
+ 'X-Backend-Ignore-Range-If-Metadata-Present', None)
1258
+
876
1259
  try:
877
1260
  req_range = Range(self.backend_headers.get('Range'))
878
1261
  except ValueError:
@@ -935,9 +1318,6 @@ class ResumingGetter(object):
935
1318
 
936
1319
  def learn_size_from_content_range(self, start, end, length):
937
1320
  """
938
- If client_chunk_size is set, makes sure we yield things starting on
939
- chunk boundaries based on the Content-Range header in the response.
940
-
941
1321
  Sets our Range header's first byterange to the value learned from
942
1322
  the Content-Range header in the response; if we were given a
943
1323
  fully-specified range (e.g. "bytes=123-456"), this is a no-op.
@@ -950,9 +1330,6 @@ class ResumingGetter(object):
950
1330
  if length == 0:
951
1331
  return
952
1332
 
953
- if self.client_chunk_size:
954
- self.skip_bytes = bytes_to_skip(self.client_chunk_size, start)
955
-
956
1333
  if 'Range' in self.backend_headers:
957
1334
  try:
958
1335
  req_range = Range(self.backend_headers['Range'])
@@ -967,221 +1344,134 @@ class ResumingGetter(object):
967
1344
  e if e is not None else '')
968
1345
  for s, e in new_ranges)))
969
1346
 
970
- def is_good_source(self, src):
971
- """
972
- Indicates whether or not the request made to the backend found
973
- what it was looking for.
974
1347
 
975
- :param src: the response from the backend
976
- :returns: True if found, False if not
977
- """
978
- if self.server_type == 'Object' and src.status == 416:
979
- return True
980
- return is_success(src.status) or is_redirection(src.status)
1348
+ class GetOrHeadHandler(GetterBase):
1349
+ """
1350
+ Handles GET requests to backend servers.
1351
+
1352
+ :param app: a proxy app.
1353
+ :param req: an instance of ``swob.Request``.
1354
+ :param server_type: server type used in logging
1355
+ :param node_iter: an iterator yielding nodes.
1356
+ :param partition: partition.
1357
+ :param path: path for the request.
1358
+ :param backend_headers: a dict of headers to be sent with backend requests.
1359
+ :param concurrency: number of requests to run concurrently.
1360
+ :param policy: the policy instance, or None if Account or Container.
1361
+ :param logger: a logger instance.
1362
+ """
1363
+ def __init__(self, app, req, server_type, node_iter, partition, path,
1364
+ backend_headers, concurrency=1, policy=None, logger=None):
1365
+ newest = config_true_value(req.headers.get('x-newest', 'f'))
1366
+ if server_type == 'Object' and not newest:
1367
+ node_timeout = app.recoverable_node_timeout
1368
+ else:
1369
+ node_timeout = app.node_timeout
1370
+ super(GetOrHeadHandler, self).__init__(
1371
+ app=app, req=req, node_iter=node_iter, partition=partition,
1372
+ policy=policy, path=path, backend_headers=backend_headers,
1373
+ node_timeout=node_timeout, resource_type=server_type.lower(),
1374
+ logger=logger)
1375
+ self.newest = newest
1376
+ self.server_type = server_type
1377
+ self.used_nodes = []
1378
+ self.used_source_etag = None
1379
+ self.concurrency = concurrency
1380
+ self.latest_404_timestamp = Timestamp(0)
1381
+ policy_options = self.app.get_policy_options(self.policy)
1382
+ self.rebalance_missing_suppression_count = min(
1383
+ policy_options.rebalance_missing_suppression_count,
1384
+ node_iter.num_primary_nodes - 1)
981
1385
 
982
- def response_parts_iter(self, req):
983
- source, node = self._get_source_and_node()
984
- it = None
985
- if source:
986
- it = self._get_response_parts_iter(req, node, source)
987
- return it
1386
+ # populated when finding source
1387
+ self.statuses = []
1388
+ self.reasons = []
1389
+ self.bodies = []
1390
+ self.source_headers = []
1391
+ self.sources = []
988
1392
 
989
- def _get_response_parts_iter(self, req, node, source):
990
- # Someday we can replace this [mess] with python 3's "nonlocal"
991
- source = [source]
992
- node = [node]
1393
+ # populated from response headers
1394
+ self.start_byte = self.end_byte = self.length = None
993
1395
 
994
- try:
995
- client_chunk_size = self.client_chunk_size
996
- node_timeout = self.app.node_timeout
997
- if self.server_type == 'Object':
998
- node_timeout = self.app.recoverable_node_timeout
999
-
1000
- # This is safe; it sets up a generator but does not call next()
1001
- # on it, so no IO is performed.
1002
- parts_iter = [
1003
- http_response_to_document_iters(
1004
- source[0], read_chunk_size=self.app.object_chunk_size)]
1005
-
1006
- def get_next_doc_part():
1007
- while True:
1008
- try:
1009
- # This call to next() performs IO when we have a
1010
- # multipart/byteranges response; it reads the MIME
1011
- # boundary and part headers.
1012
- #
1013
- # If we don't have a multipart/byteranges response,
1014
- # but just a 200 or a single-range 206, then this
1015
- # performs no IO, and either just returns source or
1016
- # raises StopIteration.
1017
- with ChunkReadTimeout(node_timeout):
1018
- # if StopIteration is raised, it escapes and is
1019
- # handled elsewhere
1020
- start_byte, end_byte, length, headers, part = next(
1021
- parts_iter[0])
1022
- return (start_byte, end_byte, length, headers, part)
1023
- except ChunkReadTimeout:
1024
- new_source, new_node = self._get_source_and_node()
1025
- if new_source:
1026
- self.app.error_occurred(
1027
- node[0], _('Trying to read object during '
1028
- 'GET (retrying)'))
1029
- # Close-out the connection as best as possible.
1030
- if getattr(source[0], 'swift_conn', None):
1031
- close_swift_conn(source[0])
1032
- source[0] = new_source
1033
- node[0] = new_node
1034
- # This is safe; it sets up a generator but does
1035
- # not call next() on it, so no IO is performed.
1036
- parts_iter[0] = http_response_to_document_iters(
1037
- new_source,
1038
- read_chunk_size=self.app.object_chunk_size)
1039
- else:
1040
- raise StopIteration()
1041
-
1042
- def iter_bytes_from_response_part(part_file, nbytes):
1043
- nchunks = 0
1044
- buf = b''
1045
- part_file = ByteCountEnforcer(part_file, nbytes)
1046
- while True:
1396
+ def _iter_bytes_from_response_part(self, part_file, nbytes):
1397
+ # yield chunks of bytes from a single response part; if an error
1398
+ # occurs, try to resume yielding bytes from a different source
1399
+ part_file = ByteCountEnforcer(part_file, nbytes)
1400
+ while True:
1401
+ try:
1402
+ with WatchdogTimeout(self.app.watchdog, self.node_timeout,
1403
+ ChunkReadTimeout):
1404
+ chunk = part_file.read(self.app.object_chunk_size)
1405
+ if nbytes is not None:
1406
+ nbytes -= len(chunk)
1407
+ except (ChunkReadTimeout, ShortReadError) as e:
1408
+ if self.newest or self.server_type != 'Object':
1409
+ raise
1410
+ try:
1411
+ self.fast_forward(self.bytes_used_from_backend)
1412
+ except (HTTPException, ValueError):
1413
+ raise e
1414
+ except RangeAlreadyComplete:
1415
+ break
1416
+ if self._replace_source(
1417
+ 'Trying to read object during GET (retrying)'):
1047
1418
  try:
1048
- with ChunkReadTimeout(node_timeout):
1049
- chunk = part_file.read(self.app.object_chunk_size)
1050
- nchunks += 1
1051
- # NB: this append must be *inside* the context
1052
- # manager for test.unit.SlowBody to do its thing
1053
- buf += chunk
1054
- if nbytes is not None:
1055
- nbytes -= len(chunk)
1056
- except (ChunkReadTimeout, ShortReadError):
1057
- exc_type, exc_value, exc_traceback = exc_info()
1058
- if self.newest or self.server_type != 'Object':
1059
- raise
1060
- try:
1061
- self.fast_forward(self.bytes_used_from_backend)
1062
- except (HTTPException, ValueError):
1063
- six.reraise(exc_type, exc_value, exc_traceback)
1064
- except RangeAlreadyComplete:
1065
- break
1066
- buf = b''
1067
- new_source, new_node = self._get_source_and_node()
1068
- if new_source:
1069
- self.app.error_occurred(
1070
- node[0], _('Trying to read object during '
1071
- 'GET (retrying)'))
1072
- # Close-out the connection as best as possible.
1073
- if getattr(source[0], 'swift_conn', None):
1074
- close_swift_conn(source[0])
1075
- source[0] = new_source
1076
- node[0] = new_node
1077
- # This is safe; it just sets up a generator but
1078
- # does not call next() on it, so no IO is
1079
- # performed.
1080
- parts_iter[0] = http_response_to_document_iters(
1081
- new_source,
1082
- read_chunk_size=self.app.object_chunk_size)
1083
-
1084
- try:
1085
- _junk, _junk, _junk, _junk, part_file = \
1086
- get_next_doc_part()
1087
- except StopIteration:
1088
- # Tried to find a new node from which to
1089
- # finish the GET, but failed. There's
1090
- # nothing more we can do here.
1091
- six.reraise(exc_type, exc_value, exc_traceback)
1092
- part_file = ByteCountEnforcer(part_file, nbytes)
1093
- else:
1094
- six.reraise(exc_type, exc_value, exc_traceback)
1095
- else:
1096
- if buf and self.skip_bytes:
1097
- if self.skip_bytes < len(buf):
1098
- buf = buf[self.skip_bytes:]
1099
- self.bytes_used_from_backend += self.skip_bytes
1100
- self.skip_bytes = 0
1101
- else:
1102
- self.skip_bytes -= len(buf)
1103
- self.bytes_used_from_backend += len(buf)
1104
- buf = b''
1105
-
1106
- if not chunk:
1107
- if buf:
1108
- with ChunkWriteTimeout(
1109
- self.app.client_timeout):
1110
- self.bytes_used_from_backend += len(buf)
1111
- yield buf
1112
- buf = b''
1113
- break
1114
-
1115
- if client_chunk_size is not None:
1116
- while len(buf) >= client_chunk_size:
1117
- client_chunk = buf[:client_chunk_size]
1118
- buf = buf[client_chunk_size:]
1119
- with ChunkWriteTimeout(
1120
- self.app.client_timeout):
1121
- self.bytes_used_from_backend += \
1122
- len(client_chunk)
1123
- yield client_chunk
1124
- else:
1125
- with ChunkWriteTimeout(self.app.client_timeout):
1126
- self.bytes_used_from_backend += len(buf)
1127
- yield buf
1128
- buf = b''
1129
-
1130
- # This is for fairness; if the network is outpacing
1131
- # the CPU, we'll always be able to read and write
1132
- # data without encountering an EWOULDBLOCK, and so
1133
- # eventlet will not switch greenthreads on its own.
1134
- # We do it manually so that clients don't starve.
1135
- #
1136
- # The number 5 here was chosen by making stuff up.
1137
- # It's not every single chunk, but it's not too big
1138
- # either, so it seemed like it would probably be an
1139
- # okay choice.
1140
- #
1141
- # Note that we may trampoline to other greenthreads
1142
- # more often than once every 5 chunks, depending on
1143
- # how blocking our network IO is; the explicit sleep
1144
- # here simply provides a lower bound on the rate of
1145
- # trampolining.
1146
- if nchunks % 5 == 0:
1147
- sleep()
1148
-
1419
+ _junk, _junk, _junk, _junk, part_file = \
1420
+ self._get_next_response_part()
1421
+ except StopIteration:
1422
+ # Tried to find a new node from which to
1423
+ # finish the GET, but failed. There's
1424
+ # nothing more we can do here.
1425
+ raise e
1426
+ part_file = ByteCountEnforcer(part_file, nbytes)
1427
+ else:
1428
+ raise e
1429
+ else:
1430
+ if not chunk:
1431
+ break
1432
+
1433
+ with WatchdogTimeout(self.app.watchdog,
1434
+ self.app.client_timeout,
1435
+ ChunkWriteTimeout):
1436
+ self.bytes_used_from_backend += len(chunk)
1437
+ yield chunk
1438
+
1439
+ def _iter_parts_from_response(self):
1440
+ # iterate over potentially multiple response body parts; for each
1441
+ # part, yield an iterator over the part's bytes
1442
+ try:
1149
1443
  part_iter = None
1150
1444
  try:
1151
1445
  while True:
1152
1446
  start_byte, end_byte, length, headers, part = \
1153
- get_next_doc_part()
1154
- # note: learn_size_from_content_range() sets
1155
- # self.skip_bytes
1447
+ self._get_next_response_part()
1156
1448
  self.learn_size_from_content_range(
1157
1449
  start_byte, end_byte, length)
1158
1450
  self.bytes_used_from_backend = 0
1159
1451
  # not length; that refers to the whole object, so is the
1160
1452
  # wrong value to use for GET-range responses
1161
- byte_count = ((end_byte - start_byte + 1) - self.skip_bytes
1453
+ byte_count = ((end_byte - start_byte + 1)
1162
1454
  if (end_byte is not None
1163
1455
  and start_byte is not None)
1164
1456
  else None)
1165
- part_iter = iter_bytes_from_response_part(part, byte_count)
1457
+ part_iter = CooperativeIterator(
1458
+ self._iter_bytes_from_response_part(part, byte_count))
1166
1459
  yield {'start_byte': start_byte, 'end_byte': end_byte,
1167
1460
  'entity_length': length, 'headers': headers,
1168
1461
  'part_iter': part_iter}
1169
1462
  self.pop_range()
1170
1463
  except StopIteration:
1171
- req.environ['swift.non_client_disconnect'] = True
1464
+ self.req.environ['swift.non_client_disconnect'] = True
1172
1465
  finally:
1173
1466
  if part_iter:
1174
1467
  part_iter.close()
1175
1468
 
1176
- except ChunkReadTimeout:
1177
- self.app.exception_occurred(node[0], _('Object'),
1178
- _('Trying to read during GET'))
1179
- raise
1180
1469
  except ChunkWriteTimeout:
1181
- self.app.logger.warning(
1182
- _('Client did not read from proxy within %ss') %
1470
+ self.logger.info(
1471
+ 'Client did not read from proxy within %ss',
1183
1472
  self.app.client_timeout)
1184
- self.app.logger.increment('client_timeouts')
1473
+ self.logger.increment('%s.client_timeouts' %
1474
+ self.server_type.lower())
1185
1475
  except GeneratorExit:
1186
1476
  warn = True
1187
1477
  req_range = self.backend_headers['Range']
@@ -1192,16 +1482,16 @@ class ResumingGetter(object):
1192
1482
  if end is not None and begin is not None:
1193
1483
  if end - begin + 1 == self.bytes_used_from_backend:
1194
1484
  warn = False
1195
- if not req.environ.get('swift.non_client_disconnect') and warn:
1196
- self.app.logger.warning(_('Client disconnected on read'))
1485
+ if (warn and
1486
+ not self.req.environ.get('swift.non_client_disconnect')):
1487
+ self.logger.info('Client disconnected on read of %r',
1488
+ self.path)
1197
1489
  raise
1198
1490
  except Exception:
1199
- self.app.logger.exception(_('Trying to send to client'))
1491
+ self.logger.exception('Trying to send to client')
1200
1492
  raise
1201
1493
  finally:
1202
- # Close-out the connection as best as possible.
1203
- if getattr(source[0], 'swift_conn', None):
1204
- close_swift_conn(source[0])
1494
+ self.source.close()
1205
1495
 
1206
1496
  @property
1207
1497
  def last_status(self):
@@ -1217,39 +1507,42 @@ class ResumingGetter(object):
1217
1507
  else:
1218
1508
  return None
1219
1509
 
1220
- def _make_node_request(self, node, node_timeout, logger_thread_locals):
1221
- self.app.logger.thread_locals = logger_thread_locals
1510
+ def _make_node_request(self, node, logger_thread_locals):
1511
+ # make a backend request; return True if the response is deemed good
1512
+ # (has an acceptable status code), useful (matches any previously
1513
+ # discovered etag) and sufficient (a single good response is
1514
+ # insufficient when we're searching for the newest timestamp)
1515
+ self.logger.thread_locals = logger_thread_locals
1222
1516
  if node in self.used_nodes:
1223
1517
  return False
1518
+
1224
1519
  req_headers = dict(self.backend_headers)
1225
- # a request may be specialised with specific backend headers
1226
- if self.header_provider:
1227
- req_headers.update(self.header_provider())
1520
+ ip, port = get_ip_port(node, req_headers)
1228
1521
  start_node_timing = time.time()
1229
1522
  try:
1230
1523
  with ConnectionTimeout(self.app.conn_timeout):
1231
1524
  conn = http_connect(
1232
- node['ip'], node['port'], node['device'],
1233
- self.partition, self.req_method, self.path,
1525
+ ip, port, node['device'],
1526
+ self.partition, self.req.method, self.path,
1234
1527
  headers=req_headers,
1235
- query_string=self.req_query_string)
1528
+ query_string=self.req.query_string)
1236
1529
  self.app.set_node_timing(node, time.time() - start_node_timing)
1237
1530
 
1238
- with Timeout(node_timeout):
1531
+ with Timeout(self.node_timeout):
1239
1532
  possible_source = conn.getresponse()
1240
1533
  # See NOTE: swift_conn at top of file about this.
1241
1534
  possible_source.swift_conn = conn
1242
1535
  except (Exception, Timeout):
1243
1536
  self.app.exception_occurred(
1244
1537
  node, self.server_type,
1245
- _('Trying to %(method)s %(path)s') %
1246
- {'method': self.req_method, 'path': self.req_path})
1538
+ 'Trying to %(method)s %(path)s' %
1539
+ {'method': self.req.method, 'path': self.req.path})
1247
1540
  return False
1248
1541
 
1249
1542
  src_headers = dict(
1250
1543
  (k.lower(), v) for k, v in
1251
1544
  possible_source.getheaders())
1252
- if self.is_good_source(possible_source):
1545
+ if is_good_source(possible_source.status, self.server_type):
1253
1546
  # 404 if we know we don't have a synced copy
1254
1547
  if not float(possible_source.getheader('X-PUT-Timestamp', 1)):
1255
1548
  self.statuses.append(HTTP_NOT_FOUND)
@@ -1259,9 +1552,8 @@ class ResumingGetter(object):
1259
1552
  close_swift_conn(possible_source)
1260
1553
  else:
1261
1554
  if self.used_source_etag and \
1262
- self.used_source_etag != src_headers.get(
1263
- 'x-object-sysmeta-ec-etag',
1264
- src_headers.get('etag', '')).strip('"'):
1555
+ self.used_source_etag != normalize_etag(
1556
+ src_headers.get('etag', '')):
1265
1557
  self.statuses.append(HTTP_NOT_FOUND)
1266
1558
  self.reasons.append('')
1267
1559
  self.bodies.append('')
@@ -1280,16 +1572,25 @@ class ResumingGetter(object):
1280
1572
  self.reasons.append(possible_source.reason)
1281
1573
  self.bodies.append(None)
1282
1574
  self.source_headers.append(possible_source.getheaders())
1283
- self.sources.append((possible_source, node))
1575
+ self.sources.append(
1576
+ GetterSource(self.app, possible_source, node))
1284
1577
  if not self.newest: # one good source is enough
1285
1578
  return True
1286
1579
  else:
1287
1580
  if 'handoff_index' in node and \
1581
+ (is_server_error(possible_source.status) or
1582
+ possible_source.status == HTTP_NOT_FOUND) and \
1583
+ not Timestamp(src_headers.get('x-backend-timestamp', 0)):
1584
+ # throw out 5XX and 404s from handoff nodes unless the data is
1585
+ # really on disk and had been DELETEd
1586
+ return False
1587
+
1588
+ if self.rebalance_missing_suppression_count > 0 and \
1288
1589
  possible_source.status == HTTP_NOT_FOUND and \
1289
1590
  not Timestamp(src_headers.get('x-backend-timestamp', 0)):
1290
- # throw out 404s from handoff nodes unless the data is really
1291
- # on disk and had been DELETEd
1591
+ self.rebalance_missing_suppression_count -= 1
1292
1592
  return False
1593
+
1293
1594
  self.statuses.append(possible_source.status)
1294
1595
  self.reasons.append(possible_source.reason)
1295
1596
  self.bodies.append(possible_source.read())
@@ -1305,18 +1606,12 @@ class ResumingGetter(object):
1305
1606
  ts = Timestamp(hdrs.get('X-Backend-Timestamp', 0))
1306
1607
  if ts > self.latest_404_timestamp:
1307
1608
  self.latest_404_timestamp = ts
1308
- if possible_source.status == HTTP_INSUFFICIENT_STORAGE:
1309
- self.app.error_limit(node, _('ERROR Insufficient Storage'))
1310
- elif is_server_error(possible_source.status):
1311
- self.app.error_occurred(
1312
- node, _('ERROR %(status)d %(body)s '
1313
- 'From %(type)s Server') %
1314
- {'status': possible_source.status,
1315
- 'body': self.bodies[-1][:1024],
1316
- 'type': self.server_type})
1609
+ self.app.check_response(node, self.server_type, possible_source,
1610
+ self.req.method, self.path,
1611
+ self.bodies[-1])
1317
1612
  return False
1318
1613
 
1319
- def _get_source_and_node(self):
1614
+ def _find_source(self):
1320
1615
  self.statuses = []
1321
1616
  self.reasons = []
1322
1617
  self.bodies = []
@@ -1325,16 +1620,13 @@ class ResumingGetter(object):
1325
1620
 
1326
1621
  nodes = GreenthreadSafeIterator(self.node_iter)
1327
1622
 
1328
- node_timeout = self.app.node_timeout
1329
- if self.server_type == 'Object' and not self.newest:
1330
- node_timeout = self.app.recoverable_node_timeout
1331
-
1332
1623
  pile = GreenAsyncPile(self.concurrency)
1333
1624
 
1334
1625
  for node in nodes:
1335
- pile.spawn(self._make_node_request, node, node_timeout,
1336
- self.app.logger.thread_locals)
1337
- _timeout = self.app.concurrency_timeout \
1626
+ pile.spawn(self._make_node_request, node,
1627
+ self.logger.thread_locals)
1628
+ _timeout = self.app.get_policy_options(
1629
+ self.policy).concurrency_timeout \
1338
1630
  if pile.inflight < self.concurrency else None
1339
1631
  if pile.waitfirst(_timeout):
1340
1632
  break
@@ -1346,46 +1638,37 @@ class ResumingGetter(object):
1346
1638
  # and added to the list in the case of x-newest.
1347
1639
  if self.sources:
1348
1640
  self.sources = [s for s in self.sources
1349
- if source_key(s[0]) >= self.latest_404_timestamp]
1641
+ if s.timestamp >= self.latest_404_timestamp]
1350
1642
 
1351
1643
  if self.sources:
1352
- self.sources.sort(key=lambda s: source_key(s[0]))
1353
- source, node = self.sources.pop()
1354
- for src, _junk in self.sources:
1355
- close_swift_conn(src)
1356
- self.used_nodes.append(node)
1357
- src_headers = dict(
1358
- (k.lower(), v) for k, v in
1359
- source.getheaders())
1644
+ self.sources.sort(key=operator.attrgetter('timestamp'))
1645
+ source = self.sources.pop()
1646
+ for unused_source in self.sources:
1647
+ unused_source.close()
1648
+ self.used_nodes.append(source.node)
1360
1649
 
1361
1650
  # Save off the source etag so that, if we lose the connection
1362
1651
  # and have to resume from a different node, we can be sure that
1363
- # we have the same object (replication) or a fragment archive
1364
- # from the same object (EC). Otherwise, if the cluster has two
1365
- # versions of the same object, we might end up switching between
1366
- # old and new mid-stream and giving garbage to the client.
1367
- self.used_source_etag = src_headers.get(
1368
- 'x-object-sysmeta-ec-etag',
1369
- src_headers.get('etag', '')).strip('"')
1370
- self.node = node
1371
- return source, node
1372
- return None, None
1373
-
1374
-
1375
- class GetOrHeadHandler(ResumingGetter):
1376
- def _make_app_iter(self, req, node, source):
1652
+ # we have the same object (replication). Otherwise, if the cluster
1653
+ # has two versions of the same object, we might end up switching
1654
+ # between old and new mid-stream and giving garbage to the client.
1655
+ if self.used_source_etag is None:
1656
+ self.used_source_etag = normalize_etag(
1657
+ source.resp.getheader('etag', ''))
1658
+ self.source = source
1659
+ return True
1660
+ return False
1661
+
1662
+ def _make_app_iter(self):
1377
1663
  """
1378
1664
  Returns an iterator over the contents of the source (via its read
1379
1665
  func). There is also quite a bit of cleanup to ensure garbage
1380
1666
  collection works and the underlying socket of the source is closed.
1381
1667
 
1382
- :param req: incoming request object
1383
- :param source: The httplib.Response object this iterator should read
1384
- from.
1385
- :param node: The node the source is reading from, for logging purposes.
1668
+ :return: an iterator that yields chunks of response body bytes
1386
1669
  """
1387
1670
 
1388
- ct = source.getheader('Content-Type')
1671
+ ct = self.source.resp.getheader('Content-Type')
1389
1672
  if ct:
1390
1673
  content_type, content_type_attrs = parse_content_type(ct)
1391
1674
  is_multipart = content_type == 'multipart/byteranges'
@@ -1398,7 +1681,7 @@ class GetOrHeadHandler(ResumingGetter):
1398
1681
  # furnished one for us, so we'll just re-use it
1399
1682
  boundary = dict(content_type_attrs)["boundary"]
1400
1683
 
1401
- parts_iter = self._get_response_parts_iter(req, node, source)
1684
+ parts_iter = self._iter_parts_from_response()
1402
1685
 
1403
1686
  def add_content_type(response_part):
1404
1687
  response_part["content_type"] = \
@@ -1406,29 +1689,29 @@ class GetOrHeadHandler(ResumingGetter):
1406
1689
  return response_part
1407
1690
 
1408
1691
  return document_iters_to_http_response_body(
1409
- (add_content_type(pi) for pi in parts_iter),
1410
- boundary, is_multipart, self.app.logger)
1692
+ ClosingMapper(add_content_type, parts_iter),
1693
+ boundary, is_multipart, self.logger)
1411
1694
 
1412
- def get_working_response(self, req):
1413
- source, node = self._get_source_and_node()
1695
+ def get_working_response(self):
1414
1696
  res = None
1415
- if source:
1416
- res = Response(request=req)
1417
- res.status = source.status
1418
- update_headers(res, source.getheaders())
1419
- if req.method == 'GET' and \
1420
- source.status in (HTTP_OK, HTTP_PARTIAL_CONTENT):
1421
- res.app_iter = self._make_app_iter(req, node, source)
1697
+ if self._replace_source():
1698
+ res = Response(request=self.req)
1699
+ res.status = self.source.resp.status
1700
+ update_headers(res, self.source.resp.getheaders())
1701
+ if self.req.method == 'GET' and \
1702
+ self.source.resp.status in (HTTP_OK, HTTP_PARTIAL_CONTENT):
1703
+ res.app_iter = self._make_app_iter()
1422
1704
  # See NOTE: swift_conn at top of file about this.
1423
- res.swift_conn = source.swift_conn
1705
+ res.swift_conn = self.source.resp.swift_conn
1424
1706
  if not res.environ:
1425
1707
  res.environ = {}
1426
- res.environ['swift_x_timestamp'] = source.getheader('x-timestamp')
1708
+ res.environ['swift_x_timestamp'] = self.source.resp.getheader(
1709
+ 'x-timestamp')
1427
1710
  res.accept_ranges = 'bytes'
1428
- res.content_length = source.getheader('Content-Length')
1429
- if source.getheader('Content-Type'):
1711
+ res.content_length = self.source.resp.getheader('Content-Length')
1712
+ if self.source.resp.getheader('Content-Type'):
1430
1713
  res.charset = None
1431
- res.content_type = source.getheader('Content-Type')
1714
+ res.content_type = self.source.resp.getheader('Content-Type')
1432
1715
  return res
1433
1716
 
1434
1717
 
@@ -1446,36 +1729,48 @@ class NodeIter(object):
1446
1729
  may not, depending on how logging is configured, the vagaries of
1447
1730
  socket IO and eventlet, and the phase of the moon.)
1448
1731
 
1732
+ :param server_type: one of 'account', 'container', or 'object'
1449
1733
  :param app: a proxy app
1450
1734
  :param ring: ring to get yield nodes from
1451
1735
  :param partition: ring partition to yield nodes for
1736
+ :param logger: a logger instance
1737
+ :param request: yielded nodes will be annotated with `use_replication`
1738
+ based on the `request` headers.
1452
1739
  :param node_iter: optional iterable of nodes to try. Useful if you
1453
1740
  want to filter or reorder the nodes.
1454
1741
  :param policy: an instance of :class:`BaseStoragePolicy`. This should be
1455
1742
  None for an account or container ring.
1456
1743
  """
1457
1744
 
1458
- def __init__(self, app, ring, partition, node_iter=None, policy=None):
1745
+ def __init__(self, server_type, app, ring, partition, logger, request,
1746
+ node_iter=None, policy=None):
1747
+ self.server_type = server_type
1459
1748
  self.app = app
1460
1749
  self.ring = ring
1461
1750
  self.partition = partition
1751
+ self.logger = logger
1752
+ self.request = request
1462
1753
 
1463
1754
  part_nodes = ring.get_part_nodes(partition)
1464
1755
  if node_iter is None:
1465
1756
  node_iter = itertools.chain(
1466
1757
  part_nodes, ring.get_more_nodes(partition))
1467
- num_primary_nodes = len(part_nodes)
1468
- self.nodes_left = self.app.request_node_count(num_primary_nodes)
1469
- self.expected_handoffs = self.nodes_left - num_primary_nodes
1758
+ self.num_primary_nodes = len(part_nodes)
1759
+ self.nodes_left = self.app.request_node_count(self.num_primary_nodes)
1760
+ self.expected_handoffs = self.nodes_left - self.num_primary_nodes
1470
1761
 
1471
1762
  # Use of list() here forcibly yanks the first N nodes (the primary
1472
1763
  # nodes) from node_iter, so the rest of its values are handoffs.
1473
1764
  self.primary_nodes = self.app.sort_nodes(
1474
- list(itertools.islice(node_iter, num_primary_nodes)),
1765
+ list(itertools.islice(node_iter, self.num_primary_nodes)),
1475
1766
  policy=policy)
1476
1767
  self.handoff_iter = node_iter
1477
1768
  self._node_provider = None
1478
1769
 
1770
+ @property
1771
+ def primaries_left(self):
1772
+ return len(self.primary_nodes)
1773
+
1479
1774
  def __iter__(self):
1480
1775
  self._node_iter = self._node_gen()
1481
1776
  return self
@@ -1496,12 +1791,14 @@ class NodeIter(object):
1496
1791
  return
1497
1792
  extra_handoffs = handoffs - self.expected_handoffs
1498
1793
  if extra_handoffs > 0:
1499
- self.app.logger.increment('handoff_count')
1500
- self.app.logger.warning(
1794
+ self.logger.increment('%s.handoff_count' %
1795
+ self.server_type.lower())
1796
+ self.logger.warning(
1501
1797
  'Handoff requested (%d)' % handoffs)
1502
- if (extra_handoffs == len(self.primary_nodes)):
1798
+ if (extra_handoffs == self.num_primary_nodes):
1503
1799
  # all the primaries were skipped, and handoffs didn't help
1504
- self.app.logger.increment('handoff_all_count')
1800
+ self.logger.increment('%s.handoff_all_count' %
1801
+ self.server_type.lower())
1505
1802
 
1506
1803
  def set_node_provider(self, callback):
1507
1804
  """
@@ -1515,7 +1812,8 @@ class NodeIter(object):
1515
1812
  self._node_provider = callback
1516
1813
 
1517
1814
  def _node_gen(self):
1518
- for node in self.primary_nodes:
1815
+ while self.primary_nodes:
1816
+ node = self.primary_nodes.pop(0)
1519
1817
  if not self.app.error_limited(node):
1520
1818
  yield node
1521
1819
  if not self.app.error_limited(node):
@@ -1533,16 +1831,27 @@ class NodeIter(object):
1533
1831
  if self.nodes_left <= 0:
1534
1832
  return
1535
1833
 
1536
- def next(self):
1834
+ def _annotate_node(self, node):
1835
+ """
1836
+ Helper function to set use_replication dict value for a node by looking
1837
+ up the header value for x-backend-use-replication-network.
1838
+
1839
+ :param node: node dictionary from the ring or node_iter.
1840
+ :returns: node dictionary with replication network enabled/disabled
1841
+ """
1842
+ # nodes may have come from a ring or a node_iter passed to the
1843
+ # constructor: be careful not to mutate them!
1844
+ return dict(node, use_replication=is_use_replication_network(
1845
+ self.request.headers))
1846
+
1847
+ def __next__(self):
1848
+ node = None
1537
1849
  if self._node_provider:
1538
1850
  # give node provider the opportunity to inject a node
1539
1851
  node = self._node_provider()
1540
- if node:
1541
- return node
1542
- return next(self._node_iter)
1543
-
1544
- def __next__(self):
1545
- return self.next()
1852
+ if not node:
1853
+ node = next(self._node_iter)
1854
+ return self._annotate_node(node)
1546
1855
 
1547
1856
 
1548
1857
  class Controller(object):
@@ -1564,6 +1873,10 @@ class Controller(object):
1564
1873
  self._allowed_methods = None
1565
1874
  self._private_methods = None
1566
1875
 
1876
+ @property
1877
+ def logger(self):
1878
+ return self.app.logger
1879
+
1567
1880
  @property
1568
1881
  def allowed_methods(self):
1569
1882
  if self._allowed_methods is None:
@@ -1616,39 +1929,44 @@ class Controller(object):
1616
1929
  def generate_request_headers(self, orig_req=None, additional=None,
1617
1930
  transfer=False):
1618
1931
  """
1619
- Create a list of headers to be used in backend requests
1932
+ Create a dict of headers to be used in backend requests
1620
1933
 
1621
1934
  :param orig_req: the original request sent by the client to the proxy
1622
1935
  :param additional: additional headers to send to the backend
1623
1936
  :param transfer: If True, transfer headers from original client request
1624
1937
  :returns: a dictionary of headers
1625
1938
  """
1626
- # Use the additional headers first so they don't overwrite the headers
1627
- # we require.
1628
- headers = HeaderKeyDict(additional) if additional else HeaderKeyDict()
1629
- if transfer:
1630
- self.transfer_headers(orig_req.headers, headers)
1631
- headers.setdefault('x-timestamp', Timestamp.now().internal)
1939
+ headers = HeaderKeyDict()
1632
1940
  if orig_req:
1941
+ headers.update((k.lower(), v)
1942
+ for k, v in orig_req.headers.items()
1943
+ if k.lower().startswith('x-backend-'))
1633
1944
  referer = orig_req.as_referer()
1634
1945
  else:
1635
1946
  referer = ''
1947
+ # additional headers can override x-backend-* headers from orig_req
1948
+ if additional:
1949
+ headers.update(additional)
1950
+ if orig_req and transfer:
1951
+ # transfer headers from orig_req can override additional headers
1952
+ self.transfer_headers(orig_req.headers, headers)
1953
+ headers.setdefault('x-timestamp', Timestamp.now().internal)
1954
+ # orig_req and additional headers cannot override the following...
1636
1955
  headers['x-trans-id'] = self.trans_id
1637
1956
  headers['connection'] = 'close'
1638
- headers['user-agent'] = 'proxy-server %s' % os.getpid()
1957
+ headers['user-agent'] = self.app.backend_user_agent
1639
1958
  headers['referer'] = referer
1640
1959
  return headers
1641
1960
 
1642
- def account_info(self, account, req=None):
1961
+ def account_info(self, account, req):
1643
1962
  """
1644
1963
  Get account information, and also verify that the account exists.
1645
1964
 
1646
1965
  :param account: native str name of the account to get the info for
1647
- :param req: caller's HTTP request context object (optional)
1966
+ :param req: caller's HTTP request context object
1648
1967
  :returns: tuple of (account partition, account nodes, container_count)
1649
1968
  or (None, None, None) if it does not exist
1650
1969
  """
1651
- partition, nodes = self.app.account_ring.get_nodes(account)
1652
1970
  if req:
1653
1971
  env = getattr(req, 'environ', {})
1654
1972
  else:
@@ -1663,23 +1981,23 @@ class Controller(object):
1663
1981
  or not info.get('account_really_exists', True)):
1664
1982
  return None, None, None
1665
1983
  container_count = info['container_count']
1984
+ partition, nodes = self.app.account_ring.get_nodes(account)
1666
1985
  return partition, nodes, container_count
1667
1986
 
1668
- def container_info(self, account, container, req=None):
1987
+ def container_info(self, account, container, req):
1669
1988
  """
1670
1989
  Get container information and thusly verify container existence.
1671
1990
  This will also verify account existence.
1672
1991
 
1673
1992
  :param account: native-str account name for the container
1674
1993
  :param container: native-str container name to look up
1675
- :param req: caller's HTTP request context object (optional)
1994
+ :param req: caller's HTTP request context object
1676
1995
  :returns: dict containing at least container partition ('partition'),
1677
1996
  container nodes ('containers'), container read
1678
1997
  acl ('read_acl'), container write acl ('write_acl'),
1679
1998
  and container sync key ('sync_key').
1680
1999
  Values are set to None if the container does not exist.
1681
2000
  """
1682
- part, nodes = self.app.container_ring.get_nodes(account, container)
1683
2001
  if req:
1684
2002
  env = getattr(req, 'environ', {})
1685
2003
  else:
@@ -1689,11 +2007,11 @@ class Controller(object):
1689
2007
  path_env['PATH_INFO'] = "/v1/%s/%s" % (
1690
2008
  str_to_wsgi(account), str_to_wsgi(container))
1691
2009
  info = get_container_info(path_env, self.app)
1692
- if not info or not is_success(info.get('status')):
1693
- info = headers_to_container_info({}, 0)
2010
+ if not is_success(info.get('status')):
1694
2011
  info['partition'] = None
1695
2012
  info['nodes'] = None
1696
2013
  else:
2014
+ part, nodes = self.app.container_ring.get_nodes(account, container)
1697
2015
  info['partition'] = part
1698
2016
  info['nodes'] = nodes
1699
2017
  return info
@@ -1716,22 +2034,23 @@ class Controller(object):
1716
2034
  :param body: byte string to use as the request body.
1717
2035
  Try to keep it small.
1718
2036
  :param logger_thread_locals: The thread local values to be set on the
1719
- self.app.logger to retain transaction
2037
+ self.logger to retain transaction
1720
2038
  logging information.
1721
2039
  :returns: a swob.Response object, or None if no responses were received
1722
2040
  """
1723
- self.app.logger.thread_locals = logger_thread_locals
2041
+ self.logger.thread_locals = logger_thread_locals
1724
2042
  if body:
1725
2043
  if not isinstance(body, bytes):
1726
2044
  raise TypeError('body must be bytes, not %s' % type(body))
1727
2045
  headers['Content-Length'] = str(len(body))
1728
2046
  for node in nodes:
1729
2047
  try:
2048
+ ip, port = get_ip_port(node, headers)
1730
2049
  start_node_timing = time.time()
1731
2050
  with ConnectionTimeout(self.app.conn_timeout):
1732
- conn = http_connect(node['ip'], node['port'],
1733
- node['device'], part, method, path,
1734
- headers=headers, query_string=query)
2051
+ conn = http_connect(
2052
+ ip, port, node['device'], part, method, path,
2053
+ headers=headers, query_string=query)
1735
2054
  conn.node = node
1736
2055
  self.app.set_node_timing(node, time.time() - start_node_timing)
1737
2056
  if body:
@@ -1739,27 +2058,17 @@ class Controller(object):
1739
2058
  conn.send(body)
1740
2059
  with Timeout(self.app.node_timeout):
1741
2060
  resp = conn.getresponse()
1742
- if not is_informational(resp.status) and \
1743
- not is_server_error(resp.status):
1744
- return resp.status, resp.reason, resp.getheaders(), \
1745
- resp.read()
1746
- elif resp.status == HTTP_INSUFFICIENT_STORAGE:
1747
- self.app.error_limit(node,
1748
- _('ERROR Insufficient Storage'))
1749
- elif is_server_error(resp.status):
1750
- self.app.error_occurred(
1751
- node, _('ERROR %(status)d '
1752
- 'Trying to %(method)s %(path)s'
1753
- ' From %(type)s Server') % {
1754
- 'status': resp.status,
1755
- 'method': method,
1756
- 'path': path,
1757
- 'type': self.server_type})
2061
+ if (self.app.check_response(node, self.server_type, resp,
2062
+ method, path)
2063
+ and not is_informational(resp.status)):
2064
+ return resp, resp.read(), node
2065
+
1758
2066
  except (Exception, Timeout):
1759
2067
  self.app.exception_occurred(
1760
2068
  node, self.server_type,
1761
- _('Trying to %(method)s %(path)s') %
2069
+ 'Trying to %(method)s %(path)s' %
1762
2070
  {'method': method, 'path': path})
2071
+ return None, None, None
1763
2072
 
1764
2073
  def make_requests(self, req, ring, part, method, path, headers,
1765
2074
  query_string='', overrides=None, node_count=None,
@@ -1782,36 +2091,37 @@ class Controller(object):
1782
2091
  the returned status of a request.
1783
2092
  :param node_count: optional number of nodes to send request to.
1784
2093
  :param node_iterator: optional node iterator.
2094
+ :param body: byte string to use as the request body.
2095
+ Try to keep it small.
1785
2096
  :returns: a swob.Response object
1786
2097
  """
1787
- nodes = GreenthreadSafeIterator(
1788
- node_iterator or self.app.iter_nodes(ring, part)
1789
- )
2098
+ nodes = GreenthreadSafeIterator(node_iterator or NodeIter(
2099
+ self.server_type.lower(), self.app, ring, part, self.logger, req))
1790
2100
  node_number = node_count or len(ring.get_part_nodes(part))
1791
2101
  pile = GreenAsyncPile(node_number)
1792
2102
 
1793
2103
  for head in headers:
1794
2104
  pile.spawn(self._make_request, nodes, part, method, path,
1795
- head, query_string, body, self.app.logger.thread_locals)
1796
- response = []
2105
+ head, query_string, body, self.logger.thread_locals)
2106
+ results = []
1797
2107
  statuses = []
1798
- for resp in pile:
1799
- if not resp:
2108
+ for resp, body, node in pile:
2109
+ if not is_useful_response(resp, node):
1800
2110
  continue
1801
- response.append(resp)
1802
- statuses.append(resp[0])
2111
+ results.append((resp.status, resp.reason, resp.getheaders(), body))
2112
+ statuses.append(resp.status)
1803
2113
  if self.have_quorum(statuses, node_number):
1804
2114
  break
1805
2115
  # give any pending requests *some* chance to finish
1806
2116
  finished_quickly = pile.waitall(self.app.post_quorum_timeout)
1807
- for resp in finished_quickly:
1808
- if not resp:
2117
+ for resp, body, node in finished_quickly:
2118
+ if not is_useful_response(resp, node):
1809
2119
  continue
1810
- response.append(resp)
1811
- statuses.append(resp[0])
1812
- while len(response) < node_number:
1813
- response.append((HTTP_SERVICE_UNAVAILABLE, '', '', b''))
1814
- statuses, reasons, resp_headers, bodies = zip(*response)
2120
+ results.append((resp.status, resp.reason, resp.getheaders(), body))
2121
+ statuses.append(resp.status)
2122
+ while len(results) < node_number:
2123
+ results.append((HTTP_SERVICE_UNAVAILABLE, '', '', b''))
2124
+ statuses, reasons, resp_headers, bodies = zip(*results)
1815
2125
  return self.best_response(req, statuses, reasons, bodies,
1816
2126
  '%s %s' % (self.server_type, req.method),
1817
2127
  overrides=overrides, headers=resp_headers)
@@ -1885,8 +2195,8 @@ class Controller(object):
1885
2195
 
1886
2196
  if not resp:
1887
2197
  resp = HTTPServiceUnavailable(request=req)
1888
- self.app.logger.error(_('%(type)s returning 503 for %(statuses)s'),
1889
- {'type': server_type, 'statuses': statuses})
2198
+ self.logger.error('%(type)s returning 503 for %(statuses)s',
2199
+ {'type': server_type, 'statuses': statuses})
1890
2200
 
1891
2201
  return resp
1892
2202
 
@@ -1913,7 +2223,7 @@ class Controller(object):
1913
2223
  if headers:
1914
2224
  update_headers(resp, headers[status_index])
1915
2225
  if etag:
1916
- resp.headers['etag'] = etag.strip('"')
2226
+ resp.headers['etag'] = normalize_etag(etag)
1917
2227
  return resp
1918
2228
  return None
1919
2229
 
@@ -1955,20 +2265,19 @@ class Controller(object):
1955
2265
  headers.update((k, v)
1956
2266
  for k, v in req.headers.items()
1957
2267
  if is_sys_meta('account', k))
1958
- resp = self.make_requests(Request.blank('/v1' + path),
2268
+ resp = self.make_requests(Request.blank(str_to_wsgi('/v1' + path)),
1959
2269
  self.app.account_ring, partition, 'PUT',
1960
2270
  path, [headers] * len(nodes))
1961
2271
  if is_success(resp.status_int):
1962
- self.app.logger.info(_('autocreate account %r'), path)
1963
- clear_info_cache(self.app, req.environ, account)
2272
+ self.logger.info('autocreate account %r', path)
2273
+ clear_info_cache(req.environ, account)
1964
2274
  return True
1965
2275
  else:
1966
- self.app.logger.warning(_('Could not autocreate account %r'),
1967
- path)
2276
+ self.logger.warning('Could not autocreate account %r', path)
1968
2277
  return False
1969
2278
 
1970
2279
  def GETorHEAD_base(self, req, server_type, node_iter, partition, path,
1971
- concurrency=1, client_chunk_size=None):
2280
+ concurrency=1, policy=None):
1972
2281
  """
1973
2282
  Base handler for HTTP GET or HEAD requests.
1974
2283
 
@@ -1978,7 +2287,7 @@ class Controller(object):
1978
2287
  :param partition: partition
1979
2288
  :param path: path for the request
1980
2289
  :param concurrency: number of requests to run concurrently
1981
- :param client_chunk_size: chunk size for response body iterator
2290
+ :param policy: the policy instance, or None if Account or Container
1982
2291
  :returns: swob.Response object
1983
2292
  """
1984
2293
  backend_headers = self.generate_request_headers(
@@ -1986,9 +2295,9 @@ class Controller(object):
1986
2295
 
1987
2296
  handler = GetOrHeadHandler(self.app, req, self.server_type, node_iter,
1988
2297
  partition, path, backend_headers,
1989
- concurrency,
1990
- client_chunk_size=client_chunk_size)
1991
- res = handler.get_working_response(req)
2298
+ concurrency, policy=policy,
2299
+ logger=self.logger)
2300
+ res = handler.get_working_response()
1992
2301
 
1993
2302
  if not res:
1994
2303
  res = self.best_response(
@@ -2006,7 +2315,7 @@ class Controller(object):
2006
2315
  if policy:
2007
2316
  res.headers['X-Storage-Policy'] = policy.name
2008
2317
  else:
2009
- self.app.logger.error(
2318
+ self.logger.error(
2010
2319
  'Could not translate %s (%r) from %r to policy',
2011
2320
  'X-Backend-Storage-Policy-Index',
2012
2321
  res.headers['X-Backend-Storage-Policy-Index'], path)
@@ -2116,6 +2425,26 @@ class Controller(object):
2116
2425
  raise ValueError(
2117
2426
  "server_type can only be 'account' or 'container'")
2118
2427
 
2428
+ def _parse_listing_response(self, req, response):
2429
+ if not is_success(response.status_int):
2430
+ record_type = req.headers.get('X-Backend-Record-Type')
2431
+ self.logger.warning(
2432
+ 'Failed to get container %s listing from %s: %s',
2433
+ record_type, req.path_qs, response.status_int)
2434
+ return None
2435
+
2436
+ try:
2437
+ data = json.loads(response.body)
2438
+ if not isinstance(data, list):
2439
+ raise ValueError('not a list')
2440
+ return data
2441
+ except ValueError as err:
2442
+ record_type = response.headers.get('X-Backend-Record-Type')
2443
+ self.logger.error(
2444
+ 'Problem with container %s listing response from %s: %r',
2445
+ record_type, req.path_qs, err)
2446
+ return None
2447
+
2119
2448
  def _get_container_listing(self, req, account, container, headers=None,
2120
2449
  params=None):
2121
2450
  """
@@ -2123,8 +2452,10 @@ class Controller(object):
2123
2452
 
2124
2453
  :param req: original Request instance.
2125
2454
  :param account: account in which `container` is stored.
2126
- :param container: container from listing should be fetched.
2127
- :param headers: headers to be included with the request
2455
+ :param container: container from which listing should be fetched.
2456
+ :param headers: extra headers to be included with the listing
2457
+ sub-request; these update the headers copied from the original
2458
+ request.
2128
2459
  :param params: query string parameters to be used.
2129
2460
  :return: a tuple of (deserialized json data structure, swob Response)
2130
2461
  """
@@ -2138,120 +2469,33 @@ class Controller(object):
2138
2469
  if headers:
2139
2470
  subreq.headers.update(headers)
2140
2471
  subreq.params = params
2141
- self.app.logger.debug(
2472
+ self.logger.debug(
2142
2473
  'Get listing from %s %s' % (subreq.path_qs, headers))
2143
2474
  response = self.app.handle_request(subreq)
2475
+ data = self._parse_listing_response(subreq, response)
2476
+ return data, response
2144
2477
 
2145
- if not is_success(response.status_int):
2146
- self.app.logger.warning(
2147
- 'Failed to get container listing from %s: %s',
2148
- subreq.path_qs, response.status_int)
2149
- return None, response
2150
-
2151
- try:
2152
- data = json.loads(response.body)
2153
- if not isinstance(data, list):
2154
- raise ValueError('not a list')
2155
- return data, response
2156
- except ValueError as err:
2157
- self.app.logger.error(
2158
- 'Problem with listing response from %s: %r',
2159
- subreq.path_qs, err)
2160
- return None, response
2161
-
2162
- def _get_shard_ranges(self, req, account, container, includes=None,
2163
- states=None):
2164
- """
2165
- Fetch shard ranges from given `account/container`. If `includes` is
2166
- given then the shard range for that object name is requested, otherwise
2167
- all shard ranges are requested.
2168
-
2169
- :param req: original Request instance.
2170
- :param account: account from which shard ranges should be fetched.
2171
- :param container: container from which shard ranges should be fetched.
2172
- :param includes: (optional) restricts the list of fetched shard ranges
2173
- to those which include the given name.
2174
- :param states: (optional) the states of shard ranges to be fetched.
2175
- :return: a list of instances of :class:`swift.common.utils.ShardRange`,
2176
- or None if there was a problem fetching the shard ranges
2177
- """
2178
- params = req.params.copy()
2179
- params.pop('limit', None)
2180
- params['format'] = 'json'
2181
- if includes:
2182
- params['includes'] = includes
2183
- if states:
2184
- params['states'] = states
2185
- headers = {'X-Backend-Record-Type': 'shard'}
2186
- listing, response = self._get_container_listing(
2187
- req, account, container, headers=headers, params=params)
2478
+ def _parse_namespaces(self, req, listing, response):
2188
2479
  if listing is None:
2189
2480
  return None
2190
2481
 
2191
2482
  record_type = response.headers.get('x-backend-record-type')
2192
2483
  if record_type != 'shard':
2193
2484
  err = 'unexpected record type %r' % record_type
2194
- self.app.logger.error("Failed to get shard ranges from %s: %s",
2195
- req.path_qs, err)
2485
+ self.logger.error("Failed to get shard ranges from %s: %s",
2486
+ req.path_qs, err)
2196
2487
  return None
2197
2488
 
2198
2489
  try:
2199
- return [ShardRange.from_dict(shard_range)
2200
- for shard_range in listing]
2490
+ # Note: a legacy container-server could return a list of
2491
+ # ShardRanges, but that's ok: namespaces just need 'name', 'lower'
2492
+ # and 'upper' keys. If we ever need to know we can look for a
2493
+ # 'x-backend-record-shard-format' header from newer container
2494
+ # servers.
2495
+ return [Namespace(data['name'], data['lower'], data['upper'])
2496
+ for data in listing]
2201
2497
  except (ValueError, TypeError, KeyError) as err:
2202
- self.app.logger.error(
2203
- "Failed to get shard ranges from %s: invalid data: %r",
2498
+ self.logger.error(
2499
+ "Failed to get namespaces from %s: invalid data: %r",
2204
2500
  req.path_qs, err)
2205
2501
  return None
2206
-
2207
- def _get_update_shard(self, req, account, container, obj):
2208
- """
2209
- Find the appropriate shard range for an object update.
2210
-
2211
- Note that this fetches and caches (in both the per-request infocache
2212
- and memcache, if available) all shard ranges for the given root
2213
- container so we won't have to contact the container DB for every write.
2214
-
2215
- :param req: original Request instance.
2216
- :param account: account from which shard ranges should be fetched.
2217
- :param container: container from which shard ranges should be fetched.
2218
- :param obj: object getting updated.
2219
- :return: an instance of :class:`swift.common.utils.ShardRange`,
2220
- or None if the update should go back to the root
2221
- """
2222
- if not self.app.recheck_updating_shard_ranges:
2223
- # caching is disabled; fall back to old behavior
2224
- shard_ranges = self._get_shard_ranges(
2225
- req, account, container, states='updating', includes=obj)
2226
- if not shard_ranges:
2227
- return None
2228
- return shard_ranges[0]
2229
-
2230
- cache_key = get_cache_key(account, container, shard='updating')
2231
- infocache = req.environ.setdefault('swift.infocache', {})
2232
- memcache = getattr(self.app, 'memcache', None) or req.environ.get(
2233
- 'swift.cache')
2234
-
2235
- cached_ranges = infocache.get(cache_key)
2236
- if cached_ranges is None and memcache:
2237
- cached_ranges = memcache.get(cache_key)
2238
-
2239
- if cached_ranges:
2240
- shard_ranges = [
2241
- ShardRange.from_dict(shard_range)
2242
- for shard_range in cached_ranges]
2243
- else:
2244
- shard_ranges = self._get_shard_ranges(
2245
- req, account, container, states='updating')
2246
- if shard_ranges:
2247
- cached_ranges = [dict(sr) for sr in shard_ranges]
2248
- # went to disk; cache it
2249
- if memcache:
2250
- memcache.set(cache_key, cached_ranges,
2251
- time=self.app.recheck_updating_shard_ranges)
2252
-
2253
- if not shard_ranges:
2254
- return None
2255
-
2256
- infocache[cache_key] = tuple(cached_ranges)
2257
- return find_shard_range(obj, shard_ranges)