swift 2.32.1__py2.py3-none-any.whl → 2.33.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. swift/account/server.py +1 -11
  2. swift/cli/info.py +28 -1
  3. swift-2.32.1.data/scripts/swift-recon-cron → swift/cli/recon_cron.py +4 -13
  4. swift/cli/reload.py +141 -0
  5. swift/common/daemon.py +12 -2
  6. swift/common/db.py +12 -8
  7. swift/common/http_protocol.py +76 -3
  8. swift/common/manager.py +18 -5
  9. swift/common/memcached.py +18 -12
  10. swift/common/middleware/proxy_logging.py +35 -27
  11. swift/common/middleware/s3api/acl_handlers.py +1 -1
  12. swift/common/middleware/s3api/controllers/__init__.py +3 -0
  13. swift/common/middleware/s3api/controllers/acl.py +3 -2
  14. swift/common/middleware/s3api/controllers/logging.py +2 -2
  15. swift/common/middleware/s3api/controllers/multi_upload.py +30 -6
  16. swift/common/middleware/s3api/controllers/object_lock.py +44 -0
  17. swift/common/middleware/s3api/s3api.py +4 -0
  18. swift/common/middleware/s3api/s3request.py +19 -12
  19. swift/common/middleware/s3api/s3response.py +13 -2
  20. swift/common/middleware/s3api/utils.py +1 -1
  21. swift/common/middleware/slo.py +395 -298
  22. swift/common/middleware/staticweb.py +45 -14
  23. swift/common/middleware/tempurl.py +132 -91
  24. swift/common/request_helpers.py +32 -8
  25. swift/common/storage_policy.py +1 -1
  26. swift/common/swob.py +5 -2
  27. swift/common/utils/__init__.py +230 -135
  28. swift/common/utils/timestamp.py +23 -2
  29. swift/common/wsgi.py +8 -0
  30. swift/container/backend.py +126 -21
  31. swift/container/replicator.py +42 -6
  32. swift/container/server.py +264 -145
  33. swift/container/sharder.py +50 -30
  34. swift/container/updater.py +1 -0
  35. swift/obj/auditor.py +2 -1
  36. swift/obj/diskfile.py +55 -19
  37. swift/obj/expirer.py +1 -13
  38. swift/obj/mem_diskfile.py +2 -1
  39. swift/obj/mem_server.py +1 -0
  40. swift/obj/replicator.py +2 -2
  41. swift/obj/server.py +12 -23
  42. swift/obj/updater.py +1 -0
  43. swift/obj/watchers/dark_data.py +72 -34
  44. swift/proxy/controllers/account.py +3 -2
  45. swift/proxy/controllers/base.py +217 -127
  46. swift/proxy/controllers/container.py +274 -289
  47. swift/proxy/controllers/obj.py +98 -141
  48. swift/proxy/server.py +2 -12
  49. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-container-info +3 -0
  50. swift-2.33.1.data/scripts/swift-recon-cron +24 -0
  51. {swift-2.32.1.dist-info → swift-2.33.1.dist-info}/AUTHORS +3 -1
  52. {swift-2.32.1.dist-info → swift-2.33.1.dist-info}/METADATA +4 -3
  53. {swift-2.32.1.dist-info → swift-2.33.1.dist-info}/RECORD +94 -91
  54. {swift-2.32.1.dist-info → swift-2.33.1.dist-info}/WHEEL +1 -1
  55. {swift-2.32.1.dist-info → swift-2.33.1.dist-info}/entry_points.txt +1 -0
  56. swift-2.33.1.dist-info/pbr.json +1 -0
  57. swift-2.32.1.dist-info/pbr.json +0 -1
  58. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-account-audit +0 -0
  59. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-account-auditor +0 -0
  60. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-account-info +0 -0
  61. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-account-reaper +0 -0
  62. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-account-replicator +0 -0
  63. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-account-server +0 -0
  64. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-config +0 -0
  65. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-container-auditor +0 -0
  66. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-container-reconciler +0 -0
  67. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-container-replicator +0 -0
  68. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-container-server +0 -0
  69. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-container-sharder +0 -0
  70. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-container-sync +0 -0
  71. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-container-updater +0 -0
  72. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-dispersion-populate +0 -0
  73. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-dispersion-report +0 -0
  74. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-drive-audit +0 -0
  75. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-form-signature +0 -0
  76. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-get-nodes +0 -0
  77. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-init +0 -0
  78. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-object-auditor +0 -0
  79. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-object-expirer +0 -0
  80. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-object-info +0 -0
  81. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-object-reconstructor +0 -0
  82. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-object-relinker +0 -0
  83. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-object-replicator +0 -0
  84. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-object-server +0 -0
  85. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-object-updater +0 -0
  86. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-oldies +0 -0
  87. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-orphans +0 -0
  88. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-proxy-server +0 -0
  89. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-recon +0 -0
  90. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-reconciler-enqueue +0 -0
  91. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-ring-builder +0 -0
  92. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-ring-builder-analyzer +0 -0
  93. {swift-2.32.1.data → swift-2.33.1.data}/scripts/swift-ring-composer +0 -0
  94. {swift-2.32.1.dist-info → swift-2.33.1.dist-info}/LICENSE +0 -0
  95. {swift-2.32.1.dist-info → swift-2.33.1.dist-info}/top_level.txt +0 -0
@@ -334,6 +334,7 @@ import time
334
334
  import six
335
335
 
336
336
  from swift.cli.container_deleter import make_delete_jobs
337
+ from swift.common.header_key_dict import HeaderKeyDict
337
338
  from swift.common.exceptions import ListingIterError, SegmentError
338
339
  from swift.common.middleware.listing_formats import \
339
340
  MAX_CONTAINER_LISTING_CONTENT_LENGTH
@@ -345,15 +346,16 @@ from swift.common.swob import Request, HTTPBadRequest, HTTPServerError, \
345
346
  RESPONSE_REASONS, str_to_wsgi, bytes_to_wsgi, wsgi_to_str, wsgi_quote
346
347
  from swift.common.utils import get_logger, config_true_value, \
347
348
  get_valid_utf8_str, override_bytes_from_content_type, split_path, \
348
- RateLimitedIterator, quote, close_if_possible, closing_if_possible, \
349
- LRUCache, StreamingPile, strict_b64decode, Timestamp, drain_and_close, \
349
+ RateLimitedIterator, quote, closing_if_possible, \
350
+ LRUCache, StreamingPile, strict_b64decode, Timestamp, friendly_close, \
350
351
  get_expirer_container, md5
351
352
  from swift.common.registry import register_swift_info
352
353
  from swift.common.request_helpers import SegmentedIterable, \
353
354
  get_sys_meta_prefix, update_etag_is_at_header, resolve_etag_is_at_header, \
354
- get_container_update_override_key, update_ignore_range_header
355
+ get_container_update_override_key, update_ignore_range_header, \
356
+ get_param
355
357
  from swift.common.constraints import check_utf8, AUTO_CREATE_ACCOUNT_PREFIX
356
- from swift.common.http import HTTP_NOT_FOUND, HTTP_UNAUTHORIZED, is_success
358
+ from swift.common.http import HTTP_NOT_FOUND, HTTP_UNAUTHORIZED
357
359
  from swift.common.wsgi import WSGIContext, make_subrequest, make_env, \
358
360
  make_pre_authed_request
359
361
  from swift.common.middleware.bulk import get_response_body, \
@@ -530,6 +532,129 @@ def parse_and_validate_input(req_body, req_path):
530
532
  return parsed_data
531
533
 
532
534
 
535
+ def _annotate_segments(segments, logger=None):
536
+ """
537
+ Decode any inlined data and update sub_slo segments bytes from content-type
538
+ when available; then annotate segment dicts in segments list with
539
+ 'segment_length'.
540
+
541
+ N.B. raw_data segments don't have a bytes key and range-segments need to
542
+ calculate their length from their range key but afterwards all segments
543
+ dicts will have 'segment_length' representing the length of the segment.
544
+ """
545
+ for seg_dict in segments:
546
+ if 'data' in seg_dict:
547
+ seg_dict['raw_data'] = base64.b64decode(seg_dict.pop('data'))
548
+ segment_length = len(seg_dict['raw_data'])
549
+ else:
550
+ if config_true_value(seg_dict.get('sub_slo')):
551
+ override_bytes_from_content_type(
552
+ seg_dict, logger=logger)
553
+ seg_range = seg_dict.get('range')
554
+ if seg_range is not None:
555
+ # The range is of the form N-M, where N and M are both
556
+ # positive decimal integers. We know this because this
557
+ # middleware is the only thing that creates the SLO
558
+ # manifests stored in the cluster.
559
+ range_start, range_end = [
560
+ int(x) for x in seg_range.split('-')]
561
+ segment_length = (range_end - range_start) + 1
562
+ else:
563
+ segment_length = int(seg_dict['bytes'])
564
+ seg_dict['segment_length'] = segment_length
565
+
566
+
567
+ class RespAttrs(object):
568
+ """
569
+ Encapsulate properties of a GET or HEAD response that are pertinent to
570
+ handling a potential SLO response.
571
+
572
+ Instances of this class are typically constructed using the
573
+ ``from_headers`` method.
574
+
575
+ :param is_slo: True if the response appears to be an SLO manifest, False
576
+ otherwise.
577
+ :param timestamp: an instance of :class:`~swift.common.utils.Timestamp`.
578
+ :param manifest_etag: the Etag of the manifest object, or None if
579
+ ``is_slo`` is False.
580
+ :param slo_etag: the Etag of the SLO.
581
+ :param slo_size: the size of the SLO.
582
+ """
583
+ def __init__(self, is_slo, timestamp, manifest_etag, slo_etag, slo_size):
584
+ self.is_slo = bool(is_slo)
585
+ self.timestamp = Timestamp(timestamp or 0)
586
+ # manifest_etag is unambiguous, but json_md5 is even more explicit
587
+ self.json_md5 = manifest_etag or ''
588
+ self.slo_etag = slo_etag or ''
589
+ try:
590
+ # even though it's from sysmeta, we have to worry about empty
591
+ # values - see test_get_invalid_sysmeta_passthrough
592
+ self.slo_size = int(slo_size)
593
+ except (ValueError, TypeError):
594
+ self.slo_size = -1
595
+ self.is_legacy = not self._has_size_and_etag()
596
+
597
+ def _has_size_and_etag(self):
598
+ return self.slo_size >= 0 and self.slo_etag
599
+
600
+ @classmethod
601
+ def from_headers(cls, response_headers):
602
+ """
603
+ Inspect response headers and extract any resp_attrs we can find.
604
+
605
+ :param response_headers: list of tuples from an object response
606
+ :returns: an instance of RespAttrs to represent the response headers
607
+ """
608
+ is_slo = False
609
+ timestamp = None
610
+ found_etag = None
611
+ slo_etag = None
612
+ slo_size = None
613
+ for header, value in response_headers:
614
+ header = header.lower()
615
+ if header == 'x-static-large-object':
616
+ is_slo = config_true_value(value)
617
+ elif header == 'x-backend-timestamp':
618
+ timestamp = value
619
+ elif header == 'etag':
620
+ found_etag = value
621
+ elif header == SYSMETA_SLO_ETAG:
622
+ slo_etag = value
623
+ elif header == SYSMETA_SLO_SIZE:
624
+ slo_size = value
625
+ manifest_etag = found_etag if is_slo else None
626
+ return cls(is_slo, timestamp, manifest_etag, slo_etag, slo_size)
627
+
628
+ def update_from_segments(self, segments):
629
+ """
630
+ Always called if SLO has fetched the manifest response body, for
631
+ legacy manifests we'll calculate size/etag values we wouldn't have
632
+ gotten from sys-meta headers.
633
+ """
634
+ # we only have to set size/etag once; it doesn't matter if we got the
635
+ # values from sysmeta headers or segments
636
+ if self._has_size_and_etag():
637
+ return
638
+
639
+ calculated_size = 0
640
+ calculated_etag = md5(usedforsecurity=False)
641
+
642
+ for seg_dict in segments:
643
+ calculated_size += seg_dict['segment_length']
644
+
645
+ if 'raw_data' in seg_dict:
646
+ r = md5(seg_dict['raw_data'],
647
+ usedforsecurity=False).hexdigest()
648
+ elif seg_dict.get('range'):
649
+ r = '%s:%s;' % (seg_dict['hash'], seg_dict['range'])
650
+ else:
651
+ r = seg_dict['hash']
652
+ calculated_etag.update(r.encode('ascii'))
653
+
654
+ self.slo_size = calculated_size
655
+ self.slo_etag = calculated_etag.hexdigest()
656
+
657
+
533
658
  class SloGetContext(WSGIContext):
534
659
 
535
660
  max_slo_recursion_depth = 10
@@ -537,6 +662,8 @@ class SloGetContext(WSGIContext):
537
662
  def __init__(self, slo):
538
663
  self.slo = slo
539
664
  super(SloGetContext, self).__init__(slo.app)
665
+ # we'll know more after we look at the response metadata
666
+ self.segment_listing_needed = False
540
667
 
541
668
  def _fetch_sub_slo_segments(self, req, version, acc, con, obj):
542
669
  """
@@ -571,9 +698,8 @@ class SloGetContext(WSGIContext):
571
698
  body if len(body) <= 60 else body[:57] + '...'))
572
699
 
573
700
  try:
574
- with closing_if_possible(sub_resp.app_iter):
575
- return json.loads(b''.join(sub_resp.app_iter))
576
- except ValueError as err:
701
+ return self._parse_segments(sub_resp.app_iter)
702
+ except HTTPException as err:
577
703
  raise ListingIterError(
578
704
  'while fetching %s, JSON-decoding of submanifest %s '
579
705
  'failed with %s' % (req.path, sub_req.path, err))
@@ -584,32 +710,8 @@ class SloGetContext(WSGIContext):
584
710
  conobj=seg_dict['name'].lstrip('/')
585
711
  )
586
712
 
587
- def _segment_length(self, seg_dict):
588
- """
589
- Returns the number of bytes that will be fetched from the specified
590
- segment on a plain GET request for this SLO manifest.
591
- """
592
- if 'raw_data' in seg_dict:
593
- return len(seg_dict['raw_data'])
594
-
595
- seg_range = seg_dict.get('range')
596
- if seg_range is not None:
597
- # The range is of the form N-M, where N and M are both positive
598
- # decimal integers. We know this because this middleware is the
599
- # only thing that creates the SLO manifests stored in the
600
- # cluster.
601
- range_start, range_end = [int(x) for x in seg_range.split('-')]
602
- return (range_end - range_start) + 1
603
- else:
604
- return int(seg_dict['bytes'])
605
-
606
713
  def _segment_listing_iterator(self, req, version, account, segments,
607
714
  byteranges):
608
- for seg_dict in segments:
609
- if config_true_value(seg_dict.get('sub_slo')):
610
- override_bytes_from_content_type(seg_dict,
611
- logger=self.slo.logger)
612
-
613
715
  # We handle the range stuff here so that we can be smart about
614
716
  # skipping unused submanifests. For example, if our first segment is a
615
717
  # submanifest referencing 50 MiB total, but start_byte falls in
@@ -617,9 +719,6 @@ class SloGetContext(WSGIContext):
617
719
  #
618
720
  # If we were to make SegmentedIterable handle all the range
619
721
  # calculations, we would be unable to make this optimization.
620
- total_length = sum(self._segment_length(seg) for seg in segments)
621
- if not byteranges:
622
- byteranges = [(0, total_length - 1)]
623
722
 
624
723
  # Cache segments from sub-SLOs in case more than one byterange
625
724
  # includes data from a particular sub-SLO. We only cache a few sets
@@ -646,12 +745,26 @@ class SloGetContext(WSGIContext):
646
745
  first_byte, last_byte,
647
746
  cached_fetch_sub_slo_segments,
648
747
  recursion_depth=1):
748
+ """
749
+ Iterable that generates a filtered and annotated stream of segment
750
+ dicts describing the sub-segment ranges that would be used by the
751
+ SegmentedIterable to construct the bytes for a ranged response.
752
+
753
+ :param req: original request object
754
+ :param version: version
755
+ :param account: account
756
+ :param segments: segments dictionary
757
+ :param first_byte: offset into the large object for the first byte
758
+ that is returned to the client
759
+ :param last_byte: offset into the large object for the last byte
760
+ that is returned to the client
761
+ :param cached_fetch_sub_slo_segments: LRU cache used for fetching
762
+ sub-segments
763
+ :param recursion_depth: max number of recursive sub_slo calls
764
+ """
649
765
  last_sub_path = None
650
766
  for seg_dict in segments:
651
- if 'data' in seg_dict:
652
- seg_dict['raw_data'] = strict_b64decode(seg_dict.pop('data'))
653
-
654
- seg_length = self._segment_length(seg_dict)
767
+ seg_length = seg_dict['segment_length']
655
768
  if first_byte >= seg_length:
656
769
  # don't need any bytes from this segment
657
770
  first_byte -= seg_length
@@ -718,50 +831,194 @@ class SloGetContext(WSGIContext):
718
831
  first_byte -= seg_length
719
832
  last_byte -= seg_length
720
833
 
721
- def _need_to_refetch_manifest(self, req):
834
+ def _is_body_complete(self):
835
+ content_range = ''
836
+ for header, value in self._response_headers:
837
+ if header.lower() == 'content-range':
838
+ content_range = value
839
+ break
840
+ # e.g. Content-Range: bytes 0-14289/14290
841
+ match = re.match(r'bytes (\d+)-(\d+)/(\d+)$', content_range)
842
+ if not match:
843
+ # Malformed or missing, so we don't know what we got.
844
+ return False
845
+ first_byte, last_byte, length = [int(x) for x in match.groups()]
846
+ # If and only if we actually got back the full manifest body, then
847
+ # we can avoid re-fetching the object.
848
+ return first_byte == 0 and last_byte == length - 1
849
+
850
+ def _is_manifest_and_need_to_refetch(self, req, resp_attrs,
851
+ is_manifest_get):
722
852
  """
723
- Just because a response shows that an object is a SLO manifest does not
724
- mean that response's body contains the entire SLO manifest. If it
725
- doesn't, we need to make a second request to actually get the whole
726
- thing.
853
+ Check if the segments will be needed to service the request and update
854
+ the segment_listing_needed attribute.
727
855
 
728
- Note: this assumes that X-Static-Large-Object has already been found.
856
+ :return: boolean indicating if we need to refetch, only if the segments
857
+ ARE needed we MAY need to refetch them!
729
858
  """
859
+ if not resp_attrs.is_slo:
860
+ # Not a static large object manifest, maybe an error, regardless
861
+ # no refetch needed
862
+ return False
863
+
864
+ if is_manifest_get:
865
+ # Any manifest json object response will do
866
+ return False
867
+
730
868
  if req.method == 'HEAD':
731
- # We've already looked for SYSMETA_SLO_ETAG/SIZE in the response
732
- # and didn't find them. We have to fetch the whole manifest and
733
- # recompute.
869
+ # There may be some cases in the future where a HEAD resp on even a
870
+ # modern manifest should refetch, e.g. lp bug #2029174
871
+ self.segment_listing_needed = resp_attrs.is_legacy
872
+ # it will always be the case that a HEAD must re-fetch iff
873
+ # segment_listing_needed
874
+ return self.segment_listing_needed
875
+
876
+ last_resp_status_int = self._get_status_int()
877
+ # These are based on etag (or last-modified), but the SLO's etag is
878
+ # almost certainly not the manifest object's etag. Still, it's highly
879
+ # likely that the submitted If-None-Match won't match the manifest
880
+ # object's etag, so we can avoid re-fetching the manifest if we got a
881
+ # successful response.
882
+ if last_resp_status_int in (412, 304):
883
+ # a conditional response from a modern manifest would have an
884
+ # accurate SLO etag, AND comparison with the etag-is-at header, but
885
+ # for legacy manifests responses (who always need to calculate the
886
+ # correct etag, even for if-[un]modified-since errors) we can't say
887
+ # what the etag is or if it matches unless we calculate it from
888
+ # segments - so we always need them
889
+ self.segment_listing_needed = resp_attrs.is_legacy
890
+ # if we need them; we can't get them from the error
891
+ return self.segment_listing_needed
892
+
893
+ # This is a GET request for an SLO object; if we're going to return a
894
+ # successful response we're going to need the segments, but this
895
+ # resp_iter may not contain the entire SLO manifest.
896
+ self.segment_listing_needed = True
897
+
898
+ # modern swift object-servers should ignore Range headers on manifests,
899
+ # but during upgrade if we get a range response we'll probably have to
900
+ # refetch
901
+ if last_resp_status_int == 416:
902
+ # if the range wasn't satisfiable we need to refetch
734
903
  return True
904
+ elif last_resp_status_int == 206:
905
+ # a partial response might include the whole content-range?!
906
+ return not self._is_body_complete()
907
+ else:
908
+ # a good number of error responses would have returned earlier for
909
+ # lacking is_slo sys-meta, at this point we've filtered all the
910
+ # other response codes, so this is a perfectly normal 200 response,
911
+ # no need to refetch
912
+ return False
913
+
914
+ def _refetch_manifest(self, req, resp_iter, orig_resp_attrs):
915
+ req.environ['swift.non_client_disconnect'] = True
916
+ friendly_close(resp_iter)
917
+ del req.environ['swift.non_client_disconnect']
918
+
919
+ get_req = make_subrequest(
920
+ req.environ, method='GET',
921
+ headers={'x-auth-token': req.headers.get('x-auth-token')},
922
+ agent='%(orig)s SLO MultipartGET', swift_source='SLO')
923
+ resp_iter = self._app_call(get_req.environ)
924
+ new_resp_attrs = RespAttrs.from_headers(self._response_headers)
925
+ if new_resp_attrs.timestamp < orig_resp_attrs.timestamp and \
926
+ not new_resp_attrs.is_slo:
927
+ # Our *orig_resp_attrs* saw *newer* data that indicated it was an
928
+ # SLO, but on refetch it's an older object or error; 503 seems
929
+ # reasonable?
930
+ friendly_close(resp_iter)
931
+ raise HTTPServiceUnavailable(request=req)
932
+ # else, the caller will know how to return this response
933
+ return new_resp_attrs, resp_iter
934
+
935
+ def _parse_segments(self, resp_iter):
936
+ """
937
+ Read the manifest body and parse segments.
735
938
 
736
- response_status = int(self._response_status[:3])
939
+ :returns: segments
940
+ :raises: HTTPServerError
941
+ """
942
+ segments = self._get_manifest_read(resp_iter)
943
+ _annotate_segments(segments, logger=self.slo.logger)
944
+ return segments
737
945
 
738
- # These are based on etag, and the SLO's etag is almost certainly not
739
- # the manifest object's etag. Still, it's highly likely that the
740
- # submitted If-None-Match won't match the manifest object's etag, so
741
- # we can avoid re-fetching the manifest if we got a successful
742
- # response.
743
- if ((req.if_match or req.if_none_match) and
744
- not is_success(response_status)):
745
- return True
946
+ def _return_manifest_response(self, req, start_response, resp_iter,
947
+ is_format_raw):
948
+ if is_format_raw:
949
+ json_data = self.convert_segment_listing(resp_iter)
950
+ # we've created a new response body
951
+ resp_iter = [json_data]
952
+ replace_headers = {
953
+ # Note that we have to return the large object's content-type
954
+ # (not application/json) so it's like what the client sent on
955
+ # PUT. Otherwise, server-side copy won't work.
956
+ 'Content-Length': len(json_data),
957
+ 'Etag': md5(json_data, usedforsecurity=False).hexdigest(),
958
+ }
959
+ else:
960
+ # we're going to return the manifest resp_iter as-is
961
+ replace_headers = {
962
+ 'Content-Type': 'application/json; charset=utf-8',
963
+ }
964
+ return self._return_response(req, start_response, resp_iter,
965
+ replace_headers)
966
+
967
+ def _return_slo_response(self, req, start_response, resp_iter, resp_attrs):
968
+ if self.segment_listing_needed:
969
+ # consume existing resp_iter; we'll create a new one
970
+ segments = self._parse_segments(resp_iter)
971
+ resp_attrs.update_from_segments(segments)
972
+ if req.method == 'HEAD':
973
+ resp_iter = []
974
+ else:
975
+ resp_iter = self._build_resp_iter(req, segments, resp_attrs)
976
+ headers = {
977
+ 'Etag': '"%s"' % resp_attrs.slo_etag,
978
+ 'X-Manifest-Etag': resp_attrs.json_md5,
979
+ # This isn't correct for range requests, but swob will fix it?
980
+ 'Content-Length': str(resp_attrs.slo_size),
981
+ # ignore bogus content-range, make swob figure it out
982
+ 'Content-Range': None
983
+ }
984
+ return self._return_response(req, start_response, resp_iter,
985
+ replace_headers=headers)
986
+
987
+ def _return_response(self, req, start_response, resp_iter,
988
+ replace_headers):
989
+ if req.method == 'HEAD' or self._get_status_int() in (412, 304):
990
+ # we should drain HEAD and unmet condition responses since they
991
+ # don't have bodies
992
+ friendly_close(resp_iter)
993
+ resp_iter = b''
994
+ resp_headers = HeaderKeyDict(self._response_headers, **replace_headers)
995
+ resp = Response(
996
+ status=self._response_status,
997
+ headers=resp_headers,
998
+ app_iter=resp_iter,
999
+ request=req,
1000
+ conditional_response=True,
1001
+ conditional_etag=resolve_etag_is_at_header(req, resp_headers))
1002
+ return resp(req.environ, start_response)
746
1003
 
747
- if req.range and response_status in (206, 416):
748
- content_range = ''
749
- for header, value in self._response_headers:
750
- if header.lower() == 'content-range':
751
- content_range = value
752
- break
753
- # e.g. Content-Range: bytes 0-14289/14290
754
- match = re.match(r'bytes (\d+)-(\d+)/(\d+)$', content_range)
755
- if not match:
756
- # Malformed or missing, so we don't know what we got.
757
- return True
758
- first_byte, last_byte, length = [int(x) for x in match.groups()]
759
- # If and only if we actually got back the full manifest body, then
760
- # we can avoid re-fetching the object.
761
- got_everything = (first_byte == 0 and last_byte == length - 1)
762
- return not got_everything
763
-
764
- return False
1004
+ def _return_non_slo_response(self, req, start_response, resp_iter):
1005
+ # our "pass-through" response may have been from a manifest refetch w/o
1006
+ # range/conditional headers that turned out to be a real object, and
1007
+ # now we want out. But if the original client request included Range
1008
+ # or Conditional headers we can trust swob to do the right conversion
1009
+ # back into a 206/416/304/412 (as long as the response we have is a
1010
+ # normal successful response and we respect any forwarding middleware's
1011
+ # etag-is-at header that we stripped off for the refetch!)
1012
+ resp = Response(
1013
+ status=self._response_status,
1014
+ headers=self._response_headers,
1015
+ app_iter=resp_iter,
1016
+ request=req,
1017
+ conditional_response=self._get_status_int() == 200,
1018
+ conditional_etag=resolve_etag_is_at_header(
1019
+ req, self._response_headers)
1020
+ )
1021
+ return resp(req.environ, start_response)
765
1022
 
766
1023
  def handle_slo_get_or_head(self, req, start_response):
767
1024
  """
@@ -774,137 +1031,61 @@ class SloGetContext(WSGIContext):
774
1031
  large object manifest.
775
1032
  :param start_response: WSGI start_response callable
776
1033
  """
777
- if req.params.get('multipart-manifest') != 'get':
1034
+ is_manifest_get = get_param(req, 'multipart-manifest') == 'get'
1035
+ is_format_raw = is_manifest_get and get_param(req, 'format') == 'raw'
1036
+
1037
+ if not is_manifest_get:
778
1038
  # If this object is an SLO manifest, we may have saved off the
779
1039
  # large object etag during the original PUT. Send an
780
- # X-Backend-Etag-Is-At header so that, if the SLO etag *was*
781
- # saved, we can trust the object-server to respond appropriately
782
- # to If-Match/If-None-Match requests.
1040
+ # X-Backend-Etag-Is-At header so that, if the SLO etag *was* saved,
1041
+ # we can trust the object-server to respond appropriately to
1042
+ # If-Match/If-None-Match requests.
783
1043
  update_etag_is_at_header(req, SYSMETA_SLO_ETAG)
784
1044
  # Tell the object server that if it's a manifest,
785
1045
  # we want the whole thing
786
1046
  update_ignore_range_header(req, 'X-Static-Large-Object')
787
- resp_iter = self._app_call(req.environ)
788
1047
 
789
- # make sure this response is for a static large object manifest
790
- slo_marker = slo_etag = slo_size = slo_timestamp = None
791
- for header, value in self._response_headers:
792
- header = header.lower()
793
- if header == SYSMETA_SLO_ETAG:
794
- slo_etag = value
795
- elif header == SYSMETA_SLO_SIZE:
796
- slo_size = value
797
- elif (header == 'x-static-large-object' and
798
- config_true_value(value)):
799
- slo_marker = value
800
- elif header == 'x-backend-timestamp':
801
- slo_timestamp = value
802
-
803
- if slo_marker and slo_etag and slo_size and slo_timestamp:
804
- break
805
-
806
- if not slo_marker:
807
- # Not a static large object manifest. Just pass it through.
808
- start_response(self._response_status,
809
- self._response_headers,
810
- self._response_exc_info)
811
- return resp_iter
812
-
813
- # Handle pass-through request for the manifest itself
814
- if req.params.get('multipart-manifest') == 'get':
815
- if req.params.get('format') == 'raw':
816
- resp_iter = self.convert_segment_listing(
817
- self._response_headers, resp_iter)
818
- else:
819
- new_headers = []
820
- for header, value in self._response_headers:
821
- if header.lower() == 'content-type':
822
- new_headers.append(('Content-Type',
823
- 'application/json; charset=utf-8'))
824
- else:
825
- new_headers.append((header, value))
826
- self._response_headers = new_headers
827
- start_response(self._response_status,
828
- self._response_headers,
829
- self._response_exc_info)
830
- return resp_iter
831
-
832
- is_conditional = self._response_status.startswith(('304', '412')) and (
833
- req.if_match or req.if_none_match)
834
- if slo_etag and slo_size and (
835
- req.method == 'HEAD' or is_conditional):
836
- # Since we have length and etag, we can respond immediately
837
- resp = Response(
838
- status=self._response_status,
839
- headers=self._response_headers,
840
- app_iter=resp_iter,
841
- request=req,
842
- conditional_etag=resolve_etag_is_at_header(
843
- req, self._response_headers),
844
- conditional_response=True)
845
- resp.headers.update({
846
- 'Etag': '"%s"' % slo_etag,
847
- 'X-Manifest-Etag': self._response_header_value('etag'),
848
- 'Content-Length': slo_size,
849
- })
850
- return resp(req.environ, start_response)
851
-
852
- if self._need_to_refetch_manifest(req):
853
- req.environ['swift.non_client_disconnect'] = True
854
- close_if_possible(resp_iter)
855
- del req.environ['swift.non_client_disconnect']
856
-
857
- get_req = make_subrequest(
858
- req.environ, method='GET',
859
- headers={'x-auth-token': req.headers.get('x-auth-token')},
860
- agent='%(orig)s SLO MultipartGET', swift_source='SLO')
861
- resp_iter = self._app_call(get_req.environ)
862
- slo_marker = config_true_value(self._response_header_value(
863
- 'x-static-large-object'))
864
- if not slo_marker: # will also catch non-2xx responses
865
- got_timestamp = self._response_header_value(
866
- 'x-backend-timestamp') or '0'
867
- if Timestamp(got_timestamp) >= Timestamp(slo_timestamp):
868
- # We've got a newer response available, so serve that.
869
- # Note that if there's data, it's going to be a 200 now,
870
- # not a 206, and we're not going to drop bytes in the
871
- # proxy on the client's behalf. Fortunately, the RFC is
872
- # pretty forgiving for a server; there's no guarantee that
873
- # a Range header will be respected.
874
- resp = Response(
875
- status=self._response_status,
876
- headers=self._response_headers,
877
- app_iter=resp_iter,
878
- request=req,
879
- conditional_etag=resolve_etag_is_at_header(
880
- req, self._response_headers),
881
- conditional_response=is_success(
882
- int(self._response_status[:3])))
883
- return resp(req.environ, start_response)
884
- else:
885
- # We saw newer data that indicated it's an SLO, but
886
- # couldn't fetch the whole thing; 503 seems reasonable?
887
- close_if_possible(resp_iter)
888
- raise HTTPServiceUnavailable(request=req)
889
- # NB: we might have gotten an out-of-date manifest -- that's OK;
890
- # we'll just try to serve the old data
891
-
892
- # Any Content-Range from a manifest is almost certainly wrong for the
893
- # full large object.
894
- resp_headers = [(h, v) for h, v in self._response_headers
895
- if not h.lower() == 'content-range']
896
-
897
- response = self.get_or_head_response(
898
- req, resp_headers, resp_iter)
899
- return response(req.environ, start_response)
900
-
901
- def convert_segment_listing(self, resp_headers, resp_iter):
1048
+ # process original request
1049
+ resp_iter = self._app_call(req.environ)
1050
+ resp_attrs = RespAttrs.from_headers(self._response_headers)
1051
+ # the next two calls hide a couple side-effects, sorry:
1052
+ #
1053
+ # 1) regardless of the return value the "need_to_refetch" check *may*
1054
+ # also set self.segment_listing_needed = True (it's commented to
1055
+ # help you wrap your head around that one, good luck)
1056
+ # 2) if we refetch, we overwrite the current resp_iter and resp_attrs
1057
+ # variables, partly because we *might* get back a NOT
1058
+ # resp_attrs.is_slo response (even if we had one to start), but
1059
+ # hopefully they're just the manifest resp we needed to refetch!
1060
+ if self._is_manifest_and_need_to_refetch(req, resp_attrs,
1061
+ is_manifest_get):
1062
+ resp_attrs, resp_iter = self._refetch_manifest(
1063
+ req, resp_iter, resp_attrs)
1064
+
1065
+ if not resp_attrs.is_slo:
1066
+ # even if the original resp_attrs may have been SLO we may have
1067
+ # refetched, this also handles the server error case
1068
+ return self._return_non_slo_response(
1069
+ req, start_response, resp_iter)
1070
+
1071
+ if is_manifest_get:
1072
+ # manifest pass through doesn't require resp_attrs
1073
+ return self._return_manifest_response(req, start_response,
1074
+ resp_iter, is_format_raw)
1075
+
1076
+ # this is a GET/HEAD response for the SLO object (not the manifest)
1077
+ return self._return_slo_response(req, start_response, resp_iter,
1078
+ resp_attrs)
1079
+
1080
+ def convert_segment_listing(self, resp_iter):
902
1081
  """
903
1082
  Converts the manifest data to match with the format
904
1083
  that was put in through ?multipart-manifest=put
905
1084
 
906
- :param resp_headers: response headers
907
1085
  :param resp_iter: a response iterable
1086
+
1087
+ :raises HTTPServerError:
1088
+ :returns: the json-serialized raw format (as bytes)
908
1089
  """
909
1090
  segments = self._get_manifest_read(resp_iter)
910
1091
 
@@ -921,108 +1102,36 @@ class SloGetContext(WSGIContext):
921
1102
  json_data = json.dumps(segments, sort_keys=True) # convert to string
922
1103
  if six.PY3:
923
1104
  json_data = json_data.encode('utf-8')
924
-
925
- new_headers = []
926
- for header, value in resp_headers:
927
- if header.lower() == 'content-length':
928
- new_headers.append(('Content-Length', len(json_data)))
929
- elif header.lower() == 'etag':
930
- new_headers.append(
931
- ('Etag', md5(json_data, usedforsecurity=False)
932
- .hexdigest()))
933
- else:
934
- new_headers.append((header, value))
935
- self._response_headers = new_headers
936
-
937
- return [json_data]
1105
+ return json_data
938
1106
 
939
1107
  def _get_manifest_read(self, resp_iter):
940
1108
  with closing_if_possible(resp_iter):
941
1109
  resp_body = b''.join(resp_iter)
942
1110
  try:
943
1111
  segments = json.loads(resp_body)
944
- except ValueError:
945
- segments = []
946
-
1112
+ except ValueError as e:
1113
+ msg = 'Unable to load SLO manifest'
1114
+ self.slo.logger.error('%s: %s', msg, e)
1115
+ raise HTTPServerError(msg)
947
1116
  return segments
948
1117
 
949
- def get_or_head_response(self, req, resp_headers, resp_iter):
950
- segments = self._get_manifest_read(resp_iter)
951
- slo_etag = None
952
- content_length = None
953
- response_headers = []
954
- for header, value in resp_headers:
955
- lheader = header.lower()
956
- if lheader == 'etag':
957
- response_headers.append(('X-Manifest-Etag', value))
958
- elif lheader != 'content-length':
959
- response_headers.append((header, value))
960
-
961
- if lheader == SYSMETA_SLO_ETAG:
962
- slo_etag = value
963
- elif lheader == SYSMETA_SLO_SIZE:
964
- # it's from sysmeta, so we don't worry about non-integer
965
- # values here
966
- content_length = int(value)
967
-
968
- # Prep to calculate content_length & etag if necessary
969
- if slo_etag is None:
970
- calculated_etag = md5(usedforsecurity=False)
971
- if content_length is None:
972
- calculated_content_length = 0
973
-
974
- for seg_dict in segments:
975
- # Decode any inlined data; it's important that we do this *before*
976
- # calculating the segment length and etag
977
- if 'data' in seg_dict:
978
- seg_dict['raw_data'] = base64.b64decode(seg_dict.pop('data'))
979
-
980
- if slo_etag is None:
981
- if 'raw_data' in seg_dict:
982
- r = md5(seg_dict['raw_data'],
983
- usedforsecurity=False).hexdigest()
984
- elif seg_dict.get('range'):
985
- r = '%s:%s;' % (seg_dict['hash'], seg_dict['range'])
986
- else:
987
- r = seg_dict['hash']
988
- calculated_etag.update(r.encode('ascii'))
989
-
990
- if content_length is None:
991
- if config_true_value(seg_dict.get('sub_slo')):
992
- override_bytes_from_content_type(
993
- seg_dict, logger=self.slo.logger)
994
- calculated_content_length += self._segment_length(seg_dict)
995
-
996
- if slo_etag is None:
997
- slo_etag = calculated_etag.hexdigest()
998
- if content_length is None:
999
- content_length = calculated_content_length
1000
-
1001
- response_headers.append(('Content-Length', str(content_length)))
1002
- response_headers.append(('Etag', '"%s"' % slo_etag))
1003
-
1004
- if req.method == 'HEAD':
1005
- return self._manifest_head_response(req, response_headers)
1006
- else:
1007
- return self._manifest_get_response(
1008
- req, content_length, response_headers, segments)
1118
+ def _build_resp_iter(self, req, segments, resp_attrs):
1119
+ """
1120
+ Build a response iterable for a GET request.
1009
1121
 
1010
- def _manifest_head_response(self, req, response_headers):
1011
- conditional_etag = resolve_etag_is_at_header(req, response_headers)
1012
- return HTTPOk(request=req, headers=response_headers, body=b'',
1013
- conditional_etag=conditional_etag,
1014
- conditional_response=True)
1122
+ :param req: the request object
1123
+ :param resp_attrs: the slo attributes
1015
1124
 
1016
- def _manifest_get_response(self, req, content_length, response_headers,
1017
- segments):
1125
+ :returns: a segmented iterable
1126
+ """
1018
1127
  if req.range:
1019
1128
  byteranges = [
1020
1129
  # For some reason, swob.Range.ranges_for_length adds 1 to the
1021
1130
  # last byte's position.
1022
1131
  (start, end - 1) for start, end
1023
- in req.range.ranges_for_length(content_length)]
1132
+ in req.range.ranges_for_length(resp_attrs.slo_size)]
1024
1133
  else:
1025
- byteranges = []
1134
+ byteranges = [(0, resp_attrs.slo_size - 1)]
1026
1135
 
1027
1136
  ver, account, _junk = req.split_path(3, 3, rest_with_last=True)
1028
1137
  account = wsgi_to_str(account)
@@ -1067,15 +1176,8 @@ class SloGetContext(WSGIContext):
1067
1176
  # their Etag/Content Length no longer match the connection
1068
1177
  # will drop. In this case a 409 Conflict will be logged in
1069
1178
  # the proxy logs and the user will receive incomplete results.
1070
- return HTTPConflict(request=req)
1071
-
1072
- conditional_etag = resolve_etag_is_at_header(req, response_headers)
1073
- response = Response(request=req, content_length=content_length,
1074
- headers=response_headers,
1075
- conditional_response=True,
1076
- conditional_etag=conditional_etag,
1077
- app_iter=segmented_iter)
1078
- return response
1179
+ raise HTTPConflict(request=req)
1180
+ return segmented_iter
1079
1181
 
1080
1182
 
1081
1183
  class StaticLargeObject(object):
@@ -1128,12 +1230,7 @@ class StaticLargeObject(object):
1128
1230
  delete_concurrency=delete_concurrency,
1129
1231
  logger=self.logger)
1130
1232
 
1131
- # Need to know how to expire things to do async deletes
1132
- if conf.get('auto_create_account_prefix'):
1133
- # proxy app will log about how this should get moved to swift.conf
1134
- prefix = conf['auto_create_account_prefix']
1135
- else:
1136
- prefix = AUTO_CREATE_ACCOUNT_PREFIX
1233
+ prefix = AUTO_CREATE_ACCOUNT_PREFIX
1137
1234
  self.expiring_objects_account = prefix + (
1138
1235
  conf.get('expiring_objects_account_name') or 'expiring_objects')
1139
1236
  self.expiring_objects_container_divisor = int(
@@ -1505,7 +1602,7 @@ class StaticLargeObject(object):
1505
1602
  vrs, account, _junk = req.split_path(2, 3, True)
1506
1603
  new_env = req.environ.copy()
1507
1604
  new_env['REQUEST_METHOD'] = 'GET'
1508
- del(new_env['wsgi.input'])
1605
+ del new_env['wsgi.input']
1509
1606
  new_env['QUERY_STRING'] = 'multipart-manifest=get'
1510
1607
  if 'version-id' in req.params:
1511
1608
  new_env['QUERY_STRING'] += \
@@ -1524,7 +1621,7 @@ class StaticLargeObject(object):
1524
1621
  '/%s/%s/%s' % (vrs, account, str_to_wsgi(obj_name.lstrip('/')))
1525
1622
  )
1526
1623
  # Just request the last byte of non-SLO objects so we don't waste
1527
- # a bunch of resources in drain_and_close() below
1624
+ # a bunch of resources in friendly_close() below
1528
1625
  manifest_req = Request.blank('', new_env, range='bytes=-1')
1529
1626
  update_ignore_range_header(manifest_req, 'X-Static-Large-Object')
1530
1627
  resp = manifest_req.get_response(self.app)
@@ -1543,7 +1640,7 @@ class StaticLargeObject(object):
1543
1640
  raise HTTPServerError('Unable to load SLO manifest')
1544
1641
  else:
1545
1642
  # Drain and close GET request (prevents socket leaks)
1546
- drain_and_close(resp)
1643
+ friendly_close(resp)
1547
1644
  raise HTTPBadRequest('Not an SLO manifest')
1548
1645
  elif resp.status_int == HTTP_NOT_FOUND:
1549
1646
  raise HTTPNotFound('SLO manifest not found')
@@ -1624,7 +1721,7 @@ class StaticLargeObject(object):
1624
1721
  resp.status, resp.body)
1625
1722
  return HTTPServiceUnavailable()
1626
1723
  # consume the response (should be short)
1627
- drain_and_close(resp)
1724
+ friendly_close(resp)
1628
1725
 
1629
1726
  # Finally, delete the manifest
1630
1727
  return self.app