swift-2.23.3-py3-none-any.whl → swift-2.35.0-py3-none-any.whl

This diff shows the changes between publicly released package versions, as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (206)
  1. swift/__init__.py +29 -50
  2. swift/account/auditor.py +21 -118
  3. swift/account/backend.py +33 -28
  4. swift/account/reaper.py +37 -28
  5. swift/account/replicator.py +22 -0
  6. swift/account/server.py +60 -26
  7. swift/account/utils.py +28 -11
  8. swift-2.23.3.data/scripts/swift-account-audit → swift/cli/account_audit.py +23 -13
  9. swift-2.23.3.data/scripts/swift-config → swift/cli/config.py +2 -2
  10. swift/cli/container_deleter.py +5 -11
  11. swift-2.23.3.data/scripts/swift-dispersion-populate → swift/cli/dispersion_populate.py +8 -7
  12. swift/cli/dispersion_report.py +10 -9
  13. swift-2.23.3.data/scripts/swift-drive-audit → swift/cli/drive_audit.py +63 -21
  14. swift/cli/form_signature.py +3 -7
  15. swift-2.23.3.data/scripts/swift-get-nodes → swift/cli/get_nodes.py +8 -2
  16. swift/cli/info.py +154 -14
  17. swift/cli/manage_shard_ranges.py +705 -37
  18. swift-2.23.3.data/scripts/swift-oldies → swift/cli/oldies.py +25 -14
  19. swift-2.23.3.data/scripts/swift-orphans → swift/cli/orphans.py +7 -3
  20. swift/cli/recon.py +196 -67
  21. swift-2.23.3.data/scripts/swift-recon-cron → swift/cli/recon_cron.py +17 -20
  22. swift-2.23.3.data/scripts/swift-reconciler-enqueue → swift/cli/reconciler_enqueue.py +2 -3
  23. swift/cli/relinker.py +807 -126
  24. swift/cli/reload.py +135 -0
  25. swift/cli/ringbuilder.py +217 -20
  26. swift/cli/ringcomposer.py +0 -1
  27. swift/cli/shard-info.py +4 -3
  28. swift/common/base_storage_server.py +9 -20
  29. swift/common/bufferedhttp.py +48 -74
  30. swift/common/constraints.py +20 -15
  31. swift/common/container_sync_realms.py +9 -11
  32. swift/common/daemon.py +25 -8
  33. swift/common/db.py +195 -128
  34. swift/common/db_auditor.py +168 -0
  35. swift/common/db_replicator.py +95 -55
  36. swift/common/digest.py +141 -0
  37. swift/common/direct_client.py +144 -33
  38. swift/common/error_limiter.py +93 -0
  39. swift/common/exceptions.py +25 -1
  40. swift/common/header_key_dict.py +2 -9
  41. swift/common/http_protocol.py +373 -0
  42. swift/common/internal_client.py +129 -59
  43. swift/common/linkat.py +3 -4
  44. swift/common/manager.py +284 -67
  45. swift/common/memcached.py +390 -145
  46. swift/common/middleware/__init__.py +4 -0
  47. swift/common/middleware/account_quotas.py +211 -46
  48. swift/common/middleware/acl.py +3 -8
  49. swift/common/middleware/backend_ratelimit.py +230 -0
  50. swift/common/middleware/bulk.py +22 -34
  51. swift/common/middleware/catch_errors.py +1 -3
  52. swift/common/middleware/cname_lookup.py +6 -11
  53. swift/common/middleware/container_quotas.py +1 -1
  54. swift/common/middleware/container_sync.py +39 -17
  55. swift/common/middleware/copy.py +12 -0
  56. swift/common/middleware/crossdomain.py +22 -9
  57. swift/common/middleware/crypto/__init__.py +2 -1
  58. swift/common/middleware/crypto/crypto_utils.py +11 -15
  59. swift/common/middleware/crypto/decrypter.py +28 -11
  60. swift/common/middleware/crypto/encrypter.py +12 -17
  61. swift/common/middleware/crypto/keymaster.py +8 -15
  62. swift/common/middleware/crypto/kms_keymaster.py +2 -1
  63. swift/common/middleware/dlo.py +15 -11
  64. swift/common/middleware/domain_remap.py +5 -4
  65. swift/common/middleware/etag_quoter.py +128 -0
  66. swift/common/middleware/formpost.py +73 -70
  67. swift/common/middleware/gatekeeper.py +8 -1
  68. swift/common/middleware/keystoneauth.py +33 -3
  69. swift/common/middleware/list_endpoints.py +4 -4
  70. swift/common/middleware/listing_formats.py +85 -49
  71. swift/common/middleware/memcache.py +4 -95
  72. swift/common/middleware/name_check.py +3 -2
  73. swift/common/middleware/proxy_logging.py +160 -92
  74. swift/common/middleware/ratelimit.py +17 -10
  75. swift/common/middleware/read_only.py +6 -4
  76. swift/common/middleware/recon.py +59 -22
  77. swift/common/middleware/s3api/acl_handlers.py +25 -3
  78. swift/common/middleware/s3api/acl_utils.py +6 -1
  79. swift/common/middleware/s3api/controllers/__init__.py +6 -0
  80. swift/common/middleware/s3api/controllers/acl.py +3 -2
  81. swift/common/middleware/s3api/controllers/bucket.py +242 -137
  82. swift/common/middleware/s3api/controllers/logging.py +2 -2
  83. swift/common/middleware/s3api/controllers/multi_delete.py +43 -20
  84. swift/common/middleware/s3api/controllers/multi_upload.py +219 -133
  85. swift/common/middleware/s3api/controllers/obj.py +112 -8
  86. swift/common/middleware/s3api/controllers/object_lock.py +44 -0
  87. swift/common/middleware/s3api/controllers/s3_acl.py +2 -2
  88. swift/common/middleware/s3api/controllers/tagging.py +57 -0
  89. swift/common/middleware/s3api/controllers/versioning.py +36 -7
  90. swift/common/middleware/s3api/etree.py +22 -9
  91. swift/common/middleware/s3api/exception.py +0 -4
  92. swift/common/middleware/s3api/s3api.py +113 -41
  93. swift/common/middleware/s3api/s3request.py +384 -218
  94. swift/common/middleware/s3api/s3response.py +126 -23
  95. swift/common/middleware/s3api/s3token.py +16 -17
  96. swift/common/middleware/s3api/schema/delete.rng +1 -1
  97. swift/common/middleware/s3api/subresource.py +7 -10
  98. swift/common/middleware/s3api/utils.py +27 -10
  99. swift/common/middleware/slo.py +665 -358
  100. swift/common/middleware/staticweb.py +64 -37
  101. swift/common/middleware/symlink.py +51 -18
  102. swift/common/middleware/tempauth.py +76 -58
  103. swift/common/middleware/tempurl.py +191 -173
  104. swift/common/middleware/versioned_writes/__init__.py +51 -0
  105. swift/common/middleware/{versioned_writes.py → versioned_writes/legacy.py} +27 -26
  106. swift/common/middleware/versioned_writes/object_versioning.py +1482 -0
  107. swift/common/middleware/x_profile/exceptions.py +1 -4
  108. swift/common/middleware/x_profile/html_viewer.py +18 -19
  109. swift/common/middleware/x_profile/profile_model.py +1 -2
  110. swift/common/middleware/xprofile.py +10 -10
  111. swift-2.23.3.data/scripts/swift-container-server → swift/common/recon.py +13 -8
  112. swift/common/registry.py +147 -0
  113. swift/common/request_helpers.py +324 -57
  114. swift/common/ring/builder.py +67 -25
  115. swift/common/ring/composite_builder.py +1 -1
  116. swift/common/ring/ring.py +177 -51
  117. swift/common/ring/utils.py +1 -1
  118. swift/common/splice.py +10 -6
  119. swift/common/statsd_client.py +205 -0
  120. swift/common/storage_policy.py +49 -44
  121. swift/common/swob.py +86 -102
  122. swift/common/{utils.py → utils/__init__.py} +2163 -2772
  123. swift/common/utils/base.py +131 -0
  124. swift/common/utils/config.py +433 -0
  125. swift/common/utils/ipaddrs.py +256 -0
  126. swift/common/utils/libc.py +345 -0
  127. swift/common/utils/logs.py +859 -0
  128. swift/common/utils/timestamp.py +412 -0
  129. swift/common/wsgi.py +553 -535
  130. swift/container/auditor.py +14 -100
  131. swift/container/backend.py +490 -231
  132. swift/container/reconciler.py +126 -37
  133. swift/container/replicator.py +96 -22
  134. swift/container/server.py +358 -165
  135. swift/container/sharder.py +1540 -684
  136. swift/container/sync.py +94 -88
  137. swift/container/updater.py +53 -32
  138. swift/obj/auditor.py +153 -35
  139. swift/obj/diskfile.py +466 -217
  140. swift/obj/expirer.py +406 -124
  141. swift/obj/mem_diskfile.py +7 -4
  142. swift/obj/mem_server.py +1 -0
  143. swift/obj/reconstructor.py +523 -262
  144. swift/obj/replicator.py +249 -188
  145. swift/obj/server.py +207 -122
  146. swift/obj/ssync_receiver.py +145 -85
  147. swift/obj/ssync_sender.py +113 -54
  148. swift/obj/updater.py +652 -139
  149. swift/obj/watchers/__init__.py +0 -0
  150. swift/obj/watchers/dark_data.py +213 -0
  151. swift/proxy/controllers/account.py +11 -11
  152. swift/proxy/controllers/base.py +848 -604
  153. swift/proxy/controllers/container.py +433 -92
  154. swift/proxy/controllers/info.py +3 -2
  155. swift/proxy/controllers/obj.py +1000 -489
  156. swift/proxy/server.py +185 -112
  157. {swift-2.23.3.dist-info → swift-2.35.0.dist-info}/AUTHORS +58 -11
  158. {swift-2.23.3.dist-info → swift-2.35.0.dist-info}/METADATA +51 -56
  159. swift-2.35.0.dist-info/RECORD +201 -0
  160. {swift-2.23.3.dist-info → swift-2.35.0.dist-info}/WHEEL +1 -1
  161. {swift-2.23.3.dist-info → swift-2.35.0.dist-info}/entry_points.txt +43 -0
  162. swift-2.35.0.dist-info/pbr.json +1 -0
  163. swift/locale/de/LC_MESSAGES/swift.po +0 -1216
  164. swift/locale/en_GB/LC_MESSAGES/swift.po +0 -1207
  165. swift/locale/es/LC_MESSAGES/swift.po +0 -1085
  166. swift/locale/fr/LC_MESSAGES/swift.po +0 -909
  167. swift/locale/it/LC_MESSAGES/swift.po +0 -894
  168. swift/locale/ja/LC_MESSAGES/swift.po +0 -965
  169. swift/locale/ko_KR/LC_MESSAGES/swift.po +0 -964
  170. swift/locale/pt_BR/LC_MESSAGES/swift.po +0 -881
  171. swift/locale/ru/LC_MESSAGES/swift.po +0 -891
  172. swift/locale/tr_TR/LC_MESSAGES/swift.po +0 -832
  173. swift/locale/zh_CN/LC_MESSAGES/swift.po +0 -833
  174. swift/locale/zh_TW/LC_MESSAGES/swift.po +0 -838
  175. swift-2.23.3.data/scripts/swift-account-auditor +0 -23
  176. swift-2.23.3.data/scripts/swift-account-info +0 -51
  177. swift-2.23.3.data/scripts/swift-account-reaper +0 -23
  178. swift-2.23.3.data/scripts/swift-account-replicator +0 -34
  179. swift-2.23.3.data/scripts/swift-account-server +0 -23
  180. swift-2.23.3.data/scripts/swift-container-auditor +0 -23
  181. swift-2.23.3.data/scripts/swift-container-info +0 -55
  182. swift-2.23.3.data/scripts/swift-container-reconciler +0 -21
  183. swift-2.23.3.data/scripts/swift-container-replicator +0 -34
  184. swift-2.23.3.data/scripts/swift-container-sharder +0 -37
  185. swift-2.23.3.data/scripts/swift-container-sync +0 -23
  186. swift-2.23.3.data/scripts/swift-container-updater +0 -23
  187. swift-2.23.3.data/scripts/swift-dispersion-report +0 -24
  188. swift-2.23.3.data/scripts/swift-form-signature +0 -20
  189. swift-2.23.3.data/scripts/swift-init +0 -119
  190. swift-2.23.3.data/scripts/swift-object-auditor +0 -29
  191. swift-2.23.3.data/scripts/swift-object-expirer +0 -33
  192. swift-2.23.3.data/scripts/swift-object-info +0 -60
  193. swift-2.23.3.data/scripts/swift-object-reconstructor +0 -33
  194. swift-2.23.3.data/scripts/swift-object-relinker +0 -41
  195. swift-2.23.3.data/scripts/swift-object-replicator +0 -37
  196. swift-2.23.3.data/scripts/swift-object-server +0 -27
  197. swift-2.23.3.data/scripts/swift-object-updater +0 -23
  198. swift-2.23.3.data/scripts/swift-proxy-server +0 -23
  199. swift-2.23.3.data/scripts/swift-recon +0 -24
  200. swift-2.23.3.data/scripts/swift-ring-builder +0 -24
  201. swift-2.23.3.data/scripts/swift-ring-builder-analyzer +0 -22
  202. swift-2.23.3.data/scripts/swift-ring-composer +0 -22
  203. swift-2.23.3.dist-info/RECORD +0 -220
  204. swift-2.23.3.dist-info/pbr.json +0 -1
  205. {swift-2.23.3.dist-info → swift-2.35.0.dist-info}/LICENSE +0 -0
  206. {swift-2.23.3.dist-info → swift-2.35.0.dist-info}/top_level.txt +0 -0
@@ -12,32 +12,38 @@
12
12
  # implied.
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
-
15
+ import collections
16
16
  import errno
17
17
  import json
18
+ import logging
19
+ import operator
20
+ from optparse import OptionParser
18
21
  import time
19
22
  from collections import defaultdict
23
+ from operator import itemgetter
20
24
  from random import random
21
25
 
22
26
  import os
23
- import six
24
- from six.moves.urllib.parse import quote
27
+ from urllib.parse import quote
25
28
  from eventlet import Timeout
29
+ from contextlib import contextmanager
26
30
 
27
31
  from swift.common import internal_client
28
- from swift.common.constraints import check_drive
32
+ from swift.common.constraints import check_drive, AUTO_CREATE_ACCOUNT_PREFIX
29
33
  from swift.common.direct_client import (direct_put_container,
30
34
  DirectClientException)
31
- from swift.common.exceptions import DeviceUnavailable
35
+ from swift.common.daemon import run_daemon
36
+ from swift.common.request_helpers import USE_REPLICATION_NETWORK_HEADER
32
37
  from swift.common.ring.utils import is_local_device
33
38
  from swift.common.swob import str_to_wsgi
34
39
  from swift.common.utils import get_logger, config_true_value, \
35
40
  dump_recon_cache, whataremyips, Timestamp, ShardRange, GreenAsyncPile, \
36
- config_float_value, config_positive_int_value, \
37
- quorum_size, parse_override_options, Everything, config_auto_int_value
41
+ config_positive_int_value, quorum_size, parse_override_options, \
42
+ Everything, config_auto_int_value, ShardRangeList, config_percent_value, \
43
+ node_to_string, parse_options
38
44
  from swift.container.backend import ContainerBroker, \
39
45
  RECORD_TYPE_SHARD, UNSHARDED, SHARDING, SHARDED, COLLAPSED, \
40
- SHARD_UPDATE_STATES
46
+ SHARD_UPDATE_STATES, sift_shard_ranges, SHARD_UPDATE_STAT_STATES
41
47
  from swift.container.replicator import ContainerReplicator
42
48
 
43
49
 
@@ -45,6 +51,8 @@ CLEAVE_SUCCESS = 0
45
51
  CLEAVE_FAILED = 1
46
52
  CLEAVE_EMPTY = 2
47
53
 
54
+ DEFAULT_PERIODIC_WARNINGS_INTERVAL = 24 * 3600
55
+
48
56
 
49
57
  def sharding_enabled(broker):
50
58
  # NB all shards will by default have been created with
@@ -56,7 +64,7 @@ def sharding_enabled(broker):
56
64
  # if broker has been marked deleted it will have lost sysmeta, but we still
57
65
  # need to process the broker (for example, to shrink any shard ranges) so
58
66
  # fallback to checking if it has any shard ranges
59
- if broker.get_shard_ranges():
67
+ if broker.has_other_shard_ranges():
60
68
  return True
61
69
  return False
62
70
 
@@ -76,61 +84,166 @@ def make_shard_ranges(broker, shard_data, shards_account_prefix):
76
84
  return shard_ranges
77
85
 
78
86
 
79
- def find_missing_ranges(shard_ranges):
87
+ def _find_discontinuity(paths, start):
88
+ # select the path that reaches furthest from start into the namespace
89
+ start_paths = [path for path in paths if path.lower == start]
90
+ start_paths.sort(key=lambda p: p.upper)
91
+ longest_start_path = start_paths[-1]
92
+ # search for paths that end further into the namespace (note: these must
93
+ # have a lower that differs from the start_path upper, otherwise they would
94
+ # be part of the start_path longer!)
95
+ end_paths = [path for path in paths
96
+ if path.upper > longest_start_path.upper]
97
+ if end_paths:
98
+ # select those that begin nearest the start of the namespace
99
+ end_paths.sort(key=lambda p: p.lower)
100
+ end_paths = [p for p in end_paths if p.lower == end_paths[0].lower]
101
+ # select the longest of those
102
+ end_paths.sort(key=lambda p: p.upper)
103
+ longest_end_path = end_paths[-1]
104
+ else:
105
+ longest_end_path = None
106
+ return longest_start_path, longest_end_path
107
+
108
+
109
+ def find_paths_with_gaps(shard_ranges, within_range=None):
80
110
  """
81
- Find any ranges in the entire object namespace that are not covered by any
82
- shard range in the given list.
111
+ Find gaps in the shard ranges and pairs of shard range paths that lead to
112
+ and from those gaps. For each gap a single pair of adjacent paths is
113
+ selected. The concatenation of all selected paths and gaps will span the
114
+ entire namespace with no overlaps.
115
+
116
+ :param shard_ranges: a list of instances of ShardRange.
117
+ :param within_range: an optional ShardRange that constrains the search
118
+ space; the method will only return gaps within this range. The default
119
+ is the entire namespace.
120
+ :return: A list of tuples of ``(start_path, gap_range, end_path)`` where
121
+ ``start_path`` is a list of ShardRanges leading to the gap,
122
+ ``gap_range`` is a ShardRange synthesized to describe the namespace
123
+ gap, and ``end_path`` is a list of ShardRanges leading from the gap.
124
+ When gaps start or end at the namespace minimum or maximum bounds,
125
+ ``start_path`` and ``end_path`` may be 'null' paths that contain a
126
+ single ShardRange covering either the minimum or maximum of the
127
+ namespace.
128
+ """
129
+ timestamp = Timestamp.now()
130
+ within_range = within_range or ShardRange('entire/namespace', timestamp)
131
+ shard_ranges = ShardRangeList(shard_ranges)
132
+ # note: find_paths results do not include shrinking ranges
133
+ paths = find_paths(shard_ranges)
134
+ # add paths covering no namespace at start and end of namespace to ensure
135
+ # that a start_path and end_path is always found even when there is a gap
136
+ # at the start or end of the namespace
137
+ null_start = ShardRange('null/start', timestamp,
138
+ lower=ShardRange.MIN,
139
+ upper=ShardRange.MIN,
140
+ state=ShardRange.FOUND)
141
+ null_end = ShardRange('null/end', timestamp,
142
+ lower=ShardRange.MAX,
143
+ upper=ShardRange.MAX,
144
+ state=ShardRange.FOUND)
145
+ paths.extend([ShardRangeList([null_start]), ShardRangeList([null_end])])
146
+ paths_with_gaps = []
147
+ start = null_start.lower
148
+ while True:
149
+ start_path, end_path = _find_discontinuity(paths, start)
150
+ if end_path is None:
151
+ # end of namespace reached
152
+ break
153
+ start = end_path.lower
154
+ if start_path.upper > end_path.lower:
155
+ # overlap
156
+ continue
157
+ gap_range = ShardRange('gap/index_%06d' % len(paths_with_gaps),
158
+ timestamp,
159
+ lower=start_path.upper,
160
+ upper=end_path.lower)
161
+ if gap_range.overlaps(within_range):
162
+ paths_with_gaps.append((start_path, gap_range, end_path))
163
+ return paths_with_gaps
83
164
 
84
- :param shard_ranges: A list of :class:`~swift.utils.ShardRange`
85
- :return: a list of missing ranges
165
+
166
+ def _is_parent_or_child(shard_range, other, time_period):
86
167
  """
87
- gaps = []
88
- if not shard_ranges:
89
- return ((ShardRange.MIN, ShardRange.MAX),)
90
- if shard_ranges[0].lower > ShardRange.MIN:
91
- gaps.append((ShardRange.MIN, shard_ranges[0].lower))
92
- for first, second in zip(shard_ranges, shard_ranges[1:]):
93
- if first.upper < second.lower:
94
- gaps.append((first.upper, second.lower))
95
- if shard_ranges[-1].upper < ShardRange.MAX:
96
- gaps.append((shard_ranges[-1].upper, ShardRange.MAX))
97
- return gaps
98
-
99
-
100
- def find_overlapping_ranges(shard_ranges):
168
+ Test if shard range ``shard_range`` is the parent or a child of another
169
+ shard range ``other`` within past time period ``time_period``. This method
170
+ is limited to work only within the scope of the same user-facing account
171
+ (with and without shard prefix).
172
+
173
+ :param shard_range: an instance of ``ShardRange``.
174
+ :param other: an instance of ``ShardRange``.
175
+ :param time_period: the specified past time period in seconds. Value of
176
+ 0 means all time in the past.
177
+ :return: True if ``shard_range`` is the parent or a child of ``other``
178
+ within past time period, False otherwise, assuming that they are within
179
+ the same account.
180
+ """
181
+ exclude_age = (time.time() - float(time_period)) if time_period > 0 else 0
182
+ if shard_range.is_child_of(other) and shard_range.timestamp >= exclude_age:
183
+ return True
184
+ if other.is_child_of(shard_range) and other.timestamp >= exclude_age:
185
+ return True
186
+ return False
187
+
188
+
189
+ def find_overlapping_ranges(
190
+ shard_ranges, exclude_parent_child=False, time_period=0):
101
191
  """
102
192
  Find all pairs of overlapping ranges in the given list.
103
193
 
104
194
  :param shard_ranges: A list of :class:`~swift.utils.ShardRange`
195
+ :param exclude_parent_child: If True then overlapping pairs that have a
196
+ parent-child relationship within the past time period
197
+ ``time_period`` are excluded from the returned set. Default is
198
+ False.
199
+ :param time_period: the specified past time period in seconds. Value of
200
+ 0 means all time in the past.
105
201
  :return: a set of tuples, each tuple containing ranges that overlap with
106
202
  each other.
107
203
  """
108
204
  result = set()
109
- for shard_range in shard_ranges:
110
- overlapping = [sr for sr in shard_ranges
111
- if shard_range != sr and shard_range.overlaps(sr)]
205
+ for i, shard_range in enumerate(shard_ranges):
206
+ if exclude_parent_child:
207
+ overlapping = [
208
+ sr for sr in shard_ranges[i + 1:]
209
+ if shard_range.name != sr.name and shard_range.overlaps(sr) and
210
+ not _is_parent_or_child(shard_range, sr, time_period)]
211
+ else:
212
+ overlapping = [
213
+ sr for sr in shard_ranges[i + 1:]
214
+ if shard_range.name != sr.name and shard_range.overlaps(sr)]
112
215
  if overlapping:
113
216
  overlapping.append(shard_range)
114
- overlapping.sort()
217
+ overlapping.sort(key=ShardRange.sort_key)
115
218
  result.add(tuple(overlapping))
116
219
 
117
220
  return result
118
221
 
119
222
 
120
223
  def is_sharding_candidate(shard_range, threshold):
224
+ # note: use *object* count as the condition for sharding: tombstones will
225
+ # eventually be reclaimed so should not trigger sharding
121
226
  return (shard_range.state == ShardRange.ACTIVE and
122
227
  shard_range.object_count >= threshold)
123
228
 
124
229
 
230
+ def is_shrinking_candidate(shard_range, shrink_threshold, expansion_limit,
231
+ states=None):
232
+ # typically shrink_threshold < expansion_limit but check both just in case
233
+ # note: use *row* count (objects plus tombstones) as the condition for
234
+ # shrinking to avoid inadvertently moving large numbers of tombstones into
235
+ # an acceptor
236
+ states = states or (ShardRange.ACTIVE,)
237
+ return (shard_range.state in states and
238
+ shard_range.row_count < shrink_threshold and
239
+ shard_range.row_count <= expansion_limit)
240
+
241
+
125
242
  def find_sharding_candidates(broker, threshold, shard_ranges=None):
126
243
  # this should only execute on root containers; the goal is to find
127
244
  # large shard containers that should be sharded.
128
245
  # First cut is simple: assume root container shard usage stats are good
129
246
  # enough to make decision.
130
- # TODO: object counts may well not be the appropriate metric for
131
- # deciding to shrink because a shard with low object_count may have a
132
- # large number of deleted object rows that will need to be merged with
133
- # a neighbour. We may need to expose row count as well as object count.
134
247
  if shard_ranges is None:
135
248
  shard_ranges = broker.get_shard_ranges(states=[ShardRange.ACTIVE])
136
249
  candidates = []
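
For illustration only (not part of the package): the overlap detection added in this hunk reduces to a pairwise check over (lower, upper] bounds. A minimal standalone sketch of that idea, using plain tuples in place of ShardRange objects:

    # Hypothetical toy version of the pairwise overlap test behind
    # find_overlapping_ranges; ranges are (lower, upper] string bounds.
    def overlapping_groups(ranges):
        result = set()
        for i, (lo1, up1) in enumerate(ranges):
            group = [(lo2, up2) for lo2, up2 in ranges[i + 1:]
                     if lo2 < up1 and lo1 < up2]
            if group:
                group.append((lo1, up1))
                result.add(tuple(sorted(group)))
        return result

    print(overlapping_groups([('', 'cat'), ('cat', 'dog'), ('cow', 'pig')]))
    # {(('cat', 'dog'), ('cow', 'pig'))}
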
@@ -144,63 +257,376 @@ def find_sharding_candidates(broker, threshold, shard_ranges=None):
144
257
  return candidates
145
258
 
146
259
 
147
- def find_shrinking_candidates(broker, shrink_threshold, merge_size):
260
+ def find_shrinking_candidates(broker, shrink_threshold, expansion_limit):
261
+ # this is only here to preserve a legacy public function signature;
262
+ # superseded by find_compactible_shard_sequences
263
+ merge_pairs = {}
264
+ # restrict search to sequences with one donor
265
+ results = find_compactible_shard_sequences(broker, shrink_threshold,
266
+ expansion_limit, 1, -1,
267
+ include_shrinking=True)
268
+ for sequence in results:
269
+ # map acceptor -> donor list
270
+ merge_pairs[sequence[-1]] = sequence[-2]
271
+ return merge_pairs
272
+
273
+
274
+ def find_compactible_shard_sequences(broker,
275
+ shrink_threshold,
276
+ expansion_limit,
277
+ max_shrinking,
278
+ max_expanding,
279
+ include_shrinking=False):
280
+ """
281
+ Find sequences of shard ranges that could be compacted into a single
282
+ acceptor shard range.
283
+
284
+ This function does not modify shard ranges.
285
+
286
+ :param broker: A :class:`~swift.container.backend.ContainerBroker`.
287
+ :param shrink_threshold: the number of rows below which a shard may be
288
+ considered for shrinking into another shard
289
+ :param expansion_limit: the maximum number of rows that an acceptor shard
290
+ range should have after other shard ranges have been compacted into it
291
+ :param max_shrinking: the maximum number of shard ranges that should be
292
+ compacted into each acceptor; -1 implies unlimited.
293
+ :param max_expanding: the maximum number of acceptors to be found (i.e. the
294
+ maximum number of sequences to be returned); -1 implies unlimited.
295
+ :param include_shrinking: if True then existing compactible sequences are
296
+ included in the results; default is False.
297
+ :returns: A list of :class:`~swift.common.utils.ShardRangeList` each
298
+ containing a sequence of neighbouring shard ranges that may be
299
+ compacted; the final shard range in the list is the acceptor
300
+ """
148
301
  # this should only execute on root containers that have sharded; the
149
302
  # goal is to find small shard containers that could be retired by
150
303
  # merging with a neighbour.
151
304
  # First cut is simple: assume root container shard usage stats are good
152
305
  # enough to make decision; only merge with upper neighbour so that
153
306
  # upper bounds never change (shard names include upper bound).
154
- # TODO: object counts may well not be the appropriate metric for
155
- # deciding to shrink because a shard with low object_count may have a
156
- # large number of deleted object rows that will need to be merged with
157
- # a neighbour. We may need to expose row count as well as object count.
158
307
  shard_ranges = broker.get_shard_ranges()
159
308
  own_shard_range = broker.get_own_shard_range()
160
- if len(shard_ranges) == 1:
161
- # special case to enable final shard to shrink into root
162
- shard_ranges.append(own_shard_range)
163
309
 
164
- merge_pairs = {}
165
- for donor, acceptor in zip(shard_ranges, shard_ranges[1:]):
166
- if donor in merge_pairs:
167
- # this range may already have been made an acceptor; if so then
168
- # move on. In principle it might be that even after expansion
169
- # this range and its donor(s) could all be merged with the next
170
- # range. In practice it is much easier to reason about a single
171
- # donor merging into a single acceptor. Don't fret - eventually
172
- # all the small ranges will be retired.
310
+ def sequence_complete(sequence):
311
+ # a sequence is considered complete if any of the following are true:
312
+ # - the final shard range has more objects than the shrink_threshold,
313
+ # so should not be shrunk (this shard will be the acceptor)
314
+ # - the max number of shard ranges to be compacted (max_shrinking) has
315
+ # been reached
316
+ # - the total number of objects in the sequence has reached the
317
+ # expansion_limit
318
+ if (sequence and
319
+ (not is_shrinking_candidate(
320
+ sequence[-1], shrink_threshold, expansion_limit,
321
+ states=(ShardRange.ACTIVE, ShardRange.SHRINKING)) or
322
+ 0 < max_shrinking < len(sequence) or
323
+ sequence.row_count >= expansion_limit)):
324
+ return True
325
+ return False
326
+
327
+ compactible_sequences = []
328
+ index = 0
329
+ expanding = 0
330
+ while ((max_expanding < 0 or expanding < max_expanding) and
331
+ index < len(shard_ranges)):
332
+ if not is_shrinking_candidate(
333
+ shard_ranges[index], shrink_threshold, expansion_limit,
334
+ states=(ShardRange.ACTIVE, ShardRange.SHRINKING)):
335
+ # this shard range cannot be the start of a new or existing
336
+ # compactible sequence, move on
337
+ index += 1
338
+ continue
339
+
340
+ # start of a *possible* sequence
341
+ sequence = ShardRangeList([shard_ranges[index]])
342
+ for shard_range in shard_ranges[index + 1:]:
343
+ # attempt to add contiguous shard ranges to the sequence
344
+ if sequence.upper < shard_range.lower:
345
+ # found a gap! break before consuming this range because it
346
+ # could become the first in the next sequence
347
+ break
348
+
349
+ if shard_range.state not in (ShardRange.ACTIVE,
350
+ ShardRange.SHRINKING):
351
+ # found? created? sharded? don't touch it
352
+ break
353
+
354
+ if shard_range.state == ShardRange.SHRINKING:
355
+ # already shrinking: add to sequence unconditionally
356
+ sequence.append(shard_range)
357
+ elif (sequence.row_count + shard_range.row_count
358
+ <= expansion_limit):
359
+ # add to sequence: could be a donor or acceptor
360
+ sequence.append(shard_range)
361
+ if sequence_complete(sequence):
362
+ break
363
+ else:
364
+ break
365
+
366
+ index += len(sequence)
367
+ if (index == len(shard_ranges) and
368
+ len(shard_ranges) == len(sequence) and
369
+ not sequence_complete(sequence) and
370
+ sequence.includes(own_shard_range)):
371
+ # special case: only one sequence has been found, which consumes
372
+ # all shard ranges, encompasses the entire namespace, has no more
373
+ # than expansion_limit records and whose shard ranges are all
374
+ # shrinkable; all the shards in the sequence can be shrunk to the
375
+ # root, so append own_shard_range to the sequence to act as an
376
+ # acceptor; note: only shrink to the root when *all* the remaining
377
+ # shard ranges can be simultaneously shrunk to the root.
378
+ sequence.append(own_shard_range)
379
+
380
+ if len(sequence) < 2 or sequence[-1].state not in (ShardRange.ACTIVE,
381
+ ShardRange.SHARDED):
382
+ # this sequence doesn't end with a suitable acceptor shard range
173
383
  continue
174
- if (acceptor.name != own_shard_range.name and
175
- acceptor.state != ShardRange.ACTIVE):
176
- # don't shrink into a range that is not yet ACTIVE
384
+
385
+ # all valid sequences are counted against the max_expanding allowance
386
+ # even if the sequence is already shrinking
387
+ expanding += 1
388
+ if (all([sr.state != ShardRange.SHRINKING for sr in sequence]) or
389
+ include_shrinking):
390
+ compactible_sequences.append(sequence)
391
+
392
+ return compactible_sequences
393
+
394
+
395
+ def finalize_shrinking(broker, acceptor_ranges, donor_ranges, timestamp):
396
+ """
397
+ Update donor shard ranges to shrinking state and merge donors and acceptors
398
+ to broker.
399
+
400
+ :param broker: A :class:`~swift.container.backend.ContainerBroker`.
401
+ :param acceptor_ranges: A list of :class:`~swift.common.utils.ShardRange`
402
+ that are to be acceptors.
403
+ :param donor_ranges: A list of :class:`~swift.common.utils.ShardRange`
404
+ that are to be donors; these will have their state and timestamp
405
+ updated.
406
+ :param timestamp: timestamp to use when updating donor state
407
+ """
408
+ for donor in donor_ranges:
409
+ if donor.update_state(ShardRange.SHRINKING):
410
+ # Set donor state to shrinking state_timestamp defines new epoch
411
+ donor.epoch = donor.state_timestamp = timestamp
412
+ broker.merge_shard_ranges(acceptor_ranges + donor_ranges)
413
+
414
+
415
+ def process_compactible_shard_sequences(broker, sequences):
416
+ """
417
+ Transform the given sequences of shard ranges into a list of acceptors and
418
+ a list of shrinking donors. For each given sequence the final ShardRange in
419
+ the sequence (the acceptor) is expanded to accommodate the other
420
+ ShardRanges in the sequence (the donors). The donors and acceptors are then
421
+ merged into the broker.
422
+
423
+ :param broker: A :class:`~swift.container.backend.ContainerBroker`.
424
+ :param sequences: A list of :class:`~swift.common.utils.ShardRangeList`
425
+ """
426
+ timestamp = Timestamp.now()
427
+ acceptor_ranges = []
428
+ shrinking_ranges = []
429
+ for sequence in sequences:
430
+ donors = sequence[:-1]
431
+ shrinking_ranges.extend(donors)
432
+ # Update the acceptor container with its expanded bounds to prevent it
433
+ # treating objects cleaved from the donor as misplaced.
434
+ acceptor = sequence[-1]
435
+ if acceptor.expand(donors):
436
+ # Update the acceptor container with its expanded bounds to prevent
437
+ # it treating objects cleaved from the donor as misplaced.
438
+ acceptor.timestamp = timestamp
439
+ if acceptor.update_state(ShardRange.ACTIVE):
440
+ # Ensure acceptor state is ACTIVE (when acceptor is root)
441
+ acceptor.state_timestamp = timestamp
442
+ acceptor_ranges.append(acceptor)
443
+ finalize_shrinking(broker, acceptor_ranges, shrinking_ranges, timestamp)
444
+
445
+
446
+ def find_paths(shard_ranges):
447
+ """
448
+ Returns a list of all continuous paths through the shard ranges. An
449
+ individual path may not necessarily span the entire namespace, but it will
450
+ span a continuous namespace without gaps.
451
+
452
+ :param shard_ranges: A list of :class:`~swift.common.utils.ShardRange`.
453
+ :return: A list of :class:`~swift.common.utils.ShardRangeList`.
454
+ """
455
+ # A node is a point in the namespace that is used as a bound of any shard
456
+ # range. Shard ranges form the edges between nodes.
457
+
458
+ # First build a dict mapping nodes to a list of edges that leave that node
459
+ # (in other words, shard ranges whose lower bound equals the node)
460
+ node_successors = collections.defaultdict(list)
461
+ for shard_range in shard_ranges:
462
+ if shard_range.state == ShardRange.SHRINKING:
463
+ # shrinking shards are not a viable edge in any path
177
464
  continue
178
- if donor.state not in (ShardRange.ACTIVE, ShardRange.SHRINKING):
179
- # found? created? sharded? don't touch it
465
+ node_successors[shard_range.lower].append(shard_range)
466
+
467
+ paths = []
468
+
469
+ def clone_path(other=None):
470
+ # create a new path, possibly cloning another path, and add it to the
471
+ # list of all paths through the shards
472
+ path = ShardRangeList() if other is None else ShardRangeList(other)
473
+ paths.append(path)
474
+ return path
475
+
476
+ # we need to keep track of every path that ends at each node so that when
477
+ # we visit the node we can extend those paths, or clones of them, with the
478
+ # edges that leave the node
479
+ paths_to_node = collections.defaultdict(list)
480
+
481
+ # visit the nodes in ascending order by name...
482
+ for node, edges in sorted(node_successors.items()):
483
+ if not edges:
484
+ # this node is a dead-end, so there's no path updates to make
180
485
  continue
486
+ if not paths_to_node[node]:
487
+ # this is either the first node to be visited, or it has no paths
488
+ # leading to it, so we need to start a new path here
489
+ paths_to_node[node].append(clone_path([]))
490
+ for path_to_node in paths_to_node[node]:
491
+ # extend each path that arrives at this node with all of the
492
+ # possible edges that leave the node; if more than edge leaves the
493
+ # node then we will make clones of the path to the node and extend
494
+ # those clones, adding to the collection of all paths though the
495
+ # shards
496
+ for i, edge in enumerate(edges):
497
+ if i == len(edges) - 1:
498
+ # the last edge is used to extend the original path to the
499
+ # node; there is nothing special about the last edge, but
500
+ # doing this last means the original path to the node can
501
+ # be cloned for all other edges before being modified here
502
+ path = path_to_node
503
+ else:
504
+ # for all but one of the edges leaving the node we need to
505
+ # make a clone the original path
506
+ path = clone_path(path_to_node)
507
+ # extend the path with the edge
508
+ path.append(edge)
509
+ # keep track of which node this path now arrives at
510
+ paths_to_node[edge.upper].append(path)
511
+ return paths
181
512
 
182
- proposed_object_count = donor.object_count + acceptor.object_count
183
- if (donor.state == ShardRange.SHRINKING or
184
- (donor.object_count < shrink_threshold and
185
- proposed_object_count < merge_size)):
186
- # include previously identified merge pairs on presumption that
187
- # following shrink procedure is idempotent
188
- merge_pairs[acceptor] = donor
189
- if donor.update_state(ShardRange.SHRINKING):
190
- # Set donor state to shrinking so that next cycle won't use
191
- # it as an acceptor; state_timestamp defines new epoch for
192
- # donor and new timestamp for the expanded acceptor below.
193
- donor.epoch = donor.state_timestamp = Timestamp.now()
194
- if acceptor.lower != donor.lower:
195
- # Update the acceptor container with its expanding state to
196
- # prevent it treating objects cleaved from the donor
197
- # as misplaced.
198
- acceptor.lower = donor.lower
199
- acceptor.timestamp = donor.state_timestamp
200
- return merge_pairs
513
+
514
+ def rank_paths(paths, shard_range_to_span):
515
+ """
516
+ Sorts the given list of paths such that the most preferred path is the
517
+ first item in the list.
518
+
519
+ :param paths: A list of :class:`~swift.common.utils.ShardRangeList`.
520
+ :param shard_range_to_span: An instance of
521
+ :class:`~swift.common.utils.ShardRange` that describes the namespace
522
+ that would ideally be spanned by a path. Paths that include this
523
+ namespace will be preferred over those that do not.
524
+ :return: A sorted list of :class:`~swift.common.utils.ShardRangeList`.
525
+ """
526
+ def sort_key(path):
527
+ # defines the order of preference for paths through shards
528
+ return (
529
+ # complete path for the namespace
530
+ path.includes(shard_range_to_span),
531
+ # most cleaving progress
532
+ path.find_lower(lambda sr: sr.state not in (
533
+ ShardRange.CLEAVED, ShardRange.ACTIVE)),
534
+ # largest object count
535
+ path.object_count,
536
+ # fewest timestamps
537
+ -1 * len(path.timestamps),
538
+ # newest timestamp
539
+ sorted(path.timestamps)[-1]
540
+ )
541
+
542
+ paths.sort(key=sort_key, reverse=True)
543
+ return paths
544
+
545
+
546
+ def combine_shard_ranges(new_shard_ranges, existing_shard_ranges):
547
+ """
548
+ Combines new and existing shard ranges based on most recent state.
549
+
550
+ :param new_shard_ranges: a list of ShardRange instances.
551
+ :param existing_shard_ranges: a list of ShardRange instances.
552
+ :return: a list of ShardRange instances.
553
+ """
554
+ new_shard_ranges = [dict(sr) for sr in new_shard_ranges]
555
+ existing_shard_ranges = [dict(sr) for sr in existing_shard_ranges]
556
+ to_add, to_delete = sift_shard_ranges(
557
+ new_shard_ranges,
558
+ dict((sr['name'], sr) for sr in existing_shard_ranges))
559
+ result = [ShardRange.from_dict(existing)
560
+ for existing in existing_shard_ranges
561
+ if existing['name'] not in to_delete]
562
+ result.extend([ShardRange.from_dict(sr) for sr in to_add])
563
+ return sorted([sr for sr in result if not sr.deleted],
564
+ key=ShardRange.sort_key)
565
+
566
+
567
+ def update_own_shard_range_stats(broker, own_shard_range):
568
+ """
569
+ Update the ``own_shard_range`` with the up-to-date object stats from
570
+ the ``broker``.
571
+
572
+ Note: this method does not persist the updated ``own_shard_range``;
573
+ callers should use ``broker.merge_shard_ranges`` if the updated stats
574
+ need to be persisted.
575
+
576
+ :param broker: an instance of ``ContainerBroker``.
577
+ :param own_shard_range: and instance of ``ShardRange``.
578
+ :returns: ``own_shard_range`` with up-to-date ``object_count``
579
+ and ``bytes_used``.
580
+ """
581
+ info = broker.get_info()
582
+ own_shard_range.update_meta(
583
+ info['object_count'], info['bytes_used'])
584
+ return own_shard_range
201
585
 
202
586
 
203
587
  class CleavingContext(object):
588
+ """
589
+ Encapsulates metadata associated with the process of cleaving a retiring
590
+ DB. This metadata includes:
591
+
592
+ * ``ref``: The unique part of the key that is used when persisting a
593
+ serialized ``CleavingContext`` as sysmeta in the DB. The unique part of
594
+ the key is based off the DB id. This ensures that each context is
595
+ associated with a specific DB file. The unique part of the key is
596
+ included in the ``CleavingContext`` but should not be modified by any
597
+ caller.
598
+
599
+ * ``cursor``: the upper bound of the last shard range to have been
600
+ cleaved from the retiring DB.
601
+
602
+ * ``max_row``: the retiring DB's max row; this is updated to the value of
603
+ the retiring DB's ``max_row`` every time a ``CleavingContext`` is
604
+ loaded for that DB, and may change during the process of cleaving the
605
+ DB.
606
+
607
+ * ``cleave_to_row``: the value of ``max_row`` at the moment when cleaving
608
+ starts for the DB. When cleaving completes (i.e. the cleave cursor has
609
+ reached the upper bound of the cleaving namespace), ``cleave_to_row``
610
+ is compared to the current ``max_row``: if the two values are not equal
611
+ then rows have been added to the DB which may not have been cleaved, in
612
+ which case the ``CleavingContext`` is ``reset`` and cleaving is
613
+ re-started.
614
+
615
+ * ``last_cleave_to_row``: the minimum DB row from which cleaving should
616
+ select objects to cleave; this is initially set to None i.e. all rows
617
+ should be cleaved. If the ``CleavingContext`` is ``reset`` then the
618
+ ``last_cleave_to_row`` is set to the current value of
619
+ ``cleave_to_row``, which in turn is set to the current value of
620
+ ``max_row`` by a subsequent call to ``start``. The repeated cleaving
621
+ therefore only selects objects in rows greater than the
622
+ ``last_cleave_to_row``, rather than cleaving the whole DB again.
623
+
624
+ * ``ranges_done``: the number of shard ranges that have been cleaved from
625
+ the retiring DB.
626
+
627
+ * ``ranges_todo``: the number of shard ranges that are yet to be
628
+ cleaved from the retiring DB.
629
+ """
204
630
  def __init__(self, ref, cursor='', max_row=None, cleave_to_row=None,
205
631
  last_cleave_to_row=None, cleaving_done=False,
206
632
  misplaced_done=False, ranges_done=0, ranges_todo=0):
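
The sequencing logic added in this hunk is easier to see with the shard-range state handling stripped away. A rough, hypothetical sketch (numbers stand in for per-shard row counts; the real function also checks ACTIVE/SHRINKING states and acceptor suitability):

    # Toy sketch only: walk contiguous row counts, accumulating donors while
    # the running total stays within expansion_limit; shards at or above
    # shrink_threshold cannot start a sequence.
    def toy_sequences(row_counts, shrink_threshold, expansion_limit):
        sequences = []
        index = 0
        while index < len(row_counts):
            if row_counts[index] >= shrink_threshold:
                index += 1
                continue
            seq = [row_counts[index]]
            for rows in row_counts[index + 1:]:
                if sum(seq) + rows > expansion_limit:
                    break
                seq.append(rows)
            index += len(seq)
            if len(seq) > 1:
                sequences.append(seq)
        return sequences

    print(toy_sequences([10, 20, 5000, 30, 40], 100, 500))
    # [[10, 20], [30, 40]]
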
@@ -230,18 +656,13 @@ class CleavingContext(object):
230
656
  return '%s(%s)' % (self.__class__.__name__, ', '.join(
231
657
  '%s=%r' % prop for prop in self))
232
658
 
233
- def _encode(cls, value):
234
- if value is not None and six.PY2 and isinstance(value, six.text_type):
235
- return value.encode('utf-8')
236
- return value
237
-
238
659
  @property
239
660
  def cursor(self):
240
661
  return self._cursor
241
662
 
242
663
  @cursor.setter
243
664
  def cursor(self, value):
244
- self._cursor = self._encode(value)
665
+ self._cursor = value
245
666
 
246
667
  @property
247
668
  def marker(self):
@@ -254,37 +675,33 @@ class CleavingContext(object):
254
675
  @classmethod
255
676
  def load_all(cls, broker):
256
677
  """
257
- Returns all cleaving contexts stored in the broker.
678
+ Returns all cleaving contexts stored in the broker's DB.
258
679
 
259
- :param broker:
680
+ :param broker: an instance of :class:`ContainerBroker`
260
681
  :return: list of tuples of (CleavingContext, timestamp)
261
682
  """
262
683
  brokers = broker.get_brokers()
263
684
  sysmeta = brokers[-1].get_sharding_sysmeta_with_timestamps()
264
685
 
686
+ contexts = []
265
687
  for key, (val, timestamp) in sysmeta.items():
266
- # If the value is of length 0, then the metadata is
688
+ # If the value is blank, then the metadata is
267
689
  # marked for deletion
268
- if key.startswith("Context-") and len(val) > 0:
690
+ if key.startswith("Context-") and val:
269
691
  try:
270
- yield cls(**json.loads(val)), timestamp
692
+ contexts.append((cls(**json.loads(val)), timestamp))
271
693
  except ValueError:
272
694
  continue
695
+ return contexts
273
696
 
274
697
  @classmethod
275
698
  def load(cls, broker):
276
699
  """
277
- Returns a context dict for tracking the progress of cleaving this
278
- broker's retiring DB. The context is persisted in sysmeta using a key
279
- that is based off the retiring db id and max row. This form of
280
- key ensures that a cleaving context is only loaded for a db that
281
- matches the id and max row when the context was created; if a db is
282
- modified such that its max row changes then a different context, or no
283
- context, will be loaded.
284
-
285
- :return: A dict to which cleave progress metadata may be added. The
286
- dict initially has a key ``ref`` which should not be modified by
287
- any caller.
700
+ Returns a CleavingContext tracking the cleaving progress of the given
701
+ broker's DB.
702
+
703
+ :param broker: an instances of :class:`ContainerBroker`
704
+ :return: An instance of :class:`CleavingContext`.
288
705
  """
289
706
  brokers = broker.get_brokers()
290
707
  ref = cls._make_ref(brokers[0])
@@ -295,6 +712,12 @@ class CleavingContext(object):
295
712
  return cls(**data)
296
713
 
297
714
  def store(self, broker):
715
+ """
716
+ Persists the serialized ``CleavingContext`` as sysmeta in the given
717
+ broker's DB.
718
+
719
+ :param broker: an instances of :class:`ContainerBroker`
720
+ """
298
721
  broker.set_sharding_sysmeta('Context-' + self.ref,
299
722
  json.dumps(dict(self)))
300
723
 
@@ -316,8 +739,7 @@ class CleavingContext(object):
316
739
  def range_done(self, new_cursor):
317
740
  self.ranges_done += 1
318
741
  self.ranges_todo -= 1
319
- if new_cursor is not None:
320
- self.cursor = new_cursor
742
+ self.cursor = new_cursor
321
743
 
322
744
  def done(self):
323
745
  return all((self.misplaced_done, self.cleaving_done,
@@ -329,51 +751,108 @@ class CleavingContext(object):
329
751
  broker.set_sharding_sysmeta('Context-' + self.ref, '')
330
752
 
331
753
 
332
- DEFAULT_SHARD_CONTAINER_THRESHOLD = 1000000
333
- DEFAULT_SHARD_SHRINK_POINT = 25
334
- DEFAULT_SHARD_MERGE_POINT = 75
754
+ class ContainerSharderConf(object):
755
+ def __init__(self, conf=None):
756
+ conf = conf if conf else {}
335
757
 
758
+ def get_val(key, validator, default):
759
+ """
760
+ Get a value from conf and validate it.
761
+
762
+ :param key: key to lookup value in the ``conf`` dict.
763
+ :param validator: A function that will passed the value from the
764
+ ``conf`` dict and should return the value to be set. This
765
+ function should raise a ValueError if the ``conf`` value if not
766
+ valid.
767
+ :param default: value to use if ``key`` is not found in ``conf``.
768
+ :raises: ValueError if the value read from ``conf`` is invalid.
769
+ :returns: the configuration value.
770
+ """
771
+ try:
772
+ return validator(conf.get(key, default))
773
+ except ValueError as err:
774
+ raise ValueError('Error setting %s: %s' % (key, err))
775
+
776
+ self.shard_container_threshold = get_val(
777
+ 'shard_container_threshold', config_positive_int_value, 1000000)
778
+ self.max_shrinking = get_val(
779
+ 'max_shrinking', int, 1)
780
+ self.max_expanding = get_val(
781
+ 'max_expanding', int, -1)
782
+ self.shard_scanner_batch_size = get_val(
783
+ 'shard_scanner_batch_size', config_positive_int_value, 10)
784
+ self.cleave_batch_size = get_val(
785
+ 'cleave_batch_size', config_positive_int_value, 2)
786
+ self.cleave_row_batch_size = get_val(
787
+ 'cleave_row_batch_size', config_positive_int_value, 10000)
788
+ self.broker_timeout = get_val(
789
+ 'broker_timeout', config_positive_int_value, 60)
790
+ self.recon_candidates_limit = get_val(
791
+ 'recon_candidates_limit', int, 5)
792
+ self.recon_sharded_timeout = get_val(
793
+ 'recon_sharded_timeout', int, 43200)
794
+ self.container_sharding_timeout = get_val(
795
+ 'container_sharding_timeout', int, 172800)
796
+ self.conn_timeout = get_val(
797
+ 'conn_timeout', float, 5)
798
+ self.auto_shard = get_val(
799
+ 'auto_shard', config_true_value, False)
800
+ # deprecated percent options still loaded...
801
+ self.shrink_threshold = get_val(
802
+ 'shard_shrink_point', self.percent_of_threshold, 10)
803
+ self.expansion_limit = get_val(
804
+ 'shard_shrink_merge_point', self.percent_of_threshold, 75)
805
+ # ...but superseded by absolute options if present in conf
806
+ self.shrink_threshold = get_val(
807
+ 'shrink_threshold', int, self.shrink_threshold)
808
+ self.expansion_limit = get_val(
809
+ 'expansion_limit', int, self.expansion_limit)
810
+ self.rows_per_shard = get_val(
811
+ 'rows_per_shard', config_positive_int_value,
812
+ max(self.shard_container_threshold // 2, 1))
813
+ self.minimum_shard_size = get_val(
814
+ 'minimum_shard_size', config_positive_int_value,
815
+ max(self.rows_per_shard // 5, 1))
816
+
817
+ def percent_of_threshold(self, val):
818
+ return int(config_percent_value(val) * self.shard_container_threshold)
336
819
 
337
- class ContainerSharder(ContainerReplicator):
820
+ @classmethod
821
+ def validate_conf(cls, namespace):
822
+ ops = {'<': operator.lt,
823
+ '<=': operator.le}
824
+ checks = (('minimum_shard_size', '<=', 'rows_per_shard'),
825
+ ('shrink_threshold', '<=', 'minimum_shard_size'),
826
+ ('rows_per_shard', '<', 'shard_container_threshold'),
827
+ ('expansion_limit', '<', 'shard_container_threshold'))
828
+ for key1, op, key2 in checks:
829
+ try:
830
+ val1 = getattr(namespace, key1)
831
+ val2 = getattr(namespace, key2)
832
+ except AttributeError:
833
+ # swift-manage-shard-ranges uses a subset of conf options for
834
+ # each command so only validate those actually in the namespace
835
+ continue
836
+ if not ops[op](val1, val2):
837
+ raise ValueError('%s (%d) must be %s %s (%d)'
838
+ % (key1, val1, op, key2, val2))
839
+
840
+
841
+ DEFAULT_SHARDER_CONF = vars(ContainerSharderConf())
842
+
843
+
844
+ class ContainerSharder(ContainerSharderConf, ContainerReplicator):
338
845
  """Shards containers."""
846
+ log_route = 'container-sharder'
339
847
 
340
848
  def __init__(self, conf, logger=None):
341
- logger = logger or get_logger(conf, log_route='container-sharder')
342
- super(ContainerSharder, self).__init__(conf, logger=logger)
343
- self.shards_account_prefix = (
344
- (conf.get('auto_create_account_prefix') or '.') + 'shards_')
345
-
346
- def percent_value(key, default):
347
- try:
348
- value = conf.get(key, default)
349
- return config_float_value(value, 0, 100) / 100.0
350
- except ValueError as err:
351
- raise ValueError("%s: %s" % (str(err), key))
352
-
353
- self.shard_shrink_point = percent_value('shard_shrink_point',
354
- DEFAULT_SHARD_SHRINK_POINT)
355
- self.shrink_merge_point = percent_value('shard_shrink_merge_point',
356
- DEFAULT_SHARD_MERGE_POINT)
357
- self.shard_container_threshold = config_positive_int_value(
358
- conf.get('shard_container_threshold',
359
- DEFAULT_SHARD_CONTAINER_THRESHOLD))
360
- self.shrink_size = (self.shard_container_threshold *
361
- self.shard_shrink_point)
362
- self.merge_size = (self.shard_container_threshold *
363
- self.shrink_merge_point)
364
- self.split_size = self.shard_container_threshold // 2
365
- self.scanner_batch_size = config_positive_int_value(
366
- conf.get('shard_scanner_batch_size', 10))
367
- self.cleave_batch_size = config_positive_int_value(
368
- conf.get('cleave_batch_size', 2))
369
- self.cleave_row_batch_size = config_positive_int_value(
370
- conf.get('cleave_row_batch_size', 10000))
371
- self.auto_shard = config_true_value(conf.get('auto_shard', False))
849
+ logger = logger or get_logger(conf, log_route=self.log_route)
850
+ ContainerReplicator.__init__(self, conf, logger=logger)
851
+ ContainerSharderConf.__init__(self, conf)
852
+ ContainerSharderConf.validate_conf(self)
853
+ self.shards_account_prefix = (AUTO_CREATE_ACCOUNT_PREFIX + 'shards_')
372
854
  self.sharding_candidates = []
373
- self.recon_candidates_limit = int(
374
- conf.get('recon_candidates_limit', 5))
375
- self.broker_timeout = config_positive_int_value(
376
- conf.get('broker_timeout', 60))
855
+ self.shrinking_candidates = []
377
856
  replica_count = self.ring.replica_count
378
857
  quorum = quorum_size(replica_count)
379
858
  self.shard_replication_quorum = config_auto_int_value(
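
The get_val helper introduced in ContainerSharderConf above follows a generic read-and-validate pattern. A self-contained sketch of the same idea (the surrounding names here are illustrative only, not the package's code):

    # Hypothetical standalone version of the read-and-validate pattern.
    def get_val(conf, key, validator, default):
        try:
            return validator(conf.get(key, default))
        except ValueError as err:
            raise ValueError('Error setting %s: %s' % (key, err))

    def positive_int(value):
        value = int(value)
        if value <= 0:
            raise ValueError('%r is not a positive integer' % value)
        return value

    conf = {'cleave_batch_size': '4', 'broker_timeout': '-1'}
    print(get_val(conf, 'cleave_batch_size', positive_int, 2))  # 4
    try:
        get_val(conf, 'broker_timeout', positive_int, 60)
    except ValueError as err:
        print(err)  # Error setting broker_timeout: -1 is not a positive integer
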
@@ -395,7 +874,6 @@ class ContainerSharder(ContainerReplicator):
395
874
  self.existing_shard_replication_quorum = replica_count
396
875
 
397
876
  # internal client
398
- self.conn_timeout = float(conf.get('conn_timeout', 5))
399
877
  request_tries = config_positive_int_value(
400
878
  conf.get('request_tries', 3))
401
879
  internal_client_conf_path = conf.get('internal_client_conf_path',
@@ -405,7 +883,9 @@ class ContainerSharder(ContainerReplicator):
405
883
  internal_client_conf_path,
406
884
  'Swift Container Sharder',
407
885
  request_tries,
408
- allow_modify_pipeline=False)
886
+ use_replication_network=True,
887
+ global_conf={'log_name': '%s-ic' % conf.get(
888
+ 'log_name', self.log_route)})
409
889
  except (OSError, IOError) as err:
410
890
  if err.errno != errno.ENOENT and \
411
891
  not str(err).endswith(' not found'):
@@ -413,7 +893,67 @@ class ContainerSharder(ContainerReplicator):
413
893
  raise SystemExit(
414
894
  'Unable to load internal client from config: %r (%s)' %
415
895
  (internal_client_conf_path, err))
896
+ self.stats_interval = float(conf.get('stats_interval', '3600'))
416
897
  self.reported = 0
898
+ self.periodic_warnings_interval = float(
899
+ conf.get('periodic_warnings_interval',
900
+ DEFAULT_PERIODIC_WARNINGS_INTERVAL))
901
+ self.periodic_warnings_start = time.time()
902
+ self.periodic_warnings = set()
903
+
904
+ def _get_broker_details(self, broker):
905
+ try:
906
+ db_file = broker.db_file
907
+ except Exception: # noqa
908
+ db_file = ''
909
+ try:
910
+ path = broker.path
911
+ except Exception: # noqa
912
+ path = ''
913
+ return db_file, path
914
+
915
+ def _format_log_msg(self, broker, msg, *args):
916
+ # make best effort to include broker properties...
917
+ db_file, path = self._get_broker_details(broker)
918
+ if args:
919
+ msg = msg % args
920
+ return '%s, path: %s, db: %s' % (msg, quote(path), db_file)
921
+
922
+ def _log(self, level, broker, msg, *args):
923
+ if not self.logger.isEnabledFor(level):
924
+ return
925
+
926
+ self.logger.log(level, self._format_log_msg(broker, msg, *args))
927
+
928
+ def debug(self, broker, msg, *args, **kwargs):
929
+ self._log(logging.DEBUG, broker, msg, *args, **kwargs)
930
+
931
+ def info(self, broker, msg, *args, **kwargs):
932
+ self._log(logging.INFO, broker, msg, *args, **kwargs)
933
+
934
+ def warning(self, broker, msg, *args, **kwargs):
935
+ self._log(logging.WARNING, broker, msg, *args, **kwargs)
936
+
937
+ def periodic_warning(self, broker, msg, *args, **kwargs):
938
+ now = time.time()
939
+ if now - self.periodic_warnings_start >= \
940
+ self.periodic_warnings_interval:
941
+ self.periodic_warnings.clear()
942
+ self.periodic_warnings_start = now
943
+
944
+ db_file, path = self._get_broker_details(broker)
945
+ key = (db_file, msg)
946
+ if key not in self.periodic_warnings:
947
+ self.periodic_warnings.add(key)
948
+ self._log(logging.WARNING, broker, msg, *args, **kwargs)
949
+
950
+ def error(self, broker, msg, *args, **kwargs):
951
+ self._log(logging.ERROR, broker, msg, *args, **kwargs)
952
+
953
+ def exception(self, broker, msg, *args, **kwargs):
954
+ if not self.logger.isEnabledFor(logging.ERROR):
955
+ return
956
+ self.logger.exception(self._format_log_msg(broker, msg, *args))
417
957
 
418
958
  def _zero_stats(self):
419
959
  """Zero out the stats."""
@@ -422,6 +962,7 @@ class ContainerSharder(ContainerReplicator):
422
962
  # stats are maintained under the 'sharding' key in self.stats
423
963
  self.stats['sharding'] = defaultdict(lambda: defaultdict(int))
424
964
  self.sharding_candidates = []
965
+ self.shrinking_candidates = []
425
966
 
426
967
  def _append_stat(self, category, key, value):
427
968
  if not self.stats['sharding'][category][key]:
@@ -442,11 +983,15 @@ class ContainerSharder(ContainerReplicator):
442
983
  else:
443
984
  self.stats['sharding'][category][key] = max(current, value)
444
985
 
445
- def _increment_stat(self, category, key, step=1, statsd=False):
446
- self.stats['sharding'][category][key] += step
447
- if statsd:
448
- statsd_key = '%s_%s' % (category, key)
449
- self.logger.increment(statsd_key)
986
+ def _increment_stat(self, category, key, statsd=False):
987
+ self._update_stat(category, key, step=1, statsd=statsd)
988
+
989
+ def _update_stat(self, category, key, step=1, statsd=False):
990
+ if step:
991
+ self.stats['sharding'][category][key] += step
992
+ if statsd:
993
+ statsd_key = '%s_%s' % (category, key)
994
+ self.logger.update_stats(statsd_key, step)
450
995
 
451
996
  def _make_stats_info(self, broker, node, own_shard_range):
452
997
  try:
@@ -465,40 +1010,90 @@ class ContainerSharder(ContainerReplicator):
465
1010
 
466
1011
  def _identify_sharding_candidate(self, broker, node):
467
1012
  own_shard_range = broker.get_own_shard_range()
1013
+ update_own_shard_range_stats(broker, own_shard_range)
468
1014
  if is_sharding_candidate(
469
1015
  own_shard_range, self.shard_container_threshold):
470
1016
  self.sharding_candidates.append(
471
1017
  self._make_stats_info(broker, node, own_shard_range))
472
1018
 
473
- def _transform_sharding_candidate_stats(self):
474
- category = self.stats['sharding']['sharding_candidates']
475
- candidates = self.sharding_candidates
1019
+ def _identify_shrinking_candidate(self, broker, node):
1020
+ sequences = find_compactible_shard_sequences(
1021
+ broker, self.shrink_threshold, self.expansion_limit,
1022
+ self.max_shrinking, self.max_expanding)
1023
+ # compactible_ranges counts all ranges apart from the final acceptor in each sequence
1024
+ compactible_ranges = sum(len(seq) - 1 for seq in sequences)
1025
+
1026
+ if compactible_ranges:
1027
+ own_shard_range = broker.get_own_shard_range()
1028
+ update_own_shard_range_stats(broker, own_shard_range)
1029
+ shrink_candidate = self._make_stats_info(
1030
+ broker, node, own_shard_range)
1031
+ # The number of ranges/donors that can be shrunk if the
1032
+ # tool is used with the current max_shrinking, max_expanding
1033
+ # settings.
1034
+ shrink_candidate['compactible_ranges'] = compactible_ranges
1035
+ self.shrinking_candidates.append(shrink_candidate)
1036
+
1037
+ def _transform_candidate_stats(self, category, candidates, sort_keys):
476
1038
  category['found'] = len(candidates)
477
- candidates.sort(key=lambda c: c['object_count'], reverse=True)
1039
+ candidates.sort(key=itemgetter(*sort_keys), reverse=True)
478
1040
  if self.recon_candidates_limit >= 0:
479
1041
  category['top'] = candidates[:self.recon_candidates_limit]
480
1042
  else:
481
1043
  category['top'] = candidates
482
1044
 
483
1045
  def _record_sharding_progress(self, broker, node, error):
1046
+ db_state = broker.get_db_state()
1047
+ if db_state not in (UNSHARDED, SHARDING, SHARDED):
1048
+ return
484
1049
  own_shard_range = broker.get_own_shard_range()
485
- if (broker.get_db_state() in (UNSHARDED, SHARDING) and
486
- own_shard_range.state in (ShardRange.SHARDING,
487
- ShardRange.SHARDED)):
488
- info = self._make_stats_info(broker, node, own_shard_range)
489
- info['state'] = own_shard_range.state_text
490
- info['db_state'] = broker.get_db_state()
491
- states = [ShardRange.FOUND, ShardRange.CREATED,
492
- ShardRange.CLEAVED, ShardRange.ACTIVE]
493
- shard_ranges = broker.get_shard_ranges(states=states)
494
- state_count = {}
495
- for state in states:
496
- state_count[ShardRange.STATES[state]] = 0
497
- for shard_range in shard_ranges:
498
- state_count[shard_range.state_text] += 1
499
- info.update(state_count)
500
- info['error'] = error and str(error)
501
- self._append_stat('sharding_in_progress', 'all', info)
1050
+ if own_shard_range.state not in ShardRange.CLEAVING_STATES:
1051
+ return
1052
+
1053
+ if db_state == SHARDED:
1054
+ contexts = CleavingContext.load_all(broker)
1055
+ if not contexts:
1056
+ return
1057
+ context_ts = max(float(ts) for c, ts in contexts)
1058
+ if context_ts + self.recon_sharded_timeout \
1059
+ < float(Timestamp.now()):
1060
+ # last context timestamp too old for the
1061
+ # broker to be recorded
1062
+ return
1063
+
1064
+ update_own_shard_range_stats(broker, own_shard_range)
1065
+ info = self._make_stats_info(broker, node, own_shard_range)
1066
+ info['state'] = own_shard_range.state_text
1067
+ info['db_state'] = broker.get_db_state()
1068
+ states = [ShardRange.FOUND, ShardRange.CREATED,
1069
+ ShardRange.CLEAVED, ShardRange.ACTIVE]
1070
+ shard_ranges = broker.get_shard_ranges(states=states)
1071
+ state_count = {}
1072
+ for state in states:
1073
+ state_count[ShardRange.STATES[state]] = 0
1074
+ for shard_range in shard_ranges:
1075
+ state_count[shard_range.state_text] += 1
1076
+ info.update(state_count)
1077
+ info['error'] = error and str(error)
1078
+ self._append_stat('sharding_in_progress', 'all', info)
1079
+
1080
+ if broker.sharding_required() and (
1081
+ own_shard_range.epoch is not None) and (
1082
+ float(own_shard_range.epoch) +
1083
+ self.container_sharding_timeout <
1084
+ time.time()):
1085
+ # Note: There is no requirement that own_shard_range.epoch equals
1086
+ # the time at which the own_shard_range was merged into the
1087
+ # container DB, which predicates sharding starting. But s-m-s-r and
1088
+ # auto-sharding do set epoch and then merge, so we use it to tell
1089
+ # whether sharding has been taking too long or not.
1090
+ self.warning(
1091
+ broker, 'Cleaving has not completed in %.2f seconds since %s. '
1092
+ 'DB state: %s, own_shard_range state: %s, state count of '
1093
+ 'shard ranges: %s' %
1094
+ (time.time() - float(own_shard_range.epoch),
1095
+ own_shard_range.epoch.isoformat, db_state,
1096
+ own_shard_range.state_text, str(state_count)))
502
1097
 
503
1098
  def _report_stats(self):
504
1099
  # report accumulated stats since start of one sharder cycle
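
_transform_candidate_stats() above generalizes the old sharding-only transform: it records how many candidates were found, sorts them on one or more keys (object_count for sharding candidates, compactible_ranges for shrinking candidates), and keeps only the top recon_candidates_limit entries for the recon dump, or all of them when the limit is negative. A short sketch of that behavior with made-up sample data:

from operator import itemgetter


def transform_candidate_stats(category, candidates, sort_keys, limit):
    category['found'] = len(candidates)
    candidates.sort(key=itemgetter(*sort_keys), reverse=True)
    category['top'] = candidates if limit < 0 else candidates[:limit]


category = {}
candidates = [
    {'path': 'a/c1', 'object_count': 500000},
    {'path': 'a/c2', 'object_count': 1500000},
    {'path': 'a/c3', 'object_count': 900000},
]
transform_candidate_stats(category, candidates,
                          sort_keys=('object_count',), limit=2)
print(category['found'], [c['path'] for c in category['top']])
# 3 ['a/c2', 'a/c3']
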
@@ -509,7 +1104,7 @@ class ContainerSharder(ContainerReplicator):
509
1104
  ('created', default_stats),
510
1105
  ('cleaved', default_stats + ('min_time', 'max_time',)),
511
1106
  ('misplaced', default_stats + ('found', 'placed', 'unplaced')),
512
- ('audit_root', default_stats),
1107
+ ('audit_root', default_stats + ('has_overlap', 'num_overlap')),
513
1108
  ('audit_shard', default_stats),
514
1109
  )
515
1110
 
@@ -522,7 +1117,16 @@ class ContainerSharder(ContainerReplicator):
522
1117
  msg = ' '.join(['%s:%s' % (k, str(stats[k])) for k in keys])
523
1118
  self.logger.info('Since %s %s - %s', last_report, category, msg)
524
1119
 
525
- self._transform_sharding_candidate_stats()
1120
+ # transform the sharding and shrinking candidate states
1121
+ # first sharding
1122
+ category = self.stats['sharding']['sharding_candidates']
1123
+ self._transform_candidate_stats(category, self.sharding_candidates,
1124
+ sort_keys=('object_count',))
1125
+
1126
+ # next shrinking
1127
+ category = self.stats['sharding']['shrinking_candidates']
1128
+ self._transform_candidate_stats(category, self.shrinking_candidates,
1129
+ sort_keys=('compactible_ranges',))
526
1130
 
527
1131
  dump_recon_cache(
528
1132
  {'sharding_stats': self.stats,
@@ -532,7 +1136,7 @@ class ContainerSharder(ContainerReplicator):
532
1136
  self.reported = now
533
1137
 
534
1138
  def _periodic_report_stats(self):
535
- if (time.time() - self.reported) >= 3600: # once an hour
1139
+ if (time.time() - self.reported) >= self.stats_interval:
536
1140
  self._report_stats()
537
1141
 
538
1142
  def _check_node(self, node):
@@ -560,65 +1164,67 @@ class ContainerSharder(ContainerReplicator):
560
1164
  params = params or {}
561
1165
  params.setdefault('format', 'json')
562
1166
  headers = {'X-Backend-Record-Type': 'shard',
1167
+ 'X-Backend-Record-Shard-Format': 'full',
563
1168
  'X-Backend-Override-Deleted': 'true',
564
1169
  'X-Backend-Include-Deleted': str(include_deleted)}
565
1170
  if newest:
566
1171
  headers['X-Newest'] = 'true'
567
1172
  try:
568
- try:
569
- resp = self.int_client.make_request(
570
- 'GET', path, headers, acceptable_statuses=(2,),
571
- params=params)
572
- except internal_client.UnexpectedResponse as err:
573
- self.logger.warning("Failed to get shard ranges from %s: %s",
574
- quote(broker.root_path), err)
575
- return None
576
- record_type = resp.headers.get('x-backend-record-type')
577
- if record_type != 'shard':
578
- err = 'unexpected record type %r' % record_type
579
- self.logger.error("Failed to get shard ranges from %s: %s",
580
- quote(broker.root_path), err)
581
- return None
582
-
583
- try:
584
- data = json.loads(resp.body)
585
- if not isinstance(data, list):
586
- raise ValueError('not a list')
587
- return [ShardRange.from_dict(shard_range)
588
- for shard_range in data]
589
- except (ValueError, TypeError, KeyError) as err:
590
- self.logger.error(
591
- "Failed to get shard ranges from %s: invalid data: %r",
592
- quote(broker.root_path), err)
1173
+ resp = self.int_client.make_request(
1174
+ 'GET', path, headers, acceptable_statuses=(2,),
1175
+ params=params)
1176
+ except internal_client.UnexpectedResponse as err:
1177
+ self.warning(broker, "Failed to get shard ranges from %s: %s",
1178
+ quote(broker.root_path), err)
1179
+ return None
1180
+ record_type = resp.headers.get('x-backend-record-type')
1181
+ if record_type != 'shard':
1182
+ err = 'unexpected record type %r' % record_type
1183
+ self.error(broker, "Failed to get shard ranges from %s: %s",
1184
+ quote(broker.root_path), err)
593
1185
  return None
594
- finally:
595
- self.logger.txn_id = None
596
1186
 
597
- def _put_container(self, node, part, account, container, headers, body):
1187
+ try:
1188
+ data = json.loads(resp.body)
1189
+ if not isinstance(data, list):
1190
+ raise ValueError('not a list')
1191
+ return [ShardRange.from_dict(shard_range)
1192
+ for shard_range in data]
1193
+ except (ValueError, TypeError, KeyError) as err:
1194
+ self.error(broker,
1195
+ "Failed to get shard ranges from %s: invalid data: %r",
1196
+ quote(broker.root_path), err)
1197
+ return None
1198
+
1199
+ def _put_container(self, broker, node, part, account, container, headers,
1200
+ body):
598
1201
  try:
599
1202
  direct_put_container(node, part, account, container,
600
1203
  conn_timeout=self.conn_timeout,
601
1204
  response_timeout=self.node_timeout,
602
1205
  headers=headers, contents=body)
603
1206
  except DirectClientException as err:
604
- self.logger.warning(
605
- 'Failed to put shard ranges to %s:%s/%s: %s',
606
- node['ip'], node['port'], node['device'], err.http_status)
1207
+ self.warning(broker,
1208
+ 'Failed to put shard ranges to %s %s/%s: %s',
1209
+ node_to_string(node, replication=True),
1210
+ quote(account), quote(container), err.http_status)
607
1211
  except (Exception, Timeout) as err:
608
- self.logger.exception(
609
- 'Failed to put shard ranges to %s:%s/%s: %s',
610
- node['ip'], node['port'], node['device'], err)
1212
+ self.exception(broker,
1213
+ 'Failed to put shard ranges to %s %s/%s: %s',
1214
+ node_to_string(node, replication=True),
1215
+ quote(account), quote(container), err)
611
1216
  else:
612
1217
  return True
613
1218
  return False
614
1219
 
615
- def _send_shard_ranges(self, account, container, shard_ranges,
1220
+ def _send_shard_ranges(self, broker, account, container, shard_ranges,
616
1221
  headers=None):
617
1222
  body = json.dumps([dict(sr, reported=0)
618
1223
  for sr in shard_ranges]).encode('ascii')
619
1224
  part, nodes = self.ring.get_nodes(account, container)
620
1225
  headers = headers or {}
621
1226
  headers.update({'X-Backend-Record-Type': RECORD_TYPE_SHARD,
1227
+ USE_REPLICATION_NETWORK_HEADER: 'True',
622
1228
  'User-Agent': 'container-sharder %s' % os.getpid(),
623
1229
  'X-Timestamp': Timestamp.now().normal,
624
1230
  'Content-Length': len(body),
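
_fetch_shard_ranges() above flattens the old nested try blocks but keeps the same defensive checks: the response must be 2xx, the x-backend-record-type header must say 'shard' (otherwise the remote server likely ignored the shard-record request headers), and the body must be a JSON list before rows are turned into ShardRange objects; any failure yields None. A simplified sketch of that validation shape, using plain dicts instead of Swift's internal client and ShardRange (parse_shard_listing and its arguments are hypothetical):

import json


def parse_shard_listing(status, headers, body):
    """Return a list of dicts, or None if the response is unusable."""
    if status // 100 != 2:
        return None
    if headers.get('x-backend-record-type') != 'shard':
        # e.g. an old server that ignored the record-type request header
        return None
    try:
        data = json.loads(body)
        if not isinstance(data, list):
            raise ValueError('not a list')
        return [dict(item) for item in data]
    except (ValueError, TypeError, KeyError):
        return None


body = json.dumps([{'name': '.shards_a/c-x', 'lower': '', 'upper': 'm'}])
print(parse_shard_listing(200, {'x-backend-record-type': 'shard'}, body))
print(parse_shard_listing(200, {'x-backend-record-type': 'object'}, body))
# [{'name': '.shards_a/c-x', 'lower': '', 'upper': 'm'}]  then  None
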
@@ -626,7 +1232,7 @@ class ContainerSharder(ContainerReplicator):
626
1232
 
627
1233
  pool = GreenAsyncPile(len(nodes))
628
1234
  for node in nodes:
629
- pool.spawn(self._put_container, node, part, account,
1235
+ pool.spawn(self._put_container, broker, node, part, account,
630
1236
  container, headers, body)
631
1237
 
632
1238
  results = pool.waitall(None)
@@ -642,20 +1248,19 @@ class ContainerSharder(ContainerReplicator):
642
1248
  :param shard_range: a :class:`~swift.common.utils.ShardRange`
643
1249
  :param root_path: the path of the shard's root container
644
1250
  :param policy_index: the storage policy index
645
- :returns: a tuple of ``(part, broker, node_id)`` where ``part`` is the
646
- shard container's partition, ``broker`` is an instance of
1251
+ :returns: a tuple of ``(part, broker, node_id, put_timestamp)`` where
1252
+ ``part`` is the shard container's partition,
1253
+ ``broker`` is an instance of
647
1254
  :class:`~swift.container.backend.ContainerBroker`,
648
- ``node_id`` is the id of the selected node.
1255
+ ``node_id`` is the id of the selected node,
1256
+ ``put_timestamp`` is the put_timestamp if the broker needed to
1257
+ be initialized.
649
1258
  """
650
1259
  part = self.ring.get_part(shard_range.account, shard_range.container)
651
1260
  node = self.find_local_handoff_for_part(part)
652
- put_timestamp = Timestamp.now().internal
653
- if not node:
654
- raise DeviceUnavailable(
655
- 'No mounted devices found suitable for creating shard broker '
656
- 'for %s in partition %s' % (quote(shard_range.name), part))
657
1261
 
658
- shard_broker = ContainerBroker.create_broker(
1262
+ put_timestamp = Timestamp.now().internal
1263
+ shard_broker, initialized = ContainerBroker.create_broker(
659
1264
  os.path.join(self.root, node['device']), part, shard_range.account,
660
1265
  shard_range.container, epoch=shard_range.epoch,
661
1266
  storage_policy_index=policy_index, put_timestamp=put_timestamp)
@@ -675,6 +1280,7 @@ class ContainerSharder(ContainerReplicator):
675
1280
  'X-Container-Sysmeta-Sharding':
676
1281
  ('True', Timestamp.now().internal)})
677
1282
 
1283
+ put_timestamp = put_timestamp if initialized else None
678
1284
  return part, shard_broker, node['id'], put_timestamp
679
1285
 
680
1286
  def _audit_root_container(self, broker):
@@ -684,121 +1290,196 @@ class ContainerSharder(ContainerReplicator):
684
1290
  warnings = []
685
1291
  own_shard_range = broker.get_own_shard_range()
686
1292
 
687
- if own_shard_range.state in (ShardRange.SHARDING, ShardRange.SHARDED):
1293
+ if own_shard_range.state in ShardRange.SHARDING_STATES:
688
1294
  shard_ranges = [sr for sr in broker.get_shard_ranges()
689
1295
  if sr.state != ShardRange.SHRINKING]
690
- missing_ranges = find_missing_ranges(shard_ranges)
691
- if missing_ranges:
1296
+ paths_with_gaps = find_paths_with_gaps(shard_ranges)
1297
+ if paths_with_gaps:
692
1298
  warnings.append(
693
1299
  'missing range(s): %s' %
694
- ' '.join(['%s-%s' % (lower, upper)
695
- for lower, upper in missing_ranges]))
1300
+ ' '.join(['%s-%s' % (gap.lower, gap.upper)
1301
+ for (_, gap, _) in paths_with_gaps]))
696
1302
 
697
1303
  for state in ShardRange.STATES:
698
1304
  if state == ShardRange.SHRINKING:
699
1305
  # Shrinking is how we resolve overlaps; we've got to
700
1306
  # allow multiple shards in that state
701
1307
  continue
702
- shard_ranges = broker.get_shard_ranges(states=state)
703
- overlaps = find_overlapping_ranges(shard_ranges)
704
- for overlapping_ranges in overlaps:
1308
+ shard_ranges = broker.get_shard_ranges(states=[state])
1309
+ # Transient overlaps can occur during the period immediately after
1310
+ # sharding if a root learns about new child shards before it learns
1311
+ # that the parent has sharded. These overlaps are normally
1312
+ # corrected as an up-to-date version of the parent shard range is
1313
+ # replicated to the root. Parent-child overlaps are therefore
1314
+ # ignored for a reclaim age after the child was created. After
1315
+ # that, parent-child overlaps may indicate that there is
1316
+ # permanently stale parent shard range data, perhaps from a node
1317
+ # that has been offline, so these are reported.
1318
+ overlaps = find_overlapping_ranges(
1319
+ shard_ranges, exclude_parent_child=True,
1320
+ time_period=self.reclaim_age)
1321
+ if overlaps:
1322
+ self._increment_stat('audit_root', 'has_overlap')
1323
+ self._update_stat('audit_root', 'num_overlap',
1324
+ step=len(overlaps))
1325
+ all_overlaps = ', '.join(
1326
+ [' '.join(['%s-%s' % (sr.lower, sr.upper)
1327
+ for sr in overlapping_ranges])
1328
+ for overlapping_ranges in sorted(list(overlaps))])
705
1329
  warnings.append(
706
- 'overlapping ranges in state %s: %s' %
707
- (ShardRange.STATES[state],
708
- ' '.join(['%s-%s' % (sr.lower, sr.upper)
709
- for sr in overlapping_ranges])))
1330
+ 'overlapping ranges in state %r: %s' %
1331
+ (ShardRange.STATES[state], all_overlaps))
1332
+
1333
+ # We've seen a case in production where the root's own_shard_range
1334
+ # epoch is reset to None, and state set to ACTIVE (like re-defaulted).
1335
+ # Epoch is important to sharding, so we want to detect if this happens:
1336
+ # 1. so we can alert, and 2. to see how common it is.
1337
+ if own_shard_range.epoch is None and broker.db_epoch:
1338
+ warnings.append('own_shard_range reset to None should be %s'
1339
+ % broker.db_epoch)
710
1340
 
711
1341
  if warnings:
712
- self.logger.warning(
713
- 'Audit failed for root %s (%s): %s',
714
- broker.db_file, quote(broker.path), ', '.join(warnings))
1342
+ self.warning(broker, 'Audit failed for root: %s',
1343
+ ', '.join(warnings))
715
1344
  self._increment_stat('audit_root', 'failure', statsd=True)
716
1345
  return False
717
1346
 
718
1347
  self._increment_stat('audit_root', 'success', statsd=True)
719
1348
  return True
720
1349
 
721
- def _audit_shard_container(self, broker):
722
- self._increment_stat('audit_shard', 'attempted')
723
- warnings = []
724
- errors = []
725
- if not broker.account.startswith(self.shards_account_prefix):
726
- warnings.append('account not in shards namespace %r' %
727
- self.shards_account_prefix)
728
-
729
- own_shard_range = broker.get_own_shard_range(no_default=True)
730
-
731
- shard_ranges = own_shard_range_from_root = None
732
- if own_shard_range:
733
- # Get the root view of the world, at least that part of the world
734
- # that overlaps with this shard's namespace
735
- shard_ranges = self._fetch_shard_ranges(
736
- broker, newest=True,
737
- params={'marker': str_to_wsgi(own_shard_range.lower_str),
738
- 'end_marker': str_to_wsgi(own_shard_range.upper_str)},
739
- include_deleted=True)
740
- if shard_ranges:
741
- for shard_range in shard_ranges:
742
- # look for this shard range in the list of shard ranges
743
- # received from root; the root may have different lower and
744
- # upper bounds for this shard (e.g. if this shard has been
745
- # expanded in the root to accept a shrinking shard) so we
746
- # only match on name.
747
- if shard_range.name == own_shard_range.name:
748
- own_shard_range_from_root = shard_range
749
- break
750
- else:
751
- # this is not necessarily an error - some replicas of the
752
- # root may not yet know about this shard container
753
- warnings.append('root has no matching shard range')
754
- elif not own_shard_range.deleted:
755
- warnings.append('unable to get shard ranges from root')
756
- # else, our shard range is deleted, so root may have reclaimed it
757
- else:
758
- errors.append('missing own shard range')
759
-
760
- if warnings:
761
- self.logger.warning(
762
- 'Audit warnings for shard %s (%s): %s',
763
- broker.db_file, quote(broker.path), ', '.join(warnings))
764
-
765
- if errors:
766
- self.logger.warning(
767
- 'Audit failed for shard %s (%s) - skipping: %s',
768
- broker.db_file, quote(broker.path), ', '.join(errors))
769
- self._increment_stat('audit_shard', 'failure', statsd=True)
770
- return False
1350
+ def _merge_shard_ranges_from_root(self, broker, shard_ranges,
1351
+ own_shard_range):
1352
+ """
1353
+ Merge appropriate items from the given ``shard_ranges`` into the
1354
+ ``broker``. The selection of items that are merged will depend upon the
1355
+ state of the shard.
771
1356
 
772
- if own_shard_range_from_root:
773
- # iff we find our own shard range in the root response, merge it
774
- # and reload own shard range (note: own_range_from_root may not
775
- # necessarily be 'newer' than the own shard range we already have,
776
- # but merging will get us to the 'newest' state)
777
- self.logger.debug('Updating own shard range from root')
778
- broker.merge_shard_ranges(own_shard_range_from_root)
779
- orig_own_shard_range = own_shard_range
780
- own_shard_range = broker.get_own_shard_range()
781
- if (orig_own_shard_range != own_shard_range or
782
- orig_own_shard_range.state != own_shard_range.state):
783
- self.logger.debug(
784
- 'Updated own shard range from %s to %s',
785
- orig_own_shard_range, own_shard_range)
786
- if own_shard_range.state in (ShardRange.SHRINKING,
787
- ShardRange.SHRUNK):
788
- # If the up-to-date state is shrinking, save off *all* shards
789
- # returned because these may contain shards into which this
790
- # shard is to shrink itself; shrinking is the only case when we
791
- # want to learn about *other* shard ranges from the root.
792
- # We need to include shrunk state too, because one replica of a
793
- # shard may already have moved the own_shard_range state to
794
- # shrunk while another replica may still be in the process of
795
- # shrinking.
796
- other_shard_ranges = [sr for sr in shard_ranges
797
- if sr is not own_shard_range_from_root]
798
- self.logger.debug('Updating %s other shard range(s) from root',
799
- len(other_shard_ranges))
800
- broker.merge_shard_ranges(other_shard_ranges)
1357
+ :param broker: A :class:`~swift.container.backend.ContainerBroker`.
1358
+ :param shard_ranges: A list of instances of
1359
+ :class:`~swift.common.utils.ShardRange` describing the shard ranges
1360
+ fetched from the root container.
1361
+ :param own_shard_range: A :class:`~swift.common.utils.ShardRange`
1362
+ describing the shard's own shard range.
1363
+ :return: a tuple of ``own_shard_range, own_shard_range_from_root``. The
1364
+ returned``own_shard_range`` will have been updated if the matching
1365
+ ``own_shard_range_from_root`` has newer data.
1366
+ ``own_shard_range_from_root`` will be None if no such matching
1367
+ shard range is found in ``shard_ranges``.
1368
+ """
1369
+ own_shard_range_from_root = None
1370
+ children_shard_ranges = []
1371
+ other_shard_ranges = []
1372
+ for shard_range in shard_ranges:
1373
+ # look for this shard range in the list of shard ranges received
1374
+ # from root; the root may have different lower and upper bounds for
1375
+ # this shard (e.g. if this shard has been expanded in the root to
1376
+ # accept a shrinking shard) so we only match on name.
1377
+ if shard_range.name == own_shard_range.name:
1378
+ # If we find our own shard range in the root response, merge
1379
+ # it and reload own shard range (note: own_range_from_root may
1380
+ # not necessarily be 'newer' than the own shard range we
1381
+ # already have, but merging will get us to the 'newest' state)
1382
+ self.debug(broker, 'Updating own shard range from root')
1383
+ own_shard_range_from_root = shard_range
1384
+ broker.merge_shard_ranges(own_shard_range_from_root)
1385
+ orig_own_shard_range = own_shard_range
1386
+ own_shard_range = broker.get_own_shard_range()
1387
+ if (orig_own_shard_range != own_shard_range or
1388
+ orig_own_shard_range.state != own_shard_range.state):
1389
+ self.info(broker,
1390
+ 'Updated own shard range from %s to %s',
1391
+ orig_own_shard_range, own_shard_range)
1392
+ elif shard_range.is_child_of(own_shard_range):
1393
+ children_shard_ranges.append(shard_range)
1394
+ else:
1395
+ other_shard_ranges.append(shard_range)
1396
+
1397
+ if children_shard_ranges and not broker.is_sharded():
1398
+ # Merging shard ranges from the root is only necessary until this
1399
+ # DB is fully cleaved and reaches SHARDED DB state, after which it
1400
+ # is useful for debugging for the set of sub-shards to which a
1401
+ # shards has sharded to be frozen.
1402
+ self.debug(broker, 'Updating %d children shard ranges from root',
1403
+ len(children_shard_ranges))
1404
+ broker.merge_shard_ranges(children_shard_ranges)
1405
+
1406
+ if (other_shard_ranges
1407
+ and own_shard_range.state in ShardRange.CLEAVING_STATES
1408
+ and not broker.is_sharded()):
1409
+ # Other shard ranges returned from the root may need to be merged
1410
+ # for the purposes of sharding or shrinking this shard:
1411
+ #
1412
+ # Shrinking states: If the up-to-date state is shrinking, the
1413
+ # shards fetched from root may contain shards into which this shard
1414
+ # is to shrink itself. Shrinking is initiated by modifying multiple
1415
+ # neighboring shard range states *in the root*, rather than
1416
+ # modifying a shard directly. We therefore need to learn about
1417
+ # *other* neighboring shard ranges from the root, possibly
1418
+ # including the root itself. We need to include shrunk state too,
1419
+ # because one replica of a shard may already have moved the
1420
+ # own_shard_range state to shrunk while another replica may still
1421
+ # be in the process of shrinking.
1422
+ #
1423
+ # Sharding states: Normally a shard will shard to its own children.
1424
+ # However, in some circumstances a shard may need to shard to other
1425
+ # non-children sub-shards. For example, a shard range repair may
1426
+ # cause a child sub-shard to be deleted and its namespace covered
1427
+ # by another 'acceptor' shard.
1428
+ #
1429
+ # Therefore, if the up-to-date own_shard_range state indicates that
1430
+ # sharding or shrinking is in progress, then other shard ranges
1431
+ # will be merged, with the following caveats: we never expect a
1432
+ # shard to shard to any ancestor shard range including the root,
1433
+ # but containers might ultimately *shrink* to root; we never want
1434
+ # to cleave to a container that is itself sharding or shrinking;
1435
+ # the merged shard ranges should not result in gaps or overlaps in
1436
+ # the namespace of this shard.
1437
+ #
1438
+ # Note: the search for ancestors is guaranteed to find the parent
1439
+ # and root *if they are present*, but if any ancestor is missing
1440
+ # then there is a chance that older generations in the
1441
+ # other_shard_ranges will not be filtered and could be merged. That
1442
+ # is only a problem if they are somehow still in ACTIVE state, and
1443
+ # no overlap is detected, so the ancestor is merged.
1444
+ ancestor_names = [
1445
+ sr.name for sr in own_shard_range.find_ancestors(shard_ranges)]
1446
+ filtered_other_shard_ranges = [
1447
+ sr for sr in other_shard_ranges
1448
+ if (sr.name not in ancestor_names
1449
+ and (sr.state not in ShardRange.CLEAVING_STATES
1450
+ or sr.deleted))
1451
+ ]
1452
+ if own_shard_range.state in ShardRange.SHRINKING_STATES:
1453
+ root_shard_range = own_shard_range.find_root(
1454
+ other_shard_ranges)
1455
+ if (root_shard_range and
1456
+ root_shard_range.state == ShardRange.ACTIVE):
1457
+ filtered_other_shard_ranges.append(root_shard_range)
1458
+ existing_shard_ranges = broker.get_shard_ranges()
1459
+ combined_shard_ranges = combine_shard_ranges(
1460
+ filtered_other_shard_ranges, existing_shard_ranges)
1461
+ overlaps = find_overlapping_ranges(combined_shard_ranges)
1462
+ paths_with_gaps = find_paths_with_gaps(
1463
+ combined_shard_ranges, own_shard_range)
1464
+ if not (overlaps or paths_with_gaps):
1465
+ # only merge if shard ranges appear to be *good*
1466
+ self.debug(broker,
1467
+ 'Updating %s other shard range(s) from root',
1468
+ len(filtered_other_shard_ranges))
1469
+ broker.merge_shard_ranges(filtered_other_shard_ranges)
1470
+
1471
+ return own_shard_range, own_shard_range_from_root
1472
+
1473
+ def _delete_shard_container(self, broker, own_shard_range):
1474
+ """
1475
+ Mark a shard container as deleted if it was sharded or shrunk more than
1476
+ reclaim_age in the past. (The DB file will be removed by the replicator
1477
+ after a further reclaim_age.)
801
1478
 
1479
+ :param broker: A :class:`~swift.container.backend.ContainerBroker`.
1480
+ :param own_shard_range: A :class:`~swift.common.utils.ShardRange`
1481
+ describing the shard's own shard range.
1482
+ """
802
1483
  delete_age = time.time() - self.reclaim_age
803
1484
  deletable_states = (ShardRange.SHARDED, ShardRange.SHRUNK)
804
1485
  if (own_shard_range.state in deletable_states and
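
_merge_shard_ranges_from_root() above only merges 'other' shard ranges learned from the root while this shard is still sharding or shrinking, and even then it filters out the shard's own ancestors and any range that is itself in a cleaving state (unless deleted), finally refusing to merge if the combined set would leave gaps or overlaps in this shard's namespace. A much-simplified sketch of that guard, with a toy Range class standing in for swift.common.utils.ShardRange:

from dataclasses import dataclass

CLEAVING_STATES = {'sharding', 'sharded', 'shrinking', 'shrunk'}


@dataclass
class Range:
    name: str
    lower: str
    upper: str
    state: str = 'active'
    deleted: bool = False


def filter_from_root(others, ancestor_names):
    # drop ancestors and anything that is itself mid-cleave, unless deleted
    return [r for r in others
            if r.name not in ancestor_names
            and (r.state not in CLEAVING_STATES or r.deleted)]


def covers_cleanly(ranges, lower, upper):
    # True when the ranges tile (lower, upper] with no gap or overlap
    cursor = lower
    for r in sorted(ranges, key=lambda r: r.lower):
        if r.lower != cursor:
            return False
        cursor = r.upper
    return cursor == upper


own = Range('.shards_a/c-x', 'b', 'm', state='sharding')
others = [
    Range('a/c', '', '', state='sharded'),   # the root, i.e. an ancestor
    Range('.shards_a/c-y', 'b', 'f'),
    Range('.shards_a/c-z', 'f', 'm'),
]
acceptable = filter_from_root(others, ancestor_names={'a/c'})
print(covers_cleanly(acceptable, own.lower, own.upper))   # True, safe to merge
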
@@ -806,21 +1487,79 @@ class ContainerSharder(ContainerReplicator):
806
1487
  own_shard_range.timestamp < delete_age and
807
1488
  broker.empty()):
808
1489
  broker.delete_db(Timestamp.now().internal)
809
- self.logger.debug('Deleted shard container %s (%s)',
810
- broker.db_file, quote(broker.path))
1490
+ self.debug(broker, 'Marked shard container as deleted')
811
1491
 
812
- self._increment_stat('audit_shard', 'success', statsd=True)
813
- return True
1492
+ def _do_audit_shard_container(self, broker):
1493
+ warnings = []
1494
+ if not broker.account.startswith(self.shards_account_prefix):
1495
+ warnings.append('account not in shards namespace %r' %
1496
+ self.shards_account_prefix)
1497
+
1498
+ own_shard_range = broker.get_own_shard_range(no_default=True)
1499
+
1500
+ if not own_shard_range:
1501
+ self.warning(broker, 'Audit failed for shard: missing own shard '
1502
+ 'range (skipping)')
1503
+ return False, warnings
1504
+
1505
+ # Get the root view of the world, at least that part of the world
1506
+ # that overlaps with this shard's namespace. The
1507
+ # 'states=auditing' parameter will cause the root to include
1508
+ # its own shard range in the response, which is necessary for the
1509
+ # particular case when this shard should be shrinking to the root
1510
+ # container; when not shrinking to root, but to another acceptor,
1511
+ # the root range should be in sharded state and will not interfere
1512
+ # with cleaving, listing or updating behaviour.
1513
+ shard_ranges = self._fetch_shard_ranges(
1514
+ broker, newest=True,
1515
+ params={'marker': str_to_wsgi(own_shard_range.lower_str),
1516
+ 'end_marker': str_to_wsgi(own_shard_range.upper_str),
1517
+ 'states': 'auditing'},
1518
+ include_deleted=True)
1519
+ if shard_ranges:
1520
+ own_shard_range, own_shard_range_from_root = \
1521
+ self._merge_shard_ranges_from_root(
1522
+ broker, shard_ranges, own_shard_range)
1523
+ if not own_shard_range_from_root:
1524
+ # this is not necessarily an error - some replicas of the
1525
+ # root may not yet know about this shard container, or the
1526
+ # shard's own shard range could become deleted and
1527
+ # reclaimed from the root under rare conditions
1528
+ warnings.append('root has no matching shard range')
1529
+ elif not own_shard_range.deleted:
1530
+ warnings.append('unable to get shard ranges from root')
1531
+ # else, our shard range is deleted, so root may have reclaimed it
1532
+
1533
+ self._delete_shard_container(broker, own_shard_range)
1534
+
1535
+ return True, warnings
1536
+
1537
+ def _audit_shard_container(self, broker):
1538
+ self._increment_stat('audit_shard', 'attempted')
1539
+ success, warnings = self._do_audit_shard_container(broker)
1540
+ if warnings:
1541
+ self.warning(broker, 'Audit warnings for shard: %s',
1542
+ ', '.join(warnings))
1543
+ self._increment_stat(
1544
+ 'audit_shard', 'success' if success else 'failure', statsd=True)
1545
+ return success
814
1546
 
815
1547
  def _audit_cleave_contexts(self, broker):
816
1548
  now = Timestamp.now()
817
1549
  for context, last_mod in CleavingContext.load_all(broker):
818
- if Timestamp(last_mod).timestamp + self.reclaim_age < \
819
- now.timestamp:
1550
+ last_mod = Timestamp(last_mod)
1551
+ is_done = context.done() and last_mod.timestamp + \
1552
+ self.recon_sharded_timeout < now.timestamp
1553
+ is_stale = last_mod.timestamp + self.reclaim_age < now.timestamp
1554
+ if is_done or is_stale:
820
1555
  context.delete(broker)
821
1556
 
822
1557
  def _audit_container(self, broker):
823
1558
  if broker.is_deleted():
1559
+ if broker.is_old_enough_to_reclaim(time.time(), self.reclaim_age) \
1560
+ and not broker.is_empty_enough_to_reclaim():
1561
+ self.periodic_warning(
1562
+ broker, 'Reclaimable db stuck waiting for shrinking')
824
1563
  # if the container has been marked as deleted, all metadata will
825
1564
  # have been erased so no point auditing. But we want it to pass, in
826
1565
  # case any objects exist inside it.
@@ -830,18 +1569,32 @@ class ContainerSharder(ContainerReplicator):
830
1569
  return self._audit_root_container(broker)
831
1570
  return self._audit_shard_container(broker)
832
1571
 
833
- def yield_objects(self, broker, src_shard_range, since_row=None):
1572
+ def yield_objects(self, broker, src_shard_range, since_row=None,
1573
+ batch_size=None):
834
1574
  """
835
- Iterates through all objects in ``src_shard_range`` in name order
836
- yielding them in lists of up to CONTAINER_LISTING_LIMIT length.
1575
+ Iterates through all object rows in ``src_shard_range`` in name order
1576
+ yielding them in lists of up to ``batch_size`` in length. All batches
1577
+ of rows that are not marked deleted are yielded before all batches of
1578
+ rows that are marked deleted.
837
1579
 
838
1580
  :param broker: A :class:`~swift.container.backend.ContainerBroker`.
839
1581
  :param src_shard_range: A :class:`~swift.common.utils.ShardRange`
840
1582
  describing the source range.
841
- :param since_row: include only items whose ROWID is greater than
842
- the given row id; by default all rows are included.
843
- :return: a generator of tuples of (list of objects, broker info dict)
1583
+ :param since_row: include only object rows whose ROWID is greater than
1584
+ the given row id; by default all object rows are included.
1585
+ :param batch_size: The maximum number of object rows to include in each
1586
+ yielded batch; defaults to cleave_row_batch_size.
1587
+ :return: a generator of tuples of (list of rows, broker info dict)
844
1588
  """
1589
+ if (src_shard_range.lower == ShardRange.MAX or
1590
+ src_shard_range.upper == ShardRange.MIN):
1591
+ # this is an unexpected condition but handled with an early return
1592
+ # just in case, because:
1593
+ # lower == ShardRange.MAX -> marker == ''
1594
+ # which could result in rows being erroneously yielded.
1595
+ return
1596
+
1597
+ batch_size = batch_size or self.cleave_row_batch_size
845
1598
  for include_deleted in (False, True):
846
1599
  marker = src_shard_range.lower_str
847
1600
  while True:
@@ -849,87 +1602,82 @@ class ContainerSharder(ContainerReplicator):
849
1602
  info['max_row'] = broker.get_max_row()
850
1603
  start = time.time()
851
1604
  objects = broker.get_objects(
852
- self.cleave_row_batch_size,
1605
+ limit=batch_size,
853
1606
  marker=marker,
854
1607
  end_marker=src_shard_range.end_marker,
855
1608
  include_deleted=include_deleted,
856
1609
  since_row=since_row)
1610
+ self.debug(broker, 'got %s rows (deleted=%s) in %ss',
1611
+ len(objects), include_deleted, time.time() - start)
857
1612
  if objects:
858
- self.logger.debug('got %s objects from %s in %ss',
859
- len(objects), broker.db_file,
860
- time.time() - start)
861
1613
  yield objects, info
862
1614
 
863
- if len(objects) < self.cleave_row_batch_size:
1615
+ if len(objects) < batch_size:
864
1616
  break
865
1617
  marker = objects[-1]['name']
866
1618
 
867
1619
  def yield_objects_to_shard_range(self, broker, src_shard_range,
868
1620
  dest_shard_ranges):
869
1621
  """
870
- Iterates through all objects in ``src_shard_range`` to place them in
871
- destination shard ranges provided by the ``next_shard_range`` function.
872
- Yields tuples of (object list, destination shard range in which those
873
- objects belong). Note that the same destination shard range may be
874
- referenced in more than one yielded tuple.
1622
+ Iterates through all object rows in ``src_shard_range`` to place them
1623
+ in destination shard ranges provided by the ``dest_shard_ranges``
1624
+ function. Yields tuples of ``(batch of object rows, destination shard
1625
+ range in which those object rows belong, broker info)``.
1626
+
1627
+ If no destination shard range exists for a batch of object rows then
1628
+ tuples are yielded of ``(batch of object rows, None, broker info)``.
1629
+ This indicates to the caller that there are a non-zero number of object
1630
+ rows for which no destination shard range was found.
1631
+
1632
+ Note that the same destination shard range may be referenced in more
1633
+ than one yielded tuple.
875
1634
 
876
1635
  :param broker: A :class:`~swift.container.backend.ContainerBroker`.
877
1636
  :param src_shard_range: A :class:`~swift.common.utils.ShardRange`
878
1637
  describing the source range.
879
1638
  :param dest_shard_ranges: A function which should return a list of
880
- destination shard ranges in name order.
881
- :return: a generator of tuples of
882
- (object list, shard range, broker info dict)
1639
+ destination shard ranges sorted in the order defined by
1640
+ :meth:`~swift.common.utils.ShardRange.sort_key`.
1641
+ :return: a generator of tuples of ``(object row list, shard range,
1642
+ broker info dict)`` where ``shard_range`` may be ``None``.
883
1643
  """
884
- dest_shard_range_iter = dest_shard_range = None
885
- for objs, info in self.yield_objects(broker, src_shard_range):
886
- if not objs:
887
- return
1644
+ # calling dest_shard_ranges() may result in a request to fetch shard
1645
+ # ranges, so first check that the broker actually has misplaced object
1646
+ # rows in the source namespace
1647
+ for _ in self.yield_objects(broker, src_shard_range, batch_size=1):
1648
+ break
1649
+ else:
1650
+ return
888
1651
 
889
- def next_or_none(it):
890
- try:
891
- return next(it)
892
- except StopIteration:
893
- return None
894
-
895
- if dest_shard_range_iter is None:
896
- dest_shard_range_iter = iter(dest_shard_ranges())
897
- dest_shard_range = next_or_none(dest_shard_range_iter)
898
-
899
- unplaced = False
900
- last_index = next_index = 0
901
- for obj in objs:
902
- if dest_shard_range is None:
903
- # no more destinations: yield remainder of batch and bail
904
- # NB there may be more batches of objects but none of them
905
- # will be placed so no point fetching them
906
- yield objs[last_index:], None, info
907
- return
908
- if obj['name'] <= dest_shard_range.lower:
909
- unplaced = True
910
- elif unplaced:
911
- # end of run of unplaced objects, yield them
912
- yield objs[last_index:next_index], None, info
913
- last_index = next_index
914
- unplaced = False
915
- while (dest_shard_range and
916
- obj['name'] > dest_shard_range.upper):
917
- if next_index != last_index:
918
- # yield the objects in current dest_shard_range
919
- yield (objs[last_index:next_index],
920
- dest_shard_range,
921
- info)
922
- last_index = next_index
923
- dest_shard_range = next_or_none(dest_shard_range_iter)
924
- next_index += 1
925
-
926
- if next_index != last_index:
927
- # yield tail of current batch of objects
928
- # NB there may be more objects for the current
929
- # dest_shard_range in the next batch from yield_objects
930
- yield (objs[last_index:next_index],
931
- None if unplaced else dest_shard_range,
932
- info)
1652
+ dest_shard_range_iter = iter(dest_shard_ranges())
1653
+ src_shard_range_marker = src_shard_range.lower
1654
+ for dest_shard_range in dest_shard_range_iter:
1655
+ if dest_shard_range.upper <= src_shard_range.lower:
1656
+ continue
1657
+
1658
+ if dest_shard_range.lower > src_shard_range_marker:
1659
+ # no destination for a sub-namespace of the source namespace
1660
+ sub_src_range = src_shard_range.copy(
1661
+ lower=src_shard_range_marker, upper=dest_shard_range.lower)
1662
+ for objs, info in self.yield_objects(broker, sub_src_range):
1663
+ yield objs, None, info
1664
+
1665
+ sub_src_range = src_shard_range.copy(
1666
+ lower=max(dest_shard_range.lower, src_shard_range.lower),
1667
+ upper=min(dest_shard_range.upper, src_shard_range.upper))
1668
+ for objs, info in self.yield_objects(broker, sub_src_range):
1669
+ yield objs, dest_shard_range, info
1670
+
1671
+ src_shard_range_marker = dest_shard_range.upper
1672
+ if dest_shard_range.upper >= src_shard_range.upper:
1673
+ # the entire source namespace has been traversed
1674
+ break
1675
+ else:
1676
+ # dest_shard_ranges_iter was exhausted before reaching the end of
1677
+ # the source namespace
1678
+ sub_src_range = src_shard_range.copy(lower=src_shard_range_marker)
1679
+ for objs, info in self.yield_objects(broker, sub_src_range):
1680
+ yield objs, None, info
933
1681
 
934
1682
  def _post_replicate_hook(self, broker, info, responses):
935
1683
  # override superclass behaviour
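
The rewritten yield_objects_to_shard_range() above walks the destination shard ranges across the source namespace, yielding row batches paired with the destination that covers them and pairing any uncovered sub-namespace with None. The sketch below reproduces that grouping on plain sorted names, with (lower, upper] tuples standing in for shard ranges; it is an approximation of the namespace walk, not the broker-backed implementation:

def map_names_to_destinations(names, destinations):
    """``destinations`` is a sorted list of (lower, upper, label) tuples."""
    names = sorted(names)
    dest_iter = iter(destinations)
    dest = next(dest_iter, None)
    batch, current = [], None
    for name in names:
        # advance past destinations whose namespace ends before this name
        while dest is not None and name > dest[1]:
            dest = next(dest_iter, None)
        target = dest[2] if dest is not None and name > dest[0] else None
        if target != current and batch:
            yield batch, current
            batch = []
        current = target
        batch.append(name)
    if batch:
        yield batch, current


dests = [('', 'cat', 'shard-1'), ('dog', 'pig', 'shard-2')]
for objs, dest in map_names_to_destinations(
        ['apple', 'cow', 'donkey', 'zebra'], dests):
    print(dest, objs)
# shard-1 ['apple'] / None ['cow'] / shard-2 ['donkey'] / None ['zebra']
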
@@ -939,11 +1687,15 @@ class ContainerSharder(ContainerReplicator):
939
1687
  dest_broker, node_id, info):
940
1688
  success, responses = self._replicate_object(
941
1689
  part, dest_broker.db_file, node_id)
1690
+ replication_successes = responses.count(True)
942
1691
  quorum = quorum_size(self.ring.replica_count)
943
- if not success and responses.count(True) < quorum:
944
- self.logger.warning(
945
- 'Failed to sufficiently replicate misplaced objects: %s in %s '
946
- '(not removing)', dest_shard_range, quote(broker.path))
1692
+ if not success and replication_successes < quorum:
1693
+ self.warning(
1694
+ broker, 'Failed to sufficiently replicate misplaced objects '
1695
+ 'shard %s in state %s: %s successes, %s required '
1696
+ '(not removing objects), shard db: %s',
1697
+ dest_shard_range.name, dest_shard_range.state_text,
1698
+ replication_successes, quorum, dest_broker.db_file)
947
1699
  return False
948
1700
 
949
1701
  if broker.get_info()['id'] != info['id']:
@@ -961,9 +1713,9 @@ class ContainerSharder(ContainerReplicator):
961
1713
  success = True
962
1714
 
963
1715
  if not success:
964
- self.logger.warning(
965
- 'Refused to remove misplaced objects: %s in %s',
966
- dest_shard_range, quote(broker.path))
1716
+ self.warning(broker, 'Refused to remove misplaced objects for '
1717
+ 'dest %s in state %s',
1718
+ dest_shard_range.name, dest_shard_range.state_text)
967
1719
  return success
968
1720
 
969
1721
  def _move_objects(self, src_broker, src_shard_range, policy_index,
@@ -981,16 +1733,19 @@ class ContainerSharder(ContainerReplicator):
981
1733
  continue
982
1734
 
983
1735
  if dest_shard_range.name == src_broker.path:
984
- self.logger.debug(
985
- 'Skipping source as misplaced objects destination')
1736
+ self.debug(src_broker,
1737
+ 'Skipping source as misplaced objects destination')
986
1738
  # in shrinking context, the misplaced objects might actually be
987
1739
  # correctly placed if the root has expanded this shard but this
988
1740
  # broker has not yet been updated
989
1741
  continue
990
1742
 
991
1743
  if dest_shard_range not in dest_brokers:
992
- part, dest_broker, node_id, _junk = self._get_shard_broker(
993
- dest_shard_range, src_broker.root_path, policy_index)
1744
+ part, dest_broker, node_id, put_timestamp = \
1745
+ self._get_shard_broker(
1746
+ dest_shard_range, src_broker.root_path, policy_index)
1747
+ stat = 'db_exists' if put_timestamp is None else 'db_created'
1748
+ self._increment_stat('misplaced', stat, statsd=True)
994
1749
  # save the broker info that was sampled prior to the *first*
995
1750
  # yielded objects for this destination
996
1751
  destination = {'part': part,
@@ -1004,20 +1759,20 @@ class ContainerSharder(ContainerReplicator):
1004
1759
  placed += len(objs)
1005
1760
 
1006
1761
  if unplaced:
1007
- self.logger.warning(
1008
- 'Failed to find destination for at least %s misplaced objects '
1009
- 'in %s', unplaced, quote(src_broker.path))
1762
+ self.warning(src_broker, 'Failed to find destination for at least '
1763
+ '%s misplaced objects', unplaced)
1010
1764
 
1011
1765
  # TODO: consider executing the replication jobs concurrently
1012
1766
  for dest_shard_range, dest_args in dest_brokers.items():
1013
- self.logger.debug('moving misplaced objects found in range %s' %
1014
- dest_shard_range)
1767
+ self.debug(src_broker,
1768
+ 'moving misplaced objects found in range %s',
1769
+ dest_shard_range)
1015
1770
  success &= self._replicate_and_delete(
1016
1771
  src_broker, dest_shard_range, **dest_args)
1017
1772
 
1018
- self._increment_stat('misplaced', 'placed', step=placed)
1019
- self._increment_stat('misplaced', 'unplaced', step=unplaced)
1020
- return success, placed + unplaced
1773
+ self._update_stat('misplaced', 'placed', step=placed, statsd=True)
1774
+ self._update_stat('misplaced', 'unplaced', step=unplaced, statsd=True)
1775
+ return success, placed, unplaced
1021
1776
 
1022
1777
  def _make_shard_range_fetcher(self, broker, src_shard_range):
1023
1778
  # returns a function that will lazy load shard ranges on demand;
@@ -1058,12 +1813,12 @@ class ContainerSharder(ContainerReplicator):
1058
1813
 
1059
1814
  def _make_misplaced_object_bounds(self, broker):
1060
1815
  bounds = []
1061
- state = broker.get_db_state()
1062
- if state == SHARDED:
1816
+ db_state = broker.get_db_state()
1817
+ if db_state == SHARDED:
1063
1818
  # Anything in the object table is treated as a misplaced object.
1064
1819
  bounds.append(('', ''))
1065
1820
 
1066
- if not bounds and state == SHARDING:
1821
+ if not bounds and db_state == SHARDING:
1067
1822
  # Objects outside of this container's own range are misplaced.
1068
1823
  # Objects in already cleaved shard ranges are also misplaced.
1069
1824
  cleave_context = CleavingContext.load(broker)
@@ -1091,8 +1846,7 @@ class ContainerSharder(ContainerReplicator):
1091
1846
  :return: True if all misplaced objects were sufficiently replicated to
1092
1847
  their correct shard containers, False otherwise
1093
1848
  """
1094
- self.logger.debug('Looking for misplaced objects in %s (%s)',
1095
- quote(broker.path), broker.db_file)
1849
+ self.debug(broker, 'Looking for misplaced objects')
1096
1850
  self._increment_stat('misplaced', 'attempted')
1097
1851
  src_broker = src_broker or broker
1098
1852
  if src_bounds is None:
@@ -1100,22 +1854,27 @@ class ContainerSharder(ContainerReplicator):
1100
1854
  # (ab)use ShardRange instances to encapsulate source namespaces
1101
1855
  src_ranges = [ShardRange('dont/care', Timestamp.now(), lower, upper)
1102
1856
  for lower, upper in src_bounds]
1103
- self.logger.debug('misplaced object source bounds %s' % src_bounds)
1857
+ self.debug(broker, 'misplaced object source bounds %s', src_bounds)
1104
1858
  policy_index = broker.storage_policy_index
1105
1859
  success = True
1106
- num_found = 0
1860
+ num_placed = num_unplaced = 0
1107
1861
  for src_shard_range in src_ranges:
1108
- part_success, part_num_found = self._move_objects(
1862
+ part_success, part_placed, part_unplaced = self._move_objects(
1109
1863
  src_broker, src_shard_range, policy_index,
1110
1864
  self._make_shard_range_fetcher(broker, src_shard_range))
1111
1865
  success &= part_success
1112
- num_found += part_num_found
1866
+ num_placed += part_placed
1867
+ num_unplaced += part_unplaced
1113
1868
 
1114
- if num_found:
1869
+ if num_placed or num_unplaced:
1870
+ # the found stat records the number of DBs in which any misplaced
1871
+ # rows were found, not the total number of misplaced rows
1115
1872
  self._increment_stat('misplaced', 'found', statsd=True)
1116
- self.logger.debug('Moved %s misplaced objects' % num_found)
1117
- self._increment_stat('misplaced', 'success' if success else 'failure')
1118
- self.logger.debug('Finished handling misplaced objects')
1873
+ self.debug(broker, 'Placed %s misplaced objects (%s unplaced)',
1874
+ num_placed, num_unplaced)
1875
+ self._increment_stat('misplaced', 'success' if success else 'failure',
1876
+ statsd=True)
1877
+ self.debug(broker, 'Finished handling misplaced objects')
1119
1878
  return success
1120
1879
 
1121
1880
  def _find_shard_ranges(self, broker):
@@ -1131,27 +1890,26 @@ class ContainerSharder(ContainerReplicator):
1131
1890
  own_shard_range = broker.get_own_shard_range()
1132
1891
  shard_ranges = broker.get_shard_ranges()
1133
1892
  if shard_ranges and shard_ranges[-1].upper >= own_shard_range.upper:
1134
- self.logger.debug('Scan for shard ranges already completed for %s',
1135
- quote(broker.path))
1893
+ self.debug(broker, 'Scan for shard ranges already completed')
1136
1894
  return 0
1137
1895
 
1138
- self.logger.info('Starting scan for shard ranges on %s',
1139
- quote(broker.path))
1896
+ self.info(broker, 'Starting scan for shard ranges')
1140
1897
  self._increment_stat('scanned', 'attempted')
1141
1898
 
1142
1899
  start = time.time()
1143
1900
  shard_data, last_found = broker.find_shard_ranges(
1144
- self.split_size, limit=self.scanner_batch_size,
1145
- existing_ranges=shard_ranges)
1901
+ self.rows_per_shard, limit=self.shard_scanner_batch_size,
1902
+ existing_ranges=shard_ranges,
1903
+ minimum_shard_size=self.minimum_shard_size)
1146
1904
  elapsed = time.time() - start
1147
1905
 
1148
1906
  if not shard_data:
1149
1907
  if last_found:
1150
- self.logger.info("Already found all shard ranges")
1908
+ self.info(broker, "Already found all shard ranges")
1151
1909
  self._increment_stat('scanned', 'success', statsd=True)
1152
1910
  else:
1153
1911
  # we didn't find anything
1154
- self.logger.warning("No shard ranges found")
1912
+ self.warning(broker, "No shard ranges found")
1155
1913
  self._increment_stat('scanned', 'failure', statsd=True)
1156
1914
  return 0
1157
1915
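
_find_shard_ranges() above now splits on rows_per_shard and also passes minimum_shard_size so that a small final remainder is not left as its own tiny shard. A rough model of that splitting policy follows; it assumes the undersized remainder is folded into the preceding shard, and it works on a bare row count rather than the object rows that the real find_shard_ranges() scans:

def plan_shard_sizes(total_rows, rows_per_shard, minimum_shard_size):
    """Return planned row counts per shard for a simple linear split."""
    sizes = []
    remaining = total_rows
    while remaining > rows_per_shard:
        sizes.append(rows_per_shard)
        remaining -= rows_per_shard
    if remaining:
        if sizes and remaining < minimum_shard_size:
            # a tail smaller than minimum_shard_size joins the last shard
            sizes[-1] += remaining
        else:
            sizes.append(remaining)
    return sizes


print(plan_shard_sizes(2300000, 1000000, 500000))   # [1000000, 1300000]
print(plan_shard_sizes(2600000, 1000000, 500000))   # [1000000, 1000000, 600000]
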
 
@@ -1159,14 +1917,14 @@ class ContainerSharder(ContainerReplicator):
1159
1917
  broker, shard_data, self.shards_account_prefix)
1160
1918
  broker.merge_shard_ranges(shard_ranges)
1161
1919
  num_found = len(shard_ranges)
1162
- self.logger.info(
1163
- "Completed scan for shard ranges: %d found", num_found)
1164
- self._increment_stat('scanned', 'found', step=num_found)
1920
+ self.info(broker, "Completed scan for shard ranges: %d found",
1921
+ num_found)
1922
+ self._update_stat('scanned', 'found', step=num_found)
1165
1923
  self._min_stat('scanned', 'min_time', round(elapsed / num_found, 3))
1166
1924
  self._max_stat('scanned', 'max_time', round(elapsed / num_found, 3))
1167
1925
 
1168
1926
  if last_found:
1169
- self.logger.info("Final shard range reached.")
1927
+ self.info(broker, "Final shard range reached.")
1170
1928
  self._increment_stat('scanned', 'success', statsd=True)
1171
1929
  return num_found
1172
1930
 
@@ -1174,7 +1932,7 @@ class ContainerSharder(ContainerReplicator):
1174
1932
  # Create shard containers that are ready to receive redirected object
1175
1933
  # updates. Do this now, so that redirection can begin immediately
1176
1934
  # without waiting for cleaving to complete.
1177
- found_ranges = broker.get_shard_ranges(states=ShardRange.FOUND)
1935
+ found_ranges = broker.get_shard_ranges(states=[ShardRange.FOUND])
1178
1936
  created_ranges = []
1179
1937
  for shard_range in found_ranges:
1180
1938
  self._increment_stat('created', 'attempted')
@@ -1193,16 +1951,15 @@ class ContainerSharder(ContainerReplicator):
1193
1951
  # may think they are in fact roots, but it cleans up well enough
1194
1952
  # once everyone's upgraded.
1195
1953
  success = self._send_shard_ranges(
1196
- shard_range.account, shard_range.container,
1954
+ broker, shard_range.account, shard_range.container,
1197
1955
  [shard_range], headers=headers)
1198
1956
  if success:
1199
- self.logger.debug('PUT new shard range container for %s',
1200
- shard_range)
1957
+ self.debug(broker, 'PUT new shard range container for %s',
1958
+ shard_range)
1201
1959
  self._increment_stat('created', 'success', statsd=True)
1202
1960
  else:
1203
- self.logger.error(
1204
- 'PUT of new shard container %r failed for %s.',
1205
- shard_range, quote(broker.path))
1961
+ self.error(broker, 'PUT of new shard container %r failed',
1962
+ shard_range)
1206
1963
  self._increment_stat('created', 'failure', statsd=True)
1207
1964
  # break, not continue, because elsewhere it is assumed that
1208
1965
  # finding and cleaving shard ranges progresses linearly, so we
@@ -1214,32 +1971,17 @@ class ContainerSharder(ContainerReplicator):
1214
1971
  if created_ranges:
1215
1972
  broker.merge_shard_ranges(created_ranges)
1216
1973
  if not broker.is_root_container():
1217
- self._send_shard_ranges(
1218
- broker.root_account, broker.root_container, created_ranges)
1219
- self.logger.info(
1220
- "Completed creating shard range containers: %d created.",
1221
- len(created_ranges))
1974
+ self._send_shard_ranges(broker, broker.root_account,
1975
+ broker.root_container, created_ranges)
1976
+ self.info(broker, "Completed creating %d shard range containers",
1977
+ len(created_ranges))
1222
1978
  return len(created_ranges)
1223
1979
 
1224
- def _cleave_shard_range(self, broker, cleaving_context, shard_range):
1225
- self.logger.info("Cleaving '%s' from row %s into %s for %r",
1226
- quote(broker.path),
1227
- cleaving_context.last_cleave_to_row,
1228
- quote(shard_range.name), shard_range)
1229
- self._increment_stat('cleaved', 'attempted')
1980
+ def _cleave_shard_broker(self, broker, cleaving_context, shard_range,
1981
+ own_shard_range, shard_broker, put_timestamp,
1982
+ shard_part, node_id):
1983
+ result = CLEAVE_SUCCESS
1230
1984
  start = time.time()
1231
- policy_index = broker.storage_policy_index
1232
- try:
1233
- shard_part, shard_broker, node_id, put_timestamp = \
1234
- self._get_shard_broker(shard_range, broker.root_path,
1235
- policy_index)
1236
- except DeviceUnavailable as duex:
1237
- self.logger.warning(str(duex))
1238
- self._increment_stat('cleaved', 'failure', statsd=True)
1239
- return CLEAVE_FAILED
1240
-
1241
- own_shard_range = broker.get_own_shard_range()
1242
-
1243
1985
  # only cleave from the retiring db - misplaced objects handler will
1244
1986
  # deal with any objects in the fresh db
1245
1987
  source_broker = broker.get_brokers()[0]
@@ -1258,21 +2000,15 @@ class ContainerSharder(ContainerReplicator):
1258
2000
  since_row=sync_from_row):
1259
2001
  shard_broker.merge_items(objects)
1260
2002
  if objects is None:
1261
- self.logger.info("Cleaving '%s': %r - zero objects found",
1262
- quote(broker.path), shard_range)
2003
+ self.info(broker, "Cleaving %r - zero objects found",
2004
+ shard_range)
1263
2005
  if shard_broker.get_info()['put_timestamp'] == put_timestamp:
1264
2006
  # This was just created; don't need to replicate this
1265
2007
  # SR because there was nothing there. So cleanup and
1266
2008
  # remove the shard_broker from its hand off location.
1267
- self.delete_db(shard_broker)
1268
- cleaving_context.range_done(shard_range.upper_str)
1269
- if shard_range.upper >= own_shard_range.upper:
1270
- # cleaving complete
1271
- cleaving_context.cleaving_done = True
1272
- cleaving_context.store(broker)
1273
2009
  # Because nothing was here we wont count it in the shard
1274
2010
  # batch count.
1275
- return CLEAVE_EMPTY
2011
+ result = CLEAVE_EMPTY
1276
2012
  # Else, it wasn't newly created by us, and
1277
2013
  # we don't know what's in it or why. Let it get
1278
2014
  # replicated and counted in the batch count.
@@ -1288,20 +2024,25 @@ class ContainerSharder(ContainerReplicator):
1288
2024
  [{'sync_point': source_max_row, 'remote_id': source_db_id}] +
1289
2025
  source_broker.get_syncs())
1290
2026
  else:
1291
- self.logger.debug("Cleaving '%s': %r - shard db already in sync",
1292
- quote(broker.path), shard_range)
2027
+ self.debug(broker, "Cleaving %r - shard db already in sync",
2028
+ shard_range)
1293
2029
 
1294
2030
  replication_quorum = self.existing_shard_replication_quorum
1295
- if shard_range.includes(own_shard_range):
1296
- # When shrinking, include deleted own (donor) shard range in
1297
- # the replicated db so that when acceptor next updates root it
1298
- # will atomically update its namespace *and* delete the donor.
1299
- # Don't do this when sharding a shard because the donor
1300
- # namespace should not be deleted until all shards are cleaved.
1301
- if own_shard_range.update_state(ShardRange.SHRUNK):
1302
- own_shard_range.set_deleted()
1303
- broker.merge_shard_ranges(own_shard_range)
1304
- shard_broker.merge_shard_ranges(own_shard_range)
2031
+ if own_shard_range.state in ShardRange.SHRINKING_STATES:
2032
+ if shard_range.includes(own_shard_range):
2033
+ # When shrinking to a single acceptor that completely encloses
2034
+ # this shard's namespace, include deleted own (donor) shard
2035
+ # range in the replicated db so that when acceptor next updates
2036
+ # root it will atomically update its namespace *and* delete the
2037
+ # donor. This reduces the chance of a temporary listing gap if
2038
+ # this shard fails to update the root with its SHRUNK/deleted
2039
+ # state. Don't do this when sharding a shard or shrinking to
2040
+ # multiple acceptors because in those cases the donor namespace
2041
+ # should not be deleted until *all* shards are cleaved.
2042
+ if own_shard_range.update_state(ShardRange.SHRUNK):
2043
+ own_shard_range.set_deleted()
2044
+ broker.merge_shard_ranges(own_shard_range)
2045
+ shard_broker.merge_shard_ranges(own_shard_range)
1305
2046
  elif shard_range.state == ShardRange.CREATED:
1306
2047
  # The shard range object stats may have changed since the shard
1307
2048
  # range was found, so update with stats of objects actually
@@ -1310,51 +2051,74 @@ class ContainerSharder(ContainerReplicator):
1310
2051
  info = shard_broker.get_info()
1311
2052
  shard_range.update_meta(
1312
2053
  info['object_count'], info['bytes_used'])
2054
+ # Update state to CLEAVED; only do this when sharding, not when
2055
+ # shrinking
1313
2056
  shard_range.update_state(ShardRange.CLEAVED)
1314
2057
  shard_broker.merge_shard_ranges(shard_range)
1315
2058
  replication_quorum = self.shard_replication_quorum
1316
2059
 
1317
- self.logger.info(
1318
- 'Replicating new shard container %s for %s',
1319
- quote(shard_broker.path), shard_broker.get_own_shard_range())
1320
-
1321
- success, responses = self._replicate_object(
1322
- shard_part, shard_broker.db_file, node_id)
2060
+ if result == CLEAVE_EMPTY:
2061
+ self.delete_db(shard_broker)
2062
+ else: # result == CLEAVE_SUCCESS:
2063
+ self.info(broker, 'Replicating new shard container %s for %s',
2064
+ quote(shard_broker.path), own_shard_range)
2065
+
2066
+ success, responses = self._replicate_object(
2067
+ shard_part, shard_broker.db_file, node_id)
2068
+
2069
+ replication_successes = responses.count(True)
2070
+ if (not success and (not responses or
2071
+ replication_successes < replication_quorum)):
2072
+ # insufficient replication or replication not even attempted;
2073
+ # break because we don't want to progress the cleave cursor
2074
+ # until each shard range has been successfully cleaved
2075
+ self.warning(
2076
+ broker, 'Failed to sufficiently replicate cleaved shard '
2077
+ '%s in state %s: %s successes, %s required, '
2078
+ 'shard db: %s',
2079
+ shard_broker.path, shard_range.state_text,
2080
+ replication_successes, replication_quorum,
2081
+ shard_broker.db_file)
2082
+ self._increment_stat('cleaved', 'failure', statsd=True)
2083
+ result = CLEAVE_FAILED
2084
+ else:
2085
+ elapsed = round(time.time() - start, 3)
2086
+ self._min_stat('cleaved', 'min_time', elapsed)
2087
+ self._max_stat('cleaved', 'max_time', elapsed)
2088
+ self.info(broker, 'Cleaved %s in %gs', shard_range,
2089
+ elapsed)
2090
+ self._increment_stat('cleaved', 'success', statsd=True)
2091
+
2092
+ if result in (CLEAVE_SUCCESS, CLEAVE_EMPTY):
2093
+ broker.merge_shard_ranges(shard_range)
2094
+ cleaving_context.range_done(shard_range.upper_str)
2095
+ if shard_range.upper >= own_shard_range.upper:
2096
+ # cleaving complete
2097
+ cleaving_context.cleaving_done = True
2098
+ cleaving_context.store(broker)
2099
+ return result
1323
2100
 
1324
- replication_successes = responses.count(True)
1325
- if (not success and (not responses or
1326
- replication_successes < replication_quorum)):
1327
- # insufficient replication or replication not even attempted;
1328
- # break because we don't want to progress the cleave cursor
1329
- # until each shard range has been successfully cleaved
1330
- self.logger.warning(
1331
- 'Failed to sufficiently replicate cleaved shard %s for %s: '
1332
- '%s successes, %s required.', shard_range, quote(broker.path),
1333
- replication_successes, replication_quorum)
1334
- self._increment_stat('cleaved', 'failure', statsd=True)
1335
- return CLEAVE_FAILED
1336
-
1337
- elapsed = round(time.time() - start, 3)
1338
- self._min_stat('cleaved', 'min_time', elapsed)
1339
- self._max_stat('cleaved', 'max_time', elapsed)
1340
- broker.merge_shard_ranges(shard_range)
1341
- cleaving_context.range_done(shard_range.upper_str)
1342
- if shard_range.upper >= own_shard_range.upper:
1343
- # cleaving complete
1344
- cleaving_context.cleaving_done = True
1345
- cleaving_context.store(broker)
1346
- self.logger.info(
1347
- 'Cleaved %s for shard range %s in %gs.',
1348
- quote(broker.path), shard_range, elapsed)
1349
- self._increment_stat('cleaved', 'success', statsd=True)
1350
- return CLEAVE_SUCCESS
2101
+ def _cleave_shard_range(self, broker, cleaving_context, shard_range,
2102
+ own_shard_range):
2103
+ self.info(broker, "Cleaving from row %s into %s for %r",
2104
+ cleaving_context.last_cleave_to_row,
2105
+ quote(shard_range.name), shard_range)
2106
+ self._increment_stat('cleaved', 'attempted')
2107
+ policy_index = broker.storage_policy_index
2108
+ shard_part, shard_broker, node_id, put_timestamp = \
2109
+ self._get_shard_broker(shard_range, broker.root_path,
2110
+ policy_index)
2111
+ stat = 'db_exists' if put_timestamp is None else 'db_created'
2112
+ self._increment_stat('cleaved', stat, statsd=True)
2113
+ return self._cleave_shard_broker(
2114
+ broker, cleaving_context, shard_range, own_shard_range,
2115
+ shard_broker, put_timestamp, shard_part, node_id)
1351
2116
 
1352
2117
  def _cleave(self, broker):
1353
2118
  # Returns True if misplaced objects have been moved and the entire
1354
2119
  # container namespace has been successfully cleaved, False otherwise
1355
2120
  if broker.is_sharded():
1356
- self.logger.debug('Passing over already sharded container %s',
1357
- quote(broker.path))
2121
+ self.debug(broker, 'Passing over already sharded container')
1358
2122
  return True
1359
2123
 
1360
2124
  cleaving_context = CleavingContext.load(broker)
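`_cleave` resumes from a `CleavingContext` persisted with the broker, so cleaving progress survives interruptions and repeated sharder cycles. The toy below only illustrates that checkpointing idea using JSON on disk; the field names mirror ones visible in this diff (cursor, ranges_done, cleaving_done), but the storage mechanism is an assumption for the example (Swift persists the context in the container broker's metadata, not a file).

    # Illustrative checkpoint object, not Swift's CleavingContext.
    import json
    from dataclasses import dataclass, asdict

    @dataclass
    class ToyCleavingContext:
        cursor: str = ''
        ranges_done: int = 0
        ranges_todo: int = 0
        misplaced_done: bool = False
        cleaving_done: bool = False

        def store(self, path):
            with open(path, 'w') as f:
                json.dump(asdict(self), f)

        @classmethod
        def load(cls, path):
            try:
                with open(path) as f:
                    return cls(**json.load(f))
            except FileNotFoundError:
                return cls()  # start a fresh context, as on a first visit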
@@ -1362,9 +2126,8 @@ class ContainerSharder(ContainerReplicator):
1362
2126
  # ensure any misplaced objects in the source broker are moved; note
1363
2127
  # that this invocation of _move_misplaced_objects is targeted at
1364
2128
  # the *retiring* db.
1365
- self.logger.debug(
1366
- 'Moving any misplaced objects from sharding container: %s',
1367
- quote(broker.path))
2129
+ self.debug(broker,
2130
+ 'Moving any misplaced objects from sharding container')
1368
2131
  bounds = self._make_default_misplaced_object_bounds(broker)
1369
2132
  cleaving_context.misplaced_done = self._move_misplaced_objects(
1370
2133
  broker, src_broker=broker.get_brokers()[0],
@@ -1372,60 +2135,78 @@ class ContainerSharder(ContainerReplicator):
1372
2135
  cleaving_context.store(broker)
1373
2136
 
1374
2137
  if cleaving_context.cleaving_done:
1375
- self.logger.debug('Cleaving already complete for container %s',
1376
- quote(broker.path))
2138
+ self.debug(broker, 'Cleaving already complete for container')
1377
2139
  return cleaving_context.misplaced_done
1378
2140
 
1379
- ranges_todo = broker.get_shard_ranges(marker=cleaving_context.marker)
2141
+ shard_ranges = broker.get_shard_ranges(marker=cleaving_context.marker)
2142
+ # Ignore shrinking shard ranges: we never want to cleave objects to a
2143
+ # shrinking shard. Shrinking shard ranges are to be expected in a root;
2144
+ # shrinking shard ranges (other than own shard range) are not normally
2145
+ # expected in a shard but can occur if there is an overlapping shard
2146
+ # range that has been discovered from the root.
2147
+ ranges_todo = [sr for sr in shard_ranges
2148
+ if sr.state != ShardRange.SHRINKING]
1380
2149
  if cleaving_context.cursor:
1381
- # always update ranges_todo in case more ranges have been found
1382
- # since last visit
2150
+ # always update ranges_todo in case shard ranges have changed since
2151
+ # last visit
1383
2152
  cleaving_context.ranges_todo = len(ranges_todo)
1384
- self.logger.debug('Continuing to cleave (%s done, %s todo): %s',
1385
- cleaving_context.ranges_done,
1386
- cleaving_context.ranges_todo,
1387
- quote(broker.path))
2153
+ self.debug(broker, 'Continuing to cleave (%s done, %s todo)',
2154
+ cleaving_context.ranges_done,
2155
+ cleaving_context.ranges_todo)
1388
2156
  else:
1389
2157
  cleaving_context.start()
2158
+ own_shard_range = broker.get_own_shard_range()
2159
+ cleaving_context.cursor = own_shard_range.lower_str
1390
2160
  cleaving_context.ranges_todo = len(ranges_todo)
1391
- self.logger.debug('Starting to cleave (%s todo): %s',
1392
- cleaving_context.ranges_todo, quote(broker.path))
2161
+ self.info(broker, 'Starting to cleave (%s todo)',
2162
+ cleaving_context.ranges_todo)
2163
+
2164
+ own_shard_range = broker.get_own_shard_range(no_default=True)
2165
+ if own_shard_range is None:
2166
+ # A default should never be SHRINKING or SHRUNK but because we
2167
+ # may write own_shard_range back to broker, let's make sure
2168
+ # it can't be defaulted.
2169
+ self.warning(broker, 'Failed to get own_shard_range')
2170
+ ranges_todo = [] # skip cleaving
1393
2171
 
1394
2172
  ranges_done = []
1395
2173
  for shard_range in ranges_todo:
1396
- if shard_range.state == ShardRange.SHRINKING:
1397
- # Ignore shrinking shard ranges: we never want to cleave
1398
- # objects to a shrinking shard. Shrinking shard ranges are to
1399
- # be expected in a root; shrinking shard ranges (other than own
1400
- # shard range) are not normally expected in a shard but can
1401
- # occur if there is an overlapping shard range that has been
1402
- # discovered from the root.
1403
- cleaving_context.range_done(None) # don't move the cursor
1404
- continue
1405
- elif shard_range.state in (ShardRange.CREATED,
1406
- ShardRange.CLEAVED,
1407
- ShardRange.ACTIVE):
1408
- cleave_result = self._cleave_shard_range(
1409
- broker, cleaving_context, shard_range)
1410
- if cleave_result == CLEAVE_SUCCESS:
1411
- ranges_done.append(shard_range)
1412
- if len(ranges_done) == self.cleave_batch_size:
1413
- break
1414
- elif cleave_result == CLEAVE_FAILED:
1415
- break
1416
- # else, no errors, but no rows found either. keep going,
1417
- # and don't count it against our batch size
1418
- else:
1419
- self.logger.info('Stopped cleave at unready %s', shard_range)
2174
+ if cleaving_context.cleaving_done:
2175
+ # note: there may still be ranges_todo, for example: if this
2176
+ # shard is shrinking and has merged a root shard range in
2177
+ # sharded state along with an active acceptor shard range, but
2178
+ # the root range is irrelevant
1420
2179
  break
1421
2180
 
1422
- if not ranges_done:
1423
- # _cleave_shard_range always store()s the context on success; make
1424
- # sure we *also* do that if we hit a failure right off the bat
1425
- cleaving_context.store(broker)
1426
- self.logger.debug(
1427
- 'Cleaved %s shard ranges for %s',
1428
- len(ranges_done), quote(broker.path))
2181
+ if len(ranges_done) == self.cleave_batch_size:
2182
+ break
2183
+
2184
+ if shard_range.lower > cleaving_context.cursor:
2185
+ self.info(broker, 'Stopped cleave at gap: %r - %r' %
2186
+ (cleaving_context.cursor, shard_range.lower))
2187
+ break
2188
+
2189
+ if shard_range.state not in (ShardRange.CREATED,
2190
+ ShardRange.CLEAVED,
2191
+ ShardRange.ACTIVE):
2192
+ self.info(broker, 'Stopped cleave at unready %s', shard_range)
2193
+ break
2194
+
2195
+ cleave_result = self._cleave_shard_range(
2196
+ broker, cleaving_context, shard_range, own_shard_range)
2197
+
2198
+ if cleave_result == CLEAVE_SUCCESS:
2199
+ ranges_done.append(shard_range)
2200
+ elif cleave_result == CLEAVE_FAILED:
2201
+ break
2202
+ # else: CLEAVE_EMPTY: no errors, but no rows found either. keep
2203
+ # going, and don't count it against our batch size
2204
+
2205
+ # _cleave_shard_range always store()s the context on success; *also* do
2206
+ # that here in case we hit a failure right off the bat or ended loop
2207
+ # with skipped ranges
2208
+ cleaving_context.store(broker)
2209
+ self.debug(broker, 'Cleaved %s shard ranges', len(ranges_done))
1429
2210
  return (cleaving_context.misplaced_done and
1430
2211
  cleaving_context.cleaving_done)
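The rewritten loop above stops at namespace gaps, at unready shard ranges, on replication failure, and after `cleave_batch_size` successes, while empty cleaves advance the cursor without counting against the batch. Here is a self-contained sketch of that control flow over simple (lower, upper, state) tuples; all names and state strings are assumptions made for illustration, not Swift's ShardRange API.

    # Sketch of the cleave loop's control flow (names and states assumed).
    CREATED, CLEAVED, ACTIVE, SHRINKING = 'created', 'cleaved', 'active', 'shrinking'
    SUCCESS, EMPTY, FAILED = 'success', 'empty', 'failed'

    def cleave_batch(ranges, cursor, batch_size, cleave_one):
        done = []
        for lower, upper, state in ranges:
            if state == SHRINKING:
                continue              # never cleave into a shrinking shard
            if len(done) == batch_size:
                break                 # batch is full
            if lower > cursor:
                break                 # gap in the namespace: stop cleaving
            if state not in (CREATED, CLEAVED, ACTIVE):
                break                 # acceptor not ready yet
            result = cleave_one(lower, upper)
            if result == FAILED:
                break                 # don't advance past a failed range
            cursor = upper            # empty or successful: move the cursor
            if result == SUCCESS:
                done.append((lower, upper))
        return done, cursor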
1431
2212
 
@@ -1435,18 +2216,23 @@ class ContainerSharder(ContainerReplicator):
1435
2216
  # Move all CLEAVED shards to ACTIVE state and if a shard then
1436
2217
  # delete own shard range; these changes will be simultaneously
1437
2218
  # reported in the next update to the root container.
1438
- modified_shard_ranges = broker.get_shard_ranges(
1439
- states=ShardRange.CLEAVED)
1440
- for sr in modified_shard_ranges:
1441
- sr.update_state(ShardRange.ACTIVE)
1442
- own_shard_range = broker.get_own_shard_range()
1443
- if own_shard_range.state in (ShardRange.SHRINKING,
1444
- ShardRange.SHRUNK):
1445
- next_state = ShardRange.SHRUNK
1446
- else:
1447
- next_state = ShardRange.SHARDED
1448
- own_shard_range.update_state(next_state)
2219
+ own_shard_range = broker.get_own_shard_range(no_default=True)
2220
+ if own_shard_range is None:
2221
+ # This is more of a belts and braces, not sure we could even
2222
+ # get this far with without an own_shard_range. But because
2223
+ # we will be writing own_shard_range back, we need to make sure
2224
+ self.warning(broker, 'Failed to get own_shard_range')
2225
+ return False
1449
2226
  own_shard_range.update_meta(0, 0)
2227
+ if own_shard_range.state in ShardRange.SHRINKING_STATES:
2228
+ own_shard_range.update_state(ShardRange.SHRUNK)
2229
+ modified_shard_ranges = []
2230
+ else:
2231
+ own_shard_range.update_state(ShardRange.SHARDED)
2232
+ modified_shard_ranges = broker.get_shard_ranges(
2233
+ states=[ShardRange.CLEAVED])
2234
+ for sr in modified_shard_ranges:
2235
+ sr.update_state(ShardRange.ACTIVE)
1450
2236
  if (not broker.is_root_container() and not
1451
2237
  own_shard_range.deleted):
1452
2238
  own_shard_range = own_shard_range.copy(
@@ -1454,16 +2240,12 @@ class ContainerSharder(ContainerReplicator):
1454
2240
  modified_shard_ranges.append(own_shard_range)
1455
2241
  broker.merge_shard_ranges(modified_shard_ranges)
1456
2242
  if broker.set_sharded_state():
1457
- cleaving_context.delete(broker)
1458
2243
  return True
1459
2244
  else:
1460
- self.logger.warning(
1461
- 'Failed to remove retiring db file for %s',
1462
- quote(broker.path))
2245
+ self.warning(broker, 'Failed to remove retiring db file')
1463
2246
  else:
1464
- self.logger.warning(
1465
- 'Repeat cleaving required for %r with context: %s',
1466
- broker.db_files[0], dict(cleaving_context))
2247
+ self.warning(broker, 'Repeat cleaving required, context: %s',
2248
+ dict(cleaving_context))
1467
2249
  cleaving_context.reset()
1468
2250
  cleaving_context.store(broker)
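`_complete_sharding` above now branches on the own shard range's state: a shrinking donor moves straight to SHRUNK and promotes nothing, while a sharding container moves to SHARDED and promotes its CLEAVED child ranges to ACTIVE. A compact sketch of that decision, using plain strings in place of ShardRange state constants, follows.

    # Toy decision function; state names are plain strings for illustration.
    SHRINKING_STATES = ('shrinking', 'shrunk')

    def completion_states(own_state, child_states):
        if own_state in SHRINKING_STATES:
            return 'shrunk', list(child_states)        # children left alone
        promoted = ['active' if s == 'cleaved' else s for s in child_states]
        return 'sharded', promoted

    # e.g. completion_states('sharding', ['cleaved', 'cleaved', 'created'])
    #      -> ('sharded', ['active', 'active', 'created'])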
1469
2251
 
@@ -1473,106 +2255,138 @@ class ContainerSharder(ContainerReplicator):
1473
2255
  candidates = find_sharding_candidates(
1474
2256
  broker, self.shard_container_threshold, shard_ranges)
1475
2257
  if candidates:
1476
- self.logger.debug('Identified %s sharding candidates',
1477
- len(candidates))
2258
+ self.debug(broker, 'Identified %s sharding candidates',
2259
+ len(candidates))
1478
2260
  broker.merge_shard_ranges(candidates)
1479
2261
 
1480
2262
  def _find_and_enable_shrinking_candidates(self, broker):
1481
2263
  if not broker.is_sharded():
1482
- self.logger.warning('Cannot shrink a not yet sharded container %s',
1483
- quote(broker.path))
2264
+ self.warning(broker, 'Cannot shrink a not yet sharded container')
1484
2265
  return
1485
2266
 
1486
- merge_pairs = find_shrinking_candidates(
1487
- broker, self.shrink_size, self.merge_size)
1488
- self.logger.debug('Found %s shrinking candidates' % len(merge_pairs))
2267
+ compactible_sequences = find_compactible_shard_sequences(
2268
+ broker, self.shrink_threshold, self.expansion_limit,
2269
+ self.max_shrinking, self.max_expanding, include_shrinking=True)
2270
+ self.debug(broker, 'Found %s compactible sequences of length(s) %s' %
2271
+ (len(compactible_sequences),
2272
+ [len(s) for s in compactible_sequences]))
2273
+ process_compactible_shard_sequences(broker, compactible_sequences)
1489
2274
  own_shard_range = broker.get_own_shard_range()
1490
- for acceptor, donor in merge_pairs.items():
1491
- self.logger.debug('shrinking shard range %s into %s in %s' %
1492
- (donor, acceptor, broker.db_file))
1493
- broker.merge_shard_ranges([acceptor, donor])
2275
+ for sequence in compactible_sequences:
2276
+ acceptor = sequence[-1]
2277
+ donors = ShardRangeList(sequence[:-1])
2278
+ self.debug(broker,
2279
+ 'shrinking %d objects from %d shard ranges into %s' %
2280
+ (donors.object_count, len(donors), acceptor))
1494
2281
  if acceptor.name != own_shard_range.name:
1495
- self._send_shard_ranges(
1496
- acceptor.account, acceptor.container, [acceptor])
1497
- acceptor.increment_meta(donor.object_count, donor.bytes_used)
1498
- else:
1499
- # no need to change namespace or stats
1500
- acceptor.update_state(ShardRange.ACTIVE,
1501
- state_timestamp=Timestamp.now())
2282
+ self._send_shard_ranges(broker, acceptor.account,
2283
+ acceptor.container, [acceptor])
2284
+ acceptor.increment_meta(donors.object_count, donors.bytes_used)
1502
2285
  # Now send a copy of the expanded acceptor, with an updated
1503
- # timestamp, to the donor container. This forces the donor to
2286
+ # timestamp, to each donor container. This forces each donor to
1504
2287
  # asynchronously cleave its entire contents to the acceptor and
1505
2288
  # delete itself. The donor will pass its own deleted shard range to
1506
2289
  # the acceptor when cleaving. Subsequent updates from the donor or
1507
2290
  # the acceptor will then update the root to have the deleted donor
1508
2291
  # shard range.
1509
- self._send_shard_ranges(
1510
- donor.account, donor.container, [donor, acceptor])
2292
+ for donor in donors:
2293
+ self._send_shard_ranges(broker, donor.account,
2294
+ donor.container, [donor, acceptor])
1511
2295
 
1512
2296
  def _update_root_container(self, broker):
1513
2297
  own_shard_range = broker.get_own_shard_range(no_default=True)
1514
- if not own_shard_range or own_shard_range.reported:
2298
+ if not own_shard_range:
2299
+ return
2300
+
2301
+ # Don't update the osr stats including tombstones unless it's CLEAVED+
2302
+ if own_shard_range.state in SHARD_UPDATE_STAT_STATES:
2303
+ # do a reclaim *now* in order to get best estimate of tombstone
2304
+ # count that is consistent with the current object_count
2305
+ reclaimer = self._reclaim(broker)
2306
+ tombstones = reclaimer.get_tombstone_count()
2307
+ self.debug(broker, 'tombstones = %d', tombstones)
2308
+ # shrinking candidates are found in the root DB so that's the only
2309
+ # place we need up to date tombstone stats.
2310
+ own_shard_range.update_tombstones(tombstones)
2311
+ update_own_shard_range_stats(broker, own_shard_range)
2312
+
2313
+ if not own_shard_range.reported:
2314
+ broker.merge_shard_ranges(own_shard_range)
2315
+
2316
+ # We can't use `state not in SHARD_UPDATE_STAT_STATES` to return
2317
+ # because there are cases where we still want to update the root even
2318
+ # if the stats are wrong, e.g. it's a new shard, or something else has
2319
+ # decided to remove the latch to update root.
2320
+ if own_shard_range.reported:
1515
2321
  return
1516
2322
 
1517
- # persist the reported shard metadata
1518
- broker.merge_shard_ranges(own_shard_range)
1519
2323
  # now get a consistent list of own and other shard ranges
1520
2324
  shard_ranges = broker.get_shard_ranges(
1521
2325
  include_own=True,
1522
2326
  include_deleted=True)
1523
2327
  # send everything
1524
- if self._send_shard_ranges(
1525
- broker.root_account, broker.root_container, shard_ranges):
2328
+ if self._send_shard_ranges(broker, broker.root_account,
2329
+ broker.root_container, shard_ranges,
2330
+ {'Referer': quote(broker.path)}):
1526
2331
  # on success, mark ourselves as reported so we don't keep
1527
2332
  # hammering the root
1528
2333
  own_shard_range.reported = True
1529
2334
  broker.merge_shard_ranges(own_shard_range)
2335
+ self.debug(broker, 'updated root objs=%d, tombstones=%s',
2336
+ own_shard_range.object_count,
2337
+ own_shard_range.tombstones)
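The reworked `_update_root_container` above separates two concerns: refreshing the own shard range's stats (only once it is in a stats-worthy state) and deciding whether to send to the root (skipped only if a previous report succeeded, with the latch set again only after a successful send). A rough model of the latch is shown below; the class and field names are invented for the example and are not Swift's ShardRange API.

    # Rough model of the "reported" latch described above.
    class ToyOwnRange:
        def __init__(self):
            self.reported = False
            self.object_count = 0
            self.tombstones = -1

    def update_root(own, fresh_stats, send_to_root):
        own.object_count, own.tombstones = fresh_stats   # refresh local stats
        if own.reported:
            return False        # root already has these stats; don't hammer it
        if send_to_root(own):
            own.reported = True  # latch: only set after a successful send
            return True
        return False             # failed send: retry on the next cycle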
1530
2338
 
1531
2339
  def _process_broker(self, broker, node, part):
1532
2340
  broker.get_info() # make sure account/container are populated
1533
- state = broker.get_db_state()
1534
- self.logger.debug('Starting processing %s state %s',
1535
- quote(broker.path), state)
2341
+ db_state = broker.get_db_state()
2342
+ is_deleted = broker.is_deleted()
2343
+ self.debug(broker, 'Starting processing, state %s%s', db_state,
2344
+ ' (deleted)' if is_deleted else '')
1536
2345
 
1537
2346
  if not self._audit_container(broker):
1538
2347
  return
1539
2348
 
1540
2349
  # now look and deal with misplaced objects.
2350
+ move_start_ts = time.time()
1541
2351
  self._move_misplaced_objects(broker)
2352
+ self.logger.timing_since(
2353
+ 'sharder.sharding.move_misplaced', move_start_ts)
1542
2354
 
1543
- if broker.is_deleted():
1544
- # This container is deleted so we can skip it. We still want
1545
- # deleted containers to go via misplaced items because they may
1546
- # have new objects sitting in them that may need to move.
1547
- return
2355
+ is_leader = node['index'] == 0 and self.auto_shard and not is_deleted
1548
2356
 
1549
- is_leader = node['index'] == 0 and self.auto_shard
1550
- if state in (UNSHARDED, COLLAPSED):
2357
+ if db_state in (UNSHARDED, COLLAPSED):
1551
2358
  if is_leader and broker.is_root_container():
1552
2359
  # bootstrap sharding of root container
2360
+ own_shard_range = broker.get_own_shard_range()
2361
+ update_own_shard_range_stats(broker, own_shard_range)
1553
2362
  self._find_and_enable_sharding_candidates(
1554
- broker, shard_ranges=[broker.get_own_shard_range()])
2363
+ broker, shard_ranges=[own_shard_range])
1555
2364
 
1556
2365
  own_shard_range = broker.get_own_shard_range()
1557
- if own_shard_range.state in (ShardRange.SHARDING,
1558
- ShardRange.SHRINKING,
1559
- ShardRange.SHARDED,
1560
- ShardRange.SHRUNK):
1561
- if broker.get_shard_ranges():
2366
+ if own_shard_range.state in ShardRange.CLEAVING_STATES:
2367
+ if broker.has_other_shard_ranges():
1562
2368
  # container has been given shard ranges rather than
1563
- # found them e.g. via replication or a shrink event
2369
+ # found them e.g. via replication or a shrink event,
2370
+ # or manually triggered cleaving.
2371
+ db_start_ts = time.time()
1564
2372
  if broker.set_sharding_state():
1565
- state = SHARDING
2373
+ db_state = SHARDING
2374
+ self.info(broker, 'Kick off container cleaving, '
2375
+ 'own shard range in state %r',
2376
+ own_shard_range.state_text)
2377
+ self.logger.timing_since(
2378
+ 'sharder.sharding.set_state', db_start_ts)
1566
2379
  elif is_leader:
1567
2380
  if broker.set_sharding_state():
1568
- state = SHARDING
2381
+ db_state = SHARDING
1569
2382
  else:
1570
- self.logger.debug(
1571
- 'Own shard range in state %r but no shard ranges '
1572
- 'and not leader; remaining unsharded: %s',
1573
- own_shard_range.state_text, quote(broker.path))
2383
+ self.debug(broker,
2384
+ 'Own shard range in state %r but no shard '
2385
+ 'ranges and not leader; remaining unsharded',
2386
+ own_shard_range.state_text)
1574
2387
 
1575
- if state == SHARDING:
2388
+ if db_state == SHARDING:
2389
+ cleave_start_ts = time.time()
1576
2390
  if is_leader:
1577
2391
  num_found = self._find_shard_ranges(broker)
1578
2392
  else:
@@ -1587,38 +2401,53 @@ class ContainerSharder(ContainerReplicator):
1587
2401
 
1588
2402
  # always try to cleave any pending shard ranges
1589
2403
  cleave_complete = self._cleave(broker)
2404
+ self.logger.timing_since(
2405
+ 'sharder.sharding.cleave', cleave_start_ts)
1590
2406
 
1591
2407
  if cleave_complete:
1592
- self.logger.info('Completed cleaving of %s',
1593
- quote(broker.path))
1594
2408
  if self._complete_sharding(broker):
1595
- state = SHARDED
2409
+ db_state = SHARDED
1596
2410
  self._increment_stat('visited', 'completed', statsd=True)
2411
+ self.info(broker, 'Completed cleaving, DB set to sharded '
2412
+ 'state')
2413
+ self.logger.timing_since(
2414
+ 'sharder.sharding.completed',
2415
+ float(broker.get_own_shard_range().epoch))
1597
2416
  else:
1598
- self.logger.debug('Remaining in sharding state %s',
1599
- quote(broker.path))
2417
+ self.info(broker, 'Completed cleaving, DB remaining in '
2418
+ 'sharding state')
2419
+
2420
+ if not broker.is_deleted():
2421
+ if db_state == SHARDED and broker.is_root_container():
2422
+ # look for shrink stats
2423
+ send_start_ts = time.time()
2424
+ self._identify_shrinking_candidate(broker, node)
2425
+ if is_leader:
2426
+ self._find_and_enable_shrinking_candidates(broker)
2427
+ self._find_and_enable_sharding_candidates(broker)
2428
+ for shard_range in broker.get_shard_ranges(
2429
+ states=[ShardRange.SHARDING]):
2430
+ self._send_shard_ranges(broker, shard_range.account,
2431
+ shard_range.container,
2432
+ [shard_range])
2433
+ self.logger.timing_since(
2434
+ 'sharder.sharding.send_sr', send_start_ts)
1600
2435
 
1601
- if state == SHARDED and broker.is_root_container():
1602
- if is_leader:
1603
- self._find_and_enable_shrinking_candidates(broker)
1604
- self._find_and_enable_sharding_candidates(broker)
1605
- for shard_range in broker.get_shard_ranges(
1606
- states=[ShardRange.SHARDING]):
1607
- self._send_shard_ranges(
1608
- shard_range.account, shard_range.container,
1609
- [shard_range])
1610
-
1611
- if not broker.is_root_container():
1612
- # Update the root container with this container's shard range
1613
- # info; do this even when sharded in case previous attempts
1614
- # failed; don't do this if there is no own shard range. When
1615
- # sharding a shard, this is when the root will see the new
1616
- # shards move to ACTIVE state and the sharded shard
1617
- # simultaneously become deleted.
1618
- self._update_root_container(broker)
1619
-
1620
- self.logger.debug('Finished processing %s state %s',
1621
- quote(broker.path), broker.get_db_state())
2436
+ if not broker.is_root_container():
2437
+ # Update the root container with this container's shard range
2438
+ # info; do this even when sharded in case previous attempts
2439
+ # failed; don't do this if there is no own shard range. When
2440
+ # sharding a shard, this is when the root will see the new
2441
+ # shards move to ACTIVE state and the sharded shard
2442
+ # simultaneously become deleted.
2443
+ update_start_ts = time.time()
2444
+ self._update_root_container(broker)
2445
+ self.logger.timing_since(
2446
+ 'sharder.sharding.update_root', update_start_ts)
2447
+
2448
+ self.debug(broker,
2449
+ 'Finished processing, state %s%s',
2450
+ broker.get_db_state(), ' (deleted)' if is_deleted else '')
1622
2451
 
1623
2452
  def _one_shard_cycle(self, devices_to_shard, partitions_to_shard):
1624
2453
  """
@@ -1642,9 +2471,9 @@ class ContainerSharder(ContainerReplicator):
1642
2471
  self.logger.info('(Override partitions: %s)',
1643
2472
  ', '.join(str(p) for p in partitions_to_shard))
1644
2473
  self._zero_stats()
1645
- self._local_device_ids = set()
2474
+ self._local_device_ids = {}
1646
2475
  dirs = []
1647
- self.ips = whataremyips(bind_ip=self.bind_ip)
2476
+ self.ips = whataremyips(self.bind_ip)
1648
2477
  for node in self.ring.devs:
1649
2478
  device_path = self._check_node(node)
1650
2479
  if not device_path:
@@ -1653,7 +2482,7 @@ class ContainerSharder(ContainerReplicator):
1653
2482
  if os.path.isdir(datadir):
1654
2483
  # Populate self._local_device_ids so we can find devices for
1655
2484
  # shard containers later
1656
- self._local_device_ids.add(node['id'])
2485
+ self._local_device_ids[node['id']] = node
1657
2486
  if node['device'] not in devices_to_shard:
1658
2487
  continue
1659
2488
  part_filt = self._partition_dir_filter(
@@ -1661,7 +2490,7 @@ class ContainerSharder(ContainerReplicator):
1661
2490
  partitions_to_shard)
1662
2491
  dirs.append((datadir, node, part_filt))
1663
2492
  if not dirs:
1664
- self.logger.warning('Found no data dirs!')
2493
+ self.logger.info('Found no containers directories')
1665
2494
  for part, path, node in self.roundrobin_datadirs(dirs):
1666
2495
  # NB: get_part_nodes always provides an 'index' key;
1667
2496
  # this will be used in leader selection
@@ -1686,42 +2515,47 @@ class ContainerSharder(ContainerReplicator):
1686
2515
  self._increment_stat('visited', 'skipped')
1687
2516
  except (Exception, Timeout) as err:
1688
2517
  self._increment_stat('visited', 'failure', statsd=True)
1689
- self.logger.exception(
1690
- 'Unhandled exception while processing %s: %s', path, err)
2518
+ self.exception(broker, 'Unhandled exception while processing: '
2519
+ '%s', err)
1691
2520
  error = err
1692
2521
  try:
1693
2522
  self._record_sharding_progress(broker, node, error)
1694
2523
  except (Exception, Timeout) as error:
1695
- self.logger.exception(
1696
- 'Unhandled exception while dumping progress for %s: %s',
1697
- path, error)
2524
+ self.exception(broker, 'Unhandled exception while dumping '
2525
+ 'progress: %s', error)
1698
2526
  self._periodic_report_stats()
1699
2527
 
1700
2528
  self._report_stats()
1701
2529
 
2530
+ @contextmanager
1702
2531
  def _set_auto_shard_from_command_line(self, **kwargs):
2532
+ conf_auto_shard = self.auto_shard
1703
2533
  auto_shard = kwargs.get('auto_shard', None)
1704
2534
  if auto_shard is not None:
1705
2535
  self.auto_shard = config_true_value(auto_shard)
2536
+ try:
2537
+ yield
2538
+ finally:
2539
+ self.auto_shard = conf_auto_shard
1706
2540
 
1707
2541
  def run_forever(self, *args, **kwargs):
1708
2542
  """Run the container sharder until stopped."""
1709
- self._set_auto_shard_from_command_line(**kwargs)
1710
- self.reported = time.time()
1711
- time.sleep(random() * self.interval)
1712
- while True:
1713
- begin = time.time()
1714
- try:
1715
- self._one_shard_cycle(devices_to_shard=Everything(),
1716
- partitions_to_shard=Everything())
1717
- except (Exception, Timeout):
1718
- self.logger.increment('errors')
1719
- self.logger.exception('Exception in sharder')
1720
- elapsed = time.time() - begin
1721
- self.logger.info(
1722
- 'Container sharder cycle completed: %.02fs', elapsed)
1723
- if elapsed < self.interval:
1724
- time.sleep(self.interval - elapsed)
2543
+ with self._set_auto_shard_from_command_line(**kwargs):
2544
+ self.reported = time.time()
2545
+ time.sleep(random() * self.interval)
2546
+ while True:
2547
+ begin = time.time()
2548
+ try:
2549
+ self._one_shard_cycle(devices_to_shard=Everything(),
2550
+ partitions_to_shard=Everything())
2551
+ except (Exception, Timeout):
2552
+ self.logger.increment('errors')
2553
+ self.logger.exception('Exception in sharder')
2554
+ elapsed = time.time() - begin
2555
+ self.logger.info(
2556
+ 'Container sharder cycle completed: %.02fs', elapsed)
2557
+ if elapsed < self.interval:
2558
+ time.sleep(self.interval - elapsed)
1725
2559
 
1726
2560
  def run_once(self, *args, **kwargs):
1727
2561
  """Run the container sharder once."""
@@ -1729,10 +2563,32 @@ class ContainerSharder(ContainerReplicator):
1729
2563
  override_options = parse_override_options(once=True, **kwargs)
1730
2564
  devices_to_shard = override_options.devices or Everything()
1731
2565
  partitions_to_shard = override_options.partitions or Everything()
1732
- self._set_auto_shard_from_command_line(**kwargs)
1733
- begin = self.reported = time.time()
1734
- self._one_shard_cycle(devices_to_shard=devices_to_shard,
1735
- partitions_to_shard=partitions_to_shard)
1736
- elapsed = time.time() - begin
1737
- self.logger.info(
1738
- 'Container sharder "once" mode completed: %.02fs', elapsed)
2566
+ with self._set_auto_shard_from_command_line(**kwargs):
2567
+ begin = self.reported = time.time()
2568
+ self._one_shard_cycle(devices_to_shard=devices_to_shard,
2569
+ partitions_to_shard=partitions_to_shard)
2570
+ elapsed = time.time() - begin
2571
+ self.logger.info(
2572
+ 'Container sharder "once" mode completed: %.02fs', elapsed)
2573
+
2574
+
2575
+ def main():
2576
+ parser = OptionParser("%prog CONFIG [options]")
2577
+ parser.add_option('-d', '--devices',
2578
+ help='Shard containers only on given devices. '
2579
+ 'Comma-separated list. '
2580
+ 'Only has effect if --once is used.')
2581
+ parser.add_option('-p', '--partitions',
2582
+ help='Shard containers only in given partitions. '
2583
+ 'Comma-separated list. '
2584
+ 'Only has effect if --once is used.')
2585
+ parser.add_option('--no-auto-shard', action='store_false',
2586
+ dest='auto_shard', default=None,
2587
+ help='Disable auto-sharding. Overrides the auto_shard '
2588
+ 'value in the config file.')
2589
+ conf_file, options = parse_options(parser=parser, once=True)
2590
+ run_daemon(ContainerSharder, conf_file, **options)
2591
+
2592
+
2593
+ if __name__ == '__main__':
2594
+ main()
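The new `main()` gives the sharder a console-script entry point built on the stdlib `optparse` module plus Swift's `parse_options`/`run_daemon` helpers. The snippet below is a standalone illustration of the same `optparse` pattern only; it does not call into Swift, and the option set is trimmed for the example.

    from optparse import OptionParser

    def example_main(argv=None):
        parser = OptionParser("%prog CONFIG [options]")
        parser.add_option('-d', '--devices',
                          help='Comma-separated list of devices to process.')
        parser.add_option('--no-auto-shard', action='store_false',
                          dest='auto_shard', default=None,
                          help='Disable auto-sharding for this run.')
        options, args = parser.parse_args(argv)
        if not args:
            parser.error('missing CONFIG argument')
        return args[0], options

    # e.g. example_main(['/etc/swift/container-server.conf', '--no-auto-shard'])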