swift 2.23.2__py3-none-any.whl → 2.35.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208)
  1. swift/__init__.py +29 -50
  2. swift/account/auditor.py +21 -118
  3. swift/account/backend.py +33 -28
  4. swift/account/reaper.py +37 -28
  5. swift/account/replicator.py +22 -0
  6. swift/account/server.py +60 -26
  7. swift/account/utils.py +28 -11
  8. swift-2.23.2.data/scripts/swift-account-audit → swift/cli/account_audit.py +23 -13
  9. swift-2.23.2.data/scripts/swift-config → swift/cli/config.py +2 -2
  10. swift/cli/container_deleter.py +5 -11
  11. swift-2.23.2.data/scripts/swift-dispersion-populate → swift/cli/dispersion_populate.py +8 -7
  12. swift/cli/dispersion_report.py +10 -9
  13. swift-2.23.2.data/scripts/swift-drive-audit → swift/cli/drive_audit.py +63 -21
  14. swift/cli/form_signature.py +3 -7
  15. swift-2.23.2.data/scripts/swift-get-nodes → swift/cli/get_nodes.py +8 -2
  16. swift/cli/info.py +183 -29
  17. swift/cli/manage_shard_ranges.py +708 -37
  18. swift-2.23.2.data/scripts/swift-oldies → swift/cli/oldies.py +25 -14
  19. swift-2.23.2.data/scripts/swift-orphans → swift/cli/orphans.py +7 -3
  20. swift/cli/recon.py +196 -67
  21. swift-2.23.2.data/scripts/swift-recon-cron → swift/cli/recon_cron.py +17 -20
  22. swift-2.23.2.data/scripts/swift-reconciler-enqueue → swift/cli/reconciler_enqueue.py +2 -3
  23. swift/cli/relinker.py +807 -126
  24. swift/cli/reload.py +135 -0
  25. swift/cli/ringbuilder.py +217 -20
  26. swift/cli/ringcomposer.py +0 -1
  27. swift/cli/shard-info.py +4 -3
  28. swift/common/base_storage_server.py +9 -20
  29. swift/common/bufferedhttp.py +48 -74
  30. swift/common/constraints.py +20 -15
  31. swift/common/container_sync_realms.py +9 -11
  32. swift/common/daemon.py +25 -8
  33. swift/common/db.py +198 -127
  34. swift/common/db_auditor.py +168 -0
  35. swift/common/db_replicator.py +95 -55
  36. swift/common/digest.py +141 -0
  37. swift/common/direct_client.py +144 -33
  38. swift/common/error_limiter.py +93 -0
  39. swift/common/exceptions.py +25 -1
  40. swift/common/header_key_dict.py +2 -9
  41. swift/common/http_protocol.py +373 -0
  42. swift/common/internal_client.py +129 -59
  43. swift/common/linkat.py +3 -4
  44. swift/common/manager.py +284 -67
  45. swift/common/memcached.py +396 -147
  46. swift/common/middleware/__init__.py +4 -0
  47. swift/common/middleware/account_quotas.py +211 -46
  48. swift/common/middleware/acl.py +3 -8
  49. swift/common/middleware/backend_ratelimit.py +230 -0
  50. swift/common/middleware/bulk.py +22 -34
  51. swift/common/middleware/catch_errors.py +1 -3
  52. swift/common/middleware/cname_lookup.py +6 -11
  53. swift/common/middleware/container_quotas.py +1 -1
  54. swift/common/middleware/container_sync.py +39 -17
  55. swift/common/middleware/copy.py +12 -0
  56. swift/common/middleware/crossdomain.py +22 -9
  57. swift/common/middleware/crypto/__init__.py +2 -1
  58. swift/common/middleware/crypto/crypto_utils.py +11 -15
  59. swift/common/middleware/crypto/decrypter.py +28 -11
  60. swift/common/middleware/crypto/encrypter.py +12 -17
  61. swift/common/middleware/crypto/keymaster.py +8 -15
  62. swift/common/middleware/crypto/kms_keymaster.py +2 -1
  63. swift/common/middleware/dlo.py +15 -11
  64. swift/common/middleware/domain_remap.py +5 -4
  65. swift/common/middleware/etag_quoter.py +128 -0
  66. swift/common/middleware/formpost.py +73 -70
  67. swift/common/middleware/gatekeeper.py +8 -1
  68. swift/common/middleware/keystoneauth.py +33 -3
  69. swift/common/middleware/list_endpoints.py +4 -4
  70. swift/common/middleware/listing_formats.py +85 -49
  71. swift/common/middleware/memcache.py +4 -81
  72. swift/common/middleware/name_check.py +3 -2
  73. swift/common/middleware/proxy_logging.py +160 -92
  74. swift/common/middleware/ratelimit.py +17 -10
  75. swift/common/middleware/read_only.py +6 -4
  76. swift/common/middleware/recon.py +59 -22
  77. swift/common/middleware/s3api/acl_handlers.py +25 -3
  78. swift/common/middleware/s3api/acl_utils.py +6 -1
  79. swift/common/middleware/s3api/controllers/__init__.py +6 -0
  80. swift/common/middleware/s3api/controllers/acl.py +3 -2
  81. swift/common/middleware/s3api/controllers/bucket.py +242 -137
  82. swift/common/middleware/s3api/controllers/logging.py +2 -2
  83. swift/common/middleware/s3api/controllers/multi_delete.py +43 -20
  84. swift/common/middleware/s3api/controllers/multi_upload.py +219 -133
  85. swift/common/middleware/s3api/controllers/obj.py +112 -8
  86. swift/common/middleware/s3api/controllers/object_lock.py +44 -0
  87. swift/common/middleware/s3api/controllers/s3_acl.py +2 -2
  88. swift/common/middleware/s3api/controllers/tagging.py +57 -0
  89. swift/common/middleware/s3api/controllers/versioning.py +36 -7
  90. swift/common/middleware/s3api/etree.py +22 -9
  91. swift/common/middleware/s3api/exception.py +0 -4
  92. swift/common/middleware/s3api/s3api.py +113 -41
  93. swift/common/middleware/s3api/s3request.py +384 -218
  94. swift/common/middleware/s3api/s3response.py +126 -23
  95. swift/common/middleware/s3api/s3token.py +16 -17
  96. swift/common/middleware/s3api/schema/delete.rng +1 -1
  97. swift/common/middleware/s3api/subresource.py +7 -10
  98. swift/common/middleware/s3api/utils.py +27 -10
  99. swift/common/middleware/slo.py +665 -358
  100. swift/common/middleware/staticweb.py +64 -37
  101. swift/common/middleware/symlink.py +52 -19
  102. swift/common/middleware/tempauth.py +76 -58
  103. swift/common/middleware/tempurl.py +192 -174
  104. swift/common/middleware/versioned_writes/__init__.py +51 -0
  105. swift/common/middleware/{versioned_writes.py → versioned_writes/legacy.py} +27 -26
  106. swift/common/middleware/versioned_writes/object_versioning.py +1482 -0
  107. swift/common/middleware/x_profile/exceptions.py +1 -4
  108. swift/common/middleware/x_profile/html_viewer.py +18 -19
  109. swift/common/middleware/x_profile/profile_model.py +1 -2
  110. swift/common/middleware/xprofile.py +10 -10
  111. swift-2.23.2.data/scripts/swift-container-server → swift/common/recon.py +13 -8
  112. swift/common/registry.py +147 -0
  113. swift/common/request_helpers.py +324 -57
  114. swift/common/ring/builder.py +67 -25
  115. swift/common/ring/composite_builder.py +1 -1
  116. swift/common/ring/ring.py +177 -51
  117. swift/common/ring/utils.py +1 -1
  118. swift/common/splice.py +10 -6
  119. swift/common/statsd_client.py +205 -0
  120. swift/common/storage_policy.py +49 -44
  121. swift/common/swob.py +86 -102
  122. swift/common/{utils.py → utils/__init__.py} +2191 -2762
  123. swift/common/utils/base.py +131 -0
  124. swift/common/utils/config.py +433 -0
  125. swift/common/utils/ipaddrs.py +256 -0
  126. swift/common/utils/libc.py +345 -0
  127. swift/common/utils/logs.py +859 -0
  128. swift/common/utils/timestamp.py +412 -0
  129. swift/common/wsgi.py +555 -536
  130. swift/container/auditor.py +14 -100
  131. swift/container/backend.py +552 -227
  132. swift/container/reconciler.py +126 -37
  133. swift/container/replicator.py +96 -22
  134. swift/container/server.py +397 -176
  135. swift/container/sharder.py +1580 -639
  136. swift/container/sync.py +94 -88
  137. swift/container/updater.py +53 -32
  138. swift/obj/auditor.py +153 -35
  139. swift/obj/diskfile.py +466 -217
  140. swift/obj/expirer.py +406 -124
  141. swift/obj/mem_diskfile.py +7 -4
  142. swift/obj/mem_server.py +1 -0
  143. swift/obj/reconstructor.py +523 -262
  144. swift/obj/replicator.py +249 -188
  145. swift/obj/server.py +213 -122
  146. swift/obj/ssync_receiver.py +145 -85
  147. swift/obj/ssync_sender.py +113 -54
  148. swift/obj/updater.py +653 -139
  149. swift/obj/watchers/__init__.py +0 -0
  150. swift/obj/watchers/dark_data.py +213 -0
  151. swift/proxy/controllers/account.py +11 -11
  152. swift/proxy/controllers/base.py +848 -604
  153. swift/proxy/controllers/container.py +452 -86
  154. swift/proxy/controllers/info.py +3 -2
  155. swift/proxy/controllers/obj.py +1009 -490
  156. swift/proxy/server.py +185 -112
  157. swift-2.35.0.dist-info/AUTHORS +501 -0
  158. swift-2.35.0.dist-info/LICENSE +202 -0
  159. {swift-2.23.2.dist-info → swift-2.35.0.dist-info}/METADATA +52 -61
  160. swift-2.35.0.dist-info/RECORD +201 -0
  161. {swift-2.23.2.dist-info → swift-2.35.0.dist-info}/WHEEL +1 -1
  162. {swift-2.23.2.dist-info → swift-2.35.0.dist-info}/entry_points.txt +43 -0
  163. swift-2.35.0.dist-info/pbr.json +1 -0
  164. swift/locale/de/LC_MESSAGES/swift.po +0 -1216
  165. swift/locale/en_GB/LC_MESSAGES/swift.po +0 -1207
  166. swift/locale/es/LC_MESSAGES/swift.po +0 -1085
  167. swift/locale/fr/LC_MESSAGES/swift.po +0 -909
  168. swift/locale/it/LC_MESSAGES/swift.po +0 -894
  169. swift/locale/ja/LC_MESSAGES/swift.po +0 -965
  170. swift/locale/ko_KR/LC_MESSAGES/swift.po +0 -964
  171. swift/locale/pt_BR/LC_MESSAGES/swift.po +0 -881
  172. swift/locale/ru/LC_MESSAGES/swift.po +0 -891
  173. swift/locale/tr_TR/LC_MESSAGES/swift.po +0 -832
  174. swift/locale/zh_CN/LC_MESSAGES/swift.po +0 -833
  175. swift/locale/zh_TW/LC_MESSAGES/swift.po +0 -838
  176. swift-2.23.2.data/scripts/swift-account-auditor +0 -23
  177. swift-2.23.2.data/scripts/swift-account-info +0 -51
  178. swift-2.23.2.data/scripts/swift-account-reaper +0 -23
  179. swift-2.23.2.data/scripts/swift-account-replicator +0 -34
  180. swift-2.23.2.data/scripts/swift-account-server +0 -23
  181. swift-2.23.2.data/scripts/swift-container-auditor +0 -23
  182. swift-2.23.2.data/scripts/swift-container-info +0 -51
  183. swift-2.23.2.data/scripts/swift-container-reconciler +0 -21
  184. swift-2.23.2.data/scripts/swift-container-replicator +0 -34
  185. swift-2.23.2.data/scripts/swift-container-sharder +0 -33
  186. swift-2.23.2.data/scripts/swift-container-sync +0 -23
  187. swift-2.23.2.data/scripts/swift-container-updater +0 -23
  188. swift-2.23.2.data/scripts/swift-dispersion-report +0 -24
  189. swift-2.23.2.data/scripts/swift-form-signature +0 -20
  190. swift-2.23.2.data/scripts/swift-init +0 -119
  191. swift-2.23.2.data/scripts/swift-object-auditor +0 -29
  192. swift-2.23.2.data/scripts/swift-object-expirer +0 -33
  193. swift-2.23.2.data/scripts/swift-object-info +0 -60
  194. swift-2.23.2.data/scripts/swift-object-reconstructor +0 -33
  195. swift-2.23.2.data/scripts/swift-object-relinker +0 -41
  196. swift-2.23.2.data/scripts/swift-object-replicator +0 -37
  197. swift-2.23.2.data/scripts/swift-object-server +0 -27
  198. swift-2.23.2.data/scripts/swift-object-updater +0 -23
  199. swift-2.23.2.data/scripts/swift-proxy-server +0 -23
  200. swift-2.23.2.data/scripts/swift-recon +0 -24
  201. swift-2.23.2.data/scripts/swift-ring-builder +0 -24
  202. swift-2.23.2.data/scripts/swift-ring-builder-analyzer +0 -22
  203. swift-2.23.2.data/scripts/swift-ring-composer +0 -22
  204. swift-2.23.2.dist-info/DESCRIPTION.rst +0 -166
  205. swift-2.23.2.dist-info/RECORD +0 -220
  206. swift-2.23.2.dist-info/metadata.json +0 -1
  207. swift-2.23.2.dist-info/pbr.json +0 -1
  208. {swift-2.23.2.dist-info → swift-2.35.0.dist-info}/top_level.txt +0 -0
@@ -12,31 +12,38 @@
12
12
  # implied.
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
-
15
+ import collections
16
16
  import errno
17
17
  import json
18
+ import logging
19
+ import operator
20
+ from optparse import OptionParser
18
21
  import time
19
22
  from collections import defaultdict
23
+ from operator import itemgetter
20
24
  from random import random
21
25
 
22
26
  import os
23
- import six
27
+ from urllib.parse import quote
24
28
  from eventlet import Timeout
29
+ from contextlib import contextmanager
25
30
 
26
31
  from swift.common import internal_client
27
- from swift.common.constraints import check_drive
32
+ from swift.common.constraints import check_drive, AUTO_CREATE_ACCOUNT_PREFIX
28
33
  from swift.common.direct_client import (direct_put_container,
29
34
  DirectClientException)
30
- from swift.common.exceptions import DeviceUnavailable
35
+ from swift.common.daemon import run_daemon
36
+ from swift.common.request_helpers import USE_REPLICATION_NETWORK_HEADER
31
37
  from swift.common.ring.utils import is_local_device
32
38
  from swift.common.swob import str_to_wsgi
33
39
  from swift.common.utils import get_logger, config_true_value, \
34
40
  dump_recon_cache, whataremyips, Timestamp, ShardRange, GreenAsyncPile, \
35
- config_float_value, config_positive_int_value, \
36
- quorum_size, parse_override_options, Everything, config_auto_int_value
41
+ config_positive_int_value, quorum_size, parse_override_options, \
42
+ Everything, config_auto_int_value, ShardRangeList, config_percent_value, \
43
+ node_to_string, parse_options
37
44
  from swift.container.backend import ContainerBroker, \
38
45
  RECORD_TYPE_SHARD, UNSHARDED, SHARDING, SHARDED, COLLAPSED, \
39
- SHARD_UPDATE_STATES
46
+ SHARD_UPDATE_STATES, sift_shard_ranges, SHARD_UPDATE_STAT_STATES
40
47
  from swift.container.replicator import ContainerReplicator
41
48
 
42
49
 
@@ -44,6 +51,8 @@ CLEAVE_SUCCESS = 0
44
51
  CLEAVE_FAILED = 1
45
52
  CLEAVE_EMPTY = 2
46
53
 
54
+ DEFAULT_PERIODIC_WARNINGS_INTERVAL = 24 * 3600
55
+
47
56
 
48
57
  def sharding_enabled(broker):
49
58
  # NB all shards will by default have been created with
@@ -55,7 +64,7 @@ def sharding_enabled(broker):
55
64
  # if broker has been marked deleted it will have lost sysmeta, but we still
56
65
  # need to process the broker (for example, to shrink any shard ranges) so
57
66
  # fallback to checking if it has any shard ranges
58
- if broker.get_shard_ranges():
67
+ if broker.has_other_shard_ranges():
59
68
  return True
60
69
  return False
61
70
 
@@ -75,61 +84,166 @@ def make_shard_ranges(broker, shard_data, shards_account_prefix):
75
84
  return shard_ranges
76
85
 
77
86
 
78
- def find_missing_ranges(shard_ranges):
87
+ def _find_discontinuity(paths, start):
88
+ # select the path that reaches furthest from start into the namespace
89
+ start_paths = [path for path in paths if path.lower == start]
90
+ start_paths.sort(key=lambda p: p.upper)
91
+ longest_start_path = start_paths[-1]
92
+ # search for paths that end further into the namespace (note: these must
93
+ # have a lower that differs from the start_path upper, otherwise they would
94
+ # be part of the start_path longer!)
95
+ end_paths = [path for path in paths
96
+ if path.upper > longest_start_path.upper]
97
+ if end_paths:
98
+ # select those that begin nearest the start of the namespace
99
+ end_paths.sort(key=lambda p: p.lower)
100
+ end_paths = [p for p in end_paths if p.lower == end_paths[0].lower]
101
+ # select the longest of those
102
+ end_paths.sort(key=lambda p: p.upper)
103
+ longest_end_path = end_paths[-1]
104
+ else:
105
+ longest_end_path = None
106
+ return longest_start_path, longest_end_path
107
+
108
+
109
+ def find_paths_with_gaps(shard_ranges, within_range=None):
79
110
  """
80
- Find any ranges in the entire object namespace that are not covered by any
81
- shard range in the given list.
111
+ Find gaps in the shard ranges and pairs of shard range paths that lead to
112
+ and from those gaps. For each gap a single pair of adjacent paths is
113
+ selected. The concatenation of all selected paths and gaps will span the
114
+ entire namespace with no overlaps.
115
+
116
+ :param shard_ranges: a list of instances of ShardRange.
117
+ :param within_range: an optional ShardRange that constrains the search
118
+ space; the method will only return gaps within this range. The default
119
+ is the entire namespace.
120
+ :return: A list of tuples of ``(start_path, gap_range, end_path)`` where
121
+ ``start_path`` is a list of ShardRanges leading to the gap,
122
+ ``gap_range`` is a ShardRange synthesized to describe the namespace
123
+ gap, and ``end_path`` is a list of ShardRanges leading from the gap.
124
+ When gaps start or end at the namespace minimum or maximum bounds,
125
+ ``start_path`` and ``end_path`` may be 'null' paths that contain a
126
+ single ShardRange covering either the minimum or maximum of the
127
+ namespace.
128
+ """
129
+ timestamp = Timestamp.now()
130
+ within_range = within_range or ShardRange('entire/namespace', timestamp)
131
+ shard_ranges = ShardRangeList(shard_ranges)
132
+ # note: find_paths results do not include shrinking ranges
133
+ paths = find_paths(shard_ranges)
134
+ # add paths covering no namespace at start and end of namespace to ensure
135
+ # that a start_path and end_path is always found even when there is a gap
136
+ # at the start or end of the namespace
137
+ null_start = ShardRange('null/start', timestamp,
138
+ lower=ShardRange.MIN,
139
+ upper=ShardRange.MIN,
140
+ state=ShardRange.FOUND)
141
+ null_end = ShardRange('null/end', timestamp,
142
+ lower=ShardRange.MAX,
143
+ upper=ShardRange.MAX,
144
+ state=ShardRange.FOUND)
145
+ paths.extend([ShardRangeList([null_start]), ShardRangeList([null_end])])
146
+ paths_with_gaps = []
147
+ start = null_start.lower
148
+ while True:
149
+ start_path, end_path = _find_discontinuity(paths, start)
150
+ if end_path is None:
151
+ # end of namespace reached
152
+ break
153
+ start = end_path.lower
154
+ if start_path.upper > end_path.lower:
155
+ # overlap
156
+ continue
157
+ gap_range = ShardRange('gap/index_%06d' % len(paths_with_gaps),
158
+ timestamp,
159
+ lower=start_path.upper,
160
+ upper=end_path.lower)
161
+ if gap_range.overlaps(within_range):
162
+ paths_with_gaps.append((start_path, gap_range, end_path))
163
+ return paths_with_gaps
82
164
 
83
- :param shard_ranges: A list of :class:`~swift.utils.ShardRange`
84
- :return: a list of missing ranges
165
+
166
+ def _is_parent_or_child(shard_range, other, time_period):
85
167
  """
86
- gaps = []
87
- if not shard_ranges:
88
- return ((ShardRange.MIN, ShardRange.MAX),)
89
- if shard_ranges[0].lower > ShardRange.MIN:
90
- gaps.append((ShardRange.MIN, shard_ranges[0].lower))
91
- for first, second in zip(shard_ranges, shard_ranges[1:]):
92
- if first.upper < second.lower:
93
- gaps.append((first.upper, second.lower))
94
- if shard_ranges[-1].upper < ShardRange.MAX:
95
- gaps.append((shard_ranges[-1].upper, ShardRange.MAX))
96
- return gaps
97
-
98
-
99
- def find_overlapping_ranges(shard_ranges):
168
+ Test if shard range ``shard_range`` is the parent or a child of another
169
+ shard range ``other`` within past time period ``time_period``. This method
170
+ is limited to work only within the scope of the same user-facing account
171
+ (with and without shard prefix).
172
+
173
+ :param shard_range: an instance of ``ShardRange``.
174
+ :param other: an instance of ``ShardRange``.
175
+ :param time_period: the specified past time period in seconds. Value of
176
+ 0 means all time in the past.
177
+ :return: True if ``shard_range`` is the parent or a child of ``other``
178
+ within past time period, False otherwise, assuming that they are within
179
+ the same account.
180
+ """
181
+ exclude_age = (time.time() - float(time_period)) if time_period > 0 else 0
182
+ if shard_range.is_child_of(other) and shard_range.timestamp >= exclude_age:
183
+ return True
184
+ if other.is_child_of(shard_range) and other.timestamp >= exclude_age:
185
+ return True
186
+ return False
187
+
188
+
189
+ def find_overlapping_ranges(
190
+ shard_ranges, exclude_parent_child=False, time_period=0):
100
191
  """
101
192
  Find all pairs of overlapping ranges in the given list.
102
193
 
103
194
  :param shard_ranges: A list of :class:`~swift.utils.ShardRange`
195
+ :param exclude_parent_child: If True then overlapping pairs that have a
196
+ parent-child relationship within the past time period
197
+ ``time_period`` are excluded from the returned set. Default is
198
+ False.
199
+ :param time_period: the specified past time period in seconds. Value of
200
+ 0 means all time in the past.
104
201
  :return: a set of tuples, each tuple containing ranges that overlap with
105
202
  each other.
106
203
  """
107
204
  result = set()
108
- for shard_range in shard_ranges:
109
- overlapping = [sr for sr in shard_ranges
110
- if shard_range != sr and shard_range.overlaps(sr)]
205
+ for i, shard_range in enumerate(shard_ranges):
206
+ if exclude_parent_child:
207
+ overlapping = [
208
+ sr for sr in shard_ranges[i + 1:]
209
+ if shard_range.name != sr.name and shard_range.overlaps(sr) and
210
+ not _is_parent_or_child(shard_range, sr, time_period)]
211
+ else:
212
+ overlapping = [
213
+ sr for sr in shard_ranges[i + 1:]
214
+ if shard_range.name != sr.name and shard_range.overlaps(sr)]
111
215
  if overlapping:
112
216
  overlapping.append(shard_range)
113
- overlapping.sort()
217
+ overlapping.sort(key=ShardRange.sort_key)
114
218
  result.add(tuple(overlapping))
115
219
 
116
220
  return result
117
221
 
118
222
 
119
223
  def is_sharding_candidate(shard_range, threshold):
224
+ # note: use *object* count as the condition for sharding: tombstones will
225
+ # eventually be reclaimed so should not trigger sharding
120
226
  return (shard_range.state == ShardRange.ACTIVE and
121
227
  shard_range.object_count >= threshold)
122
228
 
123
229
 
230
+ def is_shrinking_candidate(shard_range, shrink_threshold, expansion_limit,
231
+ states=None):
232
+ # typically shrink_threshold < expansion_limit but check both just in case
233
+ # note: use *row* count (objects plus tombstones) as the condition for
234
+ # shrinking to avoid inadvertently moving large numbers of tombstones into
235
+ # an acceptor
236
+ states = states or (ShardRange.ACTIVE,)
237
+ return (shard_range.state in states and
238
+ shard_range.row_count < shrink_threshold and
239
+ shard_range.row_count <= expansion_limit)
240
+
241
+
124
242
  def find_sharding_candidates(broker, threshold, shard_ranges=None):
125
243
  # this should only execute on root containers; the goal is to find
126
244
  # large shard containers that should be sharded.
127
245
  # First cut is simple: assume root container shard usage stats are good
128
246
  # enough to make decision.
129
- # TODO: object counts may well not be the appropriate metric for
130
- # deciding to shrink because a shard with low object_count may have a
131
- # large number of deleted object rows that will need to be merged with
132
- # a neighbour. We may need to expose row count as well as object count.
133
247
  if shard_ranges is None:
134
248
  shard_ranges = broker.get_shard_ranges(states=[ShardRange.ACTIVE])
135
249
  candidates = []
@@ -143,63 +257,376 @@ def find_sharding_candidates(broker, threshold, shard_ranges=None):
143
257
  return candidates
144
258
 
145
259
 
146
- def find_shrinking_candidates(broker, shrink_threshold, merge_size):
260
+ def find_shrinking_candidates(broker, shrink_threshold, expansion_limit):
261
+ # this is only here to preserve a legacy public function signature;
262
+ # superseded by find_compactible_shard_sequences
263
+ merge_pairs = {}
264
+ # restrict search to sequences with one donor
265
+ results = find_compactible_shard_sequences(broker, shrink_threshold,
266
+ expansion_limit, 1, -1,
267
+ include_shrinking=True)
268
+ for sequence in results:
269
+ # map acceptor -> donor list
270
+ merge_pairs[sequence[-1]] = sequence[-2]
271
+ return merge_pairs
272
+
273
+
274
+ def find_compactible_shard_sequences(broker,
275
+ shrink_threshold,
276
+ expansion_limit,
277
+ max_shrinking,
278
+ max_expanding,
279
+ include_shrinking=False):
280
+ """
281
+ Find sequences of shard ranges that could be compacted into a single
282
+ acceptor shard range.
283
+
284
+ This function does not modify shard ranges.
285
+
286
+ :param broker: A :class:`~swift.container.backend.ContainerBroker`.
287
+ :param shrink_threshold: the number of rows below which a shard may be
288
+ considered for shrinking into another shard
289
+ :param expansion_limit: the maximum number of rows that an acceptor shard
290
+ range should have after other shard ranges have been compacted into it
291
+ :param max_shrinking: the maximum number of shard ranges that should be
292
+ compacted into each acceptor; -1 implies unlimited.
293
+ :param max_expanding: the maximum number of acceptors to be found (i.e. the
294
+ maximum number of sequences to be returned); -1 implies unlimited.
295
+ :param include_shrinking: if True then existing compactible sequences are
296
+ included in the results; default is False.
297
+ :returns: A list of :class:`~swift.common.utils.ShardRangeList` each
298
+ containing a sequence of neighbouring shard ranges that may be
299
+ compacted; the final shard range in the list is the acceptor
300
+ """
147
301
  # this should only execute on root containers that have sharded; the
148
302
  # goal is to find small shard containers that could be retired by
149
303
  # merging with a neighbour.
150
304
  # First cut is simple: assume root container shard usage stats are good
151
305
  # enough to make decision; only merge with upper neighbour so that
152
306
  # upper bounds never change (shard names include upper bound).
153
- # TODO: object counts may well not be the appropriate metric for
154
- # deciding to shrink because a shard with low object_count may have a
155
- # large number of deleted object rows that will need to be merged with
156
- # a neighbour. We may need to expose row count as well as object count.
157
307
  shard_ranges = broker.get_shard_ranges()
158
308
  own_shard_range = broker.get_own_shard_range()
159
- if len(shard_ranges) == 1:
160
- # special case to enable final shard to shrink into root
161
- shard_ranges.append(own_shard_range)
162
309
 
163
- merge_pairs = {}
164
- for donor, acceptor in zip(shard_ranges, shard_ranges[1:]):
165
- if donor in merge_pairs:
166
- # this range may already have been made an acceptor; if so then
167
- # move on. In principle it might be that even after expansion
168
- # this range and its donor(s) could all be merged with the next
169
- # range. In practice it is much easier to reason about a single
170
- # donor merging into a single acceptor. Don't fret - eventually
171
- # all the small ranges will be retired.
310
+ def sequence_complete(sequence):
311
+ # a sequence is considered complete if any of the following are true:
312
+ # - the final shard range has more objects than the shrink_threshold,
313
+ # so should not be shrunk (this shard will be the acceptor)
314
+ # - the max number of shard ranges to be compacted (max_shrinking) has
315
+ # been reached
316
+ # - the total number of objects in the sequence has reached the
317
+ # expansion_limit
318
+ if (sequence and
319
+ (not is_shrinking_candidate(
320
+ sequence[-1], shrink_threshold, expansion_limit,
321
+ states=(ShardRange.ACTIVE, ShardRange.SHRINKING)) or
322
+ 0 < max_shrinking < len(sequence) or
323
+ sequence.row_count >= expansion_limit)):
324
+ return True
325
+ return False
326
+
327
+ compactible_sequences = []
328
+ index = 0
329
+ expanding = 0
330
+ while ((max_expanding < 0 or expanding < max_expanding) and
331
+ index < len(shard_ranges)):
332
+ if not is_shrinking_candidate(
333
+ shard_ranges[index], shrink_threshold, expansion_limit,
334
+ states=(ShardRange.ACTIVE, ShardRange.SHRINKING)):
335
+ # this shard range cannot be the start of a new or existing
336
+ # compactible sequence, move on
337
+ index += 1
172
338
  continue
173
- if (acceptor.name != own_shard_range.name and
174
- acceptor.state != ShardRange.ACTIVE):
175
- # don't shrink into a range that is not yet ACTIVE
339
+
340
+ # start of a *possible* sequence
341
+ sequence = ShardRangeList([shard_ranges[index]])
342
+ for shard_range in shard_ranges[index + 1:]:
343
+ # attempt to add contiguous shard ranges to the sequence
344
+ if sequence.upper < shard_range.lower:
345
+ # found a gap! break before consuming this range because it
346
+ # could become the first in the next sequence
347
+ break
348
+
349
+ if shard_range.state not in (ShardRange.ACTIVE,
350
+ ShardRange.SHRINKING):
351
+ # found? created? sharded? don't touch it
352
+ break
353
+
354
+ if shard_range.state == ShardRange.SHRINKING:
355
+ # already shrinking: add to sequence unconditionally
356
+ sequence.append(shard_range)
357
+ elif (sequence.row_count + shard_range.row_count
358
+ <= expansion_limit):
359
+ # add to sequence: could be a donor or acceptor
360
+ sequence.append(shard_range)
361
+ if sequence_complete(sequence):
362
+ break
363
+ else:
364
+ break
365
+
366
+ index += len(sequence)
367
+ if (index == len(shard_ranges) and
368
+ len(shard_ranges) == len(sequence) and
369
+ not sequence_complete(sequence) and
370
+ sequence.includes(own_shard_range)):
371
+ # special case: only one sequence has been found, which consumes
372
+ # all shard ranges, encompasses the entire namespace, has no more
373
+ # than expansion_limit records and whose shard ranges are all
374
+ # shrinkable; all the shards in the sequence can be shrunk to the
375
+ # root, so append own_shard_range to the sequence to act as an
376
+ # acceptor; note: only shrink to the root when *all* the remaining
377
+ # shard ranges can be simultaneously shrunk to the root.
378
+ sequence.append(own_shard_range)
379
+
380
+ if len(sequence) < 2 or sequence[-1].state not in (ShardRange.ACTIVE,
381
+ ShardRange.SHARDED):
382
+ # this sequence doesn't end with a suitable acceptor shard range
383
+ continue
384
+
385
+ # all valid sequences are counted against the max_expanding allowance
386
+ # even if the sequence is already shrinking
387
+ expanding += 1
388
+ if (all([sr.state != ShardRange.SHRINKING for sr in sequence]) or
389
+ include_shrinking):
390
+ compactible_sequences.append(sequence)
391
+
392
+ return compactible_sequences
393
+
394
+
395
+ def finalize_shrinking(broker, acceptor_ranges, donor_ranges, timestamp):
396
+ """
397
+ Update donor shard ranges to shrinking state and merge donors and acceptors
398
+ to broker.
399
+
400
+ :param broker: A :class:`~swift.container.backend.ContainerBroker`.
401
+ :param acceptor_ranges: A list of :class:`~swift.common.utils.ShardRange`
402
+ that are to be acceptors.
403
+ :param donor_ranges: A list of :class:`~swift.common.utils.ShardRange`
404
+ that are to be donors; these will have their state and timestamp
405
+ updated.
406
+ :param timestamp: timestamp to use when updating donor state
407
+ """
408
+ for donor in donor_ranges:
409
+ if donor.update_state(ShardRange.SHRINKING):
410
+ # Set donor state to shrinking state_timestamp defines new epoch
411
+ donor.epoch = donor.state_timestamp = timestamp
412
+ broker.merge_shard_ranges(acceptor_ranges + donor_ranges)
413
+
414
+
415
+ def process_compactible_shard_sequences(broker, sequences):
416
+ """
417
+ Transform the given sequences of shard ranges into a list of acceptors and
418
+ a list of shrinking donors. For each given sequence the final ShardRange in
419
+ the sequence (the acceptor) is expanded to accommodate the other
420
+ ShardRanges in the sequence (the donors). The donors and acceptors are then
421
+ merged into the broker.
422
+
423
+ :param broker: A :class:`~swift.container.backend.ContainerBroker`.
424
+ :param sequences: A list of :class:`~swift.common.utils.ShardRangeList`
425
+ """
426
+ timestamp = Timestamp.now()
427
+ acceptor_ranges = []
428
+ shrinking_ranges = []
429
+ for sequence in sequences:
430
+ donors = sequence[:-1]
431
+ shrinking_ranges.extend(donors)
432
+ # Update the acceptor container with its expanded bounds to prevent it
433
+ # treating objects cleaved from the donor as misplaced.
434
+ acceptor = sequence[-1]
435
+ if acceptor.expand(donors):
436
+ # Update the acceptor container with its expanded bounds to prevent
437
+ # it treating objects cleaved from the donor as misplaced.
438
+ acceptor.timestamp = timestamp
439
+ if acceptor.update_state(ShardRange.ACTIVE):
440
+ # Ensure acceptor state is ACTIVE (when acceptor is root)
441
+ acceptor.state_timestamp = timestamp
442
+ acceptor_ranges.append(acceptor)
443
+ finalize_shrinking(broker, acceptor_ranges, shrinking_ranges, timestamp)
444
+
445
+
446
+ def find_paths(shard_ranges):
447
+ """
448
+ Returns a list of all continuous paths through the shard ranges. An
449
+ individual path may not necessarily span the entire namespace, but it will
450
+ span a continuous namespace without gaps.
451
+
452
+ :param shard_ranges: A list of :class:`~swift.common.utils.ShardRange`.
453
+ :return: A list of :class:`~swift.common.utils.ShardRangeList`.
454
+ """
455
+ # A node is a point in the namespace that is used as a bound of any shard
456
+ # range. Shard ranges form the edges between nodes.
457
+
458
+ # First build a dict mapping nodes to a list of edges that leave that node
459
+ # (in other words, shard ranges whose lower bound equals the node)
460
+ node_successors = collections.defaultdict(list)
461
+ for shard_range in shard_ranges:
462
+ if shard_range.state == ShardRange.SHRINKING:
463
+ # shrinking shards are not a viable edge in any path
176
464
  continue
177
- if donor.state not in (ShardRange.ACTIVE, ShardRange.SHRINKING):
178
- # found? created? sharded? don't touch it
465
+ node_successors[shard_range.lower].append(shard_range)
466
+
467
+ paths = []
468
+
469
+ def clone_path(other=None):
470
+ # create a new path, possibly cloning another path, and add it to the
471
+ # list of all paths through the shards
472
+ path = ShardRangeList() if other is None else ShardRangeList(other)
473
+ paths.append(path)
474
+ return path
475
+
476
+ # we need to keep track of every path that ends at each node so that when
477
+ # we visit the node we can extend those paths, or clones of them, with the
478
+ # edges that leave the node
479
+ paths_to_node = collections.defaultdict(list)
480
+
481
+ # visit the nodes in ascending order by name...
482
+ for node, edges in sorted(node_successors.items()):
483
+ if not edges:
484
+ # this node is a dead-end, so there's no path updates to make
179
485
  continue
486
+ if not paths_to_node[node]:
487
+ # this is either the first node to be visited, or it has no paths
488
+ # leading to it, so we need to start a new path here
489
+ paths_to_node[node].append(clone_path([]))
490
+ for path_to_node in paths_to_node[node]:
491
+ # extend each path that arrives at this node with all of the
492
+ # possible edges that leave the node; if more than one edge leaves the
493
+ # node then we will make clones of the path to the node and extend
494
+ # those clones, adding to the collection of all paths through the
495
+ # shards
496
+ for i, edge in enumerate(edges):
497
+ if i == len(edges) - 1:
498
+ # the last edge is used to extend the original path to the
499
+ # node; there is nothing special about the last edge, but
500
+ # doing this last means the original path to the node can
501
+ # be cloned for all other edges before being modified here
502
+ path = path_to_node
503
+ else:
504
+ # for all but one of the edges leaving the node we need to
505
+ # make a clone of the original path
506
+ path = clone_path(path_to_node)
507
+ # extend the path with the edge
508
+ path.append(edge)
509
+ # keep track of which node this path now arrives at
510
+ paths_to_node[edge.upper].append(path)
511
+ return paths
512
+
513
+
514
+ def rank_paths(paths, shard_range_to_span):
515
+ """
516
+ Sorts the given list of paths such that the most preferred path is the
517
+ first item in the list.
518
+
519
+ :param paths: A list of :class:`~swift.common.utils.ShardRangeList`.
520
+ :param shard_range_to_span: An instance of
521
+ :class:`~swift.common.utils.ShardRange` that describes the namespace
522
+ that would ideally be spanned by a path. Paths that include this
523
+ namespace will be preferred over those that do not.
524
+ :return: A sorted list of :class:`~swift.common.utils.ShardRangeList`.
525
+ """
526
+ def sort_key(path):
527
+ # defines the order of preference for paths through shards
528
+ return (
529
+ # complete path for the namespace
530
+ path.includes(shard_range_to_span),
531
+ # most cleaving progress
532
+ path.find_lower(lambda sr: sr.state not in (
533
+ ShardRange.CLEAVED, ShardRange.ACTIVE)),
534
+ # largest object count
535
+ path.object_count,
536
+ # fewest timestamps
537
+ -1 * len(path.timestamps),
538
+ # newest timestamp
539
+ sorted(path.timestamps)[-1]
540
+ )
541
+
542
+ paths.sort(key=sort_key, reverse=True)
543
+ return paths
180
544
 
181
- proposed_object_count = donor.object_count + acceptor.object_count
182
- if (donor.state == ShardRange.SHRINKING or
183
- (donor.object_count < shrink_threshold and
184
- proposed_object_count < merge_size)):
185
- # include previously identified merge pairs on presumption that
186
- # following shrink procedure is idempotent
187
- merge_pairs[acceptor] = donor
188
- if donor.update_state(ShardRange.SHRINKING):
189
- # Set donor state to shrinking so that next cycle won't use
190
- # it as an acceptor; state_timestamp defines new epoch for
191
- # donor and new timestamp for the expanded acceptor below.
192
- donor.epoch = donor.state_timestamp = Timestamp.now()
193
- if acceptor.lower != donor.lower:
194
- # Update the acceptor container with its expanding state to
195
- # prevent it treating objects cleaved from the donor
196
- # as misplaced.
197
- acceptor.lower = donor.lower
198
- acceptor.timestamp = donor.state_timestamp
199
- return merge_pairs
545
+
546
+ def combine_shard_ranges(new_shard_ranges, existing_shard_ranges):
547
+ """
548
+ Combines new and existing shard ranges based on most recent state.
549
+
550
+ :param new_shard_ranges: a list of ShardRange instances.
551
+ :param existing_shard_ranges: a list of ShardRange instances.
552
+ :return: a list of ShardRange instances.
553
+ """
554
+ new_shard_ranges = [dict(sr) for sr in new_shard_ranges]
555
+ existing_shard_ranges = [dict(sr) for sr in existing_shard_ranges]
556
+ to_add, to_delete = sift_shard_ranges(
557
+ new_shard_ranges,
558
+ dict((sr['name'], sr) for sr in existing_shard_ranges))
559
+ result = [ShardRange.from_dict(existing)
560
+ for existing in existing_shard_ranges
561
+ if existing['name'] not in to_delete]
562
+ result.extend([ShardRange.from_dict(sr) for sr in to_add])
563
+ return sorted([sr for sr in result if not sr.deleted],
564
+ key=ShardRange.sort_key)
565
+
566
+
567
+ def update_own_shard_range_stats(broker, own_shard_range):
568
+ """
569
+ Update the ``own_shard_range`` with the up-to-date object stats from
570
+ the ``broker``.
571
+
572
+ Note: this method does not persist the updated ``own_shard_range``;
573
+ callers should use ``broker.merge_shard_ranges`` if the updated stats
574
+ need to be persisted.
575
+
576
+ :param broker: an instance of ``ContainerBroker``.
577
+ :param own_shard_range: an instance of ``ShardRange``.
578
+ :returns: ``own_shard_range`` with up-to-date ``object_count``
579
+ and ``bytes_used``.
580
+ """
581
+ info = broker.get_info()
582
+ own_shard_range.update_meta(
583
+ info['object_count'], info['bytes_used'])
584
+ return own_shard_range
200
585
 
201
586
 
202
587
  class CleavingContext(object):
588
+ """
589
+ Encapsulates metadata associated with the process of cleaving a retiring
590
+ DB. This metadata includes:
591
+
592
+ * ``ref``: The unique part of the key that is used when persisting a
593
+ serialized ``CleavingContext`` as sysmeta in the DB. The unique part of
594
+ the key is based off the DB id. This ensures that each context is
595
+ associated with a specific DB file. The unique part of the key is
596
+ included in the ``CleavingContext`` but should not be modified by any
597
+ caller.
598
+
599
+ * ``cursor``: the upper bound of the last shard range to have been
600
+ cleaved from the retiring DB.
601
+
602
+ * ``max_row``: the retiring DB's max row; this is updated to the value of
603
+ the retiring DB's ``max_row`` every time a ``CleavingContext`` is
604
+ loaded for that DB, and may change during the process of cleaving the
605
+ DB.
606
+
607
+ * ``cleave_to_row``: the value of ``max_row`` at the moment when cleaving
608
+ starts for the DB. When cleaving completes (i.e. the cleave cursor has
609
+ reached the upper bound of the cleaving namespace), ``cleave_to_row``
610
+ is compared to the current ``max_row``: if the two values are not equal
611
+ then rows have been added to the DB which may not have been cleaved, in
612
+ which case the ``CleavingContext`` is ``reset`` and cleaving is
613
+ re-started.
614
+
615
+ * ``last_cleave_to_row``: the minimum DB row from which cleaving should
616
+ select objects to cleave; this is initially set to None i.e. all rows
617
+ should be cleaved. If the ``CleavingContext`` is ``reset`` then the
618
+ ``last_cleave_to_row`` is set to the current value of
619
+ ``cleave_to_row``, which in turn is set to the current value of
620
+ ``max_row`` by a subsequent call to ``start``. The repeated cleaving
621
+ therefore only selects objects in rows greater than the
622
+ ``last_cleave_to_row``, rather than cleaving the whole DB again.
623
+
624
+ * ``ranges_done``: the number of shard ranges that have been cleaved from
625
+ the retiring DB.
626
+
627
+ * ``ranges_todo``: the number of shard ranges that are yet to be
628
+ cleaved from the retiring DB.
629
+ """
203
630
  def __init__(self, ref, cursor='', max_row=None, cleave_to_row=None,
204
631
  last_cleave_to_row=None, cleaving_done=False,
205
632
  misplaced_done=False, ranges_done=0, ranges_todo=0):
@@ -229,18 +656,13 @@ class CleavingContext(object):
229
656
  return '%s(%s)' % (self.__class__.__name__, ', '.join(
230
657
  '%s=%r' % prop for prop in self))
231
658
 
232
- def _encode(cls, value):
233
- if value is not None and six.PY2 and isinstance(value, six.text_type):
234
- return value.encode('utf-8')
235
- return value
236
-
237
659
  @property
238
660
  def cursor(self):
239
661
  return self._cursor
240
662
 
241
663
  @cursor.setter
242
664
  def cursor(self, value):
243
- self._cursor = self._encode(value)
665
+ self._cursor = value
244
666
 
245
667
  @property
246
668
  def marker(self):
@@ -253,37 +675,33 @@ class CleavingContext(object):
253
675
  @classmethod
254
676
  def load_all(cls, broker):
255
677
  """
256
- Returns all cleaving contexts stored in the broker.
678
+ Returns all cleaving contexts stored in the broker's DB.
257
679
 
258
- :param broker:
680
+ :param broker: an instance of :class:`ContainerBroker`
259
681
  :return: list of tuples of (CleavingContext, timestamp)
260
682
  """
261
683
  brokers = broker.get_brokers()
262
684
  sysmeta = brokers[-1].get_sharding_sysmeta_with_timestamps()
263
685
 
686
+ contexts = []
264
687
  for key, (val, timestamp) in sysmeta.items():
265
- # If the value is of length 0, then the metadata is
688
+ # If the value is blank, then the metadata is
266
689
  # marked for deletion
267
- if key.startswith("Context-") and len(val) > 0:
690
+ if key.startswith("Context-") and val:
268
691
  try:
269
- yield cls(**json.loads(val)), timestamp
692
+ contexts.append((cls(**json.loads(val)), timestamp))
270
693
  except ValueError:
271
694
  continue
695
+ return contexts
272
696
 
273
697
  @classmethod
274
698
  def load(cls, broker):
275
699
  """
276
- Returns a context dict for tracking the progress of cleaving this
277
- broker's retiring DB. The context is persisted in sysmeta using a key
278
- that is based off the retiring db id and max row. This form of
279
- key ensures that a cleaving context is only loaded for a db that
280
- matches the id and max row when the context was created; if a db is
281
- modified such that its max row changes then a different context, or no
282
- context, will be loaded.
283
-
284
- :return: A dict to which cleave progress metadata may be added. The
285
- dict initially has a key ``ref`` which should not be modified by
286
- any caller.
700
+ Returns a CleavingContext tracking the cleaving progress of the given
701
+ broker's DB.
702
+
703
+ :param broker: an instance of :class:`ContainerBroker`
704
+ :return: An instance of :class:`CleavingContext`.
287
705
  """
288
706
  brokers = broker.get_brokers()
289
707
  ref = cls._make_ref(brokers[0])
@@ -294,6 +712,12 @@ class CleavingContext(object):
294
712
  return cls(**data)
295
713
 
296
714
  def store(self, broker):
715
+ """
716
+ Persists the serialized ``CleavingContext`` as sysmeta in the given
717
+ broker's DB.
718
+
719
+ :param broker: an instance of :class:`ContainerBroker`
720
+ """
297
721
  broker.set_sharding_sysmeta('Context-' + self.ref,
298
722
  json.dumps(dict(self)))
299
723
 
@@ -312,6 +736,11 @@ class CleavingContext(object):
312
736
  self.cleaving_done = False
313
737
  self.cleave_to_row = self.max_row
314
738
 
739
+ def range_done(self, new_cursor):
740
+ self.ranges_done += 1
741
+ self.ranges_todo -= 1
742
+ self.cursor = new_cursor
743
+
315
744
  def done(self):
316
745
  return all((self.misplaced_done, self.cleaving_done,
317
746
  self.max_row == self.cleave_to_row))
@@ -322,51 +751,108 @@ class CleavingContext(object):
322
751
  broker.set_sharding_sysmeta('Context-' + self.ref, '')
323
752
 
324
753
 
325
- DEFAULT_SHARD_CONTAINER_THRESHOLD = 1000000
326
- DEFAULT_SHARD_SHRINK_POINT = 25
327
- DEFAULT_SHARD_MERGE_POINT = 75
754
+ class ContainerSharderConf(object):
755
+ def __init__(self, conf=None):
756
+ conf = conf if conf else {}
757
+
758
+ def get_val(key, validator, default):
759
+ """
760
+ Get a value from conf and validate it.
761
+
762
+ :param key: key to lookup value in the ``conf`` dict.
763
+ :param validator: A function that will be passed the value from the
764
+ ``conf`` dict and should return the value to be set. This
765
+ function should raise a ValueError if the ``conf`` value is not
766
+ valid.
767
+ :param default: value to use if ``key`` is not found in ``conf``.
768
+ :raises: ValueError if the value read from ``conf`` is invalid.
769
+ :returns: the configuration value.
770
+ """
771
+ try:
772
+ return validator(conf.get(key, default))
773
+ except ValueError as err:
774
+ raise ValueError('Error setting %s: %s' % (key, err))
775
+
776
+ self.shard_container_threshold = get_val(
777
+ 'shard_container_threshold', config_positive_int_value, 1000000)
778
+ self.max_shrinking = get_val(
779
+ 'max_shrinking', int, 1)
780
+ self.max_expanding = get_val(
781
+ 'max_expanding', int, -1)
782
+ self.shard_scanner_batch_size = get_val(
783
+ 'shard_scanner_batch_size', config_positive_int_value, 10)
784
+ self.cleave_batch_size = get_val(
785
+ 'cleave_batch_size', config_positive_int_value, 2)
786
+ self.cleave_row_batch_size = get_val(
787
+ 'cleave_row_batch_size', config_positive_int_value, 10000)
788
+ self.broker_timeout = get_val(
789
+ 'broker_timeout', config_positive_int_value, 60)
790
+ self.recon_candidates_limit = get_val(
791
+ 'recon_candidates_limit', int, 5)
792
+ self.recon_sharded_timeout = get_val(
793
+ 'recon_sharded_timeout', int, 43200)
794
+ self.container_sharding_timeout = get_val(
795
+ 'container_sharding_timeout', int, 172800)
796
+ self.conn_timeout = get_val(
797
+ 'conn_timeout', float, 5)
798
+ self.auto_shard = get_val(
799
+ 'auto_shard', config_true_value, False)
800
+ # deprecated percent options still loaded...
801
+ self.shrink_threshold = get_val(
802
+ 'shard_shrink_point', self.percent_of_threshold, 10)
803
+ self.expansion_limit = get_val(
804
+ 'shard_shrink_merge_point', self.percent_of_threshold, 75)
805
+ # ...but superseded by absolute options if present in conf
806
+ self.shrink_threshold = get_val(
807
+ 'shrink_threshold', int, self.shrink_threshold)
808
+ self.expansion_limit = get_val(
809
+ 'expansion_limit', int, self.expansion_limit)
810
+ self.rows_per_shard = get_val(
811
+ 'rows_per_shard', config_positive_int_value,
812
+ max(self.shard_container_threshold // 2, 1))
813
+ self.minimum_shard_size = get_val(
814
+ 'minimum_shard_size', config_positive_int_value,
815
+ max(self.rows_per_shard // 5, 1))
816
+
817
+ def percent_of_threshold(self, val):
818
+ return int(config_percent_value(val) * self.shard_container_threshold)
819
+
820
+ @classmethod
821
+ def validate_conf(cls, namespace):
822
+ ops = {'<': operator.lt,
823
+ '<=': operator.le}
824
+ checks = (('minimum_shard_size', '<=', 'rows_per_shard'),
825
+ ('shrink_threshold', '<=', 'minimum_shard_size'),
826
+ ('rows_per_shard', '<', 'shard_container_threshold'),
827
+ ('expansion_limit', '<', 'shard_container_threshold'))
828
+ for key1, op, key2 in checks:
829
+ try:
830
+ val1 = getattr(namespace, key1)
831
+ val2 = getattr(namespace, key2)
832
+ except AttributeError:
833
+ # swift-manage-shard-ranges uses a subset of conf options for
834
+ # each command so only validate those actually in the namespace
835
+ continue
836
+ if not ops[op](val1, val2):
837
+ raise ValueError('%s (%d) must be %s %s (%d)'
838
+ % (key1, val1, op, key2, val2))
839
+
328
840
 
841
+ DEFAULT_SHARDER_CONF = vars(ContainerSharderConf())
329
842
 
330
- class ContainerSharder(ContainerReplicator):
843
+
844
+ class ContainerSharder(ContainerSharderConf, ContainerReplicator):
331
845
  """Shards containers."""
846
+ log_route = 'container-sharder'
332
847
 
333
848
  def __init__(self, conf, logger=None):
334
- logger = logger or get_logger(conf, log_route='container-sharder')
335
- super(ContainerSharder, self).__init__(conf, logger=logger)
336
- self.shards_account_prefix = (
337
- (conf.get('auto_create_account_prefix') or '.') + 'shards_')
338
-
339
- def percent_value(key, default):
340
- try:
341
- value = conf.get(key, default)
342
- return config_float_value(value, 0, 100) / 100.0
343
- except ValueError as err:
344
- raise ValueError("%s: %s" % (str(err), key))
345
-
346
- self.shard_shrink_point = percent_value('shard_shrink_point',
347
- DEFAULT_SHARD_SHRINK_POINT)
348
- self.shrink_merge_point = percent_value('shard_shrink_merge_point',
349
- DEFAULT_SHARD_MERGE_POINT)
350
- self.shard_container_threshold = config_positive_int_value(
351
- conf.get('shard_container_threshold',
352
- DEFAULT_SHARD_CONTAINER_THRESHOLD))
353
- self.shrink_size = (self.shard_container_threshold *
354
- self.shard_shrink_point)
355
- self.merge_size = (self.shard_container_threshold *
356
- self.shrink_merge_point)
357
- self.split_size = self.shard_container_threshold // 2
358
- self.scanner_batch_size = config_positive_int_value(
359
- conf.get('shard_scanner_batch_size', 10))
360
- self.cleave_batch_size = config_positive_int_value(
361
- conf.get('cleave_batch_size', 2))
362
- self.cleave_row_batch_size = config_positive_int_value(
363
- conf.get('cleave_row_batch_size', 10000))
364
- self.auto_shard = config_true_value(conf.get('auto_shard', False))
849
+ logger = logger or get_logger(conf, log_route=self.log_route)
850
+ ContainerReplicator.__init__(self, conf, logger=logger)
851
+ ContainerSharderConf.__init__(self, conf)
852
+ ContainerSharderConf.validate_conf(self)
853
+ self.shards_account_prefix = (AUTO_CREATE_ACCOUNT_PREFIX + 'shards_')
365
854
  self.sharding_candidates = []
366
- self.recon_candidates_limit = int(
367
- conf.get('recon_candidates_limit', 5))
368
- self.broker_timeout = config_positive_int_value(
369
- conf.get('broker_timeout', 60))
855
+ self.shrinking_candidates = []
370
856
  replica_count = self.ring.replica_count
371
857
  quorum = quorum_size(replica_count)
372
858
  self.shard_replication_quorum = config_auto_int_value(
@@ -388,7 +874,6 @@ class ContainerSharder(ContainerReplicator):
388
874
  self.existing_shard_replication_quorum = replica_count
389
875
 
390
876
  # internal client
391
- self.conn_timeout = float(conf.get('conn_timeout', 5))
392
877
  request_tries = config_positive_int_value(
393
878
  conf.get('request_tries', 3))
394
879
  internal_client_conf_path = conf.get('internal_client_conf_path',
@@ -398,7 +883,9 @@ class ContainerSharder(ContainerReplicator):
398
883
  internal_client_conf_path,
399
884
  'Swift Container Sharder',
400
885
  request_tries,
401
- allow_modify_pipeline=False)
886
+ use_replication_network=True,
887
+ global_conf={'log_name': '%s-ic' % conf.get(
888
+ 'log_name', self.log_route)})
402
889
  except (OSError, IOError) as err:
403
890
  if err.errno != errno.ENOENT and \
404
891
  not str(err).endswith(' not found'):
@@ -406,7 +893,67 @@ class ContainerSharder(ContainerReplicator):
406
893
  raise SystemExit(
407
894
  'Unable to load internal client from config: %r (%s)' %
408
895
  (internal_client_conf_path, err))
896
+ self.stats_interval = float(conf.get('stats_interval', '3600'))
409
897
  self.reported = 0
898
+ self.periodic_warnings_interval = float(
899
+ conf.get('periodic_warnings_interval',
900
+ DEFAULT_PERIODIC_WARNINGS_INTERVAL))
901
+ self.periodic_warnings_start = time.time()
902
+ self.periodic_warnings = set()
903
+
904
+ def _get_broker_details(self, broker):
905
+ try:
906
+ db_file = broker.db_file
907
+ except Exception: # noqa
908
+ db_file = ''
909
+ try:
910
+ path = broker.path
911
+ except Exception: # noqa
912
+ path = ''
913
+ return db_file, path
914
+
915
+ def _format_log_msg(self, broker, msg, *args):
916
+ # make best effort to include broker properties...
917
+ db_file, path = self._get_broker_details(broker)
918
+ if args:
919
+ msg = msg % args
920
+ return '%s, path: %s, db: %s' % (msg, quote(path), db_file)
921
+
922
+ def _log(self, level, broker, msg, *args):
923
+ if not self.logger.isEnabledFor(level):
924
+ return
925
+
926
+ self.logger.log(level, self._format_log_msg(broker, msg, *args))
927
+
928
+ def debug(self, broker, msg, *args, **kwargs):
929
+ self._log(logging.DEBUG, broker, msg, *args, **kwargs)
930
+
931
+ def info(self, broker, msg, *args, **kwargs):
932
+ self._log(logging.INFO, broker, msg, *args, **kwargs)
933
+
934
+ def warning(self, broker, msg, *args, **kwargs):
935
+ self._log(logging.WARNING, broker, msg, *args, **kwargs)
936
+
937
+ def periodic_warning(self, broker, msg, *args, **kwargs):
938
+ now = time.time()
939
+ if now - self.periodic_warnings_start >= \
940
+ self.periodic_warnings_interval:
941
+ self.periodic_warnings.clear()
942
+ self.periodic_warnings_start = now
943
+
944
+ db_file, path = self._get_broker_details(broker)
945
+ key = (db_file, msg)
946
+ if key not in self.periodic_warnings:
947
+ self.periodic_warnings.add(key)
948
+ self._log(logging.WARNING, broker, msg, *args, **kwargs)
949
+
950
+ def error(self, broker, msg, *args, **kwargs):
951
+ self._log(logging.ERROR, broker, msg, *args, **kwargs)
952
+
953
+ def exception(self, broker, msg, *args, **kwargs):
954
+ if not self.logger.isEnabledFor(logging.ERROR):
955
+ return
956
+ self.logger.exception(self._format_log_msg(broker, msg, *args))
410
957
 
411
958
  def _zero_stats(self):
412
959
  """Zero out the stats."""
@@ -415,6 +962,7 @@ class ContainerSharder(ContainerReplicator):
415
962
  # stats are maintained under the 'sharding' key in self.stats
416
963
  self.stats['sharding'] = defaultdict(lambda: defaultdict(int))
417
964
  self.sharding_candidates = []
965
+ self.shrinking_candidates = []
418
966
 
419
967
  def _append_stat(self, category, key, value):
420
968
  if not self.stats['sharding'][category][key]:
@@ -435,11 +983,15 @@ class ContainerSharder(ContainerReplicator):
435
983
  else:
436
984
  self.stats['sharding'][category][key] = max(current, value)
437
985
 
438
- def _increment_stat(self, category, key, step=1, statsd=False):
439
- self.stats['sharding'][category][key] += step
440
- if statsd:
441
- statsd_key = '%s_%s' % (category, key)
442
- self.logger.increment(statsd_key)
986
+ def _increment_stat(self, category, key, statsd=False):
987
+ self._update_stat(category, key, step=1, statsd=statsd)
988
+
989
+ def _update_stat(self, category, key, step=1, statsd=False):
990
+ if step:
991
+ self.stats['sharding'][category][key] += step
992
+ if statsd:
993
+ statsd_key = '%s_%s' % (category, key)
994
+ self.logger.update_stats(statsd_key, step)
443
995
 
444
996
  def _make_stats_info(self, broker, node, own_shard_range):
445
997
  try:
@@ -458,40 +1010,90 @@ class ContainerSharder(ContainerReplicator):
458
1010
 
459
1011
  def _identify_sharding_candidate(self, broker, node):
460
1012
  own_shard_range = broker.get_own_shard_range()
1013
+ update_own_shard_range_stats(broker, own_shard_range)
461
1014
  if is_sharding_candidate(
462
1015
  own_shard_range, self.shard_container_threshold):
463
1016
  self.sharding_candidates.append(
464
1017
  self._make_stats_info(broker, node, own_shard_range))
465
1018
 
466
- def _transform_sharding_candidate_stats(self):
467
- category = self.stats['sharding']['sharding_candidates']
468
- candidates = self.sharding_candidates
1019
+ def _identify_shrinking_candidate(self, broker, node):
1020
+ sequences = find_compactible_shard_sequences(
1021
+ broker, self.shrink_threshold, self.expansion_limit,
1022
+ self.max_shrinking, self.max_expanding)
1023
+ # compactible_ranges are all apart from final acceptor in each sequence
1024
+ compactible_ranges = sum(len(seq) - 1 for seq in sequences)
1025
+
1026
+ if compactible_ranges:
1027
+ own_shard_range = broker.get_own_shard_range()
1028
+ update_own_shard_range_stats(broker, own_shard_range)
1029
+ shrink_candidate = self._make_stats_info(
1030
+ broker, node, own_shard_range)
1031
+ # The number of ranges/donors that can be shrunk if the
1032
+ # tool is used with the current max_shrinking, max_expanding
1033
+ # settings.
1034
+ shrink_candidate['compactible_ranges'] = compactible_ranges
1035
+ self.shrinking_candidates.append(shrink_candidate)
1036
+
1037
+ def _transform_candidate_stats(self, category, candidates, sort_keys):
469
1038
  category['found'] = len(candidates)
470
- candidates.sort(key=lambda c: c['object_count'], reverse=True)
1039
+ candidates.sort(key=itemgetter(*sort_keys), reverse=True)
471
1040
  if self.recon_candidates_limit >= 0:
472
1041
  category['top'] = candidates[:self.recon_candidates_limit]
473
1042
  else:
474
1043
  category['top'] = candidates
475
1044
 
476
1045
  def _record_sharding_progress(self, broker, node, error):
1046
+ db_state = broker.get_db_state()
1047
+ if db_state not in (UNSHARDED, SHARDING, SHARDED):
1048
+ return
477
1049
  own_shard_range = broker.get_own_shard_range()
478
- if (broker.get_db_state() in (UNSHARDED, SHARDING) and
479
- own_shard_range.state in (ShardRange.SHARDING,
480
- ShardRange.SHARDED)):
481
- info = self._make_stats_info(broker, node, own_shard_range)
482
- info['state'] = own_shard_range.state_text
483
- info['db_state'] = broker.get_db_state()
484
- states = [ShardRange.FOUND, ShardRange.CREATED,
485
- ShardRange.CLEAVED, ShardRange.ACTIVE]
486
- shard_ranges = broker.get_shard_ranges(states=states)
487
- state_count = {}
488
- for state in states:
489
- state_count[ShardRange.STATES[state]] = 0
490
- for shard_range in shard_ranges:
491
- state_count[shard_range.state_text] += 1
492
- info.update(state_count)
493
- info['error'] = error and str(error)
494
- self._append_stat('sharding_in_progress', 'all', info)
1050
+ if own_shard_range.state not in ShardRange.CLEAVING_STATES:
1051
+ return
1052
+
1053
+ if db_state == SHARDED:
1054
+ contexts = CleavingContext.load_all(broker)
1055
+ if not contexts:
1056
+ return
1057
+ context_ts = max(float(ts) for c, ts in contexts)
1058
+ if context_ts + self.recon_sharded_timeout \
1059
+ < float(Timestamp.now()):
1060
+ # last context timestamp too old for the
1061
+ # broker to be recorded
1062
+ return
1063
+
1064
+ update_own_shard_range_stats(broker, own_shard_range)
1065
+ info = self._make_stats_info(broker, node, own_shard_range)
1066
+ info['state'] = own_shard_range.state_text
1067
+ info['db_state'] = broker.get_db_state()
1068
+ states = [ShardRange.FOUND, ShardRange.CREATED,
1069
+ ShardRange.CLEAVED, ShardRange.ACTIVE]
1070
+ shard_ranges = broker.get_shard_ranges(states=states)
1071
+ state_count = {}
1072
+ for state in states:
1073
+ state_count[ShardRange.STATES[state]] = 0
1074
+ for shard_range in shard_ranges:
1075
+ state_count[shard_range.state_text] += 1
1076
+ info.update(state_count)
1077
+ info['error'] = error and str(error)
1078
+ self._append_stat('sharding_in_progress', 'all', info)
1079
+
1080
+ if broker.sharding_required() and (
1081
+ own_shard_range.epoch is not None) and (
1082
+ float(own_shard_range.epoch) +
1083
+ self.container_sharding_timeout <
1084
+ time.time()):
1085
+ # Note: There is no requirement that own_shard_range.epoch equals
1086
+ # the time at which the own_shard_range was merged into the
1087
+ # container DB, which predicates sharding starting. But s-m-s-r and
1088
+ # auto-sharding do set epoch and then merge, so we use it to tell
1089
+ # whether sharding has been taking too long or not.
1090
+ self.warning(
1091
+ broker, 'Cleaving has not completed in %.2f seconds since %s. '
1092
+ 'DB state: %s, own_shard_range state: %s, state count of '
1093
+ 'shard ranges: %s' %
1094
+ (time.time() - float(own_shard_range.epoch),
1095
+ own_shard_range.epoch.isoformat, db_state,
1096
+ own_shard_range.state_text, str(state_count)))
495
1097
 
496
1098
  def _report_stats(self):
497
1099
  # report accumulated stats since start of one sharder cycle
@@ -502,7 +1104,7 @@ class ContainerSharder(ContainerReplicator):
502
1104
  ('created', default_stats),
503
1105
  ('cleaved', default_stats + ('min_time', 'max_time',)),
504
1106
  ('misplaced', default_stats + ('found', 'placed', 'unplaced')),
505
- ('audit_root', default_stats),
1107
+ ('audit_root', default_stats + ('has_overlap', 'num_overlap')),
506
1108
  ('audit_shard', default_stats),
507
1109
  )
508
1110
 
@@ -515,7 +1117,16 @@ class ContainerSharder(ContainerReplicator):
515
1117
  msg = ' '.join(['%s:%s' % (k, str(stats[k])) for k in keys])
516
1118
  self.logger.info('Since %s %s - %s', last_report, category, msg)
517
1119
 
518
- self._transform_sharding_candidate_stats()
1120
+ # transform the sharding and shrinking candidate stats
1121
+ # first sharding
1122
+ category = self.stats['sharding']['sharding_candidates']
1123
+ self._transform_candidate_stats(category, self.sharding_candidates,
1124
+ sort_keys=('object_count',))
1125
+
1126
+ # next shrinking
1127
+ category = self.stats['sharding']['shrinking_candidates']
1128
+ self._transform_candidate_stats(category, self.shrinking_candidates,
1129
+ sort_keys=('compactible_ranges',))
519
1130
 
520
1131
  dump_recon_cache(
521
1132
  {'sharding_stats': self.stats,
@@ -525,7 +1136,7 @@ class ContainerSharder(ContainerReplicator):
525
1136
  self.reported = now
526
1137
 
527
1138
  def _periodic_report_stats(self):
528
- if (time.time() - self.reported) >= 3600: # once an hour
1139
+ if (time.time() - self.reported) >= self.stats_interval:
529
1140
  self._report_stats()
530
1141
 
531
1142
  def _check_node(self, node):
@@ -553,64 +1164,67 @@ class ContainerSharder(ContainerReplicator):
553
1164
  params = params or {}
554
1165
  params.setdefault('format', 'json')
555
1166
  headers = {'X-Backend-Record-Type': 'shard',
1167
+ 'X-Backend-Record-Shard-Format': 'full',
556
1168
  'X-Backend-Override-Deleted': 'true',
557
1169
  'X-Backend-Include-Deleted': str(include_deleted)}
558
1170
  if newest:
559
1171
  headers['X-Newest'] = 'true'
560
1172
  try:
561
- try:
562
- resp = self.int_client.make_request(
563
- 'GET', path, headers, acceptable_statuses=(2,),
564
- params=params)
565
- except internal_client.UnexpectedResponse as err:
566
- self.logger.warning("Failed to get shard ranges from %s: %s",
567
- broker.root_path, err)
568
- return None
569
- record_type = resp.headers.get('x-backend-record-type')
570
- if record_type != 'shard':
571
- err = 'unexpected record type %r' % record_type
572
- self.logger.error("Failed to get shard ranges from %s: %s",
573
- broker.root_path, err)
574
- return None
575
-
576
- try:
577
- data = json.loads(resp.body)
578
- if not isinstance(data, list):
579
- raise ValueError('not a list')
580
- return [ShardRange.from_dict(shard_range)
581
- for shard_range in data]
582
- except (ValueError, TypeError, KeyError) as err:
583
- self.logger.error(
584
- "Failed to get shard ranges from %s: invalid data: %r",
585
- broker.root_path, err)
1173
+ resp = self.int_client.make_request(
1174
+ 'GET', path, headers, acceptable_statuses=(2,),
1175
+ params=params)
1176
+ except internal_client.UnexpectedResponse as err:
1177
+ self.warning(broker, "Failed to get shard ranges from %s: %s",
1178
+ quote(broker.root_path), err)
1179
+ return None
1180
+ record_type = resp.headers.get('x-backend-record-type')
1181
+ if record_type != 'shard':
1182
+ err = 'unexpected record type %r' % record_type
1183
+ self.error(broker, "Failed to get shard ranges from %s: %s",
1184
+ quote(broker.root_path), err)
586
1185
  return None
587
- finally:
588
- self.logger.txn_id = None
589
1186
 
590
- def _put_container(self, node, part, account, container, headers, body):
1187
+ try:
1188
+ data = json.loads(resp.body)
1189
+ if not isinstance(data, list):
1190
+ raise ValueError('not a list')
1191
+ return [ShardRange.from_dict(shard_range)
1192
+ for shard_range in data]
1193
+ except (ValueError, TypeError, KeyError) as err:
1194
+ self.error(broker,
1195
+ "Failed to get shard ranges from %s: invalid data: %r",
1196
+ quote(broker.root_path), err)
1197
+ return None
1198
+
1199
+ def _put_container(self, broker, node, part, account, container, headers,
1200
+ body):
591
1201
  try:
592
1202
  direct_put_container(node, part, account, container,
593
1203
  conn_timeout=self.conn_timeout,
594
1204
  response_timeout=self.node_timeout,
595
1205
  headers=headers, contents=body)
596
1206
  except DirectClientException as err:
597
- self.logger.warning(
598
- 'Failed to put shard ranges to %s:%s/%s: %s',
599
- node['ip'], node['port'], node['device'], err.http_status)
1207
+ self.warning(broker,
1208
+ 'Failed to put shard ranges to %s %s/%s: %s',
1209
+ node_to_string(node, replication=True),
1210
+ quote(account), quote(container), err.http_status)
600
1211
  except (Exception, Timeout) as err:
601
- self.logger.exception(
602
- 'Failed to put shard ranges to %s:%s/%s: %s',
603
- node['ip'], node['port'], node['device'], err)
1212
+ self.exception(broker,
1213
+ 'Failed to put shard ranges to %s %s/%s: %s',
1214
+ node_to_string(node, replication=True),
1215
+ quote(account), quote(container), err)
604
1216
  else:
605
1217
  return True
606
1218
  return False
607
1219
 
608
- def _send_shard_ranges(self, account, container, shard_ranges,
1220
+ def _send_shard_ranges(self, broker, account, container, shard_ranges,
609
1221
  headers=None):
610
- body = json.dumps([dict(sr) for sr in shard_ranges]).encode('ascii')
1222
+ body = json.dumps([dict(sr, reported=0)
1223
+ for sr in shard_ranges]).encode('ascii')
611
1224
  part, nodes = self.ring.get_nodes(account, container)
612
1225
  headers = headers or {}
613
1226
  headers.update({'X-Backend-Record-Type': RECORD_TYPE_SHARD,
1227
+ USE_REPLICATION_NETWORK_HEADER: 'True',
614
1228
  'User-Agent': 'container-sharder %s' % os.getpid(),
615
1229
  'X-Timestamp': Timestamp.now().normal,
616
1230
  'Content-Length': len(body),
@@ -618,7 +1232,7 @@ class ContainerSharder(ContainerReplicator):
618
1232
 
619
1233
  pool = GreenAsyncPile(len(nodes))
620
1234
  for node in nodes:
621
- pool.spawn(self._put_container, node, part, account,
1235
+ pool.spawn(self._put_container, broker, node, part, account,
622
1236
  container, headers, body)
623
1237
 
624
1238
  results = pool.waitall(None)
@@ -634,20 +1248,19 @@ class ContainerSharder(ContainerReplicator):
634
1248
  :param shard_range: a :class:`~swift.common.utils.ShardRange`
635
1249
  :param root_path: the path of the shard's root container
636
1250
  :param policy_index: the storage policy index
637
- :returns: a tuple of ``(part, broker, node_id)`` where ``part`` is the
638
- shard container's partition, ``broker`` is an instance of
1251
+ :returns: a tuple of ``(part, broker, node_id, put_timestamp)`` where
1252
+ ``part`` is the shard container's partition,
1253
+ ``broker`` is an instance of
639
1254
  :class:`~swift.container.backend.ContainerBroker`,
640
- ``node_id`` is the id of the selected node.
1255
+ ``node_id`` is the id of the selected node,
1256
+ ``put_timestamp`` is the put_timestamp if the broker needed to
1257
+ be initialized.
641
1258
  """
642
1259
  part = self.ring.get_part(shard_range.account, shard_range.container)
643
1260
  node = self.find_local_handoff_for_part(part)
644
- put_timestamp = Timestamp.now().internal
645
- if not node:
646
- raise DeviceUnavailable(
647
- 'No mounted devices found suitable for creating shard broker '
648
- 'for %s in partition %s' % (shard_range.name, part))
649
1261
 
650
- shard_broker = ContainerBroker.create_broker(
1262
+ put_timestamp = Timestamp.now().internal
1263
+ shard_broker, initialized = ContainerBroker.create_broker(
651
1264
  os.path.join(self.root, node['device']), part, shard_range.account,
652
1265
  shard_range.container, epoch=shard_range.epoch,
653
1266
  storage_policy_index=policy_index, put_timestamp=put_timestamp)
@@ -655,11 +1268,19 @@ class ContainerSharder(ContainerReplicator):
655
1268
  # Get the valid info into the broker.container, etc
656
1269
  shard_broker.get_info()
657
1270
  shard_broker.merge_shard_ranges(shard_range)
658
- shard_broker.set_sharding_sysmeta('Root', root_path)
1271
+ shard_broker.set_sharding_sysmeta('Quoted-Root', quote(root_path))
1272
+ # NB: we *used* to do
1273
+ # shard_broker.set_sharding_sysmeta('Root', root_path)
1274
+ # but that isn't safe for container names with nulls or newlines (or
1275
+ # possibly some other characters). We consciously *don't* make any
1276
+ # attempt to set the old meta; during an upgrade, some shards may think
1277
+ # they are in fact roots, but it cleans up well enough once everyone's
1278
+ # upgraded.
659
1279
  shard_broker.update_metadata({
660
1280
  'X-Container-Sysmeta-Sharding':
661
1281
  ('True', Timestamp.now().internal)})
662
1282
 
1283
+ put_timestamp = put_timestamp if initialized else None
663
1284
  return part, shard_broker, node['id'], put_timestamp
664
1285
 
665
1286
  def _audit_root_container(self, broker):
@@ -669,105 +1290,276 @@ class ContainerSharder(ContainerReplicator):
669
1290
  warnings = []
670
1291
  own_shard_range = broker.get_own_shard_range()
671
1292
 
672
- if own_shard_range.state in (ShardRange.SHARDING, ShardRange.SHARDED):
673
- shard_ranges = broker.get_shard_ranges()
674
- missing_ranges = find_missing_ranges(shard_ranges)
675
- if missing_ranges:
1293
+ if own_shard_range.state in ShardRange.SHARDING_STATES:
1294
+ shard_ranges = [sr for sr in broker.get_shard_ranges()
1295
+ if sr.state != ShardRange.SHRINKING]
1296
+ paths_with_gaps = find_paths_with_gaps(shard_ranges)
1297
+ if paths_with_gaps:
676
1298
  warnings.append(
677
1299
  'missing range(s): %s' %
678
- ' '.join(['%s-%s' % (lower, upper)
679
- for lower, upper in missing_ranges]))
1300
+ ' '.join(['%s-%s' % (gap.lower, gap.upper)
1301
+ for (_, gap, _) in paths_with_gaps]))
680
1302
 
681
1303
  for state in ShardRange.STATES:
682
- shard_ranges = broker.get_shard_ranges(states=state)
683
- overlaps = find_overlapping_ranges(shard_ranges)
684
- for overlapping_ranges in overlaps:
1304
+ if state == ShardRange.SHRINKING:
1305
+ # Shrinking is how we resolve overlaps; we've got to
1306
+ # allow multiple shards in that state
1307
+ continue
1308
+ shard_ranges = broker.get_shard_ranges(states=[state])
1309
+ # Transient overlaps can occur during the period immediately after
1310
+ # sharding if a root learns about new child shards before it learns
1311
+ # that the parent has sharded. These overlaps are normally
1312
+ # corrected as an up-to-date version of the parent shard range is
1313
+ # replicated to the root. Parent-child overlaps are therefore
1314
+ # ignored for a reclaim age after the child was created. After
1315
+ # that, parent-child overlaps may indicate that there is
1316
+ # permanently stale parent shard range data, perhaps from a node
1317
+ # that has been offline, so these are reported.
1318
+ overlaps = find_overlapping_ranges(
1319
+ shard_ranges, exclude_parent_child=True,
1320
+ time_period=self.reclaim_age)
1321
+ if overlaps:
1322
+ self._increment_stat('audit_root', 'has_overlap')
1323
+ self._update_stat('audit_root', 'num_overlap',
1324
+ step=len(overlaps))
1325
+ all_overlaps = ', '.join(
1326
+ [' '.join(['%s-%s' % (sr.lower, sr.upper)
1327
+ for sr in overlapping_ranges])
1328
+ for overlapping_ranges in sorted(list(overlaps))])
685
1329
  warnings.append(
686
- 'overlapping ranges in state %s: %s' %
687
- (ShardRange.STATES[state],
688
- ' '.join(['%s-%s' % (sr.lower, sr.upper)
689
- for sr in overlapping_ranges])))
1330
+ 'overlapping ranges in state %r: %s' %
1331
+ (ShardRange.STATES[state], all_overlaps))
1332
+
1333
+ # We've seen a case in production where the root's own_shard_range
1334
+ # epoch is reset to None, and state set to ACTIVE (like re-defaulted)
1335
+ # Epoch is important to sharding so we want to detect if this happens
1336
+ # 1. So we can alert, and 2. to see how common it is.
1337
+ if own_shard_range.epoch is None and broker.db_epoch:
1338
+ warnings.append('own_shard_range reset to None should be %s'
1339
+ % broker.db_epoch)
690
1340
 
691
1341
  if warnings:
692
- self.logger.warning(
693
- 'Audit failed for root %s (%s): %s' %
694
- (broker.db_file, broker.path, ', '.join(warnings)))
1342
+ self.warning(broker, 'Audit failed for root: %s',
1343
+ ', '.join(warnings))
695
1344
  self._increment_stat('audit_root', 'failure', statsd=True)
696
1345
  return False
697
1346
 
698
1347
  self._increment_stat('audit_root', 'success', statsd=True)
699
1348
  return True
700
1349
 
701
- def _audit_shard_container(self, broker):
702
- # Get the root view of the world.
703
- self._increment_stat('audit_shard', 'attempted')
1350
+ def _merge_shard_ranges_from_root(self, broker, shard_ranges,
1351
+ own_shard_range):
1352
+ """
1353
+ Merge appropriate items from the given ``shard_ranges`` into the
1354
+ ``broker``. The selection of items that are merged will depend upon the
1355
+ state of the shard.
1356
+
1357
+ :param broker: A :class:`~swift.container.backend.ContainerBroker`.
1358
+ :param shard_ranges: A list of instances of
1359
+ :class:`~swift.common.utils.ShardRange` describing the shard ranges
1360
+ fetched from the root container.
1361
+ :param own_shard_range: A :class:`~swift.common.utils.ShardRange`
1362
+ describing the shard's own shard range.
1363
+ :return: a tuple of ``own_shard_range, own_shard_range_from_root``. The
1364
+ returned ``own_shard_range`` will have been updated if the matching
1365
+ ``own_shard_range_from_root`` has newer data.
1366
+ ``own_shard_range_from_root`` will be None if no such matching
1367
+ shard range is found in ``shard_ranges``.
1368
+ """
1369
+ own_shard_range_from_root = None
1370
+ children_shard_ranges = []
1371
+ other_shard_ranges = []
1372
+ for shard_range in shard_ranges:
1373
+ # look for this shard range in the list of shard ranges received
1374
+ # from root; the root may have different lower and upper bounds for
1375
+ # this shard (e.g. if this shard has been expanded in the root to
1376
+ # accept a shrinking shard) so we only match on name.
1377
+ if shard_range.name == own_shard_range.name:
1378
+ # If we find our own shard range in the root response, merge
1379
+ # it and reload own shard range (note: own_range_from_root may
1380
+ # not necessarily be 'newer' than the own shard range we
1381
+ # already have, but merging will get us to the 'newest' state)
1382
+ self.debug(broker, 'Updating own shard range from root')
1383
+ own_shard_range_from_root = shard_range
1384
+ broker.merge_shard_ranges(own_shard_range_from_root)
1385
+ orig_own_shard_range = own_shard_range
1386
+ own_shard_range = broker.get_own_shard_range()
1387
+ if (orig_own_shard_range != own_shard_range or
1388
+ orig_own_shard_range.state != own_shard_range.state):
1389
+ self.info(broker,
1390
+ 'Updated own shard range from %s to %s',
1391
+ orig_own_shard_range, own_shard_range)
1392
+ elif shard_range.is_child_of(own_shard_range):
1393
+ children_shard_ranges.append(shard_range)
1394
+ else:
1395
+ other_shard_ranges.append(shard_range)
1396
+
1397
+ if children_shard_ranges and not broker.is_sharded():
1398
+ # Merging shard ranges from the root is only necessary until this
1399
+ # DB is fully cleaved and reaches SHARDED DB state, after which it
1400
+ # is useful for debugging for the set of sub-shards to which a
1401
+ # shard has sharded to be frozen.
1402
+ self.debug(broker, 'Updating %d children shard ranges from root',
1403
+ len(children_shard_ranges))
1404
+ broker.merge_shard_ranges(children_shard_ranges)
1405
+
1406
+ if (other_shard_ranges
1407
+ and own_shard_range.state in ShardRange.CLEAVING_STATES
1408
+ and not broker.is_sharded()):
1409
+ # Other shard ranges returned from the root may need to be merged
1410
+ # for the purposes of sharding or shrinking this shard:
1411
+ #
1412
+ # Shrinking states: If the up-to-date state is shrinking, the
1413
+ # shards fetched from root may contain shards into which this shard
1414
+ # is to shrink itself. Shrinking is initiated by modifying multiple
1415
+ # neighboring shard range states *in the root*, rather than
1416
+ # modifying a shard directly. We therefore need to learn about
1417
+ # *other* neighboring shard ranges from the root, possibly
1418
+ # including the root itself. We need to include shrunk state too,
1419
+ # because one replica of a shard may already have moved the
1420
+ # own_shard_range state to shrunk while another replica may still
1421
+ # be in the process of shrinking.
1422
+ #
1423
+ # Sharding states: Normally a shard will shard to its own children.
1424
+ # However, in some circumstances a shard may need to shard to other
1425
+ # non-children sub-shards. For example, a shard range repair may
1426
+ # cause a child sub-shard to be deleted and its namespace covered
1427
+ # by another 'acceptor' shard.
1428
+ #
1429
+ # Therefore, if the up-to-date own_shard_range state indicates that
1430
+ # sharding or shrinking is in progress, then other shard ranges
1431
+ # will be merged, with the following caveats: we never expect a
1432
+ # shard to shard to any ancestor shard range including the root,
1433
+ # but containers might ultimately *shrink* to root; we never want
1434
+ # to cleave to a container that is itself sharding or shrinking;
1435
+ # the merged shard ranges should not result in gaps or overlaps in
1436
+ # the namespace of this shard.
1437
+ #
1438
+ # Note: the search for ancestors is guaranteed to find the parent
1439
+ # and root *if they are present*, but if any ancestor is missing
1440
+ # then there is a chance that older generations in the
1441
+ # other_shard_ranges will not be filtered and could be merged. That
1442
+ # is only a problem if they are somehow still in ACTIVE state, and
1443
+ # no overlap is detected, so the ancestor is merged.
1444
+ ancestor_names = [
1445
+ sr.name for sr in own_shard_range.find_ancestors(shard_ranges)]
1446
+ filtered_other_shard_ranges = [
1447
+ sr for sr in other_shard_ranges
1448
+ if (sr.name not in ancestor_names
1449
+ and (sr.state not in ShardRange.CLEAVING_STATES
1450
+ or sr.deleted))
1451
+ ]
1452
+ if own_shard_range.state in ShardRange.SHRINKING_STATES:
1453
+ root_shard_range = own_shard_range.find_root(
1454
+ other_shard_ranges)
1455
+ if (root_shard_range and
1456
+ root_shard_range.state == ShardRange.ACTIVE):
1457
+ filtered_other_shard_ranges.append(root_shard_range)
1458
+ existing_shard_ranges = broker.get_shard_ranges()
1459
+ combined_shard_ranges = combine_shard_ranges(
1460
+ filtered_other_shard_ranges, existing_shard_ranges)
1461
+ overlaps = find_overlapping_ranges(combined_shard_ranges)
1462
+ paths_with_gaps = find_paths_with_gaps(
1463
+ combined_shard_ranges, own_shard_range)
1464
+ if not (overlaps or paths_with_gaps):
1465
+ # only merge if shard ranges appear to be *good*
1466
+ self.debug(broker,
1467
+ 'Updating %s other shard range(s) from root',
1468
+ len(filtered_other_shard_ranges))
1469
+ broker.merge_shard_ranges(filtered_other_shard_ranges)
1470
+
1471
+ return own_shard_range, own_shard_range_from_root
1472
+
1473
+ def _delete_shard_container(self, broker, own_shard_range):
1474
+ """
1475
+ Mark a shard container as deleted if it was sharded or shrunk more than
1476
+ reclaim_age in the past. (The DB file will be removed by the replicator
1477
+ after a further reclaim_age.)
1478
+
1479
+ :param broker: A :class:`~swift.container.backend.ContainerBroker`.
1480
+ :param own_shard_range: A :class:`~swift.common.utils.ShardRange`
1481
+ describing the shard's own shard range.
1482
+ """
1483
+ delete_age = time.time() - self.reclaim_age
1484
+ deletable_states = (ShardRange.SHARDED, ShardRange.SHRUNK)
1485
+ if (own_shard_range.state in deletable_states and
1486
+ own_shard_range.deleted and
1487
+ own_shard_range.timestamp < delete_age and
1488
+ broker.empty()):
1489
+ broker.delete_db(Timestamp.now().internal)
1490
+ self.debug(broker, 'Marked shard container as deleted')
1491
+
1492
+ def _do_audit_shard_container(self, broker):
704
1493
  warnings = []
705
- errors = []
706
1494
  if not broker.account.startswith(self.shards_account_prefix):
707
1495
  warnings.append('account not in shards namespace %r' %
708
1496
  self.shards_account_prefix)
709
1497
 
710
1498
  own_shard_range = broker.get_own_shard_range(no_default=True)
711
1499
 
712
- shard_range = None
713
- if own_shard_range:
714
- shard_ranges = self._fetch_shard_ranges(
715
- broker, newest=True,
716
- params={'marker': str_to_wsgi(own_shard_range.lower_str),
717
- 'end_marker': str_to_wsgi(own_shard_range.upper_str)},
718
- include_deleted=True)
719
- if shard_ranges:
720
- for shard_range in shard_ranges:
721
- if (shard_range.lower == own_shard_range.lower and
722
- shard_range.upper == own_shard_range.upper and
723
- shard_range.name == own_shard_range.name):
724
- break
725
- else:
726
- # this is not necessarily an error - some replicas of the
727
- # root may not yet know about this shard container
728
- warnings.append('root has no matching shard range')
729
- shard_range = None
730
- else:
731
- warnings.append('unable to get shard ranges from root')
732
- else:
733
- errors.append('missing own shard range')
1500
+ if not own_shard_range:
1501
+ self.warning(broker, 'Audit failed for shard: missing own shard '
1502
+ 'range (skipping)')
1503
+ return False, warnings
1504
+
1505
+ # Get the root view of the world, at least that part of the world
1506
+ # that overlaps with this shard's namespace. The
1507
+ # 'states=auditing' parameter will cause the root to include
1508
+ # its own shard range in the response, which is necessary for the
1509
+ # particular case when this shard should be shrinking to the root
1510
+ # container; when not shrinking to root, but to another acceptor,
1511
+ # the root range should be in sharded state and will not interfere
1512
+ # with cleaving, listing or updating behaviour.
1513
+ shard_ranges = self._fetch_shard_ranges(
1514
+ broker, newest=True,
1515
+ params={'marker': str_to_wsgi(own_shard_range.lower_str),
1516
+ 'end_marker': str_to_wsgi(own_shard_range.upper_str),
1517
+ 'states': 'auditing'},
1518
+ include_deleted=True)
1519
+ if shard_ranges:
1520
+ own_shard_range, own_shard_range_from_root = \
1521
+ self._merge_shard_ranges_from_root(
1522
+ broker, shard_ranges, own_shard_range)
1523
+ if not own_shard_range_from_root:
1524
+ # this is not necessarily an error - some replicas of the
1525
+ # root may not yet know about this shard container, or the
1526
+ # shard's own shard range could become deleted and
1527
+ # reclaimed from the root under rare conditions
1528
+ warnings.append('root has no matching shard range')
1529
+ elif not own_shard_range.deleted:
1530
+ warnings.append('unable to get shard ranges from root')
1531
+ # else, our shard range is deleted, so root may have reclaimed it
1532
+
1533
+ self._delete_shard_container(broker, own_shard_range)
1534
+
1535
+ return True, warnings
734
1536
 
1537
+ def _audit_shard_container(self, broker):
1538
+ self._increment_stat('audit_shard', 'attempted')
1539
+ success, warnings = self._do_audit_shard_container(broker)
735
1540
  if warnings:
736
- self.logger.warning(
737
- 'Audit warnings for shard %s (%s): %s' %
738
- (broker.db_file, broker.path, ', '.join(warnings)))
739
-
740
- if errors:
741
- self.logger.warning(
742
- 'Audit failed for shard %s (%s) - skipping: %s' %
743
- (broker.db_file, broker.path, ', '.join(errors)))
744
- self._increment_stat('audit_shard', 'failure', statsd=True)
745
- return False
746
-
747
- if shard_range:
748
- self.logger.debug('Updating shard from root %s', dict(shard_range))
749
- broker.merge_shard_ranges(shard_range)
750
- own_shard_range = broker.get_own_shard_range()
751
- delete_age = time.time() - self.reclaim_age
752
- if (own_shard_range.state == ShardRange.SHARDED and
753
- own_shard_range.deleted and
754
- own_shard_range.timestamp < delete_age and
755
- broker.empty()):
756
- broker.delete_db(Timestamp.now().internal)
757
- self.logger.debug('Deleted shard container %s (%s)',
758
- broker.db_file, broker.path)
759
- self._increment_stat('audit_shard', 'success', statsd=True)
760
- return True
1541
+ self.warning(broker, 'Audit warnings for shard: %s',
1542
+ ', '.join(warnings))
1543
+ self._increment_stat(
1544
+ 'audit_shard', 'success' if success else 'failure', statsd=True)
1545
+ return success
761
1546
 
762
1547
  def _audit_cleave_contexts(self, broker):
763
1548
  now = Timestamp.now()
764
1549
  for context, last_mod in CleavingContext.load_all(broker):
765
- if Timestamp(last_mod).timestamp + self.reclaim_age < \
766
- now.timestamp:
1550
+ last_mod = Timestamp(last_mod)
1551
+ is_done = context.done() and last_mod.timestamp + \
1552
+ self.recon_sharded_timeout < now.timestamp
1553
+ is_stale = last_mod.timestamp + self.reclaim_age < now.timestamp
1554
+ if is_done or is_stale:
767
1555
  context.delete(broker)
768
1556
 
769
1557
  def _audit_container(self, broker):
770
1558
  if broker.is_deleted():
1559
+ if broker.is_old_enough_to_reclaim(time.time(), self.reclaim_age) \
1560
+ and not broker.is_empty_enough_to_reclaim():
1561
+ self.periodic_warning(
1562
+ broker, 'Reclaimable db stuck waiting for shrinking')
771
1563
  # if the container has been marked as deleted, all metadata will
772
1564
  # have been erased so no point auditing. But we want it to pass, in
773
1565
  # case any objects exist inside it.
@@ -777,18 +1569,32 @@ class ContainerSharder(ContainerReplicator):
777
1569
  return self._audit_root_container(broker)
778
1570
  return self._audit_shard_container(broker)
779
1571
 
780
- def yield_objects(self, broker, src_shard_range, since_row=None):
1572
+ def yield_objects(self, broker, src_shard_range, since_row=None,
1573
+ batch_size=None):
781
1574
  """
782
- Iterates through all objects in ``src_shard_range`` in name order
783
- yielding them in lists of up to CONTAINER_LISTING_LIMIT length.
1575
+ Iterates through all object rows in ``src_shard_range`` in name order
1576
+ yielding them in lists of up to ``batch_size`` in length. All batches
1577
+ of rows that are not marked deleted are yielded before all batches of
1578
+ rows that are marked deleted.
784
1579
 
785
1580
  :param broker: A :class:`~swift.container.backend.ContainerBroker`.
786
1581
  :param src_shard_range: A :class:`~swift.common.utils.ShardRange`
787
1582
  describing the source range.
788
- :param since_row: include only items whose ROWID is greater than
789
- the given row id; by default all rows are included.
790
- :return: a generator of tuples of (list of objects, broker info dict)
1583
+ :param since_row: include only object rows whose ROWID is greater than
1584
+ the given row id; by default all object rows are included.
1585
+ :param batch_size: The maximum number of object rows to include in each
1586
+ yielded batch; defaults to cleave_row_batch_size.
1587
+ :return: a generator of tuples of (list of rows, broker info dict)
791
1588
  """
1589
+ if (src_shard_range.lower == ShardRange.MAX or
1590
+ src_shard_range.upper == ShardRange.MIN):
1591
+ # this is an unexpected condition but handled with an early return
1592
+ # just in case, because:
1593
+ # lower == ShardRange.MAX -> marker == ''
1594
+ # which could result in rows being erroneously yielded.
1595
+ return
1596
+
1597
+ batch_size = batch_size or self.cleave_row_batch_size
792
1598
  for include_deleted in (False, True):
793
1599
  marker = src_shard_range.lower_str
794
1600
  while True:
@@ -796,87 +1602,82 @@ class ContainerSharder(ContainerReplicator):
796
1602
  info['max_row'] = broker.get_max_row()
797
1603
  start = time.time()
798
1604
  objects = broker.get_objects(
799
- self.cleave_row_batch_size,
1605
+ limit=batch_size,
800
1606
  marker=marker,
801
1607
  end_marker=src_shard_range.end_marker,
802
1608
  include_deleted=include_deleted,
803
1609
  since_row=since_row)
1610
+ self.debug(broker, 'got %s rows (deleted=%s) in %ss',
1611
+ len(objects), include_deleted, time.time() - start)
804
1612
  if objects:
805
- self.logger.debug('got %s objects from %s in %ss',
806
- len(objects), broker.db_file,
807
- time.time() - start)
808
1613
  yield objects, info
809
1614
 
810
- if len(objects) < self.cleave_row_batch_size:
1615
+ if len(objects) < batch_size:
811
1616
  break
812
1617
  marker = objects[-1]['name']
813
1618
 
814
1619
  def yield_objects_to_shard_range(self, broker, src_shard_range,
815
1620
  dest_shard_ranges):
816
1621
  """
817
- Iterates through all objects in ``src_shard_range`` to place them in
818
- destination shard ranges provided by the ``next_shard_range`` function.
819
- Yields tuples of (object list, destination shard range in which those
820
- objects belong). Note that the same destination shard range may be
821
- referenced in more than one yielded tuple.
1622
+ Iterates through all object rows in ``src_shard_range`` to place them
1623
+ in destination shard ranges provided by the ``dest_shard_ranges``
1624
+ function. Yields tuples of ``(batch of object rows, destination shard
1625
+ range in which those object rows belong, broker info)``.
1626
+
1627
+ If no destination shard range exists for a batch of object rows then
1628
+ tuples are yielded of ``(batch of object rows, None, broker info)``.
1629
+ This indicates to the caller that there are a non-zero number of object
1630
+ rows for which no destination shard range was found.
1631
+
1632
+ Note that the same destination shard range may be referenced in more
1633
+ than one yielded tuple.
822
1634
 
823
1635
  :param broker: A :class:`~swift.container.backend.ContainerBroker`.
824
1636
  :param src_shard_range: A :class:`~swift.common.utils.ShardRange`
825
1637
  describing the source range.
826
1638
  :param dest_shard_ranges: A function which should return a list of
827
- destination shard ranges in name order.
828
- :return: a generator of tuples of
829
- (object list, shard range, broker info dict)
1639
+ destination shard ranges sorted in the order defined by
1640
+ :meth:`~swift.common.utils.ShardRange.sort_key`.
1641
+ :return: a generator of tuples of ``(object row list, shard range,
1642
+ broker info dict)`` where ``shard_range`` may be ``None``.
830
1643
  """
831
- dest_shard_range_iter = dest_shard_range = None
832
- for objs, info in self.yield_objects(broker, src_shard_range):
833
- if not objs:
834
- return
1644
+ # calling dest_shard_ranges() may result in a request to fetch shard
1645
+ # ranges, so first check that the broker actually has misplaced object
1646
+ # rows in the source namespace
1647
+ for _ in self.yield_objects(broker, src_shard_range, batch_size=1):
1648
+ break
1649
+ else:
1650
+ return
835
1651
 
836
- def next_or_none(it):
837
- try:
838
- return next(it)
839
- except StopIteration:
840
- return None
841
-
842
- if dest_shard_range_iter is None:
843
- dest_shard_range_iter = iter(dest_shard_ranges())
844
- dest_shard_range = next_or_none(dest_shard_range_iter)
845
-
846
- unplaced = False
847
- last_index = next_index = 0
848
- for obj in objs:
849
- if dest_shard_range is None:
850
- # no more destinations: yield remainder of batch and bail
851
- # NB there may be more batches of objects but none of them
852
- # will be placed so no point fetching them
853
- yield objs[last_index:], None, info
854
- return
855
- if obj['name'] <= dest_shard_range.lower:
856
- unplaced = True
857
- elif unplaced:
858
- # end of run of unplaced objects, yield them
859
- yield objs[last_index:next_index], None, info
860
- last_index = next_index
861
- unplaced = False
862
- while (dest_shard_range and
863
- obj['name'] > dest_shard_range.upper):
864
- if next_index != last_index:
865
- # yield the objects in current dest_shard_range
866
- yield (objs[last_index:next_index],
867
- dest_shard_range,
868
- info)
869
- last_index = next_index
870
- dest_shard_range = next_or_none(dest_shard_range_iter)
871
- next_index += 1
872
-
873
- if next_index != last_index:
874
- # yield tail of current batch of objects
875
- # NB there may be more objects for the current
876
- # dest_shard_range in the next batch from yield_objects
877
- yield (objs[last_index:next_index],
878
- None if unplaced else dest_shard_range,
879
- info)
1652
+ dest_shard_range_iter = iter(dest_shard_ranges())
1653
+ src_shard_range_marker = src_shard_range.lower
1654
+ for dest_shard_range in dest_shard_range_iter:
1655
+ if dest_shard_range.upper <= src_shard_range.lower:
1656
+ continue
1657
+
1658
+ if dest_shard_range.lower > src_shard_range_marker:
1659
+ # no destination for a sub-namespace of the source namespace
1660
+ sub_src_range = src_shard_range.copy(
1661
+ lower=src_shard_range_marker, upper=dest_shard_range.lower)
1662
+ for objs, info in self.yield_objects(broker, sub_src_range):
1663
+ yield objs, None, info
1664
+
1665
+ sub_src_range = src_shard_range.copy(
1666
+ lower=max(dest_shard_range.lower, src_shard_range.lower),
1667
+ upper=min(dest_shard_range.upper, src_shard_range.upper))
1668
+ for objs, info in self.yield_objects(broker, sub_src_range):
1669
+ yield objs, dest_shard_range, info
1670
+
1671
+ src_shard_range_marker = dest_shard_range.upper
1672
+ if dest_shard_range.upper >= src_shard_range.upper:
1673
+ # the entire source namespace has been traversed
1674
+ break
1675
+ else:
1676
+ # dest_shard_ranges_iter was exhausted before reaching the end of
1677
+ # the source namespace
1678
+ sub_src_range = src_shard_range.copy(lower=src_shard_range_marker)
1679
+ for objs, info in self.yield_objects(broker, sub_src_range):
1680
+ yield objs, None, info
880
1681
 
881
1682
  def _post_replicate_hook(self, broker, info, responses):
882
1683
  # override superclass behaviour
@@ -886,11 +1687,15 @@ class ContainerSharder(ContainerReplicator):
886
1687
  dest_broker, node_id, info):
887
1688
  success, responses = self._replicate_object(
888
1689
  part, dest_broker.db_file, node_id)
1690
+ replication_successes = responses.count(True)
889
1691
  quorum = quorum_size(self.ring.replica_count)
890
- if not success and responses.count(True) < quorum:
891
- self.logger.warning(
892
- 'Failed to sufficiently replicate misplaced objects: %s in %s '
893
- '(not removing)', dest_shard_range, broker.path)
1692
+ if not success and replication_successes < quorum:
1693
+ self.warning(
1694
+ broker, 'Failed to sufficiently replicate misplaced objects '
1695
+ 'shard %s in state %s: %s successes, %s required '
1696
+ '(not removing objects), shard db: %s',
1697
+ dest_shard_range.name, dest_shard_range.state_text,
1698
+ replication_successes, quorum, dest_broker.db_file)
894
1699
  return False
895
1700
 
896
1701
  if broker.get_info()['id'] != info['id']:
@@ -908,9 +1713,9 @@ class ContainerSharder(ContainerReplicator):
908
1713
  success = True
909
1714
 
910
1715
  if not success:
911
- self.logger.warning(
912
- 'Refused to remove misplaced objects: %s in %s',
913
- dest_shard_range, broker.path)
1716
+ self.warning(broker, 'Refused to remove misplaced objects for '
1717
+ 'dest %s in state %s',
1718
+ dest_shard_range.name, dest_shard_range.state_text)
914
1719
  return success
915
1720
 
916
1721
  def _move_objects(self, src_broker, src_shard_range, policy_index,
@@ -928,16 +1733,19 @@ class ContainerSharder(ContainerReplicator):
928
1733
  continue
929
1734
 
930
1735
  if dest_shard_range.name == src_broker.path:
931
- self.logger.debug(
932
- 'Skipping source as misplaced objects destination')
1736
+ self.debug(src_broker,
1737
+ 'Skipping source as misplaced objects destination')
933
1738
  # in shrinking context, the misplaced objects might actually be
934
1739
  # correctly placed if the root has expanded this shard but this
935
1740
  # broker has not yet been updated
936
1741
  continue
937
1742
 
938
1743
  if dest_shard_range not in dest_brokers:
939
- part, dest_broker, node_id, _junk = self._get_shard_broker(
940
- dest_shard_range, src_broker.root_path, policy_index)
1744
+ part, dest_broker, node_id, put_timestamp = \
1745
+ self._get_shard_broker(
1746
+ dest_shard_range, src_broker.root_path, policy_index)
1747
+ stat = 'db_exists' if put_timestamp is None else 'db_created'
1748
+ self._increment_stat('misplaced', stat, statsd=True)
941
1749
  # save the broker info that was sampled prior to the *first*
942
1750
  # yielded objects for this destination
943
1751
  destination = {'part': part,
@@ -951,20 +1759,20 @@ class ContainerSharder(ContainerReplicator):
951
1759
  placed += len(objs)
952
1760
 
953
1761
  if unplaced:
954
- self.logger.warning(
955
- 'Failed to find destination for at least %s misplaced objects '
956
- 'in %s' % (unplaced, src_broker.path))
1762
+ self.warning(src_broker, 'Failed to find destination for at least '
1763
+ '%s misplaced objects', unplaced)
957
1764
 
958
1765
  # TODO: consider executing the replication jobs concurrently
959
1766
  for dest_shard_range, dest_args in dest_brokers.items():
960
- self.logger.debug('moving misplaced objects found in range %s' %
961
- dest_shard_range)
1767
+ self.debug(src_broker,
1768
+ 'moving misplaced objects found in range %s',
1769
+ dest_shard_range)
962
1770
  success &= self._replicate_and_delete(
963
1771
  src_broker, dest_shard_range, **dest_args)
964
1772
 
965
- self._increment_stat('misplaced', 'placed', step=placed)
966
- self._increment_stat('misplaced', 'unplaced', step=unplaced)
967
- return success, placed + unplaced
1773
+ self._update_stat('misplaced', 'placed', step=placed, statsd=True)
1774
+ self._update_stat('misplaced', 'unplaced', step=unplaced, statsd=True)
1775
+ return success, placed, unplaced
968
1776
 
969
1777
  def _make_shard_range_fetcher(self, broker, src_shard_range):
970
1778
  # returns a function that will lazy load shard ranges on demand;
@@ -1005,12 +1813,12 @@ class ContainerSharder(ContainerReplicator):
1005
1813
 
1006
1814
  def _make_misplaced_object_bounds(self, broker):
1007
1815
  bounds = []
1008
- state = broker.get_db_state()
1009
- if state == SHARDED:
1816
+ db_state = broker.get_db_state()
1817
+ if db_state == SHARDED:
1010
1818
  # Anything in the object table is treated as a misplaced object.
1011
1819
  bounds.append(('', ''))
1012
1820
 
1013
- if not bounds and state == SHARDING:
1821
+ if not bounds and db_state == SHARDING:
1014
1822
  # Objects outside of this container's own range are misplaced.
1015
1823
  # Objects in already cleaved shard ranges are also misplaced.
1016
1824
  cleave_context = CleavingContext.load(broker)
@@ -1038,8 +1846,7 @@ class ContainerSharder(ContainerReplicator):
1038
1846
  :return: True if all misplaced objects were sufficiently replicated to
1039
1847
  their correct shard containers, False otherwise
1040
1848
  """
1041
- self.logger.debug('Looking for misplaced objects in %s (%s)',
1042
- broker.path, broker.db_file)
1849
+ self.debug(broker, 'Looking for misplaced objects')
1043
1850
  self._increment_stat('misplaced', 'attempted')
1044
1851
  src_broker = src_broker or broker
1045
1852
  if src_bounds is None:
@@ -1047,22 +1854,27 @@ class ContainerSharder(ContainerReplicator):
1047
1854
  # (ab)use ShardRange instances to encapsulate source namespaces
1048
1855
  src_ranges = [ShardRange('dont/care', Timestamp.now(), lower, upper)
1049
1856
  for lower, upper in src_bounds]
1050
- self.logger.debug('misplaced object source bounds %s' % src_bounds)
1857
+ self.debug(broker, 'misplaced object source bounds %s', src_bounds)
1051
1858
  policy_index = broker.storage_policy_index
1052
1859
  success = True
1053
- num_found = 0
1860
+ num_placed = num_unplaced = 0
1054
1861
  for src_shard_range in src_ranges:
1055
- part_success, part_num_found = self._move_objects(
1862
+ part_success, part_placed, part_unplaced = self._move_objects(
1056
1863
  src_broker, src_shard_range, policy_index,
1057
1864
  self._make_shard_range_fetcher(broker, src_shard_range))
1058
1865
  success &= part_success
1059
- num_found += part_num_found
1866
+ num_placed += part_placed
1867
+ num_unplaced += part_unplaced
1060
1868
 
1061
- if num_found:
1869
+ if num_placed or num_unplaced:
1870
+ # the found stat records the number of DBs in which any misplaced
1871
+ # rows were found, not the total number of misplaced rows
1062
1872
  self._increment_stat('misplaced', 'found', statsd=True)
1063
- self.logger.debug('Moved %s misplaced objects' % num_found)
1064
- self._increment_stat('misplaced', 'success' if success else 'failure')
1065
- self.logger.debug('Finished handling misplaced objects')
1873
+ self.debug(broker, 'Placed %s misplaced objects (%s unplaced)',
1874
+ num_placed, num_unplaced)
1875
+ self._increment_stat('misplaced', 'success' if success else 'failure',
1876
+ statsd=True)
1877
+ self.debug(broker, 'Finished handling misplaced objects')
1066
1878
  return success
1067
1879
 
1068
1880
  def _find_shard_ranges(self, broker):
@@ -1078,25 +1890,26 @@ class ContainerSharder(ContainerReplicator):
1078
1890
  own_shard_range = broker.get_own_shard_range()
1079
1891
  shard_ranges = broker.get_shard_ranges()
1080
1892
  if shard_ranges and shard_ranges[-1].upper >= own_shard_range.upper:
1081
- self.logger.debug('Scan already completed for %s', broker.path)
1893
+ self.debug(broker, 'Scan for shard ranges already completed')
1082
1894
  return 0
1083
1895
 
1084
- self.logger.info('Starting scan for shard ranges on %s', broker.path)
1896
+ self.info(broker, 'Starting scan for shard ranges')
1085
1897
  self._increment_stat('scanned', 'attempted')
1086
1898
 
1087
1899
  start = time.time()
1088
1900
  shard_data, last_found = broker.find_shard_ranges(
1089
- self.split_size, limit=self.scanner_batch_size,
1090
- existing_ranges=shard_ranges)
1901
+ self.rows_per_shard, limit=self.shard_scanner_batch_size,
1902
+ existing_ranges=shard_ranges,
1903
+ minimum_shard_size=self.minimum_shard_size)
1091
1904
  elapsed = time.time() - start
1092
1905
 
1093
1906
  if not shard_data:
1094
1907
  if last_found:
1095
- self.logger.info("Already found all shard ranges")
1908
+ self.info(broker, "Already found all shard ranges")
1096
1909
  self._increment_stat('scanned', 'success', statsd=True)
1097
1910
  else:
1098
1911
  # we didn't find anything
1099
- self.logger.warning("No shard ranges found")
1912
+ self.warning(broker, "No shard ranges found")
1100
1913
  self._increment_stat('scanned', 'failure', statsd=True)
1101
1914
  return 0
1102
1915
 
@@ -1104,14 +1917,14 @@ class ContainerSharder(ContainerReplicator):
1104
1917
  broker, shard_data, self.shards_account_prefix)
1105
1918
  broker.merge_shard_ranges(shard_ranges)
1106
1919
  num_found = len(shard_ranges)
1107
- self.logger.info(
1108
- "Completed scan for shard ranges: %d found", num_found)
1109
- self._increment_stat('scanned', 'found', step=num_found)
1920
+ self.info(broker, "Completed scan for shard ranges: %d found",
1921
+ num_found)
1922
+ self._update_stat('scanned', 'found', step=num_found)
1110
1923
  self._min_stat('scanned', 'min_time', round(elapsed / num_found, 3))
1111
1924
  self._max_stat('scanned', 'max_time', round(elapsed / num_found, 3))
1112
1925
 
1113
1926
  if last_found:
1114
- self.logger.info("Final shard range reached.")
1927
+ self.info(broker, "Final shard range reached.")
1115
1928
  self._increment_stat('scanned', 'success', statsd=True)
1116
1929
  return num_found
1117
1930
 
@@ -1119,26 +1932,34 @@ class ContainerSharder(ContainerReplicator):
1119
1932
  # Create shard containers that are ready to receive redirected object
1120
1933
  # updates. Do this now, so that redirection can begin immediately
1121
1934
  # without waiting for cleaving to complete.
1122
- found_ranges = broker.get_shard_ranges(states=ShardRange.FOUND)
1935
+ found_ranges = broker.get_shard_ranges(states=[ShardRange.FOUND])
1123
1936
  created_ranges = []
1124
1937
  for shard_range in found_ranges:
1125
1938
  self._increment_stat('created', 'attempted')
1126
1939
  shard_range.update_state(ShardRange.CREATED)
1127
1940
  headers = {
1128
1941
  'X-Backend-Storage-Policy-Index': broker.storage_policy_index,
1129
- 'X-Container-Sysmeta-Shard-Root': broker.root_path,
1130
- 'X-Container-Sysmeta-Sharding': True}
1942
+ 'X-Container-Sysmeta-Shard-Quoted-Root': quote(
1943
+ broker.root_path),
1944
+ 'X-Container-Sysmeta-Sharding': 'True',
1945
+ 'X-Backend-Auto-Create': 'True'}
1946
+ # NB: we *used* to send along
1947
+ # 'X-Container-Sysmeta-Shard-Root': broker.root_path
1948
+ # but that isn't safe for container names with nulls or newlines
1949
+ # (or possibly some other characters). We consciously *don't* make
1950
+ # any attempt to set the old meta; during an upgrade, some shards
1951
+ # may think they are in fact roots, but it cleans up well enough
1952
+ # once everyone's upgraded.
1131
1953
  success = self._send_shard_ranges(
1132
- shard_range.account, shard_range.container,
1954
+ broker, shard_range.account, shard_range.container,
1133
1955
  [shard_range], headers=headers)
1134
1956
  if success:
1135
- self.logger.debug('PUT new shard range container for %s',
1136
- shard_range)
1957
+ self.debug(broker, 'PUT new shard range container for %s',
1958
+ shard_range)
1137
1959
  self._increment_stat('created', 'success', statsd=True)
1138
1960
  else:
1139
- self.logger.error(
1140
- 'PUT of new shard container %r failed for %s.',
1141
- shard_range, broker.path)
1961
+ self.error(broker, 'PUT of new shard container %r failed',
1962
+ shard_range)
1142
1963
  self._increment_stat('created', 'failure', statsd=True)
1143
1964
  # break, not continue, because elsewhere it is assumed that
1144
1965
  # finding and cleaving shard ranges progresses linearly, so we
@@ -1150,31 +1971,17 @@ class ContainerSharder(ContainerReplicator):
1150
1971
  if created_ranges:
1151
1972
  broker.merge_shard_ranges(created_ranges)
1152
1973
  if not broker.is_root_container():
1153
- self._send_shard_ranges(
1154
- broker.root_account, broker.root_container, created_ranges)
1155
- self.logger.info(
1156
- "Completed creating shard range containers: %d created.",
1157
- len(created_ranges))
1974
+ self._send_shard_ranges(broker, broker.root_account,
1975
+ broker.root_container, created_ranges)
1976
+ self.info(broker, "Completed creating %d shard range containers",
1977
+ len(created_ranges))
1158
1978
  return len(created_ranges)
1159
1979
 
1160
- def _cleave_shard_range(self, broker, cleaving_context, shard_range):
1161
- self.logger.info("Cleaving '%s' from row %s into %s for %r",
1162
- broker.path, cleaving_context.last_cleave_to_row,
1163
- shard_range.name, shard_range)
1164
- self._increment_stat('cleaved', 'attempted')
1980
+ def _cleave_shard_broker(self, broker, cleaving_context, shard_range,
1981
+ own_shard_range, shard_broker, put_timestamp,
1982
+ shard_part, node_id):
1983
+ result = CLEAVE_SUCCESS
1165
1984
  start = time.time()
1166
- policy_index = broker.storage_policy_index
1167
- try:
1168
- shard_part, shard_broker, node_id, put_timestamp = \
1169
- self._get_shard_broker(shard_range, broker.root_path,
1170
- policy_index)
1171
- except DeviceUnavailable as duex:
1172
- self.logger.warning(str(duex))
1173
- self._increment_stat('cleaved', 'failure', statsd=True)
1174
- return CLEAVE_FAILED
1175
-
1176
- own_shard_range = broker.get_own_shard_range()
1177
-
1178
1985
  # only cleave from the retiring db - misplaced objects handler will
1179
1986
  # deal with any objects in the fresh db
1180
1987
  source_broker = broker.get_brokers()[0]
@@ -1193,23 +2000,15 @@ class ContainerSharder(ContainerReplicator):
1193
2000
  since_row=sync_from_row):
1194
2001
  shard_broker.merge_items(objects)
1195
2002
  if objects is None:
1196
- self.logger.info("Cleaving '%s': %r - zero objects found",
1197
- broker.path, shard_range)
2003
+ self.info(broker, "Cleaving %r - zero objects found",
2004
+ shard_range)
1198
2005
  if shard_broker.get_info()['put_timestamp'] == put_timestamp:
1199
2006
  # This was just created; don't need to replicate this
1200
2007
  # SR because there was nothing there. So cleanup and
1201
2008
  # remove the shard_broker from its hand off location.
1202
- self.delete_db(shard_broker)
1203
- cleaving_context.cursor = shard_range.upper_str
1204
- cleaving_context.ranges_done += 1
1205
- cleaving_context.ranges_todo -= 1
1206
- if shard_range.upper >= own_shard_range.upper:
1207
- # cleaving complete
1208
- cleaving_context.cleaving_done = True
1209
- cleaving_context.store(broker)
1210
2009
  # Because nothing was here we wont count it in the shard
1211
2010
  # batch count.
1212
- return CLEAVE_EMPTY
2011
+ result = CLEAVE_EMPTY
1213
2012
  # Else, it wasn't newly created by us, and
1214
2013
  # we don't know what's in it or why. Let it get
1215
2014
  # replicated and counted in the batch count.
@@ -1225,20 +2024,25 @@ class ContainerSharder(ContainerReplicator):
1225
2024
  [{'sync_point': source_max_row, 'remote_id': source_db_id}] +
1226
2025
  source_broker.get_syncs())
1227
2026
  else:
1228
- self.logger.debug("Cleaving '%s': %r - shard db already in sync",
1229
- broker.path, shard_range)
2027
+ self.debug(broker, "Cleaving %r - shard db already in sync",
2028
+ shard_range)
1230
2029
 
1231
2030
  replication_quorum = self.existing_shard_replication_quorum
1232
- if shard_range.includes(own_shard_range):
1233
- # When shrinking, include deleted own (donor) shard range in
1234
- # the replicated db so that when acceptor next updates root it
1235
- # will atomically update its namespace *and* delete the donor.
1236
- # Don't do this when sharding a shard because the donor
1237
- # namespace should not be deleted until all shards are cleaved.
1238
- if own_shard_range.update_state(ShardRange.SHARDED):
1239
- own_shard_range.set_deleted()
1240
- broker.merge_shard_ranges(own_shard_range)
1241
- shard_broker.merge_shard_ranges(own_shard_range)
2031
+ if own_shard_range.state in ShardRange.SHRINKING_STATES:
2032
+ if shard_range.includes(own_shard_range):
2033
+ # When shrinking to a single acceptor that completely encloses
2034
+ # this shard's namespace, include deleted own (donor) shard
2035
+ # range in the replicated db so that when acceptor next updates
2036
+ # root it will atomically update its namespace *and* delete the
2037
+ # donor. This reduces the chance of a temporary listing gap if
2038
+ # this shard fails to update the root with its SHRUNK/deleted
2039
+ # state. Don't do this when sharding a shard or shrinking to
2040
+ # multiple acceptors because in those cases the donor namespace
2041
+ # should not be deleted until *all* shards are cleaved.
2042
+ if own_shard_range.update_state(ShardRange.SHRUNK):
2043
+ own_shard_range.set_deleted()
2044
+ broker.merge_shard_ranges(own_shard_range)
2045
+ shard_broker.merge_shard_ranges(own_shard_range)
1242
2046
  elif shard_range.state == ShardRange.CREATED:
1243
2047
  # The shard range object stats may have changed since the shard
1244
2048
  # range was found, so update with stats of objects actually
@@ -1247,53 +2051,74 @@ class ContainerSharder(ContainerReplicator):
1247
2051
  info = shard_broker.get_info()
1248
2052
  shard_range.update_meta(
1249
2053
  info['object_count'], info['bytes_used'])
2054
+ # Update state to CLEAVED; only do this when sharding, not when
2055
+ # shrinking
1250
2056
  shard_range.update_state(ShardRange.CLEAVED)
1251
2057
  shard_broker.merge_shard_ranges(shard_range)
1252
2058
  replication_quorum = self.shard_replication_quorum
1253
2059
 
1254
- self.logger.info(
1255
- 'Replicating new shard container %s for %s',
1256
- shard_broker.path, shard_broker.get_own_shard_range())
1257
-
1258
- success, responses = self._replicate_object(
1259
- shard_part, shard_broker.db_file, node_id)
2060
+ if result == CLEAVE_EMPTY:
2061
+ self.delete_db(shard_broker)
2062
+ else: # result == CLEAVE_SUCCESS:
2063
+ self.info(broker, 'Replicating new shard container %s for %s',
2064
+ quote(shard_broker.path), own_shard_range)
2065
+
2066
+ success, responses = self._replicate_object(
2067
+ shard_part, shard_broker.db_file, node_id)
2068
+
2069
+ replication_successes = responses.count(True)
2070
+ if (not success and (not responses or
2071
+ replication_successes < replication_quorum)):
2072
+ # insufficient replication or replication not even attempted;
2073
+ # break because we don't want to progress the cleave cursor
2074
+ # until each shard range has been successfully cleaved
2075
+ self.warning(
2076
+ broker, 'Failed to sufficiently replicate cleaved shard '
2077
+ '%s in state %s: %s successes, %s required, '
2078
+ 'shard db: %s',
2079
+ shard_broker.path, shard_range.state_text,
2080
+ replication_successes, replication_quorum,
2081
+ shard_broker.db_file)
2082
+ self._increment_stat('cleaved', 'failure', statsd=True)
2083
+ result = CLEAVE_FAILED
2084
+ else:
2085
+ elapsed = round(time.time() - start, 3)
2086
+ self._min_stat('cleaved', 'min_time', elapsed)
2087
+ self._max_stat('cleaved', 'max_time', elapsed)
2088
+ self.info(broker, 'Cleaved %s in %gs', shard_range,
2089
+ elapsed)
2090
+ self._increment_stat('cleaved', 'success', statsd=True)
2091
+
2092
+ if result in (CLEAVE_SUCCESS, CLEAVE_EMPTY):
2093
+ broker.merge_shard_ranges(shard_range)
2094
+ cleaving_context.range_done(shard_range.upper_str)
2095
+ if shard_range.upper >= own_shard_range.upper:
2096
+ # cleaving complete
2097
+ cleaving_context.cleaving_done = True
2098
+ cleaving_context.store(broker)
2099
+ return result
1260
2100
 
1261
- replication_successes = responses.count(True)
1262
- if (not success and (not responses or
1263
- replication_successes < replication_quorum)):
1264
- # insufficient replication or replication not even attempted;
1265
- # break because we don't want to progress the cleave cursor
1266
- # until each shard range has been successfully cleaved
1267
- self.logger.warning(
1268
- 'Failed to sufficiently replicate cleaved shard %s for %s: '
1269
- '%s successes, %s required.', shard_range, broker.path,
1270
- replication_successes, replication_quorum)
1271
- self._increment_stat('cleaved', 'failure', statsd=True)
1272
- return CLEAVE_FAILED
1273
-
1274
- elapsed = round(time.time() - start, 3)
1275
- self._min_stat('cleaved', 'min_time', elapsed)
1276
- self._max_stat('cleaved', 'max_time', elapsed)
1277
- broker.merge_shard_ranges(shard_range)
1278
- cleaving_context.cursor = shard_range.upper_str
1279
- cleaving_context.ranges_done += 1
1280
- cleaving_context.ranges_todo -= 1
1281
- if shard_range.upper >= own_shard_range.upper:
1282
- # cleaving complete
1283
- cleaving_context.cleaving_done = True
1284
- cleaving_context.store(broker)
1285
- self.logger.info(
1286
- 'Cleaved %s for shard range %s in %gs.',
1287
- broker.path, shard_range, elapsed)
1288
- self._increment_stat('cleaved', 'success', statsd=True)
1289
- return CLEAVE_SUCCESS
2101
+ def _cleave_shard_range(self, broker, cleaving_context, shard_range,
2102
+ own_shard_range):
2103
+ self.info(broker, "Cleaving from row %s into %s for %r",
2104
+ cleaving_context.last_cleave_to_row,
2105
+ quote(shard_range.name), shard_range)
2106
+ self._increment_stat('cleaved', 'attempted')
2107
+ policy_index = broker.storage_policy_index
2108
+ shard_part, shard_broker, node_id, put_timestamp = \
2109
+ self._get_shard_broker(shard_range, broker.root_path,
2110
+ policy_index)
2111
+ stat = 'db_exists' if put_timestamp is None else 'db_created'
2112
+ self._increment_stat('cleaved', stat, statsd=True)
2113
+ return self._cleave_shard_broker(
2114
+ broker, cleaving_context, shard_range, own_shard_range,
2115
+ shard_broker, put_timestamp, shard_part, node_id)
1290
2116
 
1291
2117
  def _cleave(self, broker):
1292
2118
  # Returns True if misplaced objects have been moved and the entire
1293
2119
  # container namespace has been successfully cleaved, False otherwise
1294
2120
  if broker.is_sharded():
1295
- self.logger.debug('Passing over already sharded container %s/%s',
1296
- broker.account, broker.container)
2121
+ self.debug(broker, 'Passing over already sharded container')
1297
2122
  return True
1298
2123
 
1299
2124
  cleaving_context = CleavingContext.load(broker)
@@ -1301,9 +2126,8 @@ class ContainerSharder(ContainerReplicator):
1301
2126
  # ensure any misplaced objects in the source broker are moved; note
1302
2127
  # that this invocation of _move_misplaced_objects is targetted at
1303
2128
  # the *retiring* db.
1304
- self.logger.debug(
1305
- 'Moving any misplaced objects from sharding container: %s',
1306
- broker.path)
2129
+ self.debug(broker,
2130
+ 'Moving any misplaced objects from sharding container')
1307
2131
  bounds = self._make_default_misplaced_object_bounds(broker)
1308
2132
  cleaving_context.misplaced_done = self._move_misplaced_objects(
1309
2133
  broker, src_broker=broker.get_brokers()[0],
@@ -1311,53 +2135,78 @@ class ContainerSharder(ContainerReplicator):
1311
2135
  cleaving_context.store(broker)
1312
2136
 
1313
2137
  if cleaving_context.cleaving_done:
1314
- self.logger.debug('Cleaving already complete for container %s',
1315
- broker.path)
2138
+ self.debug(broker, 'Cleaving already complete for container')
1316
2139
  return cleaving_context.misplaced_done
1317
2140
 
1318
- ranges_todo = broker.get_shard_ranges(marker=cleaving_context.marker)
2141
+ shard_ranges = broker.get_shard_ranges(marker=cleaving_context.marker)
2142
+ # Ignore shrinking shard ranges: we never want to cleave objects to a
2143
+ # shrinking shard. Shrinking shard ranges are to be expected in a root;
2144
+ # shrinking shard ranges (other than own shard range) are not normally
2145
+ # expected in a shard but can occur if there is an overlapping shard
2146
+ # range that has been discovered from the root.
2147
+ ranges_todo = [sr for sr in shard_ranges
2148
+ if sr.state != ShardRange.SHRINKING]
1319
2149
  if cleaving_context.cursor:
1320
- # always update ranges_todo in case more ranges have been found
1321
- # since last visit
2150
+ # always update ranges_todo in case shard ranges have changed since
2151
+ # last visit
1322
2152
  cleaving_context.ranges_todo = len(ranges_todo)
1323
- self.logger.debug('Continuing to cleave (%s done, %s todo): %s',
1324
- cleaving_context.ranges_done,
1325
- cleaving_context.ranges_todo,
1326
- broker.path)
2153
+ self.debug(broker, 'Continuing to cleave (%s done, %s todo)',
2154
+ cleaving_context.ranges_done,
2155
+ cleaving_context.ranges_todo)
1327
2156
  else:
1328
2157
  cleaving_context.start()
2158
+ own_shard_range = broker.get_own_shard_range()
2159
+ cleaving_context.cursor = own_shard_range.lower_str
1329
2160
  cleaving_context.ranges_todo = len(ranges_todo)
1330
- self.logger.debug('Starting to cleave (%s todo): %s',
1331
- cleaving_context.ranges_todo, broker.path)
2161
+ self.info(broker, 'Starting to cleave (%s todo)',
2162
+ cleaving_context.ranges_todo)
2163
+
2164
+ own_shard_range = broker.get_own_shard_range(no_default=True)
2165
+ if own_shard_range is None:
2166
+ # A default should never be SHRINKING or SHRUNK but because we
2167
+ # may write own_shard_range back to broker, let's make sure
2168
+ # it can't be defaulted.
2169
+ self.warning(broker, 'Failed to get own_shard_range')
2170
+ ranges_todo = [] # skip cleaving
1332
2171
 
1333
2172
  ranges_done = []
1334
2173
  for shard_range in ranges_todo:
1335
- if shard_range.state == ShardRange.FOUND:
2174
+ if cleaving_context.cleaving_done:
2175
+ # note: there may still be ranges_todo, for example: if this
2176
+ # shard is shrinking and has merged a root shard range in
2177
+ # sharded state along with an active acceptor shard range, but
2178
+ # the root range is irrelevant
1336
2179
  break
1337
- elif shard_range.state in (ShardRange.CREATED,
1338
- ShardRange.CLEAVED,
1339
- ShardRange.ACTIVE):
1340
- cleave_result = self._cleave_shard_range(
1341
- broker, cleaving_context, shard_range)
1342
- if cleave_result == CLEAVE_SUCCESS:
1343
- ranges_done.append(shard_range)
1344
- if len(ranges_done) == self.cleave_batch_size:
1345
- break
1346
- elif cleave_result == CLEAVE_FAILED:
1347
- break
1348
- # else, no errors, but no rows found either. keep going,
1349
- # and don't count it against our batch size
1350
- else:
1351
- self.logger.warning('Unexpected shard range state for cleave',
1352
- shard_range.state)
2180
+
2181
+ if len(ranges_done) == self.cleave_batch_size:
1353
2182
  break
1354
2183
 
1355
- if not ranges_done:
1356
- # _cleave_shard_range always store()s the context on success; make
1357
- # sure we *also* do that if we hit a failure right off the bat
1358
- cleaving_context.store(broker)
1359
- self.logger.debug(
1360
- 'Cleaved %s shard ranges for %s', len(ranges_done), broker.path)
2184
+ if shard_range.lower > cleaving_context.cursor:
2185
+ self.info(broker, 'Stopped cleave at gap: %r - %r' %
2186
+ (cleaving_context.cursor, shard_range.lower))
2187
+ break
2188
+
2189
+ if shard_range.state not in (ShardRange.CREATED,
2190
+ ShardRange.CLEAVED,
2191
+ ShardRange.ACTIVE):
2192
+ self.info(broker, 'Stopped cleave at unready %s', shard_range)
2193
+ break
2194
+
2195
+ cleave_result = self._cleave_shard_range(
2196
+ broker, cleaving_context, shard_range, own_shard_range)
2197
+
2198
+ if cleave_result == CLEAVE_SUCCESS:
2199
+ ranges_done.append(shard_range)
2200
+ elif cleave_result == CLEAVE_FAILED:
2201
+ break
2202
+ # else: CLEAVE_EMPTY: no errors, but no rows found either. keep
2203
+ # going, and don't count it against our batch size
2204
+
2205
+ # _cleave_shard_range always store()s the context on success; *also* do
2206
+ # that here in case we hit a failure right off the bat or ended loop
2207
+ # with skipped ranges
2208
+ cleaving_context.store(broker)
2209
+ self.debug(broker, 'Cleaved %s shard ranges', len(ranges_done))
1361
2210
  return (cleaving_context.misplaced_done and
1362
2211
  cleaving_context.cleaving_done)
1363
2212
 
@@ -1367,13 +2216,23 @@ class ContainerSharder(ContainerReplicator):
1367
2216
  # Move all CLEAVED shards to ACTIVE state and if a shard then
1368
2217
  # delete own shard range; these changes will be simultaneously
1369
2218
  # reported in the next update to the root container.
1370
- modified_shard_ranges = broker.get_shard_ranges(
1371
- states=ShardRange.CLEAVED)
1372
- for sr in modified_shard_ranges:
1373
- sr.update_state(ShardRange.ACTIVE)
1374
- own_shard_range = broker.get_own_shard_range()
1375
- own_shard_range.update_state(ShardRange.SHARDED)
2219
+ own_shard_range = broker.get_own_shard_range(no_default=True)
2220
+ if own_shard_range is None:
2221
+ # This is more of a belts and braces, not sure we could even
2222
+ # get this far with without an own_shard_range. But because
2223
+ # we will be writing own_shard_range back, we need to make sure
2224
+ self.warning(broker, 'Failed to get own_shard_range')
2225
+ return False
1376
2226
  own_shard_range.update_meta(0, 0)
2227
+ if own_shard_range.state in ShardRange.SHRINKING_STATES:
2228
+ own_shard_range.update_state(ShardRange.SHRUNK)
2229
+ modified_shard_ranges = []
2230
+ else:
2231
+ own_shard_range.update_state(ShardRange.SHARDED)
2232
+ modified_shard_ranges = broker.get_shard_ranges(
2233
+ states=[ShardRange.CLEAVED])
2234
+ for sr in modified_shard_ranges:
2235
+ sr.update_state(ShardRange.ACTIVE)
1377
2236
  if (not broker.is_root_container() and not
1378
2237
  own_shard_range.deleted):
1379
2238
  own_shard_range = own_shard_range.copy(
@@ -1381,16 +2240,12 @@ class ContainerSharder(ContainerReplicator):
1381
2240
  modified_shard_ranges.append(own_shard_range)
1382
2241
  broker.merge_shard_ranges(modified_shard_ranges)
1383
2242
  if broker.set_sharded_state():
1384
- cleaving_context.delete(broker)
1385
2243
  return True
1386
2244
  else:
1387
- self.logger.warning(
1388
- 'Failed to remove retiring db file for %s',
1389
- broker.path)
2245
+ self.warning(broker, 'Failed to remove retiring db file')
1390
2246
  else:
1391
- self.logger.warning(
1392
- 'Repeat cleaving required for %r with context: %s'
1393
- % (broker.db_files[0], dict(cleaving_context)))
2247
+ self.warning(broker, 'Repeat cleaving required, context: %s',
2248
+ dict(cleaving_context))
1394
2249
  cleaving_context.reset()
1395
2250
  cleaving_context.store(broker)
1396
2251
 
@@ -1400,102 +2255,138 @@ class ContainerSharder(ContainerReplicator):
1400
2255
  candidates = find_sharding_candidates(
1401
2256
  broker, self.shard_container_threshold, shard_ranges)
1402
2257
  if candidates:
1403
- self.logger.debug('Identified %s sharding candidates'
1404
- % len(candidates))
2258
+ self.debug(broker, 'Identified %s sharding candidates',
2259
+ len(candidates))
1405
2260
  broker.merge_shard_ranges(candidates)
1406
2261
 
1407
2262
  def _find_and_enable_shrinking_candidates(self, broker):
1408
2263
  if not broker.is_sharded():
1409
- self.logger.warning('Cannot shrink a not yet sharded container %s',
1410
- broker.path)
2264
+ self.warning(broker, 'Cannot shrink a not yet sharded container')
1411
2265
  return
1412
2266
 
1413
- merge_pairs = find_shrinking_candidates(
1414
- broker, self.shrink_size, self.merge_size)
1415
- self.logger.debug('Found %s shrinking candidates' % len(merge_pairs))
2267
+ compactible_sequences = find_compactible_shard_sequences(
2268
+ broker, self.shrink_threshold, self.expansion_limit,
2269
+ self.max_shrinking, self.max_expanding, include_shrinking=True)
2270
+ self.debug(broker, 'Found %s compactible sequences of length(s) %s' %
2271
+ (len(compactible_sequences),
2272
+ [len(s) for s in compactible_sequences]))
2273
+ process_compactible_shard_sequences(broker, compactible_sequences)
1416
2274
  own_shard_range = broker.get_own_shard_range()
1417
- for acceptor, donor in merge_pairs.items():
1418
- self.logger.debug('shrinking shard range %s into %s in %s' %
1419
- (donor, acceptor, broker.db_file))
1420
- broker.merge_shard_ranges([acceptor, donor])
2275
+ for sequence in compactible_sequences:
2276
+ acceptor = sequence[-1]
2277
+ donors = ShardRangeList(sequence[:-1])
2278
+ self.debug(broker,
2279
+ 'shrinking %d objects from %d shard ranges into %s' %
2280
+ (donors.object_count, len(donors), acceptor))
1421
2281
  if acceptor.name != own_shard_range.name:
1422
- self._send_shard_ranges(
1423
- acceptor.account, acceptor.container, [acceptor])
1424
- acceptor.increment_meta(donor.object_count, donor.bytes_used)
1425
- else:
1426
- # no need to change namespace or stats
1427
- acceptor.update_state(ShardRange.ACTIVE,
1428
- state_timestamp=Timestamp.now())
2282
+ self._send_shard_ranges(broker, acceptor.account,
2283
+ acceptor.container, [acceptor])
2284
+ acceptor.increment_meta(donors.object_count, donors.bytes_used)
1429
2285
  # Now send a copy of the expanded acceptor, with an updated
1430
- # timestamp, to the donor container. This forces the donor to
2286
+ # timestamp, to each donor container. This forces each donor to
1431
2287
  # asynchronously cleave its entire contents to the acceptor and
1432
2288
  # delete itself. The donor will pass its own deleted shard range to
1433
2289
  # the acceptor when cleaving. Subsequent updates from the donor or
1434
2290
  # the acceptor will then update the root to have the deleted donor
1435
2291
  # shard range.
1436
- self._send_shard_ranges(
1437
- donor.account, donor.container, [donor, acceptor])
2292
+ for donor in donors:
2293
+ self._send_shard_ranges(broker, donor.account,
2294
+ donor.container, [donor, acceptor])
1438
2295
 
1439
2296
  def _update_root_container(self, broker):
1440
2297
  own_shard_range = broker.get_own_shard_range(no_default=True)
1441
2298
  if not own_shard_range:
1442
2299
  return
1443
2300
 
1444
- # persist the reported shard metadata
1445
- broker.merge_shard_ranges(own_shard_range)
2301
+ # Don't update the osr stats including tombstones unless it's CLEAVED+
2302
+ if own_shard_range.state in SHARD_UPDATE_STAT_STATES:
2303
+ # do a reclaim *now* in order to get best estimate of tombstone
2304
+ # count that is consistent with the current object_count
2305
+ reclaimer = self._reclaim(broker)
2306
+ tombstones = reclaimer.get_tombstone_count()
2307
+ self.debug(broker, 'tombstones = %d', tombstones)
2308
+ # shrinking candidates are found in the root DB so that's the only
2309
+ # place we need up to date tombstone stats.
2310
+ own_shard_range.update_tombstones(tombstones)
2311
+ update_own_shard_range_stats(broker, own_shard_range)
2312
+
2313
+ if not own_shard_range.reported:
2314
+ broker.merge_shard_ranges(own_shard_range)
2315
+
2316
+ # we can't use `state not in SHARD_UPDATE_STAT_STATES` to return
2317
+ # because there are cases we still want to update root even if the
2318
+ # stats are wrong. Such as it's a new shard or something else has
2319
+ # decided to remove the latch to update root.
2320
+ if own_shard_range.reported:
2321
+ return
2322
+
1446
2323
  # now get a consistent list of own and other shard ranges
1447
2324
  shard_ranges = broker.get_shard_ranges(
1448
2325
  include_own=True,
1449
2326
  include_deleted=True)
1450
2327
  # send everything
1451
- self._send_shard_ranges(
1452
- broker.root_account, broker.root_container,
1453
- shard_ranges)
2328
+ if self._send_shard_ranges(broker, broker.root_account,
2329
+ broker.root_container, shard_ranges,
2330
+ {'Referer': quote(broker.path)}):
2331
+ # on success, mark ourselves as reported so we don't keep
2332
+ # hammering the root
2333
+ own_shard_range.reported = True
2334
+ broker.merge_shard_ranges(own_shard_range)
2335
+ self.debug(broker, 'updated root objs=%d, tombstones=%s',
2336
+ own_shard_range.object_count,
2337
+ own_shard_range.tombstones)
1454
2338
 
1455
2339
  def _process_broker(self, broker, node, part):
1456
2340
  broker.get_info() # make sure account/container are populated
1457
- state = broker.get_db_state()
1458
- self.logger.debug('Starting processing %s state %s',
1459
- broker.path, state)
2341
+ db_state = broker.get_db_state()
2342
+ is_deleted = broker.is_deleted()
2343
+ self.debug(broker, 'Starting processing, state %s%s', db_state,
2344
+ ' (deleted)' if is_deleted else '')
1460
2345
 
1461
2346
  if not self._audit_container(broker):
1462
2347
  return
1463
2348
 
1464
2349
  # now look and deal with misplaced objects.
2350
+ move_start_ts = time.time()
1465
2351
  self._move_misplaced_objects(broker)
2352
+ self.logger.timing_since(
2353
+ 'sharder.sharding.move_misplaced', move_start_ts)
1466
2354
 
1467
- if broker.is_deleted():
1468
- # This container is deleted so we can skip it. We still want
1469
- # deleted containers to go via misplaced items because they may
1470
- # have new objects sitting in them that may need to move.
1471
- return
2355
+ is_leader = node['index'] == 0 and self.auto_shard and not is_deleted
1472
2356
 
1473
- is_leader = node['index'] == 0 and self.auto_shard
1474
- if state in (UNSHARDED, COLLAPSED):
2357
+ if db_state in (UNSHARDED, COLLAPSED):
1475
2358
  if is_leader and broker.is_root_container():
1476
2359
  # bootstrap sharding of root container
2360
+ own_shard_range = broker.get_own_shard_range()
2361
+ update_own_shard_range_stats(broker, own_shard_range)
1477
2362
  self._find_and_enable_sharding_candidates(
1478
- broker, shard_ranges=[broker.get_own_shard_range()])
2363
+ broker, shard_ranges=[own_shard_range])
1479
2364
 
1480
2365
  own_shard_range = broker.get_own_shard_range()
1481
- if own_shard_range.state in (ShardRange.SHARDING,
1482
- ShardRange.SHRINKING,
1483
- ShardRange.SHARDED):
1484
- if broker.get_shard_ranges():
2366
+ if own_shard_range.state in ShardRange.CLEAVING_STATES:
2367
+ if broker.has_other_shard_ranges():
1485
2368
  # container has been given shard ranges rather than
1486
- # found them e.g. via replication or a shrink event
2369
+ # found them e.g. via replication or a shrink event,
2370
+ # or manually triggered cleaving.
2371
+ db_start_ts = time.time()
1487
2372
  if broker.set_sharding_state():
1488
- state = SHARDING
2373
+ db_state = SHARDING
2374
+ self.info(broker, 'Kick off container cleaving, '
2375
+ 'own shard range in state %r',
2376
+ own_shard_range.state_text)
2377
+ self.logger.timing_since(
2378
+ 'sharder.sharding.set_state', db_start_ts)
1489
2379
  elif is_leader:
1490
2380
  if broker.set_sharding_state():
1491
- state = SHARDING
2381
+ db_state = SHARDING
1492
2382
  else:
1493
- self.logger.debug(
1494
- 'Own shard range in state %r but no shard ranges '
1495
- 'and not leader; remaining unsharded: %s'
1496
- % (own_shard_range.state_text, broker.path))
2383
+ self.debug(broker,
2384
+ 'Own shard range in state %r but no shard '
2385
+ 'ranges and not leader; remaining unsharded',
2386
+ own_shard_range.state_text)
1497
2387
 
1498
- if state == SHARDING:
2388
+ if db_state == SHARDING:
2389
+ cleave_start_ts = time.time()
1499
2390
  if is_leader:
1500
2391
  num_found = self._find_shard_ranges(broker)
1501
2392
  else:
@@ -1510,38 +2401,53 @@ class ContainerSharder(ContainerReplicator):
1510
2401
 
1511
2402
  # always try to cleave any pending shard ranges
1512
2403
  cleave_complete = self._cleave(broker)
2404
+ self.logger.timing_since(
2405
+ 'sharder.sharding.cleave', cleave_start_ts)
1513
2406
 
1514
2407
  if cleave_complete:
1515
- self.logger.info('Completed cleaving of %s', broker.path)
1516
2408
  if self._complete_sharding(broker):
1517
- state = SHARDED
2409
+ db_state = SHARDED
1518
2410
  self._increment_stat('visited', 'completed', statsd=True)
2411
+ self.info(broker, 'Completed cleaving, DB set to sharded '
2412
+ 'state')
2413
+ self.logger.timing_since(
2414
+ 'sharder.sharding.completed',
2415
+ float(broker.get_own_shard_range().epoch))
1519
2416
  else:
1520
- self.logger.debug('Remaining in sharding state %s',
1521
- broker.path)
2417
+ self.info(broker, 'Completed cleaving, DB remaining in '
2418
+ 'sharding state')
2419
+
2420
+ if not broker.is_deleted():
2421
+ if db_state == SHARDED and broker.is_root_container():
2422
+ # look for shrink stats
2423
+ send_start_ts = time.time()
2424
+ self._identify_shrinking_candidate(broker, node)
2425
+ if is_leader:
2426
+ self._find_and_enable_shrinking_candidates(broker)
2427
+ self._find_and_enable_sharding_candidates(broker)
2428
+ for shard_range in broker.get_shard_ranges(
2429
+ states=[ShardRange.SHARDING]):
2430
+ self._send_shard_ranges(broker, shard_range.account,
2431
+ shard_range.container,
2432
+ [shard_range])
2433
+ self.logger.timing_since(
2434
+ 'sharder.sharding.send_sr', send_start_ts)
1522
2435
 
1523
- if state == SHARDED and broker.is_root_container():
1524
- if is_leader:
1525
- self._find_and_enable_shrinking_candidates(broker)
1526
- self._find_and_enable_sharding_candidates(broker)
1527
- for shard_range in broker.get_shard_ranges(
1528
- states=[ShardRange.SHARDING]):
1529
- self._send_shard_ranges(
1530
- shard_range.account, shard_range.container,
1531
- [shard_range])
1532
-
1533
- if not broker.is_root_container():
1534
- # Update the root container with this container's shard range
1535
- # info; do this even when sharded in case previous attempts
1536
- # failed; don't do this if there is no own shard range. When
1537
- # sharding a shard, this is when the root will see the new
1538
- # shards move to ACTIVE state and the sharded shard
1539
- # simultaneously become deleted.
1540
- self._update_root_container(broker)
1541
-
1542
- self.logger.debug('Finished processing %s/%s state %s',
1543
- broker.account, broker.container,
1544
- broker.get_db_state())
2436
+ if not broker.is_root_container():
2437
+ # Update the root container with this container's shard range
2438
+ # info; do this even when sharded in case previous attempts
2439
+ # failed; don't do this if there is no own shard range. When
2440
+ # sharding a shard, this is when the root will see the new
2441
+ # shards move to ACTIVE state and the sharded shard
2442
+ # simultaneously become deleted.
2443
+ update_start_ts = time.time()
2444
+ self._update_root_container(broker)
2445
+ self.logger.timing_since(
2446
+ 'sharder.sharding.update_root', update_start_ts)
2447
+
2448
+ self.debug(broker,
2449
+ 'Finished processing, state %s%s',
2450
+ broker.get_db_state(), ' (deleted)' if is_deleted else '')
1545
2451
 
1546
2452
  def _one_shard_cycle(self, devices_to_shard, partitions_to_shard):
1547
2453
  """
@@ -1555,6 +2461,7 @@ class ContainerSharder(ContainerReplicator):
1555
2461
  - if not a root container, reports shard range stats to the root
1556
2462
  container
1557
2463
  """
2464
+
1558
2465
  self.logger.info('Container sharder cycle starting, auto-sharding %s',
1559
2466
  self.auto_shard)
1560
2467
  if isinstance(devices_to_shard, (list, tuple)):
@@ -1564,9 +2471,9 @@ class ContainerSharder(ContainerReplicator):
1564
2471
  self.logger.info('(Override partitions: %s)',
1565
2472
  ', '.join(str(p) for p in partitions_to_shard))
1566
2473
  self._zero_stats()
1567
- self._local_device_ids = set()
2474
+ self._local_device_ids = {}
1568
2475
  dirs = []
1569
- self.ips = whataremyips(bind_ip=self.bind_ip)
2476
+ self.ips = whataremyips(self.bind_ip)
1570
2477
  for node in self.ring.devs:
1571
2478
  device_path = self._check_node(node)
1572
2479
  if not device_path:
@@ -1575,7 +2482,7 @@ class ContainerSharder(ContainerReplicator):
1575
2482
  if os.path.isdir(datadir):
1576
2483
  # Populate self._local_device_ids so we can find devices for
1577
2484
  # shard containers later
1578
- self._local_device_ids.add(node['id'])
2485
+ self._local_device_ids[node['id']] = node
1579
2486
  if node['device'] not in devices_to_shard:
1580
2487
  continue
1581
2488
  part_filt = self._partition_dir_filter(
@@ -1583,7 +2490,7 @@ class ContainerSharder(ContainerReplicator):
1583
2490
  partitions_to_shard)
1584
2491
  dirs.append((datadir, node, part_filt))
1585
2492
  if not dirs:
1586
- self.logger.warning('Found no data dirs!')
2493
+ self.logger.info('Found no containers directories')
1587
2494
  for part, path, node in self.roundrobin_datadirs(dirs):
1588
2495
  # NB: get_part_nodes always provides an 'index' key;
1589
2496
  # this will be used in leader selection
@@ -1608,36 +2515,47 @@ class ContainerSharder(ContainerReplicator):
1608
2515
  self._increment_stat('visited', 'skipped')
1609
2516
  except (Exception, Timeout) as err:
1610
2517
  self._increment_stat('visited', 'failure', statsd=True)
1611
- self.logger.exception(
1612
- 'Unhandled exception while processing %s: %s', path, err)
2518
+ self.exception(broker, 'Unhandled exception while processing: '
2519
+ '%s', err)
1613
2520
  error = err
1614
2521
  try:
1615
2522
  self._record_sharding_progress(broker, node, error)
1616
2523
  except (Exception, Timeout) as error:
1617
- self.logger.exception(
1618
- 'Unhandled exception while dumping progress for %s: %s',
1619
- path, error)
2524
+ self.exception(broker, 'Unhandled exception while dumping '
2525
+ 'progress: %s', error)
1620
2526
  self._periodic_report_stats()
1621
2527
 
1622
2528
  self._report_stats()
1623
2529
 
2530
+ @contextmanager
2531
+ def _set_auto_shard_from_command_line(self, **kwargs):
2532
+ conf_auto_shard = self.auto_shard
2533
+ auto_shard = kwargs.get('auto_shard', None)
2534
+ if auto_shard is not None:
2535
+ self.auto_shard = config_true_value(auto_shard)
2536
+ try:
2537
+ yield
2538
+ finally:
2539
+ self.auto_shard = conf_auto_shard
2540
+
1624
2541
  def run_forever(self, *args, **kwargs):
1625
2542
  """Run the container sharder until stopped."""
1626
- self.reported = time.time()
1627
- time.sleep(random() * self.interval)
1628
- while True:
1629
- begin = time.time()
1630
- try:
1631
- self._one_shard_cycle(devices_to_shard=Everything(),
1632
- partitions_to_shard=Everything())
1633
- except (Exception, Timeout):
1634
- self.logger.increment('errors')
1635
- self.logger.exception('Exception in sharder')
1636
- elapsed = time.time() - begin
1637
- self.logger.info(
1638
- 'Container sharder cycle completed: %.02fs', elapsed)
1639
- if elapsed < self.interval:
1640
- time.sleep(self.interval - elapsed)
2543
+ with self._set_auto_shard_from_command_line(**kwargs):
2544
+ self.reported = time.time()
2545
+ time.sleep(random() * self.interval)
2546
+ while True:
2547
+ begin = time.time()
2548
+ try:
2549
+ self._one_shard_cycle(devices_to_shard=Everything(),
2550
+ partitions_to_shard=Everything())
2551
+ except (Exception, Timeout):
2552
+ self.logger.increment('errors')
2553
+ self.logger.exception('Exception in sharder')
2554
+ elapsed = time.time() - begin
2555
+ self.logger.info(
2556
+ 'Container sharder cycle completed: %.02fs', elapsed)
2557
+ if elapsed < self.interval:
2558
+ time.sleep(self.interval - elapsed)
1641
2559
 
1642
2560
  def run_once(self, *args, **kwargs):
1643
2561
  """Run the container sharder once."""
@@ -1645,9 +2563,32 @@ class ContainerSharder(ContainerReplicator):
1645
2563
  override_options = parse_override_options(once=True, **kwargs)
1646
2564
  devices_to_shard = override_options.devices or Everything()
1647
2565
  partitions_to_shard = override_options.partitions or Everything()
1648
- begin = self.reported = time.time()
1649
- self._one_shard_cycle(devices_to_shard=devices_to_shard,
1650
- partitions_to_shard=partitions_to_shard)
1651
- elapsed = time.time() - begin
1652
- self.logger.info(
1653
- 'Container sharder "once" mode completed: %.02fs', elapsed)
2566
+ with self._set_auto_shard_from_command_line(**kwargs):
2567
+ begin = self.reported = time.time()
2568
+ self._one_shard_cycle(devices_to_shard=devices_to_shard,
2569
+ partitions_to_shard=partitions_to_shard)
2570
+ elapsed = time.time() - begin
2571
+ self.logger.info(
2572
+ 'Container sharder "once" mode completed: %.02fs', elapsed)
2573
+
2574
+
2575
+ def main():
2576
+ parser = OptionParser("%prog CONFIG [options]")
2577
+ parser.add_option('-d', '--devices',
2578
+ help='Shard containers only on given devices. '
2579
+ 'Comma-separated list. '
2580
+ 'Only has effect if --once is used.')
2581
+ parser.add_option('-p', '--partitions',
2582
+ help='Shard containers only in given partitions. '
2583
+ 'Comma-separated list. '
2584
+ 'Only has effect if --once is used.')
2585
+ parser.add_option('--no-auto-shard', action='store_false',
2586
+ dest='auto_shard', default=None,
2587
+ help='Disable auto-sharding. Overrides the auto_shard '
2588
+ 'value in the config file.')
2589
+ conf_file, options = parse_options(parser=parser, once=True)
2590
+ run_daemon(ContainerSharder, conf_file, **options)
2591
+
2592
+
2593
+ if __name__ == '__main__':
2594
+ main()