teuthology-1.1.0-py3-none-any.whl → teuthology-1.2.1-py3-none-any.whl

This diff shows the content changes between publicly available package versions released to one of the supported registries. The information is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (170)
  1. scripts/describe.py +1 -0
  2. scripts/dispatcher.py +55 -26
  3. scripts/exporter.py +18 -0
  4. scripts/lock.py +1 -1
  5. scripts/node_cleanup.py +58 -0
  6. scripts/openstack.py +9 -9
  7. scripts/results.py +12 -11
  8. scripts/schedule.py +4 -0
  9. scripts/suite.py +57 -16
  10. scripts/supervisor.py +44 -0
  11. scripts/update_inventory.py +10 -4
  12. teuthology/__init__.py +24 -26
  13. teuthology/beanstalk.py +4 -3
  14. teuthology/config.py +16 -6
  15. teuthology/contextutil.py +18 -14
  16. teuthology/describe_tests.py +25 -18
  17. teuthology/dispatcher/__init__.py +210 -35
  18. teuthology/dispatcher/supervisor.py +140 -58
  19. teuthology/exceptions.py +43 -0
  20. teuthology/exporter.py +347 -0
  21. teuthology/kill.py +76 -81
  22. teuthology/lock/cli.py +3 -3
  23. teuthology/lock/ops.py +135 -61
  24. teuthology/lock/query.py +61 -44
  25. teuthology/ls.py +1 -1
  26. teuthology/misc.py +61 -75
  27. teuthology/nuke/__init__.py +12 -353
  28. teuthology/openstack/__init__.py +4 -3
  29. teuthology/openstack/openstack-centos-7.0-user-data.txt +1 -1
  30. teuthology/openstack/openstack-centos-7.1-user-data.txt +1 -1
  31. teuthology/openstack/openstack-centos-7.2-user-data.txt +1 -1
  32. teuthology/openstack/openstack-debian-8.0-user-data.txt +1 -1
  33. teuthology/openstack/openstack-opensuse-42.1-user-data.txt +1 -1
  34. teuthology/openstack/openstack-teuthology.cron +0 -1
  35. teuthology/orchestra/cluster.py +49 -7
  36. teuthology/orchestra/connection.py +17 -4
  37. teuthology/orchestra/console.py +111 -50
  38. teuthology/orchestra/daemon/cephadmunit.py +15 -2
  39. teuthology/orchestra/daemon/state.py +8 -1
  40. teuthology/orchestra/daemon/systemd.py +4 -4
  41. teuthology/orchestra/opsys.py +30 -11
  42. teuthology/orchestra/remote.py +405 -338
  43. teuthology/orchestra/run.py +3 -3
  44. teuthology/packaging.py +19 -16
  45. teuthology/provision/__init__.py +30 -10
  46. teuthology/provision/cloud/openstack.py +12 -6
  47. teuthology/provision/cloud/util.py +1 -2
  48. teuthology/provision/downburst.py +4 -3
  49. teuthology/provision/fog.py +68 -20
  50. teuthology/provision/openstack.py +5 -4
  51. teuthology/provision/pelagos.py +1 -1
  52. teuthology/repo_utils.py +43 -13
  53. teuthology/report.py +57 -35
  54. teuthology/results.py +5 -3
  55. teuthology/run.py +13 -14
  56. teuthology/run_tasks.py +27 -43
  57. teuthology/schedule.py +4 -3
  58. teuthology/scrape.py +28 -22
  59. teuthology/suite/__init__.py +74 -45
  60. teuthology/suite/build_matrix.py +34 -24
  61. teuthology/suite/fragment-merge.lua +105 -0
  62. teuthology/suite/matrix.py +31 -2
  63. teuthology/suite/merge.py +175 -0
  64. teuthology/suite/placeholder.py +6 -9
  65. teuthology/suite/run.py +175 -100
  66. teuthology/suite/util.py +64 -218
  67. teuthology/task/__init__.py +1 -1
  68. teuthology/task/ansible.py +101 -32
  69. teuthology/task/buildpackages.py +2 -2
  70. teuthology/task/ceph_ansible.py +13 -6
  71. teuthology/task/cephmetrics.py +2 -1
  72. teuthology/task/clock.py +33 -14
  73. teuthology/task/exec.py +18 -0
  74. teuthology/task/hadoop.py +2 -2
  75. teuthology/task/install/__init__.py +29 -7
  76. teuthology/task/install/bin/adjust-ulimits +16 -0
  77. teuthology/task/install/bin/daemon-helper +114 -0
  78. teuthology/task/install/bin/stdin-killer +263 -0
  79. teuthology/task/install/deb.py +1 -1
  80. teuthology/task/install/rpm.py +17 -5
  81. teuthology/task/install/util.py +3 -3
  82. teuthology/task/internal/__init__.py +41 -10
  83. teuthology/task/internal/edit_sudoers.sh +10 -0
  84. teuthology/task/internal/lock_machines.py +2 -9
  85. teuthology/task/internal/redhat.py +31 -1
  86. teuthology/task/internal/syslog.py +31 -8
  87. teuthology/task/kernel.py +152 -145
  88. teuthology/task/lockfile.py +1 -1
  89. teuthology/task/mpi.py +10 -10
  90. teuthology/task/pcp.py +1 -1
  91. teuthology/task/selinux.py +16 -8
  92. teuthology/task/ssh_keys.py +4 -4
  93. teuthology/timer.py +3 -3
  94. teuthology/util/loggerfile.py +19 -0
  95. teuthology/util/scanner.py +159 -0
  96. teuthology/util/sentry.py +52 -0
  97. teuthology/util/time.py +52 -0
  98. teuthology-1.2.1.data/scripts/adjust-ulimits +16 -0
  99. teuthology-1.2.1.data/scripts/daemon-helper +114 -0
  100. teuthology-1.2.1.data/scripts/stdin-killer +263 -0
  101. teuthology-1.2.1.dist-info/METADATA +88 -0
  102. teuthology-1.2.1.dist-info/RECORD +168 -0
  103. {teuthology-1.1.0.dist-info → teuthology-1.2.1.dist-info}/WHEEL +1 -1
  104. {teuthology-1.1.0.dist-info → teuthology-1.2.1.dist-info}/entry_points.txt +3 -2
  105. scripts/nuke.py +0 -47
  106. scripts/worker.py +0 -37
  107. teuthology/lock/test/__init__.py +0 -0
  108. teuthology/lock/test/test_lock.py +0 -7
  109. teuthology/nuke/actions.py +0 -456
  110. teuthology/openstack/test/__init__.py +0 -0
  111. teuthology/openstack/test/openstack-integration.py +0 -286
  112. teuthology/openstack/test/test_config.py +0 -35
  113. teuthology/openstack/test/test_openstack.py +0 -1695
  114. teuthology/orchestra/test/__init__.py +0 -0
  115. teuthology/orchestra/test/integration/__init__.py +0 -0
  116. teuthology/orchestra/test/integration/test_integration.py +0 -94
  117. teuthology/orchestra/test/test_cluster.py +0 -240
  118. teuthology/orchestra/test/test_connection.py +0 -106
  119. teuthology/orchestra/test/test_console.py +0 -217
  120. teuthology/orchestra/test/test_opsys.py +0 -404
  121. teuthology/orchestra/test/test_remote.py +0 -185
  122. teuthology/orchestra/test/test_run.py +0 -286
  123. teuthology/orchestra/test/test_systemd.py +0 -54
  124. teuthology/orchestra/test/util.py +0 -12
  125. teuthology/task/tests/__init__.py +0 -110
  126. teuthology/task/tests/test_locking.py +0 -25
  127. teuthology/task/tests/test_run.py +0 -40
  128. teuthology/test/__init__.py +0 -0
  129. teuthology/test/fake_archive.py +0 -107
  130. teuthology/test/fake_fs.py +0 -92
  131. teuthology/test/integration/__init__.py +0 -0
  132. teuthology/test/integration/test_suite.py +0 -86
  133. teuthology/test/task/__init__.py +0 -205
  134. teuthology/test/task/test_ansible.py +0 -624
  135. teuthology/test/task/test_ceph_ansible.py +0 -176
  136. teuthology/test/task/test_console_log.py +0 -88
  137. teuthology/test/task/test_install.py +0 -337
  138. teuthology/test/task/test_internal.py +0 -57
  139. teuthology/test/task/test_kernel.py +0 -243
  140. teuthology/test/task/test_pcp.py +0 -379
  141. teuthology/test/task/test_selinux.py +0 -35
  142. teuthology/test/test_config.py +0 -189
  143. teuthology/test/test_contextutil.py +0 -68
  144. teuthology/test/test_describe_tests.py +0 -316
  145. teuthology/test/test_email_sleep_before_teardown.py +0 -81
  146. teuthology/test/test_exit.py +0 -97
  147. teuthology/test/test_get_distro.py +0 -47
  148. teuthology/test/test_get_distro_version.py +0 -47
  149. teuthology/test/test_get_multi_machine_types.py +0 -27
  150. teuthology/test/test_job_status.py +0 -60
  151. teuthology/test/test_ls.py +0 -48
  152. teuthology/test/test_misc.py +0 -391
  153. teuthology/test/test_nuke.py +0 -290
  154. teuthology/test/test_packaging.py +0 -763
  155. teuthology/test/test_parallel.py +0 -28
  156. teuthology/test/test_repo_utils.py +0 -225
  157. teuthology/test/test_report.py +0 -77
  158. teuthology/test/test_results.py +0 -155
  159. teuthology/test/test_run.py +0 -239
  160. teuthology/test/test_safepath.py +0 -55
  161. teuthology/test/test_schedule.py +0 -45
  162. teuthology/test/test_scrape.py +0 -167
  163. teuthology/test/test_timer.py +0 -80
  164. teuthology/test/test_vps_os_vers_parameter_checking.py +0 -84
  165. teuthology/test/test_worker.py +0 -303
  166. teuthology/worker.py +0 -354
  167. teuthology-1.1.0.dist-info/METADATA +0 -76
  168. teuthology-1.1.0.dist-info/RECORD +0 -213
  169. {teuthology-1.1.0.dist-info → teuthology-1.2.1.dist-info}/LICENSE +0 -0
  170. {teuthology-1.1.0.dist-info → teuthology-1.2.1.dist-info}/top_level.txt +0 -0
teuthology/misc.py CHANGED
@@ -15,11 +15,14 @@ import time
 import yaml
 import json
 import re
+from sys import stdin
 import pprint
 import datetime
 
 from tarfile import ReadError
 
+from typing import Optional, TypeVar
+
 from teuthology.util.compat import urljoin, urlopen, HTTPError
 
 from netaddr.strategy.ipv4 import valid_str as _is_ipv4
@@ -48,9 +51,9 @@ def host_shortname(hostname):
     else:
         return hostname.split('.', 1)[0]
 
-def canonicalize_hostname(hostname, user='ubuntu'):
+def canonicalize_hostname(hostname, user: Optional[str] ='ubuntu'):
     hostname_expr = hostname_expr_templ.format(
-        lab_domain=config.lab_domain.replace('.', '\.'))
+        lab_domain=config.lab_domain.replace('.', r'\.'))
     match = re.match(hostname_expr, hostname)
     if _is_ipv4(hostname) or _is_ipv6(hostname):
         return "%s@%s" % (user, hostname)
@@ -80,7 +83,7 @@ def canonicalize_hostname(hostname, user='ubuntu'):
 def decanonicalize_hostname(hostname):
     lab_domain = ''
     if config.lab_domain:
-        lab_domain='\.' + config.lab_domain.replace('.', '\.')
+        lab_domain=r'\.' + config.lab_domain.replace('.', r'\.')
     hostname_expr = hostname_expr_templ.format(lab_domain=lab_domain)
     match = re.match(hostname_expr, hostname)
     if match:
@@ -106,31 +109,20 @@ def config_file(string):
     return config_dict
 
 
-class MergeConfig(argparse.Action):
-    """
-    Used by scripts to mergeg configurations. (nuke, run, and
-    schedule, for example)
-    """
-    def __call__(self, parser, namespace, values, option_string=None):
-        """
-        Perform merges of all the day in the config dictionaries.
-        """
-        config_dict = getattr(namespace, self.dest)
-        for new in values:
-            deep_merge(config_dict, new)
-
-
-def merge_configs(config_paths):
+def merge_configs(config_paths) -> dict:
     """ Takes one or many paths to yaml config files and merges them
     together, returning the result.
     """
     conf_dict = dict()
     for conf_path in config_paths:
-        if not os.path.exists(conf_path):
+        if conf_path == "-":
+            partial_dict = yaml.safe_load(stdin)
+        elif not os.path.exists(conf_path):
             log.debug("The config path {0} does not exist, skipping.".format(conf_path))
             continue
-        with open(conf_path) as partial_file:
-            partial_dict = yaml.safe_load(partial_file)
+        else:
+            with open(conf_path) as partial_file:
+                partial_dict: dict = yaml.safe_load(partial_file)
         try:
             conf_dict = deep_merge(conf_dict, partial_dict)
         except Exception:
@@ -227,13 +219,13 @@ def get_ceph_binary_url(package=None,
         assert tag is None, "cannot set both sha1 and tag"
     else:
         # gitbuilder uses remote-style ref names for branches, mangled to
-        # have underscores instead of slashes; e.g. origin_master
+        # have underscores instead of slashes; e.g. origin_main
        if tag is not None:
            ref = tag
            assert branch is None, "cannot set both branch and tag"
        else:
            if branch is None:
-                branch = 'master'
+                branch = 'main'
            ref = branch
 
    sha1_url = urljoin(BASE, 'ref/{ref}/sha1'.format(ref=ref))
@@ -740,8 +732,8 @@ def pull_directory(remote, remotedir, localdir, write_to=copy_fileobj):
               remote.shortname, remotedir, localdir)
     if not os.path.exists(localdir):
         os.mkdir(localdir)
-    r = remote.get_tar_stream(remotedir, sudo=True)
-    tar = tarfile.open(mode='r|gz', fileobj=r.stdout)
+    r = remote.get_tar_stream(remotedir, sudo=True, compress=False)
+    tar = tarfile.open(mode='r|', fileobj=r.stdout)
     while True:
         ti = tar.next()
         if ti is None:
@@ -778,7 +770,7 @@ def pull_directory_tarball(remote, remotedir, localfile):
 
 
 def get_wwn_id_map(remote, devs):
-    log.warn("Entering get_wwn_id_map, a deprecated function that will be removed")
+    log.warning("Entering get_wwn_id_map, a deprecated function that will be removed")
     return dict((d, d) for d in devs)
 
 
@@ -797,33 +789,31 @@ def get_scratch_devices(remote):
     for dev in devs:
         if 'vda' in dev:
             devs.remove(dev)
-            log.warn("Removing root device: %s from device list" % dev)
+            log.warning("Removing root device: %s from device list" % dev)
 
     log.debug('devs={d}'.format(d=devs))
 
     retval = []
     for dev in devs:
-        try:
-            # FIXME: Split this into multiple calls.
-            remote.run(
-                args=[
-                    # node exists
-                    'stat',
-                    dev,
-                    run.Raw('&&'),
-                    # readable
-                    'sudo', 'dd', 'if=%s' % dev, 'of=/dev/null', 'count=1',
-                    run.Raw('&&'),
-                    # not mounted
-                    run.Raw('!'),
-                    'mount',
-                    run.Raw('|'),
-                    'grep', '-q', dev,
-                ]
-            )
+        dev_checks = [
+            [['stat', dev], "does not exist"],
+            [['sudo', 'dd', 'if=%s' % dev, 'of=/dev/null', 'count=1'], "is not readable"],
+            [
+                [run.Raw('!'), 'mount', run.Raw('|'), 'grep', '-v', 'devtmpfs', run.Raw('|'),
+                 'grep', '-q', dev],
+                "is in use"
+            ],
+        ]
+        for args, msg in dev_checks:
+            try:
+                remote.run(args=args)
+            except CommandFailedError:
+                log.debug(f"get_scratch_devices: {dev} {msg}")
+                break
+        else:
            retval.append(dev)
-        except CommandFailedError:
-            log.debug("get_scratch_devices: %s is in use" % dev)
+            continue
+        break
    return retval
 
 
@@ -995,7 +985,8 @@ def replace_all_with_clients(cluster, config):
     return norm_config
 
 
-def deep_merge(a, b):
+DeepMerge = TypeVar('DeepMerge')
+def deep_merge(a: DeepMerge, b: DeepMerge) -> DeepMerge:
     """
     Deep Merge. If a and b are both lists, all elements in b are
     added into a. If a and b are both dictionaries, elements in b are
@@ -1003,10 +994,10 @@ def deep_merge(a, b):
     :param a: object items will be merged into
     :param b: object items will be merged from
     """
-    if a is None:
-        return b
     if b is None:
         return a
+    if a is None:
+        return deep_merge(b.__class__(), b)
     if isinstance(a, list):
         assert isinstance(b, list)
         a.extend(b)
@@ -1014,10 +1005,7 @@
     if isinstance(a, dict):
         assert isinstance(b, dict)
         for (k, v) in b.items():
-            if k in a:
-                a[k] = deep_merge(a[k], v)
-            else:
-                a[k] = v
+            a[k] = deep_merge(a.get(k), v)
         return a
     return b
 
@@ -1096,7 +1084,8 @@ def ssh_keyscan(hostnames, _raise=True):
     for hostname in hostnames:
         with safe_while(
                 sleep=1,
-                tries=5 if _raise else 1,
+                tries=15 if _raise else 1,
+                increment=1,
                 _raise=_raise,
                 action="ssh_keyscan " + hostname,
         ) as proceed:
@@ -1109,7 +1098,7 @@
         missing = set(hostnames) - set(keys_dict.keys())
         msg = "Unable to scan these host keys: %s" % ' '.join(missing)
         if not _raise:
-            log.warn(msg)
+            log.warning(msg)
         else:
             raise RuntimeError(msg)
     return keys_dict
@@ -1122,7 +1111,7 @@ def _ssh_keyscan(hostname):
     :param hostname: The hostname
     :returns: The host key
     """
-    args = ['ssh-keyscan', '-T', '1', '-t', 'rsa', hostname]
+    args = ['ssh-keyscan', '-T', '1', hostname]
     p = subprocess.Popen(
         args=args,
         stdout=subprocess.PIPE,
@@ -1134,9 +1123,12 @@
         line = line.strip()
         if line and not line.startswith('#'):
             log.error(line)
+    keys = list()
     for line in p.stdout:
         host, key = line.strip().decode().split(' ', 1)
-        return key
+        keys.append(key)
+    if len(keys) > 0:
+        return sorted(keys)[0]
 
 
 def ssh_keyscan_wait(hostname):
@@ -1177,29 +1169,19 @@ def stop_daemons_of_type(ctx, type_, cluster='ceph'):
 def get_system_type(remote, distro=False, version=False):
     """
     If distro, return distro.
-    If version, return version (lsb_release -rs)
+    If version, return version
     If both, return both.
     If neither, return 'deb' or 'rpm' if distro is known to be one of those
-    Finally, if unknown, return the unfiltered distro (from lsb_release -is)
     """
-    system_value = remote.sh('sudo lsb_release -is').strip()
-    log.debug("System to be installed: %s" % system_value)
     if version:
-        version = remote.sh('sudo lsb_release -rs').strip()
+        version = remote.os.version
     if distro and version:
-        return system_value.lower(), version
+        return remote.os.name, version
     if distro:
-        return system_value.lower()
+        return remote.os.name
     if version:
         return version
-    if system_value in ['Ubuntu', 'Debian']:
-        return "deb"
-    if system_value in ['CentOS', 'Fedora', 'RedHatEnterpriseServer',
-                        'RedHatEnterprise',
-                        'CentOSStream',
-                        'openSUSE', 'openSUSE project', 'SUSE', 'SUSE LINUX']:
-        return "rpm"
-    return system_value
+    return remote.os.package_type
 
 def get_pkg_type(os_type):
     if os_type in ('centos', 'fedora', 'opensuse', 'rhel', 'sle'):
@@ -1308,7 +1290,7 @@ def sh(command, log_limit=1024, cwd=None, env=None):
         for line in proc.stdout:
             line = line.decode()
             lines.append(line)
-            line = line.strip()
+            line = line.rstrip()
             if len(line) > log_limit:
                 truncated = True
                 log.debug(line[:log_limit] +
@@ -1336,6 +1318,8 @@ def add_remote_path(ctx, local_dir, remote_dir):
     Add key/value pair (local_dir: remote_dir) to job's info.yaml.
     These key/value pairs are read to archive them in case of job timeout.
     """
+    if ctx.archive is None:
+        return
     with open(os.path.join(ctx.archive, 'info.yaml'), 'r+') as info_file:
         info_yaml = yaml.safe_load(info_file)
         info_file.seek(0)
@@ -1351,6 +1335,8 @@ def archive_logs(ctx, remote_path, log_path):
     Archive directories from all nodes in a cliuster. It pulls all files in
     remote_path dir to job's archive dir under log_path dir.
     """
+    if ctx.archive is None:
+        return
     path = os.path.join(ctx.archive, 'remote')
     os.makedirs(path, exist_ok=True)
     for remote in ctx.cluster.remotes.keys():
@@ -1370,7 +1356,7 @@ def compress_logs(ctx, remote_dir):
     run.wait(
         ctx.cluster.run(
             args=(f"sudo find {remote_dir} -name *.log -print0 | "
-                  f"sudo xargs -0 --no-run-if-empty -- gzip --"),
+                  f"sudo xargs --max-args=1 --max-procs=0 --verbose -0 --no-run-if-empty -- gzip -5 --verbose --"),
             wait=False,
         ),
     )
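
Two behavior changes in this file are easy to miss in the hunks above: merge_configs() now treats a path of "-" as "read YAML from stdin", and deep_merge() no longer returns b itself when a is None; it instead merges b into a fresh instance of b's class, so the object handed back is not the caller's own b (previously, later merges on the result could mutate that input). The following is a minimal standalone sketch of the new deep_merge() semantics, reimplemented here purely for illustration rather than imported from teuthology:

    # Illustrative reimplementation of the new deep_merge() behavior;
    # the real function lives in teuthology/misc.py.
    def deep_merge(a, b):
        if b is None:
            return a
        if a is None:
            # Changed behavior: merge into a fresh container instead of
            # returning b, so the result's top-level object never aliases b.
            return deep_merge(b.__class__(), b)
        if isinstance(a, list):
            assert isinstance(b, list)
            a.extend(b)          # lists concatenate
            return a
        if isinstance(a, dict):
            assert isinstance(b, dict)
            for (k, v) in b.items():
                a[k] = deep_merge(a.get(k), v)   # dicts merge key-by-key
            return a
        return b                 # scalars: b wins

    defaults = {'tasks': [{'install': None}], 'overrides': {'ceph': {'fs': 'xfs'}}}
    job = {'tasks': [{'ceph': None}], 'overrides': {'ceph': {'fs': 'ext4'}}}
    merged = deep_merge(deep_merge({}, defaults), job)
    assert merged['tasks'] == [{'install': None}, {'ceph': None}]
    assert merged['overrides']['ceph']['fs'] == 'ext4'

Similarly, merge_configs(['base.yaml', '-']) (file name hypothetical) would now merge base.yaml with whatever YAML is piped on stdin, so callers can feed job configuration through a pipe instead of a temporary file.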
teuthology/nuke/__init__.py CHANGED
@@ -1,361 +1,20 @@
-import argparse
-import datetime
-import json
 import logging
-import os
-import subprocess
-
-import yaml
-
-import teuthology
-from teuthology import provision
-from teuthology.lock.ops import unlock_one
-from teuthology.lock.query import is_vm, list_locks, \
-    find_stale_locks, get_status
-from teuthology.lock.util import locked_since_seconds
-from teuthology.nuke.actions import (
-    check_console, clear_firewall, shutdown_daemons, remove_installed_packages,
-    reboot, remove_osd_mounts, remove_osd_tmpfs, kill_hadoop,
-    remove_ceph_packages, synch_clocks, unlock_firmware_repo,
-    remove_configuration_files, undo_multipath, reset_syslog_dir,
-    remove_ceph_data, remove_testing_tree, remove_yum_timedhosts,
-    kill_valgrind,
-)
-from teuthology.config import config, FakeNamespace
-from teuthology.misc import (
-    canonicalize_hostname, config_file, decanonicalize_hostname, merge_configs,
-    get_user, sh
-)
-from teuthology.openstack import OpenStack, OpenStackInstance, enforce_json_dictionary
-from teuthology.orchestra.remote import Remote
-from teuthology.parallel import parallel
-from teuthology.task.internal import check_lock, add_remotes, connect
 
 log = logging.getLogger(__name__)
 
 
-def openstack_volume_id(volume):
-    return (volume.get('ID') or volume['id'])
-
-
-def openstack_volume_name(volume):
-    return (volume.get('Display Name') or
-            volume.get('display_name') or
-            volume.get('Name') or
-            volume.get('name') or "")
-
-
-def stale_openstack(ctx):
-    targets = dict(map(lambda i: (i['ID'], i),
-                       OpenStack.list_instances()))
-    nodes = list_locks(keyed_by_name=True, locked=True)
-    stale_openstack_instances(ctx, targets, nodes)
-    stale_openstack_nodes(ctx, targets, nodes)
-    stale_openstack_volumes(ctx, OpenStack.list_volumes())
-    if not ctx.dry_run:
-        openstack_remove_again()
-
-#
-# A delay, in seconds, that is significantly longer than
-# any kind of OpenStack server creation / deletion / etc.
-#
-OPENSTACK_DELAY = 30 * 60
-
-
-def stale_openstack_instances(ctx, instances, locked_nodes):
-    for (instance_id, instance) in instances.items():
-        i = OpenStackInstance(instance_id)
-        if not i.exists():
-            log.debug("stale-openstack: {instance} disappeared, ignored"
-                      .format(instance=instance_id))
-            continue
-        if (i.get_created() >
-                config['max_job_time'] + OPENSTACK_DELAY):
-            log.info(
-                "stale-openstack: destroying instance {instance}"
-                " because it was created {created} seconds ago"
-                " which is older than"
-                " max_job_time {max_job_time} + {delay}"
-                .format(instance=i['name'],
-                        created=i.get_created(),
-                        max_job_time=config['max_job_time'],
-                        delay=OPENSTACK_DELAY))
-            if not ctx.dry_run:
-                i.destroy()
-            continue
-        name = canonicalize_hostname(i['name'], user=None)
-        if i.get_created() > OPENSTACK_DELAY and name not in locked_nodes:
-            log.info("stale-openstack: destroying instance {instance}"
-                     " because it was created {created} seconds ago"
-                     " is older than {delay}s and it is not locked"
-                     .format(instance=i['name'],
-                             created=i.get_created(),
-                             delay=OPENSTACK_DELAY))
-            if not ctx.dry_run:
-                i.destroy()
-            continue
-        log.debug("stale-openstack: instance " + i['name'] + " OK")
-
-
-def openstack_delete_volume(id):
-    OpenStack().run("volume delete " + id + " || true")
-
-
-def stale_openstack_volumes(ctx, volumes):
-    now = datetime.datetime.now()
-    for volume in volumes:
-        volume_id = openstack_volume_id(volume)
-        try:
-            volume = json.loads(OpenStack().run("volume show -f json " +
-                                                volume_id))
-        except subprocess.CalledProcessError:
-            log.debug("stale-openstack: {id} disappeared, ignored"
-                      .format(id=volume_id))
-            continue
-        volume_name = openstack_volume_name(volume)
-        enforce_json_dictionary(volume)
-        created_at = datetime.datetime.strptime(
-            volume['created_at'], '%Y-%m-%dT%H:%M:%S.%f')
-        created = (now - created_at).total_seconds()
-        if created > config['max_job_time'] + OPENSTACK_DELAY:
-            log.info(
-                "stale-openstack: destroying volume {volume}({id})"
-                " because it was created {created} seconds ago"
-                " which is older than"
-                " max_job_time {max_job_time} + {delay}"
-                .format(volume=volume_name,
-                        id=volume_id,
-                        created=created,
-                        max_job_time=config['max_job_time'],
-                        delay=OPENSTACK_DELAY))
-            if not ctx.dry_run:
-                openstack_delete_volume(volume_id)
-            continue
-        log.debug("stale-openstack: volume " + volume_id + " OK")
-
-
-def stale_openstack_nodes(ctx, instances, locked_nodes):
-    names = set([ i['Name'] for i in instances.values() ])
-    for (name, node) in locked_nodes.items():
-        name = decanonicalize_hostname(name)
-        if node['machine_type'] != 'openstack':
-            continue
-        if (name not in names and
-                locked_since_seconds(node) > OPENSTACK_DELAY):
-            log.info("stale-openstack: unlocking node {name} unlocked"
-                     " because it was created {created}"
-                     " seconds ago which is older than {delay}"
-                     " and it has no instance"
-                     .format(name=name,
-                             created=locked_since_seconds(node),
-                             delay=OPENSTACK_DELAY))
-            if not ctx.dry_run:
-                unlock_one(ctx, name, node['locked_by'])
-            continue
-        log.debug("stale-openstack: node " + name + " OK")
-
-
-def openstack_remove_again():
+# This is being kept because ceph.git/qa/tasks/cephfs/filesystem.py references it.
+def clear_firewall(ctx):
     """
-    Volumes and servers with REMOVE-ME in the name are leftover
-    that failed to be removed. It is not uncommon for a failed removal
-    to succeed later on.
+    Remove any iptables rules created by teuthology. These rules are
+    identified by containing a comment with 'teuthology' in it. Non-teuthology
+    firewall rules are unaffected.
     """
-    sh("""
-    openstack server list --name REMOVE-ME --column ID --format value |
-    xargs --no-run-if-empty --max-args 1 -P20 openstack server delete --wait
-    true
-    """)
-    volumes = json.loads(OpenStack().run("volume list -f json --long"))
-    remove_me = [openstack_volume_id(v) for v in volumes
-                 if 'REMOVE-ME' in openstack_volume_name(v)]
-    for i in remove_me:
-        log.info("Trying to remove stale volume %s" % i)
-        openstack_delete_volume(i)
-
-
-def main(args):
-    ctx = FakeNamespace(args)
-    if ctx.verbose:
-        teuthology.log.setLevel(logging.DEBUG)
-
-    info = {}
-    if ctx.archive:
-        ctx.config = config_file(ctx.archive + '/config.yaml')
-        ifn = os.path.join(ctx.archive, 'info.yaml')
-        if os.path.exists(ifn):
-            with open(ifn, 'r') as fd:
-                info = yaml.safe_load(fd.read())
-        if not ctx.pid:
-            ctx.pid = info.get('pid')
-            if not ctx.pid:
-                ctx.pid = int(open(ctx.archive + '/pid').read().rstrip('\n'))
-        if not ctx.owner:
-            ctx.owner = info.get('owner')
-            if not ctx.owner:
-                ctx.owner = open(ctx.archive + '/owner').read().rstrip('\n')
-
-    if ctx.targets:
-        ctx.config = merge_configs(ctx.targets)
-
-    if ctx.stale:
-        stale_nodes = find_stale_locks(ctx.owner)
-        targets = dict()
-        for node in stale_nodes:
-            targets[node['name']] = node['ssh_pub_key']
-        ctx.config = dict(targets=targets)
-
-    if ctx.stale_openstack:
-        stale_openstack(ctx)
-        return
-
-    log.info(
-        '\n '.join(
-            ['targets:', ] + yaml.safe_dump(
-                ctx.config['targets'],
-                default_flow_style=False).splitlines()))
-
-    if ctx.dry_run:
-        log.info("Not actually nuking anything since --dry-run was passed")
-        return
-
-    if ctx.owner is None:
-        ctx.owner = get_user()
-
-    if ctx.pid:
-        if ctx.archive:
-            log.info('Killing teuthology process at pid %d', ctx.pid)
-            os.system('grep -q %s /proc/%d/cmdline && sudo kill %d' % (
-                ctx.archive,
-                ctx.pid,
-                ctx.pid))
-        else:
-            subprocess.check_call(["kill", "-9", str(ctx.pid)])
-
-    nuke(ctx, ctx.unlock, ctx.synch_clocks, ctx.noipmi, ctx.keep_logs, not ctx.no_reboot)
-
-
-def nuke(ctx, should_unlock, sync_clocks=True, noipmi=False, keep_logs=False, should_reboot=True):
-    if 'targets' not in ctx.config:
-        return
-    total_unnuked = {}
-    targets = dict(ctx.config['targets'])
-    if ctx.name:
-        log.info('Checking targets against current locks')
-        locks = list_locks()
-        # Remove targets who's description doesn't match archive name.
-        for lock in locks:
-            for target in targets:
-                if target == lock['name']:
-                    if ctx.name not in lock['description']:
-                        del ctx.config['targets'][lock['name']]
-                        log.info(
-                            "Not nuking %s because description doesn't match",
-                            lock['name'])
-    with parallel() as p:
-        for target, hostkey in ctx.config['targets'].items():
-            p.spawn(
-                nuke_one,
-                ctx,
-                {target: hostkey},
-                should_unlock,
-                sync_clocks,
-                ctx.config.get('check-locks', True),
-                noipmi,
-                keep_logs,
-                should_reboot,
-            )
-        for unnuked in p:
-            if unnuked:
-                total_unnuked.update(unnuked)
-    if total_unnuked:
-        log.error('Could not nuke the following targets:\n' +
-                  '\n '.join(['targets:', ] +
-                             yaml.safe_dump(
-                                 total_unnuked,
-                                 default_flow_style=False).splitlines()))
-
-
-def nuke_one(ctx, target, should_unlock, synch_clocks,
-             check_locks, noipmi, keep_logs, should_reboot):
-    ret = None
-    ctx = argparse.Namespace(
-        config=dict(targets=target),
-        owner=ctx.owner,
-        check_locks=check_locks,
-        synch_clocks=synch_clocks,
-        teuthology_config=config.to_dict(),
-        name=ctx.name,
-        noipmi=noipmi,
+    log.info("Clearing teuthology firewall rules...")
+    ctx.cluster.run(
+        args=[
+            "sudo", "sh", "-c",
+            "iptables-save | grep -v teuthology | iptables-restore"
+        ],
     )
-    try:
-        nuke_helper(ctx, should_unlock, keep_logs, should_reboot)
-    except Exception:
-        log.exception('Could not nuke %s' % target)
-        # not re-raising the so that parallel calls aren't killed
-        ret = target
-    else:
-        if should_unlock:
-            unlock_one(ctx, list(target.keys())[0], ctx.owner)
-    return ret
-
-
-def nuke_helper(ctx, should_unlock, keep_logs, should_reboot):
-    # ensure node is up with ipmi
-    (target,) = ctx.config['targets'].keys()
-    host = target.split('@')[-1]
-    shortname = host.split('.')[0]
-    if should_unlock:
-        if is_vm(shortname):
-            return
-    log.debug('shortname: %s' % shortname)
-    if ctx.check_locks:
-        # does not check to ensure if the node is 'up'
-        # we want to be able to nuke a downed node
-        check_lock.check_lock(ctx, None, check_up=False)
-    status = get_status(host)
-    if status['machine_type'] in provision.fog.get_types():
-        remote = Remote(host)
-        remote.console.power_off()
-        return
-    elif status['machine_type'] in provision.pelagos.get_types():
-        provision.pelagos.park_node(host)
-        return
-
-    if (not ctx.noipmi and 'ipmi_user' in config and
-            'vpm' not in shortname):
-        try:
-            check_console(host)
-        except Exception:
-            log.exception('')
-            log.info("Will attempt to connect via SSH")
-            remote = Remote(host)
-            remote.connect()
-    add_remotes(ctx, None)
-    connect(ctx, None)
-    clear_firewall(ctx)
-    shutdown_daemons(ctx)
-    kill_valgrind(ctx)
-    # Try to remove packages before reboot
-    remove_installed_packages(ctx)
-    remotes = ctx.cluster.remotes.keys()
-    if should_reboot:
-        reboot(ctx, remotes)
-    # shutdown daemons again incase of startup
-    shutdown_daemons(ctx)
-    remove_osd_mounts(ctx)
-    remove_osd_tmpfs(ctx)
-    kill_hadoop(ctx)
-    remove_ceph_packages(ctx)
-    synch_clocks(remotes)
-    unlock_firmware_repo(ctx)
-    remove_configuration_files(ctx)
-    undo_multipath(ctx)
-    reset_syslog_dir(ctx)
-    remove_ceph_data(ctx)
-    if not keep_logs:
-        remove_testing_tree(ctx)
-    remove_yum_timedhosts(ctx)
-    # Once again remove packages after reboot
-    remove_installed_packages(ctx)
-    log.info('Installed packages removed.')
+    log.info("Cleared teuthology firewall rules.")