teuthology 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- scripts/describe.py +1 -0
- scripts/dispatcher.py +55 -26
- scripts/exporter.py +18 -0
- scripts/lock.py +1 -1
- scripts/node_cleanup.py +58 -0
- scripts/openstack.py +9 -9
- scripts/results.py +12 -11
- scripts/schedule.py +4 -0
- scripts/suite.py +57 -16
- scripts/supervisor.py +44 -0
- scripts/update_inventory.py +10 -4
- teuthology/__init__.py +24 -26
- teuthology/beanstalk.py +4 -3
- teuthology/config.py +16 -6
- teuthology/contextutil.py +18 -14
- teuthology/describe_tests.py +25 -18
- teuthology/dispatcher/__init__.py +210 -35
- teuthology/dispatcher/supervisor.py +140 -58
- teuthology/exceptions.py +43 -0
- teuthology/exporter.py +347 -0
- teuthology/kill.py +76 -81
- teuthology/lock/cli.py +3 -3
- teuthology/lock/ops.py +135 -61
- teuthology/lock/query.py +61 -44
- teuthology/ls.py +1 -1
- teuthology/misc.py +61 -75
- teuthology/nuke/__init__.py +12 -353
- teuthology/openstack/__init__.py +4 -3
- teuthology/openstack/openstack-centos-7.0-user-data.txt +1 -1
- teuthology/openstack/openstack-centos-7.1-user-data.txt +1 -1
- teuthology/openstack/openstack-centos-7.2-user-data.txt +1 -1
- teuthology/openstack/openstack-debian-8.0-user-data.txt +1 -1
- teuthology/openstack/openstack-opensuse-42.1-user-data.txt +1 -1
- teuthology/openstack/openstack-teuthology.cron +0 -1
- teuthology/orchestra/cluster.py +49 -7
- teuthology/orchestra/connection.py +16 -5
- teuthology/orchestra/console.py +111 -50
- teuthology/orchestra/daemon/cephadmunit.py +17 -4
- teuthology/orchestra/daemon/state.py +8 -1
- teuthology/orchestra/daemon/systemd.py +4 -4
- teuthology/orchestra/opsys.py +30 -11
- teuthology/orchestra/remote.py +405 -338
- teuthology/orchestra/run.py +3 -3
- teuthology/packaging.py +19 -16
- teuthology/provision/__init__.py +30 -10
- teuthology/provision/cloud/openstack.py +12 -6
- teuthology/provision/cloud/util.py +1 -2
- teuthology/provision/downburst.py +4 -3
- teuthology/provision/fog.py +68 -20
- teuthology/provision/openstack.py +5 -4
- teuthology/provision/pelagos.py +1 -1
- teuthology/repo_utils.py +43 -13
- teuthology/report.py +57 -35
- teuthology/results.py +5 -3
- teuthology/run.py +13 -14
- teuthology/run_tasks.py +27 -43
- teuthology/schedule.py +4 -3
- teuthology/scrape.py +28 -22
- teuthology/suite/__init__.py +74 -45
- teuthology/suite/build_matrix.py +34 -24
- teuthology/suite/fragment-merge.lua +105 -0
- teuthology/suite/matrix.py +31 -2
- teuthology/suite/merge.py +175 -0
- teuthology/suite/placeholder.py +6 -9
- teuthology/suite/run.py +175 -100
- teuthology/suite/util.py +64 -218
- teuthology/task/__init__.py +1 -1
- teuthology/task/ansible.py +101 -32
- teuthology/task/buildpackages.py +2 -2
- teuthology/task/ceph_ansible.py +13 -6
- teuthology/task/cephmetrics.py +2 -1
- teuthology/task/clock.py +33 -14
- teuthology/task/exec.py +18 -0
- teuthology/task/hadoop.py +2 -2
- teuthology/task/install/__init__.py +29 -7
- teuthology/task/install/bin/adjust-ulimits +16 -0
- teuthology/task/install/bin/daemon-helper +114 -0
- teuthology/task/install/bin/stdin-killer +263 -0
- teuthology/task/install/deb.py +1 -1
- teuthology/task/install/rpm.py +17 -5
- teuthology/task/install/util.py +3 -3
- teuthology/task/internal/__init__.py +41 -10
- teuthology/task/internal/edit_sudoers.sh +10 -0
- teuthology/task/internal/lock_machines.py +2 -9
- teuthology/task/internal/redhat.py +31 -1
- teuthology/task/internal/syslog.py +31 -8
- teuthology/task/kernel.py +152 -145
- teuthology/task/lockfile.py +1 -1
- teuthology/task/mpi.py +10 -10
- teuthology/task/pcp.py +1 -1
- teuthology/task/selinux.py +16 -8
- teuthology/task/ssh_keys.py +4 -4
- teuthology/task/tests/__init__.py +137 -77
- teuthology/task/tests/test_fetch_coredumps.py +116 -0
- teuthology/task/tests/test_run.py +4 -4
- teuthology/timer.py +3 -3
- teuthology/util/loggerfile.py +19 -0
- teuthology/util/scanner.py +159 -0
- teuthology/util/sentry.py +52 -0
- teuthology/util/time.py +52 -0
- teuthology-1.2.0.data/scripts/adjust-ulimits +16 -0
- teuthology-1.2.0.data/scripts/daemon-helper +114 -0
- teuthology-1.2.0.data/scripts/stdin-killer +263 -0
- teuthology-1.2.0.dist-info/METADATA +89 -0
- teuthology-1.2.0.dist-info/RECORD +174 -0
- {teuthology-1.1.0.dist-info → teuthology-1.2.0.dist-info}/WHEEL +1 -1
- {teuthology-1.1.0.dist-info → teuthology-1.2.0.dist-info}/entry_points.txt +3 -2
- scripts/nuke.py +0 -47
- scripts/worker.py +0 -37
- teuthology/nuke/actions.py +0 -456
- teuthology/openstack/test/__init__.py +0 -0
- teuthology/openstack/test/openstack-integration.py +0 -286
- teuthology/openstack/test/test_config.py +0 -35
- teuthology/openstack/test/test_openstack.py +0 -1695
- teuthology/orchestra/test/__init__.py +0 -0
- teuthology/orchestra/test/integration/__init__.py +0 -0
- teuthology/orchestra/test/integration/test_integration.py +0 -94
- teuthology/orchestra/test/test_cluster.py +0 -240
- teuthology/orchestra/test/test_connection.py +0 -106
- teuthology/orchestra/test/test_console.py +0 -217
- teuthology/orchestra/test/test_opsys.py +0 -404
- teuthology/orchestra/test/test_remote.py +0 -185
- teuthology/orchestra/test/test_run.py +0 -286
- teuthology/orchestra/test/test_systemd.py +0 -54
- teuthology/orchestra/test/util.py +0 -12
- teuthology/test/__init__.py +0 -0
- teuthology/test/fake_archive.py +0 -107
- teuthology/test/fake_fs.py +0 -92
- teuthology/test/integration/__init__.py +0 -0
- teuthology/test/integration/test_suite.py +0 -86
- teuthology/test/task/__init__.py +0 -205
- teuthology/test/task/test_ansible.py +0 -624
- teuthology/test/task/test_ceph_ansible.py +0 -176
- teuthology/test/task/test_console_log.py +0 -88
- teuthology/test/task/test_install.py +0 -337
- teuthology/test/task/test_internal.py +0 -57
- teuthology/test/task/test_kernel.py +0 -243
- teuthology/test/task/test_pcp.py +0 -379
- teuthology/test/task/test_selinux.py +0 -35
- teuthology/test/test_config.py +0 -189
- teuthology/test/test_contextutil.py +0 -68
- teuthology/test/test_describe_tests.py +0 -316
- teuthology/test/test_email_sleep_before_teardown.py +0 -81
- teuthology/test/test_exit.py +0 -97
- teuthology/test/test_get_distro.py +0 -47
- teuthology/test/test_get_distro_version.py +0 -47
- teuthology/test/test_get_multi_machine_types.py +0 -27
- teuthology/test/test_job_status.py +0 -60
- teuthology/test/test_ls.py +0 -48
- teuthology/test/test_misc.py +0 -391
- teuthology/test/test_nuke.py +0 -290
- teuthology/test/test_packaging.py +0 -763
- teuthology/test/test_parallel.py +0 -28
- teuthology/test/test_repo_utils.py +0 -225
- teuthology/test/test_report.py +0 -77
- teuthology/test/test_results.py +0 -155
- teuthology/test/test_run.py +0 -239
- teuthology/test/test_safepath.py +0 -55
- teuthology/test/test_schedule.py +0 -45
- teuthology/test/test_scrape.py +0 -167
- teuthology/test/test_timer.py +0 -80
- teuthology/test/test_vps_os_vers_parameter_checking.py +0 -84
- teuthology/test/test_worker.py +0 -303
- teuthology/worker.py +0 -354
- teuthology-1.1.0.dist-info/METADATA +0 -76
- teuthology-1.1.0.dist-info/RECORD +0 -213
- {teuthology-1.1.0.dist-info → teuthology-1.2.0.dist-info}/LICENSE +0 -0
- {teuthology-1.1.0.dist-info → teuthology-1.2.0.dist-info}/top_level.txt +0 -0
teuthology/misc.py
CHANGED
```diff
@@ -15,11 +15,14 @@ import time
 import yaml
 import json
 import re
+from sys import stdin
 import pprint
 import datetime
 
 from tarfile import ReadError
 
+from typing import Optional, TypeVar
+
 from teuthology.util.compat import urljoin, urlopen, HTTPError
 
 from netaddr.strategy.ipv4 import valid_str as _is_ipv4
@@ -48,9 +51,9 @@ def host_shortname(hostname):
     else:
         return hostname.split('.', 1)[0]
 
-def canonicalize_hostname(hostname, user='ubuntu'):
+def canonicalize_hostname(hostname, user: Optional[str] ='ubuntu'):
     hostname_expr = hostname_expr_templ.format(
-        lab_domain=config.lab_domain.replace('.', '\.'))
+        lab_domain=config.lab_domain.replace('.', r'\.'))
     match = re.match(hostname_expr, hostname)
     if _is_ipv4(hostname) or _is_ipv6(hostname):
         return "%s@%s" % (user, hostname)
@@ -80,7 +83,7 @@ def canonicalize_hostname(hostname, user='ubuntu'):
 def decanonicalize_hostname(hostname):
     lab_domain = ''
     if config.lab_domain:
-        lab_domain='\.' + config.lab_domain.replace('.', '\.')
+        lab_domain=r'\.' + config.lab_domain.replace('.', r'\.')
     hostname_expr = hostname_expr_templ.format(lab_domain=lab_domain)
     match = re.match(hostname_expr, hostname)
     if match:
@@ -106,31 +109,20 @@ def config_file(string):
     return config_dict
 
 
-class MergeConfig(argparse.Action):
-    """
-    Used by scripts to mergeg configurations. (nuke, run, and
-    schedule, for example)
-    """
-    def __call__(self, parser, namespace, values, option_string=None):
-        """
-        Perform merges of all the day in the config dictionaries.
-        """
-        config_dict = getattr(namespace, self.dest)
-        for new in values:
-            deep_merge(config_dict, new)
-
-
-def merge_configs(config_paths):
+def merge_configs(config_paths) -> dict:
     """ Takes one or many paths to yaml config files and merges them
     together, returning the result.
     """
     conf_dict = dict()
     for conf_path in config_paths:
-        if not os.path.exists(conf_path):
+        if conf_path == "-":
+            partial_dict = yaml.safe_load(stdin)
+        elif not os.path.exists(conf_path):
             log.debug("The config path {0} does not exist, skipping.".format(conf_path))
             continue
-        with open(conf_path) as partial_file:
-            partial_dict = yaml.safe_load(partial_file)
+        else:
+            with open(conf_path) as partial_file:
+                partial_dict: dict = yaml.safe_load(partial_file)
         try:
             conf_dict = deep_merge(conf_dict, partial_dict)
         except Exception:
@@ -227,13 +219,13 @@ def get_ceph_binary_url(package=None,
         assert tag is None, "cannot set both sha1 and tag"
     else:
         # gitbuilder uses remote-style ref names for branches, mangled to
-        # have underscores instead of slashes; e.g. origin_master
+        # have underscores instead of slashes; e.g. origin_main
         if tag is not None:
             ref = tag
             assert branch is None, "cannot set both branch and tag"
         else:
             if branch is None:
-                branch = 'master'
+                branch = 'main'
             ref = branch
 
     sha1_url = urljoin(BASE, 'ref/{ref}/sha1'.format(ref=ref))
@@ -740,8 +732,8 @@ def pull_directory(remote, remotedir, localdir, write_to=copy_fileobj):
              remote.shortname, remotedir, localdir)
     if not os.path.exists(localdir):
         os.mkdir(localdir)
-    r = remote.get_tar_stream(remotedir, sudo=True)
-    tar = tarfile.open(mode='r|gz', fileobj=r.stdout)
+    r = remote.get_tar_stream(remotedir, sudo=True, compress=False)
+    tar = tarfile.open(mode='r|', fileobj=r.stdout)
     while True:
         ti = tar.next()
         if ti is None:
@@ -778,7 +770,7 @@ def pull_directory_tarball(remote, remotedir, localfile):
 
 
 def get_wwn_id_map(remote, devs):
-    log.warn("Entering get_wwn_id_map, a deprecated function that will be removed")
+    log.warning("Entering get_wwn_id_map, a deprecated function that will be removed")
     return dict((d, d) for d in devs)
 
 
@@ -797,33 +789,31 @@ def get_scratch_devices(remote):
     for dev in devs:
         if 'vda' in dev:
            devs.remove(dev)
-            log.warn("Removing root device: %s from device list" % dev)
+            log.warning("Removing root device: %s from device list" % dev)
 
     log.debug('devs={d}'.format(d=devs))
 
     retval = []
     for dev in devs:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                ]
-            )
+        dev_checks = [
+            [['stat', dev], "does not exist"],
+            [['sudo', 'dd', 'if=%s' % dev, 'of=/dev/null', 'count=1'], "is not readable"],
+            [
+                [run.Raw('!'), 'mount', run.Raw('|'), 'grep', '-v', 'devtmpfs', run.Raw('|'),
+                 'grep', '-q', dev],
+                "is in use"
+            ],
+        ]
+        for args, msg in dev_checks:
+            try:
+                remote.run(args=args)
+            except CommandFailedError:
+                log.debug(f"get_scratch_devices: {dev} {msg}")
+                break
+        else:
             retval.append(dev)
-
-
+            continue
+        break
     return retval
 
 
@@ -995,7 +985,8 @@ def replace_all_with_clients(cluster, config):
     return norm_config
 
 
-def deep_merge(a, b):
+DeepMerge = TypeVar('DeepMerge')
+def deep_merge(a: DeepMerge, b: DeepMerge) -> DeepMerge:
     """
     Deep Merge. If a and b are both lists, all elements in b are
     added into a. If a and b are both dictionaries, elements in b are
@@ -1003,10 +994,10 @@ def deep_merge(a, b):
     :param a: object items will be merged into
     :param b: object items will be merged from
     """
-    if a is None:
-        return b
     if b is None:
         return a
+    if a is None:
+        return deep_merge(b.__class__(), b)
     if isinstance(a, list):
         assert isinstance(b, list)
         a.extend(b)
@@ -1014,10 +1005,7 @@ def deep_merge(a, b):
     if isinstance(a, dict):
         assert isinstance(b, dict)
         for (k, v) in b.items():
-            if k in a:
-                a[k] = deep_merge(a[k], v)
-            else:
-                a[k] = v
+            a[k] = deep_merge(a.get(k), v)
         return a
     return b
 
@@ -1096,7 +1084,8 @@ def ssh_keyscan(hostnames, _raise=True):
     for hostname in hostnames:
         with safe_while(
             sleep=1,
-            tries=
+            tries=15 if _raise else 1,
+            increment=1,
             _raise=_raise,
             action="ssh_keyscan " + hostname,
         ) as proceed:
@@ -1109,7 +1098,7 @@ def ssh_keyscan(hostnames, _raise=True):
     missing = set(hostnames) - set(keys_dict.keys())
     msg = "Unable to scan these host keys: %s" % ' '.join(missing)
     if not _raise:
-        log.warn(msg)
+        log.warning(msg)
     else:
         raise RuntimeError(msg)
     return keys_dict
@@ -1122,7 +1111,7 @@ def _ssh_keyscan(hostname):
    :param hostname: The hostname
    :returns: The host key
    """
-    args = ['ssh-keyscan', '-T', '1',
+    args = ['ssh-keyscan', '-T', '1', hostname]
    p = subprocess.Popen(
        args=args,
        stdout=subprocess.PIPE,
@@ -1134,9 +1123,12 @@ def _ssh_keyscan(hostname):
         line = line.strip()
         if line and not line.startswith('#'):
             log.error(line)
+    keys = list()
     for line in p.stdout:
         host, key = line.strip().decode().split(' ', 1)
-        return key
+        keys.append(key)
+    if len(keys) > 0:
+        return sorted(keys)[0]
 
 
 def ssh_keyscan_wait(hostname):
@@ -1177,29 +1169,19 @@ def stop_daemons_of_type(ctx, type_, cluster='ceph'):
 def get_system_type(remote, distro=False, version=False):
     """
     If distro, return distro.
-    If version, return version
+    If version, return version
     If both, return both.
     If neither, return 'deb' or 'rpm' if distro is known to be one of those
-    Finally, if unknown, return the unfiltered distro (from lsb_release -is)
     """
-    system_value = remote.sh('sudo lsb_release -is').strip()
-    log.debug("System to be installed: %s" % system_value)
     if version:
-        version = remote.sh('sudo lsb_release -rs').strip()
+        version = remote.os.version
     if distro and version:
-        return system_value, version
+        return remote.os.name, version
     if distro:
-        return system_value
+        return remote.os.name
     if version:
         return version
-    if system_value in ['Ubuntu', 'Debian']:
-        return "deb"
-    if system_value in ['CentOS', 'Fedora', 'RedHatEnterpriseServer',
-                        'RedHatEnterprise',
-                        'CentOSStream',
-                        'openSUSE', 'openSUSE project', 'SUSE', 'SUSE LINUX']:
-        return "rpm"
-    return system_value
+    return remote.os.package_type
 
 def get_pkg_type(os_type):
     if os_type in ('centos', 'fedora', 'opensuse', 'rhel', 'sle'):
@@ -1308,7 +1290,7 @@ def sh(command, log_limit=1024, cwd=None, env=None):
     for line in proc.stdout:
         line = line.decode()
         lines.append(line)
-        line = line.strip()
+        line = line.rstrip()
         if len(line) > log_limit:
             truncated = True
             log.debug(line[:log_limit] +
@@ -1336,6 +1318,8 @@ def add_remote_path(ctx, local_dir, remote_dir):
     Add key/value pair (local_dir: remote_dir) to job's info.yaml.
     These key/value pairs are read to archive them in case of job timeout.
     """
+    if ctx.archive is None:
+        return
     with open(os.path.join(ctx.archive, 'info.yaml'), 'r+') as info_file:
         info_yaml = yaml.safe_load(info_file)
         info_file.seek(0)
@@ -1351,6 +1335,8 @@ def archive_logs(ctx, remote_path, log_path):
     Archive directories from all nodes in a cliuster. It pulls all files in
     remote_path dir to job's archive dir under log_path dir.
     """
+    if ctx.archive is None:
+        return
     path = os.path.join(ctx.archive, 'remote')
     os.makedirs(path, exist_ok=True)
     for remote in ctx.cluster.remotes.keys():
@@ -1370,7 +1356,7 @@ def compress_logs(ctx, remote_dir):
     run.wait(
         ctx.cluster.run(
             args=(f"sudo find {remote_dir} -name *.log -print0 | "
-                  f"sudo xargs -0 --no-run-if-empty -- gzip --"),
+                  f"sudo xargs --max-args=1 --max-procs=0 --verbose -0 --no-run-if-empty -- gzip -5 --verbose --"),
             wait=False,
         ),
     )
```
teuthology/nuke/__init__.py
CHANGED
```diff
@@ -1,361 +1,20 @@
-import argparse
-import datetime
-import json
 import logging
-import os
-import subprocess
-
-import yaml
-
-import teuthology
-from teuthology import provision
-from teuthology.lock.ops import unlock_one
-from teuthology.lock.query import is_vm, list_locks, \
-    find_stale_locks, get_status
-from teuthology.lock.util import locked_since_seconds
-from teuthology.nuke.actions import (
-    check_console, clear_firewall, shutdown_daemons, remove_installed_packages,
-    reboot, remove_osd_mounts, remove_osd_tmpfs, kill_hadoop,
-    remove_ceph_packages, synch_clocks, unlock_firmware_repo,
-    remove_configuration_files, undo_multipath, reset_syslog_dir,
-    remove_ceph_data, remove_testing_tree, remove_yum_timedhosts,
-    kill_valgrind,
-)
-from teuthology.config import config, FakeNamespace
-from teuthology.misc import (
-    canonicalize_hostname, config_file, decanonicalize_hostname, merge_configs,
-    get_user, sh
-)
-from teuthology.openstack import OpenStack, OpenStackInstance, enforce_json_dictionary
-from teuthology.orchestra.remote import Remote
-from teuthology.parallel import parallel
-from teuthology.task.internal import check_lock, add_remotes, connect
 
 log = logging.getLogger(__name__)
 
 
-
-
-
-
-def openstack_volume_name(volume):
-    return (volume.get('Display Name') or
-            volume.get('display_name') or
-            volume.get('Name') or
-            volume.get('name') or "")
-
-
-def stale_openstack(ctx):
-    targets = dict(map(lambda i: (i['ID'], i),
-                       OpenStack.list_instances()))
-    nodes = list_locks(keyed_by_name=True, locked=True)
-    stale_openstack_instances(ctx, targets, nodes)
-    stale_openstack_nodes(ctx, targets, nodes)
-    stale_openstack_volumes(ctx, OpenStack.list_volumes())
-    if not ctx.dry_run:
-        openstack_remove_again()
-
-#
-# A delay, in seconds, that is significantly longer than
-# any kind of OpenStack server creation / deletion / etc.
-#
-OPENSTACK_DELAY = 30 * 60
-
-
-def stale_openstack_instances(ctx, instances, locked_nodes):
-    for (instance_id, instance) in instances.items():
-        i = OpenStackInstance(instance_id)
-        if not i.exists():
-            log.debug("stale-openstack: {instance} disappeared, ignored"
-                      .format(instance=instance_id))
-            continue
-        if (i.get_created() >
-                config['max_job_time'] + OPENSTACK_DELAY):
-            log.info(
-                "stale-openstack: destroying instance {instance}"
-                " because it was created {created} seconds ago"
-                " which is older than"
-                " max_job_time {max_job_time} + {delay}"
-                .format(instance=i['name'],
-                        created=i.get_created(),
-                        max_job_time=config['max_job_time'],
-                        delay=OPENSTACK_DELAY))
-            if not ctx.dry_run:
-                i.destroy()
-            continue
-        name = canonicalize_hostname(i['name'], user=None)
-        if i.get_created() > OPENSTACK_DELAY and name not in locked_nodes:
-            log.info("stale-openstack: destroying instance {instance}"
-                     " because it was created {created} seconds ago"
-                     " is older than {delay}s and it is not locked"
-                     .format(instance=i['name'],
-                             created=i.get_created(),
-                             delay=OPENSTACK_DELAY))
-            if not ctx.dry_run:
-                i.destroy()
-            continue
-        log.debug("stale-openstack: instance " + i['name'] + " OK")
-
-
-def openstack_delete_volume(id):
-    OpenStack().run("volume delete " + id + " || true")
-
-
-def stale_openstack_volumes(ctx, volumes):
-    now = datetime.datetime.now()
-    for volume in volumes:
-        volume_id = openstack_volume_id(volume)
-        try:
-            volume = json.loads(OpenStack().run("volume show -f json " +
-                                                volume_id))
-        except subprocess.CalledProcessError:
-            log.debug("stale-openstack: {id} disappeared, ignored"
-                      .format(id=volume_id))
-            continue
-        volume_name = openstack_volume_name(volume)
-        enforce_json_dictionary(volume)
-        created_at = datetime.datetime.strptime(
-            volume['created_at'], '%Y-%m-%dT%H:%M:%S.%f')
-        created = (now - created_at).total_seconds()
-        if created > config['max_job_time'] + OPENSTACK_DELAY:
-            log.info(
-                "stale-openstack: destroying volume {volume}({id})"
-                " because it was created {created} seconds ago"
-                " which is older than"
-                " max_job_time {max_job_time} + {delay}"
-                .format(volume=volume_name,
-                        id=volume_id,
-                        created=created,
-                        max_job_time=config['max_job_time'],
-                        delay=OPENSTACK_DELAY))
-            if not ctx.dry_run:
-                openstack_delete_volume(volume_id)
-            continue
-        log.debug("stale-openstack: volume " + volume_id + " OK")
-
-
-def stale_openstack_nodes(ctx, instances, locked_nodes):
-    names = set([ i['Name'] for i in instances.values() ])
-    for (name, node) in locked_nodes.items():
-        name = decanonicalize_hostname(name)
-        if node['machine_type'] != 'openstack':
-            continue
-        if (name not in names and
-                locked_since_seconds(node) > OPENSTACK_DELAY):
-            log.info("stale-openstack: unlocking node {name} unlocked"
-                     " because it was created {created}"
-                     " seconds ago which is older than {delay}"
-                     " and it has no instance"
-                     .format(name=name,
-                             created=locked_since_seconds(node),
-                             delay=OPENSTACK_DELAY))
-            if not ctx.dry_run:
-                unlock_one(ctx, name, node['locked_by'])
-            continue
-        log.debug("stale-openstack: node " + name + " OK")
-
-
-def openstack_remove_again():
+# This is being kept because ceph.git/qa/tasks/cephfs/filesystem.py references it.
+def clear_firewall(ctx):
     """
-
-
-
+    Remove any iptables rules created by teuthology. These rules are
+    identified by containing a comment with 'teuthology' in it. Non-teuthology
+    firewall rules are unaffected.
     """
-
-
-
-
-
-
-    remove_me = [openstack_volume_id(v) for v in volumes
-                 if 'REMOVE-ME' in openstack_volume_name(v)]
-    for i in remove_me:
-        log.info("Trying to remove stale volume %s" % i)
-        openstack_delete_volume(i)
-
-
-def main(args):
-    ctx = FakeNamespace(args)
-    if ctx.verbose:
-        teuthology.log.setLevel(logging.DEBUG)
-
-    info = {}
-    if ctx.archive:
-        ctx.config = config_file(ctx.archive + '/config.yaml')
-        ifn = os.path.join(ctx.archive, 'info.yaml')
-        if os.path.exists(ifn):
-            with open(ifn, 'r') as fd:
-                info = yaml.safe_load(fd.read())
-        if not ctx.pid:
-            ctx.pid = info.get('pid')
-            if not ctx.pid:
-                ctx.pid = int(open(ctx.archive + '/pid').read().rstrip('\n'))
-        if not ctx.owner:
-            ctx.owner = info.get('owner')
-            if not ctx.owner:
-                ctx.owner = open(ctx.archive + '/owner').read().rstrip('\n')
-
-    if ctx.targets:
-        ctx.config = merge_configs(ctx.targets)
-
-    if ctx.stale:
-        stale_nodes = find_stale_locks(ctx.owner)
-        targets = dict()
-        for node in stale_nodes:
-            targets[node['name']] = node['ssh_pub_key']
-        ctx.config = dict(targets=targets)
-
-    if ctx.stale_openstack:
-        stale_openstack(ctx)
-        return
-
-    log.info(
-        '\n '.join(
-            ['targets:', ] + yaml.safe_dump(
-                ctx.config['targets'],
-                default_flow_style=False).splitlines()))
-
-    if ctx.dry_run:
-        log.info("Not actually nuking anything since --dry-run was passed")
-        return
-
-    if ctx.owner is None:
-        ctx.owner = get_user()
-
-    if ctx.pid:
-        if ctx.archive:
-            log.info('Killing teuthology process at pid %d', ctx.pid)
-            os.system('grep -q %s /proc/%d/cmdline && sudo kill %d' % (
-                ctx.archive,
-                ctx.pid,
-                ctx.pid))
-        else:
-            subprocess.check_call(["kill", "-9", str(ctx.pid)])
-
-    nuke(ctx, ctx.unlock, ctx.synch_clocks, ctx.noipmi, ctx.keep_logs, not ctx.no_reboot)
-
-
-def nuke(ctx, should_unlock, sync_clocks=True, noipmi=False, keep_logs=False, should_reboot=True):
-    if 'targets' not in ctx.config:
-        return
-    total_unnuked = {}
-    targets = dict(ctx.config['targets'])
-    if ctx.name:
-        log.info('Checking targets against current locks')
-        locks = list_locks()
-        # Remove targets who's description doesn't match archive name.
-        for lock in locks:
-            for target in targets:
-                if target == lock['name']:
-                    if ctx.name not in lock['description']:
-                        del ctx.config['targets'][lock['name']]
-                        log.info(
-                            "Not nuking %s because description doesn't match",
-                            lock['name'])
-    with parallel() as p:
-        for target, hostkey in ctx.config['targets'].items():
-            p.spawn(
-                nuke_one,
-                ctx,
-                {target: hostkey},
-                should_unlock,
-                sync_clocks,
-                ctx.config.get('check-locks', True),
-                noipmi,
-                keep_logs,
-                should_reboot,
-            )
-        for unnuked in p:
-            if unnuked:
-                total_unnuked.update(unnuked)
-    if total_unnuked:
-        log.error('Could not nuke the following targets:\n' +
-                  '\n '.join(['targets:', ] +
-                             yaml.safe_dump(
-                                 total_unnuked,
-                                 default_flow_style=False).splitlines()))
-
-
-def nuke_one(ctx, target, should_unlock, synch_clocks,
-             check_locks, noipmi, keep_logs, should_reboot):
-    ret = None
-    ctx = argparse.Namespace(
-        config=dict(targets=target),
-        owner=ctx.owner,
-        check_locks=check_locks,
-        synch_clocks=synch_clocks,
-        teuthology_config=config.to_dict(),
-        name=ctx.name,
-        noipmi=noipmi,
+    log.info("Clearing teuthology firewall rules...")
+    ctx.cluster.run(
+        args=[
+            "sudo", "sh", "-c",
+            "iptables-save | grep -v teuthology | iptables-restore"
+        ],
     )
-    try:
-        nuke_helper(ctx, should_unlock, keep_logs, should_reboot)
-    except Exception:
-        log.exception('Could not nuke %s' % target)
-        # not re-raising the so that parallel calls aren't killed
-        ret = target
-    else:
-        if should_unlock:
-            unlock_one(ctx, list(target.keys())[0], ctx.owner)
-    return ret
-
-
-def nuke_helper(ctx, should_unlock, keep_logs, should_reboot):
-    # ensure node is up with ipmi
-    (target,) = ctx.config['targets'].keys()
-    host = target.split('@')[-1]
-    shortname = host.split('.')[0]
-    if should_unlock:
-        if is_vm(shortname):
-            return
-    log.debug('shortname: %s' % shortname)
-    if ctx.check_locks:
-        # does not check to ensure if the node is 'up'
-        # we want to be able to nuke a downed node
-        check_lock.check_lock(ctx, None, check_up=False)
-    status = get_status(host)
-    if status['machine_type'] in provision.fog.get_types():
-        remote = Remote(host)
-        remote.console.power_off()
-        return
-    elif status['machine_type'] in provision.pelagos.get_types():
-        provision.pelagos.park_node(host)
-        return
-
-    if (not ctx.noipmi and 'ipmi_user' in config and
-            'vpm' not in shortname):
-        try:
-            check_console(host)
-        except Exception:
-            log.exception('')
-            log.info("Will attempt to connect via SSH")
-            remote = Remote(host)
-            remote.connect()
-    add_remotes(ctx, None)
-    connect(ctx, None)
-    clear_firewall(ctx)
-    shutdown_daemons(ctx)
-    kill_valgrind(ctx)
-    # Try to remove packages before reboot
-    remove_installed_packages(ctx)
-    remotes = ctx.cluster.remotes.keys()
-    if should_reboot:
-        reboot(ctx, remotes)
-    # shutdown daemons again incase of startup
-    shutdown_daemons(ctx)
-    remove_osd_mounts(ctx)
-    remove_osd_tmpfs(ctx)
-    kill_hadoop(ctx)
-    remove_ceph_packages(ctx)
-    synch_clocks(remotes)
-    unlock_firmware_repo(ctx)
-    remove_configuration_files(ctx)
-    undo_multipath(ctx)
-    reset_syslog_dir(ctx)
-    remove_ceph_data(ctx)
-    if not keep_logs:
-        remove_testing_tree(ctx)
-    remove_yum_timedhosts(ctx)
-    # Once again remove packages after reboot
-    remove_installed_packages(ctx)
-    log.info('Installed packages removed.')
+    log.info("Cleared teuthology firewall rules.")
```