teuthology 1.0.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scripts/describe.py +1 -0
- scripts/dispatcher.py +62 -0
- scripts/exporter.py +18 -0
- scripts/lock.py +1 -1
- scripts/node_cleanup.py +58 -0
- scripts/openstack.py +9 -9
- scripts/results.py +12 -11
- scripts/run.py +4 -0
- scripts/schedule.py +4 -0
- scripts/suite.py +61 -16
- scripts/supervisor.py +44 -0
- scripts/update_inventory.py +10 -4
- scripts/wait.py +31 -0
- teuthology/__init__.py +24 -21
- teuthology/beanstalk.py +4 -3
- teuthology/config.py +17 -6
- teuthology/contextutil.py +18 -14
- teuthology/describe_tests.py +25 -18
- teuthology/dispatcher/__init__.py +365 -0
- teuthology/dispatcher/supervisor.py +374 -0
- teuthology/exceptions.py +54 -0
- teuthology/exporter.py +347 -0
- teuthology/kill.py +76 -75
- teuthology/lock/cli.py +16 -7
- teuthology/lock/ops.py +276 -70
- teuthology/lock/query.py +61 -44
- teuthology/ls.py +9 -18
- teuthology/misc.py +152 -137
- teuthology/nuke/__init__.py +12 -351
- teuthology/openstack/__init__.py +4 -3
- teuthology/openstack/openstack-centos-7.0-user-data.txt +1 -1
- teuthology/openstack/openstack-centos-7.1-user-data.txt +1 -1
- teuthology/openstack/openstack-centos-7.2-user-data.txt +1 -1
- teuthology/openstack/openstack-debian-8.0-user-data.txt +1 -1
- teuthology/openstack/openstack-opensuse-42.1-user-data.txt +1 -1
- teuthology/openstack/openstack-teuthology.cron +0 -1
- teuthology/orchestra/cluster.py +51 -9
- teuthology/orchestra/connection.py +23 -16
- teuthology/orchestra/console.py +111 -50
- teuthology/orchestra/daemon/cephadmunit.py +23 -5
- teuthology/orchestra/daemon/state.py +10 -3
- teuthology/orchestra/daemon/systemd.py +10 -8
- teuthology/orchestra/opsys.py +32 -11
- teuthology/orchestra/remote.py +369 -152
- teuthology/orchestra/run.py +21 -12
- teuthology/packaging.py +54 -15
- teuthology/provision/__init__.py +30 -10
- teuthology/provision/cloud/openstack.py +12 -6
- teuthology/provision/cloud/util.py +1 -2
- teuthology/provision/downburst.py +83 -29
- teuthology/provision/fog.py +68 -20
- teuthology/provision/openstack.py +5 -4
- teuthology/provision/pelagos.py +13 -5
- teuthology/repo_utils.py +91 -44
- teuthology/report.py +57 -35
- teuthology/results.py +5 -3
- teuthology/run.py +21 -15
- teuthology/run_tasks.py +114 -40
- teuthology/schedule.py +4 -3
- teuthology/scrape.py +28 -22
- teuthology/suite/__init__.py +75 -46
- teuthology/suite/build_matrix.py +34 -24
- teuthology/suite/fragment-merge.lua +105 -0
- teuthology/suite/matrix.py +31 -2
- teuthology/suite/merge.py +175 -0
- teuthology/suite/placeholder.py +8 -8
- teuthology/suite/run.py +204 -102
- teuthology/suite/util.py +67 -211
- teuthology/task/__init__.py +1 -1
- teuthology/task/ansible.py +101 -31
- teuthology/task/buildpackages.py +2 -2
- teuthology/task/ceph_ansible.py +13 -6
- teuthology/task/cephmetrics.py +2 -1
- teuthology/task/clock.py +33 -14
- teuthology/task/exec.py +18 -0
- teuthology/task/hadoop.py +2 -2
- teuthology/task/install/__init__.py +51 -22
- teuthology/task/install/bin/adjust-ulimits +16 -0
- teuthology/task/install/bin/daemon-helper +114 -0
- teuthology/task/install/bin/stdin-killer +263 -0
- teuthology/task/install/deb.py +24 -4
- teuthology/task/install/redhat.py +36 -32
- teuthology/task/install/rpm.py +41 -14
- teuthology/task/install/util.py +48 -22
- teuthology/task/internal/__init__.py +69 -11
- teuthology/task/internal/edit_sudoers.sh +10 -0
- teuthology/task/internal/lock_machines.py +3 -133
- teuthology/task/internal/redhat.py +48 -28
- teuthology/task/internal/syslog.py +31 -8
- teuthology/task/kernel.py +155 -147
- teuthology/task/lockfile.py +1 -1
- teuthology/task/mpi.py +10 -10
- teuthology/task/pcp.py +1 -1
- teuthology/task/selinux.py +17 -8
- teuthology/task/ssh_keys.py +6 -6
- teuthology/task/tests/__init__.py +137 -77
- teuthology/task/tests/test_fetch_coredumps.py +116 -0
- teuthology/task/tests/test_run.py +4 -4
- teuthology/timer.py +3 -3
- teuthology/util/loggerfile.py +19 -0
- teuthology/util/scanner.py +159 -0
- teuthology/util/sentry.py +52 -0
- teuthology/util/time.py +52 -0
- teuthology-1.2.0.data/scripts/adjust-ulimits +16 -0
- teuthology-1.2.0.data/scripts/daemon-helper +114 -0
- teuthology-1.2.0.data/scripts/stdin-killer +263 -0
- teuthology-1.2.0.dist-info/METADATA +89 -0
- teuthology-1.2.0.dist-info/RECORD +174 -0
- {teuthology-1.0.0.dist-info → teuthology-1.2.0.dist-info}/WHEEL +1 -1
- {teuthology-1.0.0.dist-info → teuthology-1.2.0.dist-info}/entry_points.txt +5 -2
- scripts/nuke.py +0 -45
- scripts/worker.py +0 -37
- teuthology/nuke/actions.py +0 -456
- teuthology/openstack/test/__init__.py +0 -0
- teuthology/openstack/test/openstack-integration.py +0 -286
- teuthology/openstack/test/test_config.py +0 -35
- teuthology/openstack/test/test_openstack.py +0 -1695
- teuthology/orchestra/test/__init__.py +0 -0
- teuthology/orchestra/test/integration/__init__.py +0 -0
- teuthology/orchestra/test/integration/test_integration.py +0 -94
- teuthology/orchestra/test/test_cluster.py +0 -240
- teuthology/orchestra/test/test_connection.py +0 -106
- teuthology/orchestra/test/test_console.py +0 -217
- teuthology/orchestra/test/test_opsys.py +0 -404
- teuthology/orchestra/test/test_remote.py +0 -185
- teuthology/orchestra/test/test_run.py +0 -286
- teuthology/orchestra/test/test_systemd.py +0 -54
- teuthology/orchestra/test/util.py +0 -12
- teuthology/sentry.py +0 -18
- teuthology/test/__init__.py +0 -0
- teuthology/test/fake_archive.py +0 -107
- teuthology/test/fake_fs.py +0 -92
- teuthology/test/integration/__init__.py +0 -0
- teuthology/test/integration/test_suite.py +0 -86
- teuthology/test/task/__init__.py +0 -205
- teuthology/test/task/test_ansible.py +0 -624
- teuthology/test/task/test_ceph_ansible.py +0 -176
- teuthology/test/task/test_console_log.py +0 -88
- teuthology/test/task/test_install.py +0 -337
- teuthology/test/task/test_internal.py +0 -57
- teuthology/test/task/test_kernel.py +0 -243
- teuthology/test/task/test_pcp.py +0 -379
- teuthology/test/task/test_selinux.py +0 -35
- teuthology/test/test_config.py +0 -189
- teuthology/test/test_contextutil.py +0 -68
- teuthology/test/test_describe_tests.py +0 -316
- teuthology/test/test_email_sleep_before_teardown.py +0 -81
- teuthology/test/test_exit.py +0 -97
- teuthology/test/test_get_distro.py +0 -47
- teuthology/test/test_get_distro_version.py +0 -47
- teuthology/test/test_get_multi_machine_types.py +0 -27
- teuthology/test/test_job_status.py +0 -60
- teuthology/test/test_ls.py +0 -48
- teuthology/test/test_misc.py +0 -368
- teuthology/test/test_nuke.py +0 -232
- teuthology/test/test_packaging.py +0 -763
- teuthology/test/test_parallel.py +0 -28
- teuthology/test/test_repo_utils.py +0 -204
- teuthology/test/test_report.py +0 -77
- teuthology/test/test_results.py +0 -155
- teuthology/test/test_run.py +0 -238
- teuthology/test/test_safepath.py +0 -55
- teuthology/test/test_schedule.py +0 -45
- teuthology/test/test_scrape.py +0 -167
- teuthology/test/test_timer.py +0 -80
- teuthology/test/test_vps_os_vers_parameter_checking.py +0 -84
- teuthology/test/test_worker.py +0 -303
- teuthology/worker.py +0 -339
- teuthology-1.0.0.dist-info/METADATA +0 -76
- teuthology-1.0.0.dist-info/RECORD +0 -210
- {teuthology-1.0.0.dist-info → teuthology-1.2.0.dist-info}/LICENSE +0 -0
- {teuthology-1.0.0.dist-info → teuthology-1.2.0.dist-info}/top_level.txt +0 -0
teuthology/nuke/__init__.py
CHANGED
@@ -1,359 +1,20 @@
-import argparse
-import datetime
-import json
 import logging
-import os
-import subprocess
-
-import yaml
-
-import teuthology
-from teuthology import provision
-from teuthology.lock.ops import unlock_one
-from teuthology.lock.query import is_vm, list_locks, \
-    find_stale_locks, get_status
-from teuthology.lock.util import locked_since_seconds
-from teuthology.nuke.actions import (
-    check_console, clear_firewall, shutdown_daemons, remove_installed_packages,
-    reboot, remove_osd_mounts, remove_osd_tmpfs, kill_hadoop,
-    remove_ceph_packages, synch_clocks, unlock_firmware_repo,
-    remove_configuration_files, undo_multipath, reset_syslog_dir,
-    remove_ceph_data, remove_testing_tree, remove_yum_timedhosts,
-    kill_valgrind,
-)
-from teuthology.config import config, FakeNamespace
-from teuthology.misc import (
-    canonicalize_hostname, config_file, decanonicalize_hostname, merge_configs,
-    get_user, sh
-)
-from teuthology.openstack import OpenStack, OpenStackInstance, enforce_json_dictionary
-from teuthology.orchestra.remote import Remote
-from teuthology.parallel import parallel
-from teuthology.task.internal import check_lock, add_remotes, connect
 
 log = logging.getLogger(__name__)
 
 
-
-
-
-
-def openstack_volume_name(volume):
-    return (volume.get('Display Name') or
-            volume.get('display_name') or
-            volume.get('Name') or
-            volume.get('name') or "")
-
-
-def stale_openstack(ctx):
-    targets = dict(map(lambda i: (i['ID'], i),
-                       OpenStack.list_instances()))
-    nodes = list_locks(keyed_by_name=True, locked=True)
-    stale_openstack_instances(ctx, targets, nodes)
-    stale_openstack_nodes(ctx, targets, nodes)
-    stale_openstack_volumes(ctx, OpenStack.list_volumes())
-    if not ctx.dry_run:
-        openstack_remove_again()
-
-#
-# A delay, in seconds, that is significantly longer than
-# any kind of OpenStack server creation / deletion / etc.
-#
-OPENSTACK_DELAY = 30 * 60
-
-
-def stale_openstack_instances(ctx, instances, locked_nodes):
-    for (instance_id, instance) in instances.items():
-        i = OpenStackInstance(instance_id)
-        if not i.exists():
-            log.debug("stale-openstack: {instance} disappeared, ignored"
-                      .format(instance=instance_id))
-            continue
-        if (i.get_created() >
-                config['max_job_time'] + OPENSTACK_DELAY):
-            log.info(
-                "stale-openstack: destroying instance {instance}"
-                " because it was created {created} seconds ago"
-                " which is older than"
-                " max_job_time {max_job_time} + {delay}"
-                .format(instance=i['name'],
-                        created=i.get_created(),
-                        max_job_time=config['max_job_time'],
-                        delay=OPENSTACK_DELAY))
-            if not ctx.dry_run:
-                i.destroy()
-            continue
-        name = canonicalize_hostname(i['name'], user=None)
-        if i.get_created() > OPENSTACK_DELAY and name not in locked_nodes:
-            log.info("stale-openstack: destroying instance {instance}"
-                     " because it was created {created} seconds ago"
-                     " is older than {delay}s and it is not locked"
-                     .format(instance=i['name'],
-                             created=i.get_created(),
-                             delay=OPENSTACK_DELAY))
-            if not ctx.dry_run:
-                i.destroy()
-            continue
-        log.debug("stale-openstack: instance " + i['name'] + " OK")
-
-
-def openstack_delete_volume(id):
-    OpenStack().run("volume delete " + id + " || true")
-
-
-def stale_openstack_volumes(ctx, volumes):
-    now = datetime.datetime.now()
-    for volume in volumes:
-        volume_id = openstack_volume_id(volume)
-        try:
-            volume = json.loads(OpenStack().run("volume show -f json " +
-                                                volume_id))
-        except subprocess.CalledProcessError:
-            log.debug("stale-openstack: {id} disappeared, ignored"
-                      .format(id=volume_id))
-            continue
-        volume_name = openstack_volume_name(volume)
-        enforce_json_dictionary(volume)
-        created_at = datetime.datetime.strptime(
-            volume['created_at'], '%Y-%m-%dT%H:%M:%S.%f')
-        created = (now - created_at).total_seconds()
-        if created > config['max_job_time'] + OPENSTACK_DELAY:
-            log.info(
-                "stale-openstack: destroying volume {volume}({id})"
-                " because it was created {created} seconds ago"
-                " which is older than"
-                " max_job_time {max_job_time} + {delay}"
-                .format(volume=volume_name,
-                        id=volume_id,
-                        created=created,
-                        max_job_time=config['max_job_time'],
-                        delay=OPENSTACK_DELAY))
-            if not ctx.dry_run:
-                openstack_delete_volume(volume_id)
-            continue
-        log.debug("stale-openstack: volume " + volume_id + " OK")
-
-
-def stale_openstack_nodes(ctx, instances, locked_nodes):
-    names = set([ i['Name'] for i in instances.values() ])
-    for (name, node) in locked_nodes.items():
-        name = decanonicalize_hostname(name)
-        if node['machine_type'] != 'openstack':
-            continue
-        if (name not in names and
-                locked_since_seconds(node) > OPENSTACK_DELAY):
-            log.info("stale-openstack: unlocking node {name} unlocked"
-                     " because it was created {created}"
-                     " seconds ago which is older than {delay}"
-                     " and it has no instance"
-                     .format(name=name,
-                             created=locked_since_seconds(node),
-                             delay=OPENSTACK_DELAY))
-            if not ctx.dry_run:
-                unlock_one(ctx, name, node['locked_by'])
-            continue
-        log.debug("stale-openstack: node " + name + " OK")
-
-
-def openstack_remove_again():
+# This is being kept because ceph.git/qa/tasks/cephfs/filesystem.py references it.
+def clear_firewall(ctx):
     """
-
-
-
+    Remove any iptables rules created by teuthology. These rules are
+    identified by containing a comment with 'teuthology' in it. Non-teuthology
+    firewall rules are unaffected.
     """
-
-
-
-
-
-
-    remove_me = [openstack_volume_id(v) for v in volumes
-                 if 'REMOVE-ME' in openstack_volume_name(v)]
-    for i in remove_me:
-        log.info("Trying to remove stale volume %s" % i)
-        openstack_delete_volume(i)
-
-
-def main(args):
-    ctx = FakeNamespace(args)
-    if ctx.verbose:
-        teuthology.log.setLevel(logging.DEBUG)
-
-    info = {}
-    if ctx.archive:
-        ctx.config = config_file(ctx.archive + '/config.yaml')
-        ifn = os.path.join(ctx.archive, 'info.yaml')
-        if os.path.exists(ifn):
-            with open(ifn, 'r') as fd:
-                info = yaml.safe_load(fd.read())
-        if not ctx.pid:
-            ctx.pid = info.get('pid')
-            if not ctx.pid:
-                ctx.pid = int(open(ctx.archive + '/pid').read().rstrip('\n'))
-        if not ctx.owner:
-            ctx.owner = info.get('owner')
-            if not ctx.owner:
-                ctx.owner = open(ctx.archive + '/owner').read().rstrip('\n')
-
-    if ctx.targets:
-        ctx.config = merge_configs(ctx.targets)
-
-    if ctx.stale:
-        stale_nodes = find_stale_locks(ctx.owner)
-        targets = dict()
-        for node in stale_nodes:
-            targets[node['name']] = node['ssh_pub_key']
-        ctx.config = dict(targets=targets)
-
-    if ctx.stale_openstack:
-        stale_openstack(ctx)
-        return
-
-    log.info(
-        '\n '.join(
-            ['targets:', ] + yaml.safe_dump(
-                ctx.config['targets'],
-                default_flow_style=False).splitlines()))
-
-    if ctx.dry_run:
-        log.info("Not actually nuking anything since --dry-run was passed")
-        return
-
-    if ctx.owner is None:
-        ctx.owner = get_user()
-
-    if ctx.pid:
-        if ctx.archive:
-            log.info('Killing teuthology process at pid %d', ctx.pid)
-            os.system('grep -q %s /proc/%d/cmdline && sudo kill %d' % (
-                ctx.archive,
-                ctx.pid,
-                ctx.pid))
-        else:
-            subprocess.check_call(["kill", "-9", str(ctx.pid)])
-
-    nuke(ctx, ctx.unlock, ctx.synch_clocks, ctx.reboot_all, ctx.noipmi)
-
-
-def nuke(ctx, should_unlock, sync_clocks=True, reboot_all=True, noipmi=False):
-    if 'targets' not in ctx.config:
-        return
-    total_unnuked = {}
-    targets = dict(ctx.config['targets'])
-    if ctx.name:
-        log.info('Checking targets against current locks')
-        locks = list_locks()
-        # Remove targets who's description doesn't match archive name.
-        for lock in locks:
-            for target in targets:
-                if target == lock['name']:
-                    if ctx.name not in lock['description']:
-                        del ctx.config['targets'][lock['name']]
-                        log.info(
-                            "Not nuking %s because description doesn't match",
-                            lock['name'])
-    with parallel() as p:
-        for target, hostkey in ctx.config['targets'].items():
-            p.spawn(
-                nuke_one,
-                ctx,
-                {target: hostkey},
-                should_unlock,
-                sync_clocks,
-                reboot_all,
-                ctx.config.get('check-locks', True),
-                noipmi,
-            )
-        for unnuked in p:
-            if unnuked:
-                total_unnuked.update(unnuked)
-    if total_unnuked:
-        log.error('Could not nuke the following targets:\n' +
-                  '\n '.join(['targets:', ] +
-                             yaml.safe_dump(
-                                 total_unnuked,
-                                 default_flow_style=False).splitlines()))
-
-
-def nuke_one(ctx, target, should_unlock, synch_clocks, reboot_all,
-             check_locks, noipmi):
-    ret = None
-    ctx = argparse.Namespace(
-        config=dict(targets=target),
-        owner=ctx.owner,
-        check_locks=check_locks,
-        synch_clocks=synch_clocks,
-        reboot_all=reboot_all,
-        teuthology_config=config.to_dict(),
-        name=ctx.name,
-        noipmi=noipmi,
+    log.info("Clearing teuthology firewall rules...")
+    ctx.cluster.run(
+        args=[
+            "sudo", "sh", "-c",
+            "iptables-save | grep -v teuthology | iptables-restore"
+        ],
     )
-
-        nuke_helper(ctx, should_unlock)
-    except Exception:
-        log.exception('Could not nuke %s' % target)
-        # not re-raising the so that parallel calls aren't killed
-        ret = target
-    else:
-        if should_unlock:
-            unlock_one(ctx, list(target.keys())[0], ctx.owner)
-    return ret
-
-
-def nuke_helper(ctx, should_unlock):
-    # ensure node is up with ipmi
-    (target,) = ctx.config['targets'].keys()
-    host = target.split('@')[-1]
-    shortname = host.split('.')[0]
-    if should_unlock:
-        if is_vm(shortname):
-            return
-    log.debug('shortname: %s' % shortname)
-    if ctx.check_locks:
-        # does not check to ensure if the node is 'up'
-        # we want to be able to nuke a downed node
-        check_lock.check_lock(ctx, None, check_up=False)
-    status = get_status(host)
-    if status['machine_type'] in provision.fog.get_types():
-        remote = Remote(host)
-        remote.console.power_off()
-        return
-    elif status['machine_type'] in provision.pelagos.get_types():
-        provision.pelagos.park_node(host)
-        return
-
-    if (not ctx.noipmi and 'ipmi_user' in config and
-            'vpm' not in shortname):
-        try:
-            check_console(host)
-        except Exception:
-            log.exception('')
-            log.info("Will attempt to connect via SSH")
-            remote = Remote(host)
-            remote.connect()
-    add_remotes(ctx, None)
-    connect(ctx, None)
-    clear_firewall(ctx)
-    shutdown_daemons(ctx)
-    kill_valgrind(ctx)
-    # Try to remove packages before reboot
-    remove_installed_packages(ctx)
-    remotes = ctx.cluster.remotes.keys()
-    reboot(ctx, remotes)
-    # shutdown daemons again incase of startup
-    shutdown_daemons(ctx)
-    remove_osd_mounts(ctx)
-    remove_osd_tmpfs(ctx)
-    kill_hadoop(ctx)
-    remove_ceph_packages(ctx)
-    synch_clocks(remotes)
-    unlock_firmware_repo(ctx)
-    remove_configuration_files(ctx)
-    undo_multipath(ctx)
-    reset_syslog_dir(ctx)
-    remove_ceph_data(ctx)
-    remove_testing_tree(ctx)
-    remove_yum_timedhosts(ctx)
-    # Once again remove packages after reboot
-    remove_installed_packages(ctx)
-    log.info('Installed packages removed.')
+    log.info("Cleared teuthology firewall rules.")
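In 1.2.0 the nuke module is reduced to the single clear_firewall() helper above, kept only because ceph.git/qa/tasks/cephfs/filesystem.py references it. A minimal sketch (not from the package) of driving it outside a full teuthology run follows; the hostname is a placeholder and FakeCtx only provides the .cluster attribute the helper actually uses:

    from teuthology.nuke import clear_firewall
    from teuthology.orchestra.cluster import Cluster
    from teuthology.orchestra.remote import Remote

    class FakeCtx:
        # clear_firewall() only touches ctx.cluster, so a bare namespace is enough
        def __init__(self, cluster):
            self.cluster = cluster

    cluster = Cluster()
    cluster.add(Remote('ubuntu@node1.example.com'), ['mon.a'])  # placeholder target
    # Runs "iptables-save | grep -v teuthology | iptables-restore" via sudo on every remote.
    clear_firewall(FakeCtx(cluster))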
teuthology/openstack/__init__.py
CHANGED
@@ -123,7 +123,7 @@ class OpenStackInstance(object):
         with safe_while(sleep=2, tries=30,
                         action="get ip " + self['id']) as proceed:
             while proceed():
-                found = re.match('.*\d+', self['addresses'])
+                found = re.match(r'.*\d+', self['addresses'])
                 if found:
                     return self['addresses']
                 self.set_info()
@@ -165,7 +165,7 @@ class OpenStackInstance(object):
             self.private_ip = self.get_ip_neutron()
         except Exception as e:
             log.debug("ignoring get_ip_neutron exception " + str(e))
-            self.private_ip = re.findall(network + '=([\d.]+)',
+            self.private_ip = re.findall(network + r'=([\d.]+)',
                                          self.get_addresses())[0]
         return self.private_ip
 
@@ -1026,7 +1026,8 @@ ssh access : ssh {identity}{username}@{ip} # logs in /usr/share/nginx/
         cluster, based on a template where the OpenStack credentials
         and a few other values are substituted.
         """
-        path = tempfile.
+        fd, path = tempfile.mkstemp()
+        os.close(fd)
 
         with open(os.path.dirname(__file__) + '/bootstrap-teuthology.sh', 'rb') as f:
             b64_bootstrap = base64.b64encode(f.read())
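The last hunk above swaps a truncated temporary-file call for tempfile.mkstemp() plus an explicit close of the returned descriptor. A small standalone illustration of that pattern (plain stdlib, independent of teuthology):

    import os
    import tempfile

    fd, path = tempfile.mkstemp()
    os.close(fd)                 # mkstemp() also returns an open OS descriptor; close it, keep the path
    with open(path, 'w') as f:   # the path is then reused like any ordinary file
        f.write('user-data goes here\n')
    os.remove(path)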
teuthology/openstack/openstack-*-user-data.txt (centos-7.0, centos-7.1, centos-7.2, debian-8.0, opensuse-42.1)
CHANGED
@@ -6,7 +6,7 @@ bootcmd:
 - ( curl --silent http://169.254.169.254/2009-04-04/meta-data/hostname | sed -e 's/[\.-].*//' ; eval printf "%03d%03d%03d%03d.{lab_domain}" $(curl --silent http://169.254.169.254/2009-04-04/meta-data/local-ipv4 | tr . ' ' ) ) | tee /etc/hostname
 - hostname $(cat /etc/hostname)
 - ( echo ; echo "MaxSessions 1000" ) >> /etc/ssh/sshd_config
-# See https://github.com/ceph/ceph-cm-ansible/blob/
+# See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
 - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
 preserve_hostname: true
 system_info:
@@ -6,7 +6,7 @@ bootcmd:
 - ( curl --silent http://169.254.169.254/2009-04-04/meta-data/hostname | sed -e 's/[\.-].*//' ; eval printf "%03d%03d%03d%03d.{lab_domain}" $(curl --silent http://169.254.169.254/2009-04-04/meta-data/local-ipv4 | tr . ' ' ) ) | tee /etc/hostname
 - hostname $(cat /etc/hostname)
 - ( echo ; echo "MaxSessions 1000" ) >> /etc/ssh/sshd_config
-# See https://github.com/ceph/ceph-cm-ansible/blob/
+# See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
 - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
 preserve_hostname: true
 system_info:
@@ -6,7 +6,7 @@ bootcmd:
 - ( curl --silent http://169.254.169.254/2009-04-04/meta-data/hostname | sed -e 's/[\.-].*//' ; eval printf "%03d%03d%03d%03d.{lab_domain}" $(curl --silent http://169.254.169.254/2009-04-04/meta-data/local-ipv4 | tr . ' ' ) ) | tee /etc/hostname
 - hostname $(cat /etc/hostname)
 - ( echo ; echo "MaxSessions 1000" ) >> /etc/ssh/sshd_config
-# See https://github.com/ceph/ceph-cm-ansible/blob/
+# See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
 - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
 preserve_hostname: true
 system_info:
@@ -18,7 +18,7 @@ packages:
 - git
 - ntp
 runcmd:
-# See https://github.com/ceph/ceph-cm-ansible/blob/
+# See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
 - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
 - echo '{username} ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
 final_message: "{up}, after $UPTIME seconds"
@@ -6,7 +6,7 @@ bootcmd:
 - ( curl --silent http://169.254.169.254/2009-04-04/meta-data/hostname | sed -e 's/[\.-].*//' ; eval printf "%03d%03d%03d%03d.{lab_domain}" $(curl --silent http://169.254.169.254/2009-04-04/meta-data/local-ipv4 | tr . ' ' ) ) | tee /etc/hostname
 - hostname $(cat /etc/hostname)
 - ( echo ; echo "MaxSessions 1000" ) >> /etc/ssh/sshd_config
-# See https://github.com/ceph/ceph-cm-ansible/blob/
+# See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
 - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
 preserve_hostname: true
 users:
teuthology/orchestra/cluster.py
CHANGED
@@ -2,8 +2,7 @@
 Cluster definition
 part of context, Cluster is used to save connection information.
 """
-
-
+from teuthology.orchestra import run
 
 class Cluster(object):
     """
@@ -50,20 +49,49 @@ class Cluster(object):
         )
         self.remotes[remote] = list(roles)
 
-    def run(self, **kwargs):
+    def run(self, wait=True, parallel=False, **kwargs):
         """
         Run a command on all the nodes in this cluster.
 
         Goes through nodes in alphabetical order.
 
-
+        The default usage is when parallel=False and wait=True,
+        which is a sequential run for each node one by one.
+
+        If you specify parallel=True, it will be in parallel.
+
+        If you specify wait=False, it returns immediately.
+        Since it is not possible to run sequentially and
+        do not wait each command run finished, the parallel value
+        is ignored and treated as True.
 
         Returns a list of `RemoteProcess`.
         """
+        # -+-------+----------+----------+------------+---------------
+        #  | wait  | parallel | run.wait | remote.run | comments
+        # -+-------+----------+----------+------------+---------------
+        # 1|*True  |*False    | no       | wait=True  | sequentially
+        # 2| True  | True     | yes      | wait=False | parallel
+        # 3| False | True     | no       | wait=False | parallel
+        # 4| False | False    | no       | wait=False | same as above
+
+        # We always run in parallel if wait=False,
+        # that is why (4) is equivalent to (3).
+
+        # We wait from remote.run only if run sequentially.
+        _wait = (parallel == False and wait == True)
+
         remotes = sorted(self.remotes.keys(), key=lambda rem: rem.name)
-
+        procs = [remote.run(**kwargs, wait=_wait) for remote in remotes]
 
-
+        # We do run.wait only if parallel=True, because if parallel=False,
+        # we have run sequentially and all processes are complete.
+
+        if parallel and wait:
+            run.wait(procs)
+        return procs
+
+    def sh(self, script, **kwargs):
         """
         Run a command on all the nodes in this cluster.
 
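The table in the new docstring maps the wait/parallel flags onto run.wait() and remote.run(wait=...). A hedged sketch of the three effective call patterns; `cluster` is assumed to be an already-populated Cluster and the command is arbitrary:

    # 1) Default: wait=True, parallel=False -- each remote runs and finishes in turn.
    cluster.run(args=['uptime'])

    # 2) wait=True, parallel=True -- all remotes start at once, then run.wait() blocks on them.
    cluster.run(args=['uptime'], parallel=True)

    # 3) wait=False -- returns the RemoteProcess list immediately; parallel is treated as True.
    procs = cluster.run(args=['uptime'], wait=False)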
@@ -72,7 +100,7 @@ class Cluster(object):
         Returns a list of the command outputs correspondingly.
         """
         remotes = sorted(self.remotes.keys(), key=lambda rem: rem.name)
-        return [remote.sh(**kwargs) for remote in remotes]
+        return [remote.sh(script, **kwargs) for remote in remotes]
 
     def write_file(self, file_name, content, sudo=False, perms=None, owner=None):
         """
@@ -86,11 +114,12 @@ class Cluster(object):
         remotes = sorted(self.remotes.keys(), key=lambda rem: rem.name)
         for remote in remotes:
             if sudo:
-
+                remote.write_file(file_name, content,
+                                  sudo=True, mode=perms, owner=owner)
             else:
                 if perms is not None or owner is not None:
                     raise ValueError("To specify perms or owner, sudo must be True")
-
+                remote.write_file(file_name, content)
 
     def only(self, *roles):
         """
@@ -144,3 +173,16 @@ class Cluster(object):
             if remote not in matches.remotes:
                 c.add(remote, has_roles)
         return c
+
+    def filter(self, func):
+        """
+        Return a cluster whose remotes are filtered by `func`.
+
+        Example::
+            cluster = ctx.cluster.filter(lambda r: r.is_online)
+        """
+        result = self.__class__()
+        for rem, roles in self.remotes.items():
+            if func(rem):
+                result.add(rem, roles)
+        return result
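A short sketch tying the new filter() helper to run(): build a sub-cluster from a predicate over remotes and operate only on it. `cluster` is assumed to be an existing Cluster; the osd-role predicate is illustrative:

    osd_nodes = cluster.filter(
        lambda rem: any(role.startswith('osd.') for role in cluster.remotes[rem]))
    osd_nodes.run(args=['hostname'], parallel=True)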
teuthology/orchestra/connection.py
CHANGED
@@ -1,13 +1,13 @@
 """
 Connection utilities
 """
-import base64
 import paramiko
 import os
 import logging
 
 from teuthology.config import config
 from teuthology.contextutil import safe_while
+from paramiko.hostkeys import HostKeyEntry
 
 log = logging.getLogger(__name__)
 
@@ -29,14 +29,11 @@ def create_key(keytype, key):
     """
     Create an ssh-rsa, ssh-dss or ssh-ed25519 key.
     """
-
-
-
-
-
-        return paramiko.ed25519key.Ed25519Key(data=base64.decodestring(key.encode()))
-    else:
-        raise ValueError('keytype must be ssh-rsa, ssh-dss (DSA) or ssh-ed25519')
+    l = "{hostname} {keytype} {key}".format(hostname="x", keytype=keytype, key=key)
+
+    ke = HostKeyEntry.from_line(l)
+    assert ke, f'invalid host key "{keytype} {key}"'
+    return ke.key
 
 
 def connect(user_at_host, host_key=None, keep_alive=False, timeout=60,
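The rewritten create_key() delegates all key parsing to paramiko's HostKeyEntry instead of branching on key type by hand. An equivalent standalone sketch, assuming the public key is already base64-encoded as it would appear in a known_hosts line; the placeholder hostname "x" does not affect the parsed key:

    from paramiko.hostkeys import HostKeyEntry

    def parse_pubkey(keytype, key_b64):
        line = f"x {keytype} {key_b64}"        # "<host> <type> <base64>" known_hosts format
        entry = HostKeyEntry.from_line(line)   # returns None if the line cannot be parsed
        assert entry, f'invalid host key "{keytype} {key_b64}"'
        return entry.key                       # a paramiko.PKey subclass (RSAKey, Ed25519Key, ...)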
@@ -82,13 +79,17 @@ def connect(user_at_host, host_key=None, keep_alive=False, timeout=60,
         timeout=timeout
     )
 
-
-
+    key_filename = key_filename or config.ssh_key
+    ssh_config_path = config.ssh_config_path or "~/.ssh/config"
+    ssh_config_path = os.path.expanduser(ssh_config_path)
+    if not key_filename and os.path.exists(ssh_config_path):
         ssh_config = paramiko.SSHConfig()
         ssh_config.parse(open(ssh_config_path))
         opts = ssh_config.lookup(host)
         if not key_filename and 'identityfile' in opts:
             key_filename = opts['identityfile']
+        if 'hostname' in opts:
+            connect_args['hostname'] = opts['hostname']
 
     if key_filename:
         if not isinstance(key_filename, list):
@@ -101,14 +102,20 @@ def connect(user_at_host, host_key=None, keep_alive=False, timeout=60,
     if not retry:
         ssh.connect(**connect_args)
     else:
-
-        with safe_while(sleep=1, action='connect to ' + host) as proceed:
+        with safe_while(sleep=1, increment=3, action='connect to ' + host) as proceed:
             while proceed():
+                auth_err_msg = f"Error authenticating with {host}"
                 try:
                     ssh.connect(**connect_args)
                     break
-                except
-                    log.
-
+                except EOFError:
+                    log.error(f"{auth_err_msg}: EOFError")
+                except paramiko.AuthenticationException as e:
+                    log.error(f"{auth_err_msg}: {repr(e)}")
+                except paramiko.SSHException as e:
+                    auth_err_msg = f"{auth_err_msg}: {repr(e)}"
+                    if not key_filename:
+                        auth_err_msg = f"{auth_err_msg} (No SSH private key found!)"
+                    log.exception(auth_err_msg)
     ssh.get_transport().set_keepalive(keep_alive)
     return ssh
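The connect() hunk above retries ssh.connect() inside safe_while(sleep=1, increment=3, ...) and logs EOFError, AuthenticationException and other SSHExceptions rather than failing on the first attempt. A minimal sketch of that retry shape, assuming teuthology.contextutil.safe_while and an already-configured paramiko client; the helper name is hypothetical:

    import logging
    import paramiko
    from teuthology.contextutil import safe_while

    log = logging.getLogger(__name__)

    def connect_with_retries(client, **connect_args):
        host = connect_args.get('hostname', 'unknown')
        with safe_while(sleep=1, increment=3, action='connect to ' + host) as proceed:
            while proceed():
                try:
                    client.connect(**connect_args)
                    return client
                except (EOFError, paramiko.SSHException) as e:
                    # AuthenticationException is a subclass of SSHException
                    log.error(f"Error authenticating with {host}: {e!r}")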