teuthology 1.0.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. scripts/describe.py +1 -0
  2. scripts/dispatcher.py +62 -0
  3. scripts/exporter.py +18 -0
  4. scripts/lock.py +1 -1
  5. scripts/node_cleanup.py +58 -0
  6. scripts/openstack.py +9 -9
  7. scripts/results.py +12 -11
  8. scripts/run.py +4 -0
  9. scripts/schedule.py +4 -0
  10. scripts/suite.py +61 -16
  11. scripts/supervisor.py +44 -0
  12. scripts/update_inventory.py +10 -4
  13. scripts/wait.py +31 -0
  14. teuthology/__init__.py +24 -21
  15. teuthology/beanstalk.py +4 -3
  16. teuthology/config.py +17 -6
  17. teuthology/contextutil.py +18 -14
  18. teuthology/describe_tests.py +25 -18
  19. teuthology/dispatcher/__init__.py +365 -0
  20. teuthology/dispatcher/supervisor.py +374 -0
  21. teuthology/exceptions.py +54 -0
  22. teuthology/exporter.py +347 -0
  23. teuthology/kill.py +76 -75
  24. teuthology/lock/cli.py +16 -7
  25. teuthology/lock/ops.py +276 -70
  26. teuthology/lock/query.py +61 -44
  27. teuthology/ls.py +9 -18
  28. teuthology/misc.py +152 -137
  29. teuthology/nuke/__init__.py +12 -351
  30. teuthology/openstack/__init__.py +4 -3
  31. teuthology/openstack/openstack-centos-7.0-user-data.txt +1 -1
  32. teuthology/openstack/openstack-centos-7.1-user-data.txt +1 -1
  33. teuthology/openstack/openstack-centos-7.2-user-data.txt +1 -1
  34. teuthology/openstack/openstack-debian-8.0-user-data.txt +1 -1
  35. teuthology/openstack/openstack-opensuse-42.1-user-data.txt +1 -1
  36. teuthology/openstack/openstack-teuthology.cron +0 -1
  37. teuthology/orchestra/cluster.py +51 -9
  38. teuthology/orchestra/connection.py +23 -16
  39. teuthology/orchestra/console.py +111 -50
  40. teuthology/orchestra/daemon/cephadmunit.py +23 -5
  41. teuthology/orchestra/daemon/state.py +10 -3
  42. teuthology/orchestra/daemon/systemd.py +10 -8
  43. teuthology/orchestra/opsys.py +32 -11
  44. teuthology/orchestra/remote.py +369 -152
  45. teuthology/orchestra/run.py +21 -12
  46. teuthology/packaging.py +54 -15
  47. teuthology/provision/__init__.py +30 -10
  48. teuthology/provision/cloud/openstack.py +12 -6
  49. teuthology/provision/cloud/util.py +1 -2
  50. teuthology/provision/downburst.py +83 -29
  51. teuthology/provision/fog.py +68 -20
  52. teuthology/provision/openstack.py +5 -4
  53. teuthology/provision/pelagos.py +13 -5
  54. teuthology/repo_utils.py +91 -44
  55. teuthology/report.py +57 -35
  56. teuthology/results.py +5 -3
  57. teuthology/run.py +21 -15
  58. teuthology/run_tasks.py +114 -40
  59. teuthology/schedule.py +4 -3
  60. teuthology/scrape.py +28 -22
  61. teuthology/suite/__init__.py +75 -46
  62. teuthology/suite/build_matrix.py +34 -24
  63. teuthology/suite/fragment-merge.lua +105 -0
  64. teuthology/suite/matrix.py +31 -2
  65. teuthology/suite/merge.py +175 -0
  66. teuthology/suite/placeholder.py +8 -8
  67. teuthology/suite/run.py +204 -102
  68. teuthology/suite/util.py +67 -211
  69. teuthology/task/__init__.py +1 -1
  70. teuthology/task/ansible.py +101 -31
  71. teuthology/task/buildpackages.py +2 -2
  72. teuthology/task/ceph_ansible.py +13 -6
  73. teuthology/task/cephmetrics.py +2 -1
  74. teuthology/task/clock.py +33 -14
  75. teuthology/task/exec.py +18 -0
  76. teuthology/task/hadoop.py +2 -2
  77. teuthology/task/install/__init__.py +51 -22
  78. teuthology/task/install/bin/adjust-ulimits +16 -0
  79. teuthology/task/install/bin/daemon-helper +114 -0
  80. teuthology/task/install/bin/stdin-killer +263 -0
  81. teuthology/task/install/deb.py +24 -4
  82. teuthology/task/install/redhat.py +36 -32
  83. teuthology/task/install/rpm.py +41 -14
  84. teuthology/task/install/util.py +48 -22
  85. teuthology/task/internal/__init__.py +69 -11
  86. teuthology/task/internal/edit_sudoers.sh +10 -0
  87. teuthology/task/internal/lock_machines.py +3 -133
  88. teuthology/task/internal/redhat.py +48 -28
  89. teuthology/task/internal/syslog.py +31 -8
  90. teuthology/task/kernel.py +155 -147
  91. teuthology/task/lockfile.py +1 -1
  92. teuthology/task/mpi.py +10 -10
  93. teuthology/task/pcp.py +1 -1
  94. teuthology/task/selinux.py +17 -8
  95. teuthology/task/ssh_keys.py +6 -6
  96. teuthology/task/tests/__init__.py +137 -77
  97. teuthology/task/tests/test_fetch_coredumps.py +116 -0
  98. teuthology/task/tests/test_run.py +4 -4
  99. teuthology/timer.py +3 -3
  100. teuthology/util/loggerfile.py +19 -0
  101. teuthology/util/scanner.py +159 -0
  102. teuthology/util/sentry.py +52 -0
  103. teuthology/util/time.py +52 -0
  104. teuthology-1.2.0.data/scripts/adjust-ulimits +16 -0
  105. teuthology-1.2.0.data/scripts/daemon-helper +114 -0
  106. teuthology-1.2.0.data/scripts/stdin-killer +263 -0
  107. teuthology-1.2.0.dist-info/METADATA +89 -0
  108. teuthology-1.2.0.dist-info/RECORD +174 -0
  109. {teuthology-1.0.0.dist-info → teuthology-1.2.0.dist-info}/WHEEL +1 -1
  110. {teuthology-1.0.0.dist-info → teuthology-1.2.0.dist-info}/entry_points.txt +5 -2
  111. scripts/nuke.py +0 -45
  112. scripts/worker.py +0 -37
  113. teuthology/nuke/actions.py +0 -456
  114. teuthology/openstack/test/__init__.py +0 -0
  115. teuthology/openstack/test/openstack-integration.py +0 -286
  116. teuthology/openstack/test/test_config.py +0 -35
  117. teuthology/openstack/test/test_openstack.py +0 -1695
  118. teuthology/orchestra/test/__init__.py +0 -0
  119. teuthology/orchestra/test/integration/__init__.py +0 -0
  120. teuthology/orchestra/test/integration/test_integration.py +0 -94
  121. teuthology/orchestra/test/test_cluster.py +0 -240
  122. teuthology/orchestra/test/test_connection.py +0 -106
  123. teuthology/orchestra/test/test_console.py +0 -217
  124. teuthology/orchestra/test/test_opsys.py +0 -404
  125. teuthology/orchestra/test/test_remote.py +0 -185
  126. teuthology/orchestra/test/test_run.py +0 -286
  127. teuthology/orchestra/test/test_systemd.py +0 -54
  128. teuthology/orchestra/test/util.py +0 -12
  129. teuthology/sentry.py +0 -18
  130. teuthology/test/__init__.py +0 -0
  131. teuthology/test/fake_archive.py +0 -107
  132. teuthology/test/fake_fs.py +0 -92
  133. teuthology/test/integration/__init__.py +0 -0
  134. teuthology/test/integration/test_suite.py +0 -86
  135. teuthology/test/task/__init__.py +0 -205
  136. teuthology/test/task/test_ansible.py +0 -624
  137. teuthology/test/task/test_ceph_ansible.py +0 -176
  138. teuthology/test/task/test_console_log.py +0 -88
  139. teuthology/test/task/test_install.py +0 -337
  140. teuthology/test/task/test_internal.py +0 -57
  141. teuthology/test/task/test_kernel.py +0 -243
  142. teuthology/test/task/test_pcp.py +0 -379
  143. teuthology/test/task/test_selinux.py +0 -35
  144. teuthology/test/test_config.py +0 -189
  145. teuthology/test/test_contextutil.py +0 -68
  146. teuthology/test/test_describe_tests.py +0 -316
  147. teuthology/test/test_email_sleep_before_teardown.py +0 -81
  148. teuthology/test/test_exit.py +0 -97
  149. teuthology/test/test_get_distro.py +0 -47
  150. teuthology/test/test_get_distro_version.py +0 -47
  151. teuthology/test/test_get_multi_machine_types.py +0 -27
  152. teuthology/test/test_job_status.py +0 -60
  153. teuthology/test/test_ls.py +0 -48
  154. teuthology/test/test_misc.py +0 -368
  155. teuthology/test/test_nuke.py +0 -232
  156. teuthology/test/test_packaging.py +0 -763
  157. teuthology/test/test_parallel.py +0 -28
  158. teuthology/test/test_repo_utils.py +0 -204
  159. teuthology/test/test_report.py +0 -77
  160. teuthology/test/test_results.py +0 -155
  161. teuthology/test/test_run.py +0 -238
  162. teuthology/test/test_safepath.py +0 -55
  163. teuthology/test/test_schedule.py +0 -45
  164. teuthology/test/test_scrape.py +0 -167
  165. teuthology/test/test_timer.py +0 -80
  166. teuthology/test/test_vps_os_vers_parameter_checking.py +0 -84
  167. teuthology/test/test_worker.py +0 -303
  168. teuthology/worker.py +0 -339
  169. teuthology-1.0.0.dist-info/METADATA +0 -76
  170. teuthology-1.0.0.dist-info/RECORD +0 -210
  171. {teuthology-1.0.0.dist-info → teuthology-1.2.0.dist-info}/LICENSE +0 -0
  172. {teuthology-1.0.0.dist-info → teuthology-1.2.0.dist-info}/top_level.txt +0 -0
teuthology/nuke/__init__.py
@@ -1,359 +1,20 @@
-import argparse
-import datetime
-import json
 import logging
-import os
-import subprocess
-
-import yaml
-
-import teuthology
-from teuthology import provision
-from teuthology.lock.ops import unlock_one
-from teuthology.lock.query import is_vm, list_locks, \
-    find_stale_locks, get_status
-from teuthology.lock.util import locked_since_seconds
-from teuthology.nuke.actions import (
-    check_console, clear_firewall, shutdown_daemons, remove_installed_packages,
-    reboot, remove_osd_mounts, remove_osd_tmpfs, kill_hadoop,
-    remove_ceph_packages, synch_clocks, unlock_firmware_repo,
-    remove_configuration_files, undo_multipath, reset_syslog_dir,
-    remove_ceph_data, remove_testing_tree, remove_yum_timedhosts,
-    kill_valgrind,
-)
-from teuthology.config import config, FakeNamespace
-from teuthology.misc import (
-    canonicalize_hostname, config_file, decanonicalize_hostname, merge_configs,
-    get_user, sh
-)
-from teuthology.openstack import OpenStack, OpenStackInstance, enforce_json_dictionary
-from teuthology.orchestra.remote import Remote
-from teuthology.parallel import parallel
-from teuthology.task.internal import check_lock, add_remotes, connect

 log = logging.getLogger(__name__)


-def openstack_volume_id(volume):
-    return (volume.get('ID') or volume['id'])
-
-
-def openstack_volume_name(volume):
-    return (volume.get('Display Name') or
-            volume.get('display_name') or
-            volume.get('Name') or
-            volume.get('name') or "")
-
-
-def stale_openstack(ctx):
-    targets = dict(map(lambda i: (i['ID'], i),
-                       OpenStack.list_instances()))
-    nodes = list_locks(keyed_by_name=True, locked=True)
-    stale_openstack_instances(ctx, targets, nodes)
-    stale_openstack_nodes(ctx, targets, nodes)
-    stale_openstack_volumes(ctx, OpenStack.list_volumes())
-    if not ctx.dry_run:
-        openstack_remove_again()
-
-#
-# A delay, in seconds, that is significantly longer than
-# any kind of OpenStack server creation / deletion / etc.
-#
-OPENSTACK_DELAY = 30 * 60
-
-
-def stale_openstack_instances(ctx, instances, locked_nodes):
-    for (instance_id, instance) in instances.items():
-        i = OpenStackInstance(instance_id)
-        if not i.exists():
-            log.debug("stale-openstack: {instance} disappeared, ignored"
-                      .format(instance=instance_id))
-            continue
-        if (i.get_created() >
-                config['max_job_time'] + OPENSTACK_DELAY):
-            log.info(
-                "stale-openstack: destroying instance {instance}"
-                " because it was created {created} seconds ago"
-                " which is older than"
-                " max_job_time {max_job_time} + {delay}"
-                .format(instance=i['name'],
-                        created=i.get_created(),
-                        max_job_time=config['max_job_time'],
-                        delay=OPENSTACK_DELAY))
-            if not ctx.dry_run:
-                i.destroy()
-            continue
-        name = canonicalize_hostname(i['name'], user=None)
-        if i.get_created() > OPENSTACK_DELAY and name not in locked_nodes:
-            log.info("stale-openstack: destroying instance {instance}"
-                     " because it was created {created} seconds ago"
-                     " is older than {delay}s and it is not locked"
-                     .format(instance=i['name'],
-                             created=i.get_created(),
-                             delay=OPENSTACK_DELAY))
-            if not ctx.dry_run:
-                i.destroy()
-            continue
-        log.debug("stale-openstack: instance " + i['name'] + " OK")
-
-
-def openstack_delete_volume(id):
-    OpenStack().run("volume delete " + id + " || true")
-
-
-def stale_openstack_volumes(ctx, volumes):
-    now = datetime.datetime.now()
-    for volume in volumes:
-        volume_id = openstack_volume_id(volume)
-        try:
-            volume = json.loads(OpenStack().run("volume show -f json " +
-                                                volume_id))
-        except subprocess.CalledProcessError:
-            log.debug("stale-openstack: {id} disappeared, ignored"
-                      .format(id=volume_id))
-            continue
-        volume_name = openstack_volume_name(volume)
-        enforce_json_dictionary(volume)
-        created_at = datetime.datetime.strptime(
-            volume['created_at'], '%Y-%m-%dT%H:%M:%S.%f')
-        created = (now - created_at).total_seconds()
-        if created > config['max_job_time'] + OPENSTACK_DELAY:
-            log.info(
-                "stale-openstack: destroying volume {volume}({id})"
-                " because it was created {created} seconds ago"
-                " which is older than"
-                " max_job_time {max_job_time} + {delay}"
-                .format(volume=volume_name,
-                        id=volume_id,
-                        created=created,
-                        max_job_time=config['max_job_time'],
-                        delay=OPENSTACK_DELAY))
-            if not ctx.dry_run:
-                openstack_delete_volume(volume_id)
-            continue
-        log.debug("stale-openstack: volume " + volume_id + " OK")
-
-
-def stale_openstack_nodes(ctx, instances, locked_nodes):
-    names = set([ i['Name'] for i in instances.values() ])
-    for (name, node) in locked_nodes.items():
-        name = decanonicalize_hostname(name)
-        if node['machine_type'] != 'openstack':
-            continue
-        if (name not in names and
-                locked_since_seconds(node) > OPENSTACK_DELAY):
-            log.info("stale-openstack: unlocking node {name} unlocked"
-                     " because it was created {created}"
-                     " seconds ago which is older than {delay}"
-                     " and it has no instance"
-                     .format(name=name,
-                             created=locked_since_seconds(node),
-                             delay=OPENSTACK_DELAY))
-            if not ctx.dry_run:
-                unlock_one(ctx, name, node['locked_by'])
-            continue
-        log.debug("stale-openstack: node " + name + " OK")
-
-
-def openstack_remove_again():
+# This is being kept because ceph.git/qa/tasks/cephfs/filesystem.py references it.
+def clear_firewall(ctx):
     """
-    Volumes and servers with REMOVE-ME in the name are leftover
-    that failed to be removed. It is not uncommon for a failed removal
-    to succeed later on.
+    Remove any iptables rules created by teuthology. These rules are
+    identified by containing a comment with 'teuthology' in it. Non-teuthology
+    firewall rules are unaffected.
     """
-    sh("""
-    openstack server list --name REMOVE-ME --column ID --format value |
-    xargs --no-run-if-empty --max-args 1 -P20 openstack server delete --wait
-    true
-    """)
-    volumes = json.loads(OpenStack().run("volume list -f json --long"))
-    remove_me = [openstack_volume_id(v) for v in volumes
-                 if 'REMOVE-ME' in openstack_volume_name(v)]
-    for i in remove_me:
-        log.info("Trying to remove stale volume %s" % i)
-        openstack_delete_volume(i)
-
-
-def main(args):
-    ctx = FakeNamespace(args)
-    if ctx.verbose:
-        teuthology.log.setLevel(logging.DEBUG)
-
-    info = {}
-    if ctx.archive:
-        ctx.config = config_file(ctx.archive + '/config.yaml')
-        ifn = os.path.join(ctx.archive, 'info.yaml')
-        if os.path.exists(ifn):
-            with open(ifn, 'r') as fd:
-                info = yaml.safe_load(fd.read())
-        if not ctx.pid:
-            ctx.pid = info.get('pid')
-            if not ctx.pid:
-                ctx.pid = int(open(ctx.archive + '/pid').read().rstrip('\n'))
-        if not ctx.owner:
-            ctx.owner = info.get('owner')
-        if not ctx.owner:
-            ctx.owner = open(ctx.archive + '/owner').read().rstrip('\n')
-
-    if ctx.targets:
-        ctx.config = merge_configs(ctx.targets)
-
-    if ctx.stale:
-        stale_nodes = find_stale_locks(ctx.owner)
-        targets = dict()
-        for node in stale_nodes:
-            targets[node['name']] = node['ssh_pub_key']
-        ctx.config = dict(targets=targets)
-
-    if ctx.stale_openstack:
-        stale_openstack(ctx)
-        return
-
-    log.info(
-        '\n '.join(
-            ['targets:', ] + yaml.safe_dump(
-                ctx.config['targets'],
-                default_flow_style=False).splitlines()))
-
-    if ctx.dry_run:
-        log.info("Not actually nuking anything since --dry-run was passed")
-        return
-
-    if ctx.owner is None:
-        ctx.owner = get_user()
-
-    if ctx.pid:
-        if ctx.archive:
-            log.info('Killing teuthology process at pid %d', ctx.pid)
-            os.system('grep -q %s /proc/%d/cmdline && sudo kill %d' % (
-                ctx.archive,
-                ctx.pid,
-                ctx.pid))
-        else:
-            subprocess.check_call(["kill", "-9", str(ctx.pid)])
-
-    nuke(ctx, ctx.unlock, ctx.synch_clocks, ctx.reboot_all, ctx.noipmi)
-
-
-def nuke(ctx, should_unlock, sync_clocks=True, reboot_all=True, noipmi=False):
-    if 'targets' not in ctx.config:
-        return
-    total_unnuked = {}
-    targets = dict(ctx.config['targets'])
-    if ctx.name:
-        log.info('Checking targets against current locks')
-        locks = list_locks()
-        # Remove targets who's description doesn't match archive name.
-        for lock in locks:
-            for target in targets:
-                if target == lock['name']:
-                    if ctx.name not in lock['description']:
-                        del ctx.config['targets'][lock['name']]
-                        log.info(
-                            "Not nuking %s because description doesn't match",
-                            lock['name'])
-    with parallel() as p:
-        for target, hostkey in ctx.config['targets'].items():
-            p.spawn(
-                nuke_one,
-                ctx,
-                {target: hostkey},
-                should_unlock,
-                sync_clocks,
-                reboot_all,
-                ctx.config.get('check-locks', True),
-                noipmi,
-            )
-        for unnuked in p:
-            if unnuked:
-                total_unnuked.update(unnuked)
-    if total_unnuked:
-        log.error('Could not nuke the following targets:\n' +
-                  '\n '.join(['targets:', ] +
-                             yaml.safe_dump(
-                                 total_unnuked,
-                                 default_flow_style=False).splitlines()))
-
-
-def nuke_one(ctx, target, should_unlock, synch_clocks, reboot_all,
-             check_locks, noipmi):
-    ret = None
-    ctx = argparse.Namespace(
-        config=dict(targets=target),
-        owner=ctx.owner,
-        check_locks=check_locks,
-        synch_clocks=synch_clocks,
-        reboot_all=reboot_all,
-        teuthology_config=config.to_dict(),
-        name=ctx.name,
-        noipmi=noipmi,
+    log.info("Clearing teuthology firewall rules...")
+    ctx.cluster.run(
+        args=[
+            "sudo", "sh", "-c",
+            "iptables-save | grep -v teuthology | iptables-restore"
+        ],
     )
-    try:
-        nuke_helper(ctx, should_unlock)
-    except Exception:
-        log.exception('Could not nuke %s' % target)
-        # not re-raising the so that parallel calls aren't killed
-        ret = target
-    else:
-        if should_unlock:
-            unlock_one(ctx, list(target.keys())[0], ctx.owner)
-    return ret
-
-
-def nuke_helper(ctx, should_unlock):
-    # ensure node is up with ipmi
-    (target,) = ctx.config['targets'].keys()
-    host = target.split('@')[-1]
-    shortname = host.split('.')[0]
-    if should_unlock:
-        if is_vm(shortname):
-            return
-    log.debug('shortname: %s' % shortname)
-    if ctx.check_locks:
-        # does not check to ensure if the node is 'up'
-        # we want to be able to nuke a downed node
-        check_lock.check_lock(ctx, None, check_up=False)
-    status = get_status(host)
-    if status['machine_type'] in provision.fog.get_types():
-        remote = Remote(host)
-        remote.console.power_off()
-        return
-    elif status['machine_type'] in provision.pelagos.get_types():
-        provision.pelagos.park_node(host)
-        return
-
-    if (not ctx.noipmi and 'ipmi_user' in config and
-            'vpm' not in shortname):
-        try:
-            check_console(host)
-        except Exception:
-            log.exception('')
-            log.info("Will attempt to connect via SSH")
-            remote = Remote(host)
-            remote.connect()
-    add_remotes(ctx, None)
-    connect(ctx, None)
-    clear_firewall(ctx)
-    shutdown_daemons(ctx)
-    kill_valgrind(ctx)
-    # Try to remove packages before reboot
-    remove_installed_packages(ctx)
-    remotes = ctx.cluster.remotes.keys()
-    reboot(ctx, remotes)
-    # shutdown daemons again incase of startup
-    shutdown_daemons(ctx)
-    remove_osd_mounts(ctx)
-    remove_osd_tmpfs(ctx)
-    kill_hadoop(ctx)
-    remove_ceph_packages(ctx)
-    synch_clocks(remotes)
-    unlock_firmware_repo(ctx)
-    remove_configuration_files(ctx)
-    undo_multipath(ctx)
-    reset_syslog_dir(ctx)
-    remove_ceph_data(ctx)
-    remove_testing_tree(ctx)
-    remove_yum_timedhosts(ctx)
-    # Once again remove packages after reboot
-    remove_installed_packages(ctx)
-    log.info('Installed packages removed.')
+    log.info("Cleared teuthology firewall rules.")
teuthology/openstack/__init__.py
@@ -123,7 +123,7 @@ class OpenStackInstance(object):
         with safe_while(sleep=2, tries=30,
                         action="get ip " + self['id']) as proceed:
             while proceed():
-                found = re.match('.*\d+', self['addresses'])
+                found = re.match(r'.*\d+', self['addresses'])
                 if found:
                     return self['addresses']
                 self.set_info()
@@ -165,7 +165,7 @@ class OpenStackInstance(object):
             self.private_ip = self.get_ip_neutron()
         except Exception as e:
             log.debug("ignoring get_ip_neutron exception " + str(e))
-            self.private_ip = re.findall(network + '=([\d.]+)',
+            self.private_ip = re.findall(network + r'=([\d.]+)',
                                          self.get_addresses())[0]
         return self.private_ip

@@ -1026,7 +1026,8 @@ ssh access : ssh {identity}{username}@{ip} # logs in /usr/share/nginx/
         cluster, based on a template where the OpenStack credentials
         and a few other values are substituted.
         """
-        path = tempfile.mktemp()
+        fd, path = tempfile.mkstemp()
+        os.close(fd)

         with open(os.path.dirname(__file__) + '/bootstrap-teuthology.sh', 'rb') as f:
             b64_bootstrap = base64.b64encode(f.read())
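Aside, not part of the packaged diff: the point of the last change above is that tempfile.mktemp() only returns a name, leaving a window in which another process could create the file first, while tempfile.mkstemp() creates the file atomically and returns an open descriptor. A standalone sketch of the new pattern, with placeholder content:

    import os
    import tempfile

    # mkstemp() creates the file atomically; the descriptor is closed right away
    # because only the path is needed for a later write.
    fd, path = tempfile.mkstemp()
    os.close(fd)
    with open(path, "w") as f:
        f.write("rendered user-data would go here\n")
    os.unlink(path)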
teuthology/openstack/openstack-centos-7.0-user-data.txt
@@ -6,7 +6,7 @@ bootcmd:
 - ( curl --silent http://169.254.169.254/2009-04-04/meta-data/hostname | sed -e 's/[\.-].*//' ; eval printf "%03d%03d%03d%03d.{lab_domain}" $(curl --silent http://169.254.169.254/2009-04-04/meta-data/local-ipv4 | tr . ' ' ) ) | tee /etc/hostname
 - hostname $(cat /etc/hostname)
 - ( echo ; echo "MaxSessions 1000" ) >> /etc/ssh/sshd_config
-# See https://github.com/ceph/ceph-cm-ansible/blob/master/roles/cobbler/templates/snippets/cephlab_user
+# See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
 - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
 preserve_hostname: true
 system_info:
teuthology/openstack/openstack-centos-7.1-user-data.txt
@@ -6,7 +6,7 @@ bootcmd:
 - ( curl --silent http://169.254.169.254/2009-04-04/meta-data/hostname | sed -e 's/[\.-].*//' ; eval printf "%03d%03d%03d%03d.{lab_domain}" $(curl --silent http://169.254.169.254/2009-04-04/meta-data/local-ipv4 | tr . ' ' ) ) | tee /etc/hostname
 - hostname $(cat /etc/hostname)
 - ( echo ; echo "MaxSessions 1000" ) >> /etc/ssh/sshd_config
-# See https://github.com/ceph/ceph-cm-ansible/blob/master/roles/cobbler/templates/snippets/cephlab_user
+# See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
 - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
 preserve_hostname: true
 system_info:
teuthology/openstack/openstack-centos-7.2-user-data.txt
@@ -6,7 +6,7 @@ bootcmd:
 - ( curl --silent http://169.254.169.254/2009-04-04/meta-data/hostname | sed -e 's/[\.-].*//' ; eval printf "%03d%03d%03d%03d.{lab_domain}" $(curl --silent http://169.254.169.254/2009-04-04/meta-data/local-ipv4 | tr . ' ' ) ) | tee /etc/hostname
 - hostname $(cat /etc/hostname)
 - ( echo ; echo "MaxSessions 1000" ) >> /etc/ssh/sshd_config
-# See https://github.com/ceph/ceph-cm-ansible/blob/master/roles/cobbler/templates/snippets/cephlab_user
+# See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
 - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
 preserve_hostname: true
 system_info:
teuthology/openstack/openstack-debian-8.0-user-data.txt
@@ -18,7 +18,7 @@ packages:
 - git
 - ntp
 runcmd:
-# See https://github.com/ceph/ceph-cm-ansible/blob/master/roles/cobbler/templates/snippets/cephlab_user
+# See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
 - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
 - echo '{username} ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
 final_message: "{up}, after $UPTIME seconds"
teuthology/openstack/openstack-opensuse-42.1-user-data.txt
@@ -6,7 +6,7 @@ bootcmd:
 - ( curl --silent http://169.254.169.254/2009-04-04/meta-data/hostname | sed -e 's/[\.-].*//' ; eval printf "%03d%03d%03d%03d.{lab_domain}" $(curl --silent http://169.254.169.254/2009-04-04/meta-data/local-ipv4 | tr . ' ' ) ) | tee /etc/hostname
 - hostname $(cat /etc/hostname)
 - ( echo ; echo "MaxSessions 1000" ) >> /etc/ssh/sshd_config
-# See https://github.com/ceph/ceph-cm-ansible/blob/master/roles/cobbler/templates/snippets/cephlab_user
+# See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
 - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
 preserve_hostname: true
 users:
teuthology/openstack/openstack-teuthology.cron
@@ -1,2 +1 @@
 SHELL=/bin/bash
-*/30 * * * * ( date ; source $HOME/openrc.sh ; time timeout 900 $HOME/teuthology/virtualenv/bin/teuthology-nuke --stale-openstack ) >> $HOME/cron.log 2>&1
teuthology/orchestra/cluster.py
@@ -2,8 +2,7 @@
 Cluster definition
 part of context, Cluster is used to save connection information.
 """
-import teuthology.misc
-
+from teuthology.orchestra import run

 class Cluster(object):
     """
@@ -50,20 +49,49 @@ class Cluster(object):
         )
         self.remotes[remote] = list(roles)

-    def run(self, **kwargs):
+    def run(self, wait=True, parallel=False, **kwargs):
         """
         Run a command on all the nodes in this cluster.

         Goes through nodes in alphabetical order.

-        If you don't specify wait=False, this will be sequentially.
+        The default usage is when parallel=False and wait=True,
+        which is a sequential run for each node one by one.
+
+        If you specify parallel=True, it will be in parallel.
+
+        If you specify wait=False, it returns immediately.
+        Since it is not possible to run sequentially and
+        do not wait each command run finished, the parallel value
+        is ignored and treated as True.

         Returns a list of `RemoteProcess`.
         """
+        # -+-------+----------+----------+------------+---------------
+        #  | wait  | parallel | run.wait | remote.run | comments
+        # -+-------+----------+----------+------------+---------------
+        # 1|*True  |*False    | no       | wait=True  | sequentially
+        # 2| True  | True     | yes      | wait=False | parallel
+        # 3| False | True     | no       | wait=False | parallel
+        # 4| False | False    | no       | wait=False | same as above
+
+        # We always run in parallel if wait=False,
+        # that is why (4) is equivalent to (3).
+
+        # We wait from remote.run only if run sequentially.
+        _wait = (parallel == False and wait == True)
+
         remotes = sorted(self.remotes.keys(), key=lambda rem: rem.name)
-        return [remote.run(**kwargs) for remote in remotes]
+        procs = [remote.run(**kwargs, wait=_wait) for remote in remotes]

-    def sh(self, **kwargs):
+        # We do run.wait only if parallel=True, because if parallel=False,
+        # we have run sequentially and all processes are complete.
+
+        if parallel and wait:
+            run.wait(procs)
+        return procs
+
+    def sh(self, script, **kwargs):
         """
         Run a command on all the nodes in this cluster.

@@ -72,7 +100,7 @@ class Cluster(object):
         Returns a list of the command outputs correspondingly.
         """
         remotes = sorted(self.remotes.keys(), key=lambda rem: rem.name)
-        return [remote.sh(**kwargs) for remote in remotes]
+        return [remote.sh(script, **kwargs) for remote in remotes]

     def write_file(self, file_name, content, sudo=False, perms=None, owner=None):
         """
@@ -86,11 +114,12 @@
         remotes = sorted(self.remotes.keys(), key=lambda rem: rem.name)
         for remote in remotes:
             if sudo:
-                teuthology.misc.sudo_write_file(remote, file_name, content, perms=perms, owner=owner)
+                remote.write_file(file_name, content,
+                                  sudo=True, mode=perms, owner=owner)
             else:
                 if perms is not None or owner is not None:
                     raise ValueError("To specify perms or owner, sudo must be True")
-                teuthology.misc.write_file(remote, file_name, content)
+                remote.write_file(file_name, content)

     def only(self, *roles):
         """
@@ -144,3 +173,16 @@
             if remote not in matches.remotes:
                 c.add(remote, has_roles)
         return c
+
+    def filter(self, func):
+        """
+        Return a cluster whose remotes are filtered by `func`.
+
+        Example::
+            cluster = ctx.cluster.filter(lambda r: r.is_online)
+        """
+        result = self.__class__()
+        for rem, roles in self.remotes.items():
+            if func(rem):
+                result.add(rem, roles)
+        return result
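Aside, not part of the packaged diff: a minimal usage sketch of the new Cluster API above, combining the wait/parallel cases from the run() table with the new filter() helper; the cluster object and the command are placeholders:

    from teuthology.orchestra import run

    procs = cluster.run(args=["uptime"])                 # (1) sequential: each node blocks in turn
    procs = cluster.run(args=["uptime"], parallel=True)  # (2) parallel: run.wait() is called for you
    procs = cluster.run(args=["uptime"], wait=False)     # (3)/(4) parallel: returns immediately
    run.wait(procs)                                      # the caller waits later

    # Run only on nodes that are still reachable, in parallel.
    cluster.filter(lambda r: r.is_online).run(args=["uptime"], parallel=True)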
teuthology/orchestra/connection.py
@@ -1,13 +1,13 @@
 """
 Connection utilities
 """
-import base64
 import paramiko
 import os
 import logging

 from teuthology.config import config
 from teuthology.contextutil import safe_while
+from paramiko.hostkeys import HostKeyEntry

 log = logging.getLogger(__name__)

@@ -29,14 +29,11 @@ def create_key(keytype, key):
     """
     Create an ssh-rsa, ssh-dss or ssh-ed25519 key.
     """
-    if keytype == 'ssh-rsa':
-        return paramiko.rsakey.RSAKey(data=base64.decodestring(key.encode()))
-    elif keytype == 'ssh-dss':
-        return paramiko.dsskey.DSSKey(data=base64.decodestring(key.encode()))
-    elif keytype == 'ssh-ed25519':
-        return paramiko.ed25519key.Ed25519Key(data=base64.decodestring(key.encode()))
-    else:
-        raise ValueError('keytype must be ssh-rsa, ssh-dss (DSA) or ssh-ed25519')
+    l = "{hostname} {keytype} {key}".format(hostname="x", keytype=keytype, key=key)
+
+    ke = HostKeyEntry.from_line(l)
+    assert ke, f'invalid host key "{keytype} {key}"'
+    return ke.key


 def connect(user_at_host, host_key=None, keep_alive=False, timeout=60,
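Aside, not part of the packaged diff: the rewritten create_key() builds a known_hosts-style line and lets paramiko's HostKeyEntry choose the key class, so any key type paramiko can parse is accepted. A small round-trip sketch that generates a throwaway key instead of pasting real key material:

    import paramiko
    from paramiko.hostkeys import HostKeyEntry

    # Same parsing path as create_key() above: hostname, key type, base64 blob.
    generated = paramiko.RSAKey.generate(2048)
    line = "x {} {}".format(generated.get_name(), generated.get_base64())
    entry = HostKeyEntry.from_line(line)
    assert entry is not None
    assert entry.key.get_name() == "ssh-rsa"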
teuthology/orchestra/connection.py
@@ -82,13 +79,17 @@ def connect(user_at_host, host_key=None, keep_alive=False, timeout=60,
             timeout=timeout
         )

-    ssh_config_path = os.path.expanduser("~/.ssh/config")
-    if os.path.exists(ssh_config_path):
+    key_filename = key_filename or config.ssh_key
+    ssh_config_path = config.ssh_config_path or "~/.ssh/config"
+    ssh_config_path = os.path.expanduser(ssh_config_path)
+    if not key_filename and os.path.exists(ssh_config_path):
         ssh_config = paramiko.SSHConfig()
         ssh_config.parse(open(ssh_config_path))
         opts = ssh_config.lookup(host)
         if not key_filename and 'identityfile' in opts:
             key_filename = opts['identityfile']
+        if 'hostname' in opts:
+            connect_args['hostname'] = opts['hostname']

     if key_filename:
         if not isinstance(key_filename, list):
@@ -101,14 +102,20 @@ def connect(user_at_host, host_key=None, keep_alive=False, timeout=60,
     if not retry:
         ssh.connect(**connect_args)
     else:
-        # Retries are implemented using safe_while
-        with safe_while(sleep=1, action='connect to ' + host) as proceed:
+        with safe_while(sleep=1, increment=3, action='connect to ' + host) as proceed:
             while proceed():
+                auth_err_msg = f"Error authenticating with {host}"
                 try:
                     ssh.connect(**connect_args)
                     break
-                except paramiko.AuthenticationException:
-                    log.exception(
-                        "Error connecting to {host}".format(host=host))
+                except EOFError:
+                    log.error(f"{auth_err_msg}: EOFError")
+                except paramiko.AuthenticationException as e:
+                    log.error(f"{auth_err_msg}: {repr(e)}")
+                except paramiko.SSHException as e:
+                    auth_err_msg = f"{auth_err_msg}: {repr(e)}"
+                    if not key_filename:
+                        auth_err_msg = f"{auth_err_msg} (No SSH private key found!)"
+                    log.exception(auth_err_msg)
     ssh.get_transport().set_keepalive(keep_alive)
     return ssh
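Aside, not part of the packaged diff: the retry loop now backs off between attempts; with sleep=1 and increment=3 each retry waits roughly 3 seconds longer than the previous one until safe_while runs out of tries and raises. A hedged sketch of the same pattern outside connect(), with a placeholder check() callable:

    from teuthology.contextutil import safe_while

    def wait_until(check):
        # check() is a placeholder returning True once the condition holds.
        with safe_while(sleep=1, increment=3, action="wait for condition") as proceed:
            while proceed():
                if check():
                    break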