teuthology 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their public registries.
- scripts/describe.py +1 -0
- scripts/dispatcher.py +55 -26
- scripts/exporter.py +18 -0
- scripts/lock.py +1 -1
- scripts/node_cleanup.py +58 -0
- scripts/openstack.py +9 -9
- scripts/results.py +12 -11
- scripts/schedule.py +4 -0
- scripts/suite.py +57 -16
- scripts/supervisor.py +44 -0
- scripts/update_inventory.py +10 -4
- teuthology/__init__.py +24 -26
- teuthology/beanstalk.py +4 -3
- teuthology/config.py +16 -6
- teuthology/contextutil.py +18 -14
- teuthology/describe_tests.py +25 -18
- teuthology/dispatcher/__init__.py +210 -35
- teuthology/dispatcher/supervisor.py +140 -58
- teuthology/exceptions.py +43 -0
- teuthology/exporter.py +347 -0
- teuthology/kill.py +76 -81
- teuthology/lock/cli.py +3 -3
- teuthology/lock/ops.py +135 -61
- teuthology/lock/query.py +61 -44
- teuthology/ls.py +1 -1
- teuthology/misc.py +61 -75
- teuthology/nuke/__init__.py +12 -353
- teuthology/openstack/__init__.py +4 -3
- teuthology/openstack/openstack-centos-7.0-user-data.txt +1 -1
- teuthology/openstack/openstack-centos-7.1-user-data.txt +1 -1
- teuthology/openstack/openstack-centos-7.2-user-data.txt +1 -1
- teuthology/openstack/openstack-debian-8.0-user-data.txt +1 -1
- teuthology/openstack/openstack-opensuse-42.1-user-data.txt +1 -1
- teuthology/openstack/openstack-teuthology.cron +0 -1
- teuthology/orchestra/cluster.py +49 -7
- teuthology/orchestra/connection.py +16 -5
- teuthology/orchestra/console.py +111 -50
- teuthology/orchestra/daemon/cephadmunit.py +17 -4
- teuthology/orchestra/daemon/state.py +8 -1
- teuthology/orchestra/daemon/systemd.py +4 -4
- teuthology/orchestra/opsys.py +30 -11
- teuthology/orchestra/remote.py +405 -338
- teuthology/orchestra/run.py +3 -3
- teuthology/packaging.py +19 -16
- teuthology/provision/__init__.py +30 -10
- teuthology/provision/cloud/openstack.py +12 -6
- teuthology/provision/cloud/util.py +1 -2
- teuthology/provision/downburst.py +4 -3
- teuthology/provision/fog.py +68 -20
- teuthology/provision/openstack.py +5 -4
- teuthology/provision/pelagos.py +1 -1
- teuthology/repo_utils.py +43 -13
- teuthology/report.py +57 -35
- teuthology/results.py +5 -3
- teuthology/run.py +13 -14
- teuthology/run_tasks.py +27 -43
- teuthology/schedule.py +4 -3
- teuthology/scrape.py +28 -22
- teuthology/suite/__init__.py +74 -45
- teuthology/suite/build_matrix.py +34 -24
- teuthology/suite/fragment-merge.lua +105 -0
- teuthology/suite/matrix.py +31 -2
- teuthology/suite/merge.py +175 -0
- teuthology/suite/placeholder.py +6 -9
- teuthology/suite/run.py +175 -100
- teuthology/suite/util.py +64 -218
- teuthology/task/__init__.py +1 -1
- teuthology/task/ansible.py +101 -32
- teuthology/task/buildpackages.py +2 -2
- teuthology/task/ceph_ansible.py +13 -6
- teuthology/task/cephmetrics.py +2 -1
- teuthology/task/clock.py +33 -14
- teuthology/task/exec.py +18 -0
- teuthology/task/hadoop.py +2 -2
- teuthology/task/install/__init__.py +29 -7
- teuthology/task/install/bin/adjust-ulimits +16 -0
- teuthology/task/install/bin/daemon-helper +114 -0
- teuthology/task/install/bin/stdin-killer +263 -0
- teuthology/task/install/deb.py +1 -1
- teuthology/task/install/rpm.py +17 -5
- teuthology/task/install/util.py +3 -3
- teuthology/task/internal/__init__.py +41 -10
- teuthology/task/internal/edit_sudoers.sh +10 -0
- teuthology/task/internal/lock_machines.py +2 -9
- teuthology/task/internal/redhat.py +31 -1
- teuthology/task/internal/syslog.py +31 -8
- teuthology/task/kernel.py +152 -145
- teuthology/task/lockfile.py +1 -1
- teuthology/task/mpi.py +10 -10
- teuthology/task/pcp.py +1 -1
- teuthology/task/selinux.py +16 -8
- teuthology/task/ssh_keys.py +4 -4
- teuthology/task/tests/__init__.py +137 -77
- teuthology/task/tests/test_fetch_coredumps.py +116 -0
- teuthology/task/tests/test_run.py +4 -4
- teuthology/timer.py +3 -3
- teuthology/util/loggerfile.py +19 -0
- teuthology/util/scanner.py +159 -0
- teuthology/util/sentry.py +52 -0
- teuthology/util/time.py +52 -0
- teuthology-1.2.0.data/scripts/adjust-ulimits +16 -0
- teuthology-1.2.0.data/scripts/daemon-helper +114 -0
- teuthology-1.2.0.data/scripts/stdin-killer +263 -0
- teuthology-1.2.0.dist-info/METADATA +89 -0
- teuthology-1.2.0.dist-info/RECORD +174 -0
- {teuthology-1.1.0.dist-info → teuthology-1.2.0.dist-info}/WHEEL +1 -1
- {teuthology-1.1.0.dist-info → teuthology-1.2.0.dist-info}/entry_points.txt +3 -2
- scripts/nuke.py +0 -47
- scripts/worker.py +0 -37
- teuthology/nuke/actions.py +0 -456
- teuthology/openstack/test/__init__.py +0 -0
- teuthology/openstack/test/openstack-integration.py +0 -286
- teuthology/openstack/test/test_config.py +0 -35
- teuthology/openstack/test/test_openstack.py +0 -1695
- teuthology/orchestra/test/__init__.py +0 -0
- teuthology/orchestra/test/integration/__init__.py +0 -0
- teuthology/orchestra/test/integration/test_integration.py +0 -94
- teuthology/orchestra/test/test_cluster.py +0 -240
- teuthology/orchestra/test/test_connection.py +0 -106
- teuthology/orchestra/test/test_console.py +0 -217
- teuthology/orchestra/test/test_opsys.py +0 -404
- teuthology/orchestra/test/test_remote.py +0 -185
- teuthology/orchestra/test/test_run.py +0 -286
- teuthology/orchestra/test/test_systemd.py +0 -54
- teuthology/orchestra/test/util.py +0 -12
- teuthology/test/__init__.py +0 -0
- teuthology/test/fake_archive.py +0 -107
- teuthology/test/fake_fs.py +0 -92
- teuthology/test/integration/__init__.py +0 -0
- teuthology/test/integration/test_suite.py +0 -86
- teuthology/test/task/__init__.py +0 -205
- teuthology/test/task/test_ansible.py +0 -624
- teuthology/test/task/test_ceph_ansible.py +0 -176
- teuthology/test/task/test_console_log.py +0 -88
- teuthology/test/task/test_install.py +0 -337
- teuthology/test/task/test_internal.py +0 -57
- teuthology/test/task/test_kernel.py +0 -243
- teuthology/test/task/test_pcp.py +0 -379
- teuthology/test/task/test_selinux.py +0 -35
- teuthology/test/test_config.py +0 -189
- teuthology/test/test_contextutil.py +0 -68
- teuthology/test/test_describe_tests.py +0 -316
- teuthology/test/test_email_sleep_before_teardown.py +0 -81
- teuthology/test/test_exit.py +0 -97
- teuthology/test/test_get_distro.py +0 -47
- teuthology/test/test_get_distro_version.py +0 -47
- teuthology/test/test_get_multi_machine_types.py +0 -27
- teuthology/test/test_job_status.py +0 -60
- teuthology/test/test_ls.py +0 -48
- teuthology/test/test_misc.py +0 -391
- teuthology/test/test_nuke.py +0 -290
- teuthology/test/test_packaging.py +0 -763
- teuthology/test/test_parallel.py +0 -28
- teuthology/test/test_repo_utils.py +0 -225
- teuthology/test/test_report.py +0 -77
- teuthology/test/test_results.py +0 -155
- teuthology/test/test_run.py +0 -239
- teuthology/test/test_safepath.py +0 -55
- teuthology/test/test_schedule.py +0 -45
- teuthology/test/test_scrape.py +0 -167
- teuthology/test/test_timer.py +0 -80
- teuthology/test/test_vps_os_vers_parameter_checking.py +0 -84
- teuthology/test/test_worker.py +0 -303
- teuthology/worker.py +0 -354
- teuthology-1.1.0.dist-info/METADATA +0 -76
- teuthology-1.1.0.dist-info/RECORD +0 -213
- {teuthology-1.1.0.dist-info → teuthology-1.2.0.dist-info}/LICENSE +0 -0
- {teuthology-1.1.0.dist-info → teuthology-1.2.0.dist-info}/top_level.txt +0 -0
teuthology/openstack/__init__.py
CHANGED
@@ -123,7 +123,7 @@ class OpenStackInstance(object):
         with safe_while(sleep=2, tries=30,
                         action="get ip " + self['id']) as proceed:
             while proceed():
-                found = re.match('.*\d+', self['addresses'])
+                found = re.match(r'.*\d+', self['addresses'])
                 if found:
                     return self['addresses']
                 self.set_info()
@@ -165,7 +165,7 @@ class OpenStackInstance(object):
             self.private_ip = self.get_ip_neutron()
         except Exception as e:
             log.debug("ignoring get_ip_neutron exception " + str(e))
-            self.private_ip = re.findall(network + '=([\d.]+)',
+            self.private_ip = re.findall(network + r'=([\d.]+)',
                                          self.get_addresses())[0]
         return self.private_ip
@@ -1026,7 +1026,8 @@ ssh access : ssh {identity}{username}@{ip} # logs in /usr/share/nginx/
     cluster, based on a template where the OpenStack credentials
     and a few other values are substituted.
     """
-    path = tempfile.mktemp()
+    fd, path = tempfile.mkstemp()
+    os.close(fd)

     with open(os.path.dirname(__file__) + '/bootstrap-teuthology.sh', 'rb') as f:
         b64_bootstrap = base64.b64encode(f.read())
teuthology/openstack/openstack-centos-7.0-user-data.txt
CHANGED
@@ -6,7 +6,7 @@ bootcmd:
 - ( curl --silent http://169.254.169.254/2009-04-04/meta-data/hostname | sed -e 's/[\.-].*//' ; eval printf "%03d%03d%03d%03d.{lab_domain}" $(curl --silent http://169.254.169.254/2009-04-04/meta-data/local-ipv4 | tr . ' ' ) ) | tee /etc/hostname
 - hostname $(cat /etc/hostname)
 - ( echo ; echo "MaxSessions 1000" ) >> /etc/ssh/sshd_config
-# See https://github.com/ceph/ceph-cm-ansible/blob/
+# See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
 - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
 preserve_hostname: true
 system_info:
teuthology/openstack/openstack-centos-7.1-user-data.txt
CHANGED
@@ -6,7 +6,7 @@ bootcmd:
 - ( curl --silent http://169.254.169.254/2009-04-04/meta-data/hostname | sed -e 's/[\.-].*//' ; eval printf "%03d%03d%03d%03d.{lab_domain}" $(curl --silent http://169.254.169.254/2009-04-04/meta-data/local-ipv4 | tr . ' ' ) ) | tee /etc/hostname
 - hostname $(cat /etc/hostname)
 - ( echo ; echo "MaxSessions 1000" ) >> /etc/ssh/sshd_config
-# See https://github.com/ceph/ceph-cm-ansible/blob/
+# See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
 - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
 preserve_hostname: true
 system_info:
teuthology/openstack/openstack-centos-7.2-user-data.txt
CHANGED
@@ -6,7 +6,7 @@ bootcmd:
 - ( curl --silent http://169.254.169.254/2009-04-04/meta-data/hostname | sed -e 's/[\.-].*//' ; eval printf "%03d%03d%03d%03d.{lab_domain}" $(curl --silent http://169.254.169.254/2009-04-04/meta-data/local-ipv4 | tr . ' ' ) ) | tee /etc/hostname
 - hostname $(cat /etc/hostname)
 - ( echo ; echo "MaxSessions 1000" ) >> /etc/ssh/sshd_config
-# See https://github.com/ceph/ceph-cm-ansible/blob/
+# See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
 - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
 preserve_hostname: true
 system_info:
teuthology/openstack/openstack-debian-8.0-user-data.txt
CHANGED
@@ -18,7 +18,7 @@ packages:
 - git
 - ntp
 runcmd:
-# See https://github.com/ceph/ceph-cm-ansible/blob/
+# See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
 - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
 - echo '{username} ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
 final_message: "{up}, after $UPTIME seconds"
teuthology/openstack/openstack-opensuse-42.1-user-data.txt
CHANGED
@@ -6,7 +6,7 @@ bootcmd:
 - ( curl --silent http://169.254.169.254/2009-04-04/meta-data/hostname | sed -e 's/[\.-].*//' ; eval printf "%03d%03d%03d%03d.{lab_domain}" $(curl --silent http://169.254.169.254/2009-04-04/meta-data/local-ipv4 | tr . ' ' ) ) | tee /etc/hostname
 - hostname $(cat /etc/hostname)
 - ( echo ; echo "MaxSessions 1000" ) >> /etc/ssh/sshd_config
-# See https://github.com/ceph/ceph-cm-ansible/blob/
+# See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
 - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
 preserve_hostname: true
 users:
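The r'' prefixes added above are the usual fix for regex escapes in plain string literals: '\d' is an invalid escape sequence (a DeprecationWarning on older Python 3, a SyntaxWarning since 3.12, and slated to become an error). A minimal standalone illustration, with a hypothetical input line:

    import re

    # Raw string: '\d' reaches the regex engine intact instead of being
    # treated as an (invalid) string escape.
    line = 'exited with status=3'  # hypothetical input
    match = re.match(r'.*status=(\d+).*', line)
    assert match and match.groups()[0] == '3'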
teuthology/orchestra/cluster.py
CHANGED
@@ -2,8 +2,7 @@
 Cluster definition
 part of context, Cluster is used to save connection information.
 """
-
-
+from teuthology.orchestra import run

 class Cluster(object):
     """
@@ -50,18 +49,47 @@ class Cluster(object):
         )
         self.remotes[remote] = list(roles)

-    def run(self, **kwargs):
+    def run(self, wait=True, parallel=False, **kwargs):
         """
         Run a command on all the nodes in this cluster.

         Goes through nodes in alphabetical order.

-
+        The default usage is when parallel=False and wait=True,
+        which is a sequential run for each node one by one.
+
+        If you specify parallel=True, it will be in parallel.
+
+        If you specify wait=False, it returns immediately.
+        Since it is not possible to run sequentially and
+        do not wait each command run finished, the parallel value
+        is ignored and treated as True.

         Returns a list of `RemoteProcess`.
         """
+        # -+-------+----------+----------+------------+---------------
+        #  | wait  | parallel | run.wait | remote.run | comments
+        # -+-------+----------+----------+------------+---------------
+        # 1|*True  |*False    | no       | wait=True  | sequentially
+        # 2| True  | True     | yes      | wait=False | parallel
+        # 3| False | True     | no       | wait=False | parallel
+        # 4| False | False    | no       | wait=False | same as above
+
+        # We always run in parallel if wait=False,
+        # that is why (4) is equivalent to (3).
+
+        # We wait from remote.run only if run sequentially.
+        _wait = (parallel == False and wait == True)
+
         remotes = sorted(self.remotes.keys(), key=lambda rem: rem.name)
-        return [remote.run(**kwargs) for remote in remotes]
+        procs = [remote.run(**kwargs, wait=_wait) for remote in remotes]
+
+        # We do run.wait only if parallel=True, because if parallel=False,
+        # we have run sequentially and all processes are complete.
+
+        if parallel and wait:
+            run.wait(procs)
+        return procs

     def sh(self, script, **kwargs):
         """
@@ -86,11 +114,12 @@ class Cluster(object):
         remotes = sorted(self.remotes.keys(), key=lambda rem: rem.name)
         for remote in remotes:
             if sudo:
-
+                remote.write_file(file_name, content,
+                                  sudo=True, mode=perms, owner=owner)
             else:
                 if perms is not None or owner is not None:
                     raise ValueError("To specify perms or owner, sudo must be True")
-
+                remote.write_file(file_name, content)

     def only(self, *roles):
         """
@@ -144,3 +173,16 @@ class Cluster(object):
             if remote not in matches.remotes:
                 c.add(remote, has_roles)
         return c
+
+    def filter(self, func):
+        """
+        Return a cluster whose remotes are filtered by `func`.
+
+        Example::
+            cluster = ctx.cluster.filter(lambda r: r.is_online)
+        """
+        result = self.__class__()
+        for rem, roles in self.remotes.items():
+            if func(rem):
+                result.add(rem, roles)
+        return result
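A sketch of how the reworked Cluster.run() behaves, following the wait/parallel table in the docstring above; the ctx.cluster object and the commands are illustrative only:

    # Sequential (default): each remote runs to completion in turn.
    ctx.cluster.run(args=['uptime'])

    # Parallel: every remote is started with wait=False, then run.wait()
    # blocks until all RemoteProcess objects have finished.
    ctx.cluster.run(args=['uptime'], parallel=True)

    # Fire-and-forget: returns immediately; parallel is implied.
    procs = ctx.cluster.run(args=['sleep', '30'], wait=False)

    # The new filter() helper, as in the docstring example above.
    online = ctx.cluster.filter(lambda r: r.is_online)
    online.run(args=['hostname'])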
teuthology/orchestra/connection.py
CHANGED
@@ -79,13 +79,17 @@ def connect(user_at_host, host_key=None, keep_alive=False, timeout=60,
         timeout=timeout
     )

-
-
+    key_filename = key_filename or config.ssh_key
+    ssh_config_path = config.ssh_config_path or "~/.ssh/config"
+    ssh_config_path = os.path.expanduser(ssh_config_path)
+    if not key_filename and os.path.exists(ssh_config_path):
         ssh_config = paramiko.SSHConfig()
         ssh_config.parse(open(ssh_config_path))
         opts = ssh_config.lookup(host)
         if not key_filename and 'identityfile' in opts:
             key_filename = opts['identityfile']
+        if 'hostname' in opts:
+            connect_args['hostname'] = opts['hostname']

     if key_filename:
         if not isinstance(key_filename, list):
@@ -98,13 +102,20 @@ def connect(user_at_host, host_key=None, keep_alive=False, timeout=60,
     if not retry:
         ssh.connect(**connect_args)
     else:
-
-        with safe_while(sleep=1, action='connect to ' + host) as proceed:
+        with safe_while(sleep=1, increment=3, action='connect to ' + host) as proceed:
             while proceed():
+                auth_err_msg = f"Error authenticating with {host}"
                 try:
                     ssh.connect(**connect_args)
                     break
+                except EOFError:
+                    log.error(f"{auth_err_msg}: EOFError")
                 except paramiko.AuthenticationException as e:
-                    log.error(f"Error authenticating with {host}: {repr(e)}")
+                    log.error(f"{auth_err_msg}: {repr(e)}")
+                except paramiko.SSHException as e:
+                    auth_err_msg = f"{auth_err_msg}: {repr(e)}"
+                    if not key_filename:
+                        auth_err_msg = f"{auth_err_msg} (No SSH private key found!)"
+                    log.exception(auth_err_msg)
     ssh.get_transport().set_keepalive(keep_alive)
     return ssh
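The retry loop above now passes increment=3 to safe_while, teuthology's linear-backoff helper (its sleep/increment/tries/action/_raise parameters all appear in this diff). A standalone sketch of that pattern, with do_connect() as a hypothetical attempt:

    from teuthology.contextutil import safe_while

    # With sleep=1 and increment=3 the delays grow linearly: 1s, 4s, 7s, ...
    # safe_while raises when the configured tries are exhausted (unless
    # _raise=False is passed).
    with safe_while(sleep=1, increment=3, action='connect to host') as proceed:
        while proceed():
            try:
                do_connect()  # hypothetical connection attempt
                break
            except OSError:
                pass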
teuthology/orchestra/console.py
CHANGED
@@ -1,3 +1,4 @@
+import io
 import logging
 import os
 import pexpect
@@ -6,6 +7,8 @@ import subprocess
 import sys
 import time

+from typing import Union, Literal, Optional
+
 import teuthology.lock.query
 import teuthology.lock.util
 from teuthology.config import config
@@ -19,6 +22,7 @@ except ImportError:
     libvirt = None

 log = logging.getLogger(__name__)
+PowerOnOffState = Union[Literal["on"], Literal["off"]]


 class RemoteConsole():
@@ -35,11 +39,11 @@ class PhysicalConsole(RemoteConsole):
     Physical Console (set from getRemoteConsole)
     """
     def __init__(self, name, ipmiuser=None, ipmipass=None, ipmidomain=None,
-
+                 timeout=120):
         self.name = name
         self.shortname = self.getShortName(name)
+        self.log = log.getChild(self.shortname)
         self.timeout = timeout
-        self.logfile = None
         self.ipmiuser = ipmiuser or config.ipmi_user
         self.ipmipass = ipmipass or config.ipmi_password
         self.ipmidomain = ipmidomain or config.ipmi_domain
@@ -71,11 +75,14 @@ class PhysicalConsole(RemoteConsole):
         """
         Run a command using pexpect.spawn(). Return the child object.
         """
-        log.debug('pexpect command: %s', cmd)
-
+        self.log.debug('pexpect command: %s', cmd)
+        p = pexpect.spawn(
             cmd,
-
+            encoding='utf-8',
+            codec_errors="backslashreplace",
         )
+        p.logfile_read = io.StringIO()
+        return p

     def _get_console(self, readonly=True):
         def start():
@@ -84,7 +91,7 @@ class PhysicalConsole(RemoteConsole):

         child = start()
         if self.has_conserver and not child.isalive():
-            log.error("conserver failed to get the console; will try ipmitool")
+            self.log.error("conserver failed to get the console; will try ipmitool")
             self.has_conserver = False
             child = start()
         return child
@@ -114,7 +121,7 @@ class PhysicalConsole(RemoteConsole):

     def _check_ipmi_credentials(self):
         if not self.has_ipmi_credentials:
-            log.error(
+            self.log.error(
                 "Must set ipmi_user, ipmi_password, and ipmi_domain in "
                 ".teuthology.yaml"
             )
@@ -129,41 +136,108 @@ class PhysicalConsole(RemoteConsole):
                 timeout=t)
             if r != 0:
                 child.kill(15)
+                self.log.debug('console disconnect output: %s', child.logfile_read.getvalue().strip())
         else:
             child.send('~.')
             r = child.expect(
                 ['terminated ipmitool', pexpect.TIMEOUT, pexpect.EOF],
                 timeout=t)
+            self.log.debug('ipmitool disconnect output: %s', child.logfile_read.getvalue().strip())
             if r != 0:
                 self._pexpect_spawn_ipmi('sol deactivate')
+                self.log.debug('sol deactivate output: %s', child.logfile_read.getvalue().strip())

     def _wait_for_login(self, timeout=None, attempts=2):
         """
         Wait for login. Retry if timeouts occur on commands.
         """
         t = timeout or self.timeout
-        log.debug('Waiting for login prompt on {s}'.format(s=self.shortname))
+        self.log.debug('Waiting for login prompt')
         # wait for login prompt to indicate boot completed
         for i in range(0, attempts):
             start = time.time()
             while time.time() - start < t:
                 child = self._get_console(readonly=False)
                 child.send('\n')
-                log.debug('expect: {s} login'.format(s=self.shortname))
                 r = child.expect(
                     ['{s} login: '.format(s=self.shortname),
                      pexpect.TIMEOUT,
                      pexpect.EOF],
                     timeout=(t - (time.time() - start)))
-                log.debug('expect before: {b}'.format(b=child.before))
-                log.debug('expect after: {a}'.format(a=child.after))
+                self.log.debug('expect before: {b}'.format(b=child.before))
+                self.log.debug('expect after: {a}'.format(a=child.after))

                 self._exit_session(child)
                 if r == 0:
                     return
         raise ConsoleError("Did not get a login prompt from %s!" % self.name)

-    def check_power(self, state, timeout=None):
+    def check_power(self, state: Literal["on","off"]):
+        c = self._pexpect_spawn_ipmi('power status')
+        r = c.expect(['Chassis Power is {s}'.format(
+            s=state), pexpect.EOF, pexpect.TIMEOUT], timeout=1)
+        self.log.debug('check power output: %s', c.logfile_read.getvalue().strip())
+        return r == 0
+
+    def set_power(self, state: PowerOnOffState, timeout: Optional[int]):
+        self.log.info(f"Power {state}")
+        timeout = timeout or self.timeout
+        sleep_time = 4
+        reissue_after_failures = 5
+        failures = 0
+        issued = False
+        succeeded = False
+        with safe_while(
+                sleep=sleep_time,
+                tries=int(timeout / sleep_time),
+                _raise=False,
+                action='wait for power on') as proceed:
+            while proceed():
+                if not issued:
+                    child = self._pexpect_spawn_ipmi(f"power {state}")
+                    rc = child.expect(
+                        [
+                            "Up/On" if state.lower() == "on" else "Down/Off",
+                            pexpect.EOF
+                        ],
+                        timeout=self.timeout
+                    )
+                    self.log.debug(
+                        f"power {state} output: {child.logfile_read.getvalue().strip()}"
+                    )
+                    if rc == 0:
+                        issued = True
+                    continue
+
+                if not succeeded:
+                    child = self._pexpect_spawn_ipmi('power status')
+                    rc = child.expect(
+                        [
+                            f"Chassis Power is {state}",
+                            pexpect.EOF,
+                            pexpect.TIMEOUT
+                        ],
+                        timeout=1
+                    )
+                    self.log.debug(
+                        f"check power output: {child.logfile_read.getvalue().strip()}"
+                    )
+                    if rc == 0:
+                        succeeded = True
+                        break
+                failures += 1
+                if failures == reissue_after_failures:
+                    issued = False

+        if issued and succeeded:
+            self.log.info(f"Power {state} completed")
+            return True
+        raise RuntimeError(
+            f"Failed to power {state} {self.shortname} in {self.timeout}s"
+        )
+        return False
+
+    def check_power_retries(self, state, timeout=None):
         """
         Check power. Retry if EOF encountered on power check read.
         """
@@ -178,6 +252,7 @@ class PhysicalConsole(RemoteConsole):
         c = self._pexpect_spawn_ipmi('power status')
         r = c.expect(['Chassis Power is {s}'.format(
             s=state), pexpect.EOF, pexpect.TIMEOUT], timeout=1)
+        self.log.debug('check power output: %s', c.logfile_read.getvalue().strip())
         if r == 0:
             return True
         return False
@@ -190,9 +265,8 @@ class PhysicalConsole(RemoteConsole):
             # check for login prompt at console
             self._wait_for_login(timeout)
             return True
-        except Exception as e:
-            log.error('Failed to get ipmi console status for {s}: {e}'.format(
-                s=self.shortname, e=e))
+        except Exception:
+            self.log.exception('Failed to get ipmi console status')
             return False

     def power_cycle(self, timeout=300):
@@ -201,62 +275,45 @@ class PhysicalConsole(RemoteConsole):

         :param timeout: How long to wait for login
         """
-        log.info('Power cycling {s}'.format(s=self.shortname))
+        self.log.info('Power cycling')
         child = self._pexpect_spawn_ipmi('power cycle')
         child.expect('Chassis Power Control: Cycle', timeout=self.timeout)
+        self.log.debug('power cycle output: %s', child.logfile_read.getvalue().strip())
         self._wait_for_login(timeout=timeout)
-        log.info('Power cycle for {s} completed'.format(s=self.shortname))
+        self.log.info('Power cycle completed')

     def hard_reset(self, wait_for_login=True):
         """
         Perform physical hard reset. Retry if EOF returned from read
         and wait for login when complete.
         """
-        log.info('Performing hard reset of {s}'.format(s=self.shortname))
+        self.log.info('Performing hard reset')
         start = time.time()
         while time.time() - start < self.timeout:
             child = self._pexpect_spawn_ipmi('power reset')
             r = child.expect(['Chassis Power Control: Reset', pexpect.EOF],
                              timeout=self.timeout)
+            self.log.debug('power reset output: %s', child.logfile_read.getvalue().strip())
             if r == 0:
                 break
         if wait_for_login:
             self._wait_for_login()
-        log.info('Hard reset for {s} completed'.format(s=self.shortname))
+        self.log.info('Hard reset completed')

     def power_on(self):
         """
         Physical power on. Loop checking cmd return.
         """
-
-        start = time.time()
-        while time.time() - start < self.timeout:
-            child = self._pexpect_spawn_ipmi('power on')
-            r = child.expect(['Chassis Power Control: Up/On', pexpect.EOF],
-                             timeout=self.timeout)
-            if r == 0:
-                break
-        if self.check_power('on'):
-            log.info('Power on for {s} completed'.format(s=self.shortname))
-        else:
-            log.error('Failed to power on {s}'.format(s=self.shortname))
+        return self.set_power("on", timeout=None)

     def power_off(self):
         """
         Physical power off. Loop checking cmd return.
         """
-
-        start = time.time()
-        while time.time() - start < self.timeout:
-            child = self._pexpect_spawn_ipmi('power off')
-            r = child.expect(['Chassis Power Control: Down/Off', pexpect.EOF],
-                             timeout=self.timeout)
-            if r == 0:
-                break
-        if self.check_power('off', 60):
-            log.info('Power off for {s} completed'.format(s=self.shortname))
-        else:
-            log.error('Failed to power off {s}'.format(s=self.shortname))
+        try:
+            return self.set_power("off", timeout=None)
+        except Exception:
+            pass

     def power_off_for_interval(self, interval=30):
         """
@@ -264,17 +321,21 @@ class PhysicalConsole(RemoteConsole):

         :param interval: Length of power-off period.
         """
-        log.info('Power off {s} for {i} seconds'.format(
-            s=self.shortname, i=interval))
+        self.log.info('Power off for {i} seconds'.format(i=interval))
         child = self._pexpect_spawn_ipmi('power off')
         child.expect('Chassis Power Control: Down/Off', timeout=self.timeout)

+        self.log.debug('power off output: %s', child.logfile_read.getvalue().strip())
+        child.logfile_read.seek(0)
+        child.logfile_read.truncate()
+
         time.sleep(interval)

         child = self._pexpect_spawn_ipmi('power on')
         child.expect('Chassis Power Control: Up/On', timeout=self.timeout)
+        self.log.debug('power on output: %s', child.logfile_read.getvalue().strip())
         self._wait_for_login()
-        log.info('Power off for {i} seconds completed'.format(i=interval))
+        self.log.info('Power off for {i} seconds completed'.format(i=interval))

     def spawn_sol_log(self, dest_path):
         """
@@ -307,7 +368,7 @@ class PhysicalConsole(RemoteConsole):

         proc = start()
         if self.has_conserver and proc.poll() is not None:
-            log.error("conserver failed to get the console; will try ipmitool")
+            self.log.error("conserver failed to get the console; will try ipmitool")
             self.has_conserver = False
             proc = start()
         return proc
@@ -322,6 +383,7 @@ class VirtualConsole(RemoteConsole):
             raise RuntimeError("libvirt not found")

         self.shortname = self.getShortName(name)
+        self.log = log.getChild(self.shortname)
         status_info = teuthology.lock.query.get_status(self.shortname)
         try:
             if teuthology.lock.query.is_vm(status=status_info):
@@ -379,9 +441,8 @@ class VirtualConsole(RemoteConsole):
         """
         Simiulate power off for an interval.
         """
-        log.info('Power off {s} for {i} seconds'.format(
-            s=self.shortname, i=interval))
+        self.log.info('Power off for {i} seconds'.format(i=interval))
         self.vm_domain.info().destroy()
         time.sleep(interval)
         self.vm_domain.info().create()
-        log.info('Power off for {i} seconds completed'.format(i=interval))
+        self.log.info('Power off for {i} seconds completed'.format(i=interval))
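The console changes above route all pexpect output through an io.StringIO attached to logfile_read, so each expect's output can be logged afterwards. A self-contained sketch of that pattern:

    import io
    import pexpect

    # Spawn in text mode, replacing undecodable bytes instead of raising,
    # matching the _pexpect_spawn() arguments in the diff above.
    child = pexpect.spawn('echo hello', encoding='utf-8',
                          codec_errors='backslashreplace')
    child.logfile_read = io.StringIO()
    child.expect(pexpect.EOF)

    # Everything the child wrote is now available for logging.
    print('child output: %s' % child.logfile_read.getvalue().strip())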
teuthology/orchestra/daemon/cephadmunit.py
CHANGED
@@ -35,9 +35,9 @@ class CephadmUnit(DaemonState):

     def kill_cmd(self, sig):
         return ' '.join([
-            'sudo', '
+            'sudo', 'systemctl', 'kill',
             '-s', str(int(sig)),
-            'ceph-%s
+            'ceph-%s@%s.%s' % (self.fsid, self.type_, self.id_),
         ])

     def _start_logger(self):
@@ -112,6 +112,18 @@ class CephadmUnit(DaemonState):
         """
         return self.is_started

+    def finished(self):
+        """
+        Is the daemon finished?
+        Return False if active.
+        """
+        proc = self.remote.run(
+            args=self.status_cmd,
+            check_status=False,
+            quiet=True,
+        )
+        return proc.returncode != 0
+
     def signal(self, sig, silent=False):
         """
         Send a signal to associated remote command
@@ -132,11 +144,12 @@ class CephadmUnit(DaemonState):
         Start this daemon instance.
         """
         if self.running():
-            self.log.warn('Restarting a running daemon')
+            self.log.warning('Restarting a running daemon')
             self.restart()
             return
         self._start_logger()
-        self.remote.run(self.start_cmd)
+        self.remote.run(args=self.start_cmd)
+        self.is_started = True

     def stop(self, timeout=300):
         """
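For reference, the command string the new kill_cmd() assembles, using hypothetical fsid/type/id values:

    # Hypothetical values; only the join logic is taken from the diff above.
    fsid, type_, id_, sig = 'abc123', 'osd', '3', 9
    cmd = ' '.join([
        'sudo', 'systemctl', 'kill',
        '-s', str(int(sig)),
        'ceph-%s@%s.%s' % (fsid, type_, id_),
    ])
    assert cmd == 'sudo systemctl kill -s 9 ceph-abc123@osd.3'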
teuthology/orchestra/daemon/state.py
CHANGED
@@ -97,6 +97,13 @@ class DaemonState(object):
         """
         return self.proc is not None

+    def finished(self):
+        """
+        Is the daemon finished?
+        Return False if active.
+        """
+        return self.proc.finished if self.proc is not None else False
+
     def signal(self, sig, silent=False):
         """
         Send a signal to associated remote command.
@@ -118,7 +125,7 @@ class DaemonState(object):
         Start this daemon instance.
         """
         if self.running():
-            self.log.warn('Restarting a running daemon')
+            self.log.warning('Restarting a running daemon')
             self.restart()

     def stop(self, timeout=300):
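A hypothetical polling loop built on the new finished() methods (both the DaemonState and CephadmUnit variants return True once the daemon has exited):

    import time

    def wait_until_finished(daemon, timeout=60, interval=5):
        """Poll a DaemonState until its process has exited."""
        deadline = time.time() + timeout
        while time.time() < deadline:
            if daemon.finished():
                return True
            time.sleep(interval)
        return False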
teuthology/orchestra/daemon/systemd.py
CHANGED
@@ -86,7 +86,7 @@ class SystemDState(DaemonState):
             self.status_cmd + " | grep 'Main.*code=exited'",
         )
         line = out.strip().split('\n')[-1]
-        exit_code = int(re.match('.*status=(\d+).*', line).groups()[0])
+        exit_code = int(re.match(r'.*status=(\d+).*', line).groups()[0])
         if exit_code:
             self.remote.run(
                 args=self.output_cmd
@@ -156,7 +156,7 @@ class SystemDState(DaemonState):

         :param extra_args: Extra keyword arguments to be added.
         """
-        self.log.warn(
+        self.log.warning(
             "restart_with_args() is not supported with systemd; performing"
             "normal restart")
         self.restart()
@@ -180,7 +180,7 @@ class SystemDState(DaemonState):

         :param sig: signal to send
         """
-        self.log.warn("systemd may restart daemons automatically")
+        self.log.warning("systemd may restart daemons automatically")
         pid = self.pid
         self.log.info("Sending signal %s to process %s", sig, pid)
         sig = '-' + str(sig)
@@ -191,7 +191,7 @@ class SystemDState(DaemonState):
         Start this daemon instance.
         """
         if self.running():
-            self.log.warn('Restarting a running daemon')
+            self.log.warning('Restarting a running daemon')
             self.restart()
             return
         self.remote.run(args=[run.Raw(self.start_cmd)])
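The self.log.warn → self.log.warning changes across these files track the standard library: Logger.warn() has been a deprecated alias of warning() since Python 3.3 and is removed in Python 3.13. A minimal illustration:

    import logging

    log = logging.getLogger('daemon')
    log.warning('Restarting a running daemon')  # supported spelling
    # log.warn(...) emits a DeprecationWarning and fails on Python 3.13+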
|