teuthology 1.1.0__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. scripts/describe.py +1 -0
  2. scripts/dispatcher.py +55 -26
  3. scripts/exporter.py +18 -0
  4. scripts/lock.py +1 -1
  5. scripts/node_cleanup.py +58 -0
  6. scripts/openstack.py +9 -9
  7. scripts/results.py +12 -11
  8. scripts/schedule.py +4 -0
  9. scripts/suite.py +57 -16
  10. scripts/supervisor.py +44 -0
  11. scripts/update_inventory.py +10 -4
  12. teuthology/__init__.py +24 -26
  13. teuthology/beanstalk.py +4 -3
  14. teuthology/config.py +16 -6
  15. teuthology/contextutil.py +18 -14
  16. teuthology/describe_tests.py +25 -18
  17. teuthology/dispatcher/__init__.py +210 -35
  18. teuthology/dispatcher/supervisor.py +140 -58
  19. teuthology/exceptions.py +43 -0
  20. teuthology/exporter.py +347 -0
  21. teuthology/kill.py +76 -81
  22. teuthology/lock/cli.py +3 -3
  23. teuthology/lock/ops.py +135 -61
  24. teuthology/lock/query.py +61 -44
  25. teuthology/ls.py +1 -1
  26. teuthology/misc.py +61 -75
  27. teuthology/nuke/__init__.py +12 -353
  28. teuthology/openstack/__init__.py +4 -3
  29. teuthology/openstack/openstack-centos-7.0-user-data.txt +1 -1
  30. teuthology/openstack/openstack-centos-7.1-user-data.txt +1 -1
  31. teuthology/openstack/openstack-centos-7.2-user-data.txt +1 -1
  32. teuthology/openstack/openstack-debian-8.0-user-data.txt +1 -1
  33. teuthology/openstack/openstack-opensuse-42.1-user-data.txt +1 -1
  34. teuthology/openstack/openstack-teuthology.cron +0 -1
  35. teuthology/orchestra/cluster.py +49 -7
  36. teuthology/orchestra/connection.py +17 -4
  37. teuthology/orchestra/console.py +111 -50
  38. teuthology/orchestra/daemon/cephadmunit.py +15 -2
  39. teuthology/orchestra/daemon/state.py +8 -1
  40. teuthology/orchestra/daemon/systemd.py +4 -4
  41. teuthology/orchestra/opsys.py +30 -11
  42. teuthology/orchestra/remote.py +405 -338
  43. teuthology/orchestra/run.py +3 -3
  44. teuthology/packaging.py +19 -16
  45. teuthology/provision/__init__.py +30 -10
  46. teuthology/provision/cloud/openstack.py +12 -6
  47. teuthology/provision/cloud/util.py +1 -2
  48. teuthology/provision/downburst.py +4 -3
  49. teuthology/provision/fog.py +68 -20
  50. teuthology/provision/openstack.py +5 -4
  51. teuthology/provision/pelagos.py +1 -1
  52. teuthology/repo_utils.py +43 -13
  53. teuthology/report.py +57 -35
  54. teuthology/results.py +5 -3
  55. teuthology/run.py +13 -14
  56. teuthology/run_tasks.py +27 -43
  57. teuthology/schedule.py +4 -3
  58. teuthology/scrape.py +28 -22
  59. teuthology/suite/__init__.py +74 -45
  60. teuthology/suite/build_matrix.py +34 -24
  61. teuthology/suite/fragment-merge.lua +105 -0
  62. teuthology/suite/matrix.py +31 -2
  63. teuthology/suite/merge.py +175 -0
  64. teuthology/suite/placeholder.py +6 -9
  65. teuthology/suite/run.py +175 -100
  66. teuthology/suite/util.py +64 -218
  67. teuthology/task/__init__.py +1 -1
  68. teuthology/task/ansible.py +101 -32
  69. teuthology/task/buildpackages.py +2 -2
  70. teuthology/task/ceph_ansible.py +13 -6
  71. teuthology/task/cephmetrics.py +2 -1
  72. teuthology/task/clock.py +33 -14
  73. teuthology/task/exec.py +18 -0
  74. teuthology/task/hadoop.py +2 -2
  75. teuthology/task/install/__init__.py +29 -7
  76. teuthology/task/install/bin/adjust-ulimits +16 -0
  77. teuthology/task/install/bin/daemon-helper +114 -0
  78. teuthology/task/install/bin/stdin-killer +263 -0
  79. teuthology/task/install/deb.py +1 -1
  80. teuthology/task/install/rpm.py +17 -5
  81. teuthology/task/install/util.py +3 -3
  82. teuthology/task/internal/__init__.py +41 -10
  83. teuthology/task/internal/edit_sudoers.sh +10 -0
  84. teuthology/task/internal/lock_machines.py +2 -9
  85. teuthology/task/internal/redhat.py +31 -1
  86. teuthology/task/internal/syslog.py +31 -8
  87. teuthology/task/kernel.py +152 -145
  88. teuthology/task/lockfile.py +1 -1
  89. teuthology/task/mpi.py +10 -10
  90. teuthology/task/pcp.py +1 -1
  91. teuthology/task/selinux.py +16 -8
  92. teuthology/task/ssh_keys.py +4 -4
  93. teuthology/timer.py +3 -3
  94. teuthology/util/loggerfile.py +19 -0
  95. teuthology/util/scanner.py +159 -0
  96. teuthology/util/sentry.py +52 -0
  97. teuthology/util/time.py +52 -0
  98. teuthology-1.2.1.data/scripts/adjust-ulimits +16 -0
  99. teuthology-1.2.1.data/scripts/daemon-helper +114 -0
  100. teuthology-1.2.1.data/scripts/stdin-killer +263 -0
  101. teuthology-1.2.1.dist-info/METADATA +88 -0
  102. teuthology-1.2.1.dist-info/RECORD +168 -0
  103. {teuthology-1.1.0.dist-info → teuthology-1.2.1.dist-info}/WHEEL +1 -1
  104. {teuthology-1.1.0.dist-info → teuthology-1.2.1.dist-info}/entry_points.txt +3 -2
  105. scripts/nuke.py +0 -47
  106. scripts/worker.py +0 -37
  107. teuthology/lock/test/__init__.py +0 -0
  108. teuthology/lock/test/test_lock.py +0 -7
  109. teuthology/nuke/actions.py +0 -456
  110. teuthology/openstack/test/__init__.py +0 -0
  111. teuthology/openstack/test/openstack-integration.py +0 -286
  112. teuthology/openstack/test/test_config.py +0 -35
  113. teuthology/openstack/test/test_openstack.py +0 -1695
  114. teuthology/orchestra/test/__init__.py +0 -0
  115. teuthology/orchestra/test/integration/__init__.py +0 -0
  116. teuthology/orchestra/test/integration/test_integration.py +0 -94
  117. teuthology/orchestra/test/test_cluster.py +0 -240
  118. teuthology/orchestra/test/test_connection.py +0 -106
  119. teuthology/orchestra/test/test_console.py +0 -217
  120. teuthology/orchestra/test/test_opsys.py +0 -404
  121. teuthology/orchestra/test/test_remote.py +0 -185
  122. teuthology/orchestra/test/test_run.py +0 -286
  123. teuthology/orchestra/test/test_systemd.py +0 -54
  124. teuthology/orchestra/test/util.py +0 -12
  125. teuthology/task/tests/__init__.py +0 -110
  126. teuthology/task/tests/test_locking.py +0 -25
  127. teuthology/task/tests/test_run.py +0 -40
  128. teuthology/test/__init__.py +0 -0
  129. teuthology/test/fake_archive.py +0 -107
  130. teuthology/test/fake_fs.py +0 -92
  131. teuthology/test/integration/__init__.py +0 -0
  132. teuthology/test/integration/test_suite.py +0 -86
  133. teuthology/test/task/__init__.py +0 -205
  134. teuthology/test/task/test_ansible.py +0 -624
  135. teuthology/test/task/test_ceph_ansible.py +0 -176
  136. teuthology/test/task/test_console_log.py +0 -88
  137. teuthology/test/task/test_install.py +0 -337
  138. teuthology/test/task/test_internal.py +0 -57
  139. teuthology/test/task/test_kernel.py +0 -243
  140. teuthology/test/task/test_pcp.py +0 -379
  141. teuthology/test/task/test_selinux.py +0 -35
  142. teuthology/test/test_config.py +0 -189
  143. teuthology/test/test_contextutil.py +0 -68
  144. teuthology/test/test_describe_tests.py +0 -316
  145. teuthology/test/test_email_sleep_before_teardown.py +0 -81
  146. teuthology/test/test_exit.py +0 -97
  147. teuthology/test/test_get_distro.py +0 -47
  148. teuthology/test/test_get_distro_version.py +0 -47
  149. teuthology/test/test_get_multi_machine_types.py +0 -27
  150. teuthology/test/test_job_status.py +0 -60
  151. teuthology/test/test_ls.py +0 -48
  152. teuthology/test/test_misc.py +0 -391
  153. teuthology/test/test_nuke.py +0 -290
  154. teuthology/test/test_packaging.py +0 -763
  155. teuthology/test/test_parallel.py +0 -28
  156. teuthology/test/test_repo_utils.py +0 -225
  157. teuthology/test/test_report.py +0 -77
  158. teuthology/test/test_results.py +0 -155
  159. teuthology/test/test_run.py +0 -239
  160. teuthology/test/test_safepath.py +0 -55
  161. teuthology/test/test_schedule.py +0 -45
  162. teuthology/test/test_scrape.py +0 -167
  163. teuthology/test/test_timer.py +0 -80
  164. teuthology/test/test_vps_os_vers_parameter_checking.py +0 -84
  165. teuthology/test/test_worker.py +0 -303
  166. teuthology/worker.py +0 -354
  167. teuthology-1.1.0.dist-info/METADATA +0 -76
  168. teuthology-1.1.0.dist-info/RECORD +0 -213
  169. {teuthology-1.1.0.dist-info → teuthology-1.2.1.dist-info}/LICENSE +0 -0
  170. {teuthology-1.1.0.dist-info → teuthology-1.2.1.dist-info}/top_level.txt +0 -0
@@ -123,7 +123,7 @@ class OpenStackInstance(object):
123
123
  with safe_while(sleep=2, tries=30,
124
124
  action="get ip " + self['id']) as proceed:
125
125
  while proceed():
126
- found = re.match('.*\d+', self['addresses'])
126
+ found = re.match(r'.*\d+', self['addresses'])
127
127
  if found:
128
128
  return self['addresses']
129
129
  self.set_info()
@@ -165,7 +165,7 @@ class OpenStackInstance(object):
165
165
  self.private_ip = self.get_ip_neutron()
166
166
  except Exception as e:
167
167
  log.debug("ignoring get_ip_neutron exception " + str(e))
168
- self.private_ip = re.findall(network + '=([\d.]+)',
168
+ self.private_ip = re.findall(network + r'=([\d.]+)',
169
169
  self.get_addresses())[0]
170
170
  return self.private_ip
171
171
 
@@ -1026,7 +1026,8 @@ ssh access : ssh {identity}{username}@{ip} # logs in /usr/share/nginx/
1026
1026
  cluster, based on a template where the OpenStack credentials
1027
1027
  and a few other values are substituted.
1028
1028
  """
1029
- path = tempfile.mktemp()
1029
+ fd, path = tempfile.mkstemp()
1030
+ os.close(fd)
1030
1031
 
1031
1032
  with open(os.path.dirname(__file__) + '/bootstrap-teuthology.sh', 'rb') as f:
1032
1033
  b64_bootstrap = base64.b64encode(f.read())
@@ -6,7 +6,7 @@ bootcmd:
6
6
  - ( curl --silent http://169.254.169.254/2009-04-04/meta-data/hostname | sed -e 's/[\.-].*//' ; eval printf "%03d%03d%03d%03d.{lab_domain}" $(curl --silent http://169.254.169.254/2009-04-04/meta-data/local-ipv4 | tr . ' ' ) ) | tee /etc/hostname
7
7
  - hostname $(cat /etc/hostname)
8
8
  - ( echo ; echo "MaxSessions 1000" ) >> /etc/ssh/sshd_config
9
- # See https://github.com/ceph/ceph-cm-ansible/blob/master/roles/cobbler/templates/snippets/cephlab_user
9
+ # See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
10
10
  - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
11
11
  preserve_hostname: true
12
12
  system_info:
@@ -6,7 +6,7 @@ bootcmd:
6
6
  - ( curl --silent http://169.254.169.254/2009-04-04/meta-data/hostname | sed -e 's/[\.-].*//' ; eval printf "%03d%03d%03d%03d.{lab_domain}" $(curl --silent http://169.254.169.254/2009-04-04/meta-data/local-ipv4 | tr . ' ' ) ) | tee /etc/hostname
7
7
  - hostname $(cat /etc/hostname)
8
8
  - ( echo ; echo "MaxSessions 1000" ) >> /etc/ssh/sshd_config
9
- # See https://github.com/ceph/ceph-cm-ansible/blob/master/roles/cobbler/templates/snippets/cephlab_user
9
+ # See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
10
10
  - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
11
11
  preserve_hostname: true
12
12
  system_info:
@@ -6,7 +6,7 @@ bootcmd:
6
6
  - ( curl --silent http://169.254.169.254/2009-04-04/meta-data/hostname | sed -e 's/[\.-].*//' ; eval printf "%03d%03d%03d%03d.{lab_domain}" $(curl --silent http://169.254.169.254/2009-04-04/meta-data/local-ipv4 | tr . ' ' ) ) | tee /etc/hostname
7
7
  - hostname $(cat /etc/hostname)
8
8
  - ( echo ; echo "MaxSessions 1000" ) >> /etc/ssh/sshd_config
9
- # See https://github.com/ceph/ceph-cm-ansible/blob/master/roles/cobbler/templates/snippets/cephlab_user
9
+ # See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
10
10
  - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
11
11
  preserve_hostname: true
12
12
  system_info:
@@ -18,7 +18,7 @@ packages:
18
18
  - git
19
19
  - ntp
20
20
  runcmd:
21
- # See https://github.com/ceph/ceph-cm-ansible/blob/master/roles/cobbler/templates/snippets/cephlab_user
21
+ # See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
22
22
  - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
23
23
  - echo '{username} ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
24
24
  final_message: "{up}, after $UPTIME seconds"
@@ -6,7 +6,7 @@ bootcmd:
6
6
  - ( curl --silent http://169.254.169.254/2009-04-04/meta-data/hostname | sed -e 's/[\.-].*//' ; eval printf "%03d%03d%03d%03d.{lab_domain}" $(curl --silent http://169.254.169.254/2009-04-04/meta-data/local-ipv4 | tr . ' ' ) ) | tee /etc/hostname
7
7
  - hostname $(cat /etc/hostname)
8
8
  - ( echo ; echo "MaxSessions 1000" ) >> /etc/ssh/sshd_config
9
- # See https://github.com/ceph/ceph-cm-ansible/blob/master/roles/cobbler/templates/snippets/cephlab_user
9
+ # See https://github.com/ceph/ceph-cm-ansible/blob/main/roles/cobbler/templates/snippets/cephlab_user
10
10
  - ( echo 'Defaults !requiretty' ; echo 'Defaults visiblepw' ) | tee /etc/sudoers.d/cephlab_sudo ; chmod 0440 /etc/sudoers.d/cephlab_sudo
11
11
  preserve_hostname: true
12
12
  users:
@@ -1,2 +1 @@
1
1
  SHELL=/bin/bash
2
- */30 * * * * ( date ; source $HOME/openrc.sh ; time timeout 900 $HOME/teuthology/virtualenv/bin/teuthology-nuke --stale-openstack ) >> $HOME/cron.log 2>&1
@@ -2,8 +2,7 @@
2
2
  Cluster definition
3
3
  part of context, Cluster is used to save connection information.
4
4
  """
5
- import teuthology.misc
6
-
5
+ from teuthology.orchestra import run
7
6
 
8
7
  class Cluster(object):
9
8
  """
@@ -50,18 +49,47 @@ class Cluster(object):
50
49
  )
51
50
  self.remotes[remote] = list(roles)
52
51
 
53
- def run(self, **kwargs):
52
+ def run(self, wait=True, parallel=False, **kwargs):
54
53
  """
55
54
  Run a command on all the nodes in this cluster.
56
55
 
57
56
  Goes through nodes in alphabetical order.
58
57
 
59
- If you don't specify wait=False, this will be sequentially.
58
+ The default usage is when parallel=False and wait=True,
59
+ which is a sequential run for each node one by one.
60
+
61
+ If you specify parallel=True, it will be in parallel.
62
+
63
+ If you specify wait=False, it returns immediately.
64
+ Since it is not possible to run sequentially and
65
+ without waiting for each command to finish, the parallel value
66
+ is ignored and treated as True.
60
67
 
61
68
  Returns a list of `RemoteProcess`.
62
69
  """
70
+ # -+-------+----------+----------+------------+---------------
71
+ # | wait | parallel | run.wait | remote.run | comments
72
+ # -+-------+----------+----------+------------+---------------
73
+ # 1|*True |*False | no | wait=True | sequentially
74
+ # 2| True | True | yes | wait=False | parallel
75
+ # 3| False | True | no | wait=False | parallel
76
+ # 4| False | False | no | wait=False | same as above
77
+
78
+ # We always run in parallel if wait=False,
79
+ # that is why (4) is equivalent to (3).
80
+
81
+ # We wait from remote.run only if run sequentially.
82
+ _wait = (parallel == False and wait == True)
83
+
63
84
  remotes = sorted(self.remotes.keys(), key=lambda rem: rem.name)
64
- return [remote.run(**kwargs) for remote in remotes]
85
+ procs = [remote.run(**kwargs, wait=_wait) for remote in remotes]
86
+
87
+ # We do run.wait only if parallel=True, because if parallel=False,
88
+ # we have run sequentially and all processes are complete.
89
+
90
+ if parallel and wait:
91
+ run.wait(procs)
92
+ return procs
65
93
 
66
94
  def sh(self, script, **kwargs):
67
95
  """
@@ -86,11 +114,12 @@ class Cluster(object):
86
114
  remotes = sorted(self.remotes.keys(), key=lambda rem: rem.name)
87
115
  for remote in remotes:
88
116
  if sudo:
89
- teuthology.misc.sudo_write_file(remote, file_name, content, perms=perms, owner=owner)
117
+ remote.write_file(file_name, content,
118
+ sudo=True, mode=perms, owner=owner)
90
119
  else:
91
120
  if perms is not None or owner is not None:
92
121
  raise ValueError("To specify perms or owner, sudo must be True")
93
- teuthology.misc.write_file(remote, file_name, content)
122
+ remote.write_file(file_name, content)
94
123
 
95
124
  def only(self, *roles):
96
125
  """
@@ -144,3 +173,16 @@ class Cluster(object):
144
173
  if remote not in matches.remotes:
145
174
  c.add(remote, has_roles)
146
175
  return c
176
+
177
+ def filter(self, func):
178
+ """
179
+ Return a cluster whose remotes are filtered by `func`.
180
+
181
+ Example::
182
+ cluster = ctx.cluster.filter(lambda r: r.is_online)
183
+ """
184
+ result = self.__class__()
185
+ for rem, roles in self.remotes.items():
186
+ if func(rem):
187
+ result.add(rem, roles)
188
+ return result
@@ -79,13 +79,19 @@ def connect(user_at_host, host_key=None, keep_alive=False, timeout=60,
79
79
  timeout=timeout
80
80
  )
81
81
 
82
- ssh_config_path = os.path.expanduser("~/.ssh/config")
82
+ key_filename = key_filename or config.ssh_key
83
+ ssh_config_path = config.ssh_config_path or "~/.ssh/config"
84
+ ssh_config_path = os.path.expanduser(ssh_config_path)
83
85
  if os.path.exists(ssh_config_path):
84
86
  ssh_config = paramiko.SSHConfig()
85
87
  ssh_config.parse(open(ssh_config_path))
86
88
  opts = ssh_config.lookup(host)
87
89
  if not key_filename and 'identityfile' in opts:
88
90
  key_filename = opts['identityfile']
91
+ if 'hostname' in opts:
92
+ connect_args['hostname'] = opts['hostname']
93
+ if 'user' in opts:
94
+ connect_args['username'] = opts['user']
89
95
 
90
96
  if key_filename:
91
97
  if not isinstance(key_filename, list):
@@ -98,13 +104,20 @@ def connect(user_at_host, host_key=None, keep_alive=False, timeout=60,
98
104
  if not retry:
99
105
  ssh.connect(**connect_args)
100
106
  else:
101
- # Retries are implemented using safe_while
102
- with safe_while(sleep=1, action='connect to ' + host) as proceed:
107
+ with safe_while(sleep=1, increment=3, action='connect to ' + host) as proceed:
103
108
  while proceed():
109
+ auth_err_msg = f"Error authenticating with {host}"
104
110
  try:
105
111
  ssh.connect(**connect_args)
106
112
  break
113
+ except EOFError:
114
+ log.error(f"{auth_err_msg}: EOFError")
107
115
  except paramiko.AuthenticationException as e:
108
- log.error(f"Error authenticating with {host}: {str(e)}")
116
+ log.error(f"{auth_err_msg}: {repr(e)}")
117
+ except paramiko.SSHException as e:
118
+ auth_err_msg = f"{auth_err_msg}: {repr(e)}"
119
+ if not key_filename:
120
+ auth_err_msg = f"{auth_err_msg} (No SSH private key found!)"
121
+ log.exception(auth_err_msg)
109
122
  ssh.get_transport().set_keepalive(keep_alive)
110
123
  return ssh
@@ -1,3 +1,4 @@
1
+ import io
1
2
  import logging
2
3
  import os
3
4
  import pexpect
@@ -6,6 +7,8 @@ import subprocess
6
7
  import sys
7
8
  import time
8
9
 
10
+ from typing import Union, Literal, Optional
11
+
9
12
  import teuthology.lock.query
10
13
  import teuthology.lock.util
11
14
  from teuthology.config import config
@@ -19,6 +22,7 @@ except ImportError:
19
22
  libvirt = None
20
23
 
21
24
  log = logging.getLogger(__name__)
25
+ PowerOnOffState = Union[Literal["on"], Literal["off"]]
22
26
 
23
27
 
24
28
  class RemoteConsole():
@@ -35,11 +39,11 @@ class PhysicalConsole(RemoteConsole):
35
39
  Physical Console (set from getRemoteConsole)
36
40
  """
37
41
  def __init__(self, name, ipmiuser=None, ipmipass=None, ipmidomain=None,
38
- logfile=None, timeout=40):
42
+ timeout=120):
39
43
  self.name = name
40
44
  self.shortname = self.getShortName(name)
45
+ self.log = log.getChild(self.shortname)
41
46
  self.timeout = timeout
42
- self.logfile = None
43
47
  self.ipmiuser = ipmiuser or config.ipmi_user
44
48
  self.ipmipass = ipmipass or config.ipmi_password
45
49
  self.ipmidomain = ipmidomain or config.ipmi_domain
@@ -71,11 +75,14 @@ class PhysicalConsole(RemoteConsole):
71
75
  """
72
76
  Run a command using pexpect.spawn(). Return the child object.
73
77
  """
74
- log.debug('pexpect command: %s', cmd)
75
- return pexpect.spawn(
78
+ self.log.debug('pexpect command: %s', cmd)
79
+ p = pexpect.spawn(
76
80
  cmd,
77
- logfile=self.logfile,
81
+ encoding='utf-8',
82
+ codec_errors="backslashreplace",
78
83
  )
84
+ p.logfile_read = io.StringIO()
85
+ return p
79
86
 
80
87
  def _get_console(self, readonly=True):
81
88
  def start():
@@ -84,7 +91,7 @@ class PhysicalConsole(RemoteConsole):
84
91
 
85
92
  child = start()
86
93
  if self.has_conserver and not child.isalive():
87
- log.error("conserver failed to get the console; will try ipmitool")
94
+ self.log.error("conserver failed to get the console; will try ipmitool")
88
95
  self.has_conserver = False
89
96
  child = start()
90
97
  return child
@@ -114,7 +121,7 @@ class PhysicalConsole(RemoteConsole):
114
121
 
115
122
  def _check_ipmi_credentials(self):
116
123
  if not self.has_ipmi_credentials:
117
- log.error(
124
+ self.log.error(
118
125
  "Must set ipmi_user, ipmi_password, and ipmi_domain in "
119
126
  ".teuthology.yaml"
120
127
  )
@@ -129,41 +136,108 @@ class PhysicalConsole(RemoteConsole):
129
136
  timeout=t)
130
137
  if r != 0:
131
138
  child.kill(15)
139
+ self.log.debug('console disconnect output: %s', child.logfile_read.getvalue().strip())
132
140
  else:
133
141
  child.send('~.')
134
142
  r = child.expect(
135
143
  ['terminated ipmitool', pexpect.TIMEOUT, pexpect.EOF],
136
144
  timeout=t)
145
+ self.log.debug('ipmitool disconnect output: %s', child.logfile_read.getvalue().strip())
137
146
  if r != 0:
138
147
  self._pexpect_spawn_ipmi('sol deactivate')
148
+ self.log.debug('sol deactivate output: %s', child.logfile_read.getvalue().strip())
139
149
 
140
150
  def _wait_for_login(self, timeout=None, attempts=2):
141
151
  """
142
152
  Wait for login. Retry if timeouts occur on commands.
143
153
  """
144
154
  t = timeout or self.timeout
145
- log.debug('Waiting for login prompt on {s}'.format(s=self.shortname))
155
+ self.log.debug('Waiting for login prompt')
146
156
  # wait for login prompt to indicate boot completed
147
157
  for i in range(0, attempts):
148
158
  start = time.time()
149
159
  while time.time() - start < t:
150
160
  child = self._get_console(readonly=False)
151
161
  child.send('\n')
152
- log.debug('expect: {s} login'.format(s=self.shortname))
153
162
  r = child.expect(
154
163
  ['{s} login: '.format(s=self.shortname),
155
164
  pexpect.TIMEOUT,
156
165
  pexpect.EOF],
157
166
  timeout=(t - (time.time() - start)))
158
- log.debug('expect before: {b}'.format(b=child.before))
159
- log.debug('expect after: {a}'.format(a=child.after))
167
+ self.log.debug('expect before: {b}'.format(b=child.before))
168
+ self.log.debug('expect after: {a}'.format(a=child.after))
160
169
 
161
170
  self._exit_session(child)
162
171
  if r == 0:
163
172
  return
164
173
  raise ConsoleError("Did not get a login prompt from %s!" % self.name)
165
174
 
166
- def check_power(self, state, timeout=None):
175
+ def check_power(self, state: Literal["on","off"]):
176
+ c = self._pexpect_spawn_ipmi('power status')
177
+ r = c.expect(['Chassis Power is {s}'.format(
178
+ s=state), pexpect.EOF, pexpect.TIMEOUT], timeout=1)
179
+ self.log.debug('check power output: %s', c.logfile_read.getvalue().strip())
180
+ return r == 0
181
+
182
+ def set_power(self, state: PowerOnOffState, timeout: Optional[int]):
183
+ self.log.info(f"Power {state}")
184
+ timeout = timeout or self.timeout
185
+ sleep_time = 4
186
+ reissue_after_failures = 5
187
+ failures = 0
188
+ issued = False
189
+ succeeded = False
190
+ with safe_while(
191
+ sleep=sleep_time,
192
+ tries=int(timeout / sleep_time),
193
+ _raise=False,
194
+ action='wait for power on') as proceed:
195
+ while proceed():
196
+ if not issued:
197
+ child = self._pexpect_spawn_ipmi(f"power {state}")
198
+ rc = child.expect(
199
+ [
200
+ "Up/On" if state.lower() == "on" else "Down/Off",
201
+ pexpect.EOF
202
+ ],
203
+ timeout=self.timeout
204
+ )
205
+ self.log.debug(
206
+ f"power {state} output: {child.logfile_read.getvalue().strip()}"
207
+ )
208
+ if rc == 0:
209
+ issued = True
210
+ continue
211
+
212
+ if not succeeded:
213
+ child = self._pexpect_spawn_ipmi('power status')
214
+ rc = child.expect(
215
+ [
216
+ f"Chassis Power is {state}",
217
+ pexpect.EOF,
218
+ pexpect.TIMEOUT
219
+ ],
220
+ timeout=1
221
+ )
222
+ self.log.debug(
223
+ f"check power output: {child.logfile_read.getvalue().strip()}"
224
+ )
225
+ if rc == 0:
226
+ succeeded = True
227
+ break
228
+ failures += 1
229
+ if failures == reissue_after_failures:
230
+ issued = False
231
+
232
+ if issued and succeeded:
233
+ self.log.info(f"Power {state} completed")
234
+ return True
235
+ raise RuntimeError(
236
+ f"Failed to power {state} {self.shortname} in {self.timeout}s"
237
+ )
238
+ return False
239
+
240
+ def check_power_retries(self, state, timeout=None):
167
241
  """
168
242
  Check power. Retry if EOF encountered on power check read.
169
243
  """
@@ -178,6 +252,7 @@ class PhysicalConsole(RemoteConsole):
178
252
  c = self._pexpect_spawn_ipmi('power status')
179
253
  r = c.expect(['Chassis Power is {s}'.format(
180
254
  s=state), pexpect.EOF, pexpect.TIMEOUT], timeout=1)
255
+ self.log.debug('check power output: %s', c.logfile_read.getvalue().strip())
181
256
  if r == 0:
182
257
  return True
183
258
  return False
@@ -190,9 +265,8 @@ class PhysicalConsole(RemoteConsole):
190
265
  # check for login prompt at console
191
266
  self._wait_for_login(timeout)
192
267
  return True
193
- except Exception as e:
194
- log.info('Failed to get ipmi console status for {s}: {e}'.format(
195
- s=self.shortname, e=e))
268
+ except Exception:
269
+ self.log.exception('Failed to get ipmi console status')
196
270
  return False
197
271
 
198
272
  def power_cycle(self, timeout=300):
@@ -201,62 +275,45 @@ class PhysicalConsole(RemoteConsole):
201
275
 
202
276
  :param timeout: How long to wait for login
203
277
  """
204
- log.info('Power cycling {s}'.format(s=self.shortname))
278
+ self.log.info('Power cycling')
205
279
  child = self._pexpect_spawn_ipmi('power cycle')
206
280
  child.expect('Chassis Power Control: Cycle', timeout=self.timeout)
281
+ self.log.debug('power cycle output: %s', child.logfile_read.getvalue().strip())
207
282
  self._wait_for_login(timeout=timeout)
208
- log.info('Power cycle for {s} completed'.format(s=self.shortname))
283
+ self.log.info('Power cycle completed')
209
284
 
210
285
  def hard_reset(self, wait_for_login=True):
211
286
  """
212
287
  Perform physical hard reset. Retry if EOF returned from read
213
288
  and wait for login when complete.
214
289
  """
215
- log.info('Performing hard reset of {s}'.format(s=self.shortname))
290
+ self.log.info('Performing hard reset')
216
291
  start = time.time()
217
292
  while time.time() - start < self.timeout:
218
293
  child = self._pexpect_spawn_ipmi('power reset')
219
294
  r = child.expect(['Chassis Power Control: Reset', pexpect.EOF],
220
295
  timeout=self.timeout)
296
+ self.log.debug('power reset output: %s', child.logfile_read.getvalue().strip())
221
297
  if r == 0:
222
298
  break
223
299
  if wait_for_login:
224
300
  self._wait_for_login()
225
- log.info('Hard reset for {s} completed'.format(s=self.shortname))
301
+ self.log.info('Hard reset completed')
226
302
 
227
303
  def power_on(self):
228
304
  """
229
305
  Physical power on. Loop checking cmd return.
230
306
  """
231
- log.info('Power on {s}'.format(s=self.shortname))
232
- start = time.time()
233
- while time.time() - start < self.timeout:
234
- child = self._pexpect_spawn_ipmi('power on')
235
- r = child.expect(['Chassis Power Control: Up/On', pexpect.EOF],
236
- timeout=self.timeout)
237
- if r == 0:
238
- break
239
- if self.check_power('on'):
240
- log.info('Power on for {s} completed'.format(s=self.shortname))
241
- else:
242
- log.error('Failed to power on {s}'.format(s=self.shortname))
307
+ return self.set_power("on", timeout=None)
243
308
 
244
309
  def power_off(self):
245
310
  """
246
311
  Physical power off. Loop checking cmd return.
247
312
  """
248
- log.info('Power off {s}'.format(s=self.shortname))
249
- start = time.time()
250
- while time.time() - start < self.timeout:
251
- child = self._pexpect_spawn_ipmi('power off')
252
- r = child.expect(['Chassis Power Control: Down/Off', pexpect.EOF],
253
- timeout=self.timeout)
254
- if r == 0:
255
- break
256
- if self.check_power('off', 60):
257
- log.info('Power off for {s} completed'.format(s=self.shortname))
258
- else:
259
- log.error('Failed to power off {s}'.format(s=self.shortname))
313
+ try:
314
+ return self.set_power("off", timeout=None)
315
+ except Exception:
316
+ pass
260
317
 
261
318
  def power_off_for_interval(self, interval=30):
262
319
  """
@@ -264,17 +321,21 @@ class PhysicalConsole(RemoteConsole):
264
321
 
265
322
  :param interval: Length of power-off period.
266
323
  """
267
- log.info('Power off {s} for {i} seconds'.format(
268
- s=self.shortname, i=interval))
324
+ self.log.info('Power off for {i} seconds'.format(i=interval))
269
325
  child = self._pexpect_spawn_ipmi('power off')
270
326
  child.expect('Chassis Power Control: Down/Off', timeout=self.timeout)
271
327
 
328
+ self.log.debug('power off output: %s', child.logfile_read.getvalue().strip())
329
+ child.logfile_read.seek(0)
330
+ child.logfile_read.truncate()
331
+
272
332
  time.sleep(interval)
273
333
 
274
334
  child = self._pexpect_spawn_ipmi('power on')
275
335
  child.expect('Chassis Power Control: Up/On', timeout=self.timeout)
336
+ self.log.debug('power on output: %s', child.logfile_read.getvalue().strip())
276
337
  self._wait_for_login()
277
- log.info('Power off for {i} seconds completed'.format(i=interval))
338
+ self.log.info('Power off for {i} seconds completed'.format(i=interval))
278
339
 
279
340
  def spawn_sol_log(self, dest_path):
280
341
  """
@@ -307,7 +368,7 @@ class PhysicalConsole(RemoteConsole):
307
368
 
308
369
  proc = start()
309
370
  if self.has_conserver and proc.poll() is not None:
310
- log.error("conserver failed to get the console; will try ipmitool")
371
+ self.log.error("conserver failed to get the console; will try ipmitool")
311
372
  self.has_conserver = False
312
373
  proc = start()
313
374
  return proc
@@ -322,6 +383,7 @@ class VirtualConsole(RemoteConsole):
322
383
  raise RuntimeError("libvirt not found")
323
384
 
324
385
  self.shortname = self.getShortName(name)
386
+ self.log = log.getChild(self.shortname)
325
387
  status_info = teuthology.lock.query.get_status(self.shortname)
326
388
  try:
327
389
  if teuthology.lock.query.is_vm(status=status_info):
@@ -379,9 +441,8 @@ class VirtualConsole(RemoteConsole):
379
441
  """
380
442
  Simulate power off for an interval.
381
443
  """
382
- log.info('Power off {s} for {i} seconds'.format(
383
- s=self.shortname, i=interval))
444
+ self.log.info('Power off for {i} seconds'.format(i=interval))
384
445
  self.vm_domain.info().destroy()
385
446
  time.sleep(interval)
386
447
  self.vm_domain.info().create()
387
- log.info('Power off for {i} seconds completed'.format(i=interval))
448
+ self.log.info('Power off for {i} seconds completed'.format(i=interval))
@@ -112,6 +112,18 @@ class CephadmUnit(DaemonState):
112
112
  """
113
113
  return self.is_started
114
114
 
115
+ def finished(self):
116
+ """
117
+ Is the daemon finished?
118
+ Return False if active.
119
+ """
120
+ proc = self.remote.run(
121
+ args=self.status_cmd,
122
+ check_status=False,
123
+ quiet=True,
124
+ )
125
+ return proc.returncode != 0
126
+
115
127
  def signal(self, sig, silent=False):
116
128
  """
117
129
  Send a signal to associated remote command
@@ -132,11 +144,12 @@ class CephadmUnit(DaemonState):
132
144
  Start this daemon instance.
133
145
  """
134
146
  if self.running():
135
- self.log.warn('Restarting a running daemon')
147
+ self.log.warning('Restarting a running daemon')
136
148
  self.restart()
137
149
  return
138
150
  self._start_logger()
139
- self.remote.run(self.start_cmd)
151
+ self.remote.run(args=self.start_cmd)
152
+ self.is_started = True
140
153
 
141
154
  def stop(self, timeout=300):
142
155
  """
@@ -97,6 +97,13 @@ class DaemonState(object):
97
97
  """
98
98
  return self.proc is not None
99
99
 
100
+ def finished(self):
101
+ """
102
+ Is the daemon finished?
103
+ Return False if active.
104
+ """
105
+ return self.proc.finished if self.proc is not None else False
106
+
100
107
  def signal(self, sig, silent=False):
101
108
  """
102
109
  Send a signal to associated remote command.
@@ -118,7 +125,7 @@ class DaemonState(object):
118
125
  Start this daemon instance.
119
126
  """
120
127
  if self.running():
121
- self.log.warn('Restarting a running daemon')
128
+ self.log.warning('Restarting a running daemon')
122
129
  self.restart()
123
130
 
124
131
  def stop(self, timeout=300):
@@ -86,7 +86,7 @@ class SystemDState(DaemonState):
86
86
  self.status_cmd + " | grep 'Main.*code=exited'",
87
87
  )
88
88
  line = out.strip().split('\n')[-1]
89
- exit_code = int(re.match('.*status=(\d+).*', line).groups()[0])
89
+ exit_code = int(re.match(r'.*status=(\d+).*', line).groups()[0])
90
90
  if exit_code:
91
91
  self.remote.run(
92
92
  args=self.output_cmd
@@ -156,7 +156,7 @@ class SystemDState(DaemonState):
156
156
 
157
157
  :param extra_args: Extra keyword arguments to be added.
158
158
  """
159
- self.log.warn(
159
+ self.log.warning(
160
160
  "restart_with_args() is not supported with systemd; performing"
161
161
  "normal restart")
162
162
  self.restart()
@@ -180,7 +180,7 @@ class SystemDState(DaemonState):
180
180
 
181
181
  :param sig: signal to send
182
182
  """
183
- self.log.warn("systemd may restart daemons automatically")
183
+ self.log.warning("systemd may restart daemons automatically")
184
184
  pid = self.pid
185
185
  self.log.info("Sending signal %s to process %s", sig, pid)
186
186
  sig = '-' + str(sig)
@@ -191,7 +191,7 @@ class SystemDState(DaemonState):
191
191
  Start this daemon instance.
192
192
  """
193
193
  if self.running():
194
- self.log.warn('Restarting a running daemon')
194
+ self.log.warning('Restarting a running daemon')
195
195
  self.restart()
196
196
  return
197
197
  self.remote.run(args=[run.Raw(self.start_cmd)])