teuthology 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168)
  1. scripts/describe.py +1 -0
  2. scripts/dispatcher.py +55 -26
  3. scripts/exporter.py +18 -0
  4. scripts/lock.py +1 -1
  5. scripts/node_cleanup.py +58 -0
  6. scripts/openstack.py +9 -9
  7. scripts/results.py +12 -11
  8. scripts/schedule.py +4 -0
  9. scripts/suite.py +57 -16
  10. scripts/supervisor.py +44 -0
  11. scripts/update_inventory.py +10 -4
  12. teuthology/__init__.py +24 -26
  13. teuthology/beanstalk.py +4 -3
  14. teuthology/config.py +16 -6
  15. teuthology/contextutil.py +18 -14
  16. teuthology/describe_tests.py +25 -18
  17. teuthology/dispatcher/__init__.py +210 -35
  18. teuthology/dispatcher/supervisor.py +140 -58
  19. teuthology/exceptions.py +43 -0
  20. teuthology/exporter.py +347 -0
  21. teuthology/kill.py +76 -81
  22. teuthology/lock/cli.py +3 -3
  23. teuthology/lock/ops.py +135 -61
  24. teuthology/lock/query.py +61 -44
  25. teuthology/ls.py +1 -1
  26. teuthology/misc.py +61 -75
  27. teuthology/nuke/__init__.py +12 -353
  28. teuthology/openstack/__init__.py +4 -3
  29. teuthology/openstack/openstack-centos-7.0-user-data.txt +1 -1
  30. teuthology/openstack/openstack-centos-7.1-user-data.txt +1 -1
  31. teuthology/openstack/openstack-centos-7.2-user-data.txt +1 -1
  32. teuthology/openstack/openstack-debian-8.0-user-data.txt +1 -1
  33. teuthology/openstack/openstack-opensuse-42.1-user-data.txt +1 -1
  34. teuthology/openstack/openstack-teuthology.cron +0 -1
  35. teuthology/orchestra/cluster.py +49 -7
  36. teuthology/orchestra/connection.py +16 -5
  37. teuthology/orchestra/console.py +111 -50
  38. teuthology/orchestra/daemon/cephadmunit.py +17 -4
  39. teuthology/orchestra/daemon/state.py +8 -1
  40. teuthology/orchestra/daemon/systemd.py +4 -4
  41. teuthology/orchestra/opsys.py +30 -11
  42. teuthology/orchestra/remote.py +405 -338
  43. teuthology/orchestra/run.py +3 -3
  44. teuthology/packaging.py +19 -16
  45. teuthology/provision/__init__.py +30 -10
  46. teuthology/provision/cloud/openstack.py +12 -6
  47. teuthology/provision/cloud/util.py +1 -2
  48. teuthology/provision/downburst.py +4 -3
  49. teuthology/provision/fog.py +68 -20
  50. teuthology/provision/openstack.py +5 -4
  51. teuthology/provision/pelagos.py +1 -1
  52. teuthology/repo_utils.py +43 -13
  53. teuthology/report.py +57 -35
  54. teuthology/results.py +5 -3
  55. teuthology/run.py +13 -14
  56. teuthology/run_tasks.py +27 -43
  57. teuthology/schedule.py +4 -3
  58. teuthology/scrape.py +28 -22
  59. teuthology/suite/__init__.py +74 -45
  60. teuthology/suite/build_matrix.py +34 -24
  61. teuthology/suite/fragment-merge.lua +105 -0
  62. teuthology/suite/matrix.py +31 -2
  63. teuthology/suite/merge.py +175 -0
  64. teuthology/suite/placeholder.py +6 -9
  65. teuthology/suite/run.py +175 -100
  66. teuthology/suite/util.py +64 -218
  67. teuthology/task/__init__.py +1 -1
  68. teuthology/task/ansible.py +101 -32
  69. teuthology/task/buildpackages.py +2 -2
  70. teuthology/task/ceph_ansible.py +13 -6
  71. teuthology/task/cephmetrics.py +2 -1
  72. teuthology/task/clock.py +33 -14
  73. teuthology/task/exec.py +18 -0
  74. teuthology/task/hadoop.py +2 -2
  75. teuthology/task/install/__init__.py +29 -7
  76. teuthology/task/install/bin/adjust-ulimits +16 -0
  77. teuthology/task/install/bin/daemon-helper +114 -0
  78. teuthology/task/install/bin/stdin-killer +263 -0
  79. teuthology/task/install/deb.py +1 -1
  80. teuthology/task/install/rpm.py +17 -5
  81. teuthology/task/install/util.py +3 -3
  82. teuthology/task/internal/__init__.py +41 -10
  83. teuthology/task/internal/edit_sudoers.sh +10 -0
  84. teuthology/task/internal/lock_machines.py +2 -9
  85. teuthology/task/internal/redhat.py +31 -1
  86. teuthology/task/internal/syslog.py +31 -8
  87. teuthology/task/kernel.py +152 -145
  88. teuthology/task/lockfile.py +1 -1
  89. teuthology/task/mpi.py +10 -10
  90. teuthology/task/pcp.py +1 -1
  91. teuthology/task/selinux.py +16 -8
  92. teuthology/task/ssh_keys.py +4 -4
  93. teuthology/task/tests/__init__.py +137 -77
  94. teuthology/task/tests/test_fetch_coredumps.py +116 -0
  95. teuthology/task/tests/test_run.py +4 -4
  96. teuthology/timer.py +3 -3
  97. teuthology/util/loggerfile.py +19 -0
  98. teuthology/util/scanner.py +159 -0
  99. teuthology/util/sentry.py +52 -0
  100. teuthology/util/time.py +52 -0
  101. teuthology-1.2.0.data/scripts/adjust-ulimits +16 -0
  102. teuthology-1.2.0.data/scripts/daemon-helper +114 -0
  103. teuthology-1.2.0.data/scripts/stdin-killer +263 -0
  104. teuthology-1.2.0.dist-info/METADATA +89 -0
  105. teuthology-1.2.0.dist-info/RECORD +174 -0
  106. {teuthology-1.1.0.dist-info → teuthology-1.2.0.dist-info}/WHEEL +1 -1
  107. {teuthology-1.1.0.dist-info → teuthology-1.2.0.dist-info}/entry_points.txt +3 -2
  108. scripts/nuke.py +0 -47
  109. scripts/worker.py +0 -37
  110. teuthology/nuke/actions.py +0 -456
  111. teuthology/openstack/test/__init__.py +0 -0
  112. teuthology/openstack/test/openstack-integration.py +0 -286
  113. teuthology/openstack/test/test_config.py +0 -35
  114. teuthology/openstack/test/test_openstack.py +0 -1695
  115. teuthology/orchestra/test/__init__.py +0 -0
  116. teuthology/orchestra/test/integration/__init__.py +0 -0
  117. teuthology/orchestra/test/integration/test_integration.py +0 -94
  118. teuthology/orchestra/test/test_cluster.py +0 -240
  119. teuthology/orchestra/test/test_connection.py +0 -106
  120. teuthology/orchestra/test/test_console.py +0 -217
  121. teuthology/orchestra/test/test_opsys.py +0 -404
  122. teuthology/orchestra/test/test_remote.py +0 -185
  123. teuthology/orchestra/test/test_run.py +0 -286
  124. teuthology/orchestra/test/test_systemd.py +0 -54
  125. teuthology/orchestra/test/util.py +0 -12
  126. teuthology/test/__init__.py +0 -0
  127. teuthology/test/fake_archive.py +0 -107
  128. teuthology/test/fake_fs.py +0 -92
  129. teuthology/test/integration/__init__.py +0 -0
  130. teuthology/test/integration/test_suite.py +0 -86
  131. teuthology/test/task/__init__.py +0 -205
  132. teuthology/test/task/test_ansible.py +0 -624
  133. teuthology/test/task/test_ceph_ansible.py +0 -176
  134. teuthology/test/task/test_console_log.py +0 -88
  135. teuthology/test/task/test_install.py +0 -337
  136. teuthology/test/task/test_internal.py +0 -57
  137. teuthology/test/task/test_kernel.py +0 -243
  138. teuthology/test/task/test_pcp.py +0 -379
  139. teuthology/test/task/test_selinux.py +0 -35
  140. teuthology/test/test_config.py +0 -189
  141. teuthology/test/test_contextutil.py +0 -68
  142. teuthology/test/test_describe_tests.py +0 -316
  143. teuthology/test/test_email_sleep_before_teardown.py +0 -81
  144. teuthology/test/test_exit.py +0 -97
  145. teuthology/test/test_get_distro.py +0 -47
  146. teuthology/test/test_get_distro_version.py +0 -47
  147. teuthology/test/test_get_multi_machine_types.py +0 -27
  148. teuthology/test/test_job_status.py +0 -60
  149. teuthology/test/test_ls.py +0 -48
  150. teuthology/test/test_misc.py +0 -391
  151. teuthology/test/test_nuke.py +0 -290
  152. teuthology/test/test_packaging.py +0 -763
  153. teuthology/test/test_parallel.py +0 -28
  154. teuthology/test/test_repo_utils.py +0 -225
  155. teuthology/test/test_report.py +0 -77
  156. teuthology/test/test_results.py +0 -155
  157. teuthology/test/test_run.py +0 -239
  158. teuthology/test/test_safepath.py +0 -55
  159. teuthology/test/test_schedule.py +0 -45
  160. teuthology/test/test_scrape.py +0 -167
  161. teuthology/test/test_timer.py +0 -80
  162. teuthology/test/test_vps_os_vers_parameter_checking.py +0 -84
  163. teuthology/test/test_worker.py +0 -303
  164. teuthology/worker.py +0 -354
  165. teuthology-1.1.0.dist-info/METADATA +0 -76
  166. teuthology-1.1.0.dist-info/RECORD +0 -213
  167. {teuthology-1.1.0.dist-info → teuthology-1.2.0.dist-info}/LICENSE +0 -0
  168. {teuthology-1.1.0.dist-info → teuthology-1.2.0.dist-info}/top_level.txt +0 -0
teuthology/config.py CHANGED
@@ -1,7 +1,10 @@
1
1
  import os
2
2
  import yaml
3
3
  import logging
4
- import collections
4
+ try:
5
+ from collections.abc import MutableMapping
6
+ except ImportError:
7
+ from collections import MutableMapping
5
8
 
6
9
 
7
10
  def init_logging():
@@ -11,7 +14,7 @@ def init_logging():
11
14
  log = init_logging()
12
15
 
13
16
 
14
- class YamlConfig(collections.MutableMapping):
17
+ class YamlConfig(MutableMapping):
15
18
  """
16
19
  A configuration object populated by parsing a yaml file, with optional
17
20
  default values.
@@ -30,12 +33,13 @@ class YamlConfig(collections.MutableMapping):
30
33
  self._conf = dict()
31
34
 
32
35
  def load(self, conf=None):
33
- if conf:
36
+ if conf is not None:
34
37
  if isinstance(conf, dict):
35
38
  self._conf = conf
36
- else:
39
+ return
40
+ elif conf:
37
41
  self._conf = yaml.safe_load(conf)
38
- return
42
+ return
39
43
  if os.path.exists(self.yaml_path):
40
44
  with open(self.yaml_path) as f:
41
45
  self._conf = yaml.safe_load(f)
@@ -149,10 +153,12 @@ class TeuthologyConfig(YamlConfig):
149
153
  'conserver_master': 'conserver.front.sepia.ceph.com',
150
154
  'conserver_port': 3109,
151
155
  'gitbuilder_host': 'gitbuilder.ceph.com',
152
- 'githelper_base_url': 'http://git.ceph.com:8080',
156
+ 'githelper_base_url': 'http://githelper.ceph.com',
153
157
  'check_package_signatures': True,
158
+ 'job_threshold': 500,
154
159
  'lab_domain': 'front.sepia.ceph.com',
155
160
  'lock_server': 'http://paddles.front.sepia.ceph.com/',
161
+ 'max_job_age': 1209600, # 2 weeks
156
162
  'max_job_time': 259200, # 3 days
157
163
  'nsupdate_url': 'http://nsupdate.front.sepia.ceph.com/update',
158
164
  'results_server': 'http://paddles.front.sepia.ceph.com/',
@@ -162,6 +168,8 @@ class TeuthologyConfig(YamlConfig):
162
168
  'src_base_path': os.path.expanduser('~/src'),
163
169
  'verify_host_keys': True,
164
170
  'watchdog_interval': 120,
171
+ 'fog_reimage_timeout': 1800,
172
+ 'fog_wait_for_ssh_timeout': 600,
165
173
  'kojihub_url': 'http://koji.fedoraproject.org/kojihub',
166
174
  'kojiroot_url': 'http://kojipkgs.fedoraproject.org/packages',
167
175
  'koji_task_url': 'https://kojipkgs.fedoraproject.org/work/',
@@ -187,6 +195,8 @@ class TeuthologyConfig(YamlConfig):
187
195
  },
188
196
  'rocketchat': None,
189
197
  'sleep_before_teardown': 0,
198
+ 'ssh_key': None,
199
+ 'active_machine_types': [],
190
200
  }
191
201
 
192
202
  def __init__(self, yaml_path=None):
teuthology/contextutil.py CHANGED
@@ -2,7 +2,6 @@ import contextlib
2
2
  import sys
3
3
  import logging
4
4
  import time
5
- import itertools
6
5
 
7
6
  from teuthology.config import config
8
7
  from teuthology.exceptions import MaxWhileTries
@@ -58,8 +57,8 @@ def nested(*managers):
58
57
  class safe_while(object):
59
58
  """
60
59
  A context manager to remove boilerplate code that deals with `while` loops
61
- that need a given number of tries and some seconds to sleep between each
62
- one of those tries.
60
+ that need a given number of tries or total timeout and some seconds to sleep
61
+ between each one of those tries.
63
62
 
64
63
  The most simple example possible will try 10 times sleeping for 6 seconds:
65
64
 
@@ -82,6 +81,8 @@ class safe_while(object):
82
81
  :param increment: The amount to add to the sleep value on each try.
83
82
  Default 0.
84
83
  :param tries: The amount of tries before giving up. Default 10.
84
+ :param timeout: Total seconds to try for, overrides the tries parameter
85
+ if specified. Default 0.
85
86
  :param action: The name of the action being attempted. Default none.
86
87
  :param _raise: Whether to raise an exception (or log a warning).
87
88
  Default True.
@@ -89,28 +90,24 @@ class safe_while(object):
89
90
  Default time.sleep
90
91
  """
91
92
 
92
- def __init__(self, sleep=6, increment=0, tries=10, action=None,
93
+ def __init__(self, sleep=6, increment=0, tries=10, timeout=0, action=None,
93
94
  _raise=True, _sleeper=None):
94
95
  self.sleep = sleep
95
96
  self.increment = increment
96
97
  self.tries = tries
98
+ self.timeout = timeout
97
99
  self.counter = 0
98
100
  self.sleep_current = sleep
99
101
  self.action = action
100
102
  self._raise = _raise
101
103
  self.sleeper = _sleeper or time.sleep
104
+ self.total_seconds = sleep
102
105
 
103
106
  def _make_error_msg(self):
104
107
  """
105
108
  Sum the total number of seconds we waited while providing the number
106
109
  of tries we attempted
107
110
  """
108
- total_seconds_waiting = sum(
109
- itertools.islice(
110
- itertools.count(self.sleep, self.increment),
111
- self.tries
112
- )
113
- )
114
111
  msg = 'reached maximum tries ({tries})' + \
115
112
  ' after waiting for {total} seconds'
116
113
  if self.action:
@@ -118,8 +115,8 @@ class safe_while(object):
118
115
 
119
116
  msg = msg.format(
120
117
  action=self.action,
121
- tries=self.tries,
122
- total=total_seconds_waiting,
118
+ tries=self.counter - 1,
119
+ total=self.total_seconds,
123
120
  )
124
121
  return msg
125
122
 
@@ -127,15 +124,22 @@ class safe_while(object):
127
124
  self.counter += 1
128
125
  if self.counter == 1:
129
126
  return True
130
- if self.counter > self.tries:
127
+ def must_stop():
128
+ return self.tries > 0 and self.counter > self.tries
129
+ if ((self.timeout > 0 and
130
+ self.total_seconds >= self.timeout) or
131
+ (self.timeout == 0 and must_stop())):
131
132
  error_msg = self._make_error_msg()
132
133
  if self._raise:
133
134
  raise MaxWhileTries(error_msg)
134
135
  else:
135
136
  log.warning(error_msg)
136
137
  return False
137
- self.sleeper(self.sleep_current)
138
138
  self.sleep_current += self.increment
139
+ if self.timeout > 0:
140
+ self.sleep_current = min(self.timeout - self.total_seconds, self.sleep_current)
141
+ self.total_seconds += self.sleep_current
142
+ self.sleeper(self.sleep_current)
139
143
  return True
140
144
 
141
145
  def __enter__(self):
@@ -13,7 +13,7 @@ from distutils.util import strtobool
13
13
  from teuthology.exceptions import ParseError
14
14
  from teuthology.suite.build_matrix import \
15
15
  build_matrix, generate_combinations, _get_matrix
16
- from teuthology.suite import util
16
+ from teuthology.suite import util, merge
17
17
 
18
18
  def main(args):
19
19
  try:
@@ -57,6 +57,7 @@ def describe_tests(args):
57
57
  limit=conf['limit'],
58
58
  seed=conf['seed'],
59
59
  subset=conf['subset'],
60
+ no_nested_subset=conf['no_nested_subset'],
60
61
  fields=conf['fields'],
61
62
  filter_in=conf['filter_in'],
62
63
  filter_out=conf['filter_out'],
@@ -69,6 +70,7 @@ def describe_tests(args):
69
70
  limit=conf['limit'],
70
71
  seed=conf['seed'],
71
72
  subset=conf['subset'],
73
+ no_nested_subset=conf['no_nested_subset'],
72
74
  show_desc=conf['print_description'],
73
75
  show_frag=conf['print_fragments'],
74
76
  filter_in=conf['filter_in'],
@@ -109,6 +111,7 @@ def output_results(headers, rows, output_format, hrule):
109
111
  def output_summary(path, limit=0,
110
112
  seed=None,
111
113
  subset=None,
114
+ no_nested_subset=None,
112
115
  show_desc=True,
113
116
  show_frag=False,
114
117
  show_matrix=False,
@@ -124,17 +127,19 @@ def output_summary(path, limit=0,
124
127
  """
125
128
 
126
129
  random.seed(seed)
127
- mat, first, matlimit = _get_matrix(path, subset)
130
+ mat, first, matlimit = _get_matrix(path, subset=subset, no_nested_subset=no_nested_subset)
128
131
  configs = generate_combinations(path, mat, first, matlimit)
129
132
  count = 0
133
+ total = len(configs)
130
134
  suite = os.path.basename(path)
131
- config_list = util.filter_configs(configs,
132
- suite_name=suite,
133
- filter_in=filter_in,
134
- filter_out=filter_out,
135
- filter_all=filter_all,
136
- filter_fragments=filter_fragments)
137
- for c in config_list:
135
+ configs = merge.config_merge(configs,
136
+ suite_name=suite,
137
+ filter_in=filter_in,
138
+ filter_out=filter_out,
139
+ filter_all=filter_all,
140
+ filter_fragments=filter_fragments,
141
+ seed=seed)
142
+ for c in configs:
138
143
  if limit and count >= limit:
139
144
  break
140
145
  count += 1
@@ -145,12 +150,13 @@ def output_summary(path, limit=0,
145
150
  print(" {}".format(util.strip_fragment_path(path)))
146
151
  if show_matrix:
147
152
  print(mat.tostr(1))
148
- print("# {}/{} {}".format(count, len(configs), path))
153
+ print("# {}/{} {}".format(count, total, path))
149
154
 
150
155
  def get_combinations(suite_dir,
151
156
  limit=0,
152
157
  seed=None,
153
158
  subset=None,
159
+ no_nested_subset=False,
154
160
  fields=[],
155
161
  filter_in=None,
156
162
  filter_out=None,
@@ -166,7 +172,7 @@ def get_combinations(suite_dir,
166
172
  of strings.
167
173
  """
168
174
  suite = os.path.basename(suite_dir)
169
- configs = build_matrix(suite_dir, subset, seed)
175
+ configs = build_matrix(suite_dir, subset=subset, no_nested_subset=no_nested_subset, seed=seed)
170
176
 
171
177
  num_listed = 0
172
178
  rows = []
@@ -175,13 +181,14 @@ def get_combinations(suite_dir,
175
181
  dirs = {}
176
182
  max_dir_depth = 0
177
183
 
178
- configs = util.filter_configs(configs,
179
- suite_name=suite,
180
- filter_in=filter_in,
181
- filter_out=filter_out,
182
- filter_all=filter_all,
183
- filter_fragments=filter_fragments)
184
- for _, fragment_paths in configs:
184
+ configs = merge.config_merge(configs,
185
+ suite_name=suite,
186
+ filter_in=filter_in,
187
+ filter_out=filter_out,
188
+ filter_all=filter_all,
189
+ filter_fragments=filter_fragments,
190
+ seed=seed)
191
+ for _, fragment_paths, __ in configs:
185
192
  if limit > 0 and num_listed >= limit:
186
193
  break
187
194
 
@@ -1,25 +1,32 @@
1
+ import datetime
1
2
  import logging
2
3
  import os
4
+ import psutil
3
5
  import subprocess
4
6
  import sys
5
7
  import yaml
6
8
 
7
- from datetime import datetime
8
-
9
- from teuthology import setup_log_file, install_except_hook
10
- from teuthology import beanstalk
11
- from teuthology import report
9
+ from typing import Dict, List
10
+
11
+ from teuthology import (
12
+ # non-modules
13
+ setup_log_file,
14
+ install_except_hook,
15
+ # modules
16
+ beanstalk,
17
+ exporter,
18
+ report,
19
+ repo_utils,
20
+ )
12
21
  from teuthology.config import config as teuth_config
13
- from teuthology.exceptions import SkipJob
14
- from teuthology.repo_utils import fetch_qa_suite, fetch_teuthology
15
- from teuthology.lock.ops import block_and_lock_machines
16
22
  from teuthology.dispatcher import supervisor
17
- from teuthology.worker import prep_job
23
+ from teuthology.exceptions import BranchNotFoundError, CommitNotFoundError, SkipJob, MaxWhileTries
24
+ from teuthology.lock import ops as lock_ops
25
+ from teuthology.util.time import parse_timestamp
18
26
  from teuthology import safepath
19
- from teuthology.nuke import nuke
20
27
 
21
28
  log = logging.getLogger(__name__)
22
- start_time = datetime.utcnow()
29
+ start_time = datetime.datetime.now(datetime.timezone.utc)
23
30
  restart_file_path = '/tmp/teuthology-restart-dispatcher'
24
31
  stop_file_path = '/tmp/teuthology-stop-dispatcher'
25
32
 
@@ -27,11 +34,14 @@ stop_file_path = '/tmp/teuthology-stop-dispatcher'
27
34
  def sentinel(path):
28
35
  if not os.path.exists(path):
29
36
  return False
30
- file_mtime = datetime.utcfromtimestamp(os.path.getmtime(path))
37
+ file_mtime = datetime.datetime.fromtimestamp(
38
+ os.path.getmtime(path),
39
+ datetime.timezone.utc,
40
+ )
31
41
  return file_mtime > start_time
32
42
 
33
43
 
34
- def restart():
44
+ def restart(log=log):
35
45
  log.info('Restarting...')
36
46
  args = sys.argv[:]
37
47
  args.insert(0, sys.executable)
@@ -56,38 +66,39 @@ def load_config(archive_dir=None):
56
66
 
57
67
 
58
68
  def main(args):
59
- # run dispatcher in job supervisor mode if --supervisor passed
60
- if args["--supervisor"]:
61
- return supervisor.main(args)
62
-
63
- verbose = args["--verbose"]
64
- tube = args["--tube"]
65
- log_dir = args["--log-dir"]
66
- archive_dir = args["--archive-dir"]
69
+ archive_dir = args.archive_dir or teuth_config.archive_base
67
70
 
68
- if archive_dir is None:
69
- archive_dir = teuth_config.archive_base
71
+ # Refuse to start more than one dispatcher per machine type
72
+ procs = find_dispatcher_processes().get(args.tube)
73
+ if procs:
74
+ raise RuntimeError(
75
+ "There is already a teuthology-dispatcher process running:"
76
+ f" {procs}"
77
+ )
70
78
 
71
79
  # setup logging for dispatcher in {log_dir}
72
80
  loglevel = logging.INFO
73
- if verbose:
81
+ if args.verbose:
74
82
  loglevel = logging.DEBUG
83
+ logging.getLogger().setLevel(loglevel)
75
84
  log.setLevel(loglevel)
76
- log_file_path = os.path.join(log_dir, f"dispatcher.{tube}.{os.getpid()}")
85
+ log_file_path = os.path.join(args.log_dir, f"dispatcher.{args.tube}.{os.getpid()}")
77
86
  setup_log_file(log_file_path)
78
87
  install_except_hook()
79
88
 
80
89
  load_config(archive_dir=archive_dir)
81
90
 
82
91
  connection = beanstalk.connect()
83
- beanstalk.watch_tube(connection, tube)
92
+ beanstalk.watch_tube(connection, args.tube)
84
93
  result_proc = None
85
94
 
86
95
  if teuth_config.teuthology_path is None:
87
- fetch_teuthology('master')
88
- fetch_qa_suite('master')
96
+ repo_utils.fetch_teuthology('main')
97
+ repo_utils.fetch_qa_suite('main')
89
98
 
90
99
  keep_running = True
100
+ job_procs = set()
101
+ worst_returncode = 0
91
102
  while keep_running:
92
103
  # Check to see if we have a teuthology-results process hanging around
93
104
  # and if so, read its return code so that it can exit.
@@ -102,9 +113,16 @@ def main(args):
102
113
  stop()
103
114
 
104
115
  load_config()
105
-
116
+ for proc in list(job_procs):
117
+ rc = proc.poll()
118
+ if rc is not None:
119
+ worst_returncode = max([worst_returncode, rc])
120
+ job_procs.remove(proc)
106
121
  job = connection.reserve(timeout=60)
107
122
  if job is None:
123
+ if args.exit_on_empty_queue and not job_procs:
124
+ log.info("Queue is empty and no supervisor processes running; exiting!")
125
+ break
108
126
  continue
109
127
 
110
128
  # bury the job so it won't be re-run if it fails
@@ -132,8 +150,7 @@ def main(args):
132
150
  job_config = lock_machines(job_config)
133
151
 
134
152
  run_args = [
135
- os.path.join(teuth_bin_path, 'teuthology-dispatcher'),
136
- '--supervisor',
153
+ os.path.join(teuth_bin_path, 'teuthology-supervisor'),
137
154
  '-v',
138
155
  '--bin-path', teuth_bin_path,
139
156
  '--archive-dir', archive_dir,
@@ -153,13 +170,24 @@ def main(args):
153
170
  run_args.extend(["--job-config", job_config_path])
154
171
 
155
172
  try:
156
- job_proc = subprocess.Popen(run_args)
173
+ job_proc = subprocess.Popen(
174
+ run_args,
175
+ stdout=subprocess.DEVNULL,
176
+ stderr=subprocess.DEVNULL,
177
+ )
178
+ job_procs.add(job_proc)
157
179
  log.info('Job supervisor PID: %s', job_proc.pid)
158
180
  except Exception:
159
181
  error_message = "Saw error while trying to spawn supervisor."
160
182
  log.exception(error_message)
161
183
  if 'targets' in job_config:
162
- nuke(supervisor.create_fake_context(job_config), True)
184
+ node_names = job_config["targets"].keys()
185
+ lock_ops.unlock_safe(
186
+ node_names,
187
+ job_config["owner"],
188
+ job_config["name"],
189
+ job_config["job_id"]
190
+ )
163
191
  report.try_push_job_info(job_config, dict(
164
192
  status='fail',
165
193
  failure_reason=error_message))
@@ -171,12 +199,159 @@ def main(args):
171
199
  except Exception:
172
200
  log.exception("Saw exception while trying to delete job")
173
201
 
202
+ return worst_returncode
203
+
204
+
205
+ def find_dispatcher_processes() -> Dict[str, List[psutil.Process]]:
206
+ def match(proc):
207
+ try:
208
+ cmdline = proc.cmdline()
209
+ except psutil.AccessDenied:
210
+ return False
211
+ except psutil.ZombieProcess:
212
+ return False
213
+ if len(cmdline) < 3:
214
+ return False
215
+ if not cmdline[1].endswith("/teuthology-dispatcher"):
216
+ return False
217
+ if cmdline[2] == "--supervisor":
218
+ return False
219
+ if "--tube" not in cmdline:
220
+ return False
221
+ if proc.pid == os.getpid():
222
+ return False
223
+ return True
224
+
225
+ procs = {}
226
+ attrs = ["pid", "cmdline"]
227
+ for proc in psutil.process_iter(attrs=attrs):
228
+ if not match(proc):
229
+ continue
230
+ cmdline = proc.cmdline()
231
+ machine_type = cmdline[cmdline.index("--tube") + 1]
232
+ procs.setdefault(machine_type, []).append(proc)
233
+ return procs
234
+
235
+
236
+ def prep_job(job_config, log_file_path, archive_dir):
237
+ job_id = job_config['job_id']
238
+ check_job_expiration(job_config)
239
+
240
+ safe_archive = safepath.munge(job_config['name'])
241
+ job_config['worker_log'] = log_file_path
242
+ archive_path_full = os.path.join(
243
+ archive_dir, safe_archive, str(job_id))
244
+ job_config['archive_path'] = archive_path_full
245
+
246
+ # If the teuthology branch was not specified, default to main and
247
+ # store that value.
248
+ teuthology_branch = job_config.get('teuthology_branch', 'main')
249
+ job_config['teuthology_branch'] = teuthology_branch
250
+ teuthology_sha1 = job_config.get('teuthology_sha1')
251
+ if not teuthology_sha1:
252
+ repo_url = repo_utils.build_git_url('teuthology', 'ceph')
253
+ try:
254
+ teuthology_sha1 = repo_utils.ls_remote(repo_url, teuthology_branch)
255
+ except Exception as exc:
256
+ log.exception(f"Could not get teuthology sha1 for branch {teuthology_branch}")
257
+ report.try_push_job_info(
258
+ job_config,
259
+ dict(status='dead', failure_reason=str(exc))
260
+ )
261
+ raise SkipJob()
262
+ if not teuthology_sha1:
263
+ reason = "Teuthology branch {} not found; marking job as dead".format(teuthology_branch)
264
+ log.error(reason)
265
+ report.try_push_job_info(
266
+ job_config,
267
+ dict(status='dead', failure_reason=reason)
268
+ )
269
+ raise SkipJob()
270
+ if teuth_config.teuthology_path is None:
271
+ log.info('Using teuthology sha1 %s', teuthology_sha1)
272
+
273
+ try:
274
+ if teuth_config.teuthology_path is not None:
275
+ teuth_path = teuth_config.teuthology_path
276
+ else:
277
+ teuth_path = repo_utils.fetch_teuthology(branch=teuthology_branch,
278
+ commit=teuthology_sha1)
279
+ # For the teuthology tasks, we look for suite_branch, and if we
280
+ # don't get that, we look for branch, and fall back to 'main'.
281
+ # last-in-suite jobs don't have suite_branch or branch set.
282
+ ceph_branch = job_config.get('branch', 'main')
283
+ suite_branch = job_config.get('suite_branch', ceph_branch)
284
+ suite_sha1 = job_config.get('suite_sha1')
285
+ suite_repo = job_config.get('suite_repo')
286
+ if suite_repo:
287
+ teuth_config.ceph_qa_suite_git_url = suite_repo
288
+ job_config['suite_path'] = os.path.normpath(os.path.join(
289
+ repo_utils.fetch_qa_suite(suite_branch, suite_sha1),
290
+ job_config.get('suite_relpath', ''),
291
+ ))
292
+ except (BranchNotFoundError, CommitNotFoundError) as exc:
293
+ log.exception("Requested version not found; marking job as dead")
294
+ report.try_push_job_info(
295
+ job_config,
296
+ dict(status='dead', failure_reason=str(exc))
297
+ )
298
+ raise SkipJob()
299
+ except MaxWhileTries as exc:
300
+ log.exception("Failed to fetch or bootstrap; marking job as dead")
301
+ report.try_push_job_info(
302
+ job_config,
303
+ dict(status='dead', failure_reason=str(exc))
304
+ )
305
+ raise SkipJob()
306
+
307
+ teuth_bin_path = os.path.join(teuth_path, 'virtualenv', 'bin')
308
+ if not os.path.isdir(teuth_bin_path):
309
+ raise RuntimeError("teuthology branch %s at %s not bootstrapped!" %
310
+ (teuthology_branch, teuth_bin_path))
311
+ return job_config, teuth_bin_path
312
+
313
+
314
+ def check_job_expiration(job_config):
315
+ job_id = job_config['job_id']
316
+ expired = False
317
+ now = datetime.datetime.now(datetime.timezone.utc)
318
+ if expire_str := job_config.get('timestamp'):
319
+ expire = parse_timestamp(expire_str) + \
320
+ datetime.timedelta(seconds=teuth_config.max_job_age)
321
+ expired = expire < now
322
+ if not expired and (expire_str := job_config.get('expire')):
323
+ try:
324
+ expire = parse_timestamp(expire_str)
325
+ expired = expired or expire < now
326
+ except ValueError:
327
+ log.warning(f"Failed to parse job expiration: {expire_str=}")
328
+ pass
329
+ if expired:
330
+ log.info(f"Skipping job {job_id} because it is expired: {expire_str} is in the past")
331
+ report.try_push_job_info(
332
+ job_config,
333
+ # TODO: Add a 'canceled' status to paddles, and use that.
334
+ dict(status='dead'),
335
+ )
336
+ raise SkipJob()
337
+
174
338
 
175
339
  def lock_machines(job_config):
176
340
  report.try_push_job_info(job_config, dict(status='running'))
177
341
  fake_ctx = supervisor.create_fake_context(job_config, block=True)
178
- block_and_lock_machines(fake_ctx, len(job_config['roles']),
179
- job_config['machine_type'], reimage=False)
342
+ machine_type = job_config["machine_type"]
343
+ count = len(job_config['roles'])
344
+ with exporter.NodeLockingTime().time(
345
+ machine_type=machine_type,
346
+ count=count,
347
+ ):
348
+ lock_ops.block_and_lock_machines(
349
+ fake_ctx,
350
+ count,
351
+ machine_type,
352
+ tries=-1,
353
+ reimage=False,
354
+ )
180
355
  job_config = fake_ctx.config
181
356
  return job_config
182
357