teuthology 1.0.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. scripts/describe.py +1 -0
  2. scripts/dispatcher.py +62 -0
  3. scripts/exporter.py +18 -0
  4. scripts/lock.py +1 -1
  5. scripts/node_cleanup.py +58 -0
  6. scripts/openstack.py +9 -9
  7. scripts/results.py +12 -11
  8. scripts/run.py +4 -0
  9. scripts/schedule.py +4 -0
  10. scripts/suite.py +61 -16
  11. scripts/supervisor.py +44 -0
  12. scripts/update_inventory.py +10 -4
  13. scripts/wait.py +31 -0
  14. teuthology/__init__.py +24 -21
  15. teuthology/beanstalk.py +4 -3
  16. teuthology/config.py +17 -6
  17. teuthology/contextutil.py +18 -14
  18. teuthology/describe_tests.py +25 -18
  19. teuthology/dispatcher/__init__.py +365 -0
  20. teuthology/dispatcher/supervisor.py +374 -0
  21. teuthology/exceptions.py +54 -0
  22. teuthology/exporter.py +347 -0
  23. teuthology/kill.py +76 -75
  24. teuthology/lock/cli.py +16 -7
  25. teuthology/lock/ops.py +276 -70
  26. teuthology/lock/query.py +61 -44
  27. teuthology/ls.py +9 -18
  28. teuthology/misc.py +152 -137
  29. teuthology/nuke/__init__.py +12 -351
  30. teuthology/openstack/__init__.py +4 -3
  31. teuthology/openstack/openstack-centos-7.0-user-data.txt +1 -1
  32. teuthology/openstack/openstack-centos-7.1-user-data.txt +1 -1
  33. teuthology/openstack/openstack-centos-7.2-user-data.txt +1 -1
  34. teuthology/openstack/openstack-debian-8.0-user-data.txt +1 -1
  35. teuthology/openstack/openstack-opensuse-42.1-user-data.txt +1 -1
  36. teuthology/openstack/openstack-teuthology.cron +0 -1
  37. teuthology/orchestra/cluster.py +51 -9
  38. teuthology/orchestra/connection.py +23 -16
  39. teuthology/orchestra/console.py +111 -50
  40. teuthology/orchestra/daemon/cephadmunit.py +23 -5
  41. teuthology/orchestra/daemon/state.py +10 -3
  42. teuthology/orchestra/daemon/systemd.py +10 -8
  43. teuthology/orchestra/opsys.py +32 -11
  44. teuthology/orchestra/remote.py +369 -152
  45. teuthology/orchestra/run.py +21 -12
  46. teuthology/packaging.py +54 -15
  47. teuthology/provision/__init__.py +30 -10
  48. teuthology/provision/cloud/openstack.py +12 -6
  49. teuthology/provision/cloud/util.py +1 -2
  50. teuthology/provision/downburst.py +83 -29
  51. teuthology/provision/fog.py +68 -20
  52. teuthology/provision/openstack.py +5 -4
  53. teuthology/provision/pelagos.py +13 -5
  54. teuthology/repo_utils.py +91 -44
  55. teuthology/report.py +57 -35
  56. teuthology/results.py +5 -3
  57. teuthology/run.py +21 -15
  58. teuthology/run_tasks.py +114 -40
  59. teuthology/schedule.py +4 -3
  60. teuthology/scrape.py +28 -22
  61. teuthology/suite/__init__.py +75 -46
  62. teuthology/suite/build_matrix.py +34 -24
  63. teuthology/suite/fragment-merge.lua +105 -0
  64. teuthology/suite/matrix.py +31 -2
  65. teuthology/suite/merge.py +175 -0
  66. teuthology/suite/placeholder.py +8 -8
  67. teuthology/suite/run.py +204 -102
  68. teuthology/suite/util.py +67 -211
  69. teuthology/task/__init__.py +1 -1
  70. teuthology/task/ansible.py +101 -31
  71. teuthology/task/buildpackages.py +2 -2
  72. teuthology/task/ceph_ansible.py +13 -6
  73. teuthology/task/cephmetrics.py +2 -1
  74. teuthology/task/clock.py +33 -14
  75. teuthology/task/exec.py +18 -0
  76. teuthology/task/hadoop.py +2 -2
  77. teuthology/task/install/__init__.py +51 -22
  78. teuthology/task/install/bin/adjust-ulimits +16 -0
  79. teuthology/task/install/bin/daemon-helper +114 -0
  80. teuthology/task/install/bin/stdin-killer +263 -0
  81. teuthology/task/install/deb.py +24 -4
  82. teuthology/task/install/redhat.py +36 -32
  83. teuthology/task/install/rpm.py +41 -14
  84. teuthology/task/install/util.py +48 -22
  85. teuthology/task/internal/__init__.py +69 -11
  86. teuthology/task/internal/edit_sudoers.sh +10 -0
  87. teuthology/task/internal/lock_machines.py +3 -133
  88. teuthology/task/internal/redhat.py +48 -28
  89. teuthology/task/internal/syslog.py +31 -8
  90. teuthology/task/kernel.py +155 -147
  91. teuthology/task/lockfile.py +1 -1
  92. teuthology/task/mpi.py +10 -10
  93. teuthology/task/pcp.py +1 -1
  94. teuthology/task/selinux.py +17 -8
  95. teuthology/task/ssh_keys.py +6 -6
  96. teuthology/task/tests/__init__.py +137 -77
  97. teuthology/task/tests/test_fetch_coredumps.py +116 -0
  98. teuthology/task/tests/test_run.py +4 -4
  99. teuthology/timer.py +3 -3
  100. teuthology/util/loggerfile.py +19 -0
  101. teuthology/util/scanner.py +159 -0
  102. teuthology/util/sentry.py +52 -0
  103. teuthology/util/time.py +52 -0
  104. teuthology-1.2.0.data/scripts/adjust-ulimits +16 -0
  105. teuthology-1.2.0.data/scripts/daemon-helper +114 -0
  106. teuthology-1.2.0.data/scripts/stdin-killer +263 -0
  107. teuthology-1.2.0.dist-info/METADATA +89 -0
  108. teuthology-1.2.0.dist-info/RECORD +174 -0
  109. {teuthology-1.0.0.dist-info → teuthology-1.2.0.dist-info}/WHEEL +1 -1
  110. {teuthology-1.0.0.dist-info → teuthology-1.2.0.dist-info}/entry_points.txt +5 -2
  111. scripts/nuke.py +0 -45
  112. scripts/worker.py +0 -37
  113. teuthology/nuke/actions.py +0 -456
  114. teuthology/openstack/test/__init__.py +0 -0
  115. teuthology/openstack/test/openstack-integration.py +0 -286
  116. teuthology/openstack/test/test_config.py +0 -35
  117. teuthology/openstack/test/test_openstack.py +0 -1695
  118. teuthology/orchestra/test/__init__.py +0 -0
  119. teuthology/orchestra/test/integration/__init__.py +0 -0
  120. teuthology/orchestra/test/integration/test_integration.py +0 -94
  121. teuthology/orchestra/test/test_cluster.py +0 -240
  122. teuthology/orchestra/test/test_connection.py +0 -106
  123. teuthology/orchestra/test/test_console.py +0 -217
  124. teuthology/orchestra/test/test_opsys.py +0 -404
  125. teuthology/orchestra/test/test_remote.py +0 -185
  126. teuthology/orchestra/test/test_run.py +0 -286
  127. teuthology/orchestra/test/test_systemd.py +0 -54
  128. teuthology/orchestra/test/util.py +0 -12
  129. teuthology/sentry.py +0 -18
  130. teuthology/test/__init__.py +0 -0
  131. teuthology/test/fake_archive.py +0 -107
  132. teuthology/test/fake_fs.py +0 -92
  133. teuthology/test/integration/__init__.py +0 -0
  134. teuthology/test/integration/test_suite.py +0 -86
  135. teuthology/test/task/__init__.py +0 -205
  136. teuthology/test/task/test_ansible.py +0 -624
  137. teuthology/test/task/test_ceph_ansible.py +0 -176
  138. teuthology/test/task/test_console_log.py +0 -88
  139. teuthology/test/task/test_install.py +0 -337
  140. teuthology/test/task/test_internal.py +0 -57
  141. teuthology/test/task/test_kernel.py +0 -243
  142. teuthology/test/task/test_pcp.py +0 -379
  143. teuthology/test/task/test_selinux.py +0 -35
  144. teuthology/test/test_config.py +0 -189
  145. teuthology/test/test_contextutil.py +0 -68
  146. teuthology/test/test_describe_tests.py +0 -316
  147. teuthology/test/test_email_sleep_before_teardown.py +0 -81
  148. teuthology/test/test_exit.py +0 -97
  149. teuthology/test/test_get_distro.py +0 -47
  150. teuthology/test/test_get_distro_version.py +0 -47
  151. teuthology/test/test_get_multi_machine_types.py +0 -27
  152. teuthology/test/test_job_status.py +0 -60
  153. teuthology/test/test_ls.py +0 -48
  154. teuthology/test/test_misc.py +0 -368
  155. teuthology/test/test_nuke.py +0 -232
  156. teuthology/test/test_packaging.py +0 -763
  157. teuthology/test/test_parallel.py +0 -28
  158. teuthology/test/test_repo_utils.py +0 -204
  159. teuthology/test/test_report.py +0 -77
  160. teuthology/test/test_results.py +0 -155
  161. teuthology/test/test_run.py +0 -238
  162. teuthology/test/test_safepath.py +0 -55
  163. teuthology/test/test_schedule.py +0 -45
  164. teuthology/test/test_scrape.py +0 -167
  165. teuthology/test/test_timer.py +0 -80
  166. teuthology/test/test_vps_os_vers_parameter_checking.py +0 -84
  167. teuthology/test/test_worker.py +0 -303
  168. teuthology/worker.py +0 -339
  169. teuthology-1.0.0.dist-info/METADATA +0 -76
  170. teuthology-1.0.0.dist-info/RECORD +0 -210
  171. {teuthology-1.0.0.dist-info → teuthology-1.2.0.dist-info}/LICENSE +0 -0
  172. {teuthology-1.0.0.dist-info → teuthology-1.2.0.dist-info}/top_level.txt +0 -0
teuthology/config.py CHANGED
@@ -1,7 +1,10 @@
1
1
  import os
2
2
  import yaml
3
3
  import logging
4
- import collections
4
+ try:
5
+ from collections.abc import MutableMapping
6
+ except ImportError:
7
+ from collections import MutableMapping
5
8
 
6
9
 
7
10
  def init_logging():
@@ -11,7 +14,7 @@ def init_logging():
11
14
  log = init_logging()
12
15
 
13
16
 
14
- class YamlConfig(collections.MutableMapping):
17
+ class YamlConfig(MutableMapping):
15
18
  """
16
19
  A configuration object populated by parsing a yaml file, with optional
17
20
  default values.
@@ -30,12 +33,13 @@ class YamlConfig(collections.MutableMapping):
30
33
  self._conf = dict()
31
34
 
32
35
  def load(self, conf=None):
33
- if conf:
36
+ if conf is not None:
34
37
  if isinstance(conf, dict):
35
38
  self._conf = conf
36
- else:
39
+ return
40
+ elif conf:
37
41
  self._conf = yaml.safe_load(conf)
38
- return
42
+ return
39
43
  if os.path.exists(self.yaml_path):
40
44
  with open(self.yaml_path) as f:
41
45
  self._conf = yaml.safe_load(f)
@@ -149,10 +153,12 @@ class TeuthologyConfig(YamlConfig):
149
153
  'conserver_master': 'conserver.front.sepia.ceph.com',
150
154
  'conserver_port': 3109,
151
155
  'gitbuilder_host': 'gitbuilder.ceph.com',
152
- 'githelper_base_url': 'http://git.ceph.com:8080',
156
+ 'githelper_base_url': 'http://githelper.ceph.com',
153
157
  'check_package_signatures': True,
158
+ 'job_threshold': 500,
154
159
  'lab_domain': 'front.sepia.ceph.com',
155
160
  'lock_server': 'http://paddles.front.sepia.ceph.com/',
161
+ 'max_job_age': 1209600, # 2 weeks
156
162
  'max_job_time': 259200, # 3 days
157
163
  'nsupdate_url': 'http://nsupdate.front.sepia.ceph.com/update',
158
164
  'results_server': 'http://paddles.front.sepia.ceph.com/',
@@ -162,6 +168,8 @@ class TeuthologyConfig(YamlConfig):
162
168
  'src_base_path': os.path.expanduser('~/src'),
163
169
  'verify_host_keys': True,
164
170
  'watchdog_interval': 120,
171
+ 'fog_reimage_timeout': 1800,
172
+ 'fog_wait_for_ssh_timeout': 600,
165
173
  'kojihub_url': 'http://koji.fedoraproject.org/kojihub',
166
174
  'kojiroot_url': 'http://kojipkgs.fedoraproject.org/packages',
167
175
  'koji_task_url': 'https://kojipkgs.fedoraproject.org/work/',
@@ -185,7 +193,10 @@ class TeuthologyConfig(YamlConfig):
185
193
  'size': 1,
186
194
  },
187
195
  },
196
+ 'rocketchat': None,
188
197
  'sleep_before_teardown': 0,
198
+ 'ssh_key': None,
199
+ 'active_machine_types': [],
189
200
  }
190
201
 
191
202
  def __init__(self, yaml_path=None):
teuthology/contextutil.py CHANGED
@@ -2,7 +2,6 @@ import contextlib
2
2
  import sys
3
3
  import logging
4
4
  import time
5
- import itertools
6
5
 
7
6
  from teuthology.config import config
8
7
  from teuthology.exceptions import MaxWhileTries
@@ -58,8 +57,8 @@ def nested(*managers):
58
57
  class safe_while(object):
59
58
  """
60
59
  A context manager to remove boiler plate code that deals with `while` loops
61
- that need a given number of tries and some seconds to sleep between each
62
- one of those tries.
60
+ that need a given number of tries or total timeout and some seconds to sleep
61
+ between each one of those tries.
63
62
 
64
63
  The most simple example possible will try 10 times sleeping for 6 seconds:
65
64
 
@@ -82,6 +81,8 @@ class safe_while(object):
82
81
  :param increment: The amount to add to the sleep value on each try.
83
82
  Default 0.
84
83
  :param tries: The amount of tries before giving up. Default 10.
84
+ :param timeout: Total seconds to try for, overrides the tries parameter
85
+ if specified. Default 0.
85
86
  :param action: The name of the action being attempted. Default none.
86
87
  :param _raise: Whether to raise an exception (or log a warning).
87
88
  Default True.
@@ -89,28 +90,24 @@ class safe_while(object):
89
90
  Default time.sleep
90
91
  """
91
92
 
92
- def __init__(self, sleep=6, increment=0, tries=10, action=None,
93
+ def __init__(self, sleep=6, increment=0, tries=10, timeout=0, action=None,
93
94
  _raise=True, _sleeper=None):
94
95
  self.sleep = sleep
95
96
  self.increment = increment
96
97
  self.tries = tries
98
+ self.timeout = timeout
97
99
  self.counter = 0
98
100
  self.sleep_current = sleep
99
101
  self.action = action
100
102
  self._raise = _raise
101
103
  self.sleeper = _sleeper or time.sleep
104
+ self.total_seconds = sleep
102
105
 
103
106
  def _make_error_msg(self):
104
107
  """
105
108
  Sum the total number of seconds we waited while providing the number
106
109
  of tries we attempted
107
110
  """
108
- total_seconds_waiting = sum(
109
- itertools.islice(
110
- itertools.count(self.sleep, self.increment),
111
- self.tries
112
- )
113
- )
114
111
  msg = 'reached maximum tries ({tries})' + \
115
112
  ' after waiting for {total} seconds'
116
113
  if self.action:
@@ -118,8 +115,8 @@ class safe_while(object):
118
115
 
119
116
  msg = msg.format(
120
117
  action=self.action,
121
- tries=self.tries,
122
- total=total_seconds_waiting,
118
+ tries=self.counter - 1,
119
+ total=self.total_seconds,
123
120
  )
124
121
  return msg
125
122
 
@@ -127,15 +124,22 @@ class safe_while(object):
127
124
  self.counter += 1
128
125
  if self.counter == 1:
129
126
  return True
130
- if self.counter > self.tries:
127
+ def must_stop():
128
+ return self.tries > 0 and self.counter > self.tries
129
+ if ((self.timeout > 0 and
130
+ self.total_seconds >= self.timeout) or
131
+ (self.timeout == 0 and must_stop())):
131
132
  error_msg = self._make_error_msg()
132
133
  if self._raise:
133
134
  raise MaxWhileTries(error_msg)
134
135
  else:
135
136
  log.warning(error_msg)
136
137
  return False
137
- self.sleeper(self.sleep_current)
138
138
  self.sleep_current += self.increment
139
+ if self.timeout > 0:
140
+ self.sleep_current = min(self.timeout - self.total_seconds, self.sleep_current)
141
+ self.total_seconds += self.sleep_current
142
+ self.sleeper(self.sleep_current)
139
143
  return True
140
144
 
141
145
  def __enter__(self):
@@ -13,7 +13,7 @@ from distutils.util import strtobool
13
13
  from teuthology.exceptions import ParseError
14
14
  from teuthology.suite.build_matrix import \
15
15
  build_matrix, generate_combinations, _get_matrix
16
- from teuthology.suite import util
16
+ from teuthology.suite import util, merge
17
17
 
18
18
  def main(args):
19
19
  try:
@@ -57,6 +57,7 @@ def describe_tests(args):
57
57
  limit=conf['limit'],
58
58
  seed=conf['seed'],
59
59
  subset=conf['subset'],
60
+ no_nested_subset=conf['no_nested_subset'],
60
61
  fields=conf['fields'],
61
62
  filter_in=conf['filter_in'],
62
63
  filter_out=conf['filter_out'],
@@ -69,6 +70,7 @@ def describe_tests(args):
69
70
  limit=conf['limit'],
70
71
  seed=conf['seed'],
71
72
  subset=conf['subset'],
73
+ no_nested_subset=conf['no_nested_subset'],
72
74
  show_desc=conf['print_description'],
73
75
  show_frag=conf['print_fragments'],
74
76
  filter_in=conf['filter_in'],
@@ -109,6 +111,7 @@ def output_results(headers, rows, output_format, hrule):
109
111
  def output_summary(path, limit=0,
110
112
  seed=None,
111
113
  subset=None,
114
+ no_nested_subset=None,
112
115
  show_desc=True,
113
116
  show_frag=False,
114
117
  show_matrix=False,
@@ -124,17 +127,19 @@ def output_summary(path, limit=0,
124
127
  """
125
128
 
126
129
  random.seed(seed)
127
- mat, first, matlimit = _get_matrix(path, subset)
130
+ mat, first, matlimit = _get_matrix(path, subset=subset, no_nested_subset=no_nested_subset)
128
131
  configs = generate_combinations(path, mat, first, matlimit)
129
132
  count = 0
133
+ total = len(configs)
130
134
  suite = os.path.basename(path)
131
- config_list = util.filter_configs(configs,
132
- suite_name=suite,
133
- filter_in=filter_in,
134
- filter_out=filter_out,
135
- filter_all=filter_all,
136
- filter_fragments=filter_fragments)
137
- for c in config_list:
135
+ configs = merge.config_merge(configs,
136
+ suite_name=suite,
137
+ filter_in=filter_in,
138
+ filter_out=filter_out,
139
+ filter_all=filter_all,
140
+ filter_fragments=filter_fragments,
141
+ seed=seed)
142
+ for c in configs:
138
143
  if limit and count >= limit:
139
144
  break
140
145
  count += 1
@@ -145,12 +150,13 @@ def output_summary(path, limit=0,
145
150
  print(" {}".format(util.strip_fragment_path(path)))
146
151
  if show_matrix:
147
152
  print(mat.tostr(1))
148
- print("# {}/{} {}".format(count, len(configs), path))
153
+ print("# {}/{} {}".format(count, total, path))
149
154
 
150
155
  def get_combinations(suite_dir,
151
156
  limit=0,
152
157
  seed=None,
153
158
  subset=None,
159
+ no_nested_subset=False,
154
160
  fields=[],
155
161
  filter_in=None,
156
162
  filter_out=None,
@@ -166,7 +172,7 @@ def get_combinations(suite_dir,
166
172
  of strings.
167
173
  """
168
174
  suite = os.path.basename(suite_dir)
169
- configs = build_matrix(suite_dir, subset, seed)
175
+ configs = build_matrix(suite_dir, subset=subset, no_nested_subset=no_nested_subset, seed=seed)
170
176
 
171
177
  num_listed = 0
172
178
  rows = []
@@ -175,13 +181,14 @@ def get_combinations(suite_dir,
175
181
  dirs = {}
176
182
  max_dir_depth = 0
177
183
 
178
- configs = util.filter_configs(configs,
179
- suite_name=suite,
180
- filter_in=filter_in,
181
- filter_out=filter_out,
182
- filter_all=filter_all,
183
- filter_fragments=filter_fragments)
184
- for _, fragment_paths in configs:
184
+ configs = merge.config_merge(configs,
185
+ suite_name=suite,
186
+ filter_in=filter_in,
187
+ filter_out=filter_out,
188
+ filter_all=filter_all,
189
+ filter_fragments=filter_fragments,
190
+ seed=seed)
191
+ for _, fragment_paths, __ in configs:
185
192
  if limit > 0 and num_listed >= limit:
186
193
  break
187
194
 
@@ -0,0 +1,365 @@
1
+ import datetime
2
+ import logging
3
+ import os
4
+ import psutil
5
+ import subprocess
6
+ import sys
7
+ import yaml
8
+
9
+ from typing import Dict, List
10
+
11
+ from teuthology import (
12
+ # non-modules
13
+ setup_log_file,
14
+ install_except_hook,
15
+ # modules
16
+ beanstalk,
17
+ exporter,
18
+ report,
19
+ repo_utils,
20
+ )
21
+ from teuthology.config import config as teuth_config
22
+ from teuthology.dispatcher import supervisor
23
+ from teuthology.exceptions import BranchNotFoundError, CommitNotFoundError, SkipJob, MaxWhileTries
24
+ from teuthology.lock import ops as lock_ops
25
+ from teuthology.util.time import parse_timestamp
26
+ from teuthology import safepath
27
+
28
+ log = logging.getLogger(__name__)
29
+ start_time = datetime.datetime.now(datetime.timezone.utc)
30
+ restart_file_path = '/tmp/teuthology-restart-dispatcher'
31
+ stop_file_path = '/tmp/teuthology-stop-dispatcher'
32
+
33
+
34
+ def sentinel(path):
35
+ if not os.path.exists(path):
36
+ return False
37
+ file_mtime = datetime.datetime.fromtimestamp(
38
+ os.path.getmtime(path),
39
+ datetime.timezone.utc,
40
+ )
41
+ return file_mtime > start_time
42
+
43
+
44
+ def restart(log=log):
45
+ log.info('Restarting...')
46
+ args = sys.argv[:]
47
+ args.insert(0, sys.executable)
48
+ os.execv(sys.executable, args)
49
+
50
+
51
+ def stop():
52
+ log.info('Stopping...')
53
+ sys.exit(0)
54
+
55
+
56
+ def load_config(archive_dir=None):
57
+ teuth_config.load()
58
+ if archive_dir is not None:
59
+ if not os.path.isdir(archive_dir):
60
+ sys.exit("{prog}: archive directory must exist: {path}".format(
61
+ prog=os.path.basename(sys.argv[0]),
62
+ path=archive_dir,
63
+ ))
64
+ else:
65
+ teuth_config.archive_base = archive_dir
66
+
67
+
68
+ def main(args):
69
+ archive_dir = args.archive_dir or teuth_config.archive_base
70
+
71
+ # Refuse to start more than one dispatcher per machine type
72
+ procs = find_dispatcher_processes().get(args.tube)
73
+ if procs:
74
+ raise RuntimeError(
75
+ "There is already a teuthology-dispatcher process running:"
76
+ f" {procs}"
77
+ )
78
+
79
+ # setup logging for disoatcher in {log_dir}
80
+ loglevel = logging.INFO
81
+ if args.verbose:
82
+ loglevel = logging.DEBUG
83
+ logging.getLogger().setLevel(loglevel)
84
+ log.setLevel(loglevel)
85
+ log_file_path = os.path.join(args.log_dir, f"dispatcher.{args.tube}.{os.getpid()}")
86
+ setup_log_file(log_file_path)
87
+ install_except_hook()
88
+
89
+ load_config(archive_dir=archive_dir)
90
+
91
+ connection = beanstalk.connect()
92
+ beanstalk.watch_tube(connection, args.tube)
93
+ result_proc = None
94
+
95
+ if teuth_config.teuthology_path is None:
96
+ repo_utils.fetch_teuthology('main')
97
+ repo_utils.fetch_qa_suite('main')
98
+
99
+ keep_running = True
100
+ job_procs = set()
101
+ worst_returncode = 0
102
+ while keep_running:
103
+ # Check to see if we have a teuthology-results process hanging around
104
+ # and if so, read its return code so that it can exit.
105
+ if result_proc is not None and result_proc.poll() is not None:
106
+ log.debug("teuthology-results exited with code: %s",
107
+ result_proc.returncode)
108
+ result_proc = None
109
+
110
+ if sentinel(restart_file_path):
111
+ restart()
112
+ elif sentinel(stop_file_path):
113
+ stop()
114
+
115
+ load_config()
116
+ for proc in list(job_procs):
117
+ rc = proc.poll()
118
+ if rc is not None:
119
+ worst_returncode = max([worst_returncode, rc])
120
+ job_procs.remove(proc)
121
+ job = connection.reserve(timeout=60)
122
+ if job is None:
123
+ if args.exit_on_empty_queue and not job_procs:
124
+ log.info("Queue is empty and no supervisor processes running; exiting!")
125
+ break
126
+ continue
127
+
128
+ # bury the job so it won't be re-run if it fails
129
+ job.bury()
130
+ job_id = job.jid
131
+ log.info('Reserved job %d', job_id)
132
+ log.info('Config is: %s', job.body)
133
+ job_config = yaml.safe_load(job.body)
134
+ job_config['job_id'] = str(job_id)
135
+
136
+ if job_config.get('stop_worker'):
137
+ keep_running = False
138
+
139
+ try:
140
+ job_config, teuth_bin_path = prep_job(
141
+ job_config,
142
+ log_file_path,
143
+ archive_dir,
144
+ )
145
+ except SkipJob:
146
+ continue
147
+
148
+ # lock machines but do not reimage them
149
+ if 'roles' in job_config:
150
+ job_config = lock_machines(job_config)
151
+
152
+ run_args = [
153
+ os.path.join(teuth_bin_path, 'teuthology-supervisor'),
154
+ '-v',
155
+ '--bin-path', teuth_bin_path,
156
+ '--archive-dir', archive_dir,
157
+ ]
158
+
159
+ # Create run archive directory if not already created and
160
+ # job's archive directory
161
+ create_job_archive(job_config['name'],
162
+ job_config['archive_path'],
163
+ archive_dir)
164
+ job_config_path = os.path.join(job_config['archive_path'], 'orig.config.yaml')
165
+
166
+ # Write initial job config in job archive dir
167
+ with open(job_config_path, 'w') as f:
168
+ yaml.safe_dump(job_config, f, default_flow_style=False)
169
+
170
+ run_args.extend(["--job-config", job_config_path])
171
+
172
+ try:
173
+ job_proc = subprocess.Popen(
174
+ run_args,
175
+ stdout=subprocess.DEVNULL,
176
+ stderr=subprocess.DEVNULL,
177
+ )
178
+ job_procs.add(job_proc)
179
+ log.info('Job supervisor PID: %s', job_proc.pid)
180
+ except Exception:
181
+ error_message = "Saw error while trying to spawn supervisor."
182
+ log.exception(error_message)
183
+ if 'targets' in job_config:
184
+ node_names = job_config["targets"].keys()
185
+ lock_ops.unlock_safe(
186
+ node_names,
187
+ job_config["owner"],
188
+ job_config["name"],
189
+ job_config["job_id"]
190
+ )
191
+ report.try_push_job_info(job_config, dict(
192
+ status='fail',
193
+ failure_reason=error_message))
194
+
195
+ # This try/except block is to keep the worker from dying when
196
+ # beanstalkc throws a SocketError
197
+ try:
198
+ job.delete()
199
+ except Exception:
200
+ log.exception("Saw exception while trying to delete job")
201
+
202
+ return worst_returncode
203
+
204
+
205
+ def find_dispatcher_processes() -> Dict[str, List[psutil.Process]]:
206
+ def match(proc):
207
+ try:
208
+ cmdline = proc.cmdline()
209
+ except psutil.AccessDenied:
210
+ return False
211
+ except psutil.ZombieProcess:
212
+ return False
213
+ if len(cmdline) < 3:
214
+ return False
215
+ if not cmdline[1].endswith("/teuthology-dispatcher"):
216
+ return False
217
+ if cmdline[2] == "--supervisor":
218
+ return False
219
+ if "--tube" not in cmdline:
220
+ return False
221
+ if proc.pid == os.getpid():
222
+ return False
223
+ return True
224
+
225
+ procs = {}
226
+ attrs = ["pid", "cmdline"]
227
+ for proc in psutil.process_iter(attrs=attrs):
228
+ if not match(proc):
229
+ continue
230
+ cmdline = proc.cmdline()
231
+ machine_type = cmdline[cmdline.index("--tube") + 1]
232
+ procs.setdefault(machine_type, []).append(proc)
233
+ return procs
234
+
235
+
236
+ def prep_job(job_config, log_file_path, archive_dir):
237
+ job_id = job_config['job_id']
238
+ check_job_expiration(job_config)
239
+
240
+ safe_archive = safepath.munge(job_config['name'])
241
+ job_config['worker_log'] = log_file_path
242
+ archive_path_full = os.path.join(
243
+ archive_dir, safe_archive, str(job_id))
244
+ job_config['archive_path'] = archive_path_full
245
+
246
+ # If the teuthology branch was not specified, default to main and
247
+ # store that value.
248
+ teuthology_branch = job_config.get('teuthology_branch', 'main')
249
+ job_config['teuthology_branch'] = teuthology_branch
250
+ teuthology_sha1 = job_config.get('teuthology_sha1')
251
+ if not teuthology_sha1:
252
+ repo_url = repo_utils.build_git_url('teuthology', 'ceph')
253
+ try:
254
+ teuthology_sha1 = repo_utils.ls_remote(repo_url, teuthology_branch)
255
+ except Exception as exc:
256
+ log.exception(f"Could not get teuthology sha1 for branch {teuthology_branch}")
257
+ report.try_push_job_info(
258
+ job_config,
259
+ dict(status='dead', failure_reason=str(exc))
260
+ )
261
+ raise SkipJob()
262
+ if not teuthology_sha1:
263
+ reason = "Teuthology branch {} not found; marking job as dead".format(teuthology_branch)
264
+ log.error(reason)
265
+ report.try_push_job_info(
266
+ job_config,
267
+ dict(status='dead', failure_reason=reason)
268
+ )
269
+ raise SkipJob()
270
+ if teuth_config.teuthology_path is None:
271
+ log.info('Using teuthology sha1 %s', teuthology_sha1)
272
+
273
+ try:
274
+ if teuth_config.teuthology_path is not None:
275
+ teuth_path = teuth_config.teuthology_path
276
+ else:
277
+ teuth_path = repo_utils.fetch_teuthology(branch=teuthology_branch,
278
+ commit=teuthology_sha1)
279
+ # For the teuthology tasks, we look for suite_branch, and if we
280
+ # don't get that, we look for branch, and fall back to 'main'.
281
+ # last-in-suite jobs don't have suite_branch or branch set.
282
+ ceph_branch = job_config.get('branch', 'main')
283
+ suite_branch = job_config.get('suite_branch', ceph_branch)
284
+ suite_sha1 = job_config.get('suite_sha1')
285
+ suite_repo = job_config.get('suite_repo')
286
+ if suite_repo:
287
+ teuth_config.ceph_qa_suite_git_url = suite_repo
288
+ job_config['suite_path'] = os.path.normpath(os.path.join(
289
+ repo_utils.fetch_qa_suite(suite_branch, suite_sha1),
290
+ job_config.get('suite_relpath', ''),
291
+ ))
292
+ except (BranchNotFoundError, CommitNotFoundError) as exc:
293
+ log.exception("Requested version not found; marking job as dead")
294
+ report.try_push_job_info(
295
+ job_config,
296
+ dict(status='dead', failure_reason=str(exc))
297
+ )
298
+ raise SkipJob()
299
+ except MaxWhileTries as exc:
300
+ log.exception("Failed to fetch or bootstrap; marking job as dead")
301
+ report.try_push_job_info(
302
+ job_config,
303
+ dict(status='dead', failure_reason=str(exc))
304
+ )
305
+ raise SkipJob()
306
+
307
+ teuth_bin_path = os.path.join(teuth_path, 'virtualenv', 'bin')
308
+ if not os.path.isdir(teuth_bin_path):
309
+ raise RuntimeError("teuthology branch %s at %s not bootstrapped!" %
310
+ (teuthology_branch, teuth_bin_path))
311
+ return job_config, teuth_bin_path
312
+
313
+
314
+ def check_job_expiration(job_config):
315
+ job_id = job_config['job_id']
316
+ expired = False
317
+ now = datetime.datetime.now(datetime.timezone.utc)
318
+ if expire_str := job_config.get('timestamp'):
319
+ expire = parse_timestamp(expire_str) + \
320
+ datetime.timedelta(seconds=teuth_config.max_job_age)
321
+ expired = expire < now
322
+ if not expired and (expire_str := job_config.get('expire')):
323
+ try:
324
+ expire = parse_timestamp(expire_str)
325
+ expired = expired or expire < now
326
+ except ValueError:
327
+ log.warning(f"Failed to parse job expiration: {expire_str=}")
328
+ pass
329
+ if expired:
330
+ log.info(f"Skipping job {job_id} because it is expired: {expire_str} is in the past")
331
+ report.try_push_job_info(
332
+ job_config,
333
+ # TODO: Add a 'canceled' status to paddles, and use that.
334
+ dict(status='dead'),
335
+ )
336
+ raise SkipJob()
337
+
338
+
339
+ def lock_machines(job_config):
340
+ report.try_push_job_info(job_config, dict(status='running'))
341
+ fake_ctx = supervisor.create_fake_context(job_config, block=True)
342
+ machine_type = job_config["machine_type"]
343
+ count = len(job_config['roles'])
344
+ with exporter.NodeLockingTime().time(
345
+ machine_type=machine_type,
346
+ count=count,
347
+ ):
348
+ lock_ops.block_and_lock_machines(
349
+ fake_ctx,
350
+ count,
351
+ machine_type,
352
+ tries=-1,
353
+ reimage=False,
354
+ )
355
+ job_config = fake_ctx.config
356
+ return job_config
357
+
358
+
359
+ def create_job_archive(job_name, job_archive_path, archive_dir):
360
+ log.info('Creating job\'s archive dir %s', job_archive_path)
361
+ safe_archive = safepath.munge(job_name)
362
+ run_archive = os.path.join(archive_dir, safe_archive)
363
+ if not os.path.exists(run_archive):
364
+ safepath.makedirs('/', run_archive)
365
+ safepath.makedirs('/', job_archive_path)