teuthology 1.0.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scripts/describe.py +1 -0
- scripts/dispatcher.py +62 -0
- scripts/exporter.py +18 -0
- scripts/lock.py +1 -1
- scripts/node_cleanup.py +58 -0
- scripts/openstack.py +9 -9
- scripts/results.py +12 -11
- scripts/run.py +4 -0
- scripts/schedule.py +4 -0
- scripts/suite.py +61 -16
- scripts/supervisor.py +44 -0
- scripts/update_inventory.py +10 -4
- scripts/wait.py +31 -0
- teuthology/__init__.py +24 -21
- teuthology/beanstalk.py +4 -3
- teuthology/config.py +17 -6
- teuthology/contextutil.py +18 -14
- teuthology/describe_tests.py +25 -18
- teuthology/dispatcher/__init__.py +365 -0
- teuthology/dispatcher/supervisor.py +374 -0
- teuthology/exceptions.py +54 -0
- teuthology/exporter.py +347 -0
- teuthology/kill.py +76 -75
- teuthology/lock/cli.py +16 -7
- teuthology/lock/ops.py +276 -70
- teuthology/lock/query.py +61 -44
- teuthology/ls.py +9 -18
- teuthology/misc.py +152 -137
- teuthology/nuke/__init__.py +12 -351
- teuthology/openstack/__init__.py +4 -3
- teuthology/openstack/openstack-centos-7.0-user-data.txt +1 -1
- teuthology/openstack/openstack-centos-7.1-user-data.txt +1 -1
- teuthology/openstack/openstack-centos-7.2-user-data.txt +1 -1
- teuthology/openstack/openstack-debian-8.0-user-data.txt +1 -1
- teuthology/openstack/openstack-opensuse-42.1-user-data.txt +1 -1
- teuthology/openstack/openstack-teuthology.cron +0 -1
- teuthology/orchestra/cluster.py +51 -9
- teuthology/orchestra/connection.py +23 -16
- teuthology/orchestra/console.py +111 -50
- teuthology/orchestra/daemon/cephadmunit.py +23 -5
- teuthology/orchestra/daemon/state.py +10 -3
- teuthology/orchestra/daemon/systemd.py +10 -8
- teuthology/orchestra/opsys.py +32 -11
- teuthology/orchestra/remote.py +369 -152
- teuthology/orchestra/run.py +21 -12
- teuthology/packaging.py +54 -15
- teuthology/provision/__init__.py +30 -10
- teuthology/provision/cloud/openstack.py +12 -6
- teuthology/provision/cloud/util.py +1 -2
- teuthology/provision/downburst.py +83 -29
- teuthology/provision/fog.py +68 -20
- teuthology/provision/openstack.py +5 -4
- teuthology/provision/pelagos.py +13 -5
- teuthology/repo_utils.py +91 -44
- teuthology/report.py +57 -35
- teuthology/results.py +5 -3
- teuthology/run.py +21 -15
- teuthology/run_tasks.py +114 -40
- teuthology/schedule.py +4 -3
- teuthology/scrape.py +28 -22
- teuthology/suite/__init__.py +75 -46
- teuthology/suite/build_matrix.py +34 -24
- teuthology/suite/fragment-merge.lua +105 -0
- teuthology/suite/matrix.py +31 -2
- teuthology/suite/merge.py +175 -0
- teuthology/suite/placeholder.py +8 -8
- teuthology/suite/run.py +204 -102
- teuthology/suite/util.py +67 -211
- teuthology/task/__init__.py +1 -1
- teuthology/task/ansible.py +101 -31
- teuthology/task/buildpackages.py +2 -2
- teuthology/task/ceph_ansible.py +13 -6
- teuthology/task/cephmetrics.py +2 -1
- teuthology/task/clock.py +33 -14
- teuthology/task/exec.py +18 -0
- teuthology/task/hadoop.py +2 -2
- teuthology/task/install/__init__.py +51 -22
- teuthology/task/install/bin/adjust-ulimits +16 -0
- teuthology/task/install/bin/daemon-helper +114 -0
- teuthology/task/install/bin/stdin-killer +263 -0
- teuthology/task/install/deb.py +24 -4
- teuthology/task/install/redhat.py +36 -32
- teuthology/task/install/rpm.py +41 -14
- teuthology/task/install/util.py +48 -22
- teuthology/task/internal/__init__.py +69 -11
- teuthology/task/internal/edit_sudoers.sh +10 -0
- teuthology/task/internal/lock_machines.py +3 -133
- teuthology/task/internal/redhat.py +48 -28
- teuthology/task/internal/syslog.py +31 -8
- teuthology/task/kernel.py +155 -147
- teuthology/task/lockfile.py +1 -1
- teuthology/task/mpi.py +10 -10
- teuthology/task/pcp.py +1 -1
- teuthology/task/selinux.py +17 -8
- teuthology/task/ssh_keys.py +6 -6
- teuthology/task/tests/__init__.py +137 -77
- teuthology/task/tests/test_fetch_coredumps.py +116 -0
- teuthology/task/tests/test_run.py +4 -4
- teuthology/timer.py +3 -3
- teuthology/util/loggerfile.py +19 -0
- teuthology/util/scanner.py +159 -0
- teuthology/util/sentry.py +52 -0
- teuthology/util/time.py +52 -0
- teuthology-1.2.0.data/scripts/adjust-ulimits +16 -0
- teuthology-1.2.0.data/scripts/daemon-helper +114 -0
- teuthology-1.2.0.data/scripts/stdin-killer +263 -0
- teuthology-1.2.0.dist-info/METADATA +89 -0
- teuthology-1.2.0.dist-info/RECORD +174 -0
- {teuthology-1.0.0.dist-info → teuthology-1.2.0.dist-info}/WHEEL +1 -1
- {teuthology-1.0.0.dist-info → teuthology-1.2.0.dist-info}/entry_points.txt +5 -2
- scripts/nuke.py +0 -45
- scripts/worker.py +0 -37
- teuthology/nuke/actions.py +0 -456
- teuthology/openstack/test/__init__.py +0 -0
- teuthology/openstack/test/openstack-integration.py +0 -286
- teuthology/openstack/test/test_config.py +0 -35
- teuthology/openstack/test/test_openstack.py +0 -1695
- teuthology/orchestra/test/__init__.py +0 -0
- teuthology/orchestra/test/integration/__init__.py +0 -0
- teuthology/orchestra/test/integration/test_integration.py +0 -94
- teuthology/orchestra/test/test_cluster.py +0 -240
- teuthology/orchestra/test/test_connection.py +0 -106
- teuthology/orchestra/test/test_console.py +0 -217
- teuthology/orchestra/test/test_opsys.py +0 -404
- teuthology/orchestra/test/test_remote.py +0 -185
- teuthology/orchestra/test/test_run.py +0 -286
- teuthology/orchestra/test/test_systemd.py +0 -54
- teuthology/orchestra/test/util.py +0 -12
- teuthology/sentry.py +0 -18
- teuthology/test/__init__.py +0 -0
- teuthology/test/fake_archive.py +0 -107
- teuthology/test/fake_fs.py +0 -92
- teuthology/test/integration/__init__.py +0 -0
- teuthology/test/integration/test_suite.py +0 -86
- teuthology/test/task/__init__.py +0 -205
- teuthology/test/task/test_ansible.py +0 -624
- teuthology/test/task/test_ceph_ansible.py +0 -176
- teuthology/test/task/test_console_log.py +0 -88
- teuthology/test/task/test_install.py +0 -337
- teuthology/test/task/test_internal.py +0 -57
- teuthology/test/task/test_kernel.py +0 -243
- teuthology/test/task/test_pcp.py +0 -379
- teuthology/test/task/test_selinux.py +0 -35
- teuthology/test/test_config.py +0 -189
- teuthology/test/test_contextutil.py +0 -68
- teuthology/test/test_describe_tests.py +0 -316
- teuthology/test/test_email_sleep_before_teardown.py +0 -81
- teuthology/test/test_exit.py +0 -97
- teuthology/test/test_get_distro.py +0 -47
- teuthology/test/test_get_distro_version.py +0 -47
- teuthology/test/test_get_multi_machine_types.py +0 -27
- teuthology/test/test_job_status.py +0 -60
- teuthology/test/test_ls.py +0 -48
- teuthology/test/test_misc.py +0 -368
- teuthology/test/test_nuke.py +0 -232
- teuthology/test/test_packaging.py +0 -763
- teuthology/test/test_parallel.py +0 -28
- teuthology/test/test_repo_utils.py +0 -204
- teuthology/test/test_report.py +0 -77
- teuthology/test/test_results.py +0 -155
- teuthology/test/test_run.py +0 -238
- teuthology/test/test_safepath.py +0 -55
- teuthology/test/test_schedule.py +0 -45
- teuthology/test/test_scrape.py +0 -167
- teuthology/test/test_timer.py +0 -80
- teuthology/test/test_vps_os_vers_parameter_checking.py +0 -84
- teuthology/test/test_worker.py +0 -303
- teuthology/worker.py +0 -339
- teuthology-1.0.0.dist-info/METADATA +0 -76
- teuthology-1.0.0.dist-info/RECORD +0 -210
- {teuthology-1.0.0.dist-info → teuthology-1.2.0.dist-info}/LICENSE +0 -0
- {teuthology-1.0.0.dist-info → teuthology-1.2.0.dist-info}/top_level.txt +0 -0
teuthology/config.py
CHANGED
@@ -1,7 +1,10 @@
 import os
 import yaml
 import logging
-import collections
+try:
+    from collections.abc import MutableMapping
+except ImportError:
+    from collections import MutableMapping


 def init_logging():
@@ -11,7 +14,7 @@ def init_logging():
 log = init_logging()


-class YamlConfig(collections.MutableMapping):
+class YamlConfig(MutableMapping):
     """
     A configuration object populated by parsing a yaml file, with optional
     default values.
@@ -30,12 +33,13 @@ class YamlConfig(collections.MutableMapping):
         self._conf = dict()

     def load(self, conf=None):
-        if conf:
+        if conf is not None:
             if isinstance(conf, dict):
                 self._conf = conf
                 return
-            self._conf = yaml.safe_load(conf)
-            return
+            elif conf:
+                self._conf = yaml.safe_load(conf)
+                return
         if os.path.exists(self.yaml_path):
             with open(self.yaml_path) as f:
                 self._conf = yaml.safe_load(f)
@@ -149,10 +153,12 @@ class TeuthologyConfig(YamlConfig):
         'conserver_master': 'conserver.front.sepia.ceph.com',
         'conserver_port': 3109,
         'gitbuilder_host': 'gitbuilder.ceph.com',
-        'githelper_base_url': 'http://…
+        'githelper_base_url': 'http://githelper.ceph.com',
         'check_package_signatures': True,
+        'job_threshold': 500,
         'lab_domain': 'front.sepia.ceph.com',
         'lock_server': 'http://paddles.front.sepia.ceph.com/',
+        'max_job_age': 1209600,  # 2 weeks
         'max_job_time': 259200,  # 3 days
         'nsupdate_url': 'http://nsupdate.front.sepia.ceph.com/update',
         'results_server': 'http://paddles.front.sepia.ceph.com/',
@@ -162,6 +168,8 @@ class TeuthologyConfig(YamlConfig):
         'src_base_path': os.path.expanduser('~/src'),
         'verify_host_keys': True,
         'watchdog_interval': 120,
+        'fog_reimage_timeout': 1800,
+        'fog_wait_for_ssh_timeout': 600,
         'kojihub_url': 'http://koji.fedoraproject.org/kojihub',
         'kojiroot_url': 'http://kojipkgs.fedoraproject.org/packages',
         'koji_task_url': 'https://kojipkgs.fedoraproject.org/work/',
@@ -185,7 +193,10 @@ class TeuthologyConfig(YamlConfig):
                 'size': 1,
             },
         },
+        'rocketchat': None,
         'sleep_before_teardown': 0,
+        'ssh_key': None,
+        'active_machine_types': [],
     }

     def __init__(self, yaml_path=None):
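The try/except import keeps YamlConfig working on Python 3.10+, where MutableMapping is only importable from collections.abc, and deriving from MutableMapping means TeuthologyConfig supports dict-style access alongside attributes. A minimal sketch of how the new defaults surface (attribute access mirrors key access in teuthology's YamlConfig; the values shown are the defaults added in this diff, assuming no local overrides):

    from teuthology.config import config

    # dict-style and attribute-style access resolve the same keys
    print(config['max_job_age'])                  # 1209600 (2 weeks), new default
    print(config.max_job_time)                    # 259200 (3 days)
    config['active_machine_types'] = ['smithi']   # new key, defaults to []
    print(dict(config))                           # MutableMapping: iterable, sized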
teuthology/contextutil.py
CHANGED
@@ -2,7 +2,6 @@ import contextlib
 import sys
 import logging
 import time
-import itertools

 from teuthology.config import config
 from teuthology.exceptions import MaxWhileTries
@@ -58,8 +57,8 @@ def nested(*managers):
 class safe_while(object):
     """
     A context manager to remove boiler plate code that deals with `while` loops
-    that need a given number of tries and some seconds to sleep between each
-    one of those tries.
+    that need a given number of tries or total timeout and some seconds to sleep
+    between each one of those tries.

     The most simple example possible will try 10 times sleeping for 6 seconds:

@@ -82,6 +81,8 @@ class safe_while(object):
     :param increment: The amount to add to the sleep value on each try.
                       Default 0.
     :param tries:     The amount of tries before giving up. Default 10.
+    :param timeout:   Total seconds to try for, overrides the tries parameter
+                      if specified. Default 0.
     :param action:    The name of the action being attempted. Default none.
     :param _raise:    Whether to raise an exception (or log a warning).
                       Default True.
@@ -89,28 +90,24 @@ class safe_while(object):
                       Default time.sleep
     """

-    def __init__(self, sleep=6, increment=0, tries=10, action=None,
+    def __init__(self, sleep=6, increment=0, tries=10, timeout=0, action=None,
                  _raise=True, _sleeper=None):
         self.sleep = sleep
         self.increment = increment
         self.tries = tries
+        self.timeout = timeout
         self.counter = 0
         self.sleep_current = sleep
         self.action = action
         self._raise = _raise
         self.sleeper = _sleeper or time.sleep
+        self.total_seconds = sleep

     def _make_error_msg(self):
         """
         Sum the total number of seconds we waited while providing the number
         of tries we attempted
         """
-        total_seconds_waiting = sum(
-            itertools.islice(
-                itertools.count(self.sleep, self.increment),
-                self.tries
-            )
-        )
         msg = 'reached maximum tries ({tries})' + \
               ' after waiting for {total} seconds'
         if self.action:
@@ -118,8 +115,8 @@ class safe_while(object):

         msg = msg.format(
             action=self.action,
-            tries=self.tries,
-            total=total_seconds_waiting,
+            tries=self.counter - 1,
+            total=self.total_seconds,
         )
         return msg

@@ -127,15 +124,22 @@
         self.counter += 1
         if self.counter == 1:
             return True
-        if self.counter > self.tries:
+        def must_stop():
+            return self.tries > 0 and self.counter > self.tries
+        if ((self.timeout > 0 and
+             self.total_seconds >= self.timeout) or
+                (self.timeout == 0 and must_stop())):
             error_msg = self._make_error_msg()
             if self._raise:
                 raise MaxWhileTries(error_msg)
             else:
                 log.warning(error_msg)
             return False
-        self.sleeper(self.sleep_current)
         self.sleep_current += self.increment
+        if self.timeout > 0:
+            self.sleep_current = min(self.timeout - self.total_seconds, self.sleep_current)
+        self.total_seconds += self.sleep_current
+        self.sleeper(self.sleep_current)
         return True

     def __enter__(self):
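The new timeout parameter turns safe_while's budget from a fixed try count into an accumulated-sleep bound: total_seconds tracks the time slept, the final sleep is clamped so the budget is not overshot, and _make_error_msg() now reports the actual tries and seconds rather than recomputing them with itertools. A short usage sketch in the context-manager style the docstring describes (the host_is_reachable() predicate is hypothetical):

    from teuthology.contextutil import safe_while

    # Poll every 5 seconds, but give up (raising MaxWhileTries) once
    # roughly 120 seconds of sleep have accumulated, regardless of tries.
    with safe_while(sleep=5, timeout=120, action='wait for SSH') as proceed:
        while proceed():
            if host_is_reachable():   # hypothetical predicate
                break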
teuthology/describe_tests.py
CHANGED
@@ -13,7 +13,7 @@ from distutils.util import strtobool
 from teuthology.exceptions import ParseError
 from teuthology.suite.build_matrix import \
     build_matrix, generate_combinations, _get_matrix
-from teuthology.suite import util
+from teuthology.suite import util, merge

 def main(args):
     try:
@@ -57,6 +57,7 @@ def describe_tests(args):
             limit=conf['limit'],
             seed=conf['seed'],
             subset=conf['subset'],
+            no_nested_subset=conf['no_nested_subset'],
             fields=conf['fields'],
             filter_in=conf['filter_in'],
             filter_out=conf['filter_out'],
@@ -69,6 +70,7 @@ def describe_tests(args):
             limit=conf['limit'],
             seed=conf['seed'],
             subset=conf['subset'],
+            no_nested_subset=conf['no_nested_subset'],
             show_desc=conf['print_description'],
             show_frag=conf['print_fragments'],
             filter_in=conf['filter_in'],
@@ -109,6 +111,7 @@ def output_results(headers, rows, output_format, hrule):
 def output_summary(path, limit=0,
                    seed=None,
                    subset=None,
+                   no_nested_subset=None,
                    show_desc=True,
                    show_frag=False,
                    show_matrix=False,
@@ -124,17 +127,19 @@
     """

     random.seed(seed)
-    mat, first, matlimit = _get_matrix(path, subset)
+    mat, first, matlimit = _get_matrix(path, subset=subset, no_nested_subset=no_nested_subset)
     configs = generate_combinations(path, mat, first, matlimit)
     count = 0
+    total = len(configs)
     suite = os.path.basename(path)
-    configs = util.filter_configs(configs,
-                                  suite_name=suite,
-                                  filter_in=filter_in,
-                                  filter_out=filter_out,
-                                  filter_all=filter_all,
-                                  filter_fragments=filter_fragments)
+    configs = merge.config_merge(configs,
+                                 suite_name=suite,
+                                 filter_in=filter_in,
+                                 filter_out=filter_out,
+                                 filter_all=filter_all,
+                                 filter_fragments=filter_fragments,
+                                 seed=seed)
     for c in configs:
         if limit and count >= limit:
             break
         count += 1
@@ -145,12 +150,13 @@
                 print(" {}".format(util.strip_fragment_path(path)))
     if show_matrix:
         print(mat.tostr(1))
-    print("# {}/{} {}".format(count, …
+    print("# {}/{} {}".format(count, total, path))

 def get_combinations(suite_dir,
                      limit=0,
                      seed=None,
                      subset=None,
+                     no_nested_subset=False,
                      fields=[],
                      filter_in=None,
                      filter_out=None,
@@ -166,7 +172,7 @@ def get_combinations(suite_dir,
     of strings.
     """
     suite = os.path.basename(suite_dir)
-    configs = build_matrix(suite_dir, subset, seed)
+    configs = build_matrix(suite_dir, subset=subset, no_nested_subset=no_nested_subset, seed=seed)

     num_listed = 0
     rows = []
@@ -175,13 +181,14 @@ def get_combinations(suite_dir,
     dirs = {}
     max_dir_depth = 0

-    configs = util.filter_configs(configs,
-                                  suite_name=suite,
-                                  filter_in=filter_in,
-                                  filter_out=filter_out,
-                                  filter_all=filter_all,
-                                  filter_fragments=filter_fragments)
-    for _, fragment_paths in configs:
+    configs = merge.config_merge(configs,
+                                 suite_name=suite,
+                                 filter_in=filter_in,
+                                 filter_out=filter_out,
+                                 filter_all=filter_all,
+                                 filter_fragments=filter_fragments,
+                                 seed=seed)
+    for _, fragment_paths, __ in configs:
         if limit > 0 and num_listed >= limit:
             break

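Filtering is now routed through the new merge.config_merge generator (teuthology/suite/merge.py, added in this release alongside fragment-merge.lua), and the consuming loops unpack three-element tuples instead of pairs. A sketch of driving the new pipeline directly, assuming config_merge's remaining filters default to off (the suite path, filter value, and the _yaml name for the third tuple element are illustrative):

    from teuthology.suite import merge
    from teuthology.suite.build_matrix import build_matrix

    configs = build_matrix('qa/suites/rados',        # illustrative path
                           subset=None,
                           no_nested_subset=True,    # new in this release
                           seed=42)
    for desc, fragment_paths, _yaml in merge.config_merge(
            configs,
            suite_name='rados',
            filter_in=['bluestore'],                 # illustrative filter
            seed=42):
        print(desc)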
teuthology/dispatcher/__init__.py
ADDED
@@ -0,0 +1,365 @@
import datetime
import logging
import os
import psutil
import subprocess
import sys
import yaml

from typing import Dict, List

from teuthology import (
    # non-modules
    setup_log_file,
    install_except_hook,
    # modules
    beanstalk,
    exporter,
    report,
    repo_utils,
)
from teuthology.config import config as teuth_config
from teuthology.dispatcher import supervisor
from teuthology.exceptions import BranchNotFoundError, CommitNotFoundError, SkipJob, MaxWhileTries
from teuthology.lock import ops as lock_ops
from teuthology.util.time import parse_timestamp
from teuthology import safepath

log = logging.getLogger(__name__)
start_time = datetime.datetime.now(datetime.timezone.utc)
restart_file_path = '/tmp/teuthology-restart-dispatcher'
stop_file_path = '/tmp/teuthology-stop-dispatcher'


def sentinel(path):
    if not os.path.exists(path):
        return False
    file_mtime = datetime.datetime.fromtimestamp(
        os.path.getmtime(path),
        datetime.timezone.utc,
    )
    return file_mtime > start_time


def restart(log=log):
    log.info('Restarting...')
    args = sys.argv[:]
    args.insert(0, sys.executable)
    os.execv(sys.executable, args)


def stop():
    log.info('Stopping...')
    sys.exit(0)


def load_config(archive_dir=None):
    teuth_config.load()
    if archive_dir is not None:
        if not os.path.isdir(archive_dir):
            sys.exit("{prog}: archive directory must exist: {path}".format(
                prog=os.path.basename(sys.argv[0]),
                path=archive_dir,
            ))
        else:
            teuth_config.archive_base = archive_dir


def main(args):
    archive_dir = args.archive_dir or teuth_config.archive_base

    # Refuse to start more than one dispatcher per machine type
    procs = find_dispatcher_processes().get(args.tube)
    if procs:
        raise RuntimeError(
            "There is already a teuthology-dispatcher process running:"
            f" {procs}"
        )

    # set up logging for the dispatcher in {log_dir}
    loglevel = logging.INFO
    if args.verbose:
        loglevel = logging.DEBUG
    logging.getLogger().setLevel(loglevel)
    log.setLevel(loglevel)
    log_file_path = os.path.join(args.log_dir, f"dispatcher.{args.tube}.{os.getpid()}")
    setup_log_file(log_file_path)
    install_except_hook()

    load_config(archive_dir=archive_dir)

    connection = beanstalk.connect()
    beanstalk.watch_tube(connection, args.tube)
    result_proc = None

    if teuth_config.teuthology_path is None:
        repo_utils.fetch_teuthology('main')
    repo_utils.fetch_qa_suite('main')

    keep_running = True
    job_procs = set()
    worst_returncode = 0
    while keep_running:
        # Check to see if we have a teuthology-results process hanging around
        # and if so, read its return code so that it can exit.
        if result_proc is not None and result_proc.poll() is not None:
            log.debug("teuthology-results exited with code: %s",
                      result_proc.returncode)
            result_proc = None

        if sentinel(restart_file_path):
            restart()
        elif sentinel(stop_file_path):
            stop()

        load_config()
        for proc in list(job_procs):
            rc = proc.poll()
            if rc is not None:
                worst_returncode = max([worst_returncode, rc])
                job_procs.remove(proc)
        job = connection.reserve(timeout=60)
        if job is None:
            if args.exit_on_empty_queue and not job_procs:
                log.info("Queue is empty and no supervisor processes running; exiting!")
                break
            continue

        # bury the job so it won't be re-run if it fails
        job.bury()
        job_id = job.jid
        log.info('Reserved job %d', job_id)
        log.info('Config is: %s', job.body)
        job_config = yaml.safe_load(job.body)
        job_config['job_id'] = str(job_id)

        if job_config.get('stop_worker'):
            keep_running = False

        try:
            job_config, teuth_bin_path = prep_job(
                job_config,
                log_file_path,
                archive_dir,
            )
        except SkipJob:
            continue

        # lock machines but do not reimage them
        if 'roles' in job_config:
            job_config = lock_machines(job_config)

        run_args = [
            os.path.join(teuth_bin_path, 'teuthology-supervisor'),
            '-v',
            '--bin-path', teuth_bin_path,
            '--archive-dir', archive_dir,
        ]

        # Create run archive directory if not already created and
        # job's archive directory
        create_job_archive(job_config['name'],
                           job_config['archive_path'],
                           archive_dir)
        job_config_path = os.path.join(job_config['archive_path'], 'orig.config.yaml')

        # Write initial job config in job archive dir
        with open(job_config_path, 'w') as f:
            yaml.safe_dump(job_config, f, default_flow_style=False)

        run_args.extend(["--job-config", job_config_path])

        try:
            job_proc = subprocess.Popen(
                run_args,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            job_procs.add(job_proc)
            log.info('Job supervisor PID: %s', job_proc.pid)
        except Exception:
            error_message = "Saw error while trying to spawn supervisor."
            log.exception(error_message)
            if 'targets' in job_config:
                node_names = job_config["targets"].keys()
                lock_ops.unlock_safe(
                    node_names,
                    job_config["owner"],
                    job_config["name"],
                    job_config["job_id"]
                )
            report.try_push_job_info(job_config, dict(
                status='fail',
                failure_reason=error_message))

        # This try/except block is to keep the worker from dying when
        # beanstalkc throws a SocketError
        try:
            job.delete()
        except Exception:
            log.exception("Saw exception while trying to delete job")

    return worst_returncode


def find_dispatcher_processes() -> Dict[str, List[psutil.Process]]:
    def match(proc):
        try:
            cmdline = proc.cmdline()
        except psutil.AccessDenied:
            return False
        except psutil.ZombieProcess:
            return False
        if len(cmdline) < 3:
            return False
        if not cmdline[1].endswith("/teuthology-dispatcher"):
            return False
        if cmdline[2] == "--supervisor":
            return False
        if "--tube" not in cmdline:
            return False
        if proc.pid == os.getpid():
            return False
        return True

    procs = {}
    attrs = ["pid", "cmdline"]
    for proc in psutil.process_iter(attrs=attrs):
        if not match(proc):
            continue
        cmdline = proc.cmdline()
        machine_type = cmdline[cmdline.index("--tube") + 1]
        procs.setdefault(machine_type, []).append(proc)
    return procs


def prep_job(job_config, log_file_path, archive_dir):
    job_id = job_config['job_id']
    check_job_expiration(job_config)

    safe_archive = safepath.munge(job_config['name'])
    job_config['worker_log'] = log_file_path
    archive_path_full = os.path.join(
        archive_dir, safe_archive, str(job_id))
    job_config['archive_path'] = archive_path_full

    # If the teuthology branch was not specified, default to main and
    # store that value.
    teuthology_branch = job_config.get('teuthology_branch', 'main')
    job_config['teuthology_branch'] = teuthology_branch
    teuthology_sha1 = job_config.get('teuthology_sha1')
    if not teuthology_sha1:
        repo_url = repo_utils.build_git_url('teuthology', 'ceph')
        try:
            teuthology_sha1 = repo_utils.ls_remote(repo_url, teuthology_branch)
        except Exception as exc:
            log.exception(f"Could not get teuthology sha1 for branch {teuthology_branch}")
            report.try_push_job_info(
                job_config,
                dict(status='dead', failure_reason=str(exc))
            )
            raise SkipJob()
        if not teuthology_sha1:
            reason = "Teuthology branch {} not found; marking job as dead".format(teuthology_branch)
            log.error(reason)
            report.try_push_job_info(
                job_config,
                dict(status='dead', failure_reason=reason)
            )
            raise SkipJob()
        if teuth_config.teuthology_path is None:
            log.info('Using teuthology sha1 %s', teuthology_sha1)

    try:
        if teuth_config.teuthology_path is not None:
            teuth_path = teuth_config.teuthology_path
        else:
            teuth_path = repo_utils.fetch_teuthology(branch=teuthology_branch,
                                                     commit=teuthology_sha1)
        # For the teuthology tasks, we look for suite_branch, and if we
        # don't get that, we look for branch, and fall back to 'main'.
        # last-in-suite jobs don't have suite_branch or branch set.
        ceph_branch = job_config.get('branch', 'main')
        suite_branch = job_config.get('suite_branch', ceph_branch)
        suite_sha1 = job_config.get('suite_sha1')
        suite_repo = job_config.get('suite_repo')
        if suite_repo:
            teuth_config.ceph_qa_suite_git_url = suite_repo
        job_config['suite_path'] = os.path.normpath(os.path.join(
            repo_utils.fetch_qa_suite(suite_branch, suite_sha1),
            job_config.get('suite_relpath', ''),
        ))
    except (BranchNotFoundError, CommitNotFoundError) as exc:
        log.exception("Requested version not found; marking job as dead")
        report.try_push_job_info(
            job_config,
            dict(status='dead', failure_reason=str(exc))
        )
        raise SkipJob()
    except MaxWhileTries as exc:
        log.exception("Failed to fetch or bootstrap; marking job as dead")
        report.try_push_job_info(
            job_config,
            dict(status='dead', failure_reason=str(exc))
        )
        raise SkipJob()

    teuth_bin_path = os.path.join(teuth_path, 'virtualenv', 'bin')
    if not os.path.isdir(teuth_bin_path):
        raise RuntimeError("teuthology branch %s at %s not bootstrapped!" %
                           (teuthology_branch, teuth_bin_path))
    return job_config, teuth_bin_path


def check_job_expiration(job_config):
    job_id = job_config['job_id']
    expired = False
    now = datetime.datetime.now(datetime.timezone.utc)
    if expire_str := job_config.get('timestamp'):
        expire = parse_timestamp(expire_str) + \
            datetime.timedelta(seconds=teuth_config.max_job_age)
        expired = expire < now
    if not expired and (expire_str := job_config.get('expire')):
        try:
            expire = parse_timestamp(expire_str)
            expired = expired or expire < now
        except ValueError:
            log.warning(f"Failed to parse job expiration: {expire_str=}")
            pass
    if expired:
        log.info(f"Skipping job {job_id} because it is expired: {expire_str} is in the past")
        report.try_push_job_info(
            job_config,
            # TODO: Add a 'canceled' status to paddles, and use that.
            dict(status='dead'),
        )
        raise SkipJob()


def lock_machines(job_config):
    report.try_push_job_info(job_config, dict(status='running'))
    fake_ctx = supervisor.create_fake_context(job_config, block=True)
    machine_type = job_config["machine_type"]
    count = len(job_config['roles'])
    with exporter.NodeLockingTime().time(
        machine_type=machine_type,
        count=count,
    ):
        lock_ops.block_and_lock_machines(
            fake_ctx,
            count,
            machine_type,
            tries=-1,
            reimage=False,
        )
    job_config = fake_ctx.config
    return job_config


def create_job_archive(job_name, job_archive_path, archive_dir):
    log.info('Creating job\'s archive dir %s', job_archive_path)
    safe_archive = safepath.munge(job_name)
    run_archive = os.path.join(archive_dir, safe_archive)
    if not os.path.exists(run_archive):
        safepath.makedirs('/', run_archive)
    safepath.makedirs('/', job_archive_path)
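Because sentinel() only fires for files modified after the dispatcher process started, an operator can ask a long-running dispatcher to restart or stop between jobs simply by touching the control files the module defines. A minimal sketch:

    from pathlib import Path

    # Ask the dispatcher to re-exec itself (e.g. to pick up new code) ...
    Path('/tmp/teuthology-restart-dispatcher').touch()
    # ... or to exit cleanly on its next loop iteration:
    Path('/tmp/teuthology-stop-dispatcher').touch()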