teuthology 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scripts/describe.py +1 -0
- scripts/dispatcher.py +55 -26
- scripts/exporter.py +18 -0
- scripts/lock.py +1 -1
- scripts/node_cleanup.py +58 -0
- scripts/openstack.py +9 -9
- scripts/results.py +12 -11
- scripts/schedule.py +4 -0
- scripts/suite.py +57 -16
- scripts/supervisor.py +44 -0
- scripts/update_inventory.py +10 -4
- teuthology/__init__.py +24 -26
- teuthology/beanstalk.py +4 -3
- teuthology/config.py +16 -6
- teuthology/contextutil.py +18 -14
- teuthology/describe_tests.py +25 -18
- teuthology/dispatcher/__init__.py +210 -35
- teuthology/dispatcher/supervisor.py +140 -58
- teuthology/exceptions.py +43 -0
- teuthology/exporter.py +347 -0
- teuthology/kill.py +76 -81
- teuthology/lock/cli.py +3 -3
- teuthology/lock/ops.py +135 -61
- teuthology/lock/query.py +61 -44
- teuthology/ls.py +1 -1
- teuthology/misc.py +61 -75
- teuthology/nuke/__init__.py +12 -353
- teuthology/openstack/__init__.py +4 -3
- teuthology/openstack/openstack-centos-7.0-user-data.txt +1 -1
- teuthology/openstack/openstack-centos-7.1-user-data.txt +1 -1
- teuthology/openstack/openstack-centos-7.2-user-data.txt +1 -1
- teuthology/openstack/openstack-debian-8.0-user-data.txt +1 -1
- teuthology/openstack/openstack-opensuse-42.1-user-data.txt +1 -1
- teuthology/openstack/openstack-teuthology.cron +0 -1
- teuthology/orchestra/cluster.py +49 -7
- teuthology/orchestra/connection.py +16 -5
- teuthology/orchestra/console.py +111 -50
- teuthology/orchestra/daemon/cephadmunit.py +17 -4
- teuthology/orchestra/daemon/state.py +8 -1
- teuthology/orchestra/daemon/systemd.py +4 -4
- teuthology/orchestra/opsys.py +30 -11
- teuthology/orchestra/remote.py +405 -338
- teuthology/orchestra/run.py +3 -3
- teuthology/packaging.py +19 -16
- teuthology/provision/__init__.py +30 -10
- teuthology/provision/cloud/openstack.py +12 -6
- teuthology/provision/cloud/util.py +1 -2
- teuthology/provision/downburst.py +4 -3
- teuthology/provision/fog.py +68 -20
- teuthology/provision/openstack.py +5 -4
- teuthology/provision/pelagos.py +1 -1
- teuthology/repo_utils.py +43 -13
- teuthology/report.py +57 -35
- teuthology/results.py +5 -3
- teuthology/run.py +13 -14
- teuthology/run_tasks.py +27 -43
- teuthology/schedule.py +4 -3
- teuthology/scrape.py +28 -22
- teuthology/suite/__init__.py +74 -45
- teuthology/suite/build_matrix.py +34 -24
- teuthology/suite/fragment-merge.lua +105 -0
- teuthology/suite/matrix.py +31 -2
- teuthology/suite/merge.py +175 -0
- teuthology/suite/placeholder.py +6 -9
- teuthology/suite/run.py +175 -100
- teuthology/suite/util.py +64 -218
- teuthology/task/__init__.py +1 -1
- teuthology/task/ansible.py +101 -32
- teuthology/task/buildpackages.py +2 -2
- teuthology/task/ceph_ansible.py +13 -6
- teuthology/task/cephmetrics.py +2 -1
- teuthology/task/clock.py +33 -14
- teuthology/task/exec.py +18 -0
- teuthology/task/hadoop.py +2 -2
- teuthology/task/install/__init__.py +29 -7
- teuthology/task/install/bin/adjust-ulimits +16 -0
- teuthology/task/install/bin/daemon-helper +114 -0
- teuthology/task/install/bin/stdin-killer +263 -0
- teuthology/task/install/deb.py +1 -1
- teuthology/task/install/rpm.py +17 -5
- teuthology/task/install/util.py +3 -3
- teuthology/task/internal/__init__.py +41 -10
- teuthology/task/internal/edit_sudoers.sh +10 -0
- teuthology/task/internal/lock_machines.py +2 -9
- teuthology/task/internal/redhat.py +31 -1
- teuthology/task/internal/syslog.py +31 -8
- teuthology/task/kernel.py +152 -145
- teuthology/task/lockfile.py +1 -1
- teuthology/task/mpi.py +10 -10
- teuthology/task/pcp.py +1 -1
- teuthology/task/selinux.py +16 -8
- teuthology/task/ssh_keys.py +4 -4
- teuthology/task/tests/__init__.py +137 -77
- teuthology/task/tests/test_fetch_coredumps.py +116 -0
- teuthology/task/tests/test_run.py +4 -4
- teuthology/timer.py +3 -3
- teuthology/util/loggerfile.py +19 -0
- teuthology/util/scanner.py +159 -0
- teuthology/util/sentry.py +52 -0
- teuthology/util/time.py +52 -0
- teuthology-1.2.0.data/scripts/adjust-ulimits +16 -0
- teuthology-1.2.0.data/scripts/daemon-helper +114 -0
- teuthology-1.2.0.data/scripts/stdin-killer +263 -0
- teuthology-1.2.0.dist-info/METADATA +89 -0
- teuthology-1.2.0.dist-info/RECORD +174 -0
- {teuthology-1.1.0.dist-info → teuthology-1.2.0.dist-info}/WHEEL +1 -1
- {teuthology-1.1.0.dist-info → teuthology-1.2.0.dist-info}/entry_points.txt +3 -2
- scripts/nuke.py +0 -47
- scripts/worker.py +0 -37
- teuthology/nuke/actions.py +0 -456
- teuthology/openstack/test/__init__.py +0 -0
- teuthology/openstack/test/openstack-integration.py +0 -286
- teuthology/openstack/test/test_config.py +0 -35
- teuthology/openstack/test/test_openstack.py +0 -1695
- teuthology/orchestra/test/__init__.py +0 -0
- teuthology/orchestra/test/integration/__init__.py +0 -0
- teuthology/orchestra/test/integration/test_integration.py +0 -94
- teuthology/orchestra/test/test_cluster.py +0 -240
- teuthology/orchestra/test/test_connection.py +0 -106
- teuthology/orchestra/test/test_console.py +0 -217
- teuthology/orchestra/test/test_opsys.py +0 -404
- teuthology/orchestra/test/test_remote.py +0 -185
- teuthology/orchestra/test/test_run.py +0 -286
- teuthology/orchestra/test/test_systemd.py +0 -54
- teuthology/orchestra/test/util.py +0 -12
- teuthology/test/__init__.py +0 -0
- teuthology/test/fake_archive.py +0 -107
- teuthology/test/fake_fs.py +0 -92
- teuthology/test/integration/__init__.py +0 -0
- teuthology/test/integration/test_suite.py +0 -86
- teuthology/test/task/__init__.py +0 -205
- teuthology/test/task/test_ansible.py +0 -624
- teuthology/test/task/test_ceph_ansible.py +0 -176
- teuthology/test/task/test_console_log.py +0 -88
- teuthology/test/task/test_install.py +0 -337
- teuthology/test/task/test_internal.py +0 -57
- teuthology/test/task/test_kernel.py +0 -243
- teuthology/test/task/test_pcp.py +0 -379
- teuthology/test/task/test_selinux.py +0 -35
- teuthology/test/test_config.py +0 -189
- teuthology/test/test_contextutil.py +0 -68
- teuthology/test/test_describe_tests.py +0 -316
- teuthology/test/test_email_sleep_before_teardown.py +0 -81
- teuthology/test/test_exit.py +0 -97
- teuthology/test/test_get_distro.py +0 -47
- teuthology/test/test_get_distro_version.py +0 -47
- teuthology/test/test_get_multi_machine_types.py +0 -27
- teuthology/test/test_job_status.py +0 -60
- teuthology/test/test_ls.py +0 -48
- teuthology/test/test_misc.py +0 -391
- teuthology/test/test_nuke.py +0 -290
- teuthology/test/test_packaging.py +0 -763
- teuthology/test/test_parallel.py +0 -28
- teuthology/test/test_repo_utils.py +0 -225
- teuthology/test/test_report.py +0 -77
- teuthology/test/test_results.py +0 -155
- teuthology/test/test_run.py +0 -239
- teuthology/test/test_safepath.py +0 -55
- teuthology/test/test_schedule.py +0 -45
- teuthology/test/test_scrape.py +0 -167
- teuthology/test/test_timer.py +0 -80
- teuthology/test/test_vps_os_vers_parameter_checking.py +0 -84
- teuthology/test/test_worker.py +0 -303
- teuthology/worker.py +0 -354
- teuthology-1.1.0.dist-info/METADATA +0 -76
- teuthology-1.1.0.dist-info/RECORD +0 -213
- {teuthology-1.1.0.dist-info → teuthology-1.2.0.dist-info}/LICENSE +0 -0
- {teuthology-1.1.0.dist-info → teuthology-1.2.0.dist-info}/top_level.txt +0 -0
teuthology/config.py
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
import os
|
2
2
|
import yaml
|
3
3
|
import logging
|
4
|
-
|
4
|
+
try:
|
5
|
+
from collections.abc import MutableMapping
|
6
|
+
except ImportError:
|
7
|
+
from collections import MutableMapping
|
5
8
|
|
6
9
|
|
7
10
|
def init_logging():
|
@@ -11,7 +14,7 @@ def init_logging():
|
|
11
14
|
log = init_logging()
|
12
15
|
|
13
16
|
|
14
|
-
class YamlConfig(
|
17
|
+
class YamlConfig(MutableMapping):
|
15
18
|
"""
|
16
19
|
A configuration object populated by parsing a yaml file, with optional
|
17
20
|
default values.
|
@@ -30,12 +33,13 @@ class YamlConfig(collections.MutableMapping):
|
|
30
33
|
self._conf = dict()
|
31
34
|
|
32
35
|
def load(self, conf=None):
|
33
|
-
if conf:
|
36
|
+
if conf is not None:
|
34
37
|
if isinstance(conf, dict):
|
35
38
|
self._conf = conf
|
36
|
-
|
39
|
+
return
|
40
|
+
elif conf:
|
37
41
|
self._conf = yaml.safe_load(conf)
|
38
|
-
|
42
|
+
return
|
39
43
|
if os.path.exists(self.yaml_path):
|
40
44
|
with open(self.yaml_path) as f:
|
41
45
|
self._conf = yaml.safe_load(f)
|
@@ -149,10 +153,12 @@ class TeuthologyConfig(YamlConfig):
|
|
149
153
|
'conserver_master': 'conserver.front.sepia.ceph.com',
|
150
154
|
'conserver_port': 3109,
|
151
155
|
'gitbuilder_host': 'gitbuilder.ceph.com',
|
152
|
-
'githelper_base_url': 'http://
|
156
|
+
'githelper_base_url': 'http://githelper.ceph.com',
|
153
157
|
'check_package_signatures': True,
|
158
|
+
'job_threshold': 500,
|
154
159
|
'lab_domain': 'front.sepia.ceph.com',
|
155
160
|
'lock_server': 'http://paddles.front.sepia.ceph.com/',
|
161
|
+
'max_job_age': 1209600, # 2 weeks
|
156
162
|
'max_job_time': 259200, # 3 days
|
157
163
|
'nsupdate_url': 'http://nsupdate.front.sepia.ceph.com/update',
|
158
164
|
'results_server': 'http://paddles.front.sepia.ceph.com/',
|
@@ -162,6 +168,8 @@ class TeuthologyConfig(YamlConfig):
|
|
162
168
|
'src_base_path': os.path.expanduser('~/src'),
|
163
169
|
'verify_host_keys': True,
|
164
170
|
'watchdog_interval': 120,
|
171
|
+
'fog_reimage_timeout': 1800,
|
172
|
+
'fog_wait_for_ssh_timeout': 600,
|
165
173
|
'kojihub_url': 'http://koji.fedoraproject.org/kojihub',
|
166
174
|
'kojiroot_url': 'http://kojipkgs.fedoraproject.org/packages',
|
167
175
|
'koji_task_url': 'https://kojipkgs.fedoraproject.org/work/',
|
@@ -187,6 +195,8 @@ class TeuthologyConfig(YamlConfig):
|
|
187
195
|
},
|
188
196
|
'rocketchat': None,
|
189
197
|
'sleep_before_teardown': 0,
|
198
|
+
'ssh_key': None,
|
199
|
+
'active_machine_types': [],
|
190
200
|
}
|
191
201
|
|
192
202
|
def __init__(self, yaml_path=None):
|
teuthology/contextutil.py
CHANGED
@@ -2,7 +2,6 @@ import contextlib
|
|
2
2
|
import sys
|
3
3
|
import logging
|
4
4
|
import time
|
5
|
-
import itertools
|
6
5
|
|
7
6
|
from teuthology.config import config
|
8
7
|
from teuthology.exceptions import MaxWhileTries
|
@@ -58,8 +57,8 @@ def nested(*managers):
|
|
58
57
|
class safe_while(object):
|
59
58
|
"""
|
60
59
|
A context manager to remove boiler plate code that deals with `while` loops
|
61
|
-
that need a given number of tries and some seconds to sleep
|
62
|
-
one of those tries.
|
60
|
+
that need a given number of tries or total timeout and some seconds to sleep
|
61
|
+
between each one of those tries.
|
63
62
|
|
64
63
|
The most simple example possible will try 10 times sleeping for 6 seconds:
|
65
64
|
|
@@ -82,6 +81,8 @@ class safe_while(object):
|
|
82
81
|
:param increment: The amount to add to the sleep value on each try.
|
83
82
|
Default 0.
|
84
83
|
:param tries: The amount of tries before giving up. Default 10.
|
84
|
+
:param timeout: Total seconds to try for, overrides the tries parameter
|
85
|
+
if specified. Default 0.
|
85
86
|
:param action: The name of the action being attempted. Default none.
|
86
87
|
:param _raise: Whether to raise an exception (or log a warning).
|
87
88
|
Default True.
|
@@ -89,28 +90,24 @@ class safe_while(object):
|
|
89
90
|
Default time.sleep
|
90
91
|
"""
|
91
92
|
|
92
|
-
def __init__(self, sleep=6, increment=0, tries=10, action=None,
|
93
|
+
def __init__(self, sleep=6, increment=0, tries=10, timeout=0, action=None,
|
93
94
|
_raise=True, _sleeper=None):
|
94
95
|
self.sleep = sleep
|
95
96
|
self.increment = increment
|
96
97
|
self.tries = tries
|
98
|
+
self.timeout = timeout
|
97
99
|
self.counter = 0
|
98
100
|
self.sleep_current = sleep
|
99
101
|
self.action = action
|
100
102
|
self._raise = _raise
|
101
103
|
self.sleeper = _sleeper or time.sleep
|
104
|
+
self.total_seconds = sleep
|
102
105
|
|
103
106
|
def _make_error_msg(self):
|
104
107
|
"""
|
105
108
|
Sum the total number of seconds we waited while providing the number
|
106
109
|
of tries we attempted
|
107
110
|
"""
|
108
|
-
total_seconds_waiting = sum(
|
109
|
-
itertools.islice(
|
110
|
-
itertools.count(self.sleep, self.increment),
|
111
|
-
self.tries
|
112
|
-
)
|
113
|
-
)
|
114
111
|
msg = 'reached maximum tries ({tries})' + \
|
115
112
|
' after waiting for {total} seconds'
|
116
113
|
if self.action:
|
@@ -118,8 +115,8 @@ class safe_while(object):
|
|
118
115
|
|
119
116
|
msg = msg.format(
|
120
117
|
action=self.action,
|
121
|
-
tries=self.
|
122
|
-
total=
|
118
|
+
tries=self.counter - 1,
|
119
|
+
total=self.total_seconds,
|
123
120
|
)
|
124
121
|
return msg
|
125
122
|
|
@@ -127,15 +124,22 @@ class safe_while(object):
|
|
127
124
|
self.counter += 1
|
128
125
|
if self.counter == 1:
|
129
126
|
return True
|
130
|
-
|
127
|
+
def must_stop():
|
128
|
+
return self.tries > 0 and self.counter > self.tries
|
129
|
+
if ((self.timeout > 0 and
|
130
|
+
self.total_seconds >= self.timeout) or
|
131
|
+
(self.timeout == 0 and must_stop())):
|
131
132
|
error_msg = self._make_error_msg()
|
132
133
|
if self._raise:
|
133
134
|
raise MaxWhileTries(error_msg)
|
134
135
|
else:
|
135
136
|
log.warning(error_msg)
|
136
137
|
return False
|
137
|
-
self.sleeper(self.sleep_current)
|
138
138
|
self.sleep_current += self.increment
|
139
|
+
if self.timeout > 0:
|
140
|
+
self.sleep_current = min(self.timeout - self.total_seconds, self.sleep_current)
|
141
|
+
self.total_seconds += self.sleep_current
|
142
|
+
self.sleeper(self.sleep_current)
|
139
143
|
return True
|
140
144
|
|
141
145
|
def __enter__(self):
|
teuthology/describe_tests.py
CHANGED
@@ -13,7 +13,7 @@ from distutils.util import strtobool
|
|
13
13
|
from teuthology.exceptions import ParseError
|
14
14
|
from teuthology.suite.build_matrix import \
|
15
15
|
build_matrix, generate_combinations, _get_matrix
|
16
|
-
from teuthology.suite import util
|
16
|
+
from teuthology.suite import util, merge
|
17
17
|
|
18
18
|
def main(args):
|
19
19
|
try:
|
@@ -57,6 +57,7 @@ def describe_tests(args):
|
|
57
57
|
limit=conf['limit'],
|
58
58
|
seed=conf['seed'],
|
59
59
|
subset=conf['subset'],
|
60
|
+
no_nested_subset=conf['no_nested_subset'],
|
60
61
|
fields=conf['fields'],
|
61
62
|
filter_in=conf['filter_in'],
|
62
63
|
filter_out=conf['filter_out'],
|
@@ -69,6 +70,7 @@ def describe_tests(args):
|
|
69
70
|
limit=conf['limit'],
|
70
71
|
seed=conf['seed'],
|
71
72
|
subset=conf['subset'],
|
73
|
+
no_nested_subset=conf['no_nested_subset'],
|
72
74
|
show_desc=conf['print_description'],
|
73
75
|
show_frag=conf['print_fragments'],
|
74
76
|
filter_in=conf['filter_in'],
|
@@ -109,6 +111,7 @@ def output_results(headers, rows, output_format, hrule):
|
|
109
111
|
def output_summary(path, limit=0,
|
110
112
|
seed=None,
|
111
113
|
subset=None,
|
114
|
+
no_nested_subset=None,
|
112
115
|
show_desc=True,
|
113
116
|
show_frag=False,
|
114
117
|
show_matrix=False,
|
@@ -124,17 +127,19 @@ def output_summary(path, limit=0,
|
|
124
127
|
"""
|
125
128
|
|
126
129
|
random.seed(seed)
|
127
|
-
mat, first, matlimit = _get_matrix(path, subset)
|
130
|
+
mat, first, matlimit = _get_matrix(path, subset=subset, no_nested_subset=no_nested_subset)
|
128
131
|
configs = generate_combinations(path, mat, first, matlimit)
|
129
132
|
count = 0
|
133
|
+
total = len(configs)
|
130
134
|
suite = os.path.basename(path)
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
135
|
+
configs = merge.config_merge(configs,
|
136
|
+
suite_name=suite,
|
137
|
+
filter_in=filter_in,
|
138
|
+
filter_out=filter_out,
|
139
|
+
filter_all=filter_all,
|
140
|
+
filter_fragments=filter_fragments,
|
141
|
+
seed=seed)
|
142
|
+
for c in configs:
|
138
143
|
if limit and count >= limit:
|
139
144
|
break
|
140
145
|
count += 1
|
@@ -145,12 +150,13 @@ def output_summary(path, limit=0,
|
|
145
150
|
print(" {}".format(util.strip_fragment_path(path)))
|
146
151
|
if show_matrix:
|
147
152
|
print(mat.tostr(1))
|
148
|
-
print("# {}/{} {}".format(count,
|
153
|
+
print("# {}/{} {}".format(count, total, path))
|
149
154
|
|
150
155
|
def get_combinations(suite_dir,
|
151
156
|
limit=0,
|
152
157
|
seed=None,
|
153
158
|
subset=None,
|
159
|
+
no_nested_subset=False,
|
154
160
|
fields=[],
|
155
161
|
filter_in=None,
|
156
162
|
filter_out=None,
|
@@ -166,7 +172,7 @@ def get_combinations(suite_dir,
|
|
166
172
|
of strings.
|
167
173
|
"""
|
168
174
|
suite = os.path.basename(suite_dir)
|
169
|
-
configs = build_matrix(suite_dir, subset, seed)
|
175
|
+
configs = build_matrix(suite_dir, subset=subset, no_nested_subset=no_nested_subset, seed=seed)
|
170
176
|
|
171
177
|
num_listed = 0
|
172
178
|
rows = []
|
@@ -175,13 +181,14 @@ def get_combinations(suite_dir,
|
|
175
181
|
dirs = {}
|
176
182
|
max_dir_depth = 0
|
177
183
|
|
178
|
-
configs =
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
184
|
+
configs = merge.config_merge(configs,
|
185
|
+
suite_name=suite,
|
186
|
+
filter_in=filter_in,
|
187
|
+
filter_out=filter_out,
|
188
|
+
filter_all=filter_all,
|
189
|
+
filter_fragments=filter_fragments,
|
190
|
+
seed=seed)
|
191
|
+
for _, fragment_paths, __ in configs:
|
185
192
|
if limit > 0 and num_listed >= limit:
|
186
193
|
break
|
187
194
|
|
@@ -1,25 +1,32 @@
|
|
1
|
+
import datetime
|
1
2
|
import logging
|
2
3
|
import os
|
4
|
+
import psutil
|
3
5
|
import subprocess
|
4
6
|
import sys
|
5
7
|
import yaml
|
6
8
|
|
7
|
-
from
|
8
|
-
|
9
|
-
from teuthology import
|
10
|
-
|
11
|
-
|
9
|
+
from typing import Dict, List
|
10
|
+
|
11
|
+
from teuthology import (
|
12
|
+
# non-modules
|
13
|
+
setup_log_file,
|
14
|
+
install_except_hook,
|
15
|
+
# modules
|
16
|
+
beanstalk,
|
17
|
+
exporter,
|
18
|
+
report,
|
19
|
+
repo_utils,
|
20
|
+
)
|
12
21
|
from teuthology.config import config as teuth_config
|
13
|
-
from teuthology.exceptions import SkipJob
|
14
|
-
from teuthology.repo_utils import fetch_qa_suite, fetch_teuthology
|
15
|
-
from teuthology.lock.ops import block_and_lock_machines
|
16
22
|
from teuthology.dispatcher import supervisor
|
17
|
-
from teuthology.
|
23
|
+
from teuthology.exceptions import BranchNotFoundError, CommitNotFoundError, SkipJob, MaxWhileTries
|
24
|
+
from teuthology.lock import ops as lock_ops
|
25
|
+
from teuthology.util.time import parse_timestamp
|
18
26
|
from teuthology import safepath
|
19
|
-
from teuthology.nuke import nuke
|
20
27
|
|
21
28
|
log = logging.getLogger(__name__)
|
22
|
-
start_time = datetime.
|
29
|
+
start_time = datetime.datetime.now(datetime.timezone.utc)
|
23
30
|
restart_file_path = '/tmp/teuthology-restart-dispatcher'
|
24
31
|
stop_file_path = '/tmp/teuthology-stop-dispatcher'
|
25
32
|
|
@@ -27,11 +34,14 @@ stop_file_path = '/tmp/teuthology-stop-dispatcher'
|
|
27
34
|
def sentinel(path):
|
28
35
|
if not os.path.exists(path):
|
29
36
|
return False
|
30
|
-
file_mtime = datetime.
|
37
|
+
file_mtime = datetime.datetime.fromtimestamp(
|
38
|
+
os.path.getmtime(path),
|
39
|
+
datetime.timezone.utc,
|
40
|
+
)
|
31
41
|
return file_mtime > start_time
|
32
42
|
|
33
43
|
|
34
|
-
def restart():
|
44
|
+
def restart(log=log):
|
35
45
|
log.info('Restarting...')
|
36
46
|
args = sys.argv[:]
|
37
47
|
args.insert(0, sys.executable)
|
@@ -56,38 +66,39 @@ def load_config(archive_dir=None):
|
|
56
66
|
|
57
67
|
|
58
68
|
def main(args):
|
59
|
-
|
60
|
-
if args["--supervisor"]:
|
61
|
-
return supervisor.main(args)
|
62
|
-
|
63
|
-
verbose = args["--verbose"]
|
64
|
-
tube = args["--tube"]
|
65
|
-
log_dir = args["--log-dir"]
|
66
|
-
archive_dir = args["--archive-dir"]
|
69
|
+
archive_dir = args.archive_dir or teuth_config.archive_base
|
67
70
|
|
68
|
-
|
69
|
-
|
71
|
+
# Refuse to start more than one dispatcher per machine type
|
72
|
+
procs = find_dispatcher_processes().get(args.tube)
|
73
|
+
if procs:
|
74
|
+
raise RuntimeError(
|
75
|
+
"There is already a teuthology-dispatcher process running:"
|
76
|
+
f" {procs}"
|
77
|
+
)
|
70
78
|
|
71
79
|
# setup logging for dispatcher in {log_dir}
|
72
80
|
loglevel = logging.INFO
|
73
|
-
if verbose:
|
81
|
+
if args.verbose:
|
74
82
|
loglevel = logging.DEBUG
|
83
|
+
logging.getLogger().setLevel(loglevel)
|
75
84
|
log.setLevel(loglevel)
|
76
|
-
log_file_path = os.path.join(log_dir, f"dispatcher.{tube}.{os.getpid()}")
|
85
|
+
log_file_path = os.path.join(args.log_dir, f"dispatcher.{args.tube}.{os.getpid()}")
|
77
86
|
setup_log_file(log_file_path)
|
78
87
|
install_except_hook()
|
79
88
|
|
80
89
|
load_config(archive_dir=archive_dir)
|
81
90
|
|
82
91
|
connection = beanstalk.connect()
|
83
|
-
beanstalk.watch_tube(connection, tube)
|
92
|
+
beanstalk.watch_tube(connection, args.tube)
|
84
93
|
result_proc = None
|
85
94
|
|
86
95
|
if teuth_config.teuthology_path is None:
|
87
|
-
fetch_teuthology('
|
88
|
-
fetch_qa_suite('
|
96
|
+
repo_utils.fetch_teuthology('main')
|
97
|
+
repo_utils.fetch_qa_suite('main')
|
89
98
|
|
90
99
|
keep_running = True
|
100
|
+
job_procs = set()
|
101
|
+
worst_returncode = 0
|
91
102
|
while keep_running:
|
92
103
|
# Check to see if we have a teuthology-results process hanging around
|
93
104
|
# and if so, read its return code so that it can exit.
|
@@ -102,9 +113,16 @@ def main(args):
|
|
102
113
|
stop()
|
103
114
|
|
104
115
|
load_config()
|
105
|
-
|
116
|
+
for proc in list(job_procs):
|
117
|
+
rc = proc.poll()
|
118
|
+
if rc is not None:
|
119
|
+
worst_returncode = max([worst_returncode, rc])
|
120
|
+
job_procs.remove(proc)
|
106
121
|
job = connection.reserve(timeout=60)
|
107
122
|
if job is None:
|
123
|
+
if args.exit_on_empty_queue and not job_procs:
|
124
|
+
log.info("Queue is empty and no supervisor processes running; exiting!")
|
125
|
+
break
|
108
126
|
continue
|
109
127
|
|
110
128
|
# bury the job so it won't be re-run if it fails
|
@@ -132,8 +150,7 @@ def main(args):
|
|
132
150
|
job_config = lock_machines(job_config)
|
133
151
|
|
134
152
|
run_args = [
|
135
|
-
os.path.join(teuth_bin_path, 'teuthology-
|
136
|
-
'--supervisor',
|
153
|
+
os.path.join(teuth_bin_path, 'teuthology-supervisor'),
|
137
154
|
'-v',
|
138
155
|
'--bin-path', teuth_bin_path,
|
139
156
|
'--archive-dir', archive_dir,
|
@@ -153,13 +170,24 @@ def main(args):
|
|
153
170
|
run_args.extend(["--job-config", job_config_path])
|
154
171
|
|
155
172
|
try:
|
156
|
-
job_proc = subprocess.Popen(
|
173
|
+
job_proc = subprocess.Popen(
|
174
|
+
run_args,
|
175
|
+
stdout=subprocess.DEVNULL,
|
176
|
+
stderr=subprocess.DEVNULL,
|
177
|
+
)
|
178
|
+
job_procs.add(job_proc)
|
157
179
|
log.info('Job supervisor PID: %s', job_proc.pid)
|
158
180
|
except Exception:
|
159
181
|
error_message = "Saw error while trying to spawn supervisor."
|
160
182
|
log.exception(error_message)
|
161
183
|
if 'targets' in job_config:
|
162
|
-
|
184
|
+
node_names = job_config["targets"].keys()
|
185
|
+
lock_ops.unlock_safe(
|
186
|
+
node_names,
|
187
|
+
job_config["owner"],
|
188
|
+
job_config["name"],
|
189
|
+
job_config["job_id"]
|
190
|
+
)
|
163
191
|
report.try_push_job_info(job_config, dict(
|
164
192
|
status='fail',
|
165
193
|
failure_reason=error_message))
|
@@ -171,12 +199,159 @@ def main(args):
|
|
171
199
|
except Exception:
|
172
200
|
log.exception("Saw exception while trying to delete job")
|
173
201
|
|
202
|
+
return worst_returncode
|
203
|
+
|
204
|
+
|
205
|
+
def find_dispatcher_processes() -> Dict[str, List[psutil.Process]]:
|
206
|
+
def match(proc):
|
207
|
+
try:
|
208
|
+
cmdline = proc.cmdline()
|
209
|
+
except psutil.AccessDenied:
|
210
|
+
return False
|
211
|
+
except psutil.ZombieProcess:
|
212
|
+
return False
|
213
|
+
if len(cmdline) < 3:
|
214
|
+
return False
|
215
|
+
if not cmdline[1].endswith("/teuthology-dispatcher"):
|
216
|
+
return False
|
217
|
+
if cmdline[2] == "--supervisor":
|
218
|
+
return False
|
219
|
+
if "--tube" not in cmdline:
|
220
|
+
return False
|
221
|
+
if proc.pid == os.getpid():
|
222
|
+
return False
|
223
|
+
return True
|
224
|
+
|
225
|
+
procs = {}
|
226
|
+
attrs = ["pid", "cmdline"]
|
227
|
+
for proc in psutil.process_iter(attrs=attrs):
|
228
|
+
if not match(proc):
|
229
|
+
continue
|
230
|
+
cmdline = proc.cmdline()
|
231
|
+
machine_type = cmdline[cmdline.index("--tube") + 1]
|
232
|
+
procs.setdefault(machine_type, []).append(proc)
|
233
|
+
return procs
|
234
|
+
|
235
|
+
|
236
|
+
def prep_job(job_config, log_file_path, archive_dir):
|
237
|
+
job_id = job_config['job_id']
|
238
|
+
check_job_expiration(job_config)
|
239
|
+
|
240
|
+
safe_archive = safepath.munge(job_config['name'])
|
241
|
+
job_config['worker_log'] = log_file_path
|
242
|
+
archive_path_full = os.path.join(
|
243
|
+
archive_dir, safe_archive, str(job_id))
|
244
|
+
job_config['archive_path'] = archive_path_full
|
245
|
+
|
246
|
+
# If the teuthology branch was not specified, default to main and
|
247
|
+
# store that value.
|
248
|
+
teuthology_branch = job_config.get('teuthology_branch', 'main')
|
249
|
+
job_config['teuthology_branch'] = teuthology_branch
|
250
|
+
teuthology_sha1 = job_config.get('teuthology_sha1')
|
251
|
+
if not teuthology_sha1:
|
252
|
+
repo_url = repo_utils.build_git_url('teuthology', 'ceph')
|
253
|
+
try:
|
254
|
+
teuthology_sha1 = repo_utils.ls_remote(repo_url, teuthology_branch)
|
255
|
+
except Exception as exc:
|
256
|
+
log.exception(f"Could not get teuthology sha1 for branch {teuthology_branch}")
|
257
|
+
report.try_push_job_info(
|
258
|
+
job_config,
|
259
|
+
dict(status='dead', failure_reason=str(exc))
|
260
|
+
)
|
261
|
+
raise SkipJob()
|
262
|
+
if not teuthology_sha1:
|
263
|
+
reason = "Teuthology branch {} not found; marking job as dead".format(teuthology_branch)
|
264
|
+
log.error(reason)
|
265
|
+
report.try_push_job_info(
|
266
|
+
job_config,
|
267
|
+
dict(status='dead', failure_reason=reason)
|
268
|
+
)
|
269
|
+
raise SkipJob()
|
270
|
+
if teuth_config.teuthology_path is None:
|
271
|
+
log.info('Using teuthology sha1 %s', teuthology_sha1)
|
272
|
+
|
273
|
+
try:
|
274
|
+
if teuth_config.teuthology_path is not None:
|
275
|
+
teuth_path = teuth_config.teuthology_path
|
276
|
+
else:
|
277
|
+
teuth_path = repo_utils.fetch_teuthology(branch=teuthology_branch,
|
278
|
+
commit=teuthology_sha1)
|
279
|
+
# For the teuthology tasks, we look for suite_branch, and if we
|
280
|
+
# don't get that, we look for branch, and fall back to 'main'.
|
281
|
+
# last-in-suite jobs don't have suite_branch or branch set.
|
282
|
+
ceph_branch = job_config.get('branch', 'main')
|
283
|
+
suite_branch = job_config.get('suite_branch', ceph_branch)
|
284
|
+
suite_sha1 = job_config.get('suite_sha1')
|
285
|
+
suite_repo = job_config.get('suite_repo')
|
286
|
+
if suite_repo:
|
287
|
+
teuth_config.ceph_qa_suite_git_url = suite_repo
|
288
|
+
job_config['suite_path'] = os.path.normpath(os.path.join(
|
289
|
+
repo_utils.fetch_qa_suite(suite_branch, suite_sha1),
|
290
|
+
job_config.get('suite_relpath', ''),
|
291
|
+
))
|
292
|
+
except (BranchNotFoundError, CommitNotFoundError) as exc:
|
293
|
+
log.exception("Requested version not found; marking job as dead")
|
294
|
+
report.try_push_job_info(
|
295
|
+
job_config,
|
296
|
+
dict(status='dead', failure_reason=str(exc))
|
297
|
+
)
|
298
|
+
raise SkipJob()
|
299
|
+
except MaxWhileTries as exc:
|
300
|
+
log.exception("Failed to fetch or bootstrap; marking job as dead")
|
301
|
+
report.try_push_job_info(
|
302
|
+
job_config,
|
303
|
+
dict(status='dead', failure_reason=str(exc))
|
304
|
+
)
|
305
|
+
raise SkipJob()
|
306
|
+
|
307
|
+
teuth_bin_path = os.path.join(teuth_path, 'virtualenv', 'bin')
|
308
|
+
if not os.path.isdir(teuth_bin_path):
|
309
|
+
raise RuntimeError("teuthology branch %s at %s not bootstrapped!" %
|
310
|
+
(teuthology_branch, teuth_bin_path))
|
311
|
+
return job_config, teuth_bin_path
|
312
|
+
|
313
|
+
|
314
|
+
def check_job_expiration(job_config):
|
315
|
+
job_id = job_config['job_id']
|
316
|
+
expired = False
|
317
|
+
now = datetime.datetime.now(datetime.timezone.utc)
|
318
|
+
if expire_str := job_config.get('timestamp'):
|
319
|
+
expire = parse_timestamp(expire_str) + \
|
320
|
+
datetime.timedelta(seconds=teuth_config.max_job_age)
|
321
|
+
expired = expire < now
|
322
|
+
if not expired and (expire_str := job_config.get('expire')):
|
323
|
+
try:
|
324
|
+
expire = parse_timestamp(expire_str)
|
325
|
+
expired = expired or expire < now
|
326
|
+
except ValueError:
|
327
|
+
log.warning(f"Failed to parse job expiration: {expire_str=}")
|
328
|
+
pass
|
329
|
+
if expired:
|
330
|
+
log.info(f"Skipping job {job_id} because it is expired: {expire_str} is in the past")
|
331
|
+
report.try_push_job_info(
|
332
|
+
job_config,
|
333
|
+
# TODO: Add a 'canceled' status to paddles, and use that.
|
334
|
+
dict(status='dead'),
|
335
|
+
)
|
336
|
+
raise SkipJob()
|
337
|
+
|
174
338
|
|
175
339
|
def lock_machines(job_config):
|
176
340
|
report.try_push_job_info(job_config, dict(status='running'))
|
177
341
|
fake_ctx = supervisor.create_fake_context(job_config, block=True)
|
178
|
-
|
179
|
-
|
342
|
+
machine_type = job_config["machine_type"]
|
343
|
+
count = len(job_config['roles'])
|
344
|
+
with exporter.NodeLockingTime().time(
|
345
|
+
machine_type=machine_type,
|
346
|
+
count=count,
|
347
|
+
):
|
348
|
+
lock_ops.block_and_lock_machines(
|
349
|
+
fake_ctx,
|
350
|
+
count,
|
351
|
+
machine_type,
|
352
|
+
tries=-1,
|
353
|
+
reimage=False,
|
354
|
+
)
|
180
355
|
job_config = fake_ctx.config
|
181
356
|
return job_config
|
182
357
|
|