teuthology 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scripts/node_cleanup.py +18 -2
- scripts/suite.py +2 -0
- teuthology/__init__.py +0 -1
- teuthology/config.py +28 -7
- teuthology/dispatcher/supervisor.py +9 -6
- teuthology/lock/cli.py +4 -2
- teuthology/lock/ops.py +10 -9
- teuthology/lock/query.py +28 -4
- teuthology/lock/util.py +1 -1
- teuthology/misc.py +13 -58
- teuthology/openstack/__init__.py +202 -176
- teuthology/openstack/setup-openstack.sh +52 -27
- teuthology/orchestra/connection.py +3 -1
- teuthology/orchestra/daemon/cephadmunit.py +2 -2
- teuthology/orchestra/opsys.py +15 -0
- teuthology/orchestra/remote.py +54 -2
- teuthology/orchestra/run.py +8 -2
- teuthology/provision/downburst.py +84 -43
- teuthology/provision/fog.py +2 -2
- teuthology/repo_utils.py +3 -1
- teuthology/run.py +1 -1
- teuthology/scrape.py +5 -2
- teuthology/suite/merge.py +3 -1
- teuthology/suite/run.py +51 -37
- teuthology/suite/util.py +2 -2
- teuthology/task/install/rpm.py +8 -16
- teuthology/task/internal/__init__.py +2 -1
- teuthology/task/internal/syslog.py +17 -13
- teuthology/task/kernel.py +1 -1
- {teuthology-1.2.0.dist-info → teuthology-1.2.2.dist-info}/METADATA +11 -10
- {teuthology-1.2.0.dist-info → teuthology-1.2.2.dist-info}/RECORD +38 -44
- {teuthology-1.2.0.dist-info → teuthology-1.2.2.dist-info}/WHEEL +1 -1
- teuthology/lock/test/__init__.py +0 -0
- teuthology/lock/test/test_lock.py +0 -7
- teuthology/task/tests/__init__.py +0 -170
- teuthology/task/tests/test_fetch_coredumps.py +0 -116
- teuthology/task/tests/test_locking.py +0 -25
- teuthology/task/tests/test_run.py +0 -40
- {teuthology-1.2.0.data → teuthology-1.2.2.data}/scripts/adjust-ulimits +0 -0
- {teuthology-1.2.0.data → teuthology-1.2.2.data}/scripts/daemon-helper +0 -0
- {teuthology-1.2.0.data → teuthology-1.2.2.data}/scripts/stdin-killer +0 -0
- {teuthology-1.2.0.dist-info → teuthology-1.2.2.dist-info}/entry_points.txt +0 -0
- {teuthology-1.2.0.dist-info → teuthology-1.2.2.dist-info/licenses}/LICENSE +0 -0
- {teuthology-1.2.0.dist-info → teuthology-1.2.2.dist-info}/top_level.txt +0 -0
scripts/node_cleanup.py
CHANGED
@@ -3,14 +3,27 @@ import logging
 import sys
 
 import teuthology
+from teuthology.config import config
 from teuthology.lock import query, ops
 
+
 def main():
     args = parse_args(sys.argv[1:])
     if args.verbose:
         teuthology.log.setLevel(logging.DEBUG)
+    else:
+        teuthology.log.setLevel(100)
     log = logging.getLogger(__name__)
-
+    logger = logging.getLogger()
+    for handler in logger.handlers:
+        handler.setFormatter(
+            logging.Formatter('%(message)s')
+        )
+    try:
+        stale = query.find_stale_locks(args.owner)
+    except Exception:
+        log.exception(f"Error while check for stale locks held by {args.owner}")
+        return
     if not stale:
         return
     by_owner = {}
@@ -26,10 +39,13 @@ def main():
         log.info("Would attempt to unlock:")
         for owner, nodes in by_owner.items():
             for node in nodes:
-
+                node_job = node['description'].replace(
+                    config.archive_base, config.results_ui_server)
+                log.info(f"{node['name']}\t{node_job}")
     else:
         for owner, nodes in by_owner.items():
             ops.unlock_safe([node["name"] for node in nodes], owner)
+    log.info(f"unlocked {len(stale)} nodes")
 
 def parse_args(argv):
     parser = argparse.ArgumentParser(
scripts/suite.py
CHANGED
@@ -77,6 +77,8 @@ Standard arguments:
                             [default: qa]
  --suite-branch <suite_branch>
                             Use this suite branch instead of the ceph branch
+ --suite-sha1 <suite_sha1>  The suite sha1 to use for the tests (overrides
+                            --suite-branch)
  --suite-dir <suite_dir>    Use this alternative directory as-is when
                             assembling jobs from yaml fragments. This causes
                             <suite_branch> to be ignored for scheduling
teuthology/__init__.py
CHANGED
teuthology/config.py
CHANGED
@@ -7,6 +7,12 @@ except ImportError:
     from collections import MutableMapping
 
 
+# Configuration constants
+SYSTEM_CONFIG_PATH = '/etc/teuthology.yaml'
+USER_CONFIG_PATH = '~/.teuthology.yaml'
+CONFIG_PATH_VAR_NAME = 'TEUTHOLOGY_CONFIG'  # name of env var to check
+
+
 def init_logging():
     log = logging.getLogger(__name__)
     return log
@@ -135,9 +141,9 @@ class TeuthologyConfig(YamlConfig):
     """
     This class is intended to unify teuthology's many configuration files and
     objects. Currently it serves as a convenient interface to
-    ~/.teuthology.yaml
+    ~/.teuthology.yaml or equivalent.
     """
-    yaml_path =
+    yaml_path = USER_CONFIG_PATH  # yaml_path is updated in _get_config_path
     _defaults = {
         'archive_base': '/home/teuthworker/archive',
         'archive_upload': None,
@@ -149,6 +155,7 @@ class TeuthologyConfig(YamlConfig):
         'ceph_git_url': None,
         'ceph_qa_suite_git_url': None,
         'ceph_cm_ansible_git_url': None,
+        'teuthology_git_url': None,
         'use_conserver': False,
         'conserver_master': 'conserver.front.sepia.ceph.com',
         'conserver_port': 3109,
@@ -214,6 +221,10 @@ class TeuthologyConfig(YamlConfig):
         return (self.ceph_git_url or
                 self.ceph_git_base_url + 'ceph-ci.git')
 
+    def get_teuthology_git_url(self):
+        return (self.teuthology_git_url or
+                self.ceph_git_base_url + 'teuthology.git')
+
 
 class JobConfig(YamlConfig):
     pass
@@ -285,10 +296,20 @@ def set_config_attr(obj):
 
 
 def _get_config_path():
-
-    if
-
-
-
+    """Look for a teuthology config yaml and return it's path.
+    Raises ValueError if no config yaml can be found.
+    """
+    paths = [
+        os.path.join(os.path.expanduser(USER_CONFIG_PATH)),
+        SYSTEM_CONFIG_PATH,
+    ]
+    if CONFIG_PATH_VAR_NAME in os.environ:
+        paths.insert(0, os.path.expanduser(os.environ[CONFIG_PATH_VAR_NAME]))
+    for path in paths:
+        if os.path.exists(path):
+            return path
+    log.warning(f"no teuthology config found, looked for: {paths}")
+    return None
+
 
 config = TeuthologyConfig(yaml_path=_get_config_path())
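The new `_get_config_path()` consults an environment variable before the user and system configs. A minimal sketch of the resulting lookup order, assuming the constants shown above; the helper name and the final print are illustrative, not part of the package:

```python
import os

# Lookup order mirrored from _get_config_path() in the diff above.
def resolve_teuthology_config(environ=os.environ):
    paths = [
        os.path.expanduser('~/.teuthology.yaml'),  # USER_CONFIG_PATH
        '/etc/teuthology.yaml',                    # SYSTEM_CONFIG_PATH
    ]
    # TEUTHOLOGY_CONFIG, when set, takes priority over both defaults.
    if 'TEUTHOLOGY_CONFIG' in environ:
        paths.insert(0, os.path.expanduser(environ['TEUTHOLOGY_CONFIG']))
    for path in paths:
        if os.path.exists(path):
            return path
    return None  # TeuthologyConfig then falls back to its built-in defaults

print(resolve_teuthology_config())
```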
teuthology/dispatcher/supervisor.py
CHANGED
@@ -226,6 +226,7 @@ def reimage(job_config):
         reimaged = lock_ops.reimage_machines(ctx, targets, job_config['machine_type'])
     except Exception as e:
         log.exception('Reimaging error. Nuking machines...')
+        unlock_targets(job_config)
         # Reimage failures should map to the 'dead' status instead of 'fail'
         report.try_push_job_info(
             ctx.config,
@@ -245,17 +246,19 @@ def reimage(job_config):
 
 
 def unlock_targets(job_config):
-
-
-
-
+    """
+    Unlock machines only if locked and description matches.
+
+    :param job_config: dict, job config data
+    """
+    machine_statuses = query.get_statuses(job_config['targets'].keys())
     locked = []
     for status in machine_statuses:
         name = shortname(status['name'])
         description = status['description']
         if not status['locked']:
             continue
-        if description !=
+        if description != job_config['archive_path']:
             log.warning(
                 "Was going to unlock %s but it was locked by another job: %s",
                 name, description
@@ -266,7 +269,7 @@ def unlock_targets(job_config):
         return
     if job_config.get("unlock_on_failure", True):
         log.info('Unlocking machines...')
-        lock_ops.unlock_safe(locked,
+        lock_ops.unlock_safe(locked, job_config["owner"], job_config["name"], job_config["job_id"])
 
 
 def run_with_watchdog(process, job_config):
teuthology/lock/cli.py
CHANGED
@@ -178,12 +178,14 @@ def main(ctx):
                 # Update keys last
                 updatekeys_machines = list()
             else:
-                machines_to_update.append(machine)
                 ops.update_nodes([machine], True)
-                teuthology.provision.create_if_vm(
+                created = teuthology.provision.create_if_vm(
                     ctx,
                     misc.canonicalize_hostname(machine),
                 )
+                # do not try to update inventory if failed to create vm
+                if created:
+                    machines_to_update.append(machine)
     with teuthology.parallel.parallel() as p:
         ops.update_nodes(reimage_machines, True)
         for machine in reimage_machines:
teuthology/lock/ops.py
CHANGED
@@ -76,18 +76,20 @@ def lock_many(ctx, num, machine_type, user=None, description=None,
     # all in one shot. If we are passed 'plana,mira,burnupi,vps', do one query
     # for 'plana,mira,burnupi' and one for 'vps'
     machine_types_list = misc.get_multi_machine_types(machine_type)
-
+    downburst_types = teuthology.provision.downburst.get_types()
+    if all(t in downburst_types for t in machine_types_list):
         machine_types = machine_types_list
     elif machine_types_list == ['openstack']:
         return lock_many_openstack(ctx, num, machine_type,
                                    user=user,
                                    description=description,
                                    arch=arch)
-    elif
-
-
-
-
+    elif any(t in downburst_types for t in machine_types_list):
+        the_vps = list(t for t in machine_types_list
+                       if t in downburst_types)
+        non_vps = list(t for t in machine_types_list
+                       if not t in downburst_types)
+        machine_types = ['|'.join(non_vps), '|'.join(the_vps)]
     else:
         machine_types_str = '|'.join(machine_types_list)
         machine_types = [machine_types_str, ]
@@ -102,9 +104,9 @@ def lock_many(ctx, num, machine_type, user=None, description=None,
     )
     # Only query for os_type/os_version if non-vps and non-libcloud, since
     # in that case we just create them.
-    vm_types =
+    vm_types = downburst_types + teuthology.provision.cloud.get_types()
     reimage_types = teuthology.provision.get_reimage_types()
-    if machine_type not in vm_types + reimage_types:
+    if machine_type not in (vm_types + reimage_types):
         if os_type:
             data['os_type'] = os_type
         if os_version:
@@ -445,7 +447,6 @@ def block_and_lock_machines(ctx, total_requested, machine_type, reimage=True, tr
             loopcount += 1
             time.sleep(10)
             keys_dict = misc.ssh_keyscan(vmlist)
-            log.info('virtual machine is still unavailable')
             if loopcount == 40:
                 loopcount = 0
                 log.info('virtual machine(s) still not up, ' +
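lock_many() now asks downburst which machine types it can provision and, for a mixed request, issues one lock-server query per group. A small illustration of that partitioning step; the type names below are made up, and the real list comes from teuthology.provision.downburst.get_types():

```python
downburst_types = ['vps']                      # placeholder for get_types()
machine_types_list = ['plana', 'mira', 'vps']  # e.g. from get_multi_machine_types()

the_vps = [t for t in machine_types_list if t in downburst_types]
non_vps = [t for t in machine_types_list if t not in downburst_types]
machine_types = ['|'.join(non_vps), '|'.join(the_vps)]

print(machine_types)  # ['plana|mira', 'vps'] -> one query per group
```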
teuthology/lock/query.py
CHANGED
@@ -1,3 +1,4 @@
+import datetime
 import logging
 import os
 import requests
@@ -8,6 +9,7 @@ from teuthology import misc
 from teuthology.config import config
 from teuthology.contextutil import safe_while
 from teuthology.util.compat import urlencode
+from teuthology.util.time import parse_timestamp
 
 
 log = logging.getLogger(__name__)
@@ -125,17 +127,18 @@ def find_stale_locks(owner=None) -> List[Dict]:
     # running
     result = list()
     for node in nodes:
-        if node_active_job(node["name"]):
+        if node_active_job(node["name"], grace_time=5):
             continue
         result.append(node)
     return result
 
-def node_active_job(name: str, status: Union[dict, None] = None) -> Union[str, None]:
+def node_active_job(name: str, status: Union[dict, None] = None, grace_time: int = 0) -> Union[str, None]:
     """
     Is this node's job active (e.g. running or waiting)?
 
     :param node: The node dict as returned from the lock server
     :param cache: A set() used for caching results
+    :param grace: A period of time (in mins) after job finishes before we consider the node inactive
     :returns: A string if the node has an active job, or None if not
     """
     status = status or get_status(name)
@@ -143,18 +146,39 @@ def node_active_job(name: str, status: Union[dict, None] = None) -> Union[str, None]:
         # This should never happen with a normal node
         return "node had no status"
     description = status['description']
+    if '/' not in description:
+        # technically not an "active job", but someone locked the node
+        # for a different purpose and is likely still using it.
+        return description
     (run_name, job_id) = description.split('/')[-2:]
     if not run_name or job_id == '':
         # We thought this node might have a stale job, but no.
         return "node description does not contained scheduled job info"
     url = f"{config.results_server}/runs/{run_name}/jobs/{job_id}/"
     job_status = ""
+    active = True
     with safe_while(
             sleep=1, increment=0.5, action='node_is_active') as proceed:
         while proceed():
             resp = requests.get(url)
             if resp.ok:
-
+                job_obj = resp.json()
+                job_status = job_obj["status"]
+                active = job_status and job_status not in ('pass', 'fail', 'dead')
+                if active:
+                    break
+                job_updated = job_obj["updated"]
+                if not grace_time:
+                    break
+                try:
+                    delta = datetime.datetime.now(datetime.timezone.utc) - parse_timestamp(job_updated)
+                    active = active or delta < datetime.timedelta(minutes=grace_time)
+                except Exception:
+                    log.exception(f"{run_name}/{job_id} updated={job_updated}")
                 break
-
+            elif resp.status_code == 404:
+                break
+            else:
+                log.debug(f"Error {resp.status_code} listing job {run_name}/{job_id} for {name}: {resp.text}")
+    if active:
         return description
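With the new grace_time parameter, a node whose job reached a terminal status only a few minutes ago is still reported as active. A rough sketch of that time check, assuming the job's "updated" field parses to an aware UTC datetime (the sample values are invented):

```python
import datetime

def within_grace(job_updated: datetime.datetime, grace_minutes: int) -> bool:
    # Mirrors the delta comparison in node_active_job(): a recently updated
    # job keeps the node "active" even after it passed, failed, or died.
    delta = datetime.datetime.now(datetime.timezone.utc) - job_updated
    return delta < datetime.timedelta(minutes=grace_minutes)

finished = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(minutes=2)
print(within_grace(finished, 5))  # True: still inside the 5-minute grace period
print(within_grace(finished, 1))  # False: old enough to be considered stale
```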
teuthology/lock/util.py
CHANGED
@@ -18,7 +18,7 @@ def vps_version_or_type_valid(machine_type, os_type, os_version):
     is skipped (so that this code should behave as it did before this
     check was added).
     """
-    if not machine_type
+    if not (machine_type in teuthology.provision.downburst.get_types()):
         return True
     if os_type is None or os_version is None:
         # we'll use the defaults provided by provision.create_if_vm
teuthology/misc.py
CHANGED
@@ -1009,64 +1009,17 @@ def deep_merge(a: DeepMerge, b: DeepMerge) -> DeepMerge:
         return a
     return b
 
-
-def get_valgrind_args(testdir, name, preamble, v, exit_on_first_error=True):
+def update_key(key_to_update, a: dict, b: dict):
     """
-
-
-    testdir - test results directory
-    name - name of daemon (for naming hte log file)
-    preamble - stuff we should run before valgrind
-    v - valgrind arguments
+    Update key (`key_to_update`) of dict `a` on all levels
+    to the values of same key in `b` dict.
     """
-
-
-
-
-
-
-    preamble.extend([
-        'env', 'OPENSSL_ia32cap=~0x1000000000000000',
-    ])
-
-    val_path = '/var/log/ceph/valgrind'
-    if '--tool=memcheck' in v or '--tool=helgrind' in v:
-        extra_args = [
-            'valgrind',
-            '--trace-children=no',
-            '--child-silent-after-fork=yes',
-            '--soname-synonyms=somalloc=*tcmalloc*',
-            '--num-callers=50',
-            '--suppressions={tdir}/valgrind.supp'.format(tdir=testdir),
-            '--xml=yes',
-            '--xml-file={vdir}/{n}.log'.format(vdir=val_path, n=name),
-            '--time-stamp=yes',
-            '--vgdb=yes',
-        ]
-    else:
-        extra_args = [
-            'valgrind',
-            '--trace-children=no',
-            '--child-silent-after-fork=yes',
-            '--soname-synonyms=somalloc=*tcmalloc*',
-            '--suppressions={tdir}/valgrind.supp'.format(tdir=testdir),
-            '--log-file={vdir}/{n}.log'.format(vdir=val_path, n=name),
-            '--time-stamp=yes',
-            '--vgdb=yes',
-        ]
-    if exit_on_first_error:
-        extra_args.extend([
-            # at least Valgrind 3.14 is required
-            '--exit-on-first-error=yes',
-            '--error-exitcode=42',
-        ])
-    args = [
-        'cd', testdir,
-        run.Raw('&&'),
-    ] + preamble + extra_args + v
-    log.debug('running %s under valgrind with args %s', name, args)
-    return args
-
+    for key, value in b.items():
+        if key == key_to_update:
+            a[key] = value
+        elif isinstance(value, dict):
+            if key in a and isinstance(a[key], dict):
+                update_key(key_to_update, a[key], value)
 
 def ssh_keyscan(hostnames, _raise=True):
     """
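In the hunk above, get_valgrind_args() is removed and a small update_key() helper is added; it overrides one key wherever it appears in a nested dict. A quick usage sketch with invented data (the config keys below are made up):

```python
from teuthology.misc import update_key

job = {'overrides': {'ceph': {'log-whitelist': ['old entry'], 'conf': {}}}}
patch = {'overrides': {'ceph': {'log-whitelist': ['new entry', 'another']}}}

# Every 'log-whitelist' found while walking `patch` overwrites the
# corresponding key in `job`, at any nesting depth.
update_key('log-whitelist', job, patch)
print(job['overrides']['ceph']['log-whitelist'])  # ['new entry', 'another']
```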
@@ -1148,15 +1101,17 @@ def ssh_keyscan_wait(hostname):
         log.info("try ssh_keyscan again for " + str(hostname))
     return success
 
-def stop_daemons_of_type(ctx, type_, cluster='ceph'):
+def stop_daemons_of_type(ctx, type_, cluster='ceph', timeout=300):
     """
     :param type_: type of daemons to be stopped.
+    :param cluster: Cluster name, default is 'ceph'.
+    :param timeout: Timeout in seconds for stopping each daemon.
     """
     log.info('Shutting down %s daemons...' % type_)
     exc = None
     for daemon in ctx.daemons.iter_daemons_of_role(type_, cluster):
         try:
-            daemon.stop()
+            daemon.stop(timeout)
         except (CommandFailedError,
                 CommandCrashedError,
                 ConnectionLostError) as e: