PyPI - yuclid - Versions diffs - 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl - Mend

yuclid 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

yuclid/__init__.py +1 -1
yuclid/cli.py +0 -7
yuclid/plot.py +39 -70
yuclid/run.py +59 -42
{yuclid-0.1.2.dist-info → yuclid-0.1.4.dist-info}/METADATA +1 -1
yuclid-0.1.4.dist-info/RECORD +11 -0
yuclid-0.1.2.dist-info/RECORD +0 -11
{yuclid-0.1.2.dist-info → yuclid-0.1.4.dist-info}/WHEEL +0 -0
{yuclid-0.1.2.dist-info → yuclid-0.1.4.dist-info}/entry_points.txt +0 -0
{yuclid-0.1.2.dist-info → yuclid-0.1.4.dist-info}/top_level.txt +0 -0

yuclid/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.1.0"
1	+ __version__ = "0.1.4"

yuclid/cli.py CHANGED Viewed

@@ -109,13 +109,6 @@ def get_parser():
         nargs="*",
         help="Normalize all values w.r.t. a single reference",
     )
-    # plot_parser.add_argument(
-    #     "-S",
-    #     "--norm-scope",
-    #     choices=["global", "local"],
-    #     default="local",
-    #     help="Normalization scope",
-    # )
     plot_parser.add_argument(
         "-r",
         "--norm-reverse",

yuclid/plot.py CHANGED Viewed

@@ -1,6 +1,4 @@
 from yuclid.log import report, LogLevel
-import yuclid.cli
-import matplotlib.gridspec as gridspec
 import matplotlib.lines as mlines
 import matplotlib.pyplot as plt
 import yuclid.spread as spread
@@ -8,14 +6,10 @@ import seaborn as sns
 import pandas as pd
 import numpy as np
 import scipy.stats
-import subprocess
-import threading
+import yuclid.cli
 import itertools
 import pathlib
-import hashlib
-import time
 import math
-import sys
 def get_current_config(ctx):
@@ -168,8 +162,6 @@ def generate_dataframe(ctx):
     if len(dfs) == 0:
         report(LogLevel.ERROR, "no valid source of data")
-        ctx["alive"] = False
-        sys.exit(1)
     df = pd.concat(dfs)
@@ -208,7 +200,6 @@ def rescale(ctx):
     args = ctx["args"]
     for y in args.y:
         df[y] = df[y] * args.rescale
-    ctx["df"] = df
 def draw(fig, ax, cli_args):
@@ -254,34 +245,6 @@ def generate_space(ctx):
     )
-def file_monitor(ctx):
-    current_hash = None
-    last_hash = None
-    while ctx["alive"]:
-        try:
-            current_hash = ""
-            for file in ctx["local_files"]:
-                with open(file, "rb") as f:
-                    current_hash += hashlib.md5(f.read()).hexdigest()
-        except FileNotFoundError:
-            current_hash = None
-        if current_hash != last_hash:
-            generate_dataframe(ctx)
-            rescale(ctx)
-            generate_space(ctx)
-            compute_ylimits(ctx)
-            space_columns = ctx["df"].columns.difference([ctx["y_axis"]])
-            sizes = ["{}={}".format(d, ctx["df"][d].nunique()) for d in space_columns]
-            missing = compute_missing(ctx)
-            report(LogLevel.INFO, "space sizes", " | ".join(sizes))
-            if len(missing) > 0:
-                report(LogLevel.WARNING, f"at least {len(missing)} missing experiments")
-            update_table(ctx)
-            update_plot(ctx)
-        last_hash = current_hash
-        time.sleep(1)
 def update_table(ctx):
     ax_table = ctx["ax_table"]
     free_dims = ctx["free_dims"]
@@ -317,32 +280,6 @@ def is_remote(file):
     return "@" in file
-def sync_files(ctx):
-    args = ctx["args"]
-    valid_files = ctx["valid_files"]
-    jobs = []
-    for file in valid_files:
-        if is_remote(file):
-            mirror = get_local_mirror(file)
-            proc = subprocess.run(["scp", file, mirror])
-            if proc.returncode != 0:
-                report(LogLevel.ERROR, f"scp transfer failed for {file}")
-                sys.exit(1)
-            jobs.append((file, mirror))
-    def rsync(src, dst):
-        while ctx["alive"]:
-            subprocess.run(
-                ["rsync", "-z", "--checksum", src, dst],
-                stdout=subprocess.DEVNULL,
-                stderr=subprocess.DEVNULL,
-            )
-            time.sleep(args.rsync_interval)
-    for job in jobs:
-        threading.Thread(target=rsync, daemon=True, args=job).start()
 def fontsize_to_y_units(ctx, fontsize):
     fig = ctx["fig"]
     ax = ctx["ax_plot"]
@@ -952,13 +889,9 @@ def validate_args(ctx):
 def start_gui(ctx):
-    ctx["alive"] = True
     update_plot(ctx)
     update_table(ctx)
-    threading.Thread(target=file_monitor, daemon=True, args=(ctx,)).start()
     report(LogLevel.INFO, "application running")
-    time.sleep(1.0)  # wait for the GUI to initialize
     plt.show()
@@ -995,12 +928,48 @@ def compute_ylimits(ctx):
     ctx["top"] = top
+def generate_derived_metrics(ctx):
+    args = ctx["args"]
+    df = ctx["df"]
+    # derived metrics are any -y value with a ":"
+    derived_metrics = dict()
+    new_ys = []
+    for y in args.y:
+        if ":" in y:
+            name, func = y.split(":")
+            derived_metrics[name.strip()] = func.strip()
+            new_ys.append(name.strip())
+        else:
+            new_ys.append(y)
+    for name, func in derived_metrics.items():
+        try:
+            # replace column names in the expression with df[column_name] syntax
+            expression = func
+            for col in df.columns:
+                if col in expression:
+                    expression = expression.replace(col, f"df['{col}']")
+            df[name] = eval(expression)
+        except Exception as e:
+            hint = "maybe you misspelled a column name"
+            report(
+                LogLevel.ERROR,
+                f"failed to evaluate derived metric '{name}'",
+                hint=hint,
+            )
+            continue
+    args.y = new_ys
 def launch(args):
-    ctx = {"args": args, "alive": True}
+    ctx = {"args": args}
     validate_files(ctx)
     locate_files(ctx)
-    sync_files(ctx)
     generate_dataframe(ctx)
+    generate_derived_metrics(ctx)
     validate_args(ctx)
     rescale(ctx)
     generate_space(ctx)

yuclid/run.py CHANGED Viewed

@@ -110,29 +110,36 @@ def load_json(f):
 def aggregate_input_data(settings):
-    data = None
+    data = {
+        "env": {},
+        "setup": {"global": [], "point": []},
+        "space": {},
+        "trials": [],
+        "metrics": [],
+        "presets": {},
+        "order": [],
+    }
     for file in settings["inputs"]:
         with open(file, "r") as f:
             current = normalize_data(load_json(f))
-            if data is None:
-                data = current
-                continue
             for key, val in current.items():
-                if isinstance(data[key], list):
+                if key in ["env", "space", "presets"]:
+                    data[key].update(val)
+                elif key in ["trials", "metrics", "order"]:
                     data[key].extend(val)
-                elif isinstance(data[key], dict):
-                    if key == "space":
-                        for subkey, subval in val.items():
-                            if data[key].get(subkey) is None:
-                                data[key][subkey] = subval
-                            else:
-                                data[key].setdefault(subkey, []).extend(subval)
-                    else:
-                        data[key].update(val)
-            order = data.get("order", []) + current.get("order", [])
-            data["order"] = remove_duplicates(order)
+                elif key == "setup":
+                    for subkey, subval in val.items():
+                        if data[key].get(subkey) is None:
+                            # undefined dimensions are overridden
+                            data[key][subkey] = subval
+                        else:
+                            data[key].setdefault(subkey, []).extend(subval)
+                elif key == "setup":
+                    data[key]["setup"]["global"] += val["setup"]["global"]
+                    data[key]["setup"]["point"] += val["setup"]["point"]
+    data["order"] = remove_duplicates(data["order"])
     if len(data["trials"]) == 0:
         report(LogLevel.FATAL, "no valid trials found")
@@ -735,10 +742,12 @@ def run_point_trials(settings, data, execution, f, i, point):
         )
     i_padded = str(i).zfill(len(str(execution["subspace_size"])))
     for j, trial in enumerate(compatible_trials):
         point_id = os.path.join(
-            settings["temp_dir"], settings["now"], f"{i_padded}." + point_to_string(point) + f"_trial{j}"
+            settings["temp_dir"],
+            settings["now"],
+            f"{i_padded}." + point_to_string(point) + f"_trial{j}",
         )
         command = substitute_global_yvars(trial["command"], execution["subspace"])
@@ -780,7 +789,8 @@ def run_point_trials(settings, data, execution, f, i, point):
             capture_output=True,
             env=execution["env"],
         )
-        if command_output.returncode != 0:
+        def complain():
             hint = "check the following files for more details:\n"
             hint += f"{point_id}.out\n{point_id}.err\n{point_id}.tmp"
             report(
@@ -791,6 +801,9 @@ def run_point_trials(settings, data, execution, f, i, point):
                 ),
                 hint=hint,
             )
+        if command_output.returncode != 0:
+            complain()
         else:
             output_lines = command_output.stdout.strip().split("\n")
@@ -798,7 +811,10 @@ def run_point_trials(settings, data, execution, f, i, point):
                 try:
                     return int(x)
                 except ValueError:
-                    return float(x)
+                    try:
+                        return float(x)
+                    except ValueError:
+                        complain()
             collected_metrics[metric["name"]] = [
                 int_or_float(line) for line in output_lines
@@ -871,6 +887,20 @@ def validate_execution(execution, data):
                 point_to_string(point),
                 hint="try relaxing your trial conditions or adding more trials.",
             )
+        if len(execution["metrics"] or []) > 0:
+            compatible_metric_names = {m["name"] for m in compatible_metrics}
+            incompatible = [
+                m for m in execution["metrics"] if m not in compatible_metric_names
+            ]
+            if len(incompatible) > 0:
+                report(
+                    LogLevel.ERROR,
+                    "some metrics are not compatible with {}".format(
+                        point_to_string(point)
+                    ),
+                    ", ".join(incompatible),
+                    hint="try relaxing your metric conditions or adding more metrics.",
+                )
 def get_compatible_trials_and_metrics(data, point, execution):
@@ -911,25 +941,12 @@ def run_subspace_trials(settings, data, execution):
                 compatible_trials, compatible_metrics = (
                     get_compatible_trials_and_metrics(data, point, execution)
                 )
-                if len(compatible_trials) == 0:
-                    report(
-                        LogLevel.ERROR,
-                        point_to_string(point),
-                        "no compatible trials found",
-                    )
-                elif len(compatible_metrics) == 0:
-                    report(
-                        LogLevel.ERROR,
-                        point_to_string(point),
-                        "no compatible metrics found",
-                    )
-                else:
-                    report(
-                        LogLevel.INFO,
-                        get_progress(i, execution["subspace_size"]),
-                        "dry run",
-                        point_to_string(point),
-                    )
+                report(
+                    LogLevel.INFO,
+                    get_progress(i, execution["subspace_size"]),
+                    "dry run",
+                    point_to_string(point),
+                )
     else:
         output_dir = os.path.dirname(settings["output"])
         if output_dir and not os.path.exists(output_dir):
@@ -1122,7 +1139,7 @@ def normalize_point_setup(point_setup, space):
         report(LogLevel.FATAL, "point setup must be a string or a list")
     # check validity of 'on' fields
-    for item in point_setup:
+    for item in normalized_items:
         if not isinstance(item["on"], (list, type(None))):
             report(LogLevel.FATAL, "point setup 'on' must be a list or None")
         for dim in item["on"]:
@@ -1139,7 +1156,7 @@ def normalize_point_setup(point_setup, space):
                 )
     # check validity of 'parallel' fields
-    for item in point_setup:
+    for item in normalized_items:
         parallel = item["parallel"]
         if not isinstance(parallel, (bool, list)):
             report(LogLevel.FATAL, "point setup 'parallel' must be a boolean or a list")

{yuclid-0.1.2.dist-info → yuclid-0.1.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: yuclid
-Version: 0.1.2
+Version: 0.1.4
 Summary: Run experiments and interactively plot results across combinations of user-specified dimensions
 Author-email: Federico Sossai <federico.sossai@gmail.com>
 Project-URL: Homepage, https://github.com/fsossai/yuclid

yuclid-0.1.4.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,11 @@
+yuclid/__init__.py,sha256=JMD28FXYHc_TM03visyUSd3UA9FZAaJMRStnfZoq50Y,21
+yuclid/cli.py,sha256=YZzxJty5wlUhCOEELvEcJeQb_lQ1Qc89RG4_s5IyKWU,6224
+yuclid/log.py,sha256=GR_FVfNroumuonKguAPd6H1rKjxJKRc8tAS2sVNTbzE,1655
+yuclid/plot.py,sha256=R6IXw6hHuXYFx1MjTKLCIqBfdNORStVEoDidAr-jEuE,29697
+yuclid/run.py,sha256=s1BGCmYckO2s5TSoKNCb4llZpUouxyooxtVlbqsQNTs,44641
+yuclid/spread.py,sha256=4Ci3nsu8n_dhG-AK2IWHKRElQ8oaGdw14LrgNu79biM,4938
+yuclid-0.1.4.dist-info/METADATA,sha256=Qm5Sw-K-L1VGSEJVBwE_C6Ubjp6JNa55SiH7snsWdPM,673
+yuclid-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+yuclid-0.1.4.dist-info/entry_points.txt,sha256=2AvTtyt5iBnjr6HnjqH_3PeSoq9UzIbT92qivmEbOYA,43
+yuclid-0.1.4.dist-info/top_level.txt,sha256=cL5mb4h_4etwTsqhPvSnoVBXImIzPFGd3rINV1nEjPo,7
+yuclid-0.1.4.dist-info/RECORD,,

yuclid-0.1.2.dist-info/RECORD DELETED Viewed

@@ -1,11 +0,0 @@
-yuclid/__init__.py,sha256=Pru0BlFBASFCFo7McHdohtKkUtgMPDwbGfyUZlE2_Vw,21
-yuclid/cli.py,sha256=l5WUY6Q6nwg7WRrAAPf5uaspG9zrEPE9BA9v3eYI_vE,6410
-yuclid/log.py,sha256=GR_FVfNroumuonKguAPd6H1rKjxJKRc8tAS2sVNTbzE,1655
-yuclid/plot.py,sha256=RV_bgkFDpOGxw7ankW7QsnBsyrholBtYKKj9jUtBAyM,30836
-yuclid/run.py,sha256=NVvcmLiQkzypgqpRbmI4lttgKSy05hYJKVBEp3fxnpA,44106
-yuclid/spread.py,sha256=4Ci3nsu8n_dhG-AK2IWHKRElQ8oaGdw14LrgNu79biM,4938
-yuclid-0.1.2.dist-info/METADATA,sha256=lCvP9NyDUhHzKnu4e8jCHVTFyAkgps_hH27EuwCL0gk,673
-yuclid-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-yuclid-0.1.2.dist-info/entry_points.txt,sha256=2AvTtyt5iBnjr6HnjqH_3PeSoq9UzIbT92qivmEbOYA,43
-yuclid-0.1.2.dist-info/top_level.txt,sha256=cL5mb4h_4etwTsqhPvSnoVBXImIzPFGd3rINV1nEjPo,7
-yuclid-0.1.2.dist-info/RECORD,,

{yuclid-0.1.2.dist-info → yuclid-0.1.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{yuclid-0.1.2.dist-info → yuclid-0.1.4.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{yuclid-0.1.2.dist-info → yuclid-0.1.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

yuclid 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

yuclid 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl