yuclid 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
yuclid/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.0"
1
+ __version__ = "0.1.4"
yuclid/cli.py CHANGED
@@ -109,13 +109,6 @@ def get_parser():
109
109
  nargs="*",
110
110
  help="Normalize all values w.r.t. a single reference",
111
111
  )
112
- # plot_parser.add_argument(
113
- # "-S",
114
- # "--norm-scope",
115
- # choices=["global", "local"],
116
- # default="local",
117
- # help="Normalization scope",
118
- # )
119
112
  plot_parser.add_argument(
120
113
  "-r",
121
114
  "--norm-reverse",
yuclid/plot.py CHANGED
@@ -1,6 +1,4 @@
1
1
  from yuclid.log import report, LogLevel
2
- import yuclid.cli
3
- import matplotlib.gridspec as gridspec
4
2
  import matplotlib.lines as mlines
5
3
  import matplotlib.pyplot as plt
6
4
  import yuclid.spread as spread
@@ -8,14 +6,10 @@ import seaborn as sns
8
6
  import pandas as pd
9
7
  import numpy as np
10
8
  import scipy.stats
11
- import subprocess
12
- import threading
9
+ import yuclid.cli
13
10
  import itertools
14
11
  import pathlib
15
- import hashlib
16
- import time
17
12
  import math
18
- import sys
19
13
 
20
14
 
21
15
  def get_current_config(ctx):
@@ -168,8 +162,6 @@ def generate_dataframe(ctx):
168
162
 
169
163
  if len(dfs) == 0:
170
164
  report(LogLevel.ERROR, "no valid source of data")
171
- ctx["alive"] = False
172
- sys.exit(1)
173
165
 
174
166
  df = pd.concat(dfs)
175
167
 
@@ -208,7 +200,6 @@ def rescale(ctx):
208
200
  args = ctx["args"]
209
201
  for y in args.y:
210
202
  df[y] = df[y] * args.rescale
211
- ctx["df"] = df
212
203
 
213
204
 
214
205
  def draw(fig, ax, cli_args):
@@ -254,34 +245,6 @@ def generate_space(ctx):
254
245
  )
255
246
 
256
247
 
257
- def file_monitor(ctx):
258
- current_hash = None
259
- last_hash = None
260
- while ctx["alive"]:
261
- try:
262
- current_hash = ""
263
- for file in ctx["local_files"]:
264
- with open(file, "rb") as f:
265
- current_hash += hashlib.md5(f.read()).hexdigest()
266
- except FileNotFoundError:
267
- current_hash = None
268
- if current_hash != last_hash:
269
- generate_dataframe(ctx)
270
- rescale(ctx)
271
- generate_space(ctx)
272
- compute_ylimits(ctx)
273
- space_columns = ctx["df"].columns.difference([ctx["y_axis"]])
274
- sizes = ["{}={}".format(d, ctx["df"][d].nunique()) for d in space_columns]
275
- missing = compute_missing(ctx)
276
- report(LogLevel.INFO, "space sizes", " | ".join(sizes))
277
- if len(missing) > 0:
278
- report(LogLevel.WARNING, f"at least {len(missing)} missing experiments")
279
- update_table(ctx)
280
- update_plot(ctx)
281
- last_hash = current_hash
282
- time.sleep(1)
283
-
284
-
285
248
  def update_table(ctx):
286
249
  ax_table = ctx["ax_table"]
287
250
  free_dims = ctx["free_dims"]
@@ -317,32 +280,6 @@ def is_remote(file):
317
280
  return "@" in file
318
281
 
319
282
 
320
- def sync_files(ctx):
321
- args = ctx["args"]
322
- valid_files = ctx["valid_files"]
323
- jobs = []
324
- for file in valid_files:
325
- if is_remote(file):
326
- mirror = get_local_mirror(file)
327
- proc = subprocess.run(["scp", file, mirror])
328
- if proc.returncode != 0:
329
- report(LogLevel.ERROR, f"scp transfer failed for {file}")
330
- sys.exit(1)
331
- jobs.append((file, mirror))
332
-
333
- def rsync(src, dst):
334
- while ctx["alive"]:
335
- subprocess.run(
336
- ["rsync", "-z", "--checksum", src, dst],
337
- stdout=subprocess.DEVNULL,
338
- stderr=subprocess.DEVNULL,
339
- )
340
- time.sleep(args.rsync_interval)
341
-
342
- for job in jobs:
343
- threading.Thread(target=rsync, daemon=True, args=job).start()
344
-
345
-
346
283
  def fontsize_to_y_units(ctx, fontsize):
347
284
  fig = ctx["fig"]
348
285
  ax = ctx["ax_plot"]
@@ -952,13 +889,9 @@ def validate_args(ctx):
952
889
 
953
890
 
954
891
  def start_gui(ctx):
955
- ctx["alive"] = True
956
-
957
892
  update_plot(ctx)
958
893
  update_table(ctx)
959
- threading.Thread(target=file_monitor, daemon=True, args=(ctx,)).start()
960
894
  report(LogLevel.INFO, "application running")
961
- time.sleep(1.0) # wait for the GUI to initialize
962
895
  plt.show()
963
896
 
964
897
 
@@ -995,12 +928,48 @@ def compute_ylimits(ctx):
995
928
  ctx["top"] = top
996
929
 
997
930
 
931
+ def generate_derived_metrics(ctx):
932
+ args = ctx["args"]
933
+ df = ctx["df"]
934
+
935
+ # derived metrics are any -y value with a ":"
936
+ derived_metrics = dict()
937
+ new_ys = []
938
+ for y in args.y:
939
+ if ":" in y:
940
+ name, func = y.split(":")
941
+ derived_metrics[name.strip()] = func.strip()
942
+ new_ys.append(name.strip())
943
+ else:
944
+ new_ys.append(y)
945
+
946
+ for name, func in derived_metrics.items():
947
+ try:
948
+ # replace column names in the expression with df[column_name] syntax
949
+ expression = func
950
+ for col in df.columns:
951
+ if col in expression:
952
+ expression = expression.replace(col, f"df['{col}']")
953
+
954
+ df[name] = eval(expression)
955
+ except Exception as e:
956
+ hint = "maybe you misspelled a column name"
957
+ report(
958
+ LogLevel.ERROR,
959
+ f"failed to evaluate derived metric '{name}'",
960
+ hint=hint,
961
+ )
962
+ continue
963
+
964
+ args.y = new_ys
965
+
966
+
998
967
  def launch(args):
999
- ctx = {"args": args, "alive": True}
968
+ ctx = {"args": args}
1000
969
  validate_files(ctx)
1001
970
  locate_files(ctx)
1002
- sync_files(ctx)
1003
971
  generate_dataframe(ctx)
972
+ generate_derived_metrics(ctx)
1004
973
  validate_args(ctx)
1005
974
  rescale(ctx)
1006
975
  generate_space(ctx)
yuclid/run.py CHANGED
@@ -110,29 +110,36 @@ def load_json(f):
110
110
 
111
111
 
112
112
  def aggregate_input_data(settings):
113
- data = None
113
+ data = {
114
+ "env": {},
115
+ "setup": {"global": [], "point": []},
116
+ "space": {},
117
+ "trials": [],
118
+ "metrics": [],
119
+ "presets": {},
120
+ "order": [],
121
+ }
114
122
 
115
123
  for file in settings["inputs"]:
116
124
  with open(file, "r") as f:
117
125
  current = normalize_data(load_json(f))
118
- if data is None:
119
- data = current
120
- continue
121
126
  for key, val in current.items():
122
- if isinstance(data[key], list):
127
+ if key in ["env", "space", "presets"]:
128
+ data[key].update(val)
129
+ elif key in ["trials", "metrics", "order"]:
123
130
  data[key].extend(val)
124
- elif isinstance(data[key], dict):
125
- if key == "space":
126
- for subkey, subval in val.items():
127
- if data[key].get(subkey) is None:
128
- data[key][subkey] = subval
129
- else:
130
- data[key].setdefault(subkey, []).extend(subval)
131
- else:
132
- data[key].update(val)
133
-
134
- order = data.get("order", []) + current.get("order", [])
135
- data["order"] = remove_duplicates(order)
131
+ elif key == "setup":
132
+ for subkey, subval in val.items():
133
+ if data[key].get(subkey) is None:
134
+ # undefined dimensions are overridden
135
+ data[key][subkey] = subval
136
+ else:
137
+ data[key].setdefault(subkey, []).extend(subval)
138
+ elif key == "setup":
139
+ data[key]["setup"]["global"] += val["setup"]["global"]
140
+ data[key]["setup"]["point"] += val["setup"]["point"]
141
+
142
+ data["order"] = remove_duplicates(data["order"])
136
143
 
137
144
  if len(data["trials"]) == 0:
138
145
  report(LogLevel.FATAL, "no valid trials found")
@@ -735,10 +742,12 @@ def run_point_trials(settings, data, execution, f, i, point):
735
742
  )
736
743
 
737
744
  i_padded = str(i).zfill(len(str(execution["subspace_size"])))
738
-
745
+
739
746
  for j, trial in enumerate(compatible_trials):
740
747
  point_id = os.path.join(
741
- settings["temp_dir"], settings["now"], f"{i_padded}." + point_to_string(point) + f"_trial{j}"
748
+ settings["temp_dir"],
749
+ settings["now"],
750
+ f"{i_padded}." + point_to_string(point) + f"_trial{j}",
742
751
  )
743
752
 
744
753
  command = substitute_global_yvars(trial["command"], execution["subspace"])
@@ -780,7 +789,8 @@ def run_point_trials(settings, data, execution, f, i, point):
780
789
  capture_output=True,
781
790
  env=execution["env"],
782
791
  )
783
- if command_output.returncode != 0:
792
+
793
+ def complain():
784
794
  hint = "check the following files for more details:\n"
785
795
  hint += f"{point_id}.out\n{point_id}.err\n{point_id}.tmp"
786
796
  report(
@@ -791,6 +801,9 @@ def run_point_trials(settings, data, execution, f, i, point):
791
801
  ),
792
802
  hint=hint,
793
803
  )
804
+
805
+ if command_output.returncode != 0:
806
+ complain()
794
807
  else:
795
808
  output_lines = command_output.stdout.strip().split("\n")
796
809
 
@@ -798,7 +811,10 @@ def run_point_trials(settings, data, execution, f, i, point):
798
811
  try:
799
812
  return int(x)
800
813
  except ValueError:
801
- return float(x)
814
+ try:
815
+ return float(x)
816
+ except ValueError:
817
+ complain()
802
818
 
803
819
  collected_metrics[metric["name"]] = [
804
820
  int_or_float(line) for line in output_lines
@@ -871,6 +887,20 @@ def validate_execution(execution, data):
871
887
  point_to_string(point),
872
888
  hint="try relaxing your trial conditions or adding more trials.",
873
889
  )
890
+ if len(execution["metrics"] or []) > 0:
891
+ compatible_metric_names = {m["name"] for m in compatible_metrics}
892
+ incompatible = [
893
+ m for m in execution["metrics"] if m not in compatible_metric_names
894
+ ]
895
+ if len(incompatible) > 0:
896
+ report(
897
+ LogLevel.ERROR,
898
+ "some metrics are not compatible with {}".format(
899
+ point_to_string(point)
900
+ ),
901
+ ", ".join(incompatible),
902
+ hint="try relaxing your metric conditions or adding more metrics.",
903
+ )
874
904
 
875
905
 
876
906
  def get_compatible_trials_and_metrics(data, point, execution):
@@ -911,25 +941,12 @@ def run_subspace_trials(settings, data, execution):
911
941
  compatible_trials, compatible_metrics = (
912
942
  get_compatible_trials_and_metrics(data, point, execution)
913
943
  )
914
- if len(compatible_trials) == 0:
915
- report(
916
- LogLevel.ERROR,
917
- point_to_string(point),
918
- "no compatible trials found",
919
- )
920
- elif len(compatible_metrics) == 0:
921
- report(
922
- LogLevel.ERROR,
923
- point_to_string(point),
924
- "no compatible metrics found",
925
- )
926
- else:
927
- report(
928
- LogLevel.INFO,
929
- get_progress(i, execution["subspace_size"]),
930
- "dry run",
931
- point_to_string(point),
932
- )
944
+ report(
945
+ LogLevel.INFO,
946
+ get_progress(i, execution["subspace_size"]),
947
+ "dry run",
948
+ point_to_string(point),
949
+ )
933
950
  else:
934
951
  output_dir = os.path.dirname(settings["output"])
935
952
  if output_dir and not os.path.exists(output_dir):
@@ -1122,7 +1139,7 @@ def normalize_point_setup(point_setup, space):
1122
1139
  report(LogLevel.FATAL, "point setup must be a string or a list")
1123
1140
 
1124
1141
  # check validity of 'on' fields
1125
- for item in point_setup:
1142
+ for item in normalized_items:
1126
1143
  if not isinstance(item["on"], (list, type(None))):
1127
1144
  report(LogLevel.FATAL, "point setup 'on' must be a list or None")
1128
1145
  for dim in item["on"]:
@@ -1139,7 +1156,7 @@ def normalize_point_setup(point_setup, space):
1139
1156
  )
1140
1157
 
1141
1158
  # check validity of 'parallel' fields
1142
- for item in point_setup:
1159
+ for item in normalized_items:
1143
1160
  parallel = item["parallel"]
1144
1161
  if not isinstance(parallel, (bool, list)):
1145
1162
  report(LogLevel.FATAL, "point setup 'parallel' must be a boolean or a list")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: yuclid
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: Run experiments and interactively plot results across combinations of user-specified dimensions
5
5
  Author-email: Federico Sossai <federico.sossai@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/fsossai/yuclid
@@ -0,0 +1,11 @@
1
+ yuclid/__init__.py,sha256=JMD28FXYHc_TM03visyUSd3UA9FZAaJMRStnfZoq50Y,21
2
+ yuclid/cli.py,sha256=YZzxJty5wlUhCOEELvEcJeQb_lQ1Qc89RG4_s5IyKWU,6224
3
+ yuclid/log.py,sha256=GR_FVfNroumuonKguAPd6H1rKjxJKRc8tAS2sVNTbzE,1655
4
+ yuclid/plot.py,sha256=R6IXw6hHuXYFx1MjTKLCIqBfdNORStVEoDidAr-jEuE,29697
5
+ yuclid/run.py,sha256=s1BGCmYckO2s5TSoKNCb4llZpUouxyooxtVlbqsQNTs,44641
6
+ yuclid/spread.py,sha256=4Ci3nsu8n_dhG-AK2IWHKRElQ8oaGdw14LrgNu79biM,4938
7
+ yuclid-0.1.4.dist-info/METADATA,sha256=Qm5Sw-K-L1VGSEJVBwE_C6Ubjp6JNa55SiH7snsWdPM,673
8
+ yuclid-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
+ yuclid-0.1.4.dist-info/entry_points.txt,sha256=2AvTtyt5iBnjr6HnjqH_3PeSoq9UzIbT92qivmEbOYA,43
10
+ yuclid-0.1.4.dist-info/top_level.txt,sha256=cL5mb4h_4etwTsqhPvSnoVBXImIzPFGd3rINV1nEjPo,7
11
+ yuclid-0.1.4.dist-info/RECORD,,
@@ -1,11 +0,0 @@
1
- yuclid/__init__.py,sha256=Pru0BlFBASFCFo7McHdohtKkUtgMPDwbGfyUZlE2_Vw,21
2
- yuclid/cli.py,sha256=l5WUY6Q6nwg7WRrAAPf5uaspG9zrEPE9BA9v3eYI_vE,6410
3
- yuclid/log.py,sha256=GR_FVfNroumuonKguAPd6H1rKjxJKRc8tAS2sVNTbzE,1655
4
- yuclid/plot.py,sha256=RV_bgkFDpOGxw7ankW7QsnBsyrholBtYKKj9jUtBAyM,30836
5
- yuclid/run.py,sha256=NVvcmLiQkzypgqpRbmI4lttgKSy05hYJKVBEp3fxnpA,44106
6
- yuclid/spread.py,sha256=4Ci3nsu8n_dhG-AK2IWHKRElQ8oaGdw14LrgNu79biM,4938
7
- yuclid-0.1.2.dist-info/METADATA,sha256=lCvP9NyDUhHzKnu4e8jCHVTFyAkgps_hH27EuwCL0gk,673
8
- yuclid-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
- yuclid-0.1.2.dist-info/entry_points.txt,sha256=2AvTtyt5iBnjr6HnjqH_3PeSoq9UzIbT92qivmEbOYA,43
10
- yuclid-0.1.2.dist-info/top_level.txt,sha256=cL5mb4h_4etwTsqhPvSnoVBXImIzPFGd3rINV1nEjPo,7
11
- yuclid-0.1.2.dist-info/RECORD,,
File without changes