subset2evaluate 1.0.20.tar.gz → 1.0.22.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (18)
  1. {subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/PKG-INFO +1 -1
  2. {subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/pyproject.toml +1 -1
  3. {subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/subset2evaluate/utils.py +19 -9
  4. {subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/subset2evaluate.egg-info/PKG-INFO +1 -1
  5. {subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/README.md +0 -0
  6. {subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/setup.cfg +0 -0
  7. {subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/subset2evaluate/__init__.py +0 -0
  8. {subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/subset2evaluate/evaluate.py +0 -0
  9. {subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/subset2evaluate/methods.py +0 -0
  10. {subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/subset2evaluate/methods_old.py +0 -0
  11. {subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/subset2evaluate/reference_info.py +0 -0
  12. {subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/subset2evaluate/select_subset.py +0 -0
  13. {subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/subset2evaluate/test.py +0 -0
  14. {subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/subset2evaluate.egg-info/SOURCES.txt +0 -0
  15. {subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/subset2evaluate.egg-info/dependency_links.txt +0 -0
  16. {subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/subset2evaluate.egg-info/entry_points.txt +0 -0
  17. {subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/subset2evaluate.egg-info/requires.txt +0 -0
  18. {subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/subset2evaluate.egg-info/top_level.txt +0 -0
{subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: subset2evaluate
-Version: 1.0.20
+Version: 1.0.22
 Summary: Find informative examples to efficiently (human-)evaluate NLG models.
 Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
 License: MIT
{subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/pyproject.toml
@@ -11,7 +11,7 @@ packages = ["subset2evaluate"]

 [project]
 name = "subset2evaluate"
-version = "1.0.20"
+version = "1.0.22"
 description = "Find informative examples to efficiently (human-)evaluate NLG models."
 license = {text = "MIT"}
 readme = "README.md"
{subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/subset2evaluate/utils.py
@@ -16,6 +16,8 @@ def _data_minmax_normalize(data):
     for line in data:
         for met_all in line["scores"].values():
             for met_k, met_v in met_all.items():
+                if met_v is None:
+                    continue
                 data_flat[met_k].append(met_v)

     # normalize
@@ -24,10 +26,15 @@ def _data_minmax_normalize(data):
     for line in data:
         for model, met_all in line["scores"].items():
             for met_k, met_v in met_all.items():
-                # (x-min)/(max-min) normalize
-                line["scores"][model][met_k] = (met_v - data_flat[met_k][0]) / (
-                    data_flat[met_k][1] - data_flat[met_k][0]
-                )
+                if met_v is None:
+                    continue
+                if data_flat[met_k][1] - data_flat[met_k][0] == 0:
+                    line["scores"][model][met_k] = 0
+                else:
+                    # (x-min)/(max-min) normalize
+                    line["scores"][model][met_k] = (met_v - data_flat[met_k][0]) / (
+                        data_flat[met_k][1] - data_flat[met_k][0]
+                    )


 def confidence_interval(data, confidence=0.95):
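Taken together, the two hunks above make _data_minmax_normalize robust to missing scores and to constant metrics. A self-contained sketch of the resulting behavior (the bounds computation between the two passes is inferred from the data_flat[met_k][0]/[1] indexing; the name minmax_normalize and the toy data are ours, not the package's):

from collections import defaultdict

def minmax_normalize(data):
    # Pass 1: collect per-metric values, skipping missing (None) scores
    # as the 1.0.22 guard does.
    flat = defaultdict(list)
    for line in data:
        for met_all in line["scores"].values():
            for met_k, met_v in met_all.items():
                if met_v is not None:
                    flat[met_k].append(met_v)
    # (min, max) per metric; the real code keeps these in data_flat[met_k].
    bounds = {k: (min(v), max(v)) for k, v in flat.items()}

    # Pass 2: (x - min) / (max - min), leaving None untouched and pinning
    # constant metrics (max == min) to 0 instead of dividing by zero.
    for line in data:
        for met_all in line["scores"].values():
            for met_k, met_v in met_all.items():
                if met_v is None:
                    continue
                lo, hi = bounds[met_k]
                met_all[met_k] = 0 if hi == lo else (met_v - lo) / (hi - lo)
    return data

data = [
    {"scores": {"modelA": {"bleu": 10.0, "comet": None}}},
    {"scores": {"modelA": {"bleu": 30.0, "comet": 0.8}}},
]
minmax_normalize(data)
# bleu: 10.0 -> 0.0, 30.0 -> 1.0; comet: 0.8 -> 0 (max == min), None stays None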
{subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/subset2evaluate/utils.py (continued)
@@ -844,18 +851,21 @@ def load_data_wmt(  # noqa: C901
     humscores = [
         model_v["human"] for line in data for model_v in line["scores"].values()
     ]
-    if all(x <= 0 for x in humscores):
+    if all(x <= 0 for x in humscores if x is not None):
         for line in data:
             for model_v in line["scores"].values():
-                model_v["human"] = max(0, min(100, 100 + 4 * model_v["human"]))
-    elif all(x >= 0 and x <= 1 for x in humscores):
+                if model_v["human"] is not None:
+                    model_v["human"] = max(0, min(100, 100 + 4 * model_v["human"]))
+    elif all(x >= 0 and x <= 1 for x in humscores if x is not None):
         for line in data:
             for model_v in line["scores"].values():
-                model_v["human"] = max(0, min(100, model_v["human"] * 100))
+                if model_v["human"] is not None:
+                    model_v["human"] = max(0, min(100, model_v["human"] * 100))
     else:
         for line in data:
             for model_v in line["scores"].values():
-                model_v["human"] = max(0, min(100, model_v["human"]))
+                if model_v["human"] is not None:
+                    model_v["human"] = max(0, min(100, model_v["human"]))

     # this is min-max normalization
     if normalize and not binarize:
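The three branches above rescale raw human scores onto a common 0-100 range depending on their observed span, and 1.0.22 now ignores None entries throughout. A condensed sketch of that logic (the name rescale_human_scores is ours):

def rescale_human_scores(data):
    present = [
        v["human"]
        for line in data for v in line["scores"].values()
        if v["human"] is not None
    ]
    if all(x <= 0 for x in present):
        # non-positive scores (e.g. error penalties): 0 -> 100, -25 -> 0
        scale = lambda x: 100 + 4 * x
    elif all(0 <= x <= 1 for x in present):
        # unit-interval scores: stretch to 0-100
        scale = lambda x: x * 100
    else:
        # assume already roughly 0-100
        scale = lambda x: x
    for line in data:
        for v in line["scores"].values():
            if v["human"] is not None:
                v["human"] = max(0, min(100, scale(v["human"])))
    return data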
{subset2evaluate-1.0.20 → subset2evaluate-1.0.22}/subset2evaluate.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: subset2evaluate
-Version: 1.0.20
+Version: 1.0.22
 Summary: Find informative examples to efficiently (human-)evaluate NLG models.
 Author-email: Vilém Zouhar <vilem.zouhar@gmail.com>
 License: MIT