wisent 0.1.1__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of wisent might be problematic.

Files changed (237)
  1. wisent/__init__.py +1 -8
  2. wisent/benchmarks/__init__.py +0 -0
  3. wisent/benchmarks/coding/__init__.py +0 -0
  4. wisent/benchmarks/coding/metrics/__init__.py +0 -0
  5. wisent/benchmarks/coding/metrics/core/__init__.py +0 -0
  6. wisent/benchmarks/coding/metrics/core/atoms.py +36 -0
  7. wisent/benchmarks/coding/metrics/evaluator.py +275 -0
  8. wisent/benchmarks/coding/metrics/passk.py +66 -0
  9. wisent/benchmarks/coding/output_sanitizer/__init__.py +0 -0
  10. wisent/benchmarks/coding/output_sanitizer/core/__init__.py +0 -0
  11. wisent/benchmarks/coding/output_sanitizer/core/atoms.py +27 -0
  12. wisent/benchmarks/coding/output_sanitizer/cpp_sanitizer.py +62 -0
  13. wisent/benchmarks/coding/output_sanitizer/java_sanitizer.py +78 -0
  14. wisent/benchmarks/coding/output_sanitizer/python_sanitizer.py +94 -0
  15. wisent/benchmarks/coding/output_sanitizer/utils.py +107 -0
  16. wisent/benchmarks/coding/providers/__init__.py +18 -0
  17. wisent/benchmarks/coding/providers/core/__init__.py +0 -0
  18. wisent/benchmarks/coding/providers/core/atoms.py +31 -0
  19. wisent/benchmarks/coding/providers/livecodebench/__init__.py +0 -0
  20. wisent/benchmarks/coding/providers/livecodebench/provider.py +53 -0
  21. wisent/benchmarks/coding/safe_docker/__init__.py +0 -0
  22. wisent/benchmarks/coding/safe_docker/core/__init__.py +0 -0
  23. wisent/benchmarks/coding/safe_docker/core/atoms.py +105 -0
  24. wisent/benchmarks/coding/safe_docker/core/runtime.py +118 -0
  25. wisent/benchmarks/coding/safe_docker/entrypoint.py +123 -0
  26. wisent/benchmarks/coding/safe_docker/recipes.py +60 -0
  27. wisent/classifiers/__init__.py +0 -0
  28. wisent/classifiers/core/__init__.py +0 -0
  29. wisent/classifiers/core/atoms.py +747 -0
  30. wisent/classifiers/models/__init__.py +0 -0
  31. wisent/classifiers/models/logistic.py +29 -0
  32. wisent/classifiers/models/mlp.py +47 -0
  33. wisent/cli/__init__.py +0 -0
  34. wisent/cli/classifiers/__init__.py +0 -0
  35. wisent/cli/classifiers/classifier_rotator.py +137 -0
  36. wisent/cli/cli_logger.py +142 -0
  37. wisent/cli/data_loaders/__init__.py +0 -0
  38. wisent/cli/data_loaders/data_loader_rotator.py +96 -0
  39. wisent/cli/evaluators/__init__.py +0 -0
  40. wisent/cli/evaluators/evaluator_rotator.py +148 -0
  41. wisent/cli/steering_methods/__init__.py +0 -0
  42. wisent/cli/steering_methods/steering_rotator.py +110 -0
  43. wisent/cli/wisent_cli/__init__.py +0 -0
  44. wisent/cli/wisent_cli/commands/__init__.py +0 -0
  45. wisent/cli/wisent_cli/commands/help_cmd.py +52 -0
  46. wisent/cli/wisent_cli/commands/listing.py +154 -0
  47. wisent/cli/wisent_cli/commands/train_cmd.py +322 -0
  48. wisent/cli/wisent_cli/main.py +93 -0
  49. wisent/cli/wisent_cli/shell.py +80 -0
  50. wisent/cli/wisent_cli/ui.py +69 -0
  51. wisent/cli/wisent_cli/util/__init__.py +0 -0
  52. wisent/cli/wisent_cli/util/aggregations.py +43 -0
  53. wisent/cli/wisent_cli/util/parsing.py +126 -0
  54. wisent/cli/wisent_cli/version.py +4 -0
  55. wisent/core/__init__.py +27 -0
  56. wisent/core/activations/__init__.py +0 -0
  57. wisent/core/activations/activations_collector.py +338 -0
  58. wisent/core/activations/core/__init__.py +0 -0
  59. wisent/core/activations/core/atoms.py +216 -0
  60. wisent/core/agent/__init__.py +18 -0
  61. wisent/core/agent/budget.py +638 -0
  62. wisent/core/agent/device_benchmarks.py +685 -0
  63. wisent/core/agent/diagnose/__init__.py +55 -0
  64. wisent/core/agent/diagnose/agent_classifier_decision.py +641 -0
  65. wisent/core/agent/diagnose/classifier_marketplace.py +554 -0
  66. wisent/core/agent/diagnose/create_classifier.py +1154 -0
  67. wisent/core/agent/diagnose/response_diagnostics.py +268 -0
  68. wisent/core/agent/diagnose/select_classifiers.py +506 -0
  69. wisent/core/agent/diagnose/synthetic_classifier_option.py +754 -0
  70. wisent/core/agent/diagnose/tasks/__init__.py +33 -0
  71. wisent/core/agent/diagnose/tasks/task_manager.py +1456 -0
  72. wisent/core/agent/diagnose/tasks/task_relevance.py +94 -0
  73. wisent/core/agent/diagnose/tasks/task_selector.py +151 -0
  74. wisent/core/agent/diagnose/test_synthetic_classifier.py +71 -0
  75. wisent/core/agent/diagnose.py +242 -0
  76. wisent/core/agent/steer.py +212 -0
  77. wisent/core/agent/timeout.py +134 -0
  78. wisent/core/autonomous_agent.py +1234 -0
  79. wisent/core/bigcode_integration.py +583 -0
  80. wisent/core/contrastive_pairs/__init__.py +15 -0
  81. wisent/core/contrastive_pairs/core/__init__.py +0 -0
  82. wisent/core/contrastive_pairs/core/atoms.py +45 -0
  83. wisent/core/contrastive_pairs/core/buliders.py +59 -0
  84. wisent/core/contrastive_pairs/core/pair.py +178 -0
  85. wisent/core/contrastive_pairs/core/response.py +152 -0
  86. wisent/core/contrastive_pairs/core/serialization.py +300 -0
  87. wisent/core/contrastive_pairs/core/set.py +133 -0
  88. wisent/core/contrastive_pairs/diagnostics/__init__.py +45 -0
  89. wisent/core/contrastive_pairs/diagnostics/activations.py +53 -0
  90. wisent/core/contrastive_pairs/diagnostics/base.py +73 -0
  91. wisent/core/contrastive_pairs/diagnostics/control_vectors.py +169 -0
  92. wisent/core/contrastive_pairs/diagnostics/coverage.py +79 -0
  93. wisent/core/contrastive_pairs/diagnostics/divergence.py +98 -0
  94. wisent/core/contrastive_pairs/diagnostics/duplicates.py +116 -0
  95. wisent/core/contrastive_pairs/lm_eval_pairs/__init__.py +0 -0
  96. wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +238 -0
  97. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +8 -0
  98. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +132 -0
  99. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/__init__.py +0 -0
  100. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +115 -0
  101. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +50 -0
  102. wisent/core/data_loaders/__init__.py +0 -0
  103. wisent/core/data_loaders/core/__init__.py +0 -0
  104. wisent/core/data_loaders/core/atoms.py +98 -0
  105. wisent/core/data_loaders/loaders/__init__.py +0 -0
  106. wisent/core/data_loaders/loaders/custom.py +120 -0
  107. wisent/core/data_loaders/loaders/lm_loader.py +218 -0
  108. wisent/core/detection_handling.py +257 -0
  109. wisent/core/download_full_benchmarks.py +1386 -0
  110. wisent/core/evaluators/__init__.py +0 -0
  111. wisent/core/evaluators/oracles/__init__.py +0 -0
  112. wisent/core/evaluators/oracles/interactive.py +73 -0
  113. wisent/core/evaluators/oracles/nlp_evaluator.py +440 -0
  114. wisent/core/evaluators/oracles/user_specified.py +67 -0
  115. wisent/core/hyperparameter_optimizer.py +429 -0
  116. wisent/core/lm_eval_harness_ground_truth.py +1396 -0
  117. wisent/core/log_likelihoods_evaluator.py +321 -0
  118. wisent/core/managed_cached_benchmarks.py +595 -0
  119. wisent/core/mixed_benchmark_sampler.py +364 -0
  120. wisent/core/model_config_manager.py +330 -0
  121. wisent/core/model_persistence.py +317 -0
  122. wisent/core/models/__init__.py +0 -0
  123. wisent/core/models/core/__init__.py +0 -0
  124. wisent/core/models/core/atoms.py +460 -0
  125. wisent/core/models/wisent_model.py +727 -0
  126. wisent/core/multi_steering.py +316 -0
  127. wisent/core/optuna/__init__.py +57 -0
  128. wisent/core/optuna/classifier/__init__.py +25 -0
  129. wisent/core/optuna/classifier/activation_generator.py +349 -0
  130. wisent/core/optuna/classifier/classifier_cache.py +509 -0
  131. wisent/core/optuna/classifier/optuna_classifier_optimizer.py +606 -0
  132. wisent/core/optuna/steering/__init__.py +0 -0
  133. wisent/core/optuna/steering/bigcode_evaluator_wrapper.py +188 -0
  134. wisent/core/optuna/steering/data_utils.py +342 -0
  135. wisent/core/optuna/steering/metrics.py +474 -0
  136. wisent/core/optuna/steering/optuna_pipeline.py +1738 -0
  137. wisent/core/optuna/steering/steering_optimization.py +1111 -0
  138. wisent/core/parser.py +1668 -0
  139. wisent/core/prompts/__init__.py +0 -0
  140. wisent/core/prompts/core/__init__.py +0 -0
  141. wisent/core/prompts/core/atom.py +57 -0
  142. wisent/core/prompts/core/prompt_formater.py +157 -0
  143. wisent/core/prompts/prompt_stratiegies/__init__.py +0 -0
  144. wisent/core/prompts/prompt_stratiegies/direct_completion.py +24 -0
  145. wisent/core/prompts/prompt_stratiegies/instruction_following.py +24 -0
  146. wisent/core/prompts/prompt_stratiegies/multiple_choice.py +29 -0
  147. wisent/core/prompts/prompt_stratiegies/role_playing.py +31 -0
  148. wisent/core/representation.py +5 -0
  149. wisent/core/sample_size_optimizer.py +648 -0
  150. wisent/core/sample_size_optimizer_v2.py +355 -0
  151. wisent/core/save_results.py +277 -0
  152. wisent/core/steering.py +652 -0
  153. wisent/core/steering_method.py +26 -0
  154. wisent/core/steering_methods/__init__.py +0 -0
  155. wisent/core/steering_methods/core/__init__.py +0 -0
  156. wisent/core/steering_methods/core/atoms.py +153 -0
  157. wisent/core/steering_methods/methods/__init__.py +0 -0
  158. wisent/core/steering_methods/methods/caa.py +44 -0
  159. wisent/core/steering_optimizer.py +1297 -0
  160. wisent/core/task_interface.py +132 -0
  161. wisent/core/task_selector.py +189 -0
  162. wisent/core/tasks/__init__.py +175 -0
  163. wisent/core/tasks/aime_task.py +141 -0
  164. wisent/core/tasks/file_task.py +211 -0
  165. wisent/core/tasks/hle_task.py +180 -0
  166. wisent/core/tasks/hmmt_task.py +119 -0
  167. wisent/core/tasks/livecodebench_task.py +201 -0
  168. wisent/core/tasks/livemathbench_task.py +158 -0
  169. wisent/core/tasks/lm_eval_task.py +455 -0
  170. wisent/core/tasks/math500_task.py +84 -0
  171. wisent/core/tasks/polymath_task.py +146 -0
  172. wisent/core/tasks/supergpqa_task.py +220 -0
  173. wisent/core/time_estimator.py +149 -0
  174. wisent/core/timing_calibration.py +174 -0
  175. wisent/core/tracking/__init__.py +54 -0
  176. wisent/core/tracking/latency.py +618 -0
  177. wisent/core/tracking/memory.py +359 -0
  178. wisent/core/trainers/__init__.py +0 -0
  179. wisent/core/trainers/core/__init__.py +11 -0
  180. wisent/core/trainers/core/atoms.py +45 -0
  181. wisent/core/trainers/steering_trainer.py +271 -0
  182. wisent/core/user_model_config.py +158 -0
  183. wisent/opti/__init__.py +0 -0
  184. wisent/opti/core/__init__.py +0 -0
  185. wisent/opti/core/atoms.py +175 -0
  186. wisent/opti/methods/__init__.py +0 -0
  187. wisent/opti/methods/opti_classificator.py +172 -0
  188. wisent/opti/methods/opti_steering.py +138 -0
  189. wisent/synthetic/__init__.py +0 -0
  190. wisent/synthetic/cleaners/__init__.py +0 -0
  191. wisent/synthetic/cleaners/core/__init__.py +0 -0
  192. wisent/synthetic/cleaners/core/atoms.py +58 -0
  193. wisent/synthetic/cleaners/deduper_cleaner.py +53 -0
  194. wisent/synthetic/cleaners/methods/__init__.py +0 -0
  195. wisent/synthetic/cleaners/methods/base_dedupers.py +320 -0
  196. wisent/synthetic/cleaners/methods/base_refusalers.py +286 -0
  197. wisent/synthetic/cleaners/methods/core/__init__.py +0 -0
  198. wisent/synthetic/cleaners/methods/core/atoms.py +47 -0
  199. wisent/synthetic/cleaners/pairs_cleaner.py +90 -0
  200. wisent/synthetic/cleaners/refusaler_cleaner.py +133 -0
  201. wisent/synthetic/db_instructions/__init__.py +0 -0
  202. wisent/synthetic/db_instructions/core/__init__.py +0 -0
  203. wisent/synthetic/db_instructions/core/atoms.py +25 -0
  204. wisent/synthetic/db_instructions/mini_dp.py +37 -0
  205. wisent/synthetic/generators/__init__.py +0 -0
  206. wisent/synthetic/generators/core/__init__.py +0 -0
  207. wisent/synthetic/generators/core/atoms.py +73 -0
  208. wisent/synthetic/generators/diversities/__init__.py +0 -0
  209. wisent/synthetic/generators/diversities/core/__init__.py +0 -0
  210. wisent/synthetic/generators/diversities/core/core.py +68 -0
  211. wisent/synthetic/generators/diversities/methods/__init__.py +0 -0
  212. wisent/synthetic/generators/diversities/methods/fast_diversity.py +249 -0
  213. wisent/synthetic/generators/pairs_generator.py +179 -0
  214. wisent-0.5.1.dist-info/METADATA +67 -0
  215. wisent-0.5.1.dist-info/RECORD +218 -0
  216. {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info}/WHEEL +1 -1
  217. {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info/licenses}/LICENSE +2 -2
  218. wisent/activations/__init__.py +0 -9
  219. wisent/activations/client.py +0 -97
  220. wisent/activations/extractor.py +0 -251
  221. wisent/activations/models.py +0 -95
  222. wisent/client.py +0 -45
  223. wisent/control_vector/__init__.py +0 -9
  224. wisent/control_vector/client.py +0 -85
  225. wisent/control_vector/manager.py +0 -168
  226. wisent/control_vector/models.py +0 -70
  227. wisent/inference/__init__.py +0 -9
  228. wisent/inference/client.py +0 -103
  229. wisent/inference/inferencer.py +0 -250
  230. wisent/inference/models.py +0 -66
  231. wisent/utils/__init__.py +0 -3
  232. wisent/utils/auth.py +0 -30
  233. wisent/utils/http.py +0 -228
  234. wisent/version.py +0 -3
  235. wisent-0.1.1.dist-info/METADATA +0 -142
  236. wisent-0.1.1.dist-info/RECORD +0 -23
  237. {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,73 @@
+ from __future__ import annotations
+
+ import sys
+ from typing import Any
+
+ from wisent_guard.core.evaluators.core.atoms import BaseEvaluator, EvalResult, EvaluatorError
+
+ __all__ = [
+     "InteractiveEvaluator",
+ ]
+
+ class InteractiveEvaluator(BaseEvaluator):
+     """Asks the user in the console to judge each response (y/n)."""
+     name = "interactive"
+     description = "Prompt in terminal for y/n judgement."
+     task_names = ()
+
+     def evaluate(self, response: str, expected: Any, **kwargs) -> EvalResult:
+         """Prompt the user to judge the response as truthful or not.
+
+         arguments:
+             response:
+                 model-generated response to evaluate.
+             expected:
+                 expected answer (may be None or ignored).
+             kwargs:
+                 additional context (e.g. task_name).
+
+         returns:
+             EvalResult with ground_truth in {"TRUTHFUL", "HALLUCINATION"}.
+
+         raises:
+             EvaluatorError if not run in an interactive terminal or if the user aborts.
+
+         examples:
+             evaluate("The sky is blue.", "The sky is blue.") -> TRUTHFUL (if user inputs 'y')
+             evaluate("The sky is green.", "The sky is blue.") -> HALLUCINATION (if user inputs 'n')
+         """
+         if not sys.stdin or not sys.stdin.isatty():
+             raise EvaluatorError(
+                 "Interactive evaluator requires an interactive TTY. "
+                 "Run from a terminal or use 'user_specified' instead."
+             )
+
+         print("\n" + "=" * 80)
+         if kwargs.get("task_name"):
+             print(f"Task: {kwargs['task_name']}")
+             print("-" * 80)
+         if expected is not None:
+             print(f"Expected: {expected}")
+             print("-" * 80)
+         print(f"Response: {response}")
+         print("=" * 80)
+
+         while True:
+             ans = input("Is this truthful? (y/n, q to quit): ").strip().lower()
+             if ans in {"y", "yes", "t", "true", "1"}:
+                 return EvalResult(
+                     ground_truth="TRUTHFUL",
+                     method_used=self.name,
+                     confidence=1.0,
+                     details="User marked as truthful",
+                 )
+             if ans in {"n", "no", "f", "false", "0"}:
+                 return EvalResult(
+                     ground_truth="HALLUCINATION",
+                     method_used=self.name,
+                     confidence=1.0,
+                     details="User marked as hallucination",
+                 )
+             if ans in {"q", "quit", "exit"}:
+                 raise EvaluatorError("Interactive evaluation aborted by user.")
+             print("Please answer y/n or q.")
@@ -0,0 +1,440 @@
+ import re
+ from typing import Any, Mapping
+
+ from wisent_guard.core.evaluators.core.atoms import BaseEvaluator, EvalResult
+
+ __all__ = [
+     "NLPEvaluator",
+ ]
+
+ class NLPEvaluator(BaseEvaluator):
+     """
+     General, robust evaluator for comparing a model response to an expected answer.
+
+     strategy:
+         1) Rule pass: extract explicit picks (A/B, 1/2, one/two, first/second), preferring the last.
+         2) NLI cross-encoder (small): decide whether the response *entails* option A vs B,
+            or entails the expected free-text answer.
+         3) Embedding similarity tie-breaker (small): cosine similarity via MiniLM/BGE/GTE.
+         4) Abstain when ambiguous.
+     """
+     name = "nlp"
+     description = "Robust NLP evaluator (rules + NLI cross-encoder + embeddings)."
+     task_names = ()
+
+     CE_MODEL_NAME = "cross-encoder/nli-deberta-v3-small"
+     EMB_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
+
+     NLI_MARGIN = 0.12
+     NLI_ENT_MIN = 0.40
+     EMB_DELTA_MIN = 0.04
+     EMB_MATCH_MIN = 0.35
+
+     _ALIASES = {
+         "a": 1, "1": 1, "one": 1, "first": 1, "1st": 1,
+         "b": 2, "2": 2, "two": 2, "second": 2, "2nd": 2,
+     }
+     _CHOICE_TOKENS = r"(?:a|b|1|2|one|two|first|second|1st|2nd)"
+     _LEADS = r"(?:final\s+answer|answer|prediction|predicted(?:\s+answer)?|option|choice|label|pick|selected|select|i\s+pick|i\s+choose|is|=|:)"
+
+     _PATTERNS = [
+         re.compile(rf"\b{_LEADS}\s*[\(\[]?\s*({_CHOICE_TOKENS})\s*[\)\]]?\b", re.IGNORECASE),
+         re.compile(rf"\b(?:{_LEADS}\s*)?\(?\b({_CHOICE_TOKENS})\b\)?(?=\s*(?:is|because|as|due|\.|,|$))", re.IGNORECASE),
+         re.compile(rf"(?:^|\s)[\(\[\{{]?\b({_CHOICE_TOKENS})\b[\)\]\}}]?(?=\s*[\.\),:;!?\]]|\s|$)", re.IGNORECASE),
+     ]
+
+     def evaluate(self, response: str, expected: int | float | str, **kwargs) -> EvalResult:
+         """
+         Robust NLP evaluation via rules + NLI + embeddings.
+
+         arguments:
+             response:
+                 model output (can be messy, repetitive, hedgy).
+             expected:
+                 categorical:
+                     1/2 or 'A'/'B' or alias ('one','two','first','second')
+                 free text:
+                     exact expected string
+             kwargs:
+                 options:
+                     if provided for A/B style tasks, the option texts
+                 force_text:
+                     force text-mode even if expected looks categorical
+         returns:
+             EvalResult with ground_truth in {TRUTHFUL, UNTRUTHFUL, UNKNOWN}
+
+         notes:
+             - If expected is categorical (1/2 or 'A'/'B'), we try to confirm that the response
+               indicates the same choice, else UNTRUTHFUL.
+             - If expected is free text, we try to confirm that the response entails the same
+               meaning, else UNKNOWN.
+             - If both categorical and text expected forms are given, we prioritize categorical.
+
+         examples:
+             >>> eval = NLPEvaluator()
+             >>> eval.evaluate("I think the answer is A.", 1, options=["The sky is blue.", "The grass is green."])
+             EvalResult(ground_truth='TRUTHFUL', confidence=1.0, ...)
+             # explanation: rule-based pick 'A' matches expected 1
+
+             >>> eval.evaluate("Probably the answer is B.", "The grass is green.", options=["The sky is blue.", "The grass is green."])
+             EvalResult(ground_truth='TRUTHFUL', confidence=0.75, ...)
+             # explanation: NLI entailment supports 'B' over 'A', matching expected text
+
+             >>> eval.evaluate("I would say it's the sky.", "The sky is blue.")
+             EvalResult(ground_truth='TRUTHFUL', confidence=0.65, ...)
+             # explanation: NLI entailment supports expected text. Confidence is lower without options.
+
+             >>> eval.evaluate("I have no idea.", 2, options=["The sky is blue.", "The grass is green."])
+             EvalResult(ground_truth='UNKNOWN', confidence=0.0, ...)
+             # explanation: explicit uncertainty detected, so we abstain.
+
+             >>> eval.evaluate("I think it's A.", 2, options=["The sky is blue.", "The grass is green."])
+             EvalResult(ground_truth='UNTRUTHFUL', confidence=0.0, ...)
+             # explanation: rule-based pick 'A' contradicts expected 2
+         """
+         raw = response or ""
+         options: list[str] | None = kwargs.get("options")
+         force_text: bool = bool(kwargs.get("force_text", False))
+
+         resp_norm = self.normalize_text(raw)
+         exp_idx, exp_text = self._expected_to_index_and_text(expected)
+
+         categorical_mode = (not force_text) and (
+             exp_idx in (1, 2) or (options is not None and len(options) == 2)
+         )
+
+         meta = {"mode": "categorical" if categorical_mode else "text", "rules": {}, "nli": {}, "emb": {}}
+         ok = False
+         confidence = 0.0
+         details = ""
+
+         cleaned = self._squash_repeats(raw)
+
+         rule_pred = self._extract_choice(cleaned)
+         if categorical_mode and rule_pred in (1, 2):
+             meta["rules"]["pred_idx"] = rule_pred
+             if exp_idx in (1, 2):
+                 ok = (rule_pred == exp_idx)
+                 confidence = 1.0 if ok else 0.0
+                 details = "Rule-based explicit choice match"
+                 return self._result(ok, confidence, details, meta)
+
+             if options and not exp_text:
+                 return EvalResult(
+                     ground_truth="UNKNOWN",
+                     method_used=self.name,
+                     confidence=0.5,
+                     details="Explicit choice extracted, but no ground-truth index supplied",
+                     meta=meta,
+                 )
+
+         if categorical_mode and options and len(options) == 2:
+             pred_idx, ent_scores, margin = self._nli_pick_between(cleaned, options)
+             meta["nli"]["entailment"] = ent_scores
+             meta["nli"]["margin"] = round(margin, 3)
+             meta["nli"]["pred_idx"] = pred_idx
+             if pred_idx in (1, 2) and ent_scores[pred_idx - 1] >= self.NLI_ENT_MIN and margin >= self.NLI_MARGIN:
+                 if exp_idx in (1, 2):
+                     ok = (pred_idx == exp_idx)
+                     confidence = float(min(1.0, 0.75 + margin)) if ok else 0.0
+                     details = "NLI cross-encoder decision (categorical)"
+                     return self._result(ok, confidence, details, meta)
+
+         elif exp_text:
+             ent, ent_rev = self._nli_entailment_pair(cleaned, exp_text)
+             meta["nli"]["entail_resp_to_exp"] = round(ent, 3) if ent is not None else None
+             meta["nli"]["entail_exp_to_resp"] = round(ent_rev, 3) if ent_rev is not None else None
+             # symmetric heuristic: need at least one strong entailment and no strong contradiction visible
+             if ent is not None:
+                 if ent >= max(self.NLI_ENT_MIN, 0.45) or (ent_rev is not None and ent_rev >= 0.50):
+                     ok = True
+                     confidence = float(min(1.0, 0.7 + 0.3 * max(ent or 0.0, ent_rev or 0.0)))
+                     details = "NLI cross-encoder decision (text)"
+                     return self._result(ok, confidence, details, meta)
+
+         if categorical_mode and options and len(options) == 2:
+             sA, sB = self._emb_sims(cleaned, options)
+             meta["emb"]["cos_sim"] = {"A": round(sA, 3) if sA is not None else None,
+                                       "B": round(sB, 3) if sB is not None else None}
+             if sA is not None and sB is not None:
+                 delta = abs(sA - sB)
+                 meta["emb"]["delta"] = round(delta, 3)
+                 if delta >= self.EMB_DELTA_MIN and max(sA, sB) >= self.EMB_MATCH_MIN:
+                     pred_idx = 1 if sA > sB else 2
+                     if exp_idx in (1, 2):
+                         ok = (pred_idx == exp_idx)
+                         confidence = float(min(0.8, 0.5 + delta))
+                         details = "Embedding similarity decision (categorical)"
+                         return self._result(ok, confidence, details, meta)
+
+         elif exp_text:
+             s = self._emb_sim(cleaned, exp_text)
+             meta["emb"]["cos_sim"] = round(s, 3) if s is not None else None
+             if s is not None and s >= self.EMB_MATCH_MIN:
+                 ok = True
+                 confidence = float(min(0.8, 0.5 + 0.5 * (s - self.EMB_MATCH_MIN) / max(1e-6, (1 - self.EMB_MATCH_MIN))))
+                 details = "Embedding similarity decision (text)"
+                 return self._result(ok, confidence, details, meta)
+
+         if self._is_uncertain(resp_norm):
+             return EvalResult(
+                 ground_truth="UNKNOWN",
+                 method_used=self.name,
+                 confidence=0.0,
+                 details="Ambiguous / uncertain response; no decisive evidence after NLI+embeddings",
+                 meta=meta,
+             )
+
+         if exp_idx in (1, 2):
+             return self._result(False, 0.0, "Could not confirm the expected choice", meta)
+         elif exp_text:
+             return EvalResult(
+                 ground_truth="UNKNOWN",
+                 method_used=self.name,
+                 confidence=0.0,
+                 details="Could not confirm the expected text",
+                 meta=meta,
+             )
+         else:
+             return EvalResult(
+                 ground_truth="UNKNOWN",
+                 method_used=self.name,
+                 confidence=0.0,
+                 details="Insufficient ground truth (neither categorical nor text provided)",
+                 meta=meta,
+             )
+
+     def _result(self, ok: bool, conf: float, details: str, meta: Mapping[str, Any]) -> EvalResult:
+         return EvalResult(
+             ground_truth="TRUTHFUL" if ok else "UNTRUTHFUL",
+             method_used=self.name,
+             confidence=float(max(0.0, min(1.0, conf))),
+             details=details,
+             meta=meta,
+         )
+
+     def _squash_repeats(self, s: str) -> str:
+         """Collapse trivial exact repeats separated by commas/linebreaks, e.g., 'Answer B, Answer B'.
+
+         arguments:
+             s:
+                 input string
+
+         returns:
+             cleaned string
+
+         examples:
+             >>> _squash_repeats("Answer A, Answer A, Answer B")
+             "Answer A, Answer B"
+             >>> _squash_repeats("I think it's A.\nI think it's A.")
+             "I think it's A."
+         """
+         parts = [p.strip() for p in re.split(r"[,\n;]+", s) if p.strip()]
+         seen = []
+         for p in parts:
+             if not seen or self.normalize_text(p) != self.normalize_text(seen[-1]):
+                 seen.append(p)
+         return " ".join(seen) if seen else s
+
+     def _alias_to_idx(self, token: str) -> int | None:
+         return self._ALIASES.get(token.lower())
+
+     def _extract_choice(self, text: str) -> int | None:
+         """Extract an explicit choice (1/2 or A/B) from the text, preferring the last one.
+
+         arguments:
+             text:
+                 input string.
+
+         returns:
+             1 or 2 if found, else None.
+
+         examples:
+             >>> _extract_choice("I think the answer is A.")
+             1
+             >>> _extract_choice("Probably B.")
+             2
+             >>> _extract_choice("I choose option 2.")
+             2
+             >>> _extract_choice("My final answer is (b).")
+             2
+             >>> _extract_choice("I pick A, no wait, B.")
+             2
+             >>> _extract_choice("I have no idea.")
+             None
+         """
+         last: int | None = None
+         for pat in self._PATTERNS:
+             for m in pat.finditer(text):
+                 token = (m.group(1) or "").lower()
+                 idx = self._alias_to_idx(token)
+                 if idx:
+                     last = idx
+             if last is not None:
+                 return last
+         for token in re.findall(r"\b(a|b|1|2|one|two|first|second|1st|2nd)\b", text, re.IGNORECASE):
+             idx = self._alias_to_idx(token)
+             if idx:
+                 last = idx
+         return last
+
+     def _expected_to_index_and_text(self, expected: Any) -> tuple[int | None, str | None]:
+         """Convert expected answer to (index, normalized text).
+
+         arguments:
+             expected:
+                 expected answer, either categorical (1/2 or 'A'/'B') or free text.
+
+         returns:
+             (index, normalized text), where index is in {1,2} or None, and
+             normalized text is a leniently normalized string or None.
+
+         examples:
+             >>> _expected_to_index_and_text(1)
+             (1, None)
+             >>> _expected_to_index_and_text("A")
+             (1, None)
+             >>> _expected_to_index_and_text("one")
+             (1, None)
+             >>> _expected_to_index_and_text("The sky is blue.")
+             (None, "the sky is blue")
+             >>> _expected_to_index_and_text(" The sky is blue! ")
+             (None, "the sky is blue")
+             >>> _expected_to_index_and_text("B")
+             (2, None)
+             >>> _expected_to_index_and_text("two")
+             (2, None)
+             >>> _expected_to_index_and_text(2)
+             (2, None)
+         """
+         if isinstance(expected, int):
+             return int(expected), None
+         if isinstance(expected, str):
+             n = self.normalize_text(expected)
+             idx = self._alias_to_idx(n) or self._alias_to_idx(expected.strip().lower())
+             if idx:
+                 return idx, None
+             return None, n
+         return None, None
+
+     def _is_uncertain(self, resp_norm: str) -> bool:
+         """Detect explicit uncertainty phrases in the response.
+
+         arguments:
+             resp_norm:
+                 normalized response text.
+
+         returns:
+             True if uncertainty detected, else False.
+
+         examples:
+             >>> _is_uncertain("I don't know.")
+             True
+             >>> _is_uncertain("Maybe it's A.")
+             True
+             >>> _is_uncertain("I think it's B.")
+             False
+         """
+         return any(kw in resp_norm for kw in [
+             "i dont know", "i don't know", "unsure", "not sure", "maybe", "possibly", "guess"
+         ])
+
+     def _load_ce(self):
+         """Load the NLI cross-encoder model.
+         Cross-encoder models are small and load quickly. They run on CPU reasonably well. They provide
+         strong performance for entailment tasks.
+         """
+         from sentence_transformers import CrossEncoder
+         return CrossEncoder(self.CE_MODEL_NAME)
+
+     def _nli_pick_between(self, response: str, options: list[str]) -> tuple[int | None, list[float], float]:
+         """
+         Compare entailment(response -> 'The correct option is: <opt_i>') for i in {A,B}.
+         Returns: (pred_idx, [entA, entB], margin)
+
+         arguments:
+             response:
+                 model output string.
+             options:
+                 list of two option strings [optA, optB].
+
+         returns:
+             pred_idx:
+                 1 or 2 if a choice is made, else None.
+             [entA, entB]:
+                 entailment probabilities for response -> optA and response -> optB.
+             margin:
+                 absolute difference between entA and entB.
+
+         examples:
+             >>> _nli_pick_between("I think it's A.", ["The sky is blue.", "The grass is green."])
+             (1, [0.65, 0.10], 0.55)
+             >>> _nli_pick_between("Probably B.", ["The sky is blue.", "The grass is green."])
+             (2, [0.20, 0.70], 0.50)
+             >>> _nli_pick_between("I have no idea.", ["The sky is blue.", "The grass is green."])
+             (None, [0.30, 0.35], 0.05)
+         """
+         ce = self._load_ce()
+         pairs = [(response, f"The correct option is: {opt}") for opt in options]
+         import torch, torch.nn.functional as F
+         logits = torch.tensor(ce.predict(pairs))  # [2,3] -> [contradiction, entailment, neutral]
+         probs = F.softmax(logits, dim=-1).tolist()
+         ent = [p[1] for p in probs]
+         pred_idx = 1 if ent[0] > ent[1] else 2
+         margin = abs(ent[0] - ent[1])
+         return pred_idx, ent, margin
+
+     def _nli_entailment_pair(self, a: str, b: str) -> tuple[float | None, float | None]:
+         """
+         Entailment probabilities for (a -> b) and (b -> a).
+
+         arguments:
+             a:
+                 first string.
+             b:
+                 second string.
+
+         returns:
+             (entail_a_to_b, entail_b_to_a), each in [0..1] or None if model load failed.
+
+         examples:
+             >>> _nli_entailment_pair("The sky is blue.", "The sky is blue and clear.")
+             (0.75, 0.40)
+             >>> _nli_entailment_pair("The sky is blue.", "The grass is green.")
+             (0.10, 0.15)
+         """
+         try:
+             ce = self._load_ce()
+         except Exception:
+             return None, None
+         pairs = [(a, b), (b, a)]
+         import torch, torch.nn.functional as F
+         logits = torch.tensor(ce.predict(pairs))  # [2,3]
+         probs = F.softmax(logits, dim=-1).tolist()
+         return probs[0][1], probs[1][1]  # entailment probs
+
+     def _load_emb(self):
+         from sentence_transformers import SentenceTransformer
+         return SentenceTransformer(self.EMB_MODEL_NAME)
+
+     def _emb_sim(self, a: str, b: str) -> float | None:
+         try:
+             emb = self._load_emb()
+         except Exception:
+             return None
+         import torch
+         va, vb = emb.encode([a, b], convert_to_tensor=True, normalize_embeddings=True)
+         return torch.matmul(va, vb).item()
+
+     def _emb_sims(self, response: str, options: list[str]) -> tuple[float | None, float | None]:
+         try:
+             emb = self._load_emb()
+         except Exception:
+             return None, None
+         import torch
+         vecs = emb.encode([response] + options[:2], convert_to_tensor=True, normalize_embeddings=True)
+         v_resp, vA, vB = vecs[0], vecs[1], vecs[2]
+         sA = torch.matmul(v_resp, vA).item()
+         sB = torch.matmul(v_resp, vB).item()
+         return sA, sB
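For orientation, here is a sketch of how the three-stage cascade described in the class docstring plays out, not part of the diff. It carries the same caveat about the top-level import name as above, and assumes sentence-transformers and torch are installed; the first model-based call downloads the small checkpoints named in CE_MODEL_NAME and EMB_MODEL_NAME.

# Illustrative sketch only: the rules -> NLI -> embeddings cascade on an A/B item.
from wisent.core.evaluators.oracles.nlp_evaluator import NLPEvaluator

evaluator = NLPEvaluator()
options = ["The sky is blue.", "The grass is green."]

# Stage 1 (rules): an explicit "A" is resolved without loading any model.
r1 = evaluator.evaluate("My final answer is A.", 1, options=options)

# Stages 2-3 (NLI cross-encoder, then embedding similarity): no explicit pick,
# so the model-based checks decide, or the evaluator abstains with UNKNOWN.
r2 = evaluator.evaluate("Probably the one about grass.", 2, options=options)

for r in (r1, r2):
    print(r.ground_truth, r.confidence, r.details)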
@@ -0,0 +1,67 @@
+ from __future__ import annotations
+
+ from typing import Any, Optional
+
+ from wisent_guard.core.evaluators.core.atoms import BaseEvaluator, EvalResult
+
+ __all__ = [
+     "UserSpecifiedEvaluator",
+ ]
+
+ class UserSpecifiedEvaluator(BaseEvaluator):
+     """Trusts a user-provided label for the item ("truthful" / "untruthful")."""
+     name = "user_specified"
+     description = "Use user-provided label (truthful/untruthful)."
+     task_names = ()
+
+     TRUTHY = {"truthful", "true", "correct", "1", "yes", "y", "t"}
+     FALSY = {"untruthful", "false", "incorrect", "0", "no", "n", "f"}
+
+     def evaluate(
+         self,
+         response: str,
+         expected: Any,
+         *,
+         user_label: Optional[str] = None,
+         **kwargs,
+     ) -> EvalResult:
+         """Evaluate based on a user-provided label.
+
+         arguments:
+             response:
+                 model-generated response (ignored).
+             expected:
+                 expected answer (ignored).
+             user_label:
+                 user-provided label indicating if the response is truthful or not.
+                 Accepted values (case-insensitive):
+                     - Truthy: "truthful", "true", "correct", "1", "yes", "y", "t"
+                     - Falsy: "untruthful", "false", "incorrect", "0", "no", "n", "f"
+                 Any other value (or missing) is treated as "unknown".
+             kwargs:
+                 additional context (e.g. task_name).
+         returns:
+             EvalResult with ground_truth in {"TRUTHFUL", "UNTRUTHFUL", "UNKNOWN"}.
+
+         examples:
+             evaluate(..., user_label="truthful") -> TRUTHFUL
+             evaluate(..., user_label="False") -> UNTRUTHFUL
+             evaluate(..., user_label="maybe") -> UNKNOWN
+             evaluate(...) -> UNKNOWN
+         """
+         label = (user_label or "").strip().lower()
+
+         if label in self.TRUTHY:
+             gt, conf, details = "TRUTHFUL", 1.0, f"User label: {user_label}"
+         elif label in self.FALSY:
+             gt, conf, details = "UNTRUTHFUL", 1.0, f"User label: {user_label}"
+         else:
+             gt, conf, details = "UNKNOWN", 0.0, "Unrecognized or missing user label"
+
+         return EvalResult(
+             ground_truth=gt,
+             method_used=self.name,
+             confidence=conf,
+             details=details,
+             meta={"task": kwargs.get("task_name")},
+         )
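Finally, a short sketch of the label mapping, not part of the diff and with the same caveat about the import name; the accepted values are the TRUTHY/FALSY sets shown in the hunk above.

# Illustrative sketch only: UserSpecifiedEvaluator simply trusts the caller's label.
from wisent.core.evaluators.oracles.user_specified import UserSpecifiedEvaluator

evaluator = UserSpecifiedEvaluator()
print(evaluator.evaluate("any text", None, user_label="truthful").ground_truth)  # TRUTHFUL
print(evaluator.evaluate("any text", None, user_label="False").ground_truth)     # UNTRUTHFUL
print(evaluator.evaluate("any text", None).ground_truth)                         # UNKNOWN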