wisent 0.5.11__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wisent might be problematic. Click here for more details.

Files changed (225) hide show
  1. wisent/__init__.py +1 -1
  2. wisent/core/activations/__init__.py +26 -0
  3. wisent/core/activations/activations.py +96 -0
  4. wisent/core/activations/activations_collector.py +71 -20
  5. wisent/core/activations/prompt_construction_strategy.py +47 -0
  6. wisent/core/agent/budget.py +2 -2
  7. wisent/core/agent/device_benchmarks.py +1 -1
  8. wisent/core/agent/diagnose/classifier_marketplace.py +8 -8
  9. wisent/core/agent/diagnose/response_diagnostics.py +4 -4
  10. wisent/core/agent/diagnose/synthetic_classifier_option.py +1 -1
  11. wisent/core/agent/diagnose/tasks/task_manager.py +3 -3
  12. wisent/core/agent/diagnose.py +2 -1
  13. wisent/core/autonomous_agent.py +10 -2
  14. wisent/core/benchmark_extractors.py +293 -0
  15. wisent/core/bigcode_integration.py +20 -7
  16. wisent/core/branding.py +108 -0
  17. wisent/core/cli/__init__.py +15 -0
  18. wisent/core/cli/create_steering_vector.py +138 -0
  19. wisent/core/cli/evaluate_responses.py +715 -0
  20. wisent/core/cli/generate_pairs.py +128 -0
  21. wisent/core/cli/generate_pairs_from_task.py +119 -0
  22. wisent/core/cli/generate_responses.py +129 -0
  23. wisent/core/cli/generate_vector_from_synthetic.py +149 -0
  24. wisent/core/cli/generate_vector_from_task.py +147 -0
  25. wisent/core/cli/get_activations.py +191 -0
  26. wisent/core/cli/optimize_classification.py +339 -0
  27. wisent/core/cli/optimize_steering.py +364 -0
  28. wisent/core/cli/tasks.py +182 -0
  29. wisent/core/cli_logger.py +22 -0
  30. wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +27 -1
  31. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +49 -1
  32. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_challenge.py +115 -0
  33. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_easy.py +115 -0
  34. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arithmetic.py +111 -0
  35. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/asdiv.py +119 -0
  36. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/boolq.py +115 -0
  37. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cb.py +114 -0
  38. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copa.py +118 -0
  39. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coqa.py +146 -0
  40. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/drop.py +129 -0
  41. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gsm8k.py +119 -0
  42. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/headqa.py +112 -0
  43. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hellaswag.py +113 -0
  44. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/livecodebench.py +367 -0
  45. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa.py +115 -0
  46. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa2.py +114 -0
  47. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mc-taco.py +113 -0
  48. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medqa.py +112 -0
  49. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mrpc.py +111 -0
  50. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multirc.py +114 -0
  51. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mutual.py +113 -0
  52. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/openbookqa.py +115 -0
  53. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pawsx.py +111 -0
  54. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/piqa.py +113 -0
  55. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prost.py +113 -0
  56. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pubmedqa.py +112 -0
  57. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qa4mre.py +116 -0
  58. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper.py +115 -0
  59. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnli.py +111 -0
  60. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qqp.py +111 -0
  61. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/race.py +121 -0
  62. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/record.py +121 -0
  63. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/rte.py +111 -0
  64. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sciq.py +110 -0
  65. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/social_iqa.py +114 -0
  66. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/squad2.py +124 -0
  67. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sst2.py +111 -0
  68. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swag.py +112 -0
  69. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/triviaqa.py +127 -0
  70. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_gen.py +112 -0
  71. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc1.py +117 -0
  72. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc2.py +117 -0
  73. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/webqs.py +127 -0
  74. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wic.py +119 -0
  75. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +1 -1
  76. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wnli.py +111 -0
  77. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc.py +114 -0
  78. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xnli.py +112 -0
  79. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xstorycloze.py +114 -0
  80. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xwinograd.py +114 -0
  81. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +1 -1
  82. wisent/core/data_loaders/__init__.py +235 -0
  83. wisent/core/data_loaders/loaders/lm_loader.py +2 -2
  84. wisent/core/data_loaders/loaders/task_interface_loader.py +300 -0
  85. wisent/{cli/data_loaders/data_loader_rotator.py → core/data_loaders/rotator.py} +1 -1
  86. wisent/core/download_full_benchmarks.py +79 -2
  87. wisent/core/evaluators/benchmark_specific/__init__.py +26 -0
  88. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/evaluator.py +17 -17
  89. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/cpp_sanitizer.py +2 -2
  90. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/java_sanitizer.py +2 -2
  91. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/python_sanitizer.py +2 -2
  92. wisent/core/evaluators/benchmark_specific/coding/providers/livecodebench/__init__.py +3 -0
  93. wisent/core/evaluators/benchmark_specific/coding/providers/livecodebench/provider.py +305 -0
  94. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/core/runtime.py +36 -4
  95. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/entrypoint.py +2 -4
  96. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/recipes.py +1 -1
  97. wisent/core/evaluators/benchmark_specific/coding/solution_generator.py +258 -0
  98. wisent/core/evaluators/benchmark_specific/exact_match_evaluator.py +79 -0
  99. wisent/core/evaluators/benchmark_specific/f1_evaluator.py +101 -0
  100. wisent/core/evaluators/benchmark_specific/generation_evaluator.py +197 -0
  101. wisent/core/{log_likelihoods_evaluator.py → evaluators/benchmark_specific/log_likelihoods_evaluator.py} +10 -2
  102. wisent/core/evaluators/benchmark_specific/perplexity_evaluator.py +140 -0
  103. wisent/core/evaluators/benchmark_specific/personalization_evaluator.py +250 -0
  104. wisent/{cli/evaluators/evaluator_rotator.py → core/evaluators/rotator.py} +4 -4
  105. wisent/core/lm_eval_harness_ground_truth.py +3 -2
  106. wisent/core/main.py +57 -0
  107. wisent/core/model_persistence.py +2 -2
  108. wisent/core/models/wisent_model.py +8 -6
  109. wisent/core/optuna/classifier/optuna_classifier_optimizer.py +2 -2
  110. wisent/core/optuna/steering/steering_optimization.py +1 -1
  111. wisent/core/parser_arguments/__init__.py +10 -0
  112. wisent/core/parser_arguments/agent_parser.py +110 -0
  113. wisent/core/parser_arguments/configure_model_parser.py +7 -0
  114. wisent/core/parser_arguments/create_steering_vector_parser.py +59 -0
  115. wisent/core/parser_arguments/evaluate_parser.py +40 -0
  116. wisent/core/parser_arguments/evaluate_responses_parser.py +10 -0
  117. wisent/core/parser_arguments/full_optimize_parser.py +115 -0
  118. wisent/core/parser_arguments/generate_pairs_from_task_parser.py +33 -0
  119. wisent/core/parser_arguments/generate_pairs_parser.py +29 -0
  120. wisent/core/parser_arguments/generate_responses_parser.py +15 -0
  121. wisent/core/parser_arguments/generate_vector_from_synthetic_parser.py +127 -0
  122. wisent/core/parser_arguments/generate_vector_from_task_parser.py +127 -0
  123. wisent/core/parser_arguments/generate_vector_parser.py +90 -0
  124. wisent/core/parser_arguments/get_activations_parser.py +90 -0
  125. wisent/core/parser_arguments/main_parser.py +152 -0
  126. wisent/core/parser_arguments/model_config_parser.py +59 -0
  127. wisent/core/parser_arguments/monitor_parser.py +17 -0
  128. wisent/core/parser_arguments/multi_steer_parser.py +47 -0
  129. wisent/core/parser_arguments/optimize_classification_parser.py +67 -0
  130. wisent/core/parser_arguments/optimize_sample_size_parser.py +58 -0
  131. wisent/core/parser_arguments/optimize_steering_parser.py +147 -0
  132. wisent/core/parser_arguments/synthetic_parser.py +93 -0
  133. wisent/core/parser_arguments/tasks_parser.py +584 -0
  134. wisent/core/parser_arguments/test_nonsense_parser.py +26 -0
  135. wisent/core/parser_arguments/utils.py +111 -0
  136. wisent/core/prompts/core/prompt_formater.py +3 -3
  137. wisent/core/prompts/prompt_stratiegies/direct_completion.py +2 -0
  138. wisent/core/prompts/prompt_stratiegies/instruction_following.py +2 -0
  139. wisent/core/prompts/prompt_stratiegies/multiple_choice.py +2 -0
  140. wisent/core/prompts/prompt_stratiegies/role_playing.py +2 -0
  141. wisent/{cli/steering_methods/steering_rotator.py → core/steering_methods/rotator.py} +4 -4
  142. wisent/core/steering_optimizer.py +45 -21
  143. wisent/{synthetic → core/synthetic}/cleaners/deduper_cleaner.py +3 -3
  144. wisent/{synthetic → core/synthetic}/cleaners/methods/base_dedupers.py +2 -2
  145. wisent/{synthetic → core/synthetic}/cleaners/methods/base_refusalers.py +1 -1
  146. wisent/{synthetic → core/synthetic}/cleaners/pairs_cleaner.py +5 -5
  147. wisent/{synthetic → core/synthetic}/cleaners/refusaler_cleaner.py +4 -4
  148. wisent/{synthetic → core/synthetic}/db_instructions/mini_dp.py +1 -1
  149. wisent/{synthetic → core/synthetic}/generators/diversities/methods/fast_diversity.py +1 -1
  150. wisent/{synthetic → core/synthetic}/generators/pairs_generator.py +38 -12
  151. wisent/core/tasks/livecodebench_task.py +4 -103
  152. wisent/core/timing_calibration.py +1 -1
  153. {wisent-0.5.11.dist-info → wisent-0.5.13.dist-info}/METADATA +3 -3
  154. wisent-0.5.13.dist-info/RECORD +294 -0
  155. wisent-0.5.13.dist-info/entry_points.txt +2 -0
  156. wisent/benchmarks/coding/providers/livecodebench/provider.py +0 -53
  157. wisent/classifiers/core/atoms.py +0 -747
  158. wisent/classifiers/models/logistic.py +0 -29
  159. wisent/classifiers/models/mlp.py +0 -47
  160. wisent/cli/classifiers/classifier_rotator.py +0 -137
  161. wisent/cli/cli_logger.py +0 -142
  162. wisent/cli/wisent_cli/commands/help_cmd.py +0 -52
  163. wisent/cli/wisent_cli/commands/listing.py +0 -154
  164. wisent/cli/wisent_cli/commands/train_cmd.py +0 -322
  165. wisent/cli/wisent_cli/main.py +0 -93
  166. wisent/cli/wisent_cli/shell.py +0 -80
  167. wisent/cli/wisent_cli/ui.py +0 -69
  168. wisent/cli/wisent_cli/util/aggregations.py +0 -43
  169. wisent/cli/wisent_cli/util/parsing.py +0 -126
  170. wisent/cli/wisent_cli/version.py +0 -4
  171. wisent/opti/methods/__init__.py +0 -0
  172. wisent/synthetic/__init__.py +0 -0
  173. wisent/synthetic/cleaners/__init__.py +0 -0
  174. wisent/synthetic/cleaners/core/__init__.py +0 -0
  175. wisent/synthetic/cleaners/methods/__init__.py +0 -0
  176. wisent/synthetic/cleaners/methods/core/__init__.py +0 -0
  177. wisent/synthetic/db_instructions/__init__.py +0 -0
  178. wisent/synthetic/db_instructions/core/__init__.py +0 -0
  179. wisent/synthetic/generators/__init__.py +0 -0
  180. wisent/synthetic/generators/core/__init__.py +0 -0
  181. wisent/synthetic/generators/diversities/__init__.py +0 -0
  182. wisent/synthetic/generators/diversities/core/__init__.py +0 -0
  183. wisent/synthetic/generators/diversities/methods/__init__.py +0 -0
  184. wisent-0.5.11.dist-info/RECORD +0 -220
  185. /wisent/{benchmarks → core/evaluators/benchmark_specific/coding}/__init__.py +0 -0
  186. /wisent/{benchmarks/coding → core/evaluators/benchmark_specific/coding/metrics}/__init__.py +0 -0
  187. /wisent/{benchmarks/coding/metrics → core/evaluators/benchmark_specific/coding/metrics/core}/__init__.py +0 -0
  188. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/core/atoms.py +0 -0
  189. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/passk.py +0 -0
  190. /wisent/{benchmarks/coding/metrics/core → core/evaluators/benchmark_specific/coding/output_sanitizer}/__init__.py +0 -0
  191. /wisent/{benchmarks/coding/output_sanitizer → core/evaluators/benchmark_specific/coding/output_sanitizer/core}/__init__.py +0 -0
  192. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/core/atoms.py +0 -0
  193. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/utils.py +0 -0
  194. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/providers/__init__.py +0 -0
  195. /wisent/{benchmarks/coding/output_sanitizer → core/evaluators/benchmark_specific/coding/providers}/core/__init__.py +0 -0
  196. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/providers/core/atoms.py +0 -0
  197. /wisent/{benchmarks/coding/providers/core → core/evaluators/benchmark_specific/coding/safe_docker}/__init__.py +0 -0
  198. /wisent/{benchmarks/coding/providers/livecodebench → core/evaluators/benchmark_specific/coding/safe_docker/core}/__init__.py +0 -0
  199. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/core/atoms.py +0 -0
  200. /wisent/{benchmarks/coding/safe_docker → core/opti}/__init__.py +0 -0
  201. /wisent/{benchmarks/coding/safe_docker → core/opti}/core/__init__.py +0 -0
  202. /wisent/{opti → core/opti}/core/atoms.py +0 -0
  203. /wisent/{classifiers → core/opti/methods}/__init__.py +0 -0
  204. /wisent/{opti → core/opti}/methods/opti_classificator.py +0 -0
  205. /wisent/{opti → core/opti}/methods/opti_steering.py +0 -0
  206. /wisent/{classifiers/core → core/synthetic}/__init__.py +0 -0
  207. /wisent/{classifiers/models → core/synthetic/cleaners}/__init__.py +0 -0
  208. /wisent/{cli → core/synthetic/cleaners/core}/__init__.py +0 -0
  209. /wisent/{synthetic → core/synthetic}/cleaners/core/atoms.py +0 -0
  210. /wisent/{cli/classifiers → core/synthetic/cleaners/methods}/__init__.py +0 -0
  211. /wisent/{cli/data_loaders → core/synthetic/cleaners/methods/core}/__init__.py +0 -0
  212. /wisent/{synthetic → core/synthetic}/cleaners/methods/core/atoms.py +0 -0
  213. /wisent/{cli/evaluators → core/synthetic/db_instructions}/__init__.py +0 -0
  214. /wisent/{cli/steering_methods → core/synthetic/db_instructions/core}/__init__.py +0 -0
  215. /wisent/{synthetic → core/synthetic}/db_instructions/core/atoms.py +0 -0
  216. /wisent/{cli/wisent_cli → core/synthetic/generators}/__init__.py +0 -0
  217. /wisent/{cli/wisent_cli/commands → core/synthetic/generators/core}/__init__.py +0 -0
  218. /wisent/{synthetic → core/synthetic}/generators/core/atoms.py +0 -0
  219. /wisent/{cli/wisent_cli/util → core/synthetic/generators/diversities}/__init__.py +0 -0
  220. /wisent/{opti → core/synthetic/generators/diversities/core}/__init__.py +0 -0
  221. /wisent/{synthetic → core/synthetic}/generators/diversities/core/core.py +0 -0
  222. /wisent/{opti/core → core/synthetic/generators/diversities/methods}/__init__.py +0 -0
  223. {wisent-0.5.11.dist-info → wisent-0.5.13.dist-info}/WHEEL +0 -0
  224. {wisent-0.5.11.dist-info → wisent-0.5.13.dist-info}/licenses/LICENSE +0 -0
  225. {wisent-0.5.11.dist-info → wisent-0.5.13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,111 @@
1
+ """
2
+ Utility functions for parser arguments.
3
+
4
+ Shared helper functions used across multiple command parsers.
5
+ """
6
+
7
+ from typing import List, Optional
8
+
9
+
10
def parse_layers_from_arg(layer_arg: str, model=None) -> List[int]:
    """
    Resolve a layer command-line argument into a concrete list of indices.

    Args:
        layer_arg: Layer specification such as "15", "14-16", "14,15,16",
            "all", or the sentinel "-1" (request for auto-optimization).
        model: Model object; only consulted when ``layer_arg`` is "all",
            in which case it is handed to ``detect_model_layers``.

    Returns:
        ``[-1]`` for the auto-optimization sentinel, every index in
        ``range(detect_model_layers(model))`` for "all", otherwise the
        list produced by ``parse_layer_range``.

    Raises:
        ValueError: If "all" is requested but no model was supplied, or if
            ``parse_layer_range`` rejects the specification.
    """
    # The sentinel is passed through untouched so the caller can recognise it.
    if layer_arg == "-1":
        return [-1]

    explicit_layers = parse_layer_range(layer_arg, model)
    if explicit_layers is not None:
        return explicit_layers

    # parse_layer_range returned None, i.e. the "all" case: the layer count
    # has to come from the model itself.
    if model is None:
        # If no model provided, we cannot determine layers - this should not happen
        raise ValueError("Cannot determine layer range without model instance")

    # Imported lazily, exactly as the original did, so the optimizer module
    # is only loaded when auto-detection is actually needed.
    from wisent.core.hyperparameter_optimizer import detect_model_layers

    layer_count = detect_model_layers(model)
    return list(range(layer_count))
39
+
40
+
41
def parse_layer_range(layer_range_str: str, model=None) -> Optional[List[int]]:
    """
    Parse a layer specification string into a list of layer indices.

    The specification is a comma-separated list of items, where each item
    is either a single index ("15") or an inclusive range ("8-24"). Items
    may be mixed ("8-10,15") and surrounded by whitespace.

    Args:
        layer_range_str: String like "8-24", "10,15,20", "8-10,15", "15",
            or "all" (case-insensitive).
        model: Accepted for interface compatibility; not used by this
            function (callers resolve "all" against the model themselves).

    Returns:
        List of layer indices in the order written, or None if the
        specification is "all" (to be auto-detected by the caller). A range
        whose start exceeds its end contributes no indices.

    Raises:
        ValueError: If any item is not an integer or a "start-end" pair,
            e.g. "-5", "1-2-3", "" or "abc".
    """
    spec = layer_range_str.strip()
    if spec.lower() == "all":
        # Return None to signal auto-detection
        return None

    layers: List[int] = []
    for raw_item in spec.split(","):
        item = raw_item.strip()
        try:
            if "-" in item:
                # Range format: "8-24" (inclusive on both ends). Unpacking
                # fails with ValueError for "1-2-3"; int("") fails for "-5".
                start_text, end_text = item.split("-")
                start, end = int(start_text), int(end_text)
                layers.extend(range(start, end + 1))
            else:
                # Single layer
                layers.append(int(item))
        except ValueError as exc:
            # Surface one clear message instead of a bare int()/unpacking error.
            raise ValueError(
                f"Invalid layer specification {layer_range_str!r}: expected a single "
                'index ("15"), a range ("8-24"), a comma-separated list '
                '("10,15,20"), or "all".'
            ) from exc
    return layers
64
+
65
+
66
def aggregate_token_scores(token_scores: List[float], method: str) -> float:
    """
    Aggregate token scores using the specified method.

    Args:
        token_scores: List of token scores (probabilities). Every entry
            must be a plain ``int`` or ``float``.
        method: Aggregation method ("average", "final", "first", "max",
            "min"). Any other value falls back to "average".

    Returns:
        Aggregated score, or 0.5 when ``token_scores`` is empty.

    Raises:
        ValueError: If an entry is None, exposes an ``item`` attribute
            (tensor-like objects), or is not an int/float.
    """
    if not token_scores:
        return 0.5

    # Validate strictly: nothing is converted or dropped here. A None or
    # tensor-like entry is reported as an error so upstream bugs surface.
    clean_scores: List[float] = []
    for i, score in enumerate(token_scores):
        if score is None:
            raise ValueError(
                f"Token score at index {i} is None! This indicates a bug in the classifier output handling."
            )
        if hasattr(score, "item"):  # Handle tensors
            raise ValueError(
                f"Token score at index {i} is a tensor ({type(score)})! Expected float but got tensor: {score}"
            )
        if not isinstance(score, (int, float)):
            raise ValueError(
                f"Token score at index {i} has invalid type: {type(score)}. Expected float but got {type(score).__name__}: {score}"
            )
        clean_scores.append(float(score))

    # clean_scores is non-empty from here on: the input was non-empty and
    # every entry was either appended or caused a raise above.
    selectors = {
        "final": lambda scores: scores[-1],
        "first": lambda scores: scores[0],
        "max": max,
        "min": min,
    }
    selector = selectors.get(method)
    if selector is not None:
        return selector(clean_scores)

    # "average", and the default for any unknown method
    return sum(clean_scores) / len(clean_scores)
@@ -110,14 +110,14 @@ class PromptFormatter:
110
110
  RuntimeError: If the 'strategies' package is not found or no strategies are discovered.
111
111
  """
112
112
  try:
113
- import wisent_guard.core.prompts.prompt_stratiegies as strategies_pkg
113
+ import wisent.core.prompts.prompt_stratiegies as strategies_pkg
114
114
  except ModuleNotFoundError as exc:
115
115
  raise RuntimeError(
116
116
  "The 'strategies' package was not found. "
117
117
  "Create a 'strategies' directory with an empty __init__.py."
118
118
  ) from exc
119
119
 
120
- import wisent_guard.core.prompts.prompt_stratiegies as strategies_pkg
120
+ import wisent.core.prompts.prompt_stratiegies as strategies_pkg
121
121
 
122
122
  for module_info in pkgutil.iter_modules(strategies_pkg.__path__):
123
123
  name = module_info.name
@@ -125,7 +125,7 @@ class PromptFormatter:
125
125
  # Skip private/dunder modules.
126
126
  continue
127
127
 
128
- module = importlib.import_module(f"strategies.{name}")
128
+ module = importlib.import_module(f"wisent.core.prompts.prompt_stratiegies.{name}")
129
129
  self._register_strategies_from_module(module)
130
130
 
131
131
  if not self._registry:
@@ -7,6 +7,8 @@ __all__ = ["DirectCompletionStrategy"]
7
7
  class DirectCompletionStrategy(PromptStrategy):
8
8
  """Direct completion strategy: question from user, answer from assistant."""
9
9
 
10
+ strategy_key = "direct_completion"
11
+
10
12
  def build(
11
13
  self,
12
14
  question: str,
@@ -7,6 +7,8 @@ __all__ = ["InstructionFollowingStrategy"]
7
7
  class InstructionFollowingStrategy(PromptStrategy):
8
8
  """Plain instruction/QA style: question from user, answer from assistant."""
9
9
 
10
+ strategy_key = "instruction_following"
11
+
10
12
  def build(
11
13
  self,
12
14
  question: str,
@@ -7,6 +7,8 @@ __all__ = ["MultipleChoiceStrategy"]
7
7
  class MultipleChoiceStrategy(PromptStrategy):
8
8
  """Formats a multiple-choice prompt with options A/B."""
9
9
 
10
+ strategy_key = "multiple_choice"
11
+
10
12
  def build(
11
13
  self,
12
14
  question: str,
@@ -7,6 +7,8 @@ __all__ = ["RolePlayingStrategy"]
7
7
  class RolePlayingStrategy(PromptStrategy):
8
8
  """Asks the model to behave like someone who gives a specific answer."""
9
9
 
10
+ strategy_key = "role_playing"
11
+
10
12
  def build(
11
13
  self,
12
14
  question: str,
@@ -25,7 +25,7 @@ class SteeringMethodRotator:
25
25
  def __init__(
26
26
  self,
27
27
  method: str | BaseSteeringMethod | Type[BaseSteeringMethod] | None = None,
28
- methods_location: str | Path = "wisent_guard.core.steering_methods.methods",
28
+ methods_location: str | Path = "wisent.core.steering_methods.methods",
29
29
  autoload: bool = True,
30
30
  **default_method_kwargs: Any,
31
31
  ) -> None:
@@ -44,7 +44,7 @@ class SteeringMethodRotator:
44
44
  spec = importlib.util.spec_from_file_location(mod_name, py)
45
45
  if spec and spec.loader:
46
46
  module = importlib.util.module_from_spec(spec)
47
- spec.loader.exec_module(module)
47
+ spec.loader.exec_module(module)
48
48
  return
49
49
 
50
50
  if not isinstance(location, str):
@@ -91,7 +91,7 @@ class SteeringMethodRotator:
91
91
  if isinstance(method, str):
92
92
  return BaseSteeringMethod.get(method)(**kwargs)
93
93
  raise TypeError("method must be None, str name, BaseSteeringMethod instance, or subclass.")
94
-
94
+
95
95
  def use(self, method: str | BaseSteeringMethod | Type[BaseSteeringMethod], **kwargs: Any) -> None:
96
96
  self._method = self._resolve_method(method, **kwargs)
97
97
 
@@ -107,4 +107,4 @@ if __name__ == "__main__":
107
107
  rot = SteeringMethodRotator()
108
108
  print("Available steering methods:")
109
109
  for m in rot.list_methods():
110
- print(f" - {m['name']}: {m['description']} ({m['class']})")
110
+ print(f" - {m['name']}: {m['description']} ({m['class']})")
@@ -392,19 +392,21 @@ class SteeringOptimizer:
392
392
 
393
393
  if layer_search_range is None:
394
394
  # Default: search around classification layer if available
395
- if self.base_classification_layer:
396
- min_layer = max(1, self.base_classification_layer - 3)
397
- max_layer = self.base_classification_layer + 3
398
- layer_search_range = (min_layer, max_layer)
399
- else:
400
- # TODO: Auto-detect model layer count and use reasonable range
401
- layer_search_range = (10, 20) # Default fallback
402
-
403
- # TODO: Implement layer optimization logic
395
+ if not self.base_classification_layer:
396
+ raise ValueError(
397
+ "Layer optimization requires either layer_search_range parameter or "
398
+ "base_classification_layer to be set. Please provide a layer_search_range "
399
+ "or initialize SteeringOptimizer with a base_classification_layer."
400
+ )
401
+ min_layer = max(1, self.base_classification_layer - 3)
402
+ max_layer = self.base_classification_layer + 3
403
+ layer_search_range = (min_layer, max_layer)
404
+
404
405
  raise NotImplementedError(
405
406
  "Steering layer optimization not yet implemented. "
406
407
  "This requires implementing steering vector training and "
407
- "effectiveness measurement across different layers."
408
+ "effectiveness measurement across different layers. "
409
+ f"Would search layers {layer_search_range}."
408
410
  )
409
411
 
410
412
  def optimize_steering_strength(
@@ -419,7 +421,7 @@ class SteeringOptimizer:
419
421
  ) -> SteeringOptimizationResult:
420
422
  """
421
423
  Find optimal steering strength for a specific method, layer, and task.
422
-
424
+
423
425
  Args:
424
426
  task_name: Task to optimize for
425
427
  steering_method: Steering method to use
@@ -427,16 +429,26 @@ class SteeringOptimizer:
427
429
  strength_range: (min_strength, max_strength) to search
428
430
  strength_steps: Number of strength values to test
429
431
  limit: Maximum samples for testing
430
-
432
+
431
433
  Returns:
432
434
  SteeringOptimizationResult with optimal strength
433
435
  """
436
+ import time
437
+ start_time = time.time()
438
+
434
439
  if layer is None:
435
- layer = self.base_classification_layer or 15 # Default fallback
436
-
440
+ if not self.base_classification_layer:
441
+ raise ValueError(
442
+ "Steering strength optimization requires a layer to be specified. "
443
+ "Please provide the 'layer' parameter or initialize SteeringOptimizer "
444
+ "with a base_classification_layer."
445
+ )
446
+ layer = self.base_classification_layer
447
+
437
448
  if strength_range is None:
438
- strength_range = (0.1, 2.0) # Default strength range
439
-
449
+ # Default strength range is reasonable for most steering methods
450
+ strength_range = (0.1, 2.0)
451
+
440
452
  logger.info(f"⚡ Optimizing steering strength for {task_name}")
441
453
  logger.info(f" Method: {steering_method.value}, Layer: {layer}")
442
454
  logger.info(f" Strength range: {strength_range}, Steps: {strength_steps}")
@@ -609,7 +621,10 @@ class SteeringOptimizer:
609
621
  'score': 0.0,
610
622
  'error': str(e)
611
623
  })
612
-
624
+
625
+ # Calculate optimization time
626
+ optimization_time = time.time() - start_time
627
+
613
628
  return SteeringOptimizationResult(
614
629
  task_name=task_name,
615
630
  best_steering_layer=layer,
@@ -618,7 +633,7 @@ class SteeringOptimizer:
618
633
  optimal_parameters={'strength': best_strength},
619
634
  steering_effectiveness_score=best_score,
620
635
  classification_accuracy_impact=best_score, # Using same score for now
621
- optimization_time_seconds=0.0, # TODO: Track actual time
636
+ optimization_time_seconds=optimization_time,
622
637
  total_configurations_tested=len(results),
623
638
  error_message=None
624
639
  )
@@ -750,10 +765,19 @@ class SteeringOptimizer:
750
765
  task_overrides = self.classification_config.get("task_specific_overrides", {})
751
766
  tasks = list(task_overrides.keys())
752
767
  if not tasks:
753
- logger.warning("No classification-optimized tasks found, using default task set")
754
- tasks = ["truthfulqa_mc1", "gsm8k", "squad2"] # Default fallback
768
+ raise ValueError(
769
+ "No classification-optimized tasks found in classification_config. "
770
+ "Please either:\n"
771
+ " 1. Run classification optimization first to populate task_specific_overrides, or\n"
772
+ " 2. Explicitly provide a list of tasks via the 'tasks' parameter"
773
+ )
755
774
  else:
756
- tasks = ["truthfulqa_mc1", "gsm8k", "squad2"] # Default fallback
775
+ raise ValueError(
776
+ "No tasks provided and no classification_config available. "
777
+ "Please either:\n"
778
+ " 1. Provide explicit tasks via the 'tasks' parameter, or\n"
779
+ " 2. Initialize SteeringOptimizer with a classification_config that contains task_specific_overrides"
780
+ )
757
781
 
758
782
  if methods is None:
759
783
  methods = [SteeringMethod.CAA, SteeringMethod.HPR] # Start with simpler methods
@@ -1,8 +1,8 @@
1
- from wisent.synthetic.cleaners.core.atoms import CleanStep
2
- from wisent.synthetic.cleaners.core.atoms import CleanStepStats
1
+ from wisent.core.synthetic.cleaners.core.atoms import CleanStep
2
+ from wisent.core.synthetic.cleaners.core.atoms import CleanStepStats
3
3
 
4
4
  from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
5
- from wisent.synthetic.cleaners.methods.core.atoms import Deduper
5
+ from wisent.core.synthetic.cleaners.methods.core.atoms import Deduper
6
6
 
7
7
 
8
8
  __all__ = [
@@ -4,7 +4,7 @@ import hashlib
4
4
  from collections import Counter, defaultdict
5
5
  from typing import Mapping, Sequence, Callable
6
6
 
7
- from wisent.synthetic.cleaners.methods.core.atoms import Deduper
7
+ from wisent.core.synthetic.cleaners.methods.core.atoms import Deduper
8
8
  from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
9
9
 
10
10
  __all__ = [
@@ -279,7 +279,7 @@ class SimHashDeduper(Deduper):
279
279
  64-bit integer hash
280
280
 
281
281
  example:
282
- >>> SimHashDeduper()._hash64("wisent_guard")
282
+ >>> SimHashDeduper()._hash64("wisent")
283
283
  TODO: actual value"
284
284
  """
285
285
  h = hashlib.blake2b(s.encode("utf-8"), digest_size=8)
@@ -1,4 +1,4 @@
1
- from wisent.synthetic.cleaners.methods.core.atoms import Refusaler
1
+ from wisent.core.synthetic.cleaners.methods.core.atoms import Refusaler
2
2
  from wisent.core.models.wisent_model import WisentModel
3
3
 
4
4
  import re, unicodedata
@@ -2,8 +2,8 @@ from __future__ import annotations
2
2
 
3
3
  from typing import Iterable, TYPE_CHECKING
4
4
 
5
- from wisent.synthetic.cleaners.core.atoms import CleanStep, Cleaner
6
- from wisent.synthetic.cleaners.core.atoms import CleanerStats
5
+ from wisent.core.synthetic.cleaners.core.atoms import CleanStep, Cleaner
6
+ from wisent.core.synthetic.cleaners.core.atoms import CleanerStats
7
7
  from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
8
8
 
9
9
  __all__ = [
@@ -39,9 +39,9 @@ class PairsCleaner(Cleaner):
39
39
  >>> from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
40
40
  >>> from wisent.core.contrastive_pairs.core.pair import ContrastivePair
41
41
  >>> from wisent.core.contrastive_pairs.core.response import PositiveResponse, NegativeResponse
42
- >>> from wisent.synthetic.cleaners.methods.base_refusalers import BasesRefusaler
43
- >>> from wisent.synthetic.cleaners.methods.base_dedupers import SimHashDeduper
44
- >>> from wisent.synthetic.cleaners.cleaners import PairsCleaner
42
+ >>> from wisent.core.synthetic.cleaners.methods.base_refusalers import BasesRefusaler
43
+ >>> from wisent.core.synthetic.cleaners.methods.base_dedupers import SimHashDeduper
44
+ >>> from wisent.core.synthetic.cleaners.cleaners import PairsCleaner
45
45
  >>> from wisent.core.models.wisent_model import WisentModel
46
46
  >>> refusal = BasesRefusaler()
47
47
  >>> deduper = SimHashDeduper()
@@ -1,9 +1,9 @@
1
1
 
2
- from wisent.synthetic.cleaners.core.atoms import CleanStep
2
+ from wisent.core.synthetic.cleaners.core.atoms import CleanStep
3
3
  from wisent.core.contrastive_pairs.core.pair import ContrastivePair
4
- from wisent.synthetic.cleaners.core.atoms import CleanStepStats
4
+ from wisent.core.synthetic.cleaners.core.atoms import CleanStepStats
5
5
 
6
- from wisent.synthetic.cleaners.methods.core.atoms import Refusaler
6
+ from wisent.core.synthetic.cleaners.methods.core.atoms import Refusaler
7
7
  from wisent.core.models.wisent_model import WisentModel
8
8
  from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
9
9
  from wisent.core.contrastive_pairs.core.response import PositiveResponse, NegativeResponse
@@ -59,7 +59,7 @@ class RefusalerCleaner(CleanStep):
59
59
  >>> from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
60
60
  >>> from wisent.core.contrastive_pairs.core.pair import ContrastivePair
61
61
  >>> from wisent.core.contrastive_pairs.core.response import PositiveResponse, NegativeResponse
62
- >>> from wisent.synthetic.cleaners.methods.base_refusalers import SimpleRefusaler
62
+ >>> from wisent.core.synthetic.cleaners.methods.base_refusalers import SimpleRefusaler
63
63
  >>> from wisent.core.models.wisent_model import WisentModel
64
64
  >>> refusal = SimpleRefusaler()
65
65
  >>> model = WisentModel(...)
@@ -1,5 +1,5 @@
1
1
 
2
- from wisent.synthetic.db_instructions.core.atoms import DB_Instructions
2
+ from wisent.core.synthetic.db_instructions.core.atoms import DB_Instructions
3
3
 
4
4
  __all__ = ["Default_DB_Instructions"]
5
5
 
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
  from typing import Iterable
3
3
  import re
4
4
  import numpy as np
5
- from wisent.synthetic.generators.diversities.core.core import Diversity, DiversityScores
5
+ from wisent.core.synthetic.generators.diversities.core.core import Diversity, DiversityScores
6
6
 
7
7
  __all__ = [
8
8
  "FastDiversity",
@@ -3,18 +3,18 @@ from __future__ import annotations
3
3
  import logging
4
4
 
5
5
 
6
- from wisent.core.contrastive_pairs.core.pair import ContrastivePair
7
- from wisent.core.contrastive_pairs.core.response import PositiveResponse, NegativeResponse
8
- from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
6
+ from wisent.core.contrastive_pairs.core.pair import ContrastivePair
7
+ from wisent.core.contrastive_pairs.core.response import PositiveResponse, NegativeResponse
8
+ from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
9
9
 
10
10
  from wisent.core.models.wisent_model import WisentModel
11
- from wisent.synthetic.db_instructions.core.atoms import DB_Instructions
11
+ from wisent.core.synthetic.db_instructions.core.atoms import DB_Instructions
12
12
 
13
- from wisent.synthetic.generators.core.atoms import GenerationReport
13
+ from wisent.core.synthetic.generators.core.atoms import GenerationReport
14
14
 
15
- from wisent.synthetic.generators.diversities.core.core import Diversity
15
+ from wisent.core.synthetic.generators.diversities.core.core import Diversity
16
16
 
17
- from wisent.synthetic.cleaners.pairs_cleaner import PairsCleaner
17
+ from wisent.core.synthetic.cleaners.pairs_cleaner import PairsCleaner
18
18
 
19
19
  __all__ = [
20
20
  "SyntheticContrastivePairsGenerator",
@@ -80,7 +80,8 @@ class SyntheticContrastivePairsGenerator:
80
80
  # 3) clean
81
81
  cleaned, stats = self.cleaner.clean(parsed)
82
82
 
83
- retries = stats.step_stats.get("refusaler_cleaner").modified_items
83
+ refusaler_stats = stats.step_stats.get("refusaler_cleaner")
84
+ retries = refusaler_stats.modified_items if refusaler_stats else 0
84
85
 
85
86
  # 4) build domain objects
86
87
  cps = ContrastivePairSet(name=self.contrastive_set_name, task_type=self.trait_label)
@@ -123,25 +124,47 @@ class SyntheticContrastivePairsGenerator:
123
124
  name=self.contrastive_set_name,
124
125
  task_type=self.trait_label,
125
126
  )
126
- for r in raw:
127
+
128
+ logger.info(f"[PARSE DEBUG] Received {len(raw)} raw outputs to parse")
129
+
130
+ for idx, r in enumerate(raw):
131
+ logger.info(f"[PARSE DEBUG] Raw output {idx}:\n{r[:500]}") # First 500 chars
132
+
133
+ original_r = r
127
134
  #TODO: this is very ugly, need to improve robustness
128
135
  # r can have instruction, and i want extacrt everything between ```json and ``` (after - You must return answer in valid JSON format only. Don't include any explanations or additional text.assistant)
129
136
  # also try to recover like Expecting ',' delimiter
130
137
  if "```json" in r:
131
138
  r = r.split("```json")[-1]
139
+ logger.info(f"[PARSE DEBUG] After json block extraction: {r[:200]}")
132
140
  if "```" in r:
133
141
  r = r.split("```")[0]
142
+ logger.info(f"[PARSE DEBUG] After backtick removal: {r[:200]}")
134
143
  r = r.strip()
144
+
145
+ logger.info(f"[PARSE DEBUG] Final cleaned string to parse:\n{r}")
146
+
135
147
  try:
136
148
  data = json.loads(r)
137
- except json.JSONDecodeError:
149
+ logger.info(f"[PARSE DEBUG] Successfully parsed JSON: {data}")
150
+ except json.JSONDecodeError as e:
151
+ logger.warning(f"[PARSE DEBUG] JSON decode failed: {e}")
138
152
  # try to recover from common errors
139
153
  r = r.replace("'", '"').replace("```", '')
154
+ logger.info(f"[PARSE DEBUG] Attempting recovery with quote replacement: {r[:200]}")
140
155
  try:
141
156
  data = json.loads(r)
142
- except json.JSONDecodeError:
157
+ logger.info(f"[PARSE DEBUG] Recovery successful: {data}")
158
+ except json.JSONDecodeError as e2:
159
+ logger.error(f"[PARSE DEBUG] Recovery failed: {e2}. Skipping this output.")
160
+ logger.error(f"[PARSE DEBUG] Original raw output was:\n{original_r}")
143
161
  continue
144
- for item in data.get("pairs", []):
162
+
163
+ pairs_list = data.get("pairs", [])
164
+ logger.info(f"[PARSE DEBUG] Found {len(pairs_list)} pairs in data")
165
+
166
+ for item_idx, item in enumerate(pairs_list):
167
+ logger.info(f"[PARSE DEBUG] Processing pair {item_idx}: {item}")
145
168
  cp = ContrastivePair(
146
169
  prompt=item["prompt"],
147
170
  positive_response=PositiveResponse(model_response=item["positive"]),
@@ -150,6 +173,9 @@ class SyntheticContrastivePairsGenerator:
150
173
  trait_description=item.get("trait_description", self.trait_description),
151
174
  )
152
175
  out.add(cp)
176
+ logger.info(f"[PARSE DEBUG] Successfully added pair {item_idx}")
177
+
178
+ logger.info(f"[PARSE DEBUG] Finished parsing. Total pairs collected: {len(out)}")
153
179
  return out
154
180
 
155
181
  @staticmethod
@@ -52,97 +52,11 @@ class LiveCodeBenchTask(TaskInterface):
52
52
 
53
53
  def load_data(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
54
54
  """Load LiveCodeBench data for the specified release version."""
55
- try:
56
- # Load real LiveCodeBench data
57
- problems = self._data_loader.load_problems(release_version=self._release_version, limit=limit)
58
-
59
- # Convert to dictionary format
60
- return [problem.to_dict() for problem in problems]
61
-
62
- except Exception as e:
63
- # Fallback to sample data if loading fails
64
- import logging
55
+ # Load real LiveCodeBench data - no fallbacks
56
+ problems = self._data_loader.load_problems(release_version=self._release_version, limit=limit)
65
57
 
66
- logging.warning(f"Failed to load real LiveCodeBench data: {e}. Using sample data.")
67
- return self._generate_sample_data_fallback(limit)
68
-
69
- def _generate_sample_data_fallback(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
70
- """Generate sample data for the specified number of problems."""
71
- base_problems = [
72
- {
73
- "task_id": "lcb_001",
74
- "question_title": "Two Sum",
75
- "question_content": "Given an array of integers nums and an integer target, return indices of the two numbers such that they add up to target.",
76
- "starter_code": "def two_sum(nums, target):\n # Your code here\n pass",
77
- "difficulty": "EASY",
78
- "platform": "LEETCODE",
79
- "public_test_cases": [{"input": "[2,7,11,15], 9", "output": "[0,1]", "testtype": "FUNCTIONAL"}],
80
- "contest_date": "2023-05-15",
81
- "metadata": {"tags": ["array", "hash-table"], "constraints": "2 <= nums.length <= 10^4"},
82
- },
83
- {
84
- "task_id": "lcb_002",
85
- "question_title": "Valid Parentheses",
86
- "question_content": "Given a string s containing just the characters '(', ')', '{', '}', '[' and ']', determine if the input string is valid.",
87
- "starter_code": "def is_valid(s):\n # Your code here\n pass",
88
- "difficulty": "EASY",
89
- "platform": "LEETCODE",
90
- "public_test_cases": [{"input": '"()"', "output": "true", "testtype": "FUNCTIONAL"}],
91
- "contest_date": "2023-06-01",
92
- "metadata": {"tags": ["string", "stack"], "constraints": "1 <= s.length <= 10^4"},
93
- },
94
- {
95
- "task_id": "lcb_003",
96
- "question_title": "Longest Increasing Subsequence",
97
- "question_content": "Given an integer array nums, return the length of the longest strictly increasing subsequence.",
98
- "starter_code": "def length_of_lis(nums):\n # Your code here\n pass",
99
- "difficulty": "MEDIUM",
100
- "platform": "LEETCODE",
101
- "public_test_cases": [{"input": "[10,9,2,5,3,7,101,18]", "output": "4", "testtype": "FUNCTIONAL"}],
102
- "contest_date": "2023-07-10",
103
- "metadata": {
104
- "tags": ["array", "binary-search", "dynamic-programming"],
105
- "constraints": "1 <= nums.length <= 2500",
106
- },
107
- },
108
- {
109
- "task_id": "lcb_004",
110
- "question_title": "Merge Two Sorted Lists",
111
- "question_content": "You are given the heads of two sorted linked lists list1 and list2. Merge the two lists into one sorted list.",
112
- "starter_code": "def merge_two_lists(list1, list2):\n # Your code here\n pass",
113
- "difficulty": "EASY",
114
- "platform": "LEETCODE",
115
- "public_test_cases": [
116
- {"input": "[1,2,4], [1,3,4]", "output": "[1,1,2,3,4,4]", "testtype": "FUNCTIONAL"}
117
- ],
118
- "contest_date": "2023-08-01",
119
- "metadata": {
120
- "tags": ["linked-list", "recursion"],
121
- "constraints": "0 <= list1.length, list2.length <= 50",
122
- },
123
- },
124
- {
125
- "task_id": "lcb_005",
126
- "question_title": "Best Time to Buy and Sell Stock",
127
- "question_content": "You are given an array prices where prices[i] is the price of a given stock on the ith day. Find the maximum profit.",
128
- "starter_code": "def max_profit(prices):\n # Your code here\n pass",
129
- "difficulty": "EASY",
130
- "platform": "LEETCODE",
131
- "public_test_cases": [{"input": "[7,1,5,3,6,4]", "output": "5", "testtype": "FUNCTIONAL"}],
132
- "contest_date": "2023-09-15",
133
- "metadata": {"tags": ["array", "dynamic-programming"], "constraints": "1 <= prices.length <= 10^5"},
134
- },
135
- ]
136
-
137
- # Generate limited sample data for fallback
138
- if limit:
139
- base_problems = base_problems[:limit]
140
-
141
- # Add version-specific metadata
142
- for problem in base_problems:
143
- problem["release_version"] = self._release_version
144
-
145
- return base_problems
58
+ # Convert to dictionary format
59
+ return [problem.to_dict() for problem in problems]
146
60
 
147
61
  def get_extractor(self):
148
62
  """Get the LiveCodeBench extractor."""
@@ -186,16 +100,3 @@ class LiveCodeBenchTask(TaskInterface):
186
100
  question = doc.get("question_content", "")
187
101
  starter = doc.get("starter_code", "")
188
102
  return f"{question}\n\n{starter}"
189
-
190
-
191
- # TODO: In a real implementation, this would integrate with the actual LiveCodeBench library
192
- # Example integration:
193
- # from livecodebench import LiveCodeBench
194
- #
195
- # class LiveCodeBenchTask(TaskInterface):
196
- # def __init__(self):
197
- # self._lcb = LiveCodeBench()
198
- # # self._extractor = LiveCodeBenchExtractor() # Not needed with model outputs approach
199
- #
200
- # def load_data(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
201
- # return self._lcb.load_problems(limit=limit)