wisent 0.5.12__py3-none-any.whl → 0.5.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wisent might be problematic. Click here for more details.
- wisent/__init__.py +1 -1
- wisent/core/activations/__init__.py +26 -0
- wisent/core/activations/activations.py +96 -0
- wisent/core/activations/activations_collector.py +71 -20
- wisent/core/activations/prompt_construction_strategy.py +47 -0
- wisent/core/agent/__init__.py +1 -18
- wisent/core/agent/budget.py +2 -2
- wisent/core/agent/device_benchmarks.py +1 -1
- wisent/core/agent/diagnose/__init__.py +1 -55
- wisent/core/agent/diagnose/classifier_marketplace.py +8 -8
- wisent/core/agent/diagnose/response_diagnostics.py +4 -4
- wisent/core/agent/diagnose/synthetic_classifier_option.py +1 -1
- wisent/core/agent/diagnose/tasks/task_manager.py +3 -3
- wisent/core/agent/diagnose.py +2 -1
- wisent/core/autonomous_agent.py +10 -2
- wisent/core/benchmark_extractors.py +293 -0
- wisent/core/bigcode_integration.py +20 -7
- wisent/core/branding.py +108 -0
- wisent/core/cli/__init__.py +15 -0
- wisent/core/cli/create_steering_vector.py +138 -0
- wisent/core/cli/evaluate_responses.py +715 -0
- wisent/core/cli/generate_pairs.py +128 -0
- wisent/core/cli/generate_pairs_from_task.py +119 -0
- wisent/core/cli/generate_responses.py +129 -0
- wisent/core/cli/generate_vector_from_synthetic.py +149 -0
- wisent/core/cli/generate_vector_from_task.py +147 -0
- wisent/core/cli/get_activations.py +191 -0
- wisent/core/cli/optimize_classification.py +339 -0
- wisent/core/cli/optimize_steering.py +364 -0
- wisent/core/cli/tasks.py +182 -0
- wisent/core/cli_logger.py +22 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +27 -1
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +49 -1
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_challenge.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_easy.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arithmetic.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/asdiv.py +119 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/boolq.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cb.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copa.py +118 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coqa.py +146 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/drop.py +129 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gsm8k.py +119 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/headqa.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hellaswag.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/livecodebench.py +367 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa2.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mc-taco.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medqa.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mrpc.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multirc.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mutual.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/openbookqa.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pawsx.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/piqa.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prost.py +113 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pubmedqa.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qa4mre.py +116 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnli.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qqp.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/race.py +121 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/record.py +121 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/rte.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sciq.py +110 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/social_iqa.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/squad2.py +124 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sst2.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swag.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/triviaqa.py +127 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_gen.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc1.py +117 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc2.py +117 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/webqs.py +127 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wic.py +119 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +1 -1
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wnli.py +111 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xnli.py +112 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xstorycloze.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xwinograd.py +114 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +1 -1
- wisent/core/data_loaders/__init__.py +235 -0
- wisent/core/data_loaders/loaders/lm_loader.py +2 -2
- wisent/core/data_loaders/loaders/task_interface_loader.py +300 -0
- wisent/{cli/data_loaders/data_loader_rotator.py → core/data_loaders/rotator.py} +1 -1
- wisent/core/download_full_benchmarks.py +79 -2
- wisent/core/evaluators/benchmark_specific/__init__.py +26 -0
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/evaluator.py +17 -17
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/cpp_sanitizer.py +2 -2
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/java_sanitizer.py +2 -2
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/python_sanitizer.py +2 -2
- wisent/core/evaluators/benchmark_specific/coding/providers/livecodebench/__init__.py +3 -0
- wisent/core/evaluators/benchmark_specific/coding/providers/livecodebench/provider.py +305 -0
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/core/runtime.py +36 -4
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/entrypoint.py +2 -4
- wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/recipes.py +1 -1
- wisent/core/evaluators/benchmark_specific/coding/solution_generator.py +258 -0
- wisent/core/evaluators/benchmark_specific/exact_match_evaluator.py +79 -0
- wisent/core/evaluators/benchmark_specific/f1_evaluator.py +101 -0
- wisent/core/evaluators/benchmark_specific/generation_evaluator.py +197 -0
- wisent/core/{log_likelihoods_evaluator.py → evaluators/benchmark_specific/log_likelihoods_evaluator.py} +10 -2
- wisent/core/evaluators/benchmark_specific/perplexity_evaluator.py +140 -0
- wisent/core/evaluators/benchmark_specific/personalization_evaluator.py +250 -0
- wisent/{cli/evaluators/evaluator_rotator.py → core/evaluators/rotator.py} +4 -4
- wisent/core/lm_eval_harness_ground_truth.py +3 -2
- wisent/core/main.py +57 -0
- wisent/core/model_persistence.py +2 -2
- wisent/core/models/wisent_model.py +6 -6
- wisent/core/optuna/classifier/optuna_classifier_optimizer.py +2 -2
- wisent/core/optuna/steering/steering_optimization.py +1 -1
- wisent/core/parser_arguments/__init__.py +10 -0
- wisent/core/parser_arguments/agent_parser.py +110 -0
- wisent/core/parser_arguments/configure_model_parser.py +7 -0
- wisent/core/parser_arguments/create_steering_vector_parser.py +59 -0
- wisent/core/parser_arguments/evaluate_parser.py +40 -0
- wisent/core/parser_arguments/evaluate_responses_parser.py +10 -0
- wisent/core/parser_arguments/full_optimize_parser.py +115 -0
- wisent/core/parser_arguments/generate_pairs_from_task_parser.py +33 -0
- wisent/core/parser_arguments/generate_pairs_parser.py +29 -0
- wisent/core/parser_arguments/generate_responses_parser.py +15 -0
- wisent/core/parser_arguments/generate_vector_from_synthetic_parser.py +127 -0
- wisent/core/parser_arguments/generate_vector_from_task_parser.py +127 -0
- wisent/core/parser_arguments/generate_vector_parser.py +90 -0
- wisent/core/parser_arguments/get_activations_parser.py +90 -0
- wisent/core/parser_arguments/main_parser.py +152 -0
- wisent/core/parser_arguments/model_config_parser.py +59 -0
- wisent/core/parser_arguments/monitor_parser.py +17 -0
- wisent/core/parser_arguments/multi_steer_parser.py +47 -0
- wisent/core/parser_arguments/optimize_classification_parser.py +67 -0
- wisent/core/parser_arguments/optimize_sample_size_parser.py +58 -0
- wisent/core/parser_arguments/optimize_steering_parser.py +147 -0
- wisent/core/parser_arguments/synthetic_parser.py +93 -0
- wisent/core/parser_arguments/tasks_parser.py +584 -0
- wisent/core/parser_arguments/test_nonsense_parser.py +26 -0
- wisent/core/parser_arguments/utils.py +111 -0
- wisent/core/prompts/core/prompt_formater.py +3 -3
- wisent/core/prompts/prompt_stratiegies/direct_completion.py +2 -0
- wisent/core/prompts/prompt_stratiegies/instruction_following.py +2 -0
- wisent/core/prompts/prompt_stratiegies/multiple_choice.py +2 -0
- wisent/core/prompts/prompt_stratiegies/role_playing.py +2 -0
- wisent/{cli/steering_methods/steering_rotator.py → core/steering_methods/rotator.py} +4 -4
- wisent/core/steering_optimizer.py +45 -21
- wisent/{synthetic → core/synthetic}/cleaners/deduper_cleaner.py +3 -3
- wisent/{synthetic → core/synthetic}/cleaners/methods/base_dedupers.py +2 -2
- wisent/{synthetic → core/synthetic}/cleaners/methods/base_refusalers.py +1 -1
- wisent/{synthetic → core/synthetic}/cleaners/pairs_cleaner.py +5 -5
- wisent/{synthetic → core/synthetic}/cleaners/refusaler_cleaner.py +4 -4
- wisent/{synthetic → core/synthetic}/db_instructions/mini_dp.py +1 -1
- wisent/{synthetic → core/synthetic}/generators/diversities/methods/fast_diversity.py +1 -1
- wisent/{synthetic → core/synthetic}/generators/pairs_generator.py +38 -12
- wisent/core/tasks/livecodebench_task.py +4 -103
- wisent/core/timing_calibration.py +1 -1
- {wisent-0.5.12.dist-info → wisent-0.5.14.dist-info}/METADATA +3 -3
- wisent-0.5.14.dist-info/RECORD +294 -0
- wisent-0.5.14.dist-info/entry_points.txt +2 -0
- wisent/benchmarks/coding/providers/livecodebench/provider.py +0 -53
- wisent/classifiers/core/atoms.py +0 -747
- wisent/classifiers/models/logistic.py +0 -29
- wisent/classifiers/models/mlp.py +0 -47
- wisent/cli/classifiers/classifier_rotator.py +0 -137
- wisent/cli/cli_logger.py +0 -142
- wisent/cli/wisent_cli/commands/help_cmd.py +0 -52
- wisent/cli/wisent_cli/commands/listing.py +0 -154
- wisent/cli/wisent_cli/commands/train_cmd.py +0 -322
- wisent/cli/wisent_cli/main.py +0 -93
- wisent/cli/wisent_cli/shell.py +0 -80
- wisent/cli/wisent_cli/ui.py +0 -69
- wisent/cli/wisent_cli/util/aggregations.py +0 -43
- wisent/cli/wisent_cli/util/parsing.py +0 -126
- wisent/cli/wisent_cli/version.py +0 -4
- wisent/opti/methods/__init__.py +0 -0
- wisent/synthetic/__init__.py +0 -0
- wisent/synthetic/cleaners/__init__.py +0 -0
- wisent/synthetic/cleaners/core/__init__.py +0 -0
- wisent/synthetic/cleaners/methods/__init__.py +0 -0
- wisent/synthetic/cleaners/methods/core/__init__.py +0 -0
- wisent/synthetic/db_instructions/__init__.py +0 -0
- wisent/synthetic/db_instructions/core/__init__.py +0 -0
- wisent/synthetic/generators/__init__.py +0 -0
- wisent/synthetic/generators/core/__init__.py +0 -0
- wisent/synthetic/generators/diversities/__init__.py +0 -0
- wisent/synthetic/generators/diversities/core/__init__.py +0 -0
- wisent/synthetic/generators/diversities/methods/__init__.py +0 -0
- wisent-0.5.12.dist-info/RECORD +0 -220
- /wisent/{benchmarks → core/evaluators/benchmark_specific/coding}/__init__.py +0 -0
- /wisent/{benchmarks/coding → core/evaluators/benchmark_specific/coding/metrics}/__init__.py +0 -0
- /wisent/{benchmarks/coding/metrics → core/evaluators/benchmark_specific/coding/metrics/core}/__init__.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/core/atoms.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/passk.py +0 -0
- /wisent/{benchmarks/coding/metrics/core → core/evaluators/benchmark_specific/coding/output_sanitizer}/__init__.py +0 -0
- /wisent/{benchmarks/coding/output_sanitizer → core/evaluators/benchmark_specific/coding/output_sanitizer/core}/__init__.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/core/atoms.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/utils.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/providers/__init__.py +0 -0
- /wisent/{benchmarks/coding/output_sanitizer → core/evaluators/benchmark_specific/coding/providers}/core/__init__.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/providers/core/atoms.py +0 -0
- /wisent/{benchmarks/coding/providers/core → core/evaluators/benchmark_specific/coding/safe_docker}/__init__.py +0 -0
- /wisent/{benchmarks/coding/providers/livecodebench → core/evaluators/benchmark_specific/coding/safe_docker/core}/__init__.py +0 -0
- /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/core/atoms.py +0 -0
- /wisent/{benchmarks/coding/safe_docker → core/opti}/__init__.py +0 -0
- /wisent/{benchmarks/coding/safe_docker → core/opti}/core/__init__.py +0 -0
- /wisent/{opti → core/opti}/core/atoms.py +0 -0
- /wisent/{classifiers → core/opti/methods}/__init__.py +0 -0
- /wisent/{opti → core/opti}/methods/opti_classificator.py +0 -0
- /wisent/{opti → core/opti}/methods/opti_steering.py +0 -0
- /wisent/{classifiers/core → core/synthetic}/__init__.py +0 -0
- /wisent/{classifiers/models → core/synthetic/cleaners}/__init__.py +0 -0
- /wisent/{cli → core/synthetic/cleaners/core}/__init__.py +0 -0
- /wisent/{synthetic → core/synthetic}/cleaners/core/atoms.py +0 -0
- /wisent/{cli/classifiers → core/synthetic/cleaners/methods}/__init__.py +0 -0
- /wisent/{cli/data_loaders → core/synthetic/cleaners/methods/core}/__init__.py +0 -0
- /wisent/{synthetic → core/synthetic}/cleaners/methods/core/atoms.py +0 -0
- /wisent/{cli/evaluators → core/synthetic/db_instructions}/__init__.py +0 -0
- /wisent/{cli/steering_methods → core/synthetic/db_instructions/core}/__init__.py +0 -0
- /wisent/{synthetic → core/synthetic}/db_instructions/core/atoms.py +0 -0
- /wisent/{cli/wisent_cli → core/synthetic/generators}/__init__.py +0 -0
- /wisent/{cli/wisent_cli/commands → core/synthetic/generators/core}/__init__.py +0 -0
- /wisent/{synthetic → core/synthetic}/generators/core/atoms.py +0 -0
- /wisent/{cli/wisent_cli/util → core/synthetic/generators/diversities}/__init__.py +0 -0
- /wisent/{opti → core/synthetic/generators/diversities/core}/__init__.py +0 -0
- /wisent/{synthetic → core/synthetic}/generators/diversities/core/core.py +0 -0
- /wisent/{opti/core → core/synthetic/generators/diversities/methods}/__init__.py +0 -0
- {wisent-0.5.12.dist-info → wisent-0.5.14.dist-info}/WHEEL +0 -0
- {wisent-0.5.12.dist-info → wisent-0.5.14.dist-info}/licenses/LICENSE +0 -0
- {wisent-0.5.12.dist-info → wisent-0.5.14.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utility functions for parser arguments.
|
|
3
|
+
|
|
4
|
+
Shared helper functions used across multiple command parsers.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def parse_layers_from_arg(layer_arg: str, model=None) -> List[int]:
    """
    Parse a layer argument into a list of layer indices.

    Args:
        layer_arg: String like "15", "14-16", "14,15,16", "all", or "-1"
            (the sentinel for auto-optimization). Leading and trailing
            whitespace is ignored.
        model: Model object. Only consulted when ``layer_arg`` is "all",
            in which case it is passed to ``detect_model_layers``.

    Returns:
        ``[-1]`` for the auto-optimization sentinel; otherwise the list
        produced by ``parse_layer_range``. For "all" the result is
        ``list(range(n))`` where ``n`` is the value returned by
        ``detect_model_layers(model)``.

    Raises:
        ValueError: If "all" is requested without a model, or (propagated
            from ``parse_layer_range``) if the argument cannot be parsed.
    """
    # Strip once up front so that a padded sentinel such as " -1" is still
    # recognised here instead of falling through to the range parser.
    layer_arg = layer_arg.strip()

    # Sentinel for auto-optimization - returned as a single-element list.
    if layer_arg == "-1":
        return [-1]

    layers = parse_layer_range(layer_arg, model)
    if layers is not None:
        return layers

    # parse_layer_range returned None, i.e. the "all" case: the concrete
    # layer list has to be derived from the model itself.
    if model is None:
        raise ValueError("Cannot determine layer range without model instance")

    # Imported lazily, as in the original, so this module can be imported
    # without loading the optimizer module.
    from wisent.core.hyperparameter_optimizer import detect_model_layers

    return list(range(detect_model_layers(model)))
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def parse_layer_range(layer_range_str: str, model=None) -> Optional[List[int]]:
    """
    Parse a layer range string into a list of integers.

    Accepted forms (whitespace around any part is ignored):
        * "all"        -> ``None`` (caller is expected to auto-detect layers)
        * "15"         -> ``[15]``
        * "-1"         -> ``[-1]`` (a leading minus is a sign, not a range)
        * "8-24"       -> ``[8, 9, ..., 24]`` (inclusive on both ends)
        * "10,15,20"   -> ``[10, 15, 20]``
        * "8-10,15"    -> ``[8, 9, 10, 15]`` (ranges and singles may be mixed)

    Args:
        layer_range_str: The specification string, in one of the forms above.
        model: Unused here; kept so the signature matches existing callers.

    Returns:
        List of layer indices in the order written, or ``None`` for "all".
        A descending range such as "24-8" yields no indices.

    Raises:
        ValueError: If any comma-separated part is neither an integer nor a
            ``start-end`` pair of integers.
    """
    spec = layer_range_str.strip()
    if spec.lower() == "all":
        # None signals "auto-detect later" to the caller.
        return None

    layers: List[int] = []
    for token in spec.split(","):
        token = token.strip()
        # Search for the range separator starting at index 1 so that a
        # leading "-" is treated as the sign of a negative index.
        dash = token.find("-", 1)
        try:
            if dash == -1:
                layers.append(int(token))
            else:
                start = int(token[:dash])
                end = int(token[dash + 1:])
                layers.extend(range(start, end + 1))
        except ValueError as exc:
            raise ValueError(
                f"Invalid layer specification {token!r} in {layer_range_str!r}; "
                'expected forms like "15", "8-24", "10,15,20" or "all"'
            ) from exc
    return layers
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def aggregate_token_scores(token_scores: List[float], method: str) -> float:
    """
    Collapse a sequence of per-token scores into a single score.

    Args:
        token_scores: Per-token scores. Every entry must be a plain ``int``
            or ``float``; entries are validated, not converted.
        method: One of "average", "final", "first", "max" or "min". Any
            other value is treated as "average".

    Returns:
        The aggregated score, or 0.5 when there are no scores to aggregate.

    Raises:
        ValueError: If an entry is ``None``, exposes an ``item`` attribute
            (treated as a tensor), or is not an ``int``/``float``.
    """
    neutral = 0.5
    if not token_scores:
        return neutral

    # Validate strictly: bad entries are reported, never silently dropped
    # or coerced.
    validated = []
    for i, score in enumerate(token_scores):
        if score is None:
            raise ValueError(
                f"Token score at index {i} is None! This indicates a bug in the classifier output handling."
            )
        if hasattr(score, "item"):  # tensor-like objects are rejected
            raise ValueError(
                f"Token score at index {i} is a tensor ({type(score)})! Expected float but got tensor: {score}"
            )
        if not isinstance(score, (int, float)):
            raise ValueError(
                f"Token score at index {i} has invalid type: {type(score)}. Expected float but got {type(score).__name__}: {score}"
            )
        validated.append(float(score))

    # Covers inputs that were truthy above yet produced no elements.
    if not validated:
        return neutral

    selectors = (
        ("final", lambda scores: scores[-1]),
        ("first", lambda scores: scores[0]),
        ("max", max),
        ("min", min),
    )
    for name, select in selectors:
        if method == name:
            return select(validated)

    # "average" and every unrecognised method end up here.
    return sum(validated) / len(validated)
|
|
@@ -110,14 +110,14 @@ class PromptFormatter:
|
|
|
110
110
|
RuntimeError: If the 'strategies' package is not found or no strategies are discovered.
|
|
111
111
|
"""
|
|
112
112
|
try:
|
|
113
|
-
import
|
|
113
|
+
import wisent.core.prompts.prompt_stratiegies as strategies_pkg
|
|
114
114
|
except ModuleNotFoundError as exc:
|
|
115
115
|
raise RuntimeError(
|
|
116
116
|
"The 'strategies' package was not found. "
|
|
117
117
|
"Create a 'strategies' directory with an empty __init__.py."
|
|
118
118
|
) from exc
|
|
119
119
|
|
|
120
|
-
import
|
|
120
|
+
import wisent.core.prompts.prompt_stratiegies as strategies_pkg
|
|
121
121
|
|
|
122
122
|
for module_info in pkgutil.iter_modules(strategies_pkg.__path__):
|
|
123
123
|
name = module_info.name
|
|
@@ -125,7 +125,7 @@ class PromptFormatter:
|
|
|
125
125
|
# Skip private/dunder modules.
|
|
126
126
|
continue
|
|
127
127
|
|
|
128
|
-
module = importlib.import_module(f"
|
|
128
|
+
module = importlib.import_module(f"wisent.core.prompts.prompt_stratiegies.{name}")
|
|
129
129
|
self._register_strategies_from_module(module)
|
|
130
130
|
|
|
131
131
|
if not self._registry:
|
|
@@ -25,7 +25,7 @@ class SteeringMethodRotator:
|
|
|
25
25
|
def __init__(
|
|
26
26
|
self,
|
|
27
27
|
method: str | BaseSteeringMethod | Type[BaseSteeringMethod] | None = None,
|
|
28
|
-
methods_location: str | Path = "
|
|
28
|
+
methods_location: str | Path = "wisent.core.steering_methods.methods",
|
|
29
29
|
autoload: bool = True,
|
|
30
30
|
**default_method_kwargs: Any,
|
|
31
31
|
) -> None:
|
|
@@ -44,7 +44,7 @@ class SteeringMethodRotator:
|
|
|
44
44
|
spec = importlib.util.spec_from_file_location(mod_name, py)
|
|
45
45
|
if spec and spec.loader:
|
|
46
46
|
module = importlib.util.module_from_spec(spec)
|
|
47
|
-
spec.loader.exec_module(module)
|
|
47
|
+
spec.loader.exec_module(module)
|
|
48
48
|
return
|
|
49
49
|
|
|
50
50
|
if not isinstance(location, str):
|
|
@@ -91,7 +91,7 @@ class SteeringMethodRotator:
|
|
|
91
91
|
if isinstance(method, str):
|
|
92
92
|
return BaseSteeringMethod.get(method)(**kwargs)
|
|
93
93
|
raise TypeError("method must be None, str name, BaseSteeringMethod instance, or subclass.")
|
|
94
|
-
|
|
94
|
+
|
|
95
95
|
def use(self, method: str | BaseSteeringMethod | Type[BaseSteeringMethod], **kwargs: Any) -> None:
|
|
96
96
|
self._method = self._resolve_method(method, **kwargs)
|
|
97
97
|
|
|
@@ -107,4 +107,4 @@ if __name__ == "__main__":
|
|
|
107
107
|
rot = SteeringMethodRotator()
|
|
108
108
|
print("Available steering methods:")
|
|
109
109
|
for m in rot.list_methods():
|
|
110
|
-
print(f" - {m['name']}: {m['description']} ({m['class']})")
|
|
110
|
+
print(f" - {m['name']}: {m['description']} ({m['class']})")
|
|
@@ -392,19 +392,21 @@ class SteeringOptimizer:
|
|
|
392
392
|
|
|
393
393
|
if layer_search_range is None:
|
|
394
394
|
# Default: search around classification layer if available
|
|
395
|
-
if self.base_classification_layer:
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
395
|
+
if not self.base_classification_layer:
|
|
396
|
+
raise ValueError(
|
|
397
|
+
"Layer optimization requires either layer_search_range parameter or "
|
|
398
|
+
"base_classification_layer to be set. Please provide a layer_search_range "
|
|
399
|
+
"or initialize SteeringOptimizer with a base_classification_layer."
|
|
400
|
+
)
|
|
401
|
+
min_layer = max(1, self.base_classification_layer - 3)
|
|
402
|
+
max_layer = self.base_classification_layer + 3
|
|
403
|
+
layer_search_range = (min_layer, max_layer)
|
|
404
|
+
|
|
404
405
|
raise NotImplementedError(
|
|
405
406
|
"Steering layer optimization not yet implemented. "
|
|
406
407
|
"This requires implementing steering vector training and "
|
|
407
|
-
"effectiveness measurement across different layers."
|
|
408
|
+
"effectiveness measurement across different layers. "
|
|
409
|
+
f"Would search layers {layer_search_range}."
|
|
408
410
|
)
|
|
409
411
|
|
|
410
412
|
def optimize_steering_strength(
|
|
@@ -419,7 +421,7 @@ class SteeringOptimizer:
|
|
|
419
421
|
) -> SteeringOptimizationResult:
|
|
420
422
|
"""
|
|
421
423
|
Find optimal steering strength for a specific method, layer, and task.
|
|
422
|
-
|
|
424
|
+
|
|
423
425
|
Args:
|
|
424
426
|
task_name: Task to optimize for
|
|
425
427
|
steering_method: Steering method to use
|
|
@@ -427,16 +429,26 @@ class SteeringOptimizer:
|
|
|
427
429
|
strength_range: (min_strength, max_strength) to search
|
|
428
430
|
strength_steps: Number of strength values to test
|
|
429
431
|
limit: Maximum samples for testing
|
|
430
|
-
|
|
432
|
+
|
|
431
433
|
Returns:
|
|
432
434
|
SteeringOptimizationResult with optimal strength
|
|
433
435
|
"""
|
|
436
|
+
import time
|
|
437
|
+
start_time = time.time()
|
|
438
|
+
|
|
434
439
|
if layer is None:
|
|
435
|
-
|
|
436
|
-
|
|
440
|
+
if not self.base_classification_layer:
|
|
441
|
+
raise ValueError(
|
|
442
|
+
"Steering strength optimization requires a layer to be specified. "
|
|
443
|
+
"Please provide the 'layer' parameter or initialize SteeringOptimizer "
|
|
444
|
+
"with a base_classification_layer."
|
|
445
|
+
)
|
|
446
|
+
layer = self.base_classification_layer
|
|
447
|
+
|
|
437
448
|
if strength_range is None:
|
|
438
|
-
|
|
439
|
-
|
|
449
|
+
# Default strength range is reasonable for most steering methods
|
|
450
|
+
strength_range = (0.1, 2.0)
|
|
451
|
+
|
|
440
452
|
logger.info(f"⚡ Optimizing steering strength for {task_name}")
|
|
441
453
|
logger.info(f" Method: {steering_method.value}, Layer: {layer}")
|
|
442
454
|
logger.info(f" Strength range: {strength_range}, Steps: {strength_steps}")
|
|
@@ -609,7 +621,10 @@ class SteeringOptimizer:
|
|
|
609
621
|
'score': 0.0,
|
|
610
622
|
'error': str(e)
|
|
611
623
|
})
|
|
612
|
-
|
|
624
|
+
|
|
625
|
+
# Calculate optimization time
|
|
626
|
+
optimization_time = time.time() - start_time
|
|
627
|
+
|
|
613
628
|
return SteeringOptimizationResult(
|
|
614
629
|
task_name=task_name,
|
|
615
630
|
best_steering_layer=layer,
|
|
@@ -618,7 +633,7 @@ class SteeringOptimizer:
|
|
|
618
633
|
optimal_parameters={'strength': best_strength},
|
|
619
634
|
steering_effectiveness_score=best_score,
|
|
620
635
|
classification_accuracy_impact=best_score, # Using same score for now
|
|
621
|
-
optimization_time_seconds=
|
|
636
|
+
optimization_time_seconds=optimization_time,
|
|
622
637
|
total_configurations_tested=len(results),
|
|
623
638
|
error_message=None
|
|
624
639
|
)
|
|
@@ -750,10 +765,19 @@ class SteeringOptimizer:
|
|
|
750
765
|
task_overrides = self.classification_config.get("task_specific_overrides", {})
|
|
751
766
|
tasks = list(task_overrides.keys())
|
|
752
767
|
if not tasks:
|
|
753
|
-
|
|
754
|
-
|
|
768
|
+
raise ValueError(
|
|
769
|
+
"No classification-optimized tasks found in classification_config. "
|
|
770
|
+
"Please either:\n"
|
|
771
|
+
" 1. Run classification optimization first to populate task_specific_overrides, or\n"
|
|
772
|
+
" 2. Explicitly provide a list of tasks via the 'tasks' parameter"
|
|
773
|
+
)
|
|
755
774
|
else:
|
|
756
|
-
|
|
775
|
+
raise ValueError(
|
|
776
|
+
"No tasks provided and no classification_config available. "
|
|
777
|
+
"Please either:\n"
|
|
778
|
+
" 1. Provide explicit tasks via the 'tasks' parameter, or\n"
|
|
779
|
+
" 2. Initialize SteeringOptimizer with a classification_config that contains task_specific_overrides"
|
|
780
|
+
)
|
|
757
781
|
|
|
758
782
|
if methods is None:
|
|
759
783
|
methods = [SteeringMethod.CAA, SteeringMethod.HPR] # Start with simpler methods
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
from wisent.synthetic.cleaners.core.atoms import CleanStep
|
|
2
|
-
from wisent.synthetic.cleaners.core.atoms import CleanStepStats
|
|
1
|
+
from wisent.core.synthetic.cleaners.core.atoms import CleanStep
|
|
2
|
+
from wisent.core.synthetic.cleaners.core.atoms import CleanStepStats
|
|
3
3
|
|
|
4
4
|
from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
|
|
5
|
-
from wisent.synthetic.cleaners.methods.core.atoms import Deduper
|
|
5
|
+
from wisent.core.synthetic.cleaners.methods.core.atoms import Deduper
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
__all__ = [
|
|
@@ -4,7 +4,7 @@ import hashlib
|
|
|
4
4
|
from collections import Counter, defaultdict
|
|
5
5
|
from typing import Mapping, Sequence, Callable
|
|
6
6
|
|
|
7
|
-
from wisent.synthetic.cleaners.methods.core.atoms import Deduper
|
|
7
|
+
from wisent.core.synthetic.cleaners.methods.core.atoms import Deduper
|
|
8
8
|
from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
|
|
9
9
|
|
|
10
10
|
__all__ = [
|
|
@@ -279,7 +279,7 @@ class SimHashDeduper(Deduper):
|
|
|
279
279
|
64-bit integer hash
|
|
280
280
|
|
|
281
281
|
example:
|
|
282
|
-
>>> SimHashDeduper()._hash64("
|
|
282
|
+
>>> SimHashDeduper()._hash64("wisent")
|
|
283
283
|
TODO: actual value"
|
|
284
284
|
"""
|
|
285
285
|
h = hashlib.blake2b(s.encode("utf-8"), digest_size=8)
|
|
@@ -2,8 +2,8 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from typing import Iterable, TYPE_CHECKING
|
|
4
4
|
|
|
5
|
-
from wisent.synthetic.cleaners.core.atoms import CleanStep, Cleaner
|
|
6
|
-
from wisent.synthetic.cleaners.core.atoms import CleanerStats
|
|
5
|
+
from wisent.core.synthetic.cleaners.core.atoms import CleanStep, Cleaner
|
|
6
|
+
from wisent.core.synthetic.cleaners.core.atoms import CleanerStats
|
|
7
7
|
from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
|
|
8
8
|
|
|
9
9
|
__all__ = [
|
|
@@ -39,9 +39,9 @@ class PairsCleaner(Cleaner):
|
|
|
39
39
|
>>> from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
|
|
40
40
|
>>> from wisent.core.contrastive_pairs.core.pair import ContrastivePair
|
|
41
41
|
>>> from wisent.core.contrastive_pairs.core.response import PositiveResponse, NegativeResponse
|
|
42
|
-
>>> from wisent.synthetic.cleaners.methods.base_refusalers import BasesRefusaler
|
|
43
|
-
>>> from wisent.synthetic.cleaners.methods.base_dedupers import SimHashDeduper
|
|
44
|
-
>>> from wisent.synthetic.cleaners.cleaners import PairsCleaner
|
|
42
|
+
>>> from wisent.core.synthetic.cleaners.methods.base_refusalers import BasesRefusaler
|
|
43
|
+
>>> from wisent.core.synthetic.cleaners.methods.base_dedupers import SimHashDeduper
|
|
44
|
+
>>> from wisent.core.synthetic.cleaners.cleaners import PairsCleaner
|
|
45
45
|
>>> from wisent.core.models.wisent_model import WisentModel
|
|
46
46
|
>>> refusal = BasesRefusaler()
|
|
47
47
|
>>> deduper = SimHashDeduper()
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
|
|
2
|
-
from wisent.synthetic.cleaners.core.atoms import CleanStep
|
|
2
|
+
from wisent.core.synthetic.cleaners.core.atoms import CleanStep
|
|
3
3
|
from wisent.core.contrastive_pairs.core.pair import ContrastivePair
|
|
4
|
-
from wisent.synthetic.cleaners.core.atoms import CleanStepStats
|
|
4
|
+
from wisent.core.synthetic.cleaners.core.atoms import CleanStepStats
|
|
5
5
|
|
|
6
|
-
from wisent.synthetic.cleaners.methods.core.atoms import Refusaler
|
|
6
|
+
from wisent.core.synthetic.cleaners.methods.core.atoms import Refusaler
|
|
7
7
|
from wisent.core.models.wisent_model import WisentModel
|
|
8
8
|
from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
|
|
9
9
|
from wisent.core.contrastive_pairs.core.response import PositiveResponse, NegativeResponse
|
|
@@ -59,7 +59,7 @@ class RefusalerCleaner(CleanStep):
|
|
|
59
59
|
>>> from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
|
|
60
60
|
>>> from wisent.core.contrastive_pairs.core.pair import ContrastivePair
|
|
61
61
|
>>> from wisent.core.contrastive_pairs.core.response import PositiveResponse, NegativeResponse
|
|
62
|
-
>>> from wisent.synthetic.cleaners.methods.base_refusalers import SimpleRefusaler
|
|
62
|
+
>>> from wisent.core.synthetic.cleaners.methods.base_refusalers import SimpleRefusaler
|
|
63
63
|
>>> from wisent.core.models.wisent_model import WisentModel
|
|
64
64
|
>>> refusal = SimpleRefusaler()
|
|
65
65
|
>>> model = WisentModel(...)
|
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
from typing import Iterable
|
|
3
3
|
import re
|
|
4
4
|
import numpy as np
|
|
5
|
-
from wisent.synthetic.generators.diversities.core.core import Diversity, DiversityScores
|
|
5
|
+
from wisent.core.synthetic.generators.diversities.core.core import Diversity, DiversityScores
|
|
6
6
|
|
|
7
7
|
__all__ = [
|
|
8
8
|
"FastDiversity",
|
|
@@ -3,18 +3,18 @@ from __future__ import annotations
|
|
|
3
3
|
import logging
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
from wisent.core.contrastive_pairs.core.pair import ContrastivePair
|
|
7
|
-
from wisent.core.contrastive_pairs.core.response import PositiveResponse, NegativeResponse
|
|
8
|
-
from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
|
|
6
|
+
from wisent.core.contrastive_pairs.core.pair import ContrastivePair
|
|
7
|
+
from wisent.core.contrastive_pairs.core.response import PositiveResponse, NegativeResponse
|
|
8
|
+
from wisent.core.contrastive_pairs.core.set import ContrastivePairSet
|
|
9
9
|
|
|
10
10
|
from wisent.core.models.wisent_model import WisentModel
|
|
11
|
-
from wisent.synthetic.db_instructions.core.atoms import DB_Instructions
|
|
11
|
+
from wisent.core.synthetic.db_instructions.core.atoms import DB_Instructions
|
|
12
12
|
|
|
13
|
-
from wisent.synthetic.generators.core.atoms import GenerationReport
|
|
13
|
+
from wisent.core.synthetic.generators.core.atoms import GenerationReport
|
|
14
14
|
|
|
15
|
-
from wisent.synthetic.generators.diversities.core.core import Diversity
|
|
15
|
+
from wisent.core.synthetic.generators.diversities.core.core import Diversity
|
|
16
16
|
|
|
17
|
-
from wisent.synthetic.cleaners.pairs_cleaner import PairsCleaner
|
|
17
|
+
from wisent.core.synthetic.cleaners.pairs_cleaner import PairsCleaner
|
|
18
18
|
|
|
19
19
|
__all__ = [
|
|
20
20
|
"SyntheticContrastivePairsGenerator",
|
|
@@ -80,7 +80,8 @@ class SyntheticContrastivePairsGenerator:
|
|
|
80
80
|
# 3) clean
|
|
81
81
|
cleaned, stats = self.cleaner.clean(parsed)
|
|
82
82
|
|
|
83
|
-
|
|
83
|
+
refusaler_stats = stats.step_stats.get("refusaler_cleaner")
|
|
84
|
+
retries = refusaler_stats.modified_items if refusaler_stats else 0
|
|
84
85
|
|
|
85
86
|
# 4) build domain objects
|
|
86
87
|
cps = ContrastivePairSet(name=self.contrastive_set_name, task_type=self.trait_label)
|
|
@@ -123,25 +124,47 @@ class SyntheticContrastivePairsGenerator:
|
|
|
123
124
|
name=self.contrastive_set_name,
|
|
124
125
|
task_type=self.trait_label,
|
|
125
126
|
)
|
|
126
|
-
|
|
127
|
+
|
|
128
|
+
logger.info(f"[PARSE DEBUG] Received {len(raw)} raw outputs to parse")
|
|
129
|
+
|
|
130
|
+
for idx, r in enumerate(raw):
|
|
131
|
+
logger.info(f"[PARSE DEBUG] Raw output {idx}:\n{r[:500]}") # First 500 chars
|
|
132
|
+
|
|
133
|
+
original_r = r
|
|
127
134
|
#TODO: this is very ugly, need to improve robustness
|
|
128
135
|
# r can include the instruction, and I want to extract everything between ```json and ``` (after - You must return answer in valid JSON format only. Don't include any explanations or additional text.assistant)
|
|
129
136
|
# also try to recover from decode errors such as "Expecting ',' delimiter"
|
|
130
137
|
if "```json" in r:
|
|
131
138
|
r = r.split("```json")[-1]
|
|
139
|
+
logger.info(f"[PARSE DEBUG] After json block extraction: {r[:200]}")
|
|
132
140
|
if "```" in r:
|
|
133
141
|
r = r.split("```")[0]
|
|
142
|
+
logger.info(f"[PARSE DEBUG] After backtick removal: {r[:200]}")
|
|
134
143
|
r = r.strip()
|
|
144
|
+
|
|
145
|
+
logger.info(f"[PARSE DEBUG] Final cleaned string to parse:\n{r}")
|
|
146
|
+
|
|
135
147
|
try:
|
|
136
148
|
data = json.loads(r)
|
|
137
|
-
|
|
149
|
+
logger.info(f"[PARSE DEBUG] Successfully parsed JSON: {data}")
|
|
150
|
+
except json.JSONDecodeError as e:
|
|
151
|
+
logger.warning(f"[PARSE DEBUG] JSON decode failed: {e}")
|
|
138
152
|
# try to recover from common errors
|
|
139
153
|
r = r.replace("'", '"').replace("```", '')
|
|
154
|
+
logger.info(f"[PARSE DEBUG] Attempting recovery with quote replacement: {r[:200]}")
|
|
140
155
|
try:
|
|
141
156
|
data = json.loads(r)
|
|
142
|
-
|
|
157
|
+
logger.info(f"[PARSE DEBUG] Recovery successful: {data}")
|
|
158
|
+
except json.JSONDecodeError as e2:
|
|
159
|
+
logger.error(f"[PARSE DEBUG] Recovery failed: {e2}. Skipping this output.")
|
|
160
|
+
logger.error(f"[PARSE DEBUG] Original raw output was:\n{original_r}")
|
|
143
161
|
continue
|
|
144
|
-
|
|
162
|
+
|
|
163
|
+
pairs_list = data.get("pairs", [])
|
|
164
|
+
logger.info(f"[PARSE DEBUG] Found {len(pairs_list)} pairs in data")
|
|
165
|
+
|
|
166
|
+
for item_idx, item in enumerate(pairs_list):
|
|
167
|
+
logger.info(f"[PARSE DEBUG] Processing pair {item_idx}: {item}")
|
|
145
168
|
cp = ContrastivePair(
|
|
146
169
|
prompt=item["prompt"],
|
|
147
170
|
positive_response=PositiveResponse(model_response=item["positive"]),
|
|
@@ -150,6 +173,9 @@ class SyntheticContrastivePairsGenerator:
|
|
|
150
173
|
trait_description=item.get("trait_description", self.trait_description),
|
|
151
174
|
)
|
|
152
175
|
out.add(cp)
|
|
176
|
+
logger.info(f"[PARSE DEBUG] Successfully added pair {item_idx}")
|
|
177
|
+
|
|
178
|
+
logger.info(f"[PARSE DEBUG] Finished parsing. Total pairs collected: {len(out)}")
|
|
153
179
|
return out
|
|
154
180
|
|
|
155
181
|
@staticmethod
|
|
@@ -52,97 +52,11 @@ class LiveCodeBenchTask(TaskInterface):
|
|
|
52
52
|
|
|
53
53
|
def load_data(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
|
|
54
54
|
"""Load LiveCodeBench data for the specified release version."""
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
problems = self._data_loader.load_problems(release_version=self._release_version, limit=limit)
|
|
58
|
-
|
|
59
|
-
# Convert to dictionary format
|
|
60
|
-
return [problem.to_dict() for problem in problems]
|
|
61
|
-
|
|
62
|
-
except Exception as e:
|
|
63
|
-
# Fallback to sample data if loading fails
|
|
64
|
-
import logging
|
|
55
|
+
# Load real LiveCodeBench data - no fallbacks
|
|
56
|
+
problems = self._data_loader.load_problems(release_version=self._release_version, limit=limit)
|
|
65
57
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
def _generate_sample_data_fallback(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
|
|
70
|
-
"""Generate sample data for the specified number of problems."""
|
|
71
|
-
base_problems = [
|
|
72
|
-
{
|
|
73
|
-
"task_id": "lcb_001",
|
|
74
|
-
"question_title": "Two Sum",
|
|
75
|
-
"question_content": "Given an array of integers nums and an integer target, return indices of the two numbers such that they add up to target.",
|
|
76
|
-
"starter_code": "def two_sum(nums, target):\n # Your code here\n pass",
|
|
77
|
-
"difficulty": "EASY",
|
|
78
|
-
"platform": "LEETCODE",
|
|
79
|
-
"public_test_cases": [{"input": "[2,7,11,15], 9", "output": "[0,1]", "testtype": "FUNCTIONAL"}],
|
|
80
|
-
"contest_date": "2023-05-15",
|
|
81
|
-
"metadata": {"tags": ["array", "hash-table"], "constraints": "2 <= nums.length <= 10^4"},
|
|
82
|
-
},
|
|
83
|
-
{
|
|
84
|
-
"task_id": "lcb_002",
|
|
85
|
-
"question_title": "Valid Parentheses",
|
|
86
|
-
"question_content": "Given a string s containing just the characters '(', ')', '{', '}', '[' and ']', determine if the input string is valid.",
|
|
87
|
-
"starter_code": "def is_valid(s):\n # Your code here\n pass",
|
|
88
|
-
"difficulty": "EASY",
|
|
89
|
-
"platform": "LEETCODE",
|
|
90
|
-
"public_test_cases": [{"input": '"()"', "output": "true", "testtype": "FUNCTIONAL"}],
|
|
91
|
-
"contest_date": "2023-06-01",
|
|
92
|
-
"metadata": {"tags": ["string", "stack"], "constraints": "1 <= s.length <= 10^4"},
|
|
93
|
-
},
|
|
94
|
-
{
|
|
95
|
-
"task_id": "lcb_003",
|
|
96
|
-
"question_title": "Longest Increasing Subsequence",
|
|
97
|
-
"question_content": "Given an integer array nums, return the length of the longest strictly increasing subsequence.",
|
|
98
|
-
"starter_code": "def length_of_lis(nums):\n # Your code here\n pass",
|
|
99
|
-
"difficulty": "MEDIUM",
|
|
100
|
-
"platform": "LEETCODE",
|
|
101
|
-
"public_test_cases": [{"input": "[10,9,2,5,3,7,101,18]", "output": "4", "testtype": "FUNCTIONAL"}],
|
|
102
|
-
"contest_date": "2023-07-10",
|
|
103
|
-
"metadata": {
|
|
104
|
-
"tags": ["array", "binary-search", "dynamic-programming"],
|
|
105
|
-
"constraints": "1 <= nums.length <= 2500",
|
|
106
|
-
},
|
|
107
|
-
},
|
|
108
|
-
{
|
|
109
|
-
"task_id": "lcb_004",
|
|
110
|
-
"question_title": "Merge Two Sorted Lists",
|
|
111
|
-
"question_content": "You are given the heads of two sorted linked lists list1 and list2. Merge the two lists into one sorted list.",
|
|
112
|
-
"starter_code": "def merge_two_lists(list1, list2):\n # Your code here\n pass",
|
|
113
|
-
"difficulty": "EASY",
|
|
114
|
-
"platform": "LEETCODE",
|
|
115
|
-
"public_test_cases": [
|
|
116
|
-
{"input": "[1,2,4], [1,3,4]", "output": "[1,1,2,3,4,4]", "testtype": "FUNCTIONAL"}
|
|
117
|
-
],
|
|
118
|
-
"contest_date": "2023-08-01",
|
|
119
|
-
"metadata": {
|
|
120
|
-
"tags": ["linked-list", "recursion"],
|
|
121
|
-
"constraints": "0 <= list1.length, list2.length <= 50",
|
|
122
|
-
},
|
|
123
|
-
},
|
|
124
|
-
{
|
|
125
|
-
"task_id": "lcb_005",
|
|
126
|
-
"question_title": "Best Time to Buy and Sell Stock",
|
|
127
|
-
"question_content": "You are given an array prices where prices[i] is the price of a given stock on the ith day. Find the maximum profit.",
|
|
128
|
-
"starter_code": "def max_profit(prices):\n # Your code here\n pass",
|
|
129
|
-
"difficulty": "EASY",
|
|
130
|
-
"platform": "LEETCODE",
|
|
131
|
-
"public_test_cases": [{"input": "[7,1,5,3,6,4]", "output": "5", "testtype": "FUNCTIONAL"}],
|
|
132
|
-
"contest_date": "2023-09-15",
|
|
133
|
-
"metadata": {"tags": ["array", "dynamic-programming"], "constraints": "1 <= prices.length <= 10^5"},
|
|
134
|
-
},
|
|
135
|
-
]
|
|
136
|
-
|
|
137
|
-
# Generate limited sample data for fallback
|
|
138
|
-
if limit:
|
|
139
|
-
base_problems = base_problems[:limit]
|
|
140
|
-
|
|
141
|
-
# Add version-specific metadata
|
|
142
|
-
for problem in base_problems:
|
|
143
|
-
problem["release_version"] = self._release_version
|
|
144
|
-
|
|
145
|
-
return base_problems
|
|
58
|
+
# Convert to dictionary format
|
|
59
|
+
return [problem.to_dict() for problem in problems]
|
|
146
60
|
|
|
147
61
|
def get_extractor(self):
|
|
148
62
|
"""Get the LiveCodeBench extractor."""
|
|
@@ -186,16 +100,3 @@ class LiveCodeBenchTask(TaskInterface):
|
|
|
186
100
|
question = doc.get("question_content", "")
|
|
187
101
|
starter = doc.get("starter_code", "")
|
|
188
102
|
return f"{question}\n\n{starter}"
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
# TODO: In a real implementation, this would integrate with the actual LiveCodeBench library
|
|
192
|
-
# Example integration:
|
|
193
|
-
# from livecodebench import LiveCodeBench
|
|
194
|
-
#
|
|
195
|
-
# class LiveCodeBenchTask(TaskInterface):
|
|
196
|
-
# def __init__(self):
|
|
197
|
-
# self._lcb = LiveCodeBench()
|
|
198
|
-
# # self._extractor = LiveCodeBenchExtractor() # Not needed with model outputs approach
|
|
199
|
-
#
|
|
200
|
-
# def load_data(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
|
|
201
|
-
# return self._lcb.load_problems(limit=limit)
|