wisent 0.5.12__py3-none-any.whl → 0.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wisent might be problematic. Click here for more details.

Files changed (225) hide show
  1. wisent/__init__.py +1 -1
  2. wisent/core/activations/__init__.py +26 -0
  3. wisent/core/activations/activations.py +96 -0
  4. wisent/core/activations/activations_collector.py +71 -20
  5. wisent/core/activations/prompt_construction_strategy.py +47 -0
  6. wisent/core/agent/budget.py +2 -2
  7. wisent/core/agent/device_benchmarks.py +1 -1
  8. wisent/core/agent/diagnose/classifier_marketplace.py +8 -8
  9. wisent/core/agent/diagnose/response_diagnostics.py +4 -4
  10. wisent/core/agent/diagnose/synthetic_classifier_option.py +1 -1
  11. wisent/core/agent/diagnose/tasks/task_manager.py +3 -3
  12. wisent/core/agent/diagnose.py +2 -1
  13. wisent/core/autonomous_agent.py +10 -2
  14. wisent/core/benchmark_extractors.py +293 -0
  15. wisent/core/bigcode_integration.py +20 -7
  16. wisent/core/branding.py +108 -0
  17. wisent/core/cli/__init__.py +15 -0
  18. wisent/core/cli/create_steering_vector.py +138 -0
  19. wisent/core/cli/evaluate_responses.py +715 -0
  20. wisent/core/cli/generate_pairs.py +128 -0
  21. wisent/core/cli/generate_pairs_from_task.py +119 -0
  22. wisent/core/cli/generate_responses.py +129 -0
  23. wisent/core/cli/generate_vector_from_synthetic.py +149 -0
  24. wisent/core/cli/generate_vector_from_task.py +147 -0
  25. wisent/core/cli/get_activations.py +191 -0
  26. wisent/core/cli/optimize_classification.py +339 -0
  27. wisent/core/cli/optimize_steering.py +364 -0
  28. wisent/core/cli/tasks.py +182 -0
  29. wisent/core/cli_logger.py +22 -0
  30. wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +27 -1
  31. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +49 -1
  32. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_challenge.py +115 -0
  33. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arc_easy.py +115 -0
  34. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/arithmetic.py +111 -0
  35. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/asdiv.py +119 -0
  36. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/boolq.py +115 -0
  37. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/cb.py +114 -0
  38. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/copa.py +118 -0
  39. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/coqa.py +146 -0
  40. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/drop.py +129 -0
  41. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/gsm8k.py +119 -0
  42. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/headqa.py +112 -0
  43. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/hellaswag.py +113 -0
  44. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/livecodebench.py +367 -0
  45. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa.py +115 -0
  46. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/logiqa2.py +114 -0
  47. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mc-taco.py +113 -0
  48. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/medqa.py +112 -0
  49. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mrpc.py +111 -0
  50. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/multirc.py +114 -0
  51. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/mutual.py +113 -0
  52. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/openbookqa.py +115 -0
  53. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pawsx.py +111 -0
  54. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/piqa.py +113 -0
  55. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/prost.py +113 -0
  56. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/pubmedqa.py +112 -0
  57. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qa4mre.py +116 -0
  58. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qasper.py +115 -0
  59. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qnli.py +111 -0
  60. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/qqp.py +111 -0
  61. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/race.py +121 -0
  62. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/record.py +121 -0
  63. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/rte.py +111 -0
  64. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sciq.py +110 -0
  65. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/social_iqa.py +114 -0
  66. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/squad2.py +124 -0
  67. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/sst2.py +111 -0
  68. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/swag.py +112 -0
  69. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/triviaqa.py +127 -0
  70. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_gen.py +112 -0
  71. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc1.py +117 -0
  72. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/truthfulqa_mc2.py +117 -0
  73. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/webqs.py +127 -0
  74. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wic.py +119 -0
  75. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +1 -1
  76. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wnli.py +111 -0
  77. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/wsc.py +114 -0
  78. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xnli.py +112 -0
  79. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xstorycloze.py +114 -0
  80. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/xwinograd.py +114 -0
  81. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +1 -1
  82. wisent/core/data_loaders/__init__.py +235 -0
  83. wisent/core/data_loaders/loaders/lm_loader.py +2 -2
  84. wisent/core/data_loaders/loaders/task_interface_loader.py +300 -0
  85. wisent/{cli/data_loaders/data_loader_rotator.py → core/data_loaders/rotator.py} +1 -1
  86. wisent/core/download_full_benchmarks.py +79 -2
  87. wisent/core/evaluators/benchmark_specific/__init__.py +26 -0
  88. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/evaluator.py +17 -17
  89. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/cpp_sanitizer.py +2 -2
  90. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/java_sanitizer.py +2 -2
  91. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/python_sanitizer.py +2 -2
  92. wisent/core/evaluators/benchmark_specific/coding/providers/livecodebench/__init__.py +3 -0
  93. wisent/core/evaluators/benchmark_specific/coding/providers/livecodebench/provider.py +305 -0
  94. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/core/runtime.py +36 -4
  95. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/entrypoint.py +2 -4
  96. wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/recipes.py +1 -1
  97. wisent/core/evaluators/benchmark_specific/coding/solution_generator.py +258 -0
  98. wisent/core/evaluators/benchmark_specific/exact_match_evaluator.py +79 -0
  99. wisent/core/evaluators/benchmark_specific/f1_evaluator.py +101 -0
  100. wisent/core/evaluators/benchmark_specific/generation_evaluator.py +197 -0
  101. wisent/core/{log_likelihoods_evaluator.py → evaluators/benchmark_specific/log_likelihoods_evaluator.py} +10 -2
  102. wisent/core/evaluators/benchmark_specific/perplexity_evaluator.py +140 -0
  103. wisent/core/evaluators/benchmark_specific/personalization_evaluator.py +250 -0
  104. wisent/{cli/evaluators/evaluator_rotator.py → core/evaluators/rotator.py} +4 -4
  105. wisent/core/lm_eval_harness_ground_truth.py +3 -2
  106. wisent/core/main.py +57 -0
  107. wisent/core/model_persistence.py +2 -2
  108. wisent/core/models/wisent_model.py +6 -6
  109. wisent/core/optuna/classifier/optuna_classifier_optimizer.py +2 -2
  110. wisent/core/optuna/steering/steering_optimization.py +1 -1
  111. wisent/core/parser_arguments/__init__.py +10 -0
  112. wisent/core/parser_arguments/agent_parser.py +110 -0
  113. wisent/core/parser_arguments/configure_model_parser.py +7 -0
  114. wisent/core/parser_arguments/create_steering_vector_parser.py +59 -0
  115. wisent/core/parser_arguments/evaluate_parser.py +40 -0
  116. wisent/core/parser_arguments/evaluate_responses_parser.py +10 -0
  117. wisent/core/parser_arguments/full_optimize_parser.py +115 -0
  118. wisent/core/parser_arguments/generate_pairs_from_task_parser.py +33 -0
  119. wisent/core/parser_arguments/generate_pairs_parser.py +29 -0
  120. wisent/core/parser_arguments/generate_responses_parser.py +15 -0
  121. wisent/core/parser_arguments/generate_vector_from_synthetic_parser.py +127 -0
  122. wisent/core/parser_arguments/generate_vector_from_task_parser.py +127 -0
  123. wisent/core/parser_arguments/generate_vector_parser.py +90 -0
  124. wisent/core/parser_arguments/get_activations_parser.py +90 -0
  125. wisent/core/parser_arguments/main_parser.py +152 -0
  126. wisent/core/parser_arguments/model_config_parser.py +59 -0
  127. wisent/core/parser_arguments/monitor_parser.py +17 -0
  128. wisent/core/parser_arguments/multi_steer_parser.py +47 -0
  129. wisent/core/parser_arguments/optimize_classification_parser.py +67 -0
  130. wisent/core/parser_arguments/optimize_sample_size_parser.py +58 -0
  131. wisent/core/parser_arguments/optimize_steering_parser.py +147 -0
  132. wisent/core/parser_arguments/synthetic_parser.py +93 -0
  133. wisent/core/parser_arguments/tasks_parser.py +584 -0
  134. wisent/core/parser_arguments/test_nonsense_parser.py +26 -0
  135. wisent/core/parser_arguments/utils.py +111 -0
  136. wisent/core/prompts/core/prompt_formater.py +3 -3
  137. wisent/core/prompts/prompt_stratiegies/direct_completion.py +2 -0
  138. wisent/core/prompts/prompt_stratiegies/instruction_following.py +2 -0
  139. wisent/core/prompts/prompt_stratiegies/multiple_choice.py +2 -0
  140. wisent/core/prompts/prompt_stratiegies/role_playing.py +2 -0
  141. wisent/{cli/steering_methods/steering_rotator.py → core/steering_methods/rotator.py} +4 -4
  142. wisent/core/steering_optimizer.py +45 -21
  143. wisent/{synthetic → core/synthetic}/cleaners/deduper_cleaner.py +3 -3
  144. wisent/{synthetic → core/synthetic}/cleaners/methods/base_dedupers.py +2 -2
  145. wisent/{synthetic → core/synthetic}/cleaners/methods/base_refusalers.py +1 -1
  146. wisent/{synthetic → core/synthetic}/cleaners/pairs_cleaner.py +5 -5
  147. wisent/{synthetic → core/synthetic}/cleaners/refusaler_cleaner.py +4 -4
  148. wisent/{synthetic → core/synthetic}/db_instructions/mini_dp.py +1 -1
  149. wisent/{synthetic → core/synthetic}/generators/diversities/methods/fast_diversity.py +1 -1
  150. wisent/{synthetic → core/synthetic}/generators/pairs_generator.py +38 -12
  151. wisent/core/tasks/livecodebench_task.py +4 -103
  152. wisent/core/timing_calibration.py +1 -1
  153. {wisent-0.5.12.dist-info → wisent-0.5.13.dist-info}/METADATA +3 -3
  154. wisent-0.5.13.dist-info/RECORD +294 -0
  155. wisent-0.5.13.dist-info/entry_points.txt +2 -0
  156. wisent/benchmarks/coding/providers/livecodebench/provider.py +0 -53
  157. wisent/classifiers/core/atoms.py +0 -747
  158. wisent/classifiers/models/logistic.py +0 -29
  159. wisent/classifiers/models/mlp.py +0 -47
  160. wisent/cli/classifiers/classifier_rotator.py +0 -137
  161. wisent/cli/cli_logger.py +0 -142
  162. wisent/cli/wisent_cli/commands/help_cmd.py +0 -52
  163. wisent/cli/wisent_cli/commands/listing.py +0 -154
  164. wisent/cli/wisent_cli/commands/train_cmd.py +0 -322
  165. wisent/cli/wisent_cli/main.py +0 -93
  166. wisent/cli/wisent_cli/shell.py +0 -80
  167. wisent/cli/wisent_cli/ui.py +0 -69
  168. wisent/cli/wisent_cli/util/aggregations.py +0 -43
  169. wisent/cli/wisent_cli/util/parsing.py +0 -126
  170. wisent/cli/wisent_cli/version.py +0 -4
  171. wisent/opti/methods/__init__.py +0 -0
  172. wisent/synthetic/__init__.py +0 -0
  173. wisent/synthetic/cleaners/__init__.py +0 -0
  174. wisent/synthetic/cleaners/core/__init__.py +0 -0
  175. wisent/synthetic/cleaners/methods/__init__.py +0 -0
  176. wisent/synthetic/cleaners/methods/core/__init__.py +0 -0
  177. wisent/synthetic/db_instructions/__init__.py +0 -0
  178. wisent/synthetic/db_instructions/core/__init__.py +0 -0
  179. wisent/synthetic/generators/__init__.py +0 -0
  180. wisent/synthetic/generators/core/__init__.py +0 -0
  181. wisent/synthetic/generators/diversities/__init__.py +0 -0
  182. wisent/synthetic/generators/diversities/core/__init__.py +0 -0
  183. wisent/synthetic/generators/diversities/methods/__init__.py +0 -0
  184. wisent-0.5.12.dist-info/RECORD +0 -220
  185. /wisent/{benchmarks → core/evaluators/benchmark_specific/coding}/__init__.py +0 -0
  186. /wisent/{benchmarks/coding → core/evaluators/benchmark_specific/coding/metrics}/__init__.py +0 -0
  187. /wisent/{benchmarks/coding/metrics → core/evaluators/benchmark_specific/coding/metrics/core}/__init__.py +0 -0
  188. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/core/atoms.py +0 -0
  189. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/metrics/passk.py +0 -0
  190. /wisent/{benchmarks/coding/metrics/core → core/evaluators/benchmark_specific/coding/output_sanitizer}/__init__.py +0 -0
  191. /wisent/{benchmarks/coding/output_sanitizer → core/evaluators/benchmark_specific/coding/output_sanitizer/core}/__init__.py +0 -0
  192. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/core/atoms.py +0 -0
  193. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/output_sanitizer/utils.py +0 -0
  194. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/providers/__init__.py +0 -0
  195. /wisent/{benchmarks/coding/output_sanitizer → core/evaluators/benchmark_specific/coding/providers}/core/__init__.py +0 -0
  196. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/providers/core/atoms.py +0 -0
  197. /wisent/{benchmarks/coding/providers/core → core/evaluators/benchmark_specific/coding/safe_docker}/__init__.py +0 -0
  198. /wisent/{benchmarks/coding/providers/livecodebench → core/evaluators/benchmark_specific/coding/safe_docker/core}/__init__.py +0 -0
  199. /wisent/{benchmarks → core/evaluators/benchmark_specific}/coding/safe_docker/core/atoms.py +0 -0
  200. /wisent/{benchmarks/coding/safe_docker → core/opti}/__init__.py +0 -0
  201. /wisent/{benchmarks/coding/safe_docker → core/opti}/core/__init__.py +0 -0
  202. /wisent/{opti → core/opti}/core/atoms.py +0 -0
  203. /wisent/{classifiers → core/opti/methods}/__init__.py +0 -0
  204. /wisent/{opti → core/opti}/methods/opti_classificator.py +0 -0
  205. /wisent/{opti → core/opti}/methods/opti_steering.py +0 -0
  206. /wisent/{classifiers/core → core/synthetic}/__init__.py +0 -0
  207. /wisent/{classifiers/models → core/synthetic/cleaners}/__init__.py +0 -0
  208. /wisent/{cli → core/synthetic/cleaners/core}/__init__.py +0 -0
  209. /wisent/{synthetic → core/synthetic}/cleaners/core/atoms.py +0 -0
  210. /wisent/{cli/classifiers → core/synthetic/cleaners/methods}/__init__.py +0 -0
  211. /wisent/{cli/data_loaders → core/synthetic/cleaners/methods/core}/__init__.py +0 -0
  212. /wisent/{synthetic → core/synthetic}/cleaners/methods/core/atoms.py +0 -0
  213. /wisent/{cli/evaluators → core/synthetic/db_instructions}/__init__.py +0 -0
  214. /wisent/{cli/steering_methods → core/synthetic/db_instructions/core}/__init__.py +0 -0
  215. /wisent/{synthetic → core/synthetic}/db_instructions/core/atoms.py +0 -0
  216. /wisent/{cli/wisent_cli → core/synthetic/generators}/__init__.py +0 -0
  217. /wisent/{cli/wisent_cli/commands → core/synthetic/generators/core}/__init__.py +0 -0
  218. /wisent/{synthetic → core/synthetic}/generators/core/atoms.py +0 -0
  219. /wisent/{cli/wisent_cli/util → core/synthetic/generators/diversities}/__init__.py +0 -0
  220. /wisent/{opti → core/synthetic/generators/diversities/core}/__init__.py +0 -0
  221. /wisent/{synthetic → core/synthetic}/generators/diversities/core/core.py +0 -0
  222. /wisent/{opti/core → core/synthetic/generators/diversities/methods}/__init__.py +0 -0
  223. {wisent-0.5.12.dist-info → wisent-0.5.13.dist-info}/WHEEL +0 -0
  224. {wisent-0.5.12.dist-info → wisent-0.5.13.dist-info}/licenses/LICENSE +0 -0
  225. {wisent-0.5.12.dist-info → wisent-0.5.13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,90 @@
1
+ """Parser setup for the 'generate-vector' command."""
2
+
3
+
4
def setup_generate_vector_parser(parser):
    """Register all options of the 'generate-vector' subcommand on ``parser``.

    Options are added in this order: pair source (mutually exclusive),
    multi-property definitions, model, steering method, output, pair
    generation, method-specific parameters, activation extraction and
    general flags. ``--output`` is the only required option.

    Args:
        parser: The argparse parser (or subparser) that receives the options.
    """
    register = parser.add_argument

    # Accepted values for the enumerated options, named once for readability.
    steering_methods = ["DAC", "CAA", "HPR", "BiPO", "ControlVectorSteering"]
    prompt_styles = ["multiple_choice", "role_playing", "direct_completion", "instruction_following"]
    token_targets = ["choice_token", "continuation_token", "last_token", "first_token", "mean_pooling", "max_pooling"]

    # Single-property pair source: at most one of the two may be supplied.
    pair_source = parser.add_mutually_exclusive_group(required=False)
    pair_source.add_argument(
        "--from-pairs",
        metavar="FILE",
        type=str,
        help="Path to JSON file containing contrastive pairs (single property)",
    )
    pair_source.add_argument(
        "--from-description",
        metavar="TRAIT",
        type=str,
        help="Natural language description of the trait (single property)",
    )

    # Multi-property steering definitions.
    register(
        "--multi-property",
        action="store_true",
        help="Enable multi-property steering (DAC only)",
    )
    register(
        "--property-files",
        nargs="+",
        type=str,
        metavar="NAME:FILE:LAYER",
        help="Property definitions from files (format: property_name:pairs_file:layer)",
    )
    register(
        "--property-descriptions",
        nargs="+",
        type=str,
        metavar="NAME:DESC:LAYER",
        help="Property definitions from descriptions (format: property_name:description:layer)",
    )

    # Model selection.
    register(
        "--model",
        type=str,
        default="distilgpt2",
        help="Model name or path (default: distilgpt2)",
    )
    register(
        "--device",
        type=str,
        default=None,
        help="Device to run on (default: auto-detect)",
    )

    # Steering method and target layer.
    register(
        "--method",
        type=str,
        choices=steering_methods,
        default="DAC",
        help="Steering method to use (default: DAC)",
    )
    register(
        "--layer",
        type=int,
        default=0,
        help="Layer index to apply steering (default: 0)",
    )

    # Destination of the generated vector.
    register(
        "--output",
        type=str,
        required=True,
        help="Output path for the generated steering vector",
    )

    # Pair generation (only meaningful together with --from-description).
    register(
        "--num-pairs",
        type=int,
        default=30,
        help="Number of pairs to generate when using --from-description (default: 30)",
    )
    register(
        "--save-pairs",
        type=str,
        default=None,
        help="Save generated pairs to this file when using --from-description",
    )

    # Method-specific tuning knobs.
    register(
        "--dynamic-control",
        action="store_true",
        help="Enable dynamic control for DAC method",
    )
    register(
        "--entropy-threshold",
        type=float,
        default=1.0,
        help="Entropy threshold for DAC method (default: 1.0)",
    )
    register(
        "--beta",
        type=float,
        default=1.0,
        help="Beta parameter for HPR method (default: 1.0)",
    )

    # Activation extraction.
    register(
        "--prompt-construction",
        type=str,
        choices=prompt_styles,
        default="multiple_choice",
        help="Strategy for constructing prompts from question-answer pairs (default: multiple_choice)",
    )
    register(
        "--token-targeting",
        type=str,
        choices=token_targets,
        default="choice_token",
        help="Strategy for targeting tokens in activation extraction (default: choice_token)",
    )

    # General flags.
    register("--verbose", action="store_true", help="Enable verbose output")
@@ -0,0 +1,90 @@
1
+ """Parser for the get-activations command."""
2
+
3
+ import argparse
4
+
5
+
6
def setup_get_activations_parser(parser: argparse.ArgumentParser) -> None:
    """
    Attach the get-activations command-line interface to ``parser``.

    The command takes a JSON file of contrastive pairs, collects activations
    from the requested model layers and writes the enriched pairs to the
    path given by ``--output``.

    Args:
        parser: The argparse parser (or subparser) that receives the options.
    """
    # Each entry is (flags, add_argument keyword arguments); the order here
    # is the order in which the options are registered.
    option_table = (
        # Input/Output
        (("pairs_file",), {
            "type": str,
            "help": "Path to JSON file containing contrastive pairs",
        }),
        (("--output",), {
            "type": str,
            "required": True,
            "help": "Output file path for pairs with activations (JSON)",
        }),
        # Model configuration
        (("--model",), {
            "type": str,
            "default": "meta-llama/Llama-3.2-1B-Instruct",
            "help": "Model identifier (e.g., 'meta-llama/Llama-3.2-1B-Instruct')",
        }),
        (("--device",), {
            "type": str,
            "default": "cuda",
            "help": "Device to run on (cuda, cpu, mps)",
        }),
        # Layer selection
        (("--layers",), {
            "type": str,
            "default": None,
            "help": "Comma-separated layer indices (e.g., '8,12,15') or 'all' for all layers",
        }),
        # Token aggregation
        (("--token-aggregation",), {
            "type": str,
            "choices": ["average", "final", "first", "max", "min"],
            "default": "average",
            "help": "How to aggregate token activations",
        }),
        # Prompt construction strategy
        (("--prompt-strategy",), {
            "type": str,
            "choices": ["chat_template", "direct_completion", "instruction_following", "multiple_choice", "role_playing"],
            "default": "chat_template",
            "help": "Prompt construction strategy (default: chat_template)",
        }),
        # Processing options
        (("--batch-size",), {
            "type": int,
            "default": 1,
            "help": "Batch size for processing (default: 1)",
        }),
        (("--limit",), {
            "type": int,
            "default": None,
            "help": "Maximum number of pairs to process",
        }),
        # Display options
        (("--verbose",), {
            "action": "store_true",
            "help": "Enable verbose output",
        }),
        (("--timing",), {
            "action": "store_true",
            "help": "Show timing information",
        }),
    )

    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
@@ -0,0 +1,152 @@
1
+ """
2
+ Main parser setup for Wisent CLI.
3
+
4
+ This module imports and combines all command-specific parsers into a single
5
+ argparse parser for the Wisent CLI.
6
+ """
7
+
8
+ import argparse
9
+
10
+ from wisent.core.parser_arguments.tasks_parser import setup_tasks_parser
11
+ from wisent.core.parser_arguments.generate_pairs_parser import setup_generate_pairs_parser
12
+ from wisent.core.parser_arguments.generate_pairs_from_task_parser import setup_generate_pairs_from_task_parser
13
+ from wisent.core.parser_arguments.get_activations_parser import setup_get_activations_parser
14
+ from wisent.core.parser_arguments.create_steering_vector_parser import setup_create_steering_vector_parser
15
+ from wisent.core.parser_arguments.generate_vector_from_task_parser import setup_generate_vector_from_task_parser
16
+ from wisent.core.parser_arguments.generate_vector_from_synthetic_parser import setup_generate_vector_from_synthetic_parser
17
+ from wisent.core.parser_arguments.synthetic_parser import setup_synthetic_parser
18
+ from wisent.core.parser_arguments.test_nonsense_parser import setup_test_nonsense_parser
19
+ from wisent.core.parser_arguments.monitor_parser import setup_monitor_parser
20
+ from wisent.core.parser_arguments.agent_parser import setup_agent_parser
21
+ from wisent.core.parser_arguments.model_config_parser import setup_model_config_parser
22
+ from wisent.core.parser_arguments.configure_model_parser import setup_configure_model_parser
23
+ from wisent.core.parser_arguments.optimize_classification_parser import setup_classification_optimizer_parser
24
+ from wisent.core.parser_arguments.optimize_steering_parser import setup_steering_optimizer_parser
25
+ from wisent.core.parser_arguments.optimize_sample_size_parser import setup_sample_size_optimizer_parser
26
+ from wisent.core.parser_arguments.full_optimize_parser import setup_full_optimizer_parser
27
+ from wisent.core.parser_arguments.generate_vector_parser import setup_generate_vector_parser
28
+ from wisent.core.parser_arguments.multi_steer_parser import setup_multi_steer_parser
29
+ from wisent.core.parser_arguments.evaluate_parser import setup_evaluate_parser
30
+ from wisent.core.parser_arguments.generate_responses_parser import setup_generate_responses_parser
31
+ from wisent.core.parser_arguments.evaluate_responses_parser import setup_evaluate_responses_parser
32
+
33
+
34
def setup_parser() -> argparse.ArgumentParser:
    """Build the top-level Wisent CLI parser with all of its subcommands.

    The chosen subcommand name is stored under ``command`` in the parsed
    namespace. Every subcommand gets its own sub-parser, which is handed to
    that command's dedicated setup function to receive its options.

    Returns:
        The fully configured top-level ``argparse.ArgumentParser``.
    """
    root = argparse.ArgumentParser(description="Wisent-Guard: Advanced AI Safety and Alignment Toolkit")

    # Global arguments
    root.add_argument("--verbose", action="store_true", help="Enable verbose output")

    # Container for the individual commands
    commands = root.add_subparsers(dest="command", help="Available commands")

    # (command name, help text, setup function) in registration order.
    registry = (
        # Main evaluation pipeline
        ("tasks", "Run evaluation tasks", setup_tasks_parser),
        # Contrastive pair generation
        ("generate-pairs", "Generate synthetic contrastive pairs", setup_generate_pairs_parser),
        (
            "generate-pairs-from-task",
            "Generate contrastive pairs from lm-eval task",
            setup_generate_pairs_from_task_parser,
        ),
        # Activation collection
        ("get-activations", "Collect activations from contrastive pairs", setup_get_activations_parser),
        # Steering vector creation
        (
            "create-steering-vector",
            "Create steering vectors from enriched pairs",
            setup_create_steering_vector_parser,
        ),
        # Full pipelines
        (
            "generate-vector-from-task",
            "Generate steering vector from task (full pipeline)",
            setup_generate_vector_from_task_parser,
        ),
        (
            "generate-vector-from-synthetic",
            "Generate steering vector from synthetic pairs (full pipeline)",
            setup_generate_vector_from_synthetic_parser,
        ),
        # Synthetic pipeline (generate + train + test)
        ("synthetic", "Run synthetic contrastive pair pipeline", setup_synthetic_parser),
        # Nonsense detection test
        ("test-nonsense", "Test nonsense detection system", setup_test_nonsense_parser),
        # Performance monitoring
        ("monitor", "Performance monitoring and system information", setup_monitor_parser),
        # Autonomous agent interaction
        ("agent", "Interact with autonomous agent", setup_agent_parser),
        # Model-specific optimal parameters
        ("model-config", "Manage model-specific optimal parameters", setup_model_config_parser),
        # Setup for new/unsupported models
        (
            "configure-model",
            "Configure tokens and layer access for unsupported models",
            setup_configure_model_parser,
        ),
        # Optimization commands
        (
            "optimize-classification",
            "Optimize classification parameters across all tasks",
            setup_classification_optimizer_parser,
        ),
        (
            "optimize-steering",
            "Optimize steering parameters for different methods",
            setup_steering_optimizer_parser,
        ),
        (
            "optimize-sample-size",
            "Find optimal training sample size for classifiers",
            setup_sample_size_optimizer_parser,
        ),
        (
            "full-optimize",
            "Run full optimization: classification parameters then sample size",
            setup_full_optimizer_parser,
        ),
        # Steering vectors without tasks
        (
            "generate-vector",
            "Generate steering vectors from contrastive pairs (file or description)",
            setup_generate_vector_parser,
        ),
        # Combining multiple vectors at inference time
        (
            "multi-steer",
            "Combine multiple steering vectors dynamically at inference time",
            setup_multi_steer_parser,
        ),
        # Single-prompt evaluation
        (
            "evaluate",
            "Evaluate single prompt with steering vector and return quality scores",
            setup_evaluate_parser,
        ),
        # Response generation and evaluation
        (
            "generate-responses",
            "Generate model responses to questions from a task",
            setup_generate_responses_parser,
        ),
        (
            "evaluate-responses",
            "Evaluate generated responses using embedded evaluator",
            setup_evaluate_responses_parser,
        ),
    )

    # Create each sub-parser and immediately let its setup function fill it.
    for command_name, summary, configure in registry:
        configure(commands.add_parser(command_name, help=summary))

    return root
@@ -0,0 +1,59 @@
1
+ """Parser setup for the 'model-config' command."""
2
+
3
+
4
def setup_model_config_parser(parser):
    """Populate *parser* with the ``model-config`` actions and their options.

    Five actions are registered under the ``config_action`` destination:
    ``save``, ``list``, ``show``, ``remove`` and ``test``.  The
    ``--config-dir`` and ``--verbose`` options are attached to *parser*
    itself, not to the individual actions, so argparse expects them before
    the action name on the command line.

    Args:
        parser: The argparse parser for the ``model-config`` command; it is
            modified in place.
    """
    actions = parser.add_subparsers(dest="config_action", help="Model configuration actions")

    # Positional shared by every action that targets a single model.  Only
    # read (never mutated) below, so sharing one dict is safe.
    model_arg = ("model", {"type": str, "help": "Model name or path"})

    # (action name, action help, [(argument name, add_argument kwargs), ...])
    # Kept inside the function so every call builds fresh choice lists.
    layout = [
        (
            "save",
            "Save optimal parameters for a model",
            [
                model_arg,
                (
                    "--classification-layer",
                    {"type": int, "required": True, "help": "Optimal layer for classification"},
                ),
                (
                    "--steering-layer",
                    {
                        "type": int,
                        "default": None,
                        "help": "Optimal layer for steering (defaults to classification layer)",
                    },
                ),
                (
                    "--token-aggregation",
                    {
                        "type": str,
                        "default": "average",
                        "choices": ["average", "final", "first", "max", "min"],
                        "help": "Token aggregation method",
                    },
                ),
                (
                    "--detection-threshold",
                    {"type": float, "default": 0.6, "help": "Detection threshold"},
                ),
                (
                    "--optimization-method",
                    {"type": str, "default": "manual", "help": "How these parameters were determined"},
                ),
                (
                    "--metrics",
                    {"type": str, "default": None, "help": "JSON string with optimization metrics"},
                ),
            ],
        ),
        (
            "list",
            "List all saved model configurations",
            [
                (
                    "--detailed",
                    {"action": "store_true", "help": "Show detailed configuration information"},
                ),
            ],
        ),
        (
            "show",
            "Show configuration for a specific model",
            [
                model_arg,
                (
                    "--task",
                    {"type": str, "default": None, "help": "Show task-specific overrides if available"},
                ),
            ],
        ),
        (
            "remove",
            "Remove configuration for a model",
            [
                model_arg,
                (
                    "--confirm",
                    {"action": "store_true", "help": "Confirm removal without prompting"},
                ),
            ],
        ),
        (
            "test",
            "Test if saved configuration works",
            [
                model_arg,
                (
                    "--task",
                    {
                        "type": str,
                        "default": "truthfulqa_mc1",
                        "help": "Task to test with (default: truthfulqa_mc1)",
                    },
                ),
                (
                    "--limit",
                    {"type": int, "default": 5, "help": "Number of samples to test with (default: 5)"},
                ),
                (
                    "--device",
                    {"type": str, "default": None, "help": "Device to run on"},
                ),
            ],
        ),
    ]

    # Registration order follows the table, which fixes the order in --help.
    for action_name, action_help, arguments in layout:
        action_parser = actions.add_parser(action_name, help=action_help)
        for arg_name, arg_kwargs in arguments:
            action_parser.add_argument(arg_name, **arg_kwargs)

    # Options that live on the command's own parser rather than on an action.
    shared_options = [
        (
            "--config-dir",
            {
                "type": str,
                "default": None,
                "help": "Custom directory for configuration files (default: ~/.wisent-guard/model_configs/)",
            },
        ),
        ("--verbose", {"action": "store_true", "help": "Enable verbose output"}),
    ]
    for arg_name, arg_kwargs in shared_options:
        parser.add_argument(arg_name, **arg_kwargs)
@@ -0,0 +1,17 @@
1
+ """Parser setup for the 'monitor' command."""
2
+
3
+
4
def setup_monitor_parser(parser):
    """Attach the ``monitor`` command-line options to *parser*.

    Registers seven boolean switches plus ``--interval``, ``--export-csv``
    and ``--duration``, in the order listed in the table below.

    Args:
        parser: The argparse parser for the ``monitor`` command; it is
            modified in place.
    """
    # (flag, add_argument kwargs); table order is the order shown in --help.
    options = (
        ("--memory-info", {"action": "store_true", "help": "Show current memory usage information"}),
        ("--system-info", {"action": "store_true", "help": "Show system information and capabilities"}),
        ("--benchmark", {"action": "store_true", "help": "Run performance benchmarks"}),
        ("--test-gpu", {"action": "store_true", "help": "Test GPU availability and memory"}),
        ("--continuous", {"action": "store_true", "help": "Continuous monitoring mode (Ctrl+C to stop)"}),
        (
            "--interval",
            {"type": float, "default": 1.0, "help": "Monitoring interval in seconds (default: 1.0)"},
        ),
        (
            "--export-csv",
            {"type": str, "default": None, "help": "Export monitoring data to CSV file"},
        ),
        (
            "--duration",
            {
                "type": int,
                "default": 60,
                "help": "Duration for continuous monitoring in seconds (default: 60)",
            },
        ),
        ("--track-gpu", {"action": "store_true", "help": "Include GPU monitoring (requires CUDA)"}),
        ("--detailed", {"action": "store_true", "help": "Show detailed monitoring information"}),
    )
    for flag, settings in options:
        parser.add_argument(flag, **settings)
@@ -0,0 +1,47 @@
1
+ """Parser setup for the 'multi-steer' command."""
2
+
3
+
4
def setup_multi_steer_parser(parser):
    """Attach the ``multi-steer`` command-line options to *parser*.

    ``--vector`` (repeatable), ``--model``, ``--layer`` and ``--prompt`` are
    required; every other option is optional.

    Args:
        parser: The argparse parser for the ``multi-steer`` command; it is
            modified in place.
    """
    register = parser.add_argument

    def switch(flag, text):
        # Boolean flag: False unless present on the command line.
        register(flag, action="store_true", help=text)

    def valued(flag, kind, text, **extra):
        # Option that consumes a value converted with *kind*.
        register(flag, type=kind, help=text, **extra)

    # Vector inputs: each occurrence appends one "path:weight" string.
    valued(
        "--vector",
        str,
        "Path to steering vector and its weight (format: path/to/vector.pt:0.5). Can be specified multiple times.",
        action="append",
        required=True,
        metavar="PATH:WEIGHT",
    )

    # Model selection.
    valued("--model", str, "Model name or path", required=True)
    valued("--layer", int, "Layer index to apply combined steering", required=True)
    valued("--device", str, "Device to run on (default: auto-detect)", default=None)

    # Steering method used for the combination.
    valued(
        "--method",
        str,
        "Steering method to use for combination (default: CAA)",
        default="CAA",
        choices=["CAA", "DAC"],
    )

    # Generation settings.
    valued("--prompt", str, "Prompt to generate with combined steering", required=True)
    valued("--max-new-tokens", int, "Maximum new tokens to generate (default: 100)", default=100)

    # Weight handling.
    switch("--normalize-weights", "Normalize weights to sum to 1.0")
    switch("--allow-unnormalized", "Allow weights that don't sum to 1.0 (for stronger effects)")
    valued(
        "--target-norm",
        float,
        "Scale the combined vector to have this norm (e.g., 10.0)",
        default=None,
    )

    # Output.
    valued("--save-combined", str, "Save the combined steering vector to this path", default=None)
    switch("--verbose", "Enable verbose output showing weight calculations")
@@ -0,0 +1,67 @@
1
+ """Parser setup for the 'optimize-classification' command."""
2
+
3
+
4
def setup_classification_optimizer_parser(parser):
    """Attach the ``optimize-classification`` arguments to *parser*.

    Registers the positional ``model`` argument followed by the search,
    output and timing-calibration options.  ``--save-classifiers`` and
    ``--no-save-classifiers`` both write to ``save_classifiers``, which is
    ``True`` unless the ``--no-`` form is given.

    Args:
        parser: The argparse parser for the ``optimize-classification``
            command; it is modified in place.
    """
    # Built per call so each parser gets its own default lists.
    search_space = [
        ("model", {"type": str, "help": "Model name or path to optimize"}),
        ("--limit", {"type": int, "default": 1000, "help": "Maximum samples per task (default: 1000)"}),
        (
            "--optimization-metric",
            {
                "type": str,
                "default": "f1",
                "choices": ["f1", "accuracy", "precision", "recall"],
                "help": "Metric to optimize (default: f1)",
            },
        ),
        (
            "--max-time-per-task",
            {
                "type": float,
                "default": 15.0,
                "help": "Maximum time per task in minutes (default: 15.0)",
            },
        ),
        (
            "--layer-range",
            {
                "type": str,
                "default": None,
                "help": "Layer range to test (e.g., '10-20', if None uses all layers)",
            },
        ),
        (
            "--aggregation-methods",
            {
                "type": str,
                "nargs": "+",
                "default": ["average", "final", "first", "max", "min"],
                "help": "Token aggregation methods to test",
            },
        ),
        (
            "--threshold-range",
            {
                "type": float,
                "nargs": "+",
                "default": [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
                "help": "Detection thresholds to test",
            },
        ),
        ("--device", {"type": str, "default": None, "help": "Device to run on"}),
    ]

    outputs = [
        ("--results-file", {"type": str, "default": None, "help": "Custom file path for saving results"}),
        ("--no-save", {"action": "store_true", "help": "Don't save results to model config"}),
        (
            "--save-logs-json",
            {"type": str, "default": None, "help": "Save detailed optimization logs to JSON file"},
        ),
        (
            "--save-classifiers",
            {
                "action": "store_true",
                "default": True,
                "help": "Save best classifiers for each task (default: True)",
            },
        ),
        (
            "--no-save-classifiers",
            {
                "dest": "save_classifiers",
                "action": "store_false",
                "help": "Don't save classifiers (overrides --save-classifiers)",
            },
        ),
        (
            "--classifiers-dir",
            {
                "type": str,
                "default": None,
                "help": "Directory to save classifiers (default: ./optimized_classifiers/model_name/)",
            },
        ),
    ]

    # Timing calibration options.
    calibration = [
        (
            "--skip-timing-estimation",
            {"action": "store_true", "help": "Skip timing estimation and proceed without time warnings"},
        ),
        (
            "--calibration-file",
            {"type": str, "default": None, "help": "File to save/load calibration data"},
        ),
        (
            "--calibrate-only",
            {
                "action": "store_true",
                "help": "Only run calibration and exit (saves to --calibration-file if provided)",
            },
        ),
    ]

    for name, settings in search_space + outputs + calibration:
        parser.add_argument(name, **settings)
@@ -0,0 +1,58 @@
1
+ """Parser setup for the 'optimize-sample-size' command."""
2
+
3
+
4
def setup_sample_size_optimizer_parser(parser):
    """Attach the ``optimize-sample-size`` arguments to *parser*.

    Besides the positional ``model``, the options ``--task``, ``--layer`` and
    ``--token-aggregation`` are required; everything else has a default.

    Args:
        parser: The argparse parser for the ``optimize-sample-size``
            command; it is modified in place.
    """
    # Each section is an ordered run of (name, add_argument kwargs) pairs.
    # The sections are defined per call so default lists are never shared.
    sections = (
        # What to optimize.
        (
            ("model", dict(type=str, help="Model name or path to optimize")),
            ("--task", dict(type=str, required=True, help="Task to optimize for (REQUIRED)")),
            ("--layer", dict(type=int, required=True, help="Layer index to use (REQUIRED)")),
            (
                "--token-aggregation",
                dict(
                    type=str,
                    required=True,
                    choices=["average", "final", "first", "max", "min"],
                    help="Token aggregation method (REQUIRED)",
                ),
            ),
        ),
        # Classification-specific arguments.
        (
            (
                "--threshold",
                dict(type=float, default=0.5, help="Detection threshold for classification (default: 0.5)"),
            ),
        ),
        # Steering mode.
        (
            (
                "--steering-mode",
                dict(action="store_true", help="Optimize for steering instead of classification"),
            ),
            (
                "--steering-method",
                dict(
                    type=str,
                    default="CAA",
                    choices=["CAA", "CAA_L2", "HPR", "DAC", "BiPO", "KSteering"],
                    help="Steering method to use (default: CAA)",
                ),
            ),
            (
                "--steering-strength",
                dict(type=float, default=1.0, help="Steering strength to use (default: 1.0)"),
            ),
            (
                "--token-targeting-strategy",
                dict(
                    type=str,
                    default="LAST_TOKEN",
                    choices=["CHOICE_TOKEN", "LAST_TOKEN", "FIRST_TOKEN", "ALL_TOKENS"],
                    help="Token targeting strategy for steering (default: LAST_TOKEN)",
                ),
            ),
        ),
        # Common optimization parameters.
        (
            (
                "--sample-sizes",
                dict(
                    type=int,
                    nargs="+",
                    default=[5, 10, 20, 50, 100, 200, 500],
                    help="Sample sizes to test (default: 5 10 20 50 100 200 500)",
                ),
            ),
            ("--test-size", dict(type=int, default=200, help="Fixed test set size (default: 200)")),
            ("--test-split", dict(type=float, default=0.2, help="DEPRECATED: Use --test-size instead")),
            ("--seed", dict(type=int, default=42, help="Random seed for reproducibility (default: 42)")),
            (
                "--limit",
                dict(type=int, default=None, help="Maximum number of samples to load from dataset"),
            ),
            ("--save-plot", dict(action="store_true", help="Save performance plot")),
            (
                "--no-save-config",
                dict(action="store_true", help="Don't save optimal sample size to model config"),
            ),
            ("--device", dict(type=str, default=None, help="Device to run on")),
            ("--verbose", dict(action="store_true", help="Enable verbose output")),
            (
                "--force",
                dict(
                    action="store_true",
                    help="Force optimization even without matching classifier parameters",
                ),
            ),
        ),
    )

    for section in sections:
        for name, settings in section:
            parser.add_argument(name, **settings)