wisent-0.5.13-py3-none-any.whl → wisent-0.5.15-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of wisent might be problematic.

Files changed (62)
  1. wisent/__init__.py +1 -1
  2. wisent/cli.py +114 -0
  3. wisent/core/activations/activations_collector.py +19 -11
  4. wisent/core/agent/__init__.py +1 -18
  5. wisent/core/agent/diagnose/__init__.py +1 -55
  6. wisent/core/cli/__init__.py +3 -1
  7. wisent/core/cli/create_steering_vector.py +60 -18
  8. wisent/core/cli/evaluate_responses.py +14 -8
  9. wisent/core/cli/generate_pairs_from_task.py +18 -5
  10. wisent/core/cli/get_activations.py +1 -1
  11. wisent/core/cli/multi_steer.py +108 -0
  12. wisent/core/cli/optimize_classification.py +187 -285
  13. wisent/core/cli/optimize_sample_size.py +78 -0
  14. wisent/core/cli/optimize_steering.py +354 -53
  15. wisent/core/cli/tasks.py +274 -9
  16. wisent/core/errors/__init__.py +0 -0
  17. wisent/core/errors/error_handler.py +134 -0
  18. wisent/core/evaluators/benchmark_specific/log_likelihoods_evaluator.py +152 -295
  19. wisent/core/evaluators/rotator.py +22 -8
  20. wisent/core/main.py +5 -1
  21. wisent/core/model_persistence.py +4 -19
  22. wisent/core/models/wisent_model.py +11 -3
  23. wisent/core/parser.py +4 -3
  24. wisent/core/parser_arguments/main_parser.py +1 -1
  25. wisent/core/parser_arguments/multi_steer_parser.py +4 -3
  26. wisent/core/parser_arguments/optimize_steering_parser.py +4 -0
  27. wisent/core/sample_size_optimizer_v2.py +1 -1
  28. wisent/core/steering_optimizer.py +2 -2
  29. wisent/tests/__init__.py +0 -0
  30. wisent/tests/examples/__init__.py +0 -0
  31. wisent/tests/examples/cli/__init__.py +0 -0
  32. wisent/tests/examples/cli/activations/__init__.py +0 -0
  33. wisent/tests/examples/cli/activations/test_get_activations.py +127 -0
  34. wisent/tests/examples/cli/classifier/__init__.py +0 -0
  35. wisent/tests/examples/cli/classifier/test_classifier_examples.py +141 -0
  36. wisent/tests/examples/cli/contrastive_pairs/__init__.py +0 -0
  37. wisent/tests/examples/cli/contrastive_pairs/test_generate_pairs.py +89 -0
  38. wisent/tests/examples/cli/evaluation/__init__.py +0 -0
  39. wisent/tests/examples/cli/evaluation/test_evaluation_examples.py +117 -0
  40. wisent/tests/examples/cli/generate/__init__.py +0 -0
  41. wisent/tests/examples/cli/generate/test_generate_with_classifier.py +146 -0
  42. wisent/tests/examples/cli/generate/test_generate_with_steering.py +149 -0
  43. wisent/tests/examples/cli/generate/test_only_generate.py +110 -0
  44. wisent/tests/examples/cli/multi_steering/__init__.py +0 -0
  45. wisent/tests/examples/cli/multi_steering/test_multi_steer_from_trained_vectors.py +210 -0
  46. wisent/tests/examples/cli/multi_steering/test_multi_steer_with_different_parameters.py +205 -0
  47. wisent/tests/examples/cli/multi_steering/test_train_and_multi_steer.py +174 -0
  48. wisent/tests/examples/cli/optimizer/__init__.py +0 -0
  49. wisent/tests/examples/cli/optimizer/test_optimize_sample_size.py +102 -0
  50. wisent/tests/examples/cli/optimizer/test_optimizer_examples.py +59 -0
  51. wisent/tests/examples/cli/steering/__init__.py +0 -0
  52. wisent/tests/examples/cli/steering/test_create_steering_vectors.py +135 -0
  53. wisent/tests/examples/cli/synthetic/__init__.py +0 -0
  54. wisent/tests/examples/cli/synthetic/test_synthetic_pairs.py +45 -0
  55. {wisent-0.5.13.dist-info → wisent-0.5.15.dist-info}/METADATA +3 -1
  56. {wisent-0.5.13.dist-info → wisent-0.5.15.dist-info}/RECORD +61 -31
  57. wisent/core/agent/diagnose/test_synthetic_classifier.py +0 -71
  58. /wisent/core/parser_arguments/{test_nonsense_parser.py → nonsense_parser.py} +0 -0
  59. {wisent-0.5.13.dist-info → wisent-0.5.15.dist-info}/WHEEL +0 -0
  60. {wisent-0.5.13.dist-info → wisent-0.5.15.dist-info}/entry_points.txt +0 -0
  61. {wisent-0.5.13.dist-info → wisent-0.5.15.dist-info}/licenses/LICENSE +0 -0
  62. {wisent-0.5.13.dist-info → wisent-0.5.15.dist-info}/top_level.txt +0 -0
wisent/core/cli/tasks.py CHANGED
@@ -17,6 +17,37 @@ def execute_tasks(args):
     from wisent.core.classifiers.classifiers.core.atoms import ClassifierTrainConfig
     from wisent.core.model_persistence import ModelPersistence, create_classifier_metadata
 
+    # Check if this is inference-only mode with steering vector
+    if args.inference_only and args.load_steering_vector:
+        import torch
+        print(f"\n🎯 Starting inference with steering vector")
+        print(f" Loading vector from: {args.load_steering_vector}")
+
+        # Load steering vector
+        vector_data = torch.load(args.load_steering_vector)
+        steering_vector = vector_data['vector']
+        layer = vector_data['layer']
+
+        print(f" ✓ Loaded steering vector for layer {layer}")
+        print(f" Model: {vector_data.get('model', 'unknown')}")
+        print(f" Method: {vector_data.get('method', 'unknown')}")
+
+        # For now, just load and validate - actual inference would require more implementation
+        print(f"\n✅ Steering vector loaded successfully!\n")
+        print(f"Note: Inference with steering vector requires additional implementation")
+
+        # Return results for programmatic access
+        return {
+            "steering_vector_loaded": True,
+            "vector_path": args.load_steering_vector,
+            "layer": layer,
+            "method": vector_data.get('method', 'unknown'),
+            "test_accuracy": None,
+            "test_f1_score": None,
+            "training_time": 0.0,
+            "evaluation_results": {}
+        }
+
     print(f"\n🎯 Starting classifier training on task: {args.task_names}")
     print(f" Model: {args.model}")
     print(f" Layer: {args.layer}")
@@ -24,10 +55,11 @@ def execute_tasks(args):
 
     try:
         # 1. Load task data using LMEvalDataLoader
-        print(f"\n📊 Loading task '{args.task_names}'...")
+        task_name = args.task_names[0] if isinstance(args.task_names, list) else args.task_names
+        print(f"\n📊 Loading task '{task_name}'...")
         loader = LMEvalDataLoader()
         result = loader._load_one_task(
-            task_name=args.task_names,
+            task_name=task_name,
             split_ratio=args.split_ratio,
             seed=args.seed,
             limit=args.limit,
@@ -94,6 +126,70 @@ def execute_tasks(args):
 
         print(f"\n ✓ Collected {len(positive_activations)} positive and {len(negative_activations)} negative activations")
 
+        # Check if steering vector mode is requested
+        if args.save_steering_vector and args.train_only:
+            import torch
+            from wisent.core.steering_methods.methods.caa import CAAMethod
+
+            print(f"\n🎯 Training steering vector using {args.steering_method} method...")
+
+            # Convert activations to tensors
+            pos_tensors = [torch.from_numpy(act).float() for act in positive_activations]
+            neg_tensors = [torch.from_numpy(act).float() for act in negative_activations]
+
+            # Create steering method
+            steering_method = CAAMethod(normalize=True)
+
+            # Train steering vector
+            steering_vector = steering_method.train_for_layer(pos_tensors, neg_tensors)
+
+            # Save steering vector
+            print(f"\n💾 Saving steering vector to '{args.save_steering_vector}'...")
+            os.makedirs(os.path.dirname(args.save_steering_vector) or '.', exist_ok=True)
+            torch.save({
+                'steering_vector': steering_vector,
+                'layer_index': layer,
+                'method': args.steering_method,
+                'model': args.model,
+                'task': args.task_names,
+                # Legacy keys for backward compatibility
+                'vector': steering_vector,
+                'layer': layer,
+            }, args.save_steering_vector)
+            print(f" ✓ Steering vector saved to: {args.save_steering_vector}")
+
+            # Save output artifacts if requested
+            if args.output:
+                print(f"\n📁 Saving artifacts to '{args.output}'...")
+                os.makedirs(args.output, exist_ok=True)
+                report_path = os.path.join(args.output, 'training_report.json')
+                with open(report_path, 'w') as f:
+                    json.dump({
+                        'method': args.steering_method,
+                        'layer': layer,
+                        'num_positive': len(positive_activations),
+                        'num_negative': len(negative_activations),
+                        'vector_shape': list(steering_vector.shape)
+                    }, f, indent=2)
+                print(f" ✓ Training report saved to: {report_path}")
+
+            print(f"\n✅ Steering vector training completed successfully!\n")
+
+            # Return results for programmatic access
+            return {
+                "steering_vector_saved": True,
+                "vector_path": args.save_steering_vector,
+                "layer": layer,
+                "method": args.steering_method,
+                "num_positive": len(positive_activations),
+                "num_negative": len(negative_activations),
+                "vector_shape": list(steering_vector.shape),
+                "test_accuracy": None,
+                "test_f1_score": None,
+                "training_time": 0.0,
+                "evaluation_results": {}
+            }
+
         # 6. Prepare training data
         print(f"\n🎯 Preparing training data...")
         X_positive = np.array(positive_activations)
@@ -126,15 +222,155 @@ def execute_tasks(args):
         # Train the classifier
         report = classifier.fit(X, y, config=train_config)
 
-        # 8. Print results
+        # 8. Print training completion
        print(f"\n📈 Training completed!")
         print(f" Best epoch: {report.best_epoch}/{report.epochs_ran}")
-        print(f" Final metrics:")
-        print(f" • Accuracy: {report.final.accuracy:.4f}")
-        print(f" Precision: {report.final.precision:.4f}")
-        print(f" • Recall: {report.final.recall:.4f}")
-        print(f" • F1 Score: {report.final.f1:.4f}")
-        print(f" • AUC: {report.final.auc:.4f}")
+
+        # 8.5. PROPER EVALUATION: Test classifier on real model generations
+        print(f"\n🎯 Evaluating classifier on real model generations...")
+
+        # Get test pairs
+        test_pairs = result['test_qa_pairs']
+        print(f" Generating responses for {len(test_pairs.pairs)} test questions...")
+
+        # Initialize evaluator for this task
+        from wisent.core.evaluators.rotator import EvaluatorRotator
+        # Discover both oracles and benchmark_specific evaluators
+        EvaluatorRotator.discover_evaluators("wisent.core.evaluators.oracles")
+        EvaluatorRotator.discover_evaluators("wisent.core.evaluators.benchmark_specific")
+        evaluator = EvaluatorRotator(evaluator=None, task_name=task_name, autoload=False)
+        print(f" Using evaluator: {evaluator._evaluator.name}")
+
+        # Generate responses and collect activations
+        generation_results = []
+        for i, pair in enumerate(test_pairs.pairs):
+            if i % 10 == 0:
+                print(f" Processing {i+1}/{len(test_pairs.pairs)}...", end='\r')
+
+            question = pair.prompt
+            expected = pair.positive_response.model_response
+            choices = [pair.negative_response.model_response, pair.positive_response.model_response]
+
+            # Generate response from unsteered model
+            response = model.generate(
+                [[{"role": "user", "content": question}]],
+                max_new_tokens=100,
+                do_sample=False  # Deterministic (greedy decoding) for evaluation
+            )[0]
+
+            # Evaluate the response using Wisent evaluator
+            eval_result = evaluator.evaluate(
+                response=response,
+                expected=expected,
+                model=model,
+                question=question,
+                choices=choices,
+                task_name=task_name
+            )
+
+            # Get activation for this generation
+            # Use ActivationCollector to collect activations from the generated text
+            gen_collector = ActivationCollector(model=model, store_device="cpu")
+            # Create a pair with the generated response
+            from wisent.core.contrastive_pairs.core.response import PositiveResponse, NegativeResponse
+            from wisent.core.contrastive_pairs.core.pair import ContrastivePair
+            temp_pos_response = PositiveResponse(model_response=response, layers_activations={})
+            temp_neg_response = NegativeResponse(model_response="placeholder", layers_activations={})  # Not used
+            temp_pair = ContrastivePair(
+                prompt=question,
+                positive_response=temp_pos_response,
+                negative_response=temp_neg_response,
+                label=None,
+                trait_description=None
+            )
+
+            # Collect activation - ActivationCollector will re-run the model with prompt+response
+            # First, collect with full sequence to get token-by-token activations
+            collected_full = gen_collector.collect_for_pair(
+                temp_pair,
+                layers=[layer_str],
+                aggregation=aggregation_strategy,
+                return_full_sequence=True,
+                normalize_layers=False
+            )
+
+            # Access the collected activations
+            import torch
+            if collected_full.positive_response.layers_activations:
+                layer_activations_full = collected_full.positive_response.layers_activations
+                if layer_str in layer_activations_full:
+                    activation_full_seq = layer_activations_full[layer_str]
+                    if activation_full_seq is not None and isinstance(activation_full_seq, torch.Tensor):
+                        # activation_full_seq shape: (num_tokens, hidden_dim)
+
+                        # Apply aggregation manually to get single vector for classifier
+                        if aggregation_strategy.name == 'MEAN_POOLING':
+                            activation_agg = activation_full_seq.mean(dim=0)
+                        elif aggregation_strategy.name == 'LAST_TOKEN':
+                            activation_agg = activation_full_seq[-1]
+                        elif aggregation_strategy.name == 'FIRST_TOKEN':
+                            activation_agg = activation_full_seq[0]
+                        elif aggregation_strategy.name == 'MAX_POOLING':
+                            activation_agg = activation_full_seq.max(dim=0)[0]
+                        else:
+                            # Default to mean
+                            activation_agg = activation_full_seq.mean(dim=0)
+
+                        # Get classifier prediction on aggregated vector
+                        act_tensor = activation_agg.unsqueeze(0).float()
+                        pred_proba_result = classifier.predict_proba(act_tensor)
+                        # Handle both float (single sample) and list return types
+                        pred_proba = pred_proba_result if isinstance(pred_proba_result, float) else pred_proba_result[0]
+                        pred_label = int(pred_proba > args.detection_threshold)
+
+                        # Ground truth from evaluator
+                        ground_truth = 1 if eval_result.ground_truth == "TRUTHFUL" else 0
+
+                        # Compute per-token classifier scores
+                        # For each token, get classifier probability
+                        token_scores = []
+                        for token_idx in range(activation_full_seq.shape[0]):
+                            token_act = activation_full_seq[token_idx].unsqueeze(0).float()
+                            token_proba_result = classifier.predict_proba(token_act)
+                            token_proba = token_proba_result if isinstance(token_proba_result, float) else token_proba_result[0]
+                            token_scores.append(float(token_proba))
+
+                        generation_results.append({
+                            'question': question,
+                            'response': response,
+                            'expected': expected,
+                            'eval_result': eval_result.ground_truth,
+                            'classifier_pred': pred_label,
+                            'classifier_proba': float(pred_proba),
+                            'correct': pred_label == ground_truth,
+                            'token_scores': token_scores,  # Per-token classifier probabilities
+                            'num_tokens': len(token_scores)
+                        })
+
+        print(f"\n ✓ Evaluated {len(generation_results)} generations")
+
+        # Calculate real-world metrics
+        if generation_results:
+            correct_predictions = sum(1 for r in generation_results if r['correct'])
+            real_accuracy = correct_predictions / len(generation_results)
+
+            # Calculate precision, recall, F1 on real generations
+            true_positives = sum(1 for r in generation_results if r['classifier_pred'] == 1 and r['eval_result'] == 'TRUTHFUL')
+            false_positives = sum(1 for r in generation_results if r['classifier_pred'] == 1 and r['eval_result'] == 'UNTRUTHFUL')
+            false_negatives = sum(1 for r in generation_results if r['classifier_pred'] == 0 and r['eval_result'] == 'TRUTHFUL')
+
+            real_precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
+            real_recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
+            real_f1 = 2 * (real_precision * real_recall) / (real_precision + real_recall) if (real_precision + real_recall) > 0 else 0
+
+            print(f"\n 📊 Real-world performance (on actual generations):")
+            print(f" • Accuracy: {real_accuracy:.4f}")
+            print(f" • Precision: {real_precision:.4f}")
+            print(f" • Recall: {real_recall:.4f}")
+            print(f" • F1 Score: {real_f1:.4f}")
+        else:
+            real_accuracy = real_f1 = real_precision = real_recall = 0.0
+            generation_results = []
 
         # 9. Save classifier if requested
         if args.save_classifier:
@@ -172,8 +408,37 @@ def execute_tasks(args):
                 json.dump(report.asdict(), f, indent=2)
             print(f" ✓ Training report saved to: {report_path}")
 
+            # Save generation details with token scores
+            if generation_results:
+                generation_path = os.path.join(args.output, 'generation_details.json')
+                with open(generation_path, 'w') as f:
+                    json.dump({
+                        'task': args.task_names,
+                        'model': args.model,
+                        'layer': layer,
+                        'aggregation': args.token_aggregation,
+                        'threshold': args.detection_threshold,
+                        'num_generations': len(generation_results),
+                        'generations': generation_results
+                    }, f, indent=2)
+                print(f" ✓ Generation details (with token scores) saved to: {generation_path}")
+
         print(f"\n✅ Task completed successfully!\n")
 
+        # Return results for programmatic access
+        return {
+            # Real-world metrics (on actual generations) - THE ONLY METRICS THAT MATTER
+            "accuracy": float(real_accuracy),
+            "f1_score": float(real_f1),
+            "precision": float(real_precision),
+            "recall": float(real_recall),
+            "generation_count": len(generation_results),
+            # Metadata
+            "best_epoch": report.best_epoch,
+            "epochs_ran": report.epochs_ran,
+            "generation_details": generation_results
+        }
+
     except Exception as e:
         print(f"\n❌ Error: {str(e)}", file=sys.stderr)
         if args.verbose:
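
For orientation, a minimal sketch of how the steering-vector file written by the new save-steering-vector branch above could be read back. The file name is a placeholder and the shape comment is an assumption based on the CAA difference-of-means construction; only the dictionary keys come from the diff.

```python
# Sketch only: inspect a vector saved by the torch.save(...) call in the diff above.
# "steering_vector.pt" is a placeholder path, not a file shipped with the package.
import torch

data = torch.load("steering_vector.pt", map_location="cpu")

vector = data["steering_vector"]   # new-style key; typically a (hidden_dim,) tensor
layer = data["layer_index"]
method = data["method"]

# Legacy keys are written alongside the new ones; the inference-only branch
# (args.inference_only + args.load_steering_vector) reads 'vector' and 'layer'.
assert torch.equal(data["vector"], vector)
assert data["layer"] == layer

print(f"method={method}, layer={layer}, shape={tuple(vector.shape)}")
```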
wisent/core/errors/error_handler.py ADDED
@@ -0,0 +1,134 @@
+"""Comprehensive error handling for Wisent.
+
+This module provides informative error classes and utilities for proper error handling
+throughout the codebase. NO FALLBACKS - errors should be raised immediately with
+detailed information about what went wrong and how to fix it.
+"""
+
+import logging
+from typing import Optional, Any, Dict
+
+logger = logging.getLogger(__name__)
+
+
+class WisentError(Exception):
+    """Base exception for all Wisent errors."""
+
+    def __init__(self, message: str, details: Optional[Dict[str, Any]] = None):
+        self.message = message
+        self.details = details or {}
+        super().__init__(self.message)
+
+    def __str__(self):
+        if self.details:
+            details_str = "\n".join(f" - {k}: {v}" for k, v in self.details.items())
+            return f"{self.message}\nDetails:\n{details_str}"
+        return self.message
+
+
+class EvaluationError(WisentError):
+    """Raised when evaluation fails."""
+    pass
+
+
+class MissingParameterError(EvaluationError):
+    """Raised when required parameters are missing for evaluation."""
+
+    def __init__(self, missing_params: list, evaluator_name: str, task_name: Optional[str] = None):
+        message = f"Evaluator '{evaluator_name}' requires missing parameters: {', '.join(missing_params)}"
+        details = {
+            "evaluator": evaluator_name,
+            "missing_parameters": missing_params,
+            "task": task_name or "unknown"
+        }
+        super().__init__(message, details)
+
+
+class InvalidChoicesError(EvaluationError):
+    """Raised when choices are invalid or missing for multiple choice evaluation."""
+
+    def __init__(self, reason: str, task_name: str, choices: Optional[list] = None):
+        message = f"Invalid choices for task '{task_name}': {reason}"
+        details = {
+            "task": task_name,
+            "reason": reason,
+            "choices_provided": choices
+        }
+        super().__init__(message, details)
+
+
+class ModelNotProvidedError(EvaluationError):
+    """Raised when model is required but not provided."""
+
+    def __init__(self, evaluator_name: str, task_name: str):
+        message = (
+            f"Evaluator '{evaluator_name}' requires a model for log likelihood computation, "
+            f"but none was provided for task '{task_name}'. "
+            f"Pass model=<WisentModel> in kwargs to evaluate()."
+        )
+        details = {
+            "evaluator": evaluator_name,
+            "task": task_name,
+            "solution": "Pass model parameter in kwargs"
+        }
+        super().__init__(message, details)
+
+
+def require_all_parameters(params: Dict[str, Any], context: str, task_name: Optional[str] = None):
+    """Raise error if any required parameters are None or missing.
+
+    Args:
+        params: Dict of parameter_name -> value
+        context: Context where parameters are required
+        task_name: Optional task name for better error messages
+
+    Raises:
+        MissingParameterError: If any parameters are None
+    """
+    missing = [name for name, value in params.items() if value is None]
+    if missing:
+        raise MissingParameterError(
+            missing_params=missing,
+            evaluator_name=context,
+            task_name=task_name
+        )
+
+
+def validate_choices(choices: Optional[list], task_name: str, min_choices: int = 2):
+    """Validate that choices are provided and valid.
+
+    Args:
+        choices: List of answer choices
+        task_name: Name of the task
+        min_choices: Minimum number of choices required
+
+    Raises:
+        InvalidChoicesError: If choices are invalid
+    """
+    if choices is None:
+        raise InvalidChoicesError(
+            reason="No choices provided",
+            task_name=task_name,
+            choices=None
+        )
+
+    if not isinstance(choices, list):
+        raise InvalidChoicesError(
+            reason=f"Choices must be a list, got {type(choices).__name__}",
+            task_name=task_name,
+            choices=choices
+        )
+
+    if len(choices) < min_choices:
+        raise InvalidChoicesError(
+            reason=f"Need at least {min_choices} choices, got {len(choices)}",
+            task_name=task_name,
+            choices=choices
+        )
+
+    if any(not isinstance(c, str) or not c.strip() for c in choices):
+        raise InvalidChoicesError(
+            reason="All choices must be non-empty strings",
+            task_name=task_name,
+            choices=choices
+        )
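
To make the intended call pattern of the new error module concrete, here is a small usage sketch. The module path and helper names come from this diff; the evaluator/context name, task name, and argument values are illustrative stand-ins, not code taken from the package.

```python
# Hypothetical call site using the helpers added in error_handler.py.
from wisent.core.errors.error_handler import (
    MissingParameterError,
    require_all_parameters,
    validate_choices,
)

def evaluate(response, expected=None, choices=None, task_name="example_task"):
    # Fail fast with MissingParameterError if any required argument is None.
    require_all_parameters(
        {"response": response, "expected": expected},
        context="example_evaluator",
        task_name=task_name,
    )
    # Fail fast with InvalidChoicesError if choices are missing or malformed.
    validate_choices(choices, task_name=task_name, min_choices=2)
    return True

try:
    evaluate("Paris is the capital of France.")
except MissingParameterError as err:
    # __str__ prints the message plus the "Details:" block built by WisentError.
    print(err)
```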