wisent 0.1.1__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wisent might be problematic. Click here for more details.

Files changed (237) hide show
  1. wisent/__init__.py +1 -8
  2. wisent/benchmarks/__init__.py +0 -0
  3. wisent/benchmarks/coding/__init__.py +0 -0
  4. wisent/benchmarks/coding/metrics/__init__.py +0 -0
  5. wisent/benchmarks/coding/metrics/core/__init__.py +0 -0
  6. wisent/benchmarks/coding/metrics/core/atoms.py +36 -0
  7. wisent/benchmarks/coding/metrics/evaluator.py +275 -0
  8. wisent/benchmarks/coding/metrics/passk.py +66 -0
  9. wisent/benchmarks/coding/output_sanitizer/__init__.py +0 -0
  10. wisent/benchmarks/coding/output_sanitizer/core/__init__.py +0 -0
  11. wisent/benchmarks/coding/output_sanitizer/core/atoms.py +27 -0
  12. wisent/benchmarks/coding/output_sanitizer/cpp_sanitizer.py +62 -0
  13. wisent/benchmarks/coding/output_sanitizer/java_sanitizer.py +78 -0
  14. wisent/benchmarks/coding/output_sanitizer/python_sanitizer.py +94 -0
  15. wisent/benchmarks/coding/output_sanitizer/utils.py +107 -0
  16. wisent/benchmarks/coding/providers/__init__.py +18 -0
  17. wisent/benchmarks/coding/providers/core/__init__.py +0 -0
  18. wisent/benchmarks/coding/providers/core/atoms.py +31 -0
  19. wisent/benchmarks/coding/providers/livecodebench/__init__.py +0 -0
  20. wisent/benchmarks/coding/providers/livecodebench/provider.py +53 -0
  21. wisent/benchmarks/coding/safe_docker/__init__.py +0 -0
  22. wisent/benchmarks/coding/safe_docker/core/__init__.py +0 -0
  23. wisent/benchmarks/coding/safe_docker/core/atoms.py +105 -0
  24. wisent/benchmarks/coding/safe_docker/core/runtime.py +118 -0
  25. wisent/benchmarks/coding/safe_docker/entrypoint.py +123 -0
  26. wisent/benchmarks/coding/safe_docker/recipes.py +60 -0
  27. wisent/classifiers/__init__.py +0 -0
  28. wisent/classifiers/core/__init__.py +0 -0
  29. wisent/classifiers/core/atoms.py +747 -0
  30. wisent/classifiers/models/__init__.py +0 -0
  31. wisent/classifiers/models/logistic.py +29 -0
  32. wisent/classifiers/models/mlp.py +47 -0
  33. wisent/cli/__init__.py +0 -0
  34. wisent/cli/classifiers/__init__.py +0 -0
  35. wisent/cli/classifiers/classifier_rotator.py +137 -0
  36. wisent/cli/cli_logger.py +142 -0
  37. wisent/cli/data_loaders/__init__.py +0 -0
  38. wisent/cli/data_loaders/data_loader_rotator.py +96 -0
  39. wisent/cli/evaluators/__init__.py +0 -0
  40. wisent/cli/evaluators/evaluator_rotator.py +148 -0
  41. wisent/cli/steering_methods/__init__.py +0 -0
  42. wisent/cli/steering_methods/steering_rotator.py +110 -0
  43. wisent/cli/wisent_cli/__init__.py +0 -0
  44. wisent/cli/wisent_cli/commands/__init__.py +0 -0
  45. wisent/cli/wisent_cli/commands/help_cmd.py +52 -0
  46. wisent/cli/wisent_cli/commands/listing.py +154 -0
  47. wisent/cli/wisent_cli/commands/train_cmd.py +322 -0
  48. wisent/cli/wisent_cli/main.py +93 -0
  49. wisent/cli/wisent_cli/shell.py +80 -0
  50. wisent/cli/wisent_cli/ui.py +69 -0
  51. wisent/cli/wisent_cli/util/__init__.py +0 -0
  52. wisent/cli/wisent_cli/util/aggregations.py +43 -0
  53. wisent/cli/wisent_cli/util/parsing.py +126 -0
  54. wisent/cli/wisent_cli/version.py +4 -0
  55. wisent/core/__init__.py +27 -0
  56. wisent/core/activations/__init__.py +0 -0
  57. wisent/core/activations/activations_collector.py +338 -0
  58. wisent/core/activations/core/__init__.py +0 -0
  59. wisent/core/activations/core/atoms.py +216 -0
  60. wisent/core/agent/__init__.py +18 -0
  61. wisent/core/agent/budget.py +638 -0
  62. wisent/core/agent/device_benchmarks.py +685 -0
  63. wisent/core/agent/diagnose/__init__.py +55 -0
  64. wisent/core/agent/diagnose/agent_classifier_decision.py +641 -0
  65. wisent/core/agent/diagnose/classifier_marketplace.py +554 -0
  66. wisent/core/agent/diagnose/create_classifier.py +1154 -0
  67. wisent/core/agent/diagnose/response_diagnostics.py +268 -0
  68. wisent/core/agent/diagnose/select_classifiers.py +506 -0
  69. wisent/core/agent/diagnose/synthetic_classifier_option.py +754 -0
  70. wisent/core/agent/diagnose/tasks/__init__.py +33 -0
  71. wisent/core/agent/diagnose/tasks/task_manager.py +1456 -0
  72. wisent/core/agent/diagnose/tasks/task_relevance.py +94 -0
  73. wisent/core/agent/diagnose/tasks/task_selector.py +151 -0
  74. wisent/core/agent/diagnose/test_synthetic_classifier.py +71 -0
  75. wisent/core/agent/diagnose.py +242 -0
  76. wisent/core/agent/steer.py +212 -0
  77. wisent/core/agent/timeout.py +134 -0
  78. wisent/core/autonomous_agent.py +1234 -0
  79. wisent/core/bigcode_integration.py +583 -0
  80. wisent/core/contrastive_pairs/__init__.py +15 -0
  81. wisent/core/contrastive_pairs/core/__init__.py +0 -0
  82. wisent/core/contrastive_pairs/core/atoms.py +45 -0
  83. wisent/core/contrastive_pairs/core/buliders.py +59 -0
  84. wisent/core/contrastive_pairs/core/pair.py +178 -0
  85. wisent/core/contrastive_pairs/core/response.py +152 -0
  86. wisent/core/contrastive_pairs/core/serialization.py +300 -0
  87. wisent/core/contrastive_pairs/core/set.py +133 -0
  88. wisent/core/contrastive_pairs/diagnostics/__init__.py +45 -0
  89. wisent/core/contrastive_pairs/diagnostics/activations.py +53 -0
  90. wisent/core/contrastive_pairs/diagnostics/base.py +73 -0
  91. wisent/core/contrastive_pairs/diagnostics/control_vectors.py +169 -0
  92. wisent/core/contrastive_pairs/diagnostics/coverage.py +79 -0
  93. wisent/core/contrastive_pairs/diagnostics/divergence.py +98 -0
  94. wisent/core/contrastive_pairs/diagnostics/duplicates.py +116 -0
  95. wisent/core/contrastive_pairs/lm_eval_pairs/__init__.py +0 -0
  96. wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +238 -0
  97. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +8 -0
  98. wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +132 -0
  99. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/__init__.py +0 -0
  100. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +115 -0
  101. wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +50 -0
  102. wisent/core/data_loaders/__init__.py +0 -0
  103. wisent/core/data_loaders/core/__init__.py +0 -0
  104. wisent/core/data_loaders/core/atoms.py +98 -0
  105. wisent/core/data_loaders/loaders/__init__.py +0 -0
  106. wisent/core/data_loaders/loaders/custom.py +120 -0
  107. wisent/core/data_loaders/loaders/lm_loader.py +218 -0
  108. wisent/core/detection_handling.py +257 -0
  109. wisent/core/download_full_benchmarks.py +1386 -0
  110. wisent/core/evaluators/__init__.py +0 -0
  111. wisent/core/evaluators/oracles/__init__.py +0 -0
  112. wisent/core/evaluators/oracles/interactive.py +73 -0
  113. wisent/core/evaluators/oracles/nlp_evaluator.py +440 -0
  114. wisent/core/evaluators/oracles/user_specified.py +67 -0
  115. wisent/core/hyperparameter_optimizer.py +429 -0
  116. wisent/core/lm_eval_harness_ground_truth.py +1396 -0
  117. wisent/core/log_likelihoods_evaluator.py +321 -0
  118. wisent/core/managed_cached_benchmarks.py +595 -0
  119. wisent/core/mixed_benchmark_sampler.py +364 -0
  120. wisent/core/model_config_manager.py +330 -0
  121. wisent/core/model_persistence.py +317 -0
  122. wisent/core/models/__init__.py +0 -0
  123. wisent/core/models/core/__init__.py +0 -0
  124. wisent/core/models/core/atoms.py +460 -0
  125. wisent/core/models/wisent_model.py +727 -0
  126. wisent/core/multi_steering.py +316 -0
  127. wisent/core/optuna/__init__.py +57 -0
  128. wisent/core/optuna/classifier/__init__.py +25 -0
  129. wisent/core/optuna/classifier/activation_generator.py +349 -0
  130. wisent/core/optuna/classifier/classifier_cache.py +509 -0
  131. wisent/core/optuna/classifier/optuna_classifier_optimizer.py +606 -0
  132. wisent/core/optuna/steering/__init__.py +0 -0
  133. wisent/core/optuna/steering/bigcode_evaluator_wrapper.py +188 -0
  134. wisent/core/optuna/steering/data_utils.py +342 -0
  135. wisent/core/optuna/steering/metrics.py +474 -0
  136. wisent/core/optuna/steering/optuna_pipeline.py +1738 -0
  137. wisent/core/optuna/steering/steering_optimization.py +1111 -0
  138. wisent/core/parser.py +1668 -0
  139. wisent/core/prompts/__init__.py +0 -0
  140. wisent/core/prompts/core/__init__.py +0 -0
  141. wisent/core/prompts/core/atom.py +57 -0
  142. wisent/core/prompts/core/prompt_formater.py +157 -0
  143. wisent/core/prompts/prompt_stratiegies/__init__.py +0 -0
  144. wisent/core/prompts/prompt_stratiegies/direct_completion.py +24 -0
  145. wisent/core/prompts/prompt_stratiegies/instruction_following.py +24 -0
  146. wisent/core/prompts/prompt_stratiegies/multiple_choice.py +29 -0
  147. wisent/core/prompts/prompt_stratiegies/role_playing.py +31 -0
  148. wisent/core/representation.py +5 -0
  149. wisent/core/sample_size_optimizer.py +648 -0
  150. wisent/core/sample_size_optimizer_v2.py +355 -0
  151. wisent/core/save_results.py +277 -0
  152. wisent/core/steering.py +652 -0
  153. wisent/core/steering_method.py +26 -0
  154. wisent/core/steering_methods/__init__.py +0 -0
  155. wisent/core/steering_methods/core/__init__.py +0 -0
  156. wisent/core/steering_methods/core/atoms.py +153 -0
  157. wisent/core/steering_methods/methods/__init__.py +0 -0
  158. wisent/core/steering_methods/methods/caa.py +44 -0
  159. wisent/core/steering_optimizer.py +1297 -0
  160. wisent/core/task_interface.py +132 -0
  161. wisent/core/task_selector.py +189 -0
  162. wisent/core/tasks/__init__.py +175 -0
  163. wisent/core/tasks/aime_task.py +141 -0
  164. wisent/core/tasks/file_task.py +211 -0
  165. wisent/core/tasks/hle_task.py +180 -0
  166. wisent/core/tasks/hmmt_task.py +119 -0
  167. wisent/core/tasks/livecodebench_task.py +201 -0
  168. wisent/core/tasks/livemathbench_task.py +158 -0
  169. wisent/core/tasks/lm_eval_task.py +455 -0
  170. wisent/core/tasks/math500_task.py +84 -0
  171. wisent/core/tasks/polymath_task.py +146 -0
  172. wisent/core/tasks/supergpqa_task.py +220 -0
  173. wisent/core/time_estimator.py +149 -0
  174. wisent/core/timing_calibration.py +174 -0
  175. wisent/core/tracking/__init__.py +54 -0
  176. wisent/core/tracking/latency.py +618 -0
  177. wisent/core/tracking/memory.py +359 -0
  178. wisent/core/trainers/__init__.py +0 -0
  179. wisent/core/trainers/core/__init__.py +11 -0
  180. wisent/core/trainers/core/atoms.py +45 -0
  181. wisent/core/trainers/steering_trainer.py +271 -0
  182. wisent/core/user_model_config.py +158 -0
  183. wisent/opti/__init__.py +0 -0
  184. wisent/opti/core/__init__.py +0 -0
  185. wisent/opti/core/atoms.py +175 -0
  186. wisent/opti/methods/__init__.py +0 -0
  187. wisent/opti/methods/opti_classificator.py +172 -0
  188. wisent/opti/methods/opti_steering.py +138 -0
  189. wisent/synthetic/__init__.py +0 -0
  190. wisent/synthetic/cleaners/__init__.py +0 -0
  191. wisent/synthetic/cleaners/core/__init__.py +0 -0
  192. wisent/synthetic/cleaners/core/atoms.py +58 -0
  193. wisent/synthetic/cleaners/deduper_cleaner.py +53 -0
  194. wisent/synthetic/cleaners/methods/__init__.py +0 -0
  195. wisent/synthetic/cleaners/methods/base_dedupers.py +320 -0
  196. wisent/synthetic/cleaners/methods/base_refusalers.py +286 -0
  197. wisent/synthetic/cleaners/methods/core/__init__.py +0 -0
  198. wisent/synthetic/cleaners/methods/core/atoms.py +47 -0
  199. wisent/synthetic/cleaners/pairs_cleaner.py +90 -0
  200. wisent/synthetic/cleaners/refusaler_cleaner.py +133 -0
  201. wisent/synthetic/db_instructions/__init__.py +0 -0
  202. wisent/synthetic/db_instructions/core/__init__.py +0 -0
  203. wisent/synthetic/db_instructions/core/atoms.py +25 -0
  204. wisent/synthetic/db_instructions/mini_dp.py +37 -0
  205. wisent/synthetic/generators/__init__.py +0 -0
  206. wisent/synthetic/generators/core/__init__.py +0 -0
  207. wisent/synthetic/generators/core/atoms.py +73 -0
  208. wisent/synthetic/generators/diversities/__init__.py +0 -0
  209. wisent/synthetic/generators/diversities/core/__init__.py +0 -0
  210. wisent/synthetic/generators/diversities/core/core.py +68 -0
  211. wisent/synthetic/generators/diversities/methods/__init__.py +0 -0
  212. wisent/synthetic/generators/diversities/methods/fast_diversity.py +249 -0
  213. wisent/synthetic/generators/pairs_generator.py +179 -0
  214. wisent-0.5.1.dist-info/METADATA +67 -0
  215. wisent-0.5.1.dist-info/RECORD +218 -0
  216. {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info}/WHEEL +1 -1
  217. {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info/licenses}/LICENSE +2 -2
  218. wisent/activations/__init__.py +0 -9
  219. wisent/activations/client.py +0 -97
  220. wisent/activations/extractor.py +0 -251
  221. wisent/activations/models.py +0 -95
  222. wisent/client.py +0 -45
  223. wisent/control_vector/__init__.py +0 -9
  224. wisent/control_vector/client.py +0 -85
  225. wisent/control_vector/manager.py +0 -168
  226. wisent/control_vector/models.py +0 -70
  227. wisent/inference/__init__.py +0 -9
  228. wisent/inference/client.py +0 -103
  229. wisent/inference/inferencer.py +0 -250
  230. wisent/inference/models.py +0 -66
  231. wisent/utils/__init__.py +0 -3
  232. wisent/utils/auth.py +0 -30
  233. wisent/utils/http.py +0 -228
  234. wisent/version.py +0 -3
  235. wisent-0.1.1.dist-info/METADATA +0 -142
  236. wisent-0.1.1.dist-info/RECORD +0 -23
  237. {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,618 @@
1
+ """
2
+ Latency tracking for wisent-guard operations.
3
+
4
+ This module provides comprehensive timing and performance monitoring capabilities
5
+ for all aspects of the wisent-guard pipeline including model operations,
6
+ steering computations, and text generation.
7
+ """
8
+
9
+ import time
10
+ import statistics
11
+ from typing import Dict, List, Optional, Any, Callable, Union
12
+ from dataclasses import dataclass, field
13
+ from contextlib import contextmanager
14
+ from collections import defaultdict
15
+ import functools
16
+
17
+
18
@dataclass
class TimingEvent:
    """A single completed measurement of one named operation."""

    # Operation name the measurement belongs to.
    name: str
    # Timestamps bracketing the operation, and their difference in seconds.
    start_time: float
    end_time: float
    duration: float
    # Free-form extra information attached to the measurement.
    metadata: Dict[str, Any] = field(default_factory=dict)
    # Name of the enclosing operation, if any.
    parent: Optional[str] = None

    @property
    def duration_ms(self) -> float:
        """Return the duration expressed in milliseconds."""
        seconds = self.duration
        return seconds * 1000
32
+
33
+
34
@dataclass
class LatencyStats:
    """Summary statistics over every recorded event of one operation."""

    operation: str
    count: int
    # All time fields below are in seconds.
    total_time: float
    mean_time: float
    median_time: float
    min_time: float
    max_time: float
    std_dev: float
    percentile_95: float
    percentile_99: float
    # The events the statistics were computed from.
    events: List[TimingEvent] = field(default_factory=list)

    @property
    def mean_time_ms(self) -> float:
        """Return the mean time converted to milliseconds."""
        mean_seconds = self.mean_time
        return mean_seconds * 1000

    @property
    def total_time_ms(self) -> float:
        """Return the total time converted to milliseconds."""
        total_seconds = self.total_time
        return total_seconds * 1000
58
+
59
+
60
@dataclass
class GenerationMetrics:
    """Performance figures for one text generation, intended for display."""

    time_to_first_token: float  # seconds
    total_generation_time: float  # seconds
    token_count: int
    tokens_per_second: float
    prompt_length: int = 0

    @property
    def ttft_ms(self) -> float:
        """Return the time to first token converted to milliseconds."""
        ttft_seconds = self.time_to_first_token
        return ttft_seconds * 1000

    @property
    def total_time_ms(self) -> float:
        """Return the total generation time converted to milliseconds."""
        total_seconds = self.total_generation_time
        return total_seconds * 1000
78
+
79
+
80
@dataclass
class TrainingMetrics:
    """Performance figures for one training run, intended for display."""

    total_training_time: float  # seconds
    training_samples: int
    method: str
    success: bool = True
    error_message: Optional[str] = None

    @property
    def training_time_ms(self) -> float:
        """Return the training time converted to milliseconds."""
        elapsed_seconds = self.total_training_time
        return elapsed_seconds * 1000

    @property
    def samples_per_second(self) -> float:
        """Return the training throughput in samples per second.

        Yields 0 when the recorded training time is not positive, so the
        division is never attempted with a zero or negative denominator.
        """
        elapsed_seconds = self.total_training_time
        if elapsed_seconds > 0:
            return self.training_samples / elapsed_seconds
        return 0
98
+
99
+
100
class LatencyTracker:
    """
    Record timings for named operations and summarise them.

    Operations are bracketed with start_operation/end_operation (or the
    time_operation/time_generation context managers). Each completed
    operation becomes a TimingEvent in ``self.events``. Operations that are
    started while another one is still open are recorded with that open
    operation's name as their ``parent``.
    """

    def __init__(self, auto_start: bool = True):
        """
        Initialize latency tracker.

        Args:
            auto_start: Whether to automatically start tracking
        """
        self.events: List[TimingEvent] = []
        # Operation ID -> start timestamp for operations not yet ended.
        self.active_operations: Dict[str, float] = {}
        # IDs of open operations in start order; used to derive parents.
        self.operation_stack: List[str] = []
        self.is_tracking = auto_start
        self.start_time = time.time() if auto_start else None
        # Metadata handed to start_operation, kept until the operation ends.
        self._pending_metadata: Dict[str, Dict[str, Any]] = {}
        # Monotonic suffix so two open operations with the same name never
        # share an ID (a suffix based on len(self.events) would collide).
        self._operation_counter = 0

    def start_tracking(self) -> None:
        """Start or resume latency tracking."""
        self.is_tracking = True
        if self.start_time is None:
            self.start_time = time.time()

    def stop_tracking(self) -> None:
        """Stop latency tracking."""
        self.is_tracking = False

    def start_operation(
        self,
        name: str,
        metadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """
        Start timing an operation.

        Args:
            name: Name of the operation
            metadata: Optional metadata to store with the event; it is
                merged with any metadata passed to end_operation

        Returns:
            Operation ID for later reference. When tracking is disabled
            nothing is recorded and ``name`` itself is returned.
        """
        if not self.is_tracking:
            return name

        current_time = time.time()
        operation_id = f"{name}_{self._operation_counter}"
        self._operation_counter += 1

        self.active_operations[operation_id] = current_time
        self.operation_stack.append(operation_id)
        if metadata:
            # Copy so later mutation by the caller cannot alter the event.
            self._pending_metadata[operation_id] = dict(metadata)

        return operation_id

    def end_operation(
        self,
        operation_id: str,
        metadata: Optional[Dict[str, Any]] = None
    ) -> Optional[TimingEvent]:
        """
        End timing an operation.

        Args:
            operation_id: ID returned from start_operation
            metadata: Additional metadata to store; keys given here override
                the same keys supplied at start_operation

        Returns:
            TimingEvent if operation was found, None otherwise (also None
            when tracking is currently disabled)
        """
        if not self.is_tracking or operation_id not in self.active_operations:
            return None

        end_time = time.time()
        start_time = self.active_operations.pop(operation_id)
        duration = end_time - start_time

        # The ID is "<name>_<counter>"; strip the counter to recover the name.
        name = operation_id.rsplit('_', 1)[0]

        # The parent is whichever operation was opened immediately before
        # this one and is still on the stack.
        parent = None
        if operation_id in self.operation_stack:
            stack_index = self.operation_stack.index(operation_id)
            if stack_index > 0:
                parent_id = self.operation_stack[stack_index - 1]
                parent = parent_id.rsplit('_', 1)[0]
            del self.operation_stack[stack_index]

        # Merge start-time metadata with end-time metadata (end wins).
        combined_metadata = self._pending_metadata.pop(operation_id, {})
        if metadata:
            combined_metadata.update(metadata)

        event = TimingEvent(
            name=name,
            start_time=start_time,
            end_time=end_time,
            duration=duration,
            metadata=combined_metadata,
            parent=parent
        )

        self.events.append(event)
        return event

    @contextmanager
    def time_operation(
        self,
        name: str,
        metadata: Optional[Dict[str, Any]] = None
    ):
        """
        Context manager for timing operations.

        Args:
            name: Name of the operation
            metadata: Optional metadata to store

        Yields:
            A dict ``{"event": None}``; its "event" entry is set to the
            resulting TimingEvent (or None if nothing was recorded) when
            the context exits.
        """
        operation_id = self.start_operation(name, metadata)
        event_placeholder = {"event": None}

        try:
            yield event_placeholder
        finally:
            # Runs on exceptions too, so failed operations are still timed.
            event = self.end_operation(operation_id, metadata)
            event_placeholder["event"] = event

    @contextmanager
    def time_generation(self, name: str = "response_generation", prompt_length: int = 0):
        """
        Context manager for timing text generation with TTFT tracking.

        Args:
            name: Name of the generation operation
            prompt_length: Length of the input prompt in tokens

        Yields:
            Dict holding "first_token_time" and "token_count" plus two
            callables: "mark_first_token"() and "update_tokens"(count).
        """
        start_time = time.time()
        operation_id = self.start_operation(name, {"prompt_length": prompt_length})

        generation_state = {
            "first_token_time": None,
            "token_count": 0
        }

        def mark_first_token() -> None:
            # Only the first call counts; calling this once per streamed
            # token must not turn TTFT into the last-token time.
            if generation_state["first_token_time"] is None:
                generation_state["first_token_time"] = time.time()

        def update_tokens(count: int) -> None:
            generation_state["token_count"] = count

        generation_state["mark_first_token"] = mark_first_token
        generation_state["update_tokens"] = update_tokens

        try:
            yield generation_state
        finally:
            end_time = time.time()
            total_duration = end_time - start_time

            # TTFT is reported as 0.0 when the first token was never marked.
            first_token_time = generation_state["first_token_time"]
            ttft = first_token_time - start_time if first_token_time is not None else 0.0

            token_count = generation_state["token_count"]
            tokens_per_sec = token_count / total_duration if total_duration > 0 else 0.0

            metadata = {
                "prompt_length": prompt_length,
                "time_to_first_token": ttft,
                "token_count": token_count,
                "tokens_per_second": tokens_per_sec
            }

            self.end_operation(operation_id, metadata)

    def get_stats(self, operation_name: Optional[str] = None) -> Union[LatencyStats, Dict[str, LatencyStats]]:
        """
        Get latency statistics.

        Args:
            operation_name: Specific operation to get stats for, or None for all

        Returns:
            LatencyStats for specific operation or dict of all operation stats

        Raises:
            ValueError: If ``operation_name`` is given but has no events
        """
        if operation_name:
            events = [e for e in self.events if e.name == operation_name]
            if not events:
                raise ValueError(f"No events found for operation: {operation_name}")
            return self._calculate_stats(operation_name, events)

        # Group events by operation name
        operation_events = defaultdict(list)
        for event in self.events:
            operation_events[event.name].append(event)

        return {
            name: self._calculate_stats(name, events)
            for name, events in operation_events.items()
        }

    def _calculate_stats(self, operation: str, events: List[TimingEvent]) -> LatencyStats:
        """Calculate statistics for a list of timing events."""
        # Sorted once up front because _percentile requires ordered input.
        durations = sorted(e.duration for e in events)

        if not durations:
            return LatencyStats(
                operation=operation,
                count=0,
                total_time=0,
                mean_time=0,
                median_time=0,
                min_time=0,
                max_time=0,
                std_dev=0,
                percentile_95=0,
                percentile_99=0,
                events=[]
            )

        return LatencyStats(
            operation=operation,
            count=len(durations),
            total_time=sum(durations),
            mean_time=statistics.mean(durations),
            median_time=statistics.median(durations),
            min_time=min(durations),
            max_time=max(durations),
            # statistics.stdev needs at least two data points.
            std_dev=statistics.stdev(durations) if len(durations) > 1 else 0,
            percentile_95=self._percentile(durations, 95),
            percentile_99=self._percentile(durations, 99),
            events=events.copy()
        )

    def _percentile(self, sorted_data: List[float], percentile: float) -> float:
        """Calculate percentile from sorted data using linear interpolation."""
        if not sorted_data:
            return 0

        # Fractional position of the requested percentile in the data.
        index = (percentile / 100) * (len(sorted_data) - 1)
        if index.is_integer():
            return sorted_data[int(index)]

        # Interpolate between the two neighbouring samples.
        lower = sorted_data[int(index)]
        upper = sorted_data[int(index) + 1]
        return lower + (upper - lower) * (index - int(index))

    def get_timeline(self) -> List[TimingEvent]:
        """Get chronological timeline of all events."""
        return sorted(self.events, key=lambda e: e.start_time)

    def get_hierarchy(self) -> Dict[str, List[TimingEvent]]:
        """Get hierarchical view of operations (parent -> children)."""
        hierarchy = defaultdict(list)

        for event in self.events:
            # Events without a parent are grouped under the key "root".
            parent = event.parent or "root"
            hierarchy[parent].append(event)

        return dict(hierarchy)

    def reset(self) -> None:
        """Reset all tracking data."""
        self.events.clear()
        self.active_operations.clear()
        self.operation_stack.clear()
        self._pending_metadata.clear()
        # The ID counter is deliberately not reset, so an ID handed out
        # before the reset can never match an operation started after it.
        self.start_time = time.time() if self.is_tracking else None

    def get_generation_metrics(self, operation_name: str = "response_generation") -> Optional[GenerationMetrics]:
        """Get user-facing generation metrics from the latest matching event.

        Returns None when no event with ``operation_name`` has been recorded.
        """
        events = [e for e in self.events if e.name == operation_name]
        if not events:
            return None

        # Use the most recent event
        latest_event = events[-1]
        metadata = latest_event.metadata

        return GenerationMetrics(
            time_to_first_token=metadata.get('time_to_first_token', 0.0),
            total_generation_time=latest_event.duration,
            token_count=metadata.get('token_count', 0),
            tokens_per_second=metadata.get('tokens_per_second', 0.0),
            prompt_length=metadata.get('prompt_length', 0)
        )

    def get_training_metrics(self, operation_name: str = "total_training_time") -> Optional[TrainingMetrics]:
        """Get user-facing training metrics from the latest matching event.

        Returns None when no event with ``operation_name`` has been recorded.
        """
        events = [e for e in self.events if e.name == operation_name]
        if not events:
            return None

        latest_event = events[-1]
        metadata = latest_event.metadata

        return TrainingMetrics(
            total_training_time=latest_event.duration,
            training_samples=metadata.get('training_samples', 0),
            method=metadata.get('method', 'unknown'),
            success=metadata.get('success', True),
            error_message=metadata.get('error_message')
        )

    def format_user_metrics(self) -> str:
        """Format user-facing performance metrics."""
        lines = ["🚀 Performance Summary:"]

        # Training metrics
        training_metrics = self.get_training_metrics()
        if training_metrics:
            lines.extend([
                "\n📚 Training:",
                f" Method: {training_metrics.method}",
                f" Total Time: {training_metrics.training_time_ms:.0f} ms",
                f" Samples: {training_metrics.training_samples}",
                f" Speed: {training_metrics.samples_per_second:.1f} samples/sec"
            ])

        # Generation metrics: prefer response_generation, fall back to
        # steered_generation when the former was never recorded.
        generation_metrics = self.get_generation_metrics("response_generation")
        if not generation_metrics:
            generation_metrics = self.get_generation_metrics("steered_generation")

        if generation_metrics and generation_metrics.token_count > 0:
            lines.extend([
                "\n🎭 Generation:",
                f" Time to First Token: {generation_metrics.ttft_ms:.0f} ms",
                f" Total Generation: {generation_metrics.total_time_ms:.0f} ms",
                f" Tokens Generated: {generation_metrics.token_count}",
                f" Speed: {generation_metrics.tokens_per_second:.1f} tokens/sec"
            ])

        # Steering overhead comparison
        steered_events = [e for e in self.events if e.name == "steered_generation"]
        unsteered_events = [e for e in self.events if e.name == "unsteered_generation"]

        if steered_events and unsteered_events:
            steered_avg = sum(e.duration for e in steered_events) / len(steered_events)
            unsteered_avg = sum(e.duration for e in unsteered_events) / len(unsteered_events)

            # A zero baseline (possible with a coarse clock) has no
            # meaningful relative overhead; avoid dividing by it.
            if unsteered_avg > 0:
                overhead = ((steered_avg - unsteered_avg) / unsteered_avg) * 100
                overhead_line = f" Overhead: {overhead:+.1f}%"
            else:
                overhead_line = " Overhead: n/a"

            lines.extend([
                "\n⚡ Steering Overhead:",
                f" Unsteered Avg: {unsteered_avg * 1000:.0f} ms ({len(unsteered_events)} runs)",
                f" Steered Avg: {steered_avg * 1000:.0f} ms ({len(steered_events)} runs)",
                overhead_line
            ])
        elif steered_events:
            # Show steered performance even without comparison
            steered_avg = sum(e.duration for e in steered_events) / len(steered_events)
            lines.extend([
                "\n🎯 Steered Generation:",
                f" Average Time: {steered_avg * 1000:.0f} ms ({len(steered_events)} runs)"
            ])
        elif unsteered_events:
            # Show unsteered performance even without comparison
            unsteered_avg = sum(e.duration for e in unsteered_events) / len(unsteered_events)
            lines.extend([
                "\n🔄 Unsteered Generation:",
                f" Average Time: {unsteered_avg * 1000:.0f} ms ({len(unsteered_events)} runs)"
            ])

        # Show warning if no generation metrics found
        if not generation_metrics or generation_metrics.token_count == 0:
            lines.extend([
                "\n⚠️ No generation metrics available",
                " (Responses may be empty or timing failed)"
            ])

        return '\n'.join(lines)

    def format_stats(
        self,
        stats: Union[LatencyStats, Dict[str, LatencyStats]],
        detailed: bool = False
    ) -> str:
        """Format latency statistics as a readable string.

        Args:
            stats: A single LatencyStats or a mapping of name -> LatencyStats,
                i.e. either shape returned by get_stats
            detailed: When True, also list the most recent events
        """
        if isinstance(stats, LatencyStats):
            return self._format_single_stats(stats, detailed)

        lines = ["Latency Statistics Summary:"]
        for operation, op_stats in stats.items():
            lines.append(f"\n{operation}:")
            lines.extend([f" {line}" for line in self._format_single_stats(op_stats, detailed).split('\n')])
        return '\n'.join(lines)

    def _format_single_stats(self, stats: LatencyStats, detailed: bool) -> str:
        """Format statistics for a single operation."""
        lines = [
            f"Operation: {stats.operation}",
            f"Count: {stats.count}",
            f"Total Time: {stats.total_time_ms:.1f} ms",
            f"Mean Time: {stats.mean_time_ms:.1f} ms",
            f"Median Time: {stats.median_time * 1000:.1f} ms",
            f"Min Time: {stats.min_time * 1000:.1f} ms",
            f"Max Time: {stats.max_time * 1000:.1f} ms",
        ]

        # Spread figures are only shown when there is more than one sample.
        if stats.count > 1:
            lines.extend([
                f"Std Dev: {stats.std_dev * 1000:.1f} ms",
                f"95th Percentile: {stats.percentile_95 * 1000:.1f} ms",
                f"99th Percentile: {stats.percentile_99 * 1000:.1f} ms",
            ])

        if detailed and stats.events:
            lines.append("Recent Events:")
            for event in stats.events[-5:]:  # Show last 5 events
                lines.append(f" {event.duration_ms:.1f} ms")
                if event.metadata:
                    lines.append(f" Metadata: {event.metadata}")

        return '\n'.join(lines)

    def export_csv(self, filename: str) -> None:
        """Export timing events to CSV file.

        Args:
            filename: Path of the CSV file to create or overwrite
        """
        import csv

        # Explicit UTF-8 so non-ASCII metadata is written identically on
        # every platform instead of depending on the locale default.
        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow([
                'operation', 'start_time', 'end_time', 'duration_ms',
                'parent', 'metadata'
            ])

            for event in self.events:
                writer.writerow([
                    event.name,
                    event.start_time,
                    event.end_time,
                    event.duration_ms,
                    event.parent or '',
                    str(event.metadata) if event.metadata else ''
                ])
538
+
539
+
540
# Process-wide tracker shared by the module-level helpers; created lazily.
_global_tracker: Optional[LatencyTracker] = None


def get_global_tracker() -> LatencyTracker:
    """Return the shared LatencyTracker, constructing it on first use."""
    global _global_tracker
    tracker = _global_tracker
    if tracker is None:
        tracker = LatencyTracker()
        _global_tracker = tracker
    return tracker
550
+
551
+
552
def time_function(operation_name: Union[str, Callable, None] = None):
    """
    Decorator to automatically time function execution.

    Every call of the decorated function is timed on the global tracker.
    Works both with arguments (``@time_function("load")``,
    ``@time_function()``) and bare (``@time_function``).

    Args:
        operation_name: Name for the operation (defaults to function name).
            When the decorator is applied without parentheses this receives
            the decorated function itself, which is then timed under its
            own name.
    """
    # Bare usage passes the function in place of the name. Without this
    # check the function would silently be replaced by the inner decorator.
    if callable(operation_name):
        target, label = operation_name, None
    else:
        target, label = None, operation_name

    def decorator(func: Callable) -> Callable:
        name = label or func.__name__

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Tracker is looked up per call, not at decoration time.
            tracker = get_global_tracker()
            with tracker.time_operation(name):
                return func(*args, **kwargs)
        return wrapper

    if target is not None:
        return decorator(target)
    return decorator
569
+
570
+
571
@contextmanager
def time_operation(name: str, metadata: Optional[Dict[str, Any]] = None):
    """Time a block of code on the global tracker.

    Delegates to the global tracker's ``time_operation`` and yields whatever
    that context manager yields.
    """
    with get_global_tracker().time_operation(name, metadata) as placeholder:
        yield placeholder
577
+
578
+
579
def get_timing_summary() -> Dict[str, LatencyStats]:
    """Return per-operation statistics gathered by the global tracker."""
    return get_global_tracker().get_stats()
583
+
584
+
585
def format_timing_summary(detailed: bool = False) -> str:
    """Render the global tracker's statistics as human-readable text.

    Args:
        detailed: Forwarded to the tracker's ``format_stats``
    """
    global_tracker = get_global_tracker()
    return global_tracker.format_stats(global_tracker.get_stats(), detailed)
590
+
591
+
592
def reset_timing() -> None:
    """Clear everything recorded so far by the global tracker."""
    get_global_tracker().reset()
596
+
597
+
598
# Shared vocabulary of operation names used for user-facing metrics
class Operations:
    """Canonical operation-name strings for user-facing performance metrics."""

    # Headline metrics shown to the user
    TOTAL_TRAINING_TIME = "total_training_time"
    TIME_TO_FIRST_TOKEN = "time_to_first_token"
    RESPONSE_GENERATION = "response_generation"
    UNSTEERED_GENERATION = "unsteered_generation"
    STEERED_GENERATION = "steered_generation"

    # Batch-level processing
    BATCH_INFERENCE = "batch_inference"
    PER_RESPONSE = "per_response"

    # Individual training phases
    STEERING_VECTOR_TRAINING = "steering_vector_training"
    CLASSIFIER_TRAINING = "classifier_training"

    # Older names retained for backward compatibility
    MODEL_LOADING = "model_loading"
    ACTIVATION_EXTRACTION = "activation_extraction"