wisent 0.1.1__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wisent might be problematic. Click here for more details.
- wisent/__init__.py +1 -8
- wisent/benchmarks/__init__.py +0 -0
- wisent/benchmarks/coding/__init__.py +0 -0
- wisent/benchmarks/coding/metrics/__init__.py +0 -0
- wisent/benchmarks/coding/metrics/core/__init__.py +0 -0
- wisent/benchmarks/coding/metrics/core/atoms.py +36 -0
- wisent/benchmarks/coding/metrics/evaluator.py +275 -0
- wisent/benchmarks/coding/metrics/passk.py +66 -0
- wisent/benchmarks/coding/output_sanitizer/__init__.py +0 -0
- wisent/benchmarks/coding/output_sanitizer/core/__init__.py +0 -0
- wisent/benchmarks/coding/output_sanitizer/core/atoms.py +27 -0
- wisent/benchmarks/coding/output_sanitizer/cpp_sanitizer.py +62 -0
- wisent/benchmarks/coding/output_sanitizer/java_sanitizer.py +78 -0
- wisent/benchmarks/coding/output_sanitizer/python_sanitizer.py +94 -0
- wisent/benchmarks/coding/output_sanitizer/utils.py +107 -0
- wisent/benchmarks/coding/providers/__init__.py +18 -0
- wisent/benchmarks/coding/providers/core/__init__.py +0 -0
- wisent/benchmarks/coding/providers/core/atoms.py +31 -0
- wisent/benchmarks/coding/providers/livecodebench/__init__.py +0 -0
- wisent/benchmarks/coding/providers/livecodebench/provider.py +53 -0
- wisent/benchmarks/coding/safe_docker/__init__.py +0 -0
- wisent/benchmarks/coding/safe_docker/core/__init__.py +0 -0
- wisent/benchmarks/coding/safe_docker/core/atoms.py +105 -0
- wisent/benchmarks/coding/safe_docker/core/runtime.py +118 -0
- wisent/benchmarks/coding/safe_docker/entrypoint.py +123 -0
- wisent/benchmarks/coding/safe_docker/recipes.py +60 -0
- wisent/classifiers/__init__.py +0 -0
- wisent/classifiers/core/__init__.py +0 -0
- wisent/classifiers/core/atoms.py +747 -0
- wisent/classifiers/models/__init__.py +0 -0
- wisent/classifiers/models/logistic.py +29 -0
- wisent/classifiers/models/mlp.py +47 -0
- wisent/cli/__init__.py +0 -0
- wisent/cli/classifiers/__init__.py +0 -0
- wisent/cli/classifiers/classifier_rotator.py +137 -0
- wisent/cli/cli_logger.py +142 -0
- wisent/cli/data_loaders/__init__.py +0 -0
- wisent/cli/data_loaders/data_loader_rotator.py +96 -0
- wisent/cli/evaluators/__init__.py +0 -0
- wisent/cli/evaluators/evaluator_rotator.py +148 -0
- wisent/cli/steering_methods/__init__.py +0 -0
- wisent/cli/steering_methods/steering_rotator.py +110 -0
- wisent/cli/wisent_cli/__init__.py +0 -0
- wisent/cli/wisent_cli/commands/__init__.py +0 -0
- wisent/cli/wisent_cli/commands/help_cmd.py +52 -0
- wisent/cli/wisent_cli/commands/listing.py +154 -0
- wisent/cli/wisent_cli/commands/train_cmd.py +322 -0
- wisent/cli/wisent_cli/main.py +93 -0
- wisent/cli/wisent_cli/shell.py +80 -0
- wisent/cli/wisent_cli/ui.py +69 -0
- wisent/cli/wisent_cli/util/__init__.py +0 -0
- wisent/cli/wisent_cli/util/aggregations.py +43 -0
- wisent/cli/wisent_cli/util/parsing.py +126 -0
- wisent/cli/wisent_cli/version.py +4 -0
- wisent/core/__init__.py +27 -0
- wisent/core/activations/__init__.py +0 -0
- wisent/core/activations/activations_collector.py +338 -0
- wisent/core/activations/core/__init__.py +0 -0
- wisent/core/activations/core/atoms.py +216 -0
- wisent/core/agent/__init__.py +18 -0
- wisent/core/agent/budget.py +638 -0
- wisent/core/agent/device_benchmarks.py +685 -0
- wisent/core/agent/diagnose/__init__.py +55 -0
- wisent/core/agent/diagnose/agent_classifier_decision.py +641 -0
- wisent/core/agent/diagnose/classifier_marketplace.py +554 -0
- wisent/core/agent/diagnose/create_classifier.py +1154 -0
- wisent/core/agent/diagnose/response_diagnostics.py +268 -0
- wisent/core/agent/diagnose/select_classifiers.py +506 -0
- wisent/core/agent/diagnose/synthetic_classifier_option.py +754 -0
- wisent/core/agent/diagnose/tasks/__init__.py +33 -0
- wisent/core/agent/diagnose/tasks/task_manager.py +1456 -0
- wisent/core/agent/diagnose/tasks/task_relevance.py +94 -0
- wisent/core/agent/diagnose/tasks/task_selector.py +151 -0
- wisent/core/agent/diagnose/test_synthetic_classifier.py +71 -0
- wisent/core/agent/diagnose.py +242 -0
- wisent/core/agent/steer.py +212 -0
- wisent/core/agent/timeout.py +134 -0
- wisent/core/autonomous_agent.py +1234 -0
- wisent/core/bigcode_integration.py +583 -0
- wisent/core/contrastive_pairs/__init__.py +15 -0
- wisent/core/contrastive_pairs/core/__init__.py +0 -0
- wisent/core/contrastive_pairs/core/atoms.py +45 -0
- wisent/core/contrastive_pairs/core/buliders.py +59 -0
- wisent/core/contrastive_pairs/core/pair.py +178 -0
- wisent/core/contrastive_pairs/core/response.py +152 -0
- wisent/core/contrastive_pairs/core/serialization.py +300 -0
- wisent/core/contrastive_pairs/core/set.py +133 -0
- wisent/core/contrastive_pairs/diagnostics/__init__.py +45 -0
- wisent/core/contrastive_pairs/diagnostics/activations.py +53 -0
- wisent/core/contrastive_pairs/diagnostics/base.py +73 -0
- wisent/core/contrastive_pairs/diagnostics/control_vectors.py +169 -0
- wisent/core/contrastive_pairs/diagnostics/coverage.py +79 -0
- wisent/core/contrastive_pairs/diagnostics/divergence.py +98 -0
- wisent/core/contrastive_pairs/diagnostics/duplicates.py +116 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/__init__.py +0 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/atoms.py +238 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_manifest.py +8 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_extractor_registry.py +132 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/__init__.py +0 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_extractors/winogrande.py +115 -0
- wisent/core/contrastive_pairs/lm_eval_pairs/lm_task_pairs_generation.py +50 -0
- wisent/core/data_loaders/__init__.py +0 -0
- wisent/core/data_loaders/core/__init__.py +0 -0
- wisent/core/data_loaders/core/atoms.py +98 -0
- wisent/core/data_loaders/loaders/__init__.py +0 -0
- wisent/core/data_loaders/loaders/custom.py +120 -0
- wisent/core/data_loaders/loaders/lm_loader.py +218 -0
- wisent/core/detection_handling.py +257 -0
- wisent/core/download_full_benchmarks.py +1386 -0
- wisent/core/evaluators/__init__.py +0 -0
- wisent/core/evaluators/oracles/__init__.py +0 -0
- wisent/core/evaluators/oracles/interactive.py +73 -0
- wisent/core/evaluators/oracles/nlp_evaluator.py +440 -0
- wisent/core/evaluators/oracles/user_specified.py +67 -0
- wisent/core/hyperparameter_optimizer.py +429 -0
- wisent/core/lm_eval_harness_ground_truth.py +1396 -0
- wisent/core/log_likelihoods_evaluator.py +321 -0
- wisent/core/managed_cached_benchmarks.py +595 -0
- wisent/core/mixed_benchmark_sampler.py +364 -0
- wisent/core/model_config_manager.py +330 -0
- wisent/core/model_persistence.py +317 -0
- wisent/core/models/__init__.py +0 -0
- wisent/core/models/core/__init__.py +0 -0
- wisent/core/models/core/atoms.py +460 -0
- wisent/core/models/wisent_model.py +727 -0
- wisent/core/multi_steering.py +316 -0
- wisent/core/optuna/__init__.py +57 -0
- wisent/core/optuna/classifier/__init__.py +25 -0
- wisent/core/optuna/classifier/activation_generator.py +349 -0
- wisent/core/optuna/classifier/classifier_cache.py +509 -0
- wisent/core/optuna/classifier/optuna_classifier_optimizer.py +606 -0
- wisent/core/optuna/steering/__init__.py +0 -0
- wisent/core/optuna/steering/bigcode_evaluator_wrapper.py +188 -0
- wisent/core/optuna/steering/data_utils.py +342 -0
- wisent/core/optuna/steering/metrics.py +474 -0
- wisent/core/optuna/steering/optuna_pipeline.py +1738 -0
- wisent/core/optuna/steering/steering_optimization.py +1111 -0
- wisent/core/parser.py +1668 -0
- wisent/core/prompts/__init__.py +0 -0
- wisent/core/prompts/core/__init__.py +0 -0
- wisent/core/prompts/core/atom.py +57 -0
- wisent/core/prompts/core/prompt_formater.py +157 -0
- wisent/core/prompts/prompt_stratiegies/__init__.py +0 -0
- wisent/core/prompts/prompt_stratiegies/direct_completion.py +24 -0
- wisent/core/prompts/prompt_stratiegies/instruction_following.py +24 -0
- wisent/core/prompts/prompt_stratiegies/multiple_choice.py +29 -0
- wisent/core/prompts/prompt_stratiegies/role_playing.py +31 -0
- wisent/core/representation.py +5 -0
- wisent/core/sample_size_optimizer.py +648 -0
- wisent/core/sample_size_optimizer_v2.py +355 -0
- wisent/core/save_results.py +277 -0
- wisent/core/steering.py +652 -0
- wisent/core/steering_method.py +26 -0
- wisent/core/steering_methods/__init__.py +0 -0
- wisent/core/steering_methods/core/__init__.py +0 -0
- wisent/core/steering_methods/core/atoms.py +153 -0
- wisent/core/steering_methods/methods/__init__.py +0 -0
- wisent/core/steering_methods/methods/caa.py +44 -0
- wisent/core/steering_optimizer.py +1297 -0
- wisent/core/task_interface.py +132 -0
- wisent/core/task_selector.py +189 -0
- wisent/core/tasks/__init__.py +175 -0
- wisent/core/tasks/aime_task.py +141 -0
- wisent/core/tasks/file_task.py +211 -0
- wisent/core/tasks/hle_task.py +180 -0
- wisent/core/tasks/hmmt_task.py +119 -0
- wisent/core/tasks/livecodebench_task.py +201 -0
- wisent/core/tasks/livemathbench_task.py +158 -0
- wisent/core/tasks/lm_eval_task.py +455 -0
- wisent/core/tasks/math500_task.py +84 -0
- wisent/core/tasks/polymath_task.py +146 -0
- wisent/core/tasks/supergpqa_task.py +220 -0
- wisent/core/time_estimator.py +149 -0
- wisent/core/timing_calibration.py +174 -0
- wisent/core/tracking/__init__.py +54 -0
- wisent/core/tracking/latency.py +618 -0
- wisent/core/tracking/memory.py +359 -0
- wisent/core/trainers/__init__.py +0 -0
- wisent/core/trainers/core/__init__.py +11 -0
- wisent/core/trainers/core/atoms.py +45 -0
- wisent/core/trainers/steering_trainer.py +271 -0
- wisent/core/user_model_config.py +158 -0
- wisent/opti/__init__.py +0 -0
- wisent/opti/core/__init__.py +0 -0
- wisent/opti/core/atoms.py +175 -0
- wisent/opti/methods/__init__.py +0 -0
- wisent/opti/methods/opti_classificator.py +172 -0
- wisent/opti/methods/opti_steering.py +138 -0
- wisent/synthetic/__init__.py +0 -0
- wisent/synthetic/cleaners/__init__.py +0 -0
- wisent/synthetic/cleaners/core/__init__.py +0 -0
- wisent/synthetic/cleaners/core/atoms.py +58 -0
- wisent/synthetic/cleaners/deduper_cleaner.py +53 -0
- wisent/synthetic/cleaners/methods/__init__.py +0 -0
- wisent/synthetic/cleaners/methods/base_dedupers.py +320 -0
- wisent/synthetic/cleaners/methods/base_refusalers.py +286 -0
- wisent/synthetic/cleaners/methods/core/__init__.py +0 -0
- wisent/synthetic/cleaners/methods/core/atoms.py +47 -0
- wisent/synthetic/cleaners/pairs_cleaner.py +90 -0
- wisent/synthetic/cleaners/refusaler_cleaner.py +133 -0
- wisent/synthetic/db_instructions/__init__.py +0 -0
- wisent/synthetic/db_instructions/core/__init__.py +0 -0
- wisent/synthetic/db_instructions/core/atoms.py +25 -0
- wisent/synthetic/db_instructions/mini_dp.py +37 -0
- wisent/synthetic/generators/__init__.py +0 -0
- wisent/synthetic/generators/core/__init__.py +0 -0
- wisent/synthetic/generators/core/atoms.py +73 -0
- wisent/synthetic/generators/diversities/__init__.py +0 -0
- wisent/synthetic/generators/diversities/core/__init__.py +0 -0
- wisent/synthetic/generators/diversities/core/core.py +68 -0
- wisent/synthetic/generators/diversities/methods/__init__.py +0 -0
- wisent/synthetic/generators/diversities/methods/fast_diversity.py +249 -0
- wisent/synthetic/generators/pairs_generator.py +179 -0
- wisent-0.5.1.dist-info/METADATA +67 -0
- wisent-0.5.1.dist-info/RECORD +218 -0
- {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info}/WHEEL +1 -1
- {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info/licenses}/LICENSE +2 -2
- wisent/activations/__init__.py +0 -9
- wisent/activations/client.py +0 -97
- wisent/activations/extractor.py +0 -251
- wisent/activations/models.py +0 -95
- wisent/client.py +0 -45
- wisent/control_vector/__init__.py +0 -9
- wisent/control_vector/client.py +0 -85
- wisent/control_vector/manager.py +0 -168
- wisent/control_vector/models.py +0 -70
- wisent/inference/__init__.py +0 -9
- wisent/inference/client.py +0 -103
- wisent/inference/inferencer.py +0 -250
- wisent/inference/models.py +0 -66
- wisent/utils/__init__.py +0 -3
- wisent/utils/auth.py +0 -30
- wisent/utils/http.py +0 -228
- wisent/version.py +0 -3
- wisent-0.1.1.dist-info/METADATA +0 -142
- wisent-0.1.1.dist-info/RECORD +0 -23
- {wisent-0.1.1.dist-info → wisent-0.5.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,618 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Latency tracking for wisent-guard operations.
|
|
3
|
+
|
|
4
|
+
This module provides comprehensive timing and performance monitoring capabilities
|
|
5
|
+
for all aspects of the wisent-guard pipeline including model operations,
|
|
6
|
+
steering computations, and text generation.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import time
|
|
10
|
+
import statistics
|
|
11
|
+
from typing import Dict, List, Optional, Any, Callable, Union
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from contextlib import contextmanager
|
|
14
|
+
from collections import defaultdict
|
|
15
|
+
import functools
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class TimingEvent:
    """
    Record of one completed, timed occurrence of a named operation.

    The fields are plain values supplied by whoever builds the event; the
    class itself performs no validation and does not derive ``duration``
    from the two timestamps.
    """
    # Label of the operation that was measured.
    name: str
    # Timestamp taken when the operation began.
    start_time: float
    # Timestamp taken when the operation finished.
    end_time: float
    # Elapsed time in seconds (``duration_ms`` scales it by 1000).
    duration: float
    # Free-form extra information; every instance gets its own dict.
    metadata: Dict[str, Any] = field(default_factory=dict)
    # Name of the enclosing operation, or None when there is none.
    parent: Optional[str] = None

    @property
    def duration_ms(self) -> float:
        """Return ``duration`` converted from seconds to milliseconds."""
        milliseconds = 1000 * self.duration
        return milliseconds
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class LatencyStats:
    """
    Summary statistics over every recorded run of a single operation.

    All ``*_time`` fields, ``std_dev`` and the percentiles are expressed in
    seconds; the ``*_ms`` properties expose millisecond views of the mean
    and the total.
    """
    # Name of the operation these statistics describe.
    operation: str
    # Number of events that were aggregated.
    count: int
    # Sum of all durations.
    total_time: float
    # Arithmetic mean of the durations.
    mean_time: float
    # Median of the durations.
    median_time: float
    # Shortest observed duration.
    min_time: float
    # Longest observed duration.
    max_time: float
    # Standard deviation of the durations.
    std_dev: float
    # 95th-percentile duration.
    percentile_95: float
    # 99th-percentile duration.
    percentile_99: float
    # The events the figures were computed from; a fresh list per instance.
    events: List[TimingEvent] = field(default_factory=list)

    @property
    def mean_time_ms(self) -> float:
        """Return ``mean_time`` converted from seconds to milliseconds."""
        mean_milliseconds = 1000 * self.mean_time
        return mean_milliseconds

    @property
    def total_time_ms(self) -> float:
        """Return ``total_time`` converted from seconds to milliseconds."""
        total_milliseconds = 1000 * self.total_time
        return total_milliseconds
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass
class GenerationMetrics:
    """
    User-facing performance figures for one text-generation run.

    Times are stored in seconds; the ``*_ms`` properties return the same
    values in milliseconds.
    """
    # Delay between the start of generation and the first token, in seconds.
    time_to_first_token: float
    # Duration of the whole generation, in seconds.
    total_generation_time: float
    # Number of tokens produced.
    token_count: int
    # Generation throughput.
    tokens_per_second: float
    # Size of the prompt; defaults to 0 when not supplied.
    prompt_length: int = 0

    @property
    def ttft_ms(self) -> float:
        """Return ``time_to_first_token`` in milliseconds."""
        first_token_milliseconds = 1000 * self.time_to_first_token
        return first_token_milliseconds

    @property
    def total_time_ms(self) -> float:
        """Return ``total_generation_time`` in milliseconds."""
        total_milliseconds = 1000 * self.total_generation_time
        return total_milliseconds
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@dataclass
class TrainingMetrics:
    """
    User-facing performance figures for one training run.

    ``total_training_time`` is stored in seconds; ``training_time_ms``
    exposes it in milliseconds and ``samples_per_second`` derives the
    throughput from it.
    """
    # Duration of the whole training run, in seconds.
    total_training_time: float
    # Number of samples that were processed.
    training_samples: int
    # Name of the training method that was used.
    method: str
    # Whether the run completed successfully.
    success: bool = True
    # Description of the failure, if any.
    error_message: Optional[str] = None

    @property
    def training_time_ms(self) -> float:
        """Return ``total_training_time`` in milliseconds."""
        return self.total_training_time * 1000

    @property
    def samples_per_second(self) -> float:
        """
        Return the number of training samples processed per second.

        A non-positive ``total_training_time`` yields ``0.0`` instead of
        dividing by zero, so the result is always a float as annotated.
        """
        if self.total_training_time > 0:
            return self.training_samples / self.total_training_time
        return 0.0
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class LatencyTracker:
|
|
101
|
+
"""
|
|
102
|
+
Comprehensive latency tracker for wisent-guard operations.
|
|
103
|
+
|
|
104
|
+
Tracks timing for individual operations and provides aggregated statistics.
|
|
105
|
+
Supports nested operation tracking and hierarchical timing analysis.
|
|
106
|
+
"""
|
|
107
|
+
|
|
108
|
+
def __init__(self, auto_start: bool = True):
|
|
109
|
+
"""
|
|
110
|
+
Initialize latency tracker.
|
|
111
|
+
|
|
112
|
+
Args:
|
|
113
|
+
auto_start: Whether to automatically start tracking
|
|
114
|
+
"""
|
|
115
|
+
self.events: List[TimingEvent] = []
|
|
116
|
+
self.active_operations: Dict[str, float] = {}
|
|
117
|
+
self.operation_stack: List[str] = []
|
|
118
|
+
self.is_tracking = auto_start
|
|
119
|
+
self.start_time = time.time() if auto_start else None
|
|
120
|
+
|
|
121
|
+
def start_tracking(self) -> None:
|
|
122
|
+
"""Start or resume latency tracking."""
|
|
123
|
+
self.is_tracking = True
|
|
124
|
+
if self.start_time is None:
|
|
125
|
+
self.start_time = time.time()
|
|
126
|
+
|
|
127
|
+
def stop_tracking(self) -> None:
|
|
128
|
+
"""Stop latency tracking."""
|
|
129
|
+
self.is_tracking = False
|
|
130
|
+
|
|
131
|
+
def start_operation(
|
|
132
|
+
self,
|
|
133
|
+
name: str,
|
|
134
|
+
metadata: Optional[Dict[str, Any]] = None
|
|
135
|
+
) -> str:
|
|
136
|
+
"""
|
|
137
|
+
Start timing an operation.
|
|
138
|
+
|
|
139
|
+
Args:
|
|
140
|
+
name: Name of the operation
|
|
141
|
+
metadata: Optional metadata to store with the event
|
|
142
|
+
|
|
143
|
+
Returns:
|
|
144
|
+
Operation ID for later reference
|
|
145
|
+
"""
|
|
146
|
+
if not self.is_tracking:
|
|
147
|
+
return name
|
|
148
|
+
|
|
149
|
+
current_time = time.time()
|
|
150
|
+
operation_id = f"{name}_{len(self.events)}"
|
|
151
|
+
|
|
152
|
+
self.active_operations[operation_id] = current_time
|
|
153
|
+
self.operation_stack.append(operation_id)
|
|
154
|
+
|
|
155
|
+
return operation_id
|
|
156
|
+
|
|
157
|
+
def end_operation(
|
|
158
|
+
self,
|
|
159
|
+
operation_id: str,
|
|
160
|
+
metadata: Optional[Dict[str, Any]] = None
|
|
161
|
+
) -> Optional[TimingEvent]:
|
|
162
|
+
"""
|
|
163
|
+
End timing an operation.
|
|
164
|
+
|
|
165
|
+
Args:
|
|
166
|
+
operation_id: ID returned from start_operation
|
|
167
|
+
metadata: Additional metadata to store
|
|
168
|
+
|
|
169
|
+
Returns:
|
|
170
|
+
TimingEvent if operation was found, None otherwise
|
|
171
|
+
"""
|
|
172
|
+
if not self.is_tracking or operation_id not in self.active_operations:
|
|
173
|
+
return None
|
|
174
|
+
|
|
175
|
+
end_time = time.time()
|
|
176
|
+
start_time = self.active_operations.pop(operation_id)
|
|
177
|
+
duration = end_time - start_time
|
|
178
|
+
|
|
179
|
+
# Extract operation name from ID
|
|
180
|
+
name = operation_id.rsplit('_', 1)[0]
|
|
181
|
+
|
|
182
|
+
# Determine parent operation
|
|
183
|
+
parent = None
|
|
184
|
+
if operation_id in self.operation_stack:
|
|
185
|
+
stack_index = self.operation_stack.index(operation_id)
|
|
186
|
+
if stack_index > 0:
|
|
187
|
+
parent_id = self.operation_stack[stack_index - 1]
|
|
188
|
+
parent = parent_id.rsplit('_', 1)[0]
|
|
189
|
+
self.operation_stack.remove(operation_id)
|
|
190
|
+
|
|
191
|
+
# Merge metadata
|
|
192
|
+
combined_metadata = metadata or {}
|
|
193
|
+
|
|
194
|
+
event = TimingEvent(
|
|
195
|
+
name=name,
|
|
196
|
+
start_time=start_time,
|
|
197
|
+
end_time=end_time,
|
|
198
|
+
duration=duration,
|
|
199
|
+
metadata=combined_metadata,
|
|
200
|
+
parent=parent
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
self.events.append(event)
|
|
204
|
+
return event
|
|
205
|
+
|
|
206
|
+
@contextmanager
|
|
207
|
+
def time_operation(
|
|
208
|
+
self,
|
|
209
|
+
name: str,
|
|
210
|
+
metadata: Optional[Dict[str, Any]] = None
|
|
211
|
+
):
|
|
212
|
+
"""
|
|
213
|
+
Context manager for timing operations.
|
|
214
|
+
|
|
215
|
+
Args:
|
|
216
|
+
name: Name of the operation
|
|
217
|
+
metadata: Optional metadata to store
|
|
218
|
+
|
|
219
|
+
Yields:
|
|
220
|
+
TimingEvent that will be populated when context exits
|
|
221
|
+
"""
|
|
222
|
+
operation_id = self.start_operation(name, metadata)
|
|
223
|
+
event_placeholder = {"event": None}
|
|
224
|
+
|
|
225
|
+
try:
|
|
226
|
+
yield event_placeholder
|
|
227
|
+
finally:
|
|
228
|
+
event = self.end_operation(operation_id, metadata)
|
|
229
|
+
event_placeholder["event"] = event
|
|
230
|
+
|
|
231
|
+
@contextmanager
|
|
232
|
+
def time_generation(self, name: str = "response_generation", prompt_length: int = 0):
|
|
233
|
+
"""
|
|
234
|
+
Context manager for timing text generation with TTFT tracking.
|
|
235
|
+
|
|
236
|
+
Args:
|
|
237
|
+
name: Name of the generation operation
|
|
238
|
+
prompt_length: Length of the input prompt in tokens
|
|
239
|
+
|
|
240
|
+
Yields:
|
|
241
|
+
Dict with methods to mark first token and update token count
|
|
242
|
+
"""
|
|
243
|
+
start_time = time.time()
|
|
244
|
+
operation_id = self.start_operation(name, {"prompt_length": prompt_length})
|
|
245
|
+
|
|
246
|
+
generation_state = {
|
|
247
|
+
"first_token_time": None,
|
|
248
|
+
"token_count": 0
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
# Add methods that modify the dict
|
|
252
|
+
generation_state["mark_first_token"] = lambda: generation_state.update({"first_token_time": time.time()})
|
|
253
|
+
generation_state["update_tokens"] = lambda count: generation_state.update({"token_count": count})
|
|
254
|
+
|
|
255
|
+
try:
|
|
256
|
+
yield generation_state
|
|
257
|
+
finally:
|
|
258
|
+
end_time = time.time()
|
|
259
|
+
total_duration = end_time - start_time
|
|
260
|
+
|
|
261
|
+
# Calculate TTFT
|
|
262
|
+
ttft = generation_state["first_token_time"] - start_time if generation_state["first_token_time"] else 0.0
|
|
263
|
+
|
|
264
|
+
# Calculate tokens per second
|
|
265
|
+
tokens_per_sec = generation_state["token_count"] / total_duration if total_duration > 0 else 0.0
|
|
266
|
+
|
|
267
|
+
metadata = {
|
|
268
|
+
"prompt_length": prompt_length,
|
|
269
|
+
"time_to_first_token": ttft,
|
|
270
|
+
"token_count": generation_state["token_count"],
|
|
271
|
+
"tokens_per_second": tokens_per_sec
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
self.end_operation(operation_id, metadata)
|
|
275
|
+
|
|
276
|
+
def get_stats(self, operation_name: Optional[str] = None) -> Union[LatencyStats, Dict[str, LatencyStats]]:
|
|
277
|
+
"""
|
|
278
|
+
Get latency statistics.
|
|
279
|
+
|
|
280
|
+
Args:
|
|
281
|
+
operation_name: Specific operation to get stats for, or None for all
|
|
282
|
+
|
|
283
|
+
Returns:
|
|
284
|
+
LatencyStats for specific operation or dict of all operation stats
|
|
285
|
+
"""
|
|
286
|
+
if operation_name:
|
|
287
|
+
events = [e for e in self.events if e.name == operation_name]
|
|
288
|
+
if not events:
|
|
289
|
+
raise ValueError(f"No events found for operation: {operation_name}")
|
|
290
|
+
return self._calculate_stats(operation_name, events)
|
|
291
|
+
else:
|
|
292
|
+
# Group events by operation name
|
|
293
|
+
operation_events = defaultdict(list)
|
|
294
|
+
for event in self.events:
|
|
295
|
+
operation_events[event.name].append(event)
|
|
296
|
+
|
|
297
|
+
return {
|
|
298
|
+
name: self._calculate_stats(name, events)
|
|
299
|
+
for name, events in operation_events.items()
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
def _calculate_stats(self, operation: str, events: List[TimingEvent]) -> LatencyStats:
|
|
303
|
+
"""Calculate statistics for a list of timing events."""
|
|
304
|
+
durations = [e.duration for e in events]
|
|
305
|
+
|
|
306
|
+
if not durations:
|
|
307
|
+
return LatencyStats(
|
|
308
|
+
operation=operation,
|
|
309
|
+
count=0,
|
|
310
|
+
total_time=0,
|
|
311
|
+
mean_time=0,
|
|
312
|
+
median_time=0,
|
|
313
|
+
min_time=0,
|
|
314
|
+
max_time=0,
|
|
315
|
+
std_dev=0,
|
|
316
|
+
percentile_95=0,
|
|
317
|
+
percentile_99=0,
|
|
318
|
+
events=[]
|
|
319
|
+
)
|
|
320
|
+
|
|
321
|
+
durations.sort()
|
|
322
|
+
|
|
323
|
+
return LatencyStats(
|
|
324
|
+
operation=operation,
|
|
325
|
+
count=len(durations),
|
|
326
|
+
total_time=sum(durations),
|
|
327
|
+
mean_time=statistics.mean(durations),
|
|
328
|
+
median_time=statistics.median(durations),
|
|
329
|
+
min_time=min(durations),
|
|
330
|
+
max_time=max(durations),
|
|
331
|
+
std_dev=statistics.stdev(durations) if len(durations) > 1 else 0,
|
|
332
|
+
percentile_95=self._percentile(durations, 95),
|
|
333
|
+
percentile_99=self._percentile(durations, 99),
|
|
334
|
+
events=events.copy()
|
|
335
|
+
)
|
|
336
|
+
|
|
337
|
+
def _percentile(self, sorted_data: List[float], percentile: float) -> float:
|
|
338
|
+
"""Calculate percentile from sorted data."""
|
|
339
|
+
if not sorted_data:
|
|
340
|
+
return 0
|
|
341
|
+
|
|
342
|
+
index = (percentile / 100) * (len(sorted_data) - 1)
|
|
343
|
+
if index.is_integer():
|
|
344
|
+
return sorted_data[int(index)]
|
|
345
|
+
else:
|
|
346
|
+
lower = sorted_data[int(index)]
|
|
347
|
+
upper = sorted_data[int(index) + 1]
|
|
348
|
+
return lower + (upper - lower) * (index - int(index))
|
|
349
|
+
|
|
350
|
+
def get_timeline(self) -> List[TimingEvent]:
|
|
351
|
+
"""Get chronological timeline of all events."""
|
|
352
|
+
return sorted(self.events, key=lambda e: e.start_time)
|
|
353
|
+
|
|
354
|
+
def get_hierarchy(self) -> Dict[str, List[TimingEvent]]:
|
|
355
|
+
"""Get hierarchical view of operations (parent -> children)."""
|
|
356
|
+
hierarchy = defaultdict(list)
|
|
357
|
+
|
|
358
|
+
for event in self.events:
|
|
359
|
+
parent = event.parent or "root"
|
|
360
|
+
hierarchy[parent].append(event)
|
|
361
|
+
|
|
362
|
+
return dict(hierarchy)
|
|
363
|
+
|
|
364
|
+
def reset(self) -> None:
|
|
365
|
+
"""Reset all tracking data."""
|
|
366
|
+
self.events.clear()
|
|
367
|
+
self.active_operations.clear()
|
|
368
|
+
self.operation_stack.clear()
|
|
369
|
+
self.start_time = time.time() if self.is_tracking else None
|
|
370
|
+
|
|
371
|
+
def get_generation_metrics(self, operation_name: str = "response_generation") -> Optional[GenerationMetrics]:
|
|
372
|
+
"""Get user-facing generation metrics."""
|
|
373
|
+
events = [e for e in self.events if e.name == operation_name]
|
|
374
|
+
if not events:
|
|
375
|
+
return None
|
|
376
|
+
|
|
377
|
+
# Use the most recent event
|
|
378
|
+
latest_event = events[-1]
|
|
379
|
+
metadata = latest_event.metadata
|
|
380
|
+
|
|
381
|
+
return GenerationMetrics(
|
|
382
|
+
time_to_first_token=metadata.get('time_to_first_token', 0.0),
|
|
383
|
+
total_generation_time=latest_event.duration,
|
|
384
|
+
token_count=metadata.get('token_count', 0),
|
|
385
|
+
tokens_per_second=metadata.get('tokens_per_second', 0.0),
|
|
386
|
+
prompt_length=metadata.get('prompt_length', 0)
|
|
387
|
+
)
|
|
388
|
+
|
|
389
|
+
def get_training_metrics(self, operation_name: str = "total_training_time") -> Optional[TrainingMetrics]:
|
|
390
|
+
"""Get user-facing training metrics."""
|
|
391
|
+
events = [e for e in self.events if e.name == operation_name]
|
|
392
|
+
if not events:
|
|
393
|
+
return None
|
|
394
|
+
|
|
395
|
+
latest_event = events[-1]
|
|
396
|
+
metadata = latest_event.metadata
|
|
397
|
+
|
|
398
|
+
return TrainingMetrics(
|
|
399
|
+
total_training_time=latest_event.duration,
|
|
400
|
+
training_samples=metadata.get('training_samples', 0),
|
|
401
|
+
method=metadata.get('method', 'unknown'),
|
|
402
|
+
success=metadata.get('success', True),
|
|
403
|
+
error_message=metadata.get('error_message')
|
|
404
|
+
)
|
|
405
|
+
|
|
406
|
+
def format_user_metrics(self) -> str:
|
|
407
|
+
"""Format user-facing performance metrics."""
|
|
408
|
+
lines = ["🚀 Performance Summary:"]
|
|
409
|
+
|
|
410
|
+
# Training metrics
|
|
411
|
+
training_metrics = self.get_training_metrics()
|
|
412
|
+
if training_metrics:
|
|
413
|
+
lines.extend([
|
|
414
|
+
f"\n📚 Training:",
|
|
415
|
+
f" Method: {training_metrics.method}",
|
|
416
|
+
f" Total Time: {training_metrics.training_time_ms:.0f} ms",
|
|
417
|
+
f" Samples: {training_metrics.training_samples}",
|
|
418
|
+
f" Speed: {training_metrics.samples_per_second:.1f} samples/sec"
|
|
419
|
+
])
|
|
420
|
+
|
|
421
|
+
# Generation metrics - check for both response_generation and individual generation events
|
|
422
|
+
generation_metrics = self.get_generation_metrics("response_generation")
|
|
423
|
+
if not generation_metrics:
|
|
424
|
+
# Try to get metrics from steered_generation if response_generation doesn't exist
|
|
425
|
+
generation_metrics = self.get_generation_metrics("steered_generation")
|
|
426
|
+
|
|
427
|
+
if generation_metrics and generation_metrics.token_count > 0:
|
|
428
|
+
lines.extend([
|
|
429
|
+
f"\n🎭 Generation:",
|
|
430
|
+
f" Time to First Token: {generation_metrics.ttft_ms:.0f} ms",
|
|
431
|
+
f" Total Generation: {generation_metrics.total_time_ms:.0f} ms",
|
|
432
|
+
f" Tokens Generated: {generation_metrics.token_count}",
|
|
433
|
+
f" Speed: {generation_metrics.tokens_per_second:.1f} tokens/sec"
|
|
434
|
+
])
|
|
435
|
+
|
|
436
|
+
# Steering overhead comparison
|
|
437
|
+
steered_events = [e for e in self.events if e.name == "steered_generation"]
|
|
438
|
+
unsteered_events = [e for e in self.events if e.name == "unsteered_generation"]
|
|
439
|
+
|
|
440
|
+
if steered_events and unsteered_events:
|
|
441
|
+
steered_avg = sum(e.duration for e in steered_events) / len(steered_events)
|
|
442
|
+
unsteered_avg = sum(e.duration for e in unsteered_events) / len(unsteered_events)
|
|
443
|
+
overhead = ((steered_avg - unsteered_avg) / unsteered_avg) * 100
|
|
444
|
+
|
|
445
|
+
lines.extend([
|
|
446
|
+
f"\n⚡ Steering Overhead:",
|
|
447
|
+
f" Unsteered Avg: {unsteered_avg * 1000:.0f} ms ({len(unsteered_events)} runs)",
|
|
448
|
+
f" Steered Avg: {steered_avg * 1000:.0f} ms ({len(steered_events)} runs)",
|
|
449
|
+
f" Overhead: {overhead:+.1f}%"
|
|
450
|
+
])
|
|
451
|
+
elif steered_events:
|
|
452
|
+
# Show steered performance even without comparison
|
|
453
|
+
steered_avg = sum(e.duration for e in steered_events) / len(steered_events)
|
|
454
|
+
lines.extend([
|
|
455
|
+
f"\n🎯 Steered Generation:",
|
|
456
|
+
f" Average Time: {steered_avg * 1000:.0f} ms ({len(steered_events)} runs)"
|
|
457
|
+
])
|
|
458
|
+
elif unsteered_events:
|
|
459
|
+
# Show unsteered performance even without comparison
|
|
460
|
+
unsteered_avg = sum(e.duration for e in unsteered_events) / len(unsteered_events)
|
|
461
|
+
lines.extend([
|
|
462
|
+
f"\n🔄 Unsteered Generation:",
|
|
463
|
+
f" Average Time: {unsteered_avg * 1000:.0f} ms ({len(unsteered_events)} runs)"
|
|
464
|
+
])
|
|
465
|
+
|
|
466
|
+
# Show warning if no generation metrics found
|
|
467
|
+
if not generation_metrics or generation_metrics.token_count == 0:
|
|
468
|
+
lines.extend([
|
|
469
|
+
f"\n⚠️ No generation metrics available",
|
|
470
|
+
f" (Responses may be empty or timing failed)"
|
|
471
|
+
])
|
|
472
|
+
|
|
473
|
+
return '\n'.join(lines)
|
|
474
|
+
|
|
475
|
+
def format_stats(
|
|
476
|
+
self,
|
|
477
|
+
stats: Union[LatencyStats, Dict[str, LatencyStats]],
|
|
478
|
+
detailed: bool = False
|
|
479
|
+
) -> str:
|
|
480
|
+
"""Format latency statistics as a readable string."""
|
|
481
|
+
if isinstance(stats, LatencyStats):
|
|
482
|
+
return self._format_single_stats(stats, detailed)
|
|
483
|
+
else:
|
|
484
|
+
lines = ["Latency Statistics Summary:"]
|
|
485
|
+
for operation, op_stats in stats.items():
|
|
486
|
+
lines.append(f"\n{operation}:")
|
|
487
|
+
lines.extend([f" {line}" for line in self._format_single_stats(op_stats, detailed).split('\n')])
|
|
488
|
+
return '\n'.join(lines)
|
|
489
|
+
|
|
490
|
+
def _format_single_stats(self, stats: LatencyStats, detailed: bool) -> str:
|
|
491
|
+
"""Format statistics for a single operation."""
|
|
492
|
+
lines = [
|
|
493
|
+
f"Operation: {stats.operation}",
|
|
494
|
+
f"Count: {stats.count}",
|
|
495
|
+
f"Total Time: {stats.total_time_ms:.1f} ms",
|
|
496
|
+
f"Mean Time: {stats.mean_time_ms:.1f} ms",
|
|
497
|
+
f"Median Time: {stats.median_time * 1000:.1f} ms",
|
|
498
|
+
f"Min Time: {stats.min_time * 1000:.1f} ms",
|
|
499
|
+
f"Max Time: {stats.max_time * 1000:.1f} ms",
|
|
500
|
+
]
|
|
501
|
+
|
|
502
|
+
if stats.count > 1:
|
|
503
|
+
lines.extend([
|
|
504
|
+
f"Std Dev: {stats.std_dev * 1000:.1f} ms",
|
|
505
|
+
f"95th Percentile: {stats.percentile_95 * 1000:.1f} ms",
|
|
506
|
+
f"99th Percentile: {stats.percentile_99 * 1000:.1f} ms",
|
|
507
|
+
])
|
|
508
|
+
|
|
509
|
+
if detailed and stats.events:
|
|
510
|
+
lines.append(f"Recent Events:")
|
|
511
|
+
for event in stats.events[-5:]: # Show last 5 events
|
|
512
|
+
lines.append(f" {event.duration_ms:.1f} ms")
|
|
513
|
+
if event.metadata:
|
|
514
|
+
lines.append(f" Metadata: {event.metadata}")
|
|
515
|
+
|
|
516
|
+
return '\n'.join(lines)
|
|
517
|
+
|
|
518
|
+
def export_csv(self, filename: str) -> None:
    """Export timing events to a CSV file.

    Writes a header row followed by one row per entry in ``self.events``
    with the columns ``operation``, ``start_time``, ``end_time``,
    ``duration_ms``, ``parent`` and ``metadata``. A falsy ``parent`` or
    ``metadata`` is written as an empty field; truthy metadata is written
    as ``str(metadata)``.

    Args:
        filename: Path of the file to create or overwrite.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    import csv

    header = [
        'operation', 'start_time', 'end_time', 'duration_ms',
        'parent', 'metadata'
    ]

    # The encoding is pinned to UTF-8 so the output does not depend on the
    # platform's locale encoding; otherwise non-ASCII metadata can fail with
    # UnicodeEncodeError on non-UTF-8 locales. newline='' is what the csv
    # module requires so it can control line endings itself.
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(header)
        writer.writerows(
            [
                event.name,
                event.start_time,
                event.end_time,
                event.duration_ms,
                event.parent or '',
                str(event.metadata) if event.metadata else '',
            ]
            for event in self.events
        )
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
# Global latency tracker instance.
# Starts as None; get_global_tracker() creates the LatencyTracker on first
# use and stores it here so later calls return the same object.
_global_tracker: Optional[LatencyTracker] = None
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
def get_global_tracker() -> LatencyTracker:
    """Return the module-level tracker, creating it on the first call.

    Returns:
        The ``LatencyTracker`` stored in ``_global_tracker``; a new one is
        constructed and stored only while that variable is still ``None``.
    """
    global _global_tracker
    if _global_tracker is not None:
        return _global_tracker
    _global_tracker = LatencyTracker()
    return _global_tracker
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
def time_function(operation_name: Optional[str] = None):
    """
    Build a decorator that times every call of the decorated function.

    Each call runs inside ``time_operation`` of the tracker returned by
    ``get_global_tracker()``.

    Args:
        operation_name: Name to time the calls under; when falsy, the
            decorated function's ``__name__`` is used instead.
    """
    def decorator(func: Callable) -> Callable:
        label = operation_name if operation_name else func.__name__

        @functools.wraps(func)
        def timed_call(*args, **kwargs):
            # The tracker is looked up on each call, not at decoration time.
            with get_global_tracker().time_operation(label):
                return func(*args, **kwargs)

        return timed_call

    return decorator
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
@contextmanager
def time_operation(name: str, metadata: Optional[Dict[str, Any]] = None):
    """Time a block of code on the global tracker.

    Enters ``time_operation(name, metadata)`` on the tracker returned by
    ``get_global_tracker()`` and yields whatever that context manager
    yields.

    Args:
        name: Operation name passed through to the tracker.
        metadata: Optional metadata passed through to the tracker.
    """
    with get_global_tracker().time_operation(name, metadata) as handle:
        yield handle
|
|
577
|
+
|
|
578
|
+
|
|
579
|
+
def get_timing_summary() -> Dict[str, LatencyStats]:
    """Return the result of ``get_stats()`` called on the global tracker."""
    return get_global_tracker().get_stats()
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
def format_timing_summary(detailed: bool = False) -> str:
    """Return the global tracker's statistics formatted as text.

    Args:
        detailed: Forwarded to the tracker's ``format_stats``.
    """
    shared = get_global_tracker()
    return shared.format_stats(shared.get_stats(), detailed)
|
|
590
|
+
|
|
591
|
+
|
|
592
|
+
def reset_timing() -> None:
    """Call ``reset()`` on the global tracker."""
    get_global_tracker().reset()
|
|
596
|
+
|
|
597
|
+
|
|
598
|
+
# Common operation names for user-facing metrics
|
|
599
|
+
class Operations:
|
|
600
|
+
"""Standard operation names for user-facing performance metrics."""
|
|
601
|
+
# Core user-facing metrics
|
|
602
|
+
TOTAL_TRAINING_TIME = "total_training_time"
|
|
603
|
+
TIME_TO_FIRST_TOKEN = "time_to_first_token"
|
|
604
|
+
RESPONSE_GENERATION = "response_generation"
|
|
605
|
+
UNSTEERED_GENERATION = "unsteered_generation"
|
|
606
|
+
STEERED_GENERATION = "steered_generation"
|
|
607
|
+
|
|
608
|
+
# Batch processing
|
|
609
|
+
BATCH_INFERENCE = "batch_inference"
|
|
610
|
+
PER_RESPONSE = "per_response"
|
|
611
|
+
|
|
612
|
+
# Training phases
|
|
613
|
+
STEERING_VECTOR_TRAINING = "steering_vector_training"
|
|
614
|
+
CLASSIFIER_TRAINING = "classifier_training"
|
|
615
|
+
|
|
616
|
+
# Legacy (for backward compatibility)
|
|
617
|
+
MODEL_LOADING = "model_loading"
|
|
618
|
+
ACTIVATION_EXTRACTION = "activation_extraction"
|