tuft 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tuft/__init__.py +5 -2
- tuft/auth.py +35 -0
- tuft/backend.py +254 -0
- tuft/backends/__init__.py +10 -0
- tuft/backends/base_backend.py +112 -0
- tuft/backends/hf_training_model.py +404 -0
- tuft/backends/sampling_backend.py +253 -0
- tuft/backends/training_backend.py +327 -0
- tuft/checkpoints.py +193 -0
- tuft/cli.py +91 -0
- tuft/config.py +121 -0
- tuft/exceptions.py +138 -0
- tuft/futures.py +431 -0
- tuft/loss_fn/__init__.py +48 -0
- tuft/loss_fn/cispo.py +40 -0
- tuft/loss_fn/cross_entropy.py +26 -0
- tuft/loss_fn/dro.py +37 -0
- tuft/loss_fn/importance_sampling.py +33 -0
- tuft/loss_fn/ppo.py +43 -0
- tuft/persistence/__init__.py +32 -0
- tuft/persistence/file_redis.py +268 -0
- tuft/persistence/redis_store.py +488 -0
- tuft/sampling_controller.py +366 -0
- tuft/server.py +720 -0
- tuft/state.py +352 -0
- tuft/telemetry/__init__.py +17 -0
- tuft/telemetry/metrics.py +335 -0
- tuft/telemetry/provider.py +198 -0
- tuft/telemetry/tracing.py +43 -0
- tuft/training_controller.py +723 -0
- tuft-0.1.1.dist-info/METADATA +633 -0
- tuft-0.1.1.dist-info/RECORD +35 -0
- {tuft-0.1.0.dist-info → tuft-0.1.1.dist-info}/WHEEL +1 -2
- tuft-0.1.1.dist-info/entry_points.txt +2 -0
- {tuft-0.1.0.dist-info → tuft-0.1.1.dist-info}/licenses/LICENSE +2 -2
- tuft-0.1.0.dist-info/METADATA +0 -77
- tuft-0.1.0.dist-info/RECORD +0 -6
- tuft-0.1.0.dist-info/top_level.txt +0 -1
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"""OpenTelemetry Provider initialization.
|
|
2
|
+
|
|
3
|
+
Initializes TracerProvider, MeterProvider, and LoggerProvider with
|
|
4
|
+
OTLP or Console exporters based on configuration.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import os
|
|
11
|
+
|
|
12
|
+
from opentelemetry import metrics, trace
|
|
13
|
+
from opentelemetry._logs import get_logger_provider, set_logger_provider
|
|
14
|
+
from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter
|
|
15
|
+
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
|
|
16
|
+
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
|
|
17
|
+
from opentelemetry.instrumentation.logging import LoggingInstrumentor
|
|
18
|
+
from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
|
|
19
|
+
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor, ConsoleLogExporter
|
|
20
|
+
from opentelemetry.sdk.metrics import MeterProvider
|
|
21
|
+
from opentelemetry.sdk.metrics.export import (
|
|
22
|
+
ConsoleMetricExporter,
|
|
23
|
+
PeriodicExportingMetricReader,
|
|
24
|
+
)
|
|
25
|
+
from opentelemetry.sdk.resources import Resource
|
|
26
|
+
from opentelemetry.sdk.trace import TracerProvider
|
|
27
|
+
from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter
|
|
28
|
+
|
|
29
|
+
from tuft.config import TelemetryConfig
|
|
30
|
+
|
|
31
|
+
from .metrics import ResourceMetricsCollector, clear_meters
|
|
32
|
+
from .tracing import clear_tracers
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# Module-level logger for the telemetry provider.
logger = logging.getLogger(__name__)

# Global state to track initialization: set to True at the end of
# init_telemetry() and reset to False by shutdown_telemetry().
_tel_initialized = False
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _is_debug_mode() -> bool:
    """Report whether console (debug) exporters were requested.

    Returns:
        True only when the TUFT_OTEL_DEBUG environment variable is exactly
        the string "1"; an unset variable is treated as "0".
    """
    flag = os.environ.get("TUFT_OTEL_DEBUG", "0")
    return flag == "1"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _get_otlp_endpoint(config: TelemetryConfig) -> str | None:
    """Resolve the OTLP endpoint to export to.

    Args:
        config: Telemetry configuration; its ``otlp_endpoint`` attribute
            takes precedence when it is truthy.

    Returns:
        ``config.otlp_endpoint`` if set, otherwise the value of the
        TUFT_OTLP_ENDPOINT environment variable (None when that is unset).
    """
    # A falsy configured endpoint (None or "") falls through to the env var.
    return config.otlp_endpoint or os.getenv("TUFT_OTLP_ENDPOINT")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def init_telemetry(config: TelemetryConfig) -> None:
    """Install the OpenTelemetry trace, metric, and log providers.

    Does nothing when telemetry is disabled in ``config`` or when a previous
    call already completed. Otherwise builds a Resource from the configured
    service name plus any extra resource attributes, installs Console
    exporters when debug mode is on (TUFT_OTEL_DEBUG) or OTLP exporters
    otherwise, and finally bridges Python logging into OpenTelemetry.

    Args:
        config: Telemetry configuration from tuft.config.TelemetryConfig.
    """
    global _tel_initialized

    # Guard clauses: disabled, or already set up by an earlier call.
    if not config.enabled:
        logger.debug("Telemetry is disabled")
        return
    if _tel_initialized:
        logger.warning("Telemetry already initialized, skipping")
        return

    # Service identity first; configured attributes are applied on top.
    attributes = {"service.name": config.service_name}
    attributes.update(config.resource_attributes)
    resource = Resource.create(attributes)

    use_console = _is_debug_mode()
    endpoint = _get_otlp_endpoint(config)

    if use_console:
        logger.info("Initializing telemetry with Console exporters (debug mode)")
        _init_console_exporters(resource)
    else:
        if endpoint:
            logger.info("Initializing telemetry with OTLP exporters to %s", endpoint)
        else:
            logger.info("Initializing telemetry with OTLP exporters (using default endpoint)")
        # A falsy endpoint is passed on as None so the exporters get no
        # explicit endpoint argument.
        _init_otlp_exporters(resource, endpoint or None)

    # Route Python logging records into the OTel LoggerProvider.
    _configure_logging_integration()

    _tel_initialized = True
    logger.info("Telemetry initialized successfully")
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _init_console_exporters(resource: Resource) -> None:
    """Install global providers that export through the Console exporters.

    Used for the debug mode of init_telemetry().

    Args:
        resource: Resource attached to every provider created here.
    """
    # Traces: batch processor feeding the console span exporter.
    traces = TracerProvider(resource=resource)
    console_spans = ConsoleSpanExporter()
    traces.add_span_processor(BatchSpanProcessor(console_spans))
    trace.set_tracer_provider(traces)

    # Metrics: periodic reader feeding the console metric exporter.
    metrics.set_meter_provider(
        MeterProvider(
            resource=resource,
            metric_readers=[PeriodicExportingMetricReader(ConsoleMetricExporter())],
        )
    )

    # Logs: batch processor feeding the console log exporter.
    logs = LoggerProvider(resource=resource)
    console_logs = ConsoleLogExporter()
    logs.add_log_record_processor(BatchLogRecordProcessor(console_logs))
    set_logger_provider(logs)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _init_otlp_exporters(resource: Resource, endpoint: str | None) -> None:
    """Install global providers that export over OTLP (gRPC exporters).

    Args:
        resource: Resource attached to every provider created here.
        endpoint: OTLP endpoint passed to each exporter. When falsy, no
            ``endpoint`` argument is passed and each exporter is constructed
            with its own defaults.
    """
    # Only forward an endpoint when one was actually supplied.
    otlp_kwargs = {"endpoint": endpoint} if endpoint else {}

    # Traces
    traces = TracerProvider(resource=resource)
    traces.add_span_processor(BatchSpanProcessor(OTLPSpanExporter(**otlp_kwargs)))
    trace.set_tracer_provider(traces)

    # Metrics
    metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter(**otlp_kwargs))
    metrics.set_meter_provider(
        MeterProvider(resource=resource, metric_readers=[metric_reader])
    )

    # Logs (exporter is built before the provider, as in the trace/metric setup)
    otlp_logs = OTLPLogExporter(**otlp_kwargs)
    logs = LoggerProvider(resource=resource)
    logs.add_log_record_processor(BatchLogRecordProcessor(otlp_logs))
    set_logger_provider(logs)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _configure_logging_integration() -> None:
    """Hook the standard ``logging`` module up to OpenTelemetry.

    Enables the OpenTelemetry logging instrumentation (with
    ``set_logging_format=True``) and attaches a DEBUG-level OTel
    LoggingHandler, bound to the currently configured LoggerProvider, to the
    root logger. Handlers already on the root logger are left in place.
    """
    LoggingInstrumentor().instrument(set_logging_format=True)

    # Bridge: records reaching the root logger are also handed to the
    # LoggerProvider that was installed by the exporter setup.
    bridge_handler = LoggingHandler(
        level=logging.DEBUG,
        logger_provider=get_logger_provider(),
    )
    logging.getLogger().addHandler(bridge_handler)

    logger.debug("Python logging bridged to OTel LoggerProvider")
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def shutdown_telemetry() -> None:
    """Shutdown OpenTelemetry providers gracefully.

    No-op when telemetry was never initialized. Otherwise shuts down the
    global tracer, meter, and logger providers (in that order), clears the
    cached tracers and meters, and stops the ResourceMetricsCollector.

    A failure while shutting down one provider is logged and does not stop
    the remaining providers or the cache cleanup from running. The
    initialized flag is always reset, so a later init_telemetry() call is
    not rejected as "already initialized" after a partially failed shutdown.
    """
    global _tel_initialized

    if not _tel_initialized:
        return

    # (label used in log messages, accessor for the global provider)
    provider_getters = (
        ("tracer", trace.get_tracer_provider),
        ("meter", metrics.get_meter_provider),
        ("logger", get_logger_provider),
    )

    try:
        for label, get_provider in provider_getters:
            # The API-level default/proxy providers may not expose
            # shutdown(), hence the getattr probe.
            shutdown_fn = getattr(get_provider(), "shutdown", None)
            if shutdown_fn is None:
                continue
            try:
                shutdown_fn()
            except Exception:
                # Best-effort boundary: keep going so the other providers
                # and caches are still cleaned up.
                logger.exception("Failed to shut down %s provider", label)

        # Clear cached tracers and meters
        clear_tracers()
        clear_meters()

        # Shutdown resource metrics collector
        ResourceMetricsCollector.shutdown()
    finally:
        # Reset even if cleanup above raised, so state never sticks at True.
        _tel_initialized = False

    logger.info("Telemetry shutdown complete")
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Tracing utilities for TuFT.
|
|
2
|
+
|
|
3
|
+
Provides tracer access and context propagation utilities.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from opentelemetry import trace
|
|
11
|
+
from opentelemetry.propagate import extract, inject
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Module-level tracer cache: tracer name -> object returned by
# trace.get_tracer(name). Filled by get_tracer(), emptied by clear_tracers().
_tracers: dict[str, Any] = {}

# Re-export for convenience, so callers can take context propagation helpers
# and the current-span accessor from this module.
inject_context = inject
extract_context = extract
get_current_span = trace.get_current_span
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_tracer(name: str = "tuft"):
    """Return the tracer registered under ``name``, creating it on first use.

    Tracers are memoized in the module-level cache, so repeated calls with
    the same name return the same object until clear_tracers() is called.

    Args:
        name: Name for the tracer (typically module name).

    Returns:
        The tracer obtained from ``trace.get_tracer(name)``. When no
        TracerProvider is configured, OpenTelemetry supplies a no-op
        tracer (NOTE(review): confirm against the installed API version).
    """
    try:
        return _tracers[name]
    except KeyError:
        # First request for this name: create and remember it.
        tracer = _tracers[name] = trace.get_tracer(name)
        return tracer
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def clear_tracers() -> None:
    """Clear the tracer cache. Used during shutdown.

    Empties the module-level cache in place, so the next get_tracer() call
    for any name asks OpenTelemetry for a tracer again.
    """
    _tracers.clear()
|