truthound-dashboard 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. truthound_dashboard/__init__.py +11 -0
  2. truthound_dashboard/__main__.py +6 -0
  3. truthound_dashboard/api/__init__.py +15 -0
  4. truthound_dashboard/api/deps.py +153 -0
  5. truthound_dashboard/api/drift.py +179 -0
  6. truthound_dashboard/api/error_handlers.py +287 -0
  7. truthound_dashboard/api/health.py +78 -0
  8. truthound_dashboard/api/history.py +62 -0
  9. truthound_dashboard/api/middleware.py +626 -0
  10. truthound_dashboard/api/notifications.py +561 -0
  11. truthound_dashboard/api/profile.py +52 -0
  12. truthound_dashboard/api/router.py +83 -0
  13. truthound_dashboard/api/rules.py +277 -0
  14. truthound_dashboard/api/schedules.py +329 -0
  15. truthound_dashboard/api/schemas.py +136 -0
  16. truthound_dashboard/api/sources.py +229 -0
  17. truthound_dashboard/api/validations.py +125 -0
  18. truthound_dashboard/cli.py +226 -0
  19. truthound_dashboard/config.py +132 -0
  20. truthound_dashboard/core/__init__.py +264 -0
  21. truthound_dashboard/core/base.py +185 -0
  22. truthound_dashboard/core/cache.py +479 -0
  23. truthound_dashboard/core/connections.py +331 -0
  24. truthound_dashboard/core/encryption.py +409 -0
  25. truthound_dashboard/core/exceptions.py +627 -0
  26. truthound_dashboard/core/logging.py +488 -0
  27. truthound_dashboard/core/maintenance.py +542 -0
  28. truthound_dashboard/core/notifications/__init__.py +56 -0
  29. truthound_dashboard/core/notifications/base.py +390 -0
  30. truthound_dashboard/core/notifications/channels.py +557 -0
  31. truthound_dashboard/core/notifications/dispatcher.py +453 -0
  32. truthound_dashboard/core/notifications/events.py +155 -0
  33. truthound_dashboard/core/notifications/service.py +744 -0
  34. truthound_dashboard/core/sampling.py +626 -0
  35. truthound_dashboard/core/scheduler.py +311 -0
  36. truthound_dashboard/core/services.py +1531 -0
  37. truthound_dashboard/core/truthound_adapter.py +659 -0
  38. truthound_dashboard/db/__init__.py +67 -0
  39. truthound_dashboard/db/base.py +108 -0
  40. truthound_dashboard/db/database.py +196 -0
  41. truthound_dashboard/db/models.py +732 -0
  42. truthound_dashboard/db/repository.py +237 -0
  43. truthound_dashboard/main.py +309 -0
  44. truthound_dashboard/schemas/__init__.py +150 -0
  45. truthound_dashboard/schemas/base.py +96 -0
  46. truthound_dashboard/schemas/drift.py +118 -0
  47. truthound_dashboard/schemas/history.py +74 -0
  48. truthound_dashboard/schemas/profile.py +91 -0
  49. truthound_dashboard/schemas/rule.py +199 -0
  50. truthound_dashboard/schemas/schedule.py +88 -0
  51. truthound_dashboard/schemas/schema.py +121 -0
  52. truthound_dashboard/schemas/source.py +138 -0
  53. truthound_dashboard/schemas/validation.py +192 -0
  54. truthound_dashboard/static/assets/index-BqJMyAHX.js +110 -0
  55. truthound_dashboard/static/assets/index-DMDxHCTs.js +465 -0
  56. truthound_dashboard/static/assets/index-Dm2D11TK.css +1 -0
  57. truthound_dashboard/static/index.html +15 -0
  58. truthound_dashboard/static/mockServiceWorker.js +349 -0
  59. truthound_dashboard-1.0.0.dist-info/METADATA +218 -0
  60. truthound_dashboard-1.0.0.dist-info/RECORD +62 -0
  61. truthound_dashboard-1.0.0.dist-info/WHEEL +4 -0
  62. truthound_dashboard-1.0.0.dist-info/entry_points.txt +5 -0
@@ -0,0 +1,626 @@
1
+ """Data sampling strategies for large dataset handling.
2
+
3
+ This module provides an extensible sampling system for handling large datasets
4
+ before validation. The Strategy pattern allows adding new sampling methods
5
+ without modifying existing code.
6
+
7
+ Supported formats:
8
+ - CSV files
9
+ - Parquet files
10
+ - JSON/JSONL files
11
+
12
+ Features:
13
+ - Automatic format detection
14
+ - Configurable size thresholds
15
+ - Multiple sampling strategies (random, head, stratified)
16
+ - Memory-efficient streaming for very large files
17
+
18
+ Example:
19
+ sampler = get_sampler()
20
+
21
+ # Check if sampling is needed
22
+ if sampler.needs_sampling("/path/to/large.csv"):
23
+ sampled_path = await sampler.sample("/path/to/large.csv", n=10000)
24
+ # Use sampled_path for validation
25
+
26
+ # Or use auto-sample which handles the logic
27
+ data_path = await sampler.auto_sample("/path/to/data.csv")
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import asyncio
33
+ import hashlib
34
+ import logging
35
+ from abc import ABC, abstractmethod
36
+ from dataclasses import dataclass
37
+ from enum import Enum
38
+ from pathlib import Path
39
+ from typing import Any
40
+
41
+ from truthound_dashboard.config import get_settings
42
+
43
+ logger = logging.getLogger(__name__)
44
+
45
+
46
class SamplingMethod(str, Enum):
    """Enumeration of the supported row-sampling techniques."""

    RANDOM = "random"  # uniform sample drawn across the whole dataset
    HEAD = "head"  # first N rows; cheapest option
    TAIL = "tail"  # last N rows
    STRATIFIED = "stratified"  # proportional sample per categorical group
    RESERVOIR = "reservoir"  # single-pass sampling for streamed input
54
+
55
+
56
@dataclass
class SamplingConfig:
    """Tunable knobs controlling when and how sampling happens.

    Attributes:
        size_threshold_mb: Files larger than this many MB trigger sampling.
        row_threshold: Row count above which sampling kicks in.
        default_sample_size: Rows to keep when no explicit size is given.
        method: Strategy used when the caller does not request one.
        seed: Seed that makes samples reproducible across runs.
        temp_dir: Where sampled files are written; resolved from app
            settings when left as None.
        cleanup_after_hours: Age after which temp sample files are deleted.
    """

    size_threshold_mb: float = 100.0
    row_threshold: int = 1_000_000
    default_sample_size: int = 10_000
    method: SamplingMethod = SamplingMethod.RANDOM
    seed: int = 42
    temp_dir: Path | None = None
    cleanup_after_hours: int = 24
77
+
78
+
79
@dataclass
class SamplingResult:
    """Outcome of one sampling operation.

    Attributes:
        original_path: Path to the original file.
        sampled_path: Path to the sampled file; equals ``original_path``
            when no sampling was performed.
        was_sampled: Whether sampling actually happened.
        original_rows: Row count of the original file, if known.
        sampled_rows: Row count of the sampled file, if known.
        method: Sampling method used, if any.
        size_reduction_pct: Percentage reduction in on-disk size.
    """

    original_path: str
    sampled_path: str
    was_sampled: bool
    original_rows: int | None = None
    sampled_rows: int | None = None
    method: SamplingMethod | None = None
    size_reduction_pct: float = 0.0

    def to_dict(self) -> dict[str, Any]:
        """Serialize the result to a plain, JSON-friendly dictionary."""
        method_name = self.method.value if self.method else None
        return {
            "original_path": self.original_path,
            "sampled_path": self.sampled_path,
            "was_sampled": self.was_sampled,
            "original_rows": self.original_rows,
            "sampled_rows": self.sampled_rows,
            "method": method_name,
            "size_reduction_pct": round(self.size_reduction_pct, 2),
        }
112
+
113
+
114
class SamplingStrategy(ABC):
    """Interface for pluggable sampling algorithms.

    Implement this class to add a new sampling method without touching
    existing code; instances are looked up by their ``method`` identifier.
    """

    @property
    @abstractmethod
    def method(self) -> SamplingMethod:
        """Identifier under which this strategy is registered."""
        ...

    @abstractmethod
    def sample(
        self,
        df: Any,
        n: int,
        seed: int = 42,
        **kwargs: Any,
    ) -> Any:
        """Return a sampled DataFrame drawn from ``df``.

        Args:
            df: DataFrame to sample from.
            n: Number of rows to sample.
            seed: Random seed for reproducibility.
            **kwargs: Additional strategy-specific arguments.

        Returns:
            Sampled DataFrame.
        """
        ...
146
+
147
+
148
class RandomSamplingStrategy(SamplingStrategy):
    """Uniform random sampling across the entire frame."""

    @property
    def method(self) -> SamplingMethod:
        return SamplingMethod.RANDOM

    def sample(
        self,
        df: Any,
        n: int,
        seed: int = 42,
        **kwargs: Any,
    ) -> Any:
        """Return ``n`` uniformly sampled rows, or ``df`` itself if it is
        already small enough."""
        if len(df) > n:
            return df.sample(n=n, seed=seed)
        return df
168
+
169
+
170
class HeadSamplingStrategy(SamplingStrategy):
    """Cheapest strategy: keep only the leading rows of the frame."""

    @property
    def method(self) -> SamplingMethod:
        return SamplingMethod.HEAD

    def sample(
        self,
        df: Any,
        n: int,
        seed: int = 42,
        **kwargs: Any,
    ) -> Any:
        """Return the first ``n`` rows of ``df`` (seed is unused)."""
        return df.head(n)
186
+
187
+
188
class TailSamplingStrategy(SamplingStrategy):
    """Keep only the trailing rows of the frame."""

    @property
    def method(self) -> SamplingMethod:
        return SamplingMethod.TAIL

    def sample(
        self,
        df: Any,
        n: int,
        seed: int = 42,
        **kwargs: Any,
    ) -> Any:
        """Return the last ``n`` rows of ``df`` (seed is unused)."""
        return df.tail(n)
204
+
205
+
206
class StratifiedSamplingStrategy(SamplingStrategy):
    """Sampling that preserves each group's share of a categorical column."""

    @property
    def method(self) -> SamplingMethod:
        return SamplingMethod.STRATIFIED

    def sample(
        self,
        df: Any,
        n: int,
        seed: int = 42,
        stratify_column: str | None = None,
        **kwargs: Any,
    ) -> Any:
        """Sample roughly ``n`` rows while keeping group proportions.

        Args:
            df: DataFrame to sample from.
            n: Total number of rows to sample.
            seed: Random seed.
            stratify_column: Column to stratify by. If None (or absent from
                the frame) this degrades to plain random sampling.
            **kwargs: Additional arguments.

        Returns:
            Sampled DataFrame with proportional representation.
        """
        row_count = len(df)
        if row_count <= n:
            return df

        if stratify_column is None or stratify_column not in df.columns:
            # No usable stratification key: degrade to random sampling.
            return df.sample(n=n, seed=seed)

        # Oversample every group by ~10% so per-group rounding does not
        # leave the combined sample short of n rows.
        per_group_fraction = min(1.0, (n / row_count) * 1.1)
        sampled = df.group_by(stratify_column).map_groups(
            lambda group: group.sample(
                fraction=per_group_fraction,
                seed=seed,
            )
        )

        # Trim back down to exactly n rows if the oversampling overshot.
        if len(sampled) > n:
            sampled = sampled.sample(n=n, seed=seed)

        return sampled
258
+
259
+
260
class DataSampler:
    """High-level coordinator for sampling oversized data files.

    Detects the file format, decides whether sampling is warranted,
    delegates row selection to a registered SamplingStrategy, and writes
    the sampled frame to a temp directory for downstream validation.

    Usage:
        sampler = DataSampler()
        result = await sampler.auto_sample("/path/to/large.csv")
        # Use result.sampled_path for validation
    """

    def __init__(self, config: SamplingConfig | None = None) -> None:
        """Initialize data sampler.

        Args:
            config: Sampling configuration. Uses defaults if not provided.
        """
        self._config = config or SamplingConfig()
        self._strategies: dict[SamplingMethod, SamplingStrategy] = {}
        self._register_default_strategies()

        # Resolve the temp directory from app settings when not configured.
        if self._config.temp_dir is None:
            settings = get_settings()
            self._config.temp_dir = settings.cache_dir / "samples"
        self._config.temp_dir.mkdir(parents=True, exist_ok=True)

    def _register_default_strategies(self) -> None:
        """Register all built-in sampling strategies."""
        self._strategies = {
            SamplingMethod.RANDOM: RandomSamplingStrategy(),
            SamplingMethod.HEAD: HeadSamplingStrategy(),
            SamplingMethod.TAIL: TailSamplingStrategy(),
            SamplingMethod.STRATIFIED: StratifiedSamplingStrategy(),
        }

    def register_strategy(self, strategy: SamplingStrategy) -> None:
        """Register a custom sampling strategy.

        Replaces any existing strategy registered for the same method.

        Args:
            strategy: Sampling strategy to register.
        """
        self._strategies[strategy.method] = strategy

    @property
    def config(self) -> SamplingConfig:
        """Get sampling configuration."""
        return self._config

    def get_file_info(self, path: str | Path) -> dict[str, Any]:
        """Get file information for the sampling decision.

        Args:
            path: Path to data file.

        Returns:
            Dict with keys ``path``, ``size_bytes``, ``size_mb``,
            ``format`` and ``estimated_rows`` (CSV only, else None).

        Raises:
            FileNotFoundError: If the file does not exist.
        """
        path = Path(path)

        if not path.exists():
            raise FileNotFoundError(f"File not found: {path}")

        size_bytes = path.stat().st_size
        size_mb = size_bytes / (1024 * 1024)

        # Detect format from the file extension.
        format_map = {
            ".csv": "csv",
            ".parquet": "parquet",
            ".pq": "parquet",
            ".json": "json",
            ".jsonl": "jsonl",
            ".ndjson": "jsonl",
        }
        file_format = format_map.get(path.suffix.lower(), "unknown")

        # For CSV, extrapolate the row count from the average line length
        # of the first 10KB (rough estimate; includes the header line).
        estimated_rows = None
        if file_format == "csv" and size_mb > 0:
            with open(path, encoding="utf-8", errors="ignore") as f:
                sample = f.read(10240)
            lines = sample.count("\n")
            if lines > 0:
                avg_line_size = len(sample) / lines
                estimated_rows = int(size_bytes / avg_line_size)

        return {
            "path": str(path),
            "size_bytes": size_bytes,
            "size_mb": round(size_mb, 2),
            "format": file_format,
            "estimated_rows": estimated_rows,
        }

    def needs_sampling(self, path: str | Path) -> bool:
        """Check if a file needs sampling based on size.

        Args:
            path: Path to data file.

        Returns:
            True if file exceeds the configured size threshold.
        """
        info = self.get_file_info(path)
        return info["size_mb"] > self._config.size_threshold_mb

    def _load_dataframe(self, path: str | Path) -> Any:
        """Load a data file into a polars DataFrame based on its suffix.

        Args:
            path: Path to data file.

        Returns:
            Polars DataFrame.
        """
        import polars as pl

        path = Path(path)
        suffix = path.suffix.lower()

        if suffix == ".csv":
            return pl.read_csv(path, infer_schema_length=10000)
        elif suffix in (".parquet", ".pq"):
            return pl.read_parquet(path)
        elif suffix == ".json":
            return pl.read_json(path)
        elif suffix in (".jsonl", ".ndjson"):
            return pl.read_ndjson(path)
        else:
            # Unknown extension: CSV is the most forgiving fallback.
            logger.warning(f"Unknown format {suffix}, trying CSV")
            return pl.read_csv(path, infer_schema_length=10000)

    def _save_dataframe(self, df: Any, path: Path, original_format: str) -> None:
        """Save DataFrame to ``path`` in a format matching its suffix.

        BUGFIX: the output suffix now takes precedence over
        ``original_format``. Previously a CSV/JSON original was written in
        its original format into the ``.parquet``-suffixed path produced by
        ``_generate_sample_path``, leaving e.g. CSV bytes in a ``.parquet``
        file that the suffix-driven ``_load_dataframe`` could not read back.

        Args:
            df: Polars DataFrame to save.
            path: Output path; its suffix decides the on-disk format.
            original_format: Original file format (fallback hint).
        """
        out_suffix = path.suffix.lower()
        if out_suffix in (".parquet", ".pq") or original_format in ("parquet", "pq"):
            df.write_parquet(path)
        elif out_suffix == ".json" or original_format == "json":
            df.write_json(path)
        elif out_suffix in (".jsonl", ".ndjson") or original_format in ("jsonl", "ndjson"):
            df.write_ndjson(path)
        else:
            # Default to CSV.
            df.write_csv(path)

    def _generate_sample_path(self, original_path: Path) -> Path:
        """Generate a unique path for the sampled file.

        Args:
            original_path: Path to original file.

        Returns:
            Path for sampled file in temp directory.
        """
        # Hash the original path for a stable, collision-resistant file
        # name. MD5 is a cheap fingerprint here, not a security boundary.
        path_hash = hashlib.md5(str(original_path).encode()).hexdigest()[:12]
        suffix = original_path.suffix

        # Store text formats as parquet for size and reload speed.
        if suffix in (".csv", ".json", ".jsonl", ".ndjson"):
            suffix = ".parquet"

        return self._config.temp_dir / f"sample_{path_hash}{suffix}"

    async def sample(
        self,
        path: str | Path,
        n: int | None = None,
        method: SamplingMethod | None = None,
        **kwargs: Any,
    ) -> SamplingResult:
        """Sample data from a file.

        Args:
            path: Path to data file.
            n: Number of rows to sample. Uses config default if not provided.
            method: Sampling method. Uses config default if not provided.
            **kwargs: Additional arguments for specific strategies.

        Returns:
            SamplingResult with paths and statistics.

        Raises:
            ValueError: If no strategy is registered for ``method``.
        """
        path = Path(path)
        n = n or self._config.default_sample_size
        method = method or self._config.method

        strategy = self._strategies.get(method)
        if strategy is None:
            raise ValueError(f"Unknown sampling method: {method}")

        # BUGFIX: asyncio.get_event_loop() is deprecated inside coroutines
        # since Python 3.10; get_running_loop() is the correct call here.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None,
            self._sample_sync,
            path,
            n,
            strategy,
            kwargs,
        )

    def _sample_sync(
        self,
        path: Path,
        n: int,
        strategy: SamplingStrategy,
        kwargs: dict[str, Any],
    ) -> SamplingResult:
        """Synchronous sampling implementation (runs in an executor thread).

        Args:
            path: Path to data file.
            n: Number of rows to sample.
            strategy: Sampling strategy to use.
            kwargs: Additional strategy arguments.

        Returns:
            SamplingResult with sampling details.
        """
        file_info = self.get_file_info(path)

        logger.info(f"Loading {path} for sampling ({file_info['size_mb']:.1f} MB)")
        df = self._load_dataframe(path)
        original_rows = len(df)

        # Small files pass through untouched.
        if original_rows <= n:
            logger.info(f"File has {original_rows} rows, no sampling needed")
            return SamplingResult(
                original_path=str(path),
                sampled_path=str(path),
                was_sampled=False,
                original_rows=original_rows,
                sampled_rows=original_rows,
            )

        logger.info(
            f"Sampling {n} rows from {original_rows} using {strategy.method.value}"
        )
        sampled_df = strategy.sample(
            df,
            n=n,
            seed=self._config.seed,
            **kwargs,
        )
        sampled_rows = len(sampled_df)

        # Persist the sample; output format follows the generated suffix.
        sample_path = self._generate_sample_path(path)
        self._save_dataframe(sampled_df, sample_path, file_info["format"])

        # Report how much smaller the sampled file is than the original.
        sampled_size = sample_path.stat().st_size
        size_reduction = (1 - sampled_size / file_info["size_bytes"]) * 100

        logger.info(
            f"Sampling complete: {original_rows} -> {sampled_rows} rows "
            f"({size_reduction:.1f}% size reduction)"
        )

        return SamplingResult(
            original_path=str(path),
            sampled_path=str(sample_path),
            was_sampled=True,
            original_rows=original_rows,
            sampled_rows=sampled_rows,
            method=strategy.method,
            size_reduction_pct=size_reduction,
        )

    async def auto_sample(
        self,
        path: str | Path,
        n: int | None = None,
        method: SamplingMethod | None = None,
        **kwargs: Any,
    ) -> SamplingResult:
        """Automatically sample if needed based on file size.

        This is the recommended entry point for most use cases: it checks
        the size threshold and only samples when it is exceeded.

        Args:
            path: Path to data file.
            n: Number of rows to sample if needed.
            method: Sampling method if sampling is needed.
            **kwargs: Additional strategy arguments.

        Returns:
            SamplingResult (was_sampled=False if no sampling needed).
        """
        path = Path(path)

        if not self.needs_sampling(path):
            # No sampling needed; report the original path unchanged.
            return SamplingResult(
                original_path=str(path),
                sampled_path=str(path),
                was_sampled=False,
            )

        return await self.sample(path, n=n, method=method, **kwargs)

    async def cleanup_old_samples(self, max_age_hours: int | None = None) -> int:
        """Delete temp sample files older than the retention window.

        Args:
            max_age_hours: Maximum age in hours. Uses config default if not
                provided.

        Returns:
            Number of files cleaned up.
        """
        import time

        max_age_hours = max_age_hours or self._config.cleanup_after_hours
        max_age_seconds = max_age_hours * 3600
        now = time.time()

        cleaned = 0
        for sample_file in self._config.temp_dir.glob("sample_*"):
            if sample_file.is_file():
                age = now - sample_file.stat().st_mtime
                if age > max_age_seconds:
                    sample_file.unlink()
                    cleaned += 1

        if cleaned > 0:
            logger.info(f"Cleaned up {cleaned} old sample files")

        return cleaned
605
+
606
+
607
# Lazily-created module-level singleton.
_sampler: DataSampler | None = None


def get_sampler() -> DataSampler:
    """Return the process-wide DataSampler, creating it on first use.

    Returns:
        DataSampler instance.
    """
    global _sampler
    sampler = _sampler
    if sampler is None:
        sampler = DataSampler()
        _sampler = sampler
    return sampler


def reset_sampler() -> None:
    """Drop the singleton so the next get_sampler() builds a fresh one (for testing)."""
    global _sampler
    _sampler = None