themis-eval 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
themis/__init__.py CHANGED
@@ -4,15 +4,37 @@ The primary interface is the `evaluate()` function:
 
     import themis
     report = themis.evaluate("math500", model="gpt-4", limit=100)
+
+Extension APIs for registering custom components:
+- themis.register_metric() - Register custom metrics
+- themis.register_dataset() - Register custom datasets
+- themis.register_provider() - Register custom model providers
+- themis.register_benchmark() - Register custom benchmark presets
 """
 
 from themis import config, core, evaluation, experiment, generation, project
 from themis._version import __version__
-from themis.api import evaluate
+from themis.api import evaluate, get_registered_metrics, register_metric
+from themis.datasets import register_dataset, list_datasets, is_dataset_registered
+from themis.presets import register_benchmark, list_benchmarks, get_benchmark_preset
+from themis.providers import register_provider
 
 __all__ = [
     # Main API
     "evaluate",
+    # Metrics
+    "register_metric",
+    "get_registered_metrics",
+    # Datasets
+    "register_dataset",
+    "list_datasets",
+    "is_dataset_registered",
+    # Benchmarks
+    "register_benchmark",
+    "list_benchmarks",
+    "get_benchmark_preset",
+    # Providers
+    "register_provider",
     # Submodules
     "config",
     "core",
themis/_version.py CHANGED
@@ -9,7 +9,7 @@ def _detect_version() -> str:
     try:
         return metadata.version("themis-eval")
     except metadata.PackageNotFoundError:  # pragma: no cover - local dev only
-        return "0.2.0"  # Fallback for development
+        return "0.2.2"  # Fallback for development
 
 
 __version__ = _detect_version()
themis/api.py CHANGED
@@ -33,6 +33,7 @@ Example:
 
 from __future__ import annotations
 
+import logging
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Callable, Sequence
@@ -52,6 +53,67 @@ from themis.generation.runner import GenerationRunner
 from themis.generation.templates import PromptTemplate
 from themis.providers import create_provider
 
+# Import provider modules to ensure they register themselves
+try:
+    from themis.generation import clients  # noqa: F401 - registers fake provider
+    from themis.generation.providers import (
+        litellm_provider,  # noqa: F401
+        vllm_provider,  # noqa: F401
+    )
+except ImportError:
+    pass
+
+logger = logging.getLogger(__name__)
+
+
+# Module-level metrics registry for custom metrics
+_METRICS_REGISTRY: dict[str, type] = {}
+
+
+def register_metric(name: str, metric_cls: type) -> None:
+    """Register a custom metric for use in evaluate().
+
+    This allows users to add their own metrics to Themis without modifying
+    the source code. Registered metrics can be used by passing their names
+    to the `metrics` parameter in evaluate().
+
+    Args:
+        name: Metric name (used in evaluate(metrics=[name]))
+        metric_cls: Metric class implementing the Metric interface.
+            Must have a compute() method that takes prediction, references,
+            and metadata parameters.
+
+    Raises:
+        TypeError: If metric_cls is not a class
+        ValueError: If metric_cls doesn't implement the required interface
+
+    Example:
+        >>> from themis.evaluation.metrics import MyCustomMetric
+        >>> themis.register_metric("my_metric", MyCustomMetric)
+        >>> report = themis.evaluate("math500", model="gpt-4", metrics=["my_metric"])
+    """
+    if not isinstance(metric_cls, type):
+        raise TypeError(f"metric_cls must be a class, got {type(metric_cls)}")
+
+    # Validate that it implements the Metric interface
+    if not hasattr(metric_cls, "compute"):
+        raise ValueError(
+            f"{metric_cls.__name__} must implement compute() method. "
+            f"See themis.evaluation.metrics for examples."
+        )
+
+    _METRICS_REGISTRY[name] = metric_cls
+    logger.info(f"Registered custom metric: {name} -> {metric_cls.__name__}")
+
+
+def get_registered_metrics() -> dict[str, type]:
+    """Get all currently registered custom metrics.
+
+    Returns:
+        Dictionary mapping metric names to their classes
+    """
+    return _METRICS_REGISTRY.copy()
+
 
 def evaluate(
     benchmark_or_dataset: str | Sequence[dict[str, Any]],
@@ -123,6 +185,19 @@ def evaluate(
     >>> print(f"Accuracy: {report.evaluation_report.metrics['accuracy']:.2%}")
         Accuracy: 85.00%
     """
+    logger.info("=" * 60)
+    logger.info("Starting Themis evaluation")
+    logger.info(f"Model: {model}")
+    logger.info(f"Workers: {workers}")
+    logger.info(f"Temperature: {temperature}, Max tokens: {max_tokens}")
+    if "api_base" in kwargs:
+        logger.info(f"Custom API base: {kwargs['api_base']}")
+    if "api_key" in kwargs:
+        logger.info("API key: <provided>")
+    else:
+        logger.warning("⚠️ No api_key provided - may fail for custom API endpoints")
+    logger.info("=" * 60)
+
     # Import presets system (lazy import to avoid circular dependencies)
     from themis.presets import get_benchmark_preset, parse_model_name
 
@@ -131,11 +206,23 @@ def evaluate(
 
     if is_benchmark:
         benchmark_name = benchmark_or_dataset
+        logger.info(f"Loading benchmark: {benchmark_name}")
+
         # Get preset configuration
-        preset = get_benchmark_preset(benchmark_name)
+        try:
+            preset = get_benchmark_preset(benchmark_name)
+        except Exception as e:
+            logger.error(f"❌ Failed to get benchmark preset '{benchmark_name}': {e}")
+            raise
 
         # Load dataset using preset loader
-        dataset = preset.load_dataset(limit=limit)
+        logger.info(f"Loading dataset (limit={limit})...")
+        try:
+            dataset = preset.load_dataset(limit=limit)
+            logger.info(f"✅ Loaded {len(dataset)} samples from {benchmark_name}")
+        except Exception as e:
+            logger.error(f"❌ Failed to load dataset: {e}")
+            raise
 
         # Use preset prompt if not overridden
         if prompt is None:
@@ -158,11 +245,14 @@ def evaluate(
         dataset_id_field = preset.dataset_id_field
     else:
         # Custom dataset
+        logger.info("Using custom dataset")
         dataset = list(benchmark_or_dataset)
+        logger.info(f"Custom dataset has {len(dataset)} samples")
 
         # Limit dataset if requested
         if limit is not None:
             dataset = dataset[:limit]
+            logger.info(f"Limited to {len(dataset)} samples")
 
         # Use provided prompt or default
         if prompt is None:
@@ -188,7 +278,15 @@ def evaluate(
         dataset_id_field = "id"
 
     # Parse model name to get provider and options
-    provider_name, model_id, provider_options = parse_model_name(model, **kwargs)
+    logger.info(f"Parsing model configuration...")
+    try:
+        provider_name, model_id, provider_options = parse_model_name(model, **kwargs)
+        logger.info(f"Provider: {provider_name}")
+        logger.info(f"Model ID: {model_id}")
+        logger.debug(f"Provider options: {provider_options}")
+    except Exception as e:
+        logger.error(f"❌ Failed to parse model name '{model}': {e}")
+        raise
 
     # Create model spec
     model_spec = ModelSpec(
@@ -214,17 +312,31 @@ def evaluate(
     )
 
     # Create provider and router
-    provider = create_provider(provider_name, **provider_options)
+    logger.info(f"Creating provider '{provider_name}'...")
+    try:
+        provider = create_provider(provider_name, **provider_options)
+        logger.info(f"✅ Provider created successfully")
+    except KeyError as e:
+        logger.error(f"❌ Provider '{provider_name}' not registered. Available providers: fake, litellm, openai, anthropic, azure, bedrock, gemini, cohere, vllm")
+        logger.error(f"   This usually means the provider module wasn't imported.")
+        raise
+    except Exception as e:
+        logger.error(f"❌ Failed to create provider: {e}")
+        raise
+
     router = ProviderRouter({model_id: provider})
+    logger.debug(f"Router configured for model: {model_id}")
 
     # Create runner
-    runner = GenerationRunner(provider=router)
+    runner = GenerationRunner(provider=router, max_parallel=workers)
+    logger.info(f"Runner configured with {workers} parallel workers")
 
     # Create evaluation pipeline
     pipeline = EvaluationPipeline(
         extractor=extractor,
         metrics=metrics_list,
     )
+    logger.info(f"Evaluation metrics: {[m.name for m in metrics_list]}")
 
     # Determine storage location
     if storage is None:
@@ -235,11 +347,15 @@ def evaluate(
     # Generate run ID if not provided
     if run_id is None:
         run_id = f"run-{datetime.now().strftime('%Y%m%d-%H%M%S')}"
+    logger.info(f"Run ID: {run_id}")
+    logger.info(f"Storage: {storage_dir}")
+    logger.info(f"Resume: {resume}")
 
     # Create storage backend
     if isinstance(storage_dir, Path):
         from themis.experiment.storage import ExperimentStorage
         storage_backend = ExperimentStorage(storage_dir)
+        logger.debug(f"Storage backend created at {storage_dir}")
     else:
         # Cloud storage (to be implemented in Phase 3)
         raise NotImplementedError(
@@ -264,15 +380,34 @@ def evaluate(
     )
 
     # Run locally
-    report = orchestrator.run(
-        dataset=dataset,
-        max_samples=limit,
-        run_id=run_id,
-        resume=resume,
-        on_result=on_result,
-    )
+    logger.info("=" * 60)
+    logger.info("🚀 Starting experiment execution...")
+    logger.info("=" * 60)
 
-    return report
+    try:
+        report = orchestrator.run(
+            dataset=dataset,
+            max_samples=limit,
+            run_id=run_id,
+            resume=resume,
+            on_result=on_result,
+        )
+
+        logger.info("=" * 60)
+        logger.info("✅ Evaluation completed successfully!")
+        logger.info(f"   Total samples: {len(report.generation_results)}")
+        logger.info(f"   Successful: {report.metadata.get('successful_generations', 0)}")
+        logger.info(f"   Failed: {report.metadata.get('failed_generations', 0)}")
+        if report.evaluation_report.metrics:
+            logger.info(f"   Metrics: {list(report.evaluation_report.metrics.keys())}")
+        logger.info("=" * 60)
+
+        return report
+    except Exception as e:
+        logger.error("=" * 60)
+        logger.error(f"❌ Evaluation failed: {e}")
+        logger.error("=" * 60)
+        raise
 
 
 def _resolve_metrics(metric_names: list[str]) -> list:
@@ -298,8 +433,8 @@ def _resolve_metrics(metric_names: list[str]) -> list:
     except ImportError:
         nlp_available = False
 
-    # Metric registry
-    METRICS_REGISTRY = {
+    # Built-in metrics registry
+    BUILTIN_METRICS = {
         # Core metrics
         "exact_match": ExactMatch,
         "math_verify": MathVerifyAccuracy,
@@ -308,7 +443,7 @@ def _resolve_metrics(metric_names: list[str]) -> list:
 
     # Add NLP metrics if available
     if nlp_available:
-        METRICS_REGISTRY.update({
+        BUILTIN_METRICS.update({
            "bleu": BLEU,
            "rouge1": lambda: ROUGE(variant=ROUGEVariant.ROUGE_1),
            "rouge2": lambda: ROUGE(variant=ROUGEVariant.ROUGE_2),
@@ -321,6 +456,10 @@ def _resolve_metrics(metric_names: list[str]) -> list:
     # "pass_at_k": PassAtK,
     # "codebleu": CodeBLEU,
 
+    # Merge built-in and custom metrics
+    # Custom metrics can override built-in metrics
+    METRICS_REGISTRY = {**BUILTIN_METRICS, **_METRICS_REGISTRY}
+
     metrics = []
     for name in metric_names:
         if name not in METRICS_REGISTRY:
@@ -340,4 +479,4 @@ def _resolve_metrics(metric_names: list[str]) -> list:
     return metrics
 
 
-__all__ = ["evaluate"]
+__all__ = ["evaluate", "register_metric", "get_registered_metrics"]
themis/experiment/orchestrator.py CHANGED
@@ -2,10 +2,13 @@
 
 from __future__ import annotations
 
+import logging
 from datetime import datetime, timezone
 from typing import Callable, Sequence
 
 from themis.config.schema import IntegrationsConfig
+
+logger = logging.getLogger(__name__)
 from themis.core.entities import (
     EvaluationRecord,
     ExperimentFailure,
@@ -102,6 +105,8 @@ class ExperimentOrchestrator:
         Returns:
             ExperimentReport with generation results, evaluation, and metadata
         """
+        logger.info("Orchestrator: Initializing experiment run")
+
         # Initialize integrations
         self._integrations.initialize_run(
             {
@@ -112,13 +117,23 @@ class ExperimentOrchestrator:
         )
 
         # Prepare dataset
-        dataset_list = self._resolve_dataset(
-            dataset=dataset, dataset_loader=dataset_loader, run_id=run_id
-        )
+        logger.info("Orchestrator: Loading dataset...")
+        try:
+            dataset_list = self._resolve_dataset(
+                dataset=dataset, dataset_loader=dataset_loader, run_id=run_id
+            )
+            logger.info(f"Orchestrator: Dataset loaded ({len(dataset_list)} total samples)")
+        except Exception as e:
+            logger.error(f"Orchestrator: ❌ Failed to load dataset: {e}")
+            raise
+
         selected_dataset = (
             dataset_list[:max_samples] if max_samples is not None else dataset_list
         )
         run_identifier = run_id or self._default_run_id()
+
+        logger.info(f"Orchestrator: Processing {len(selected_dataset)} samples")
+        logger.info(f"Orchestrator: Run ID = {run_identifier}")
 
         # Initialize run in storage (if storage exists and run doesn't exist)
         if self._cache.has_storage:
@@ -130,18 +145,30 @@ class ExperimentOrchestrator:
             self._cache.cache_dataset(run_identifier, dataset_list)
 
         # Expand dataset into generation tasks
-        tasks = list(self._plan.expand(selected_dataset))
+        logger.info("Orchestrator: Expanding dataset into generation tasks...")
+        try:
+            tasks = list(self._plan.expand(selected_dataset))
+            logger.info(f"Orchestrator: Created {len(tasks)} generation tasks")
+        except Exception as e:
+            logger.error(f"Orchestrator: ❌ Failed to expand dataset: {e}")
+            raise
 
         # Build evaluation configuration for cache invalidation
         evaluation_config = self._build_evaluation_config()
 
         # Load cached results if resuming
+        if resume:
+            logger.info("Orchestrator: Loading cached results...")
         cached_records = (
             self._cache.load_cached_records(run_identifier) if resume else {}
         )
         cached_evaluations = (
             self._cache.load_cached_evaluations(run_identifier, evaluation_config) if resume else {}
         )
+        if resume and cached_records:
+            logger.info(f"Orchestrator: Found {len(cached_records)} cached generation records")
+        if resume and cached_evaluations:
+            logger.info(f"Orchestrator: Found {len(cached_evaluations)} cached evaluation records")
 
         # Process tasks: use cached or run new generations
         generation_results: list[GenerationRecord] = []
@@ -178,9 +205,18 @@ class ExperimentOrchestrator:
 
         # Run pending generation tasks
         if pending_tasks:
+            logger.info(f"Orchestrator: Running {len(pending_tasks)} generation tasks...")
+            completed = 0
             for record in self._runner.run(pending_tasks):
+                logger.debug(f"Orchestrator: Received generation record")
                 generation_results.append(record)
+                completed += 1
+
+                # Log progress every 10 samples or at key milestones
+                if completed % 10 == 0 or completed == len(pending_tasks):
+                    logger.info(f"Orchestrator: Generation progress: {completed}/{len(pending_tasks)} ({100*completed//len(pending_tasks)}%)")
 
+                logger.debug(f"Orchestrator: Processing record (cost tracking...)")
                 # Track cost for successful generations
                 if record.output and record.output.usage:
                     usage = record.output.usage
@@ -197,6 +233,7 @@ class ExperimentOrchestrator:
                         cost=cost,
                     )
 
+                logger.debug(f"Orchestrator: Processing record (error handling...)")
                 if record.error:
                     failures.append(
                         ExperimentFailure(
@@ -204,20 +241,35 @@ class ExperimentOrchestrator:
                             message=record.error.message,
                         )
                     )
+
+                logger.debug(f"Orchestrator: Processing record (caching...)")
                 cache_key = experiment_storage.task_cache_key(record.task)
                 if cache_results:
                     self._cache.save_generation_record(
                         run_identifier, record, cache_key
                     )
+
+                logger.debug(f"Orchestrator: Processing record (adding to pending...)")
                 pending_records.append(record)
                 pending_keys.append(cache_key)
+
+                logger.debug(f"Orchestrator: Processing record (callback...)")
                 if on_result:
                     on_result(record)
+                logger.debug(f"Orchestrator: Record processing complete")
 
         # Evaluate pending records
+        logger.info(f"Orchestrator: Preparing to evaluate {len(pending_records)} pending records...")
         if pending_records:
-            new_evaluation_report = self._evaluation.evaluate(pending_records)
+            logger.info(f"Orchestrator: Starting evaluation of {len(pending_records)} records...")
+            try:
+                new_evaluation_report = self._evaluation.evaluate(pending_records)
+                logger.info(f"Orchestrator: ✅ Evaluation complete - got {len(new_evaluation_report.records)} results")
+            except Exception as e:
+                logger.error(f"Orchestrator: ❌ Evaluation failed: {e}")
+                raise
         else:
+            logger.info("Orchestrator: No new records to evaluate (all cached)")
             new_evaluation_report = evaluation_pipeline.EvaluationReport(
                 metrics={}, failures=[], records=[]
             )
@@ -229,12 +281,16 @@ class ExperimentOrchestrator:
         )
 
         # Combine cached and new evaluations
+        logger.info("Orchestrator: Combining cached and new evaluations...")
         evaluation_report = self._combine_evaluations(
             cached_eval_records, new_evaluation_report
         )
+        logger.info(f"Orchestrator: Total evaluation records: {len(evaluation_report.records)}")
 
         # Get cost breakdown
         cost_breakdown = self._cost_tracker.get_breakdown()
+        if cost_breakdown.total_cost > 0:
+            logger.info(f"Orchestrator: Total cost: ${cost_breakdown.total_cost:.4f}")
 
         # Build metadata
         metadata = {
themis/experiment/storage.py CHANGED
@@ -184,7 +184,7 @@ class ExperimentStorage:
         # In-memory caches
         self._task_index: dict[str, set[str]] = {}
         self._template_index: dict[str, dict[str, str]] = {}
-        self._locks: dict[str, int] = {}  # fd for lock files
+        self._locks: dict[str, tuple[int, int]] = {}  # (fd, count) for reentrant locks
 
     def _init_database(self):
         """Initialize SQLite metadata database."""
@@ -253,34 +253,175 @@ class ExperimentStorage:
 
     @contextlib.contextmanager
     def _acquire_lock(self, run_id: str):
-        """Acquire exclusive lock for run directory."""
+        """Acquire exclusive lock for run directory with timeout (reentrant).
+
+        This lock is reentrant within the same thread to prevent deadlocks when
+        the same process acquires the lock multiple times (e.g., start_run()
+        followed by append_record()).
+
+        The lock uses OS-specific file locking:
+        - Unix/Linux/macOS: fcntl.flock with non-blocking retry
+        - Windows: msvcrt.locking
+        - Fallback: No locking (single-process mode)
+
+        Args:
+            run_id: Unique run identifier
+
+        Yields:
+            Context manager that holds the lock
+
+        Raises:
+            TimeoutError: If lock cannot be acquired within 30 seconds
+        """
+        import time
+
+        # Check if we already hold the lock (reentrant)
+        if run_id in self._locks:
+            lock_fd, count = self._locks[run_id]
+            self._locks[run_id] = (lock_fd, count + 1)
+            try:
+                yield
+            finally:
+                # Check if lock still exists (might have been cleaned up by another thread)
+                if run_id in self._locks:
+                    lock_fd, count = self._locks[run_id]
+                    if count > 1:
+                        self._locks[run_id] = (lock_fd, count - 1)
+                    else:
+                        # Last unlock - release the actual lock
+                        self._release_os_lock(lock_fd, run_id)
+            return
+
+        # First time acquiring lock for this run_id
         lock_path = self._get_run_dir(run_id) / ".lock"
         lock_path.parent.mkdir(parents=True, exist_ok=True)
 
-        # Open lock file
-        lock_fd = os.open(lock_path, os.O_CREAT | os.O_RDWR)
+        # Open lock file (OS-independent flags)
+        lock_fd = os.open(str(lock_path), os.O_CREAT | os.O_RDWR)
 
         try:
-            # Acquire exclusive lock (blocking)
-            if sys.platform == "win32":
-                # Windows file locking
-                msvcrt.locking(lock_fd, msvcrt.LK_LOCK, 1)
-            elif FCNTL_AVAILABLE:
-                # Unix file locking
-                fcntl.flock(lock_fd, fcntl.LOCK_EX)
-            # If neither available, proceed without locking (single-process only)
+            # Acquire exclusive lock with timeout
+            self._acquire_os_lock(lock_fd, run_id, lock_path, timeout=30)
 
-            self._locks[run_id] = lock_fd
+            self._locks[run_id] = (lock_fd, 1)
             yield
         finally:
-            # Release lock
-            if sys.platform == "win32":
+            # Release lock (only if this was the outermost lock)
+            if run_id in self._locks:
+                lock_fd, count = self._locks[run_id]
+                if count == 1:
+                    self._release_os_lock(lock_fd, run_id)
+                else:
+                    # Decrement count
+                    self._locks[run_id] = (lock_fd, count - 1)
+
+    def _acquire_os_lock(
+        self,
+        lock_fd: int,
+        run_id: str,
+        lock_path: Path,
+        timeout: int = 30
+    ) -> None:
+        """Acquire OS-specific file lock with timeout.
+
+        Args:
+            lock_fd: File descriptor for lock file
+            run_id: Run identifier (for error messages)
+            lock_path: Path to lock file (for error messages)
+            timeout: Timeout in seconds
+
+        Raises:
+            TimeoutError: If lock cannot be acquired within timeout
+        """
+        import time
+
+        if sys.platform == "win32":
+            # Windows file locking with retry
+            try:
+                import msvcrt
+            except ImportError:
+                # msvcrt not available - single-process mode
+                import logging
+                logger = logging.getLogger(__name__)
+                logger.debug("msvcrt not available. Single-process mode only.")
+                return
+
+            start_time = time.time()
+            while True:
+                try:
+                    msvcrt.locking(lock_fd, msvcrt.LK_NBLCK, 1)
+                    break  # Lock acquired
+                except OSError as e:
+                    # Lock is held by another thread/process (errno 13 Permission denied)
+                    if time.time() - start_time > timeout:
+                        try:
+                            os.close(lock_fd)
+                        except:
+                            pass
+                        raise TimeoutError(
+                            f"Failed to acquire lock for run {run_id} after {timeout}s on Windows. "
+                            f"This usually means another process is holding the lock or a previous process crashed. "
+                            f"Try deleting: {lock_path}"
+                        ) from e
+                    time.sleep(0.1)  # Wait 100ms before retry
+        elif FCNTL_AVAILABLE:
+            # Unix file locking with non-blocking retry
+            start_time = time.time()
+            while True:
+                try:
+                    fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+                    break  # Lock acquired
+                except (IOError, OSError) as e:
+                    # Lock is held by another process
+                    if time.time() - start_time > timeout:
+                        try:
+                            os.close(lock_fd)
+                        except:
+                            pass
+                        raise TimeoutError(
+                            f"Failed to acquire lock for run {run_id} after {timeout}s. "
+                            f"This usually means another process is holding the lock or a previous process crashed. "
+                            f"Try: rm -f {lock_path}"
+                        ) from e
+                    time.sleep(0.1)  # Wait 100ms before retry
+        else:
+            # No locking available - single-process mode
+            # This is safe for single-process usage (most common case)
+            import logging
+            logger = logging.getLogger(__name__)
+            logger.debug(
+                f"File locking not available on this platform. "
+                f"Storage will work in single-process mode only."
+            )
+
+    def _release_os_lock(self, lock_fd: int, run_id: str) -> None:
+        """Release OS-specific file lock.
+
+        Args:
+            lock_fd: File descriptor to close
+            run_id: Run identifier (for cleanup)
+        """
+        # Release lock
+        if sys.platform == "win32":
+            try:
+                import msvcrt
                 msvcrt.locking(lock_fd, msvcrt.LK_UNLCK, 1)
-            elif FCNTL_AVAILABLE:
+            except (ImportError, OSError):
+                pass  # Lock may already be released
+        elif FCNTL_AVAILABLE:
+            try:
                 fcntl.flock(lock_fd, fcntl.LOCK_UN)
-
+            except (IOError, OSError):
+                pass  # Lock may already be released
+
+        # Close file descriptor
+        try:
             os.close(lock_fd)
-        self._locks.pop(run_id, None)
+        except OSError:
+            pass  # FD may already be closed
+
+        # Clean up tracking
+        self._locks.pop(run_id, None)
 
     def start_run(
         self,
@@ -456,16 +597,19 @@ class ExperimentStorage:
 
         try:
             if self._config.compression == "gzip":
+                # Close the fd first since gzip.open will open by path
+                os.close(temp_fd)
                 with gzip.open(temp_path, "wt", encoding="utf-8") as f:
                     f.write(json_line)
                     f.flush()
                     os.fsync(f.fileno())
             else:
+                # Use the fd directly
                 with open(temp_fd, "w", encoding="utf-8") as f:
                     f.write(json_line)
                     f.flush()
                     os.fsync(f.fileno())
-            os.close(temp_fd)
+            # fd is closed by context manager, don't close again
 
             # Get target path with compression
             target_path = (
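
The reentrancy added to `_acquire_lock()` can be seen with nested acquisitions. `_acquire_lock` is an internal context manager, so this sketch is purely illustrative of the (fd, count) bookkeeping described in the docstring above:

```python
from pathlib import Path
from themis.experiment.storage import ExperimentStorage

storage = ExperimentStorage(Path("./runs"))

# Nested acquisition within one thread no longer deadlocks: the inner call
# bumps the stored (fd, count) pair; only the outermost exit releases the
# OS-level lock via _release_os_lock().
with storage._acquire_lock("run-1"):
    with storage._acquire_lock("run-1"):
        pass  # count == 2 here; drops back to 1 on exit
```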
themis/generation/providers/litellm_provider.py CHANGED
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import logging
 import threading
 from dataclasses import dataclass
 from typing import Any, Dict
@@ -10,6 +11,8 @@ from themis.core import entities as core_entities
 from themis.interfaces import ModelProvider
 from themis.providers import register_provider
 
+logger = logging.getLogger(__name__)
+
 
 @dataclass
 class LiteLLMProvider(ModelProvider):
@@ -57,7 +60,22 @@ class LiteLLMProvider(ModelProvider):
             litellm.drop_params = self.drop_params
             if self.max_retries > 0:
                 litellm.num_retries = self.max_retries
+
+            logger.debug(f"LiteLLMProvider initialized:")
+            logger.debug(f"  api_base: {self.api_base or 'default'}")
+            logger.debug(f"  timeout: {self.timeout}s")
+            logger.debug(f"  max_retries: {self.max_retries}")
+            logger.debug(f"  n_parallel: {self.n_parallel}")
+
+            # Warn if api_base is set but no api_key
+            if self.api_base and not self.api_key:
+                logger.warning(
+                    "⚠️ LiteLLMProvider: api_base is set but api_key is not. "
+                    "This may cause authentication errors. "
+                    "Set api_key='dummy' for local servers."
+                )
         except ImportError as exc:
+            logger.error("❌ LiteLLM is not installed")
             raise RuntimeError(
                 "LiteLLM is not installed. Install via `pip install litellm` or "
                 "`uv add litellm` to use LiteLLMProvider."
@@ -70,6 +88,10 @@ class LiteLLMProvider(ModelProvider):
 
         messages = self._build_messages(task)
         completion_kwargs = self._build_completion_kwargs(task, messages)
+
+        logger.debug(f"LiteLLMProvider: Calling model={completion_kwargs.get('model')}")
+        if self.api_base:
+            logger.debug(f"LiteLLMProvider: Using custom api_base={self.api_base}")
 
         try:
             with self._semaphore:
@@ -131,6 +153,30 @@ class LiteLLMProvider(ModelProvider):
                 details["status_code"] = exc.status_code  # type: ignore
             if hasattr(exc, "llm_provider"):
                 details["llm_provider"] = exc.llm_provider  # type: ignore
+
+            # Log with helpful context
+            if "AuthenticationError" in error_type or "api_key" in error_message.lower():
+                logger.error(
+                    f"LiteLLMProvider: ❌ Authentication error for model {task.model.identifier}"
+                )
+                logger.error(
+                    f"  Error: {error_message[:200]}"
+                )
+                logger.error(
+                    f"  Hint: If using a custom api_base, ensure you also pass api_key='dummy'"
+                )
+            elif "Connection" in error_type or "timeout" in error_message.lower():
+                logger.error(
+                    f"LiteLLMProvider: ❌ Connection error for model {task.model.identifier}"
+                )
+                logger.error(f"  Error: {error_message[:200]}")
+                if self.api_base:
+                    logger.error(f"  Check that the server at {self.api_base} is running")
+            else:
+                logger.error(
+                    f"LiteLLMProvider: ❌ Generation failed for {task.model.identifier}: "
+                    f"{error_type}: {error_message[:200]}"
+                )
 
         return core_entities.GenerationRecord(
             task=task,
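
The new warning and error hints target a common misconfiguration: pointing `api_base` at a local OpenAI-compatible server without an API key. A sketch of the suggested call (the endpoint URL and model name are placeholders; `api_base` and `api_key` flow through `evaluate()`'s kwargs as shown earlier):

```python
import themis

# For a local OpenAI-compatible server, pass a dummy key as the log hint suggests.
report = themis.evaluate(
    "math500",
    model="openai/my-local-model",        # placeholder model name
    api_base="http://localhost:8000/v1",  # placeholder endpoint
    api_key="dummy",                      # avoids the authentication warning above
    limit=10,
)
```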
themis/generation/runner.py CHANGED
@@ -49,16 +49,32 @@ class GenerationRunner:
     ) -> Iterator[core_entities.GenerationRecord]:
         task_list = list(tasks)
         if not task_list:
+            logger.info("Runner: No tasks to execute")
             return
+
+        logger.info(f"Runner: Starting execution of {len(task_list)} tasks with {self._max_parallel} workers")
+
         if self._max_parallel <= 1:
-            for task in task_list:
+            logger.info("Runner: Using sequential execution (1 worker)")
+            for i, task in enumerate(task_list, 1):
+                logger.debug(f"Runner: Processing task {i}/{len(task_list)}")
                 yield self._execute_task(task)
             return
 
+        logger.info(f"Runner: Using parallel execution ({self._max_parallel} workers)")
         with ThreadPoolExecutor(max_workers=self._max_parallel) as executor:
             futures = [executor.submit(self._execute_task, task) for task in task_list]
+            completed = 0
             for future in futures:
-                yield future.result()
+                try:
+                    result = future.result()
+                    completed += 1
+                    if completed % max(1, len(task_list) // 10) == 0 or completed == len(task_list):
+                        logger.debug(f"Runner: Completed {completed}/{len(task_list)} tasks")
+                    yield result
+                except Exception as e:
+                    logger.error(f"Runner: Task execution failed: {e}")
+                    raise
 
     def _run_single_attempt(
         self, task: core_entities.GenerationTask
@@ -70,7 +86,7 @@ class GenerationRunner:
         for attempt in range(1, self._max_retries + 1):
             try:
                 logger.debug(
-                    "Starting generation for %s attempt %s/%s",
+                    "Runner: Starting generation for %s (attempt %s/%s)",
                     task_label,
                     attempt,
                     self._max_retries,
@@ -79,16 +95,16 @@ class GenerationRunner:
                 record.metrics["generation_attempts"] = attempt
                 if attempt_errors:
                     record.metrics.setdefault("retry_errors", attempt_errors)
-                logger.debug("Completed %s in %s attempt(s)", task_label, attempt)
+                logger.debug("Runner: ✅ Completed %s in %s attempt(s)", task_label, attempt)
                 return record
             except Exception as exc:  # pragma: no cover - defensive path
                 last_error = exc
                 logger.warning(
-                    "Attempt %s/%s for %s failed: %s",
+                    "Runner: ⚠️ Attempt %s/%s for %s failed: %s",
                     attempt,
                     self._max_retries,
                     task_label,
-                    exc,
+                    str(exc)[:100],  # Truncate long error messages
                 )
                 attempt_errors.append(
                     {
themis/presets/__init__.py CHANGED
@@ -4,7 +4,18 @@ This module provides automatic configuration for popular benchmarks,
 eliminating the need for manual setup of prompts, metrics, and extractors.
 """
 
-from themis.presets.benchmarks import get_benchmark_preset, list_benchmarks
+from themis.presets.benchmarks import (
+    BenchmarkPreset,
+    get_benchmark_preset,
+    list_benchmarks,
+    register_benchmark,
+)
 from themis.presets.models import parse_model_name
 
-__all__ = ["get_benchmark_preset", "list_benchmarks", "parse_model_name"]
+__all__ = [
+    "BenchmarkPreset",
+    "register_benchmark",
+    "get_benchmark_preset",
+    "list_benchmarks",
+    "parse_model_name",
+]
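
With `BenchmarkPreset` and `register_benchmark` now re-exported, preset lookup can be driven directly from this package. A short sketch using only names confirmed elsewhere in this diff (`load_dataset(limit=...)` and `dataset_id_field` appear in `themis/api.py` above):

```python
from themis.presets import get_benchmark_preset, list_benchmarks

print(list_benchmarks())                  # available preset names
preset = get_benchmark_preset("math500")
dataset = preset.load_dataset(limit=5)    # same loader evaluate() uses internally
print(preset.dataset_id_field, len(dataset))
```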
themis/utils/logging_utils.py CHANGED
@@ -5,6 +5,9 @@ from __future__ import annotations
 import logging
 from typing import Mapping
 
+from rich.logging import RichHandler
+from rich.traceback import install as install_rich_traceback
+
 TRACE_LEVEL = 5
 logging.addLevelName(TRACE_LEVEL, "TRACE")
 
@@ -28,12 +31,14 @@ _LEVELS: Mapping[str, int] = {
 
 def configure_logging(level: str = "info") -> None:
     """Configure root logging with human-friendly formatting."""
-
+    install_rich_traceback()
     numeric_level = _LEVELS.get(level.lower(), logging.INFO)
+
     logging.basicConfig(
         level=numeric_level,
-        format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
-        datefmt="%H:%M:%S",
+        format="%(message)s",
+        datefmt="[%X]",
+        handlers=[RichHandler(rich_tracebacks=True, markup=True)],
         force=True,
     )
 
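
`configure_logging()` now routes all output through a `RichHandler` and installs rich tracebacks, so calling it once at startup is enough to get the formatted logs shown throughout this release:

```python
from themis.utils.logging_utils import configure_logging

# Level names come from the module's _LEVELS mapping, e.g. "info" or "debug".
configure_logging("debug")
```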
themis/utils/progress.py CHANGED
@@ -5,7 +5,16 @@ from __future__ import annotations
 from contextlib import AbstractContextManager
 from typing import Any, Callable
 
-from tqdm import tqdm
+from rich.progress import (
+    BarColumn,
+    MofNCompleteColumn,
+    Progress,
+    SpinnerColumn,
+    TaskProgressColumn,
+    TextColumn,
+    TimeElapsedColumn,
+    TimeRemainingColumn,
+)
 
 
 class ProgressReporter(AbstractContextManager["ProgressReporter"]):
@@ -21,7 +30,8 @@ class ProgressReporter(AbstractContextManager["ProgressReporter"]):
         self._description = description
         self._unit = unit
         self._leave = leave
-        self._pbar: tqdm | None = None
+        self._progress: Progress | None = None
+        self._task_id = None
 
     def __enter__(self) -> "ProgressReporter":
         self.start()
@@ -31,22 +41,31 @@ class ProgressReporter(AbstractContextManager["ProgressReporter"]):
         self.close()
 
     def start(self) -> None:
-        if self._pbar is None:
-            self._pbar = tqdm(
-                total=self._total,
-                desc=self._description,
-                unit=self._unit,
-                leave=self._leave,
+        if self._progress is None:
+            self._progress = Progress(
+                SpinnerColumn(),
+                TextColumn("[progress.description]{task.description}"),
+                BarColumn(),
+                TaskProgressColumn(),
+                MofNCompleteColumn(),
+                TimeElapsedColumn(),
+                TimeRemainingColumn(),
+                transient=not self._leave,
+            )
+            self._progress.start()
+            self._task_id = self._progress.add_task(
+                self._description, total=self._total
             )
 
     def close(self) -> None:
-        if self._pbar is not None:
-            self._pbar.close()
-            self._pbar = None
+        if self._progress is not None:
+            self._progress.stop()
+            self._progress = None
+            self._task_id = None
 
     def increment(self, step: int = 1) -> None:
-        if self._pbar is not None:
-            self._pbar.update(step)
+        if self._progress is not None and self._task_id is not None:
+            self._progress.update(self._task_id, advance=step)
 
     def on_result(self, _record: Any) -> None:
         self.increment()
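
The rewritten `ProgressReporter` keeps the same public surface (`start()`, `close()`, `increment()`, `on_result()`), so it can still be wired to `evaluate()`'s `on_result` callback. A sketch — the constructor keywords are assumptions inferred from the `self._total`/`self._description` fields above:

```python
import themis
from themis.utils.progress import ProgressReporter

with ProgressReporter(total=100, description="Evaluating") as reporter:
    # on_result is invoked once per generation record, advancing the rich bar.
    themis.evaluate("math500", model="gpt-4", limit=100, on_result=reporter.on_result)
```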
themis_eval-0.2.0.dist-info/METADATA → themis_eval-0.2.2.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: themis-eval
-Version: 0.2.0
+Version: 0.2.2
 Summary: Lightweight evaluation platform for LLM experiments
 Author: Pittawat Taveekitworachai
 License: MIT
@@ -25,6 +25,7 @@ Requires-Dist: tabulate>=0.9.0
 Requires-Dist: tenacity>=9.1.2
 Requires-Dist: plotly>=6.5.0
 Requires-Dist: math-verify>=0.8.0
+Requires-Dist: rich>=14.2.0
 Provides-Extra: dev
 Requires-Dist: pytest>=8.0; extra == "dev"
 Requires-Dist: pytest-cov>=6.0.0; extra == "dev"
@@ -358,9 +359,9 @@ Themis is built on a clean, modular architecture:
 
 - **[API Reference](docs/index.md)** - Detailed API documentation
 - **[Examples](examples-simple/)** - Runnable code examples
-- **[Extending Backends](docs/EXTENDING_BACKENDS.md)** - Custom storage and execution
-- **[API Server](docs/API_SERVER.md)** - Web dashboard and REST API
-- **[Comparison Engine](docs/COMPARISON.md)** - Statistical testing guide
+- **[Extending Backends](docs/customization/backends.md)** - Custom storage and execution
+- **[API Server](docs/reference/api-server.md)** - Web dashboard and REST API
+- **[Comparison Engine](docs/guides/comparison.md)** - Statistical testing guide
 
 ---
 
@@ -388,7 +389,7 @@ result = evaluate(
 )
 ```
 
-See [EXTENDING_BACKENDS.md](docs/EXTENDING_BACKENDS.md) for details.
+See [docs/customization/backends.md](docs/customization/backends.md) for details.
 
 ### Distributed Execution
 
themis_eval-0.2.0.dist-info/RECORD → themis_eval-0.2.2.dist-info/RECORD
@@ -1,6 +1,6 @@
-themis/__init__.py,sha256=Pswn5ZiXyU5ANoknjdBLkqouZQdeWMm3DoUMVzU_j8M,543
-themis/_version.py,sha256=xRJB6N107oMsasuLYKaoIzuBo5Oe2hlK3-lGyTzxAC8,378
-themis/api.py,sha256=myHeMaWQMnyjCUAlr9P6cX2Awt50q1XGtyKDCimJgCg,12077
+themis/__init__.py,sha256=rQL3njf3i5lnAcmu0HuRzGGMELbA9xX21hzw4HrbIxw,1394
+themis/_version.py,sha256=y0Oqv0Je2udPmKCy5_D8Lib7GNLGxtLVp8b5WdavITg,378
+themis/api.py,sha256=flZTbU-jRcbv7oXcfRKG4hkZjASmWlT52A4PghKj9G0,17700
 themis/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 themis/backends/__init__.py,sha256=RWM5SnV5FrS_cVjpHHeZZM_b9CgqBu1rPS5DlT5YQTY,578
 themis/backends/execution.py,sha256=RAFuB9ri8TMil5PcnsisypKO2ViyLFXj08P_vjNYguU,6095
@@ -112,9 +112,9 @@ themis/experiment/export_csv.py,sha256=80w3gEGjeLjuiNq539rRP73k3MBtwrzJy90hgE91A
 themis/experiment/integration_manager.py,sha256=wTVTjDGcUkzz4tfnwSxa5nK1A4e2FKCPazDYGcdzYS8,3325
 themis/experiment/math.py,sha256=P2E9F_UKI7pb-aXepSztGdr_g309WEMe83zqg1nWO7A,6973
 themis/experiment/mcq.py,sha256=DDB99FHQsU_5vMIRDRhSZ7pReYvVf57wLmmo3OU_An4,6276
-themis/experiment/orchestrator.py,sha256=-6epspKnPoAJQPKzoNAxd54MrEX3lIhrKyqQ9dmD00A,16120
+themis/experiment/orchestrator.py,sha256=VeSasDmCXrYlrv1r47I698RUq14vEBR7c_uyZzM01hw,19304
 themis/experiment/pricing.py,sha256=fTM32yE3L8vahMP4sr1zr7dbp9zYCjiPN4D4VuZ8-q8,9346
-themis/experiment/storage.py,sha256=QS3fJD79bzgodM5x79yJ2A69O5hTL2r2ROAKSvtRnkI,49471
+themis/experiment/storage.py,sha256=ujGiQTeRPOfS8hYHB1a7F9t-dQnXquhqomI1vDjqmno,55250
 themis/experiment/visualization.py,sha256=dJYHrp3mntl8CPc5HPI3iKqPztVsddQB3ogRkd_FCNc,18473
 themis/generation/__init__.py,sha256=6KVwCQYMpPIsXNuWDZOGuqHkUkA45lbSacIFn8ZbD4s,36
 themis/generation/agentic_runner.py,sha256=armBQBk7qZDBEwT8HqjIWomYDQm57NfrP5CZJzay2uA,13669
@@ -123,18 +123,18 @@ themis/generation/clients.py,sha256=6apXCp_VNQosnpnmohTHOhHGXw-VZgsUyLds8MwtYUE,
 themis/generation/conversation_runner.py,sha256=kSZHwEvfqzxZ-eQYxmg5OkNZcgEHggZExjad6nBOeTM,7980
 themis/generation/plan.py,sha256=RmPIdefXkQMHYv5EWiilpx91I9a-svw31imvG0wV3fE,15961
 themis/generation/router.py,sha256=jZc0KFL483f8TrYtt9yxzFKs-T9CG2CoE2kfOQdHMEc,1082
-themis/generation/runner.py,sha256=iHTE5vSMWMYRrv4PEWMaZflF939nv1wWccK8V0e092c,8009
+themis/generation/runner.py,sha256=pH4Dw77qskMQk3yxEkaHYAl1PItTofI7OXdvevnFiCA,8984
 themis/generation/strategies.py,sha256=hjqaVkNycFxJWh_edJ7ilBl7HS6bL-8pYm24zTfoAvg,2975
 themis/generation/templates.py,sha256=ut_6akp8Y6Ey_9O3s64jDbwCB74pw62Zf8URlYcKHkA,2325
 themis/generation/turn_strategies.py,sha256=w33qhzpQbGTsfeOgOgMDovV0wEeXeNZUUBm5yZy1naw,10973
 themis/generation/types.py,sha256=MkJnZk6lMHmHzlJVEsuIC9ioRW8XhWcSk9AdDeb_aLE,338
-themis/generation/providers/litellm_provider.py,sha256=rlTuglIwhcvSakCo5G-ffgQtEHbCEX0ZeKk6M1MaWmU,8155
+themis/generation/providers/litellm_provider.py,sha256=tvLY8hrSjo4CnyWzccFp1PkXj8R2j8pda5irJiarWd8,10334
 themis/generation/providers/vllm_provider.py,sha256=0K4we6xDrRXlBXseC1ixLq2sJpRF4T8Ikv45dw-zNk4,4625
 themis/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 themis/integrations/huggingface.py,sha256=vrLwYwn65pU4W3FUe0ImCOZxKKlpRshDqMoLFsclB3E,2370
 themis/integrations/wandb.py,sha256=LJOPojjlqG05EIPxcjy3QmA15Gxgs1db3encDWVzYYw,2545
 themis/interfaces/__init__.py,sha256=78dNE_eHfFmb9hXNy5sLZ1jOTGWS8TzdVE_eiYQPFVc,5967
-themis/presets/__init__.py,sha256=hkoyODYiWFFSQAIKTpEbAIUuFIwTibBhzTOkiTbzhVQ,411
+themis/presets/__init__.py,sha256=w58fJcy4eNiE034qHO2xE5pp-H-4LNLXo5hLMuC7wIQ,533
 themis/presets/benchmarks.py,sha256=s9JxRogHwZs8oiuiI7Z7uiUBZXEp3gg7AQZnBvdGieA,12026
 themis/presets/models.py,sha256=c6-I_drHa4vMLIajSkCcrFbsJOsauFjY8fU1leBxZLg,5173
 themis/project/__init__.py,sha256=vgLv2nS62yz1XsFSFzFf7eIo6FyQJXpOY9OPRUcTQLQ,465
@@ -147,11 +147,11 @@ themis/server/app.py,sha256=OZ39gCC47AXVqZxroC_4KtIYBYx_rfpde7C25AF3EI0,11166
 themis/utils/api_generator.py,sha256=3oQ7mGZlFx2Dpm45pMg3rNIqNK2Smj05PjOMXp5RIkQ,10776
 themis/utils/cost_tracking.py,sha256=9_Z2iTfNaQse9G_bnqn4hme4T0fG2W-fxOLEDeF_3VI,11545
 themis/utils/dashboard.py,sha256=2yiIu9_oENglTde_J3G1d5cpQ5VtSnfbUvdliw5Og1E,13008
-themis/utils/logging_utils.py,sha256=YNSiDfO4LsciSzUhHF1aTVI5rkfnWiVbn1NcGjjmJuQ,1019
-themis/utils/progress.py,sha256=b3YwHKV5x3Cvr5rBukqifJimK3Si4CGY2fpN6a_ZySI,1434
+themis/utils/logging_utils.py,sha256=buC64X-xOu-2SZ0wVkz3nCXzYVGiqKbxK-8DGSGsAdM,1173
+themis/utils/progress.py,sha256=HS0-yVbRT7Ai9zRlsJcex_OKP6dUiKx1vOp_IsobiHM,2097
 themis/utils/tracing.py,sha256=VTeiRjcW_B5fOOoSeAp37nrmlwP1DiqPcoe6OtIQ7dk,8468
-themis_eval-0.2.0.dist-info/licenses/LICENSE,sha256=K5FLE7iqn5-_6k1sf3IGy7w-Wx_Vdx3t0sOVJByNlF0,1076
-themis_eval-0.2.0.dist-info/METADATA,sha256=S4dy0AD2REsRtPfULUYMiYC2Zk8nWgz4BWjBBJz2gHU,15173
-themis_eval-0.2.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
-themis_eval-0.2.0.dist-info/top_level.txt,sha256=QGIl4v-KB32upFS5UTXMJxHVX3vF7yBso82wJFI1Vbs,7
-themis_eval-0.2.0.dist-info/RECORD,,
+themis_eval-0.2.2.dist-info/licenses/LICENSE,sha256=K5FLE7iqn5-_6k1sf3IGy7w-Wx_Vdx3t0sOVJByNlF0,1076
+themis_eval-0.2.2.dist-info/METADATA,sha256=eOlF2Obimv_822azCt0vwhLaBz3CKsuvJPgDHMA3WFU,15235
+themis_eval-0.2.2.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
+themis_eval-0.2.2.dist-info/top_level.txt,sha256=QGIl4v-KB32upFS5UTXMJxHVX3vF7yBso82wJFI1Vbs,7
+themis_eval-0.2.2.dist-info/RECORD,,