themis-eval 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- themis/cli/__init__.py +5 -0
- themis/cli/__main__.py +6 -0
- themis/cli/commands/__init__.py +19 -0
- themis/cli/commands/benchmarks.py +221 -0
- themis/cli/commands/comparison.py +394 -0
- themis/cli/commands/config_commands.py +244 -0
- themis/cli/commands/cost.py +214 -0
- themis/cli/commands/demo.py +68 -0
- themis/cli/commands/info.py +90 -0
- themis/cli/commands/leaderboard.py +362 -0
- themis/cli/commands/math_benchmarks.py +318 -0
- themis/cli/commands/mcq_benchmarks.py +207 -0
- themis/cli/commands/sample_run.py +244 -0
- themis/cli/commands/visualize.py +299 -0
- themis/cli/main.py +93 -0
- themis/cli/new_project.py +33 -0
- themis/cli/utils.py +51 -0
- themis/config/__init__.py +19 -0
- themis/config/loader.py +27 -0
- themis/config/registry.py +34 -0
- themis/config/runtime.py +214 -0
- themis/config/schema.py +112 -0
- themis/core/__init__.py +5 -0
- themis/core/conversation.py +354 -0
- themis/core/entities.py +164 -0
- themis/core/serialization.py +231 -0
- themis/core/tools.py +393 -0
- themis/core/types.py +141 -0
- themis/datasets/__init__.py +273 -0
- themis/datasets/base.py +264 -0
- themis/datasets/commonsense_qa.py +174 -0
- themis/datasets/competition_math.py +265 -0
- themis/datasets/coqa.py +133 -0
- themis/datasets/gpqa.py +190 -0
- themis/datasets/gsm8k.py +123 -0
- themis/datasets/gsm_symbolic.py +124 -0
- themis/datasets/math500.py +122 -0
- themis/datasets/med_qa.py +179 -0
- themis/datasets/medmcqa.py +169 -0
- themis/datasets/mmlu_pro.py +262 -0
- themis/datasets/piqa.py +146 -0
- themis/datasets/registry.py +201 -0
- themis/datasets/schema.py +245 -0
- themis/datasets/sciq.py +150 -0
- themis/datasets/social_i_qa.py +151 -0
- themis/datasets/super_gpqa.py +263 -0
- themis/evaluation/__init__.py +1 -0
- themis/evaluation/conditional.py +410 -0
- themis/evaluation/extractors/__init__.py +19 -0
- themis/evaluation/extractors/error_taxonomy_extractor.py +80 -0
- themis/evaluation/extractors/exceptions.py +7 -0
- themis/evaluation/extractors/identity_extractor.py +29 -0
- themis/evaluation/extractors/json_field_extractor.py +45 -0
- themis/evaluation/extractors/math_verify_extractor.py +37 -0
- themis/evaluation/extractors/regex_extractor.py +43 -0
- themis/evaluation/math_verify_utils.py +87 -0
- themis/evaluation/metrics/__init__.py +21 -0
- themis/evaluation/metrics/composite_metric.py +47 -0
- themis/evaluation/metrics/consistency_metric.py +80 -0
- themis/evaluation/metrics/exact_match.py +51 -0
- themis/evaluation/metrics/length_difference_tolerance.py +33 -0
- themis/evaluation/metrics/math_verify_accuracy.py +40 -0
- themis/evaluation/metrics/pairwise_judge_metric.py +141 -0
- themis/evaluation/metrics/response_length.py +33 -0
- themis/evaluation/metrics/rubric_judge_metric.py +134 -0
- themis/evaluation/pipeline.py +49 -0
- themis/evaluation/pipelines/__init__.py +15 -0
- themis/evaluation/pipelines/composable_pipeline.py +357 -0
- themis/evaluation/pipelines/standard_pipeline.py +288 -0
- themis/evaluation/reports.py +293 -0
- themis/evaluation/statistics/__init__.py +53 -0
- themis/evaluation/statistics/bootstrap.py +79 -0
- themis/evaluation/statistics/confidence_intervals.py +121 -0
- themis/evaluation/statistics/distributions.py +207 -0
- themis/evaluation/statistics/effect_sizes.py +124 -0
- themis/evaluation/statistics/hypothesis_tests.py +305 -0
- themis/evaluation/statistics/types.py +139 -0
- themis/evaluation/strategies/__init__.py +13 -0
- themis/evaluation/strategies/attempt_aware_evaluation_strategy.py +51 -0
- themis/evaluation/strategies/default_evaluation_strategy.py +25 -0
- themis/evaluation/strategies/evaluation_strategy.py +24 -0
- themis/evaluation/strategies/judge_evaluation_strategy.py +64 -0
- themis/experiment/__init__.py +5 -0
- themis/experiment/builder.py +151 -0
- themis/experiment/cache_manager.py +129 -0
- themis/experiment/comparison.py +631 -0
- themis/experiment/cost.py +310 -0
- themis/experiment/definitions.py +62 -0
- themis/experiment/export.py +690 -0
- themis/experiment/export_csv.py +159 -0
- themis/experiment/integration_manager.py +104 -0
- themis/experiment/math.py +192 -0
- themis/experiment/mcq.py +169 -0
- themis/experiment/orchestrator.py +373 -0
- themis/experiment/pricing.py +317 -0
- themis/experiment/storage.py +255 -0
- themis/experiment/visualization.py +588 -0
- themis/generation/__init__.py +1 -0
- themis/generation/agentic_runner.py +420 -0
- themis/generation/batching.py +254 -0
- themis/generation/clients.py +143 -0
- themis/generation/conversation_runner.py +236 -0
- themis/generation/plan.py +456 -0
- themis/generation/providers/litellm_provider.py +221 -0
- themis/generation/providers/vllm_provider.py +135 -0
- themis/generation/router.py +34 -0
- themis/generation/runner.py +207 -0
- themis/generation/strategies.py +98 -0
- themis/generation/templates.py +71 -0
- themis/generation/turn_strategies.py +393 -0
- themis/generation/types.py +9 -0
- themis/integrations/__init__.py +0 -0
- themis/integrations/huggingface.py +61 -0
- themis/integrations/wandb.py +65 -0
- themis/interfaces/__init__.py +83 -0
- themis/project/__init__.py +20 -0
- themis/project/definitions.py +98 -0
- themis/project/patterns.py +230 -0
- themis/providers/__init__.py +5 -0
- themis/providers/registry.py +39 -0
- themis/utils/api_generator.py +379 -0
- themis/utils/cost_tracking.py +376 -0
- themis/utils/dashboard.py +452 -0
- themis/utils/logging_utils.py +41 -0
- themis/utils/progress.py +58 -0
- themis/utils/tracing.py +320 -0
- {themis_eval-0.1.0.dist-info → themis_eval-0.1.1.dist-info}/METADATA +1 -1
- themis_eval-0.1.1.dist-info/RECORD +134 -0
- themis_eval-0.1.0.dist-info/RECORD +0 -8
- {themis_eval-0.1.0.dist-info → themis_eval-0.1.1.dist-info}/WHEEL +0 -0
- {themis_eval-0.1.0.dist-info → themis_eval-0.1.1.dist-info}/licenses/LICENSE +0 -0
- {themis_eval-0.1.0.dist-info → themis_eval-0.1.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
"""API generation utilities for creating REST APIs from Python functions.
|
|
2
|
+
|
|
3
|
+
This module provides tools to automatically generate REST API endpoints from
|
|
4
|
+
Python functions using their docstrings and type hints. It leverages FastAPI
|
|
5
|
+
for automatic OpenAPI schema generation.
|
|
6
|
+
|
|
7
|
+
Example:
|
|
8
|
+
```python
|
|
9
|
+
from themis.utils.api_generator import create_api_from_module
|
|
10
|
+
|
|
11
|
+
# Generate API from a module
|
|
12
|
+
app = create_api_from_module(
|
|
13
|
+
module=themis.evaluation.statistics,
|
|
14
|
+
prefix="/api/v1/statistics"
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
# Run the API server
|
|
18
|
+
# uvicorn main:app --reload
|
|
19
|
+
```
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import inspect
|
|
25
|
+
from typing import Any, Callable, Dict, List, get_type_hints
|
|
26
|
+
|
|
27
|
+
try:
|
|
28
|
+
from fastapi import FastAPI, HTTPException
|
|
29
|
+
from pydantic import BaseModel, create_model
|
|
30
|
+
|
|
31
|
+
FASTAPI_AVAILABLE = True
|
|
32
|
+
except ImportError:
|
|
33
|
+
FASTAPI_AVAILABLE = False
|
|
34
|
+
FastAPI = None
|
|
35
|
+
HTTPException = None
|
|
36
|
+
BaseModel = None
|
|
37
|
+
create_model = None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class APIGenerationError(Exception):
    """Exception raised when API generation fails.

    Raised by the public helpers in this module when FastAPI is not
    installed, so callers can catch one package-specific error type.
    """
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def create_api_from_functions(
    functions: List[Callable],
    title: str = "Auto-Generated API",
    description: str = "API generated from Python functions",
    version: str = "1.0.0",
    prefix: str = "",
) -> Any:
    """Build a FastAPI application that exposes each given function.

    Every function is handed to ``_register_function_as_endpoint``, which
    derives a request model from the function's signature, type hints and
    docstring and registers it as a POST route.

    Args:
        functions: Functions to expose, one endpoint per function
        title: Title passed to the FastAPI application
        description: Description passed to the FastAPI application
        version: Version string passed to the FastAPI application
        prefix: URL prefix placed before each endpoint (e.g., "/api/v1")

    Returns:
        The FastAPI application with all endpoints registered

    Raises:
        APIGenerationError: If FastAPI is not installed. Errors raised while
            inspecting an individual function propagate unchanged.

    Example:
        ```python
        from themis.evaluation.statistics import compute_confidence_interval

        app = create_api_from_functions(
            functions=[compute_confidence_interval],
            title="Statistics API",
            prefix="/api/stats"
        )
        ```
    """
    if FASTAPI_AVAILABLE:
        api = FastAPI(title=title, description=description, version=version)
        for target in functions:
            _register_function_as_endpoint(api, target, prefix)
        return api

    # Optional dependency missing: fail with the package-specific error.
    raise APIGenerationError(
        "FastAPI is not installed. Install it with: pip install fastapi uvicorn"
    )
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def create_api_from_module(
    module: Any,
    title: str | None = None,
    description: str | None = None,
    version: str = "1.0.0",
    prefix: str = "",
    include_private: bool = False,
) -> Any:
    """Create a FastAPI application from all functions in a module.

    Functions are selected when they are defined in ``module`` itself or,
    when ``module`` is a package, in one of its own submodules (i.e. they
    are re-exported by the package ``__init__``). Functions imported from
    unrelated modules are ignored.

    Args:
        module: Python module or package containing functions to expose
        title: API title (defaults to "<module name> API")
        description: API description (defaults to the module docstring)
        version: API version
        prefix: URL prefix for all endpoints
        include_private: Whether to include private functions (starting with _)

    Returns:
        FastAPI application instance

    Raises:
        APIGenerationError: If FastAPI is not installed

    Example:
        ```python
        from themis.evaluation import statistics

        app = create_api_from_module(
            module=statistics,
            prefix="/api/stats"
        )
        ```
    """
    if not FASTAPI_AVAILABLE:
        raise APIGenerationError(
            "FastAPI is not installed. Install it with: pip install fastapi uvicorn"
        )

    module_name = module.__name__

    # Fall back to module metadata when the caller gave no title/description.
    if title is None:
        title = f"{module_name} API"
    if description is None:
        description = inspect.getdoc(module) or f"API for {module_name}"

    # A package usually re-exports functions that live in its submodules;
    # their __module__ is "<package>.<submodule>", not "<package>", so an
    # exact-match filter alone would drop every one of them.
    submodule_prefix = f"{module_name}."

    functions = []
    for name, obj in inspect.getmembers(module, inspect.isfunction):
        # Skip private functions unless explicitly included
        if name.startswith("_") and not include_private:
            continue

        owner = getattr(obj, "__module__", None) or ""
        if owner == module_name or owner.startswith(submodule_prefix):
            functions.append(obj)

    return create_api_from_functions(
        functions=functions,
        title=title,
        description=description,
        version=version,
        prefix=prefix,
    )
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _register_function_as_endpoint(
    app: Any,
    func: Callable,
    prefix: str = "",
) -> None:
    """Register a single function as a POST endpoint in the FastAPI app.

    The endpoint accepts a JSON body validated by a Pydantic model built from
    the function's signature, calls the function with those values as keyword
    arguments, and responds with ``{"result": <return value>}``. Any exception
    raised by the function is reported as an HTTP 500 whose detail is the
    exception message.

    Args:
        app: FastAPI application instance
        func: Function to register
        prefix: URL prefix for the endpoint
    """
    func_name = func.__name__
    endpoint_path = f"{prefix}/{func_name}".replace("//", "/")
    # Route paths must be absolute; tolerate a prefix given without a slash.
    if not endpoint_path.startswith("/"):
        endpoint_path = f"/{endpoint_path}"

    # Get function signature and type hints
    sig = inspect.signature(func)
    type_hints = get_type_hints(func)

    # Extract docstring and the per-parameter descriptions it contains
    docstring = inspect.getdoc(func) or f"Execute {func_name}"
    param_docs = _parse_docstring_params(docstring)

    # Build Pydantic model for request body
    request_model = _create_request_model(func_name, sig, type_hints, param_docs)

    async def endpoint(request):  # annotation is attached explicitly below
        try:
            # model_dump() is the pydantic v2 spelling; .dict() is the v1 one.
            if hasattr(request, "model_dump"):
                params = request.model_dump()
            else:
                params = request.dict()

            # Call the original function
            result = func(**params)

            # Coroutine functions hand back an awaitable, not a value.
            if inspect.isawaitable(result):
                result = await result

            return {"result": result}
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e)) from e

    # This module uses postponed evaluation of annotations, so writing
    # `request: request_model` in the signature would store the *string*
    # "request_model". FastAPI resolves string annotations against module
    # globals, where this local model does not exist. Attach the real class.
    endpoint.__annotations__ = {"request": request_model}

    # Set endpoint metadata
    endpoint.__name__ = f"endpoint_{func_name}"
    endpoint.__doc__ = docstring

    # Register the endpoint
    app.post(
        endpoint_path,
        response_model=Dict[str, Any],
        summary=f"Execute {func_name}",
        description=docstring,
    )(endpoint)
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _create_request_model(
    func_name: str,
    sig: inspect.Signature,
    type_hints: Dict[str, type],
    param_docs: Dict[str, str],
) -> type:
    """Create a Pydantic model for function parameters.

    Each named parameter becomes a model field typed from ``type_hints``
    (``Any`` when unannotated). Parameters without a default are required;
    otherwise the parameter's default is the field default. Descriptions
    found in ``param_docs`` are attached to the fields.

    ``self``/``cls`` and variadic parameters (``*args`` / ``**kwargs``) are
    skipped: the endpoint calls the function with the model's values as
    keyword arguments, which cannot populate a variadic parameter.

    Args:
        func_name: Function name (used for model name)
        sig: Function signature
        type_hints: Type hints dictionary
        param_docs: Parameter documentation from docstring

    Returns:
        Pydantic model class
    """
    # Imported here because the module-level optional-import guard only
    # brings in BaseModel and create_model.
    from pydantic import Field

    variadic_kinds = (
        inspect.Parameter.VAR_POSITIONAL,
        inspect.Parameter.VAR_KEYWORD,
    )
    fields: Dict[str, Any] = {}

    for param_name, param in sig.parameters.items():
        # Skip self/cls parameters
        if param_name in ("self", "cls"):
            continue

        # Skip *args / **kwargs: they have no single keyword to bind to
        if param.kind in variadic_kinds:
            continue

        # Get type hint or default to Any
        param_type = type_hints.get(param_name, Any)

        # Ellipsis marks the field as required
        if param.default is inspect.Parameter.empty:
            default = ...
        else:
            default = param.default

        # Attach the docstring description (None when there is none)
        description = param_docs.get(param_name) or None
        fields[param_name] = (param_type, Field(default, description=description))

    # Create model name, e.g. "compute_ci" -> "ComputeCiRequest"
    model_name = f"{func_name.title().replace('_', '')}Request"

    # Create and return Pydantic model
    return create_model(model_name, **fields)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _parse_docstring_params(docstring: str) -> Dict[str, str]:
|
|
263
|
+
"""Parse parameter descriptions from Google-style docstring.
|
|
264
|
+
|
|
265
|
+
Args:
|
|
266
|
+
docstring: Function docstring
|
|
267
|
+
|
|
268
|
+
Returns:
|
|
269
|
+
Dictionary mapping parameter names to descriptions
|
|
270
|
+
"""
|
|
271
|
+
param_docs = {}
|
|
272
|
+
|
|
273
|
+
if not docstring:
|
|
274
|
+
return param_docs
|
|
275
|
+
|
|
276
|
+
# Look for Args section
|
|
277
|
+
lines = docstring.split("\n")
|
|
278
|
+
in_args_section = False
|
|
279
|
+
current_param = None
|
|
280
|
+
current_desc = []
|
|
281
|
+
|
|
282
|
+
for line in lines:
|
|
283
|
+
stripped = line.strip()
|
|
284
|
+
|
|
285
|
+
# Check if we're entering Args section
|
|
286
|
+
if stripped.lower().startswith("args:"):
|
|
287
|
+
in_args_section = True
|
|
288
|
+
continue
|
|
289
|
+
|
|
290
|
+
# Check if we're leaving Args section
|
|
291
|
+
if in_args_section and stripped and not line.startswith(" "):
|
|
292
|
+
break
|
|
293
|
+
|
|
294
|
+
if in_args_section and stripped:
|
|
295
|
+
# Check if this is a parameter line (has a colon)
|
|
296
|
+
if ":" in stripped and not stripped.startswith(":"):
|
|
297
|
+
# Save previous parameter
|
|
298
|
+
if current_param:
|
|
299
|
+
param_docs[current_param] = " ".join(current_desc).strip()
|
|
300
|
+
|
|
301
|
+
# Parse new parameter
|
|
302
|
+
parts = stripped.split(":", 1)
|
|
303
|
+
current_param = parts[0].strip()
|
|
304
|
+
if len(parts) > 1:
|
|
305
|
+
current_desc = [parts[1].strip()]
|
|
306
|
+
else:
|
|
307
|
+
current_desc = []
|
|
308
|
+
elif current_param:
|
|
309
|
+
# Continue description from previous line
|
|
310
|
+
current_desc.append(stripped)
|
|
311
|
+
|
|
312
|
+
# Save last parameter
|
|
313
|
+
if current_param:
|
|
314
|
+
param_docs[current_param] = " ".join(current_desc).strip()
|
|
315
|
+
|
|
316
|
+
return param_docs
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def generate_api_documentation(
    app: Any,
    output_path: str = "api_docs.md",
) -> None:
    """Generate markdown documentation for a FastAPI application.

    Writes the app title, description and version, followed by one heading
    per route that accepts POST, with the route description when it has one.
    Routes without a POST method are omitted. The file is written as UTF-8
    and overwrites any existing file at ``output_path``.

    Args:
        app: FastAPI application instance
        output_path: Path to output markdown file

    Raises:
        APIGenerationError: If FastAPI is not installed
    """
    if not FASTAPI_AVAILABLE:
        raise APIGenerationError("FastAPI is not installed")

    lines = [
        f"# {app.title}",
        "",
        app.description or "",
        "",
        f"**Version:** {app.version}",
        "",
        "## Endpoints",
        "",
    ]

    for route in app.routes:
        # Not every route object has `methods`, and it may be None.
        methods = getattr(route, "methods", None) or ()
        if "POST" not in methods:
            continue

        lines.append(f"### `POST {route.path}`")
        lines.append("")

        # Plain (non-API) routes may not carry a description attribute.
        route_description = getattr(route, "description", None)
        if route_description:
            lines.append(route_description)
            lines.append("")

    # Explicit encoding: docstrings may contain non-ASCII text and the
    # platform default encoding is not guaranteed to handle it.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
# Public API of this module
__all__ = [
    "create_api_from_functions",
    "create_api_from_module",
    "generate_api_documentation",
    "APIGenerationError",
]


# Example usage: prints setup instructions when the module is run directly
if __name__ == "__main__":
    # Check if FastAPI is available
    if not FASTAPI_AVAILABLE:
        print("FastAPI is not installed. Install with: pip install fastapi uvicorn")
        # The bare `exit` helper is injected by the `site` module and is not
        # guaranteed to exist (e.g. under `python -S`); SystemExit always is.
        raise SystemExit(1)

    # Example: Create API from evaluation.statistics module
    print("Example: Creating API from functions...")
    print("To use this utility:")
    print("1. Install FastAPI: pip install fastapi uvicorn")
    print("2. Create an API:")
    print(" from themis.utils.api_generator import create_api_from_module")
    print(" from themis.evaluation import statistics")
    print(" app = create_api_from_module(statistics, prefix='/api/stats')")
    print("3. Run the server:")
    print(" uvicorn your_module:app --reload")
|