themis-eval 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. themis/cli/__init__.py +5 -0
  2. themis/cli/__main__.py +6 -0
  3. themis/cli/commands/__init__.py +19 -0
  4. themis/cli/commands/benchmarks.py +221 -0
  5. themis/cli/commands/comparison.py +394 -0
  6. themis/cli/commands/config_commands.py +244 -0
  7. themis/cli/commands/cost.py +214 -0
  8. themis/cli/commands/demo.py +68 -0
  9. themis/cli/commands/info.py +90 -0
  10. themis/cli/commands/leaderboard.py +362 -0
  11. themis/cli/commands/math_benchmarks.py +318 -0
  12. themis/cli/commands/mcq_benchmarks.py +207 -0
  13. themis/cli/commands/sample_run.py +244 -0
  14. themis/cli/commands/visualize.py +299 -0
  15. themis/cli/main.py +93 -0
  16. themis/cli/new_project.py +33 -0
  17. themis/cli/utils.py +51 -0
  18. themis/config/__init__.py +19 -0
  19. themis/config/loader.py +27 -0
  20. themis/config/registry.py +34 -0
  21. themis/config/runtime.py +214 -0
  22. themis/config/schema.py +112 -0
  23. themis/core/__init__.py +5 -0
  24. themis/core/conversation.py +354 -0
  25. themis/core/entities.py +164 -0
  26. themis/core/serialization.py +231 -0
  27. themis/core/tools.py +393 -0
  28. themis/core/types.py +141 -0
  29. themis/datasets/__init__.py +273 -0
  30. themis/datasets/base.py +264 -0
  31. themis/datasets/commonsense_qa.py +174 -0
  32. themis/datasets/competition_math.py +265 -0
  33. themis/datasets/coqa.py +133 -0
  34. themis/datasets/gpqa.py +190 -0
  35. themis/datasets/gsm8k.py +123 -0
  36. themis/datasets/gsm_symbolic.py +124 -0
  37. themis/datasets/math500.py +122 -0
  38. themis/datasets/med_qa.py +179 -0
  39. themis/datasets/medmcqa.py +169 -0
  40. themis/datasets/mmlu_pro.py +262 -0
  41. themis/datasets/piqa.py +146 -0
  42. themis/datasets/registry.py +201 -0
  43. themis/datasets/schema.py +245 -0
  44. themis/datasets/sciq.py +150 -0
  45. themis/datasets/social_i_qa.py +151 -0
  46. themis/datasets/super_gpqa.py +263 -0
  47. themis/evaluation/__init__.py +1 -0
  48. themis/evaluation/conditional.py +410 -0
  49. themis/evaluation/extractors/__init__.py +19 -0
  50. themis/evaluation/extractors/error_taxonomy_extractor.py +80 -0
  51. themis/evaluation/extractors/exceptions.py +7 -0
  52. themis/evaluation/extractors/identity_extractor.py +29 -0
  53. themis/evaluation/extractors/json_field_extractor.py +45 -0
  54. themis/evaluation/extractors/math_verify_extractor.py +37 -0
  55. themis/evaluation/extractors/regex_extractor.py +43 -0
  56. themis/evaluation/math_verify_utils.py +87 -0
  57. themis/evaluation/metrics/__init__.py +21 -0
  58. themis/evaluation/metrics/composite_metric.py +47 -0
  59. themis/evaluation/metrics/consistency_metric.py +80 -0
  60. themis/evaluation/metrics/exact_match.py +51 -0
  61. themis/evaluation/metrics/length_difference_tolerance.py +33 -0
  62. themis/evaluation/metrics/math_verify_accuracy.py +40 -0
  63. themis/evaluation/metrics/pairwise_judge_metric.py +141 -0
  64. themis/evaluation/metrics/response_length.py +33 -0
  65. themis/evaluation/metrics/rubric_judge_metric.py +134 -0
  66. themis/evaluation/pipeline.py +49 -0
  67. themis/evaluation/pipelines/__init__.py +15 -0
  68. themis/evaluation/pipelines/composable_pipeline.py +357 -0
  69. themis/evaluation/pipelines/standard_pipeline.py +288 -0
  70. themis/evaluation/reports.py +293 -0
  71. themis/evaluation/statistics/__init__.py +53 -0
  72. themis/evaluation/statistics/bootstrap.py +79 -0
  73. themis/evaluation/statistics/confidence_intervals.py +121 -0
  74. themis/evaluation/statistics/distributions.py +207 -0
  75. themis/evaluation/statistics/effect_sizes.py +124 -0
  76. themis/evaluation/statistics/hypothesis_tests.py +305 -0
  77. themis/evaluation/statistics/types.py +139 -0
  78. themis/evaluation/strategies/__init__.py +13 -0
  79. themis/evaluation/strategies/attempt_aware_evaluation_strategy.py +51 -0
  80. themis/evaluation/strategies/default_evaluation_strategy.py +25 -0
  81. themis/evaluation/strategies/evaluation_strategy.py +24 -0
  82. themis/evaluation/strategies/judge_evaluation_strategy.py +64 -0
  83. themis/experiment/__init__.py +5 -0
  84. themis/experiment/builder.py +151 -0
  85. themis/experiment/cache_manager.py +129 -0
  86. themis/experiment/comparison.py +631 -0
  87. themis/experiment/cost.py +310 -0
  88. themis/experiment/definitions.py +62 -0
  89. themis/experiment/export.py +690 -0
  90. themis/experiment/export_csv.py +159 -0
  91. themis/experiment/integration_manager.py +104 -0
  92. themis/experiment/math.py +192 -0
  93. themis/experiment/mcq.py +169 -0
  94. themis/experiment/orchestrator.py +373 -0
  95. themis/experiment/pricing.py +317 -0
  96. themis/experiment/storage.py +255 -0
  97. themis/experiment/visualization.py +588 -0
  98. themis/generation/__init__.py +1 -0
  99. themis/generation/agentic_runner.py +420 -0
  100. themis/generation/batching.py +254 -0
  101. themis/generation/clients.py +143 -0
  102. themis/generation/conversation_runner.py +236 -0
  103. themis/generation/plan.py +456 -0
  104. themis/generation/providers/litellm_provider.py +221 -0
  105. themis/generation/providers/vllm_provider.py +135 -0
  106. themis/generation/router.py +34 -0
  107. themis/generation/runner.py +207 -0
  108. themis/generation/strategies.py +98 -0
  109. themis/generation/templates.py +71 -0
  110. themis/generation/turn_strategies.py +393 -0
  111. themis/generation/types.py +9 -0
  112. themis/integrations/__init__.py +0 -0
  113. themis/integrations/huggingface.py +61 -0
  114. themis/integrations/wandb.py +65 -0
  115. themis/interfaces/__init__.py +83 -0
  116. themis/project/__init__.py +20 -0
  117. themis/project/definitions.py +98 -0
  118. themis/project/patterns.py +230 -0
  119. themis/providers/__init__.py +5 -0
  120. themis/providers/registry.py +39 -0
  121. themis/utils/api_generator.py +379 -0
  122. themis/utils/cost_tracking.py +376 -0
  123. themis/utils/dashboard.py +452 -0
  124. themis/utils/logging_utils.py +41 -0
  125. themis/utils/progress.py +58 -0
  126. themis/utils/tracing.py +320 -0
  127. {themis_eval-0.1.0.dist-info → themis_eval-0.1.1.dist-info}/METADATA +1 -1
  128. themis_eval-0.1.1.dist-info/RECORD +134 -0
  129. themis_eval-0.1.0.dist-info/RECORD +0 -8
  130. {themis_eval-0.1.0.dist-info → themis_eval-0.1.1.dist-info}/WHEEL +0 -0
  131. {themis_eval-0.1.0.dist-info → themis_eval-0.1.1.dist-info}/licenses/LICENSE +0 -0
  132. {themis_eval-0.1.0.dist-info → themis_eval-0.1.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,379 @@
1
+ """API generation utilities for creating REST APIs from Python functions.
2
+
3
+ This module provides tools to automatically generate REST API endpoints from
4
+ Python functions using their docstrings and type hints. It leverages FastAPI
5
+ for automatic OpenAPI schema generation.
6
+
7
+ Example:
8
+ ```python
9
+ from themis.utils.api_generator import create_api_from_module
10
+
11
+ # Generate API from a module
12
+ app = create_api_from_module(
13
+ module=themis.evaluation.statistics,
14
+ prefix="/api/v1/statistics"
15
+ )
16
+
17
+ # Run the API server
18
+ # uvicorn main:app --reload
19
+ ```
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import inspect
25
+ from typing import Any, Callable, Dict, List, get_type_hints
26
+
27
+ try:
28
+ from fastapi import FastAPI, HTTPException
29
+ from pydantic import BaseModel, create_model
30
+
31
+ FASTAPI_AVAILABLE = True
32
+ except ImportError:
33
+ FASTAPI_AVAILABLE = False
34
+ FastAPI = None
35
+ HTTPException = None
36
+ BaseModel = None
37
+ create_model = None
38
+
39
+
40
class APIGenerationError(Exception):
    """Raised when an API cannot be generated from Python functions.

    Within this module it is raised when FastAPI is not installed, so the
    generator functions have nothing to build an application with.
    """
44
+
45
+
46
def create_api_from_functions(
    functions: List[Callable],
    title: str = "Auto-Generated API",
    description: str = "API generated from Python functions",
    version: str = "1.0.0",
    prefix: str = "",
) -> Any:
    """Build a FastAPI application exposing each given function as an endpoint.

    Every function is handed to ``_register_function_as_endpoint``, which
    derives the request schema from the function's signature, type hints
    and docstring.

    Args:
        functions: Functions to expose as API endpoints.
        title: Title passed to the FastAPI application.
        description: Description passed to the FastAPI application.
        version: Version string passed to the FastAPI application.
        prefix: URL prefix placed in front of every endpoint path
            (e.g., "/api/v1").

    Returns:
        The FastAPI application instance with one route per function.

    Raises:
        APIGenerationError: If FastAPI is not installed.

    Example:
        ```python
        def add(a: int, b: int) -> int:
            return a + b

        app = create_api_from_functions(
            functions=[add],
            title="Math API",
            prefix="/api/math",
        )
        ```
    """
    if FASTAPI_AVAILABLE:
        application = FastAPI(title=title, description=description, version=version)
        for target in functions:
            _register_function_as_endpoint(application, target, prefix)
        return application

    # FastAPI is an optional dependency; fail with an actionable message.
    raise APIGenerationError(
        "FastAPI is not installed. Install it with: pip install fastapi uvicorn"
    )
94
+
95
+
96
def create_api_from_module(
    module: Any,
    title: str | None = None,
    description: str | None = None,
    version: str = "1.0.0",
    prefix: str = "",
    include_private: bool = False,
) -> Any:
    """Create a FastAPI application from the functions exposed by a module.

    Functions are discovered with ``inspect.getmembers``. A function is
    included when it is defined in ``module`` itself or, when ``module`` is a
    package (it has a ``__path__``), in one of that package's submodules.
    The second case covers packages whose ``__init__`` merely re-exports
    functions implemented in submodules; without it such a package would
    produce an API with no endpoints. Functions imported from unrelated
    modules are ignored.

    Args:
        module: Python module or package containing functions to expose.
        title: API title (defaults to ``"<module name> API"``).
        description: API description (defaults to the module docstring, or
            ``"API for <module name>"`` when the module has none).
        version: API version.
        prefix: URL prefix for all endpoints.
        include_private: Whether to include functions whose name starts
            with an underscore.

    Returns:
        FastAPI application instance.

    Raises:
        APIGenerationError: If FastAPI is not installed.

    Example:
        ```python
        from themis.evaluation import statistics

        app = create_api_from_module(
            module=statistics,
            prefix="/api/stats"
        )
        ```
    """
    if not FASTAPI_AVAILABLE:
        raise APIGenerationError(
            "FastAPI is not installed. Install it with: pip install fastapi uvicorn"
        )

    module_name = module.__name__

    # Extract module metadata
    if title is None:
        title = f"{module_name} API"

    if description is None:
        description = inspect.getdoc(module) or f"API for {module_name}"

    # Only packages can own submodules, so only they get the prefix match.
    is_package = hasattr(module, "__path__")
    submodule_prefix = f"{module_name}."

    functions = []
    for name, obj in inspect.getmembers(module, inspect.isfunction):
        # Skip private functions unless explicitly included
        if not include_private and name.startswith("_"):
            continue

        owner = getattr(obj, "__module__", None) or ""
        defined_here = owner == module_name
        defined_in_submodule = is_package and owner.startswith(submodule_prefix)
        if defined_here or defined_in_submodule:
            functions.append(obj)

    return create_api_from_functions(
        functions=functions,
        title=title,
        description=description,
        version=version,
        prefix=prefix,
    )
160
+
161
+
162
def _register_function_as_endpoint(
    app: Any,
    func: Callable,
    prefix: str = "",
) -> None:
    """Register a single function as a POST endpoint in the FastAPI app.

    The endpoint path is ``{prefix}/{func.__name__}`` with doubled slashes
    collapsed and a leading slash guaranteed. The request body is validated
    against a Pydantic model built from the function's signature, and the
    function's return value is sent back as ``{"result": <value>}``.

    Args:
        app: FastAPI application instance
        func: Function to register
        prefix: URL prefix for the endpoint

    Raises:
        HTTPException: At request time. An ``HTTPException`` raised by
            ``func`` is passed through unchanged; any other exception is
            reported as a 500 whose detail is the exception message.
    """
    func_name = func.__name__
    endpoint_path = f"{prefix}/{func_name}".replace("//", "/")
    if not endpoint_path.startswith("/"):
        # Routed paths must start with "/"; tolerate a prefix such as "api".
        endpoint_path = f"/{endpoint_path}"

    # Get function signature and type hints
    sig = inspect.signature(func)
    type_hints = get_type_hints(func)

    # Extract docstring
    docstring = inspect.getdoc(func) or f"Execute {func_name}"

    # Parse docstring to extract parameter descriptions
    param_docs = _parse_docstring_params(docstring)

    # Build Pydantic model for request body
    request_model = _create_request_model(func_name, sig, type_hints, param_docs)

    async def endpoint(request):
        try:
            # Prefer the Pydantic v2 API; ``.dict()`` is the v1 spelling.
            if hasattr(request, "model_dump"):
                params = request.model_dump()
            else:
                params = request.dict()

            # Call the original function
            result = func(**params)
        except HTTPException:
            # The wrapped function chose a status code itself; keep it.
            raise
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e)) from e

        return {"result": result}

    # The model is a local object. Under ``from __future__ import annotations``
    # an inline ``request: request_model`` annotation is stored as the string
    # "request_model", which cannot be resolved from the endpoint's globals.
    # Attaching the class object directly avoids any name lookup.
    endpoint.__annotations__ = {"request": request_model}

    # Set endpoint metadata
    endpoint.__name__ = f"endpoint_{func_name}"
    endpoint.__doc__ = docstring

    # Register the endpoint
    app.post(
        endpoint_path,
        response_model=Dict[str, Any],
        summary=f"Execute {func_name}",
        description=docstring,
    )(endpoint)
214
+
215
+
216
def _create_request_model(
    func_name: str,
    sig: inspect.Signature,
    type_hints: Dict[str, type],
    param_docs: Dict[str, str],
) -> type:
    """Create a Pydantic model describing a function's parameters.

    Each parameter becomes a model field typed from ``type_hints`` (``Any``
    when no hint exists). Parameters without a default become required
    fields. When ``param_docs`` holds a non-empty description for a
    parameter, it is attached to the field so it shows up in the generated
    schema. ``self``/``cls`` and variadic parameters (``*args``,
    ``**kwargs``) are skipped because they cannot be represented as a named
    request field.

    Args:
        func_name: Function name (used for model name)
        sig: Function signature
        type_hints: Type hints dictionary
        param_docs: Parameter documentation from docstring

    Returns:
        Pydantic model class named ``<FuncNameInTitleCase>Request``
    """
    # Imported here because pydantic is optional at module import time; this
    # helper is only reached after the FastAPI availability check passed.
    from pydantic import Field

    variadic_kinds = (
        inspect.Parameter.VAR_POSITIONAL,
        inspect.Parameter.VAR_KEYWORD,
    )
    fields = {}

    for param_name, param in sig.parameters.items():
        # Skip self/cls parameters
        if param_name in ("self", "cls"):
            continue

        # ``*args`` / ``**kwargs`` are not individual named inputs.
        if param.kind in variadic_kinds:
            continue

        # Get type hint or default to Any
        param_type = type_hints.get(param_name, Any)

        # ``...`` marks the field as required
        if param.default is inspect.Parameter.empty:
            default = ...
        else:
            default = param.default

        # Attach the docstring description when one was found
        description = param_docs.get(param_name, "")
        if description:
            fields[param_name] = (
                param_type,
                Field(default, description=description),
            )
        else:
            fields[param_name] = (param_type, default)

    # Create model name
    model_name = f"{func_name.title().replace('_', '')}Request"

    # Create and return Pydantic model
    return create_model(model_name, **fields)
260
+
261
+
262
+ def _parse_docstring_params(docstring: str) -> Dict[str, str]:
263
+ """Parse parameter descriptions from Google-style docstring.
264
+
265
+ Args:
266
+ docstring: Function docstring
267
+
268
+ Returns:
269
+ Dictionary mapping parameter names to descriptions
270
+ """
271
+ param_docs = {}
272
+
273
+ if not docstring:
274
+ return param_docs
275
+
276
+ # Look for Args section
277
+ lines = docstring.split("\n")
278
+ in_args_section = False
279
+ current_param = None
280
+ current_desc = []
281
+
282
+ for line in lines:
283
+ stripped = line.strip()
284
+
285
+ # Check if we're entering Args section
286
+ if stripped.lower().startswith("args:"):
287
+ in_args_section = True
288
+ continue
289
+
290
+ # Check if we're leaving Args section
291
+ if in_args_section and stripped and not line.startswith(" "):
292
+ break
293
+
294
+ if in_args_section and stripped:
295
+ # Check if this is a parameter line (has a colon)
296
+ if ":" in stripped and not stripped.startswith(":"):
297
+ # Save previous parameter
298
+ if current_param:
299
+ param_docs[current_param] = " ".join(current_desc).strip()
300
+
301
+ # Parse new parameter
302
+ parts = stripped.split(":", 1)
303
+ current_param = parts[0].strip()
304
+ if len(parts) > 1:
305
+ current_desc = [parts[1].strip()]
306
+ else:
307
+ current_desc = []
308
+ elif current_param:
309
+ # Continue description from previous line
310
+ current_desc.append(stripped)
311
+
312
+ # Save last parameter
313
+ if current_param:
314
+ param_docs[current_param] = " ".join(current_desc).strip()
315
+
316
+ return param_docs
317
+
318
+
319
def generate_api_documentation(
    app: Any,
    output_path: str = "api_docs.md",
) -> None:
    """Generate markdown documentation for a FastAPI application.

    The file starts with the application's title, description and version,
    followed by one ``### `POST <path>``` heading per route that accepts
    POST, with the route description underneath when it has one. The file
    is written as UTF-8, overwriting any existing file at ``output_path``.

    Args:
        app: FastAPI application instance
        output_path: Path to output markdown file

    Raises:
        APIGenerationError: If FastAPI is not installed.
    """
    if not FASTAPI_AVAILABLE:
        raise APIGenerationError("FastAPI is not installed")

    lines = [
        f"# {app.title}",
        "",
        # ``str.join`` below needs strings; tolerate a missing description.
        app.description or "",
        "",
        f"**Version:** {app.version}",
        "",
        "## Endpoints",
        "",
    ]

    for route in app.routes:
        # Some route objects have no ``methods`` attribute or carry ``None``.
        methods = getattr(route, "methods", None) or ()
        if "POST" not in methods:
            continue

        lines.append(f"### `POST {route.path}`")
        lines.append("")

        # Not every route type defines ``description``.
        route_description = getattr(route, "description", None)
        if route_description:
            lines.append(route_description)
            lines.append("")

    # Docstrings may contain non-ASCII text; do not depend on the locale.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
353
+
354
+
355
# Public names exported by ``from themis.utils.api_generator import *``.
__all__ = [
    "create_api_from_functions",
    "create_api_from_module",
    "generate_api_documentation",
    "APIGenerationError",
]


# Example usage: running this file directly only prints usage instructions.
if __name__ == "__main__":
    # Check if FastAPI is available
    if not FASTAPI_AVAILABLE:
        print("FastAPI is not installed. Install with: pip install fastapi uvicorn")
        # ``exit`` is injected by the ``site`` module and may be missing
        # (e.g. ``python -S``); raising SystemExit always works.
        raise SystemExit(1)

    # Example: Create API from evaluation.statistics module
    print("Example: Creating API from functions...")
    print("To use this utility:")
    print("1. Install FastAPI: pip install fastapi uvicorn")
    print("2. Create an API:")
    print(" from themis.utils.api_generator import create_api_from_module")
    print(" from themis.evaluation import statistics")
    print(" app = create_api_from_module(statistics, prefix='/api/stats')")
    print("3. Run the server:")
    print(" uvicorn your_module:app --reload")