themis-eval 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. themis/__init__.py +12 -1
  2. themis/_version.py +2 -2
  3. themis/api.py +343 -0
  4. themis/backends/__init__.py +17 -0
  5. themis/backends/execution.py +197 -0
  6. themis/backends/storage.py +260 -0
  7. themis/cli/commands/results.py +252 -0
  8. themis/cli/main.py +427 -57
  9. themis/comparison/__init__.py +25 -0
  10. themis/comparison/engine.py +348 -0
  11. themis/comparison/reports.py +283 -0
  12. themis/comparison/statistics.py +402 -0
  13. themis/core/entities.py +23 -3
  14. themis/evaluation/metrics/code/__init__.py +19 -0
  15. themis/evaluation/metrics/code/codebleu.py +144 -0
  16. themis/evaluation/metrics/code/execution.py +280 -0
  17. themis/evaluation/metrics/code/pass_at_k.py +181 -0
  18. themis/evaluation/metrics/nlp/__init__.py +21 -0
  19. themis/evaluation/metrics/nlp/bertscore.py +138 -0
  20. themis/evaluation/metrics/nlp/bleu.py +129 -0
  21. themis/evaluation/metrics/nlp/meteor.py +153 -0
  22. themis/evaluation/metrics/nlp/rouge.py +136 -0
  23. themis/evaluation/pipelines/standard_pipeline.py +68 -8
  24. themis/experiment/cache_manager.py +8 -3
  25. themis/experiment/export.py +110 -2
  26. themis/experiment/orchestrator.py +48 -6
  27. themis/experiment/storage.py +1313 -110
  28. themis/integrations/huggingface.py +12 -1
  29. themis/integrations/wandb.py +13 -1
  30. themis/interfaces/__init__.py +86 -0
  31. themis/presets/__init__.py +10 -0
  32. themis/presets/benchmarks.py +354 -0
  33. themis/presets/models.py +190 -0
  34. themis/server/__init__.py +28 -0
  35. themis/server/app.py +337 -0
  36. themis_eval-0.2.0.dist-info/METADATA +596 -0
  37. {themis_eval-0.1.1.dist-info → themis_eval-0.2.0.dist-info}/RECORD +40 -17
  38. {themis_eval-0.1.1.dist-info → themis_eval-0.2.0.dist-info}/WHEEL +1 -1
  39. themis_eval-0.1.1.dist-info/METADATA +0 -758
  40. {themis_eval-0.1.1.dist-info → themis_eval-0.2.0.dist-info}/licenses/LICENSE +0 -0
  41. {themis_eval-0.1.1.dist-info → themis_eval-0.2.0.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,13 @@
1
- themis/__init__.py,sha256=3bKi1PneI5PaTaDPXsArCVvfinkLFDRU91lvZIeg7V0,281
2
- themis/_version.py,sha256=tc4TJqWVv2dx4UzItLqneMPaG7vM8CQFDNW5pJgNoKg,345
1
+ themis/__init__.py,sha256=Pswn5ZiXyU5ANoknjdBLkqouZQdeWMm3DoUMVzU_j8M,543
2
+ themis/_version.py,sha256=xRJB6N107oMsasuLYKaoIzuBo5Oe2hlK3-lGyTzxAC8,378
3
+ themis/api.py,sha256=myHeMaWQMnyjCUAlr9P6cX2Awt50q1XGtyKDCimJgCg,12077
3
4
  themis/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ themis/backends/__init__.py,sha256=RWM5SnV5FrS_cVjpHHeZZM_b9CgqBu1rPS5DlT5YQTY,578
6
+ themis/backends/execution.py,sha256=RAFuB9ri8TMil5PcnsisypKO2ViyLFXj08P_vjNYguU,6095
7
+ themis/backends/storage.py,sha256=pQp20WagSCl8Vmd-Rgx0hDbpYFhCqARXtvGDw3DPgNQ,8021
4
8
  themis/cli/__init__.py,sha256=An2DrMHRfmiee5BYJ6TGqvbG7sXWECjjyvEgcoGJ7cE,99
5
9
  themis/cli/__main__.py,sha256=df2pOghoSuq18hZmVVikmGhaFSaRe-jeDOnrsu-1QDM,135
6
- themis/cli/main.py,sha256=NSayD8zwH5NJjC_qaTwqhDR7_8PO3I2n1dPX-fBYa-A,3131
10
+ themis/cli/main.py,sha256=AGBFxb1sPLQ-aUAq8RM3YI6gGNs6SdFmBzVSqwp_MSg,15482
7
11
  themis/cli/new_project.py,sha256=D8asV4QbjgQNYvmXt_WhK4nPM-wKHe_K0VJiBdgtO_E,1121
8
12
  themis/cli/utils.py,sha256=NAPyFiXspfpx5vBxA8aEcOMmWEDyt-R8ywoHo_8Nr4A,1307
9
13
  themis/cli/commands/__init__.py,sha256=CTx7su3qTtq96qxLNclDsE6UM_86NhaS01M9-x9wFiw,287
@@ -16,8 +20,13 @@ themis/cli/commands/info.py,sha256=9maOaw-TFiBpuVhaqlMKukGuZ_zgESetqbMQ1Qdvjxs,2
16
20
  themis/cli/commands/leaderboard.py,sha256=AVvsYIwZAY18jn3sOq3QD45yNtfdHUEl7eixM4aMCKw,10615
17
21
  themis/cli/commands/math_benchmarks.py,sha256=nQ4TcPB7T9O3piAy4_TgrOQOQxh2Q8OyBreK_HoPCeQ,9946
18
22
  themis/cli/commands/mcq_benchmarks.py,sha256=Cls5W1jGd7TKizmw07CnZWY5N6ywR8VhJ6jKDnY_cRk,7026
23
+ themis/cli/commands/results.py,sha256=rdN3SaMoFnSfAoAXlfpeCTt3V6MwIp0Dk7FIjvPNF7s,7774
19
24
  themis/cli/commands/sample_run.py,sha256=r3Ymg5dVHg4IAVJvzoP0ZWUWWUE4Dia1t0062Yhdk9Q,9445
20
25
  themis/cli/commands/visualize.py,sha256=ZECkB0NjIltuOeBE-Q1JnndZEMXVzc8KgcrbaP-GSXo,9740
26
+ themis/comparison/__init__.py,sha256=bRI8gDlcjMtnH77R7N5ARioq_V4daJcWWM4DXKsoE1k,679
27
+ themis/comparison/engine.py,sha256=UkzXKmEFI2JiX0y8534oc6JFySxgA5v1emzRcGj33Kk,12133
28
+ themis/comparison/reports.py,sha256=126VJbd-lxj8C2YJqul53Fyr-nrZgmbrBsRA6Qkh0ro,10117
29
+ themis/comparison/statistics.py,sha256=eLqKUtKFwSvXnbZax8S0lF8RiSepwYdhnmnDD7DcrZs,12929
21
30
  themis/config/__init__.py,sha256=YMdFG1iLvOQUnSPlc_ZJVn5zCCTbIozML64b4qUtGR8,476
22
31
  themis/config/loader.py,sha256=t_wcIDwekuy3EaLprQgWILSKH2h5lFkF7quvNfAHddM,746
23
32
  themis/config/registry.py,sha256=sSrL1mTjUG86s30o-dhuRInY4YeumEICtmxdjS-PiiQ,1055
@@ -25,7 +34,7 @@ themis/config/runtime.py,sha256=hU69_oND7fJfAOIBJONENmsuf7Y8roO7n-w9OwxzoT8,7475
25
34
  themis/config/schema.py,sha256=SMR9QHp8OBkSnb1dHyOgg-IJWSqpXfyAqywnBeMy46M,3196
26
35
  themis/core/__init__.py,sha256=S8G1x-39sZ3_NQ5DJ6R1yBTWXp_gO0WxOtVjeB9sTwY,113
27
36
  themis/core/conversation.py,sha256=wwO8RS4t4plDR0Sf1KjYv_ejonlvKe0ZwAD-4sfGak8,10155
28
- themis/core/entities.py,sha256=S1Kw3qrx0vWWdOmskZnu3GuKGOGf0WDpHBazmwhEnmM,3770
37
+ themis/core/entities.py,sha256=WV9kiYdZFGxn6oH0lPtqaViY4I8oq-hWi_SmCKjvRnc,4449
29
38
  themis/core/serialization.py,sha256=cxfoSKwcZiNsnR8g_SAJAq1ZLrfLXM4S9_rVEDUT8qs,7071
30
39
  themis/core/tools.py,sha256=v0_ctsBCtinZGNC_I4C-h0GUPNM5ZeTi7z-U4iCtyp4,11035
31
40
  themis/core/types.py,sha256=I5rr9MMS0irX4lo-xlqGjosx-FjPgT64RzQAraM223A,3652
@@ -68,9 +77,18 @@ themis/evaluation/metrics/math_verify_accuracy.py,sha256=YhBhpONLmouLELfpcjNHiVS
68
77
  themis/evaluation/metrics/pairwise_judge_metric.py,sha256=DEYKwt3smzXiSUhDV4lWxDFXWoHz-JMg3z5bMjlLPKo,4890
69
78
  themis/evaluation/metrics/response_length.py,sha256=Xn2PQi4pMLhC_3bMmSbLEf-QVFOrMNm2ZJr0PiCDH-E,910
70
79
  themis/evaluation/metrics/rubric_judge_metric.py,sha256=KSSqwpMHaXCK6krbb_A93nppZ_0xk6Or30u7csnw7rM,4796
80
+ themis/evaluation/metrics/code/__init__.py,sha256=meZYPwDZVdZhW4jVW-52kOkZaC6ItyGfEhRVX7jIkXg,599
81
+ themis/evaluation/metrics/code/codebleu.py,sha256=joZJH1VOTBmKhqW1YBvizribqO5rilqsDmvslEdB2as,4826
82
+ themis/evaluation/metrics/code/execution.py,sha256=ACvWuG3Fc4bWuISLeKmWajibZeDDZx1Le-shQgFDsc0,8543
83
+ themis/evaluation/metrics/code/pass_at_k.py,sha256=X4V0bK8uG9dh4vovW0GafzHctRQ-3bH28aFTI8FE9NE,5649
84
+ themis/evaluation/metrics/nlp/__init__.py,sha256=cop5o1tmMv21dNRrDyxrz17iRH9f4vIaKQZNzii4W7k,699
85
+ themis/evaluation/metrics/nlp/bertscore.py,sha256=czlIqYkOTBWsfHiE6U1vkq1KHRQm8pvUnQgTb-Fte1s,4807
86
+ themis/evaluation/metrics/nlp/bleu.py,sha256=o_aVkoFPSMmeOLYaHRMamIpSKlYSxrMA1OdntTIUe9g,4436
87
+ themis/evaluation/metrics/nlp/meteor.py,sha256=QZT09s4aiUcVvDJDVPZYjzi5SxXdS2gn2IaOTNmKp78,5076
88
+ themis/evaluation/metrics/nlp/rouge.py,sha256=YL05qluF-KsesHYFRfm5zELJlcvo6RvaKp7xKy6BuLI,4365
71
89
  themis/evaluation/pipelines/__init__.py,sha256=5YI1xaUULHisctFxrumN4XRpWYneoonX7nd9zBtsjvQ,384
72
90
  themis/evaluation/pipelines/composable_pipeline.py,sha256=nNP9MSvQQJvaSBw5_gO3FeyhGm9So2ZlGqh5qSvE8Ac,10905
73
- themis/evaluation/pipelines/standard_pipeline.py,sha256=5ub_7zfQVj9YbDQxYTdxNIVijlMktR0rrfmN0GSndnI,11610
91
+ themis/evaluation/pipelines/standard_pipeline.py,sha256=nDd_bkqAVQxgwG9RK6G_fsgqwZth3058uG3p4QM0Dck,14650
74
92
  themis/evaluation/statistics/__init__.py,sha256=TTrScTLAW7EHNq0nbjuJs6iP3_HgDx1yy3EtYXx5JCk,1257
75
93
  themis/evaluation/statistics/bootstrap.py,sha256=JUQ8rtzFvW2e41I2pLJ7pqgSEjuJ1r6McyYLI42At9g,2409
76
94
  themis/evaluation/statistics/confidence_intervals.py,sha256=CN5EO2gWiSITQubuWuPryngnGXhGwczY9kO3mcG6JVc,3676
@@ -85,18 +103,18 @@ themis/evaluation/strategies/evaluation_strategy.py,sha256=YFF-bXkz4Z52GuCw52Fck
85
103
  themis/evaluation/strategies/judge_evaluation_strategy.py,sha256=58pDB30y1VpM_1KPB6sGS0JImGZk5WTgnK9CKDF8N5k,2304
86
104
  themis/experiment/__init__.py,sha256=dGranqpESugmmfbQlTU9efwspazW6j3vcmAKEtAoWZk,182
87
105
  themis/experiment/builder.py,sha256=AEjCDeSOI2B0i0PBjkfY1GUDNrYGTGiqPvt0SxnDQFo,5618
88
- themis/experiment/cache_manager.py,sha256=KlSMPy5CICX3XmqnYwg6hP0TB4zzJu2nb-nLcZAFzy0,4012
106
+ themis/experiment/cache_manager.py,sha256=Fd8Qxifrmyn8f2zjAyPrLv-ZU4Dcp-MKo8-09BoW7tY,4361
89
107
  themis/experiment/comparison.py,sha256=Mr1L5Zj7i87xk9XUQ_UueLTsC-sDZH8YGwLwg_gG0VI,21562
90
108
  themis/experiment/cost.py,sha256=flhENfB5WKvyNWwPMDtygNZAv6y_yv4RoClsRz714Hc,10159
91
109
  themis/experiment/definitions.py,sha256=oOZBFfEQkSBiZd9CMutCQ5luH6oeUT9yAZFd7fpVjnw,2015
92
- themis/experiment/export.py,sha256=wsUICG9XG6HET2OBdQisHPXVv5pjF3HWCvcpv1A9igM,26012
110
+ themis/experiment/export.py,sha256=ujwiSvqQhLaO99WHyE8osdnmriHjyIM1C2zKf5o93Cw,29800
93
111
  themis/experiment/export_csv.py,sha256=80w3gEGjeLjuiNq539rRP73k3MBtwrzJy90hgE91AKw,6030
94
112
  themis/experiment/integration_manager.py,sha256=wTVTjDGcUkzz4tfnwSxa5nK1A4e2FKCPazDYGcdzYS8,3325
95
113
  themis/experiment/math.py,sha256=P2E9F_UKI7pb-aXepSztGdr_g309WEMe83zqg1nWO7A,6973
96
114
  themis/experiment/mcq.py,sha256=DDB99FHQsU_5vMIRDRhSZ7pReYvVf57wLmmo3OU_An4,6276
97
- themis/experiment/orchestrator.py,sha256=D2ANvg2s4Dyo0ridZ3alDVl1dTW4kazLoMeG0knQ6-M,14244
115
+ themis/experiment/orchestrator.py,sha256=-6epspKnPoAJQPKzoNAxd54MrEX3lIhrKyqQ9dmD00A,16120
98
116
  themis/experiment/pricing.py,sha256=fTM32yE3L8vahMP4sr1zr7dbp9zYCjiPN4D4VuZ8-q8,9346
99
- themis/experiment/storage.py,sha256=58tSwHn3J36UMMCWblkbs00ZjCyIqLkoR2Vib9c-zgE,9156
117
+ themis/experiment/storage.py,sha256=QS3fJD79bzgodM5x79yJ2A69O5hTL2r2ROAKSvtRnkI,49471
100
118
  themis/experiment/visualization.py,sha256=dJYHrp3mntl8CPc5HPI3iKqPztVsddQB3ogRkd_FCNc,18473
101
119
  themis/generation/__init__.py,sha256=6KVwCQYMpPIsXNuWDZOGuqHkUkA45lbSacIFn8ZbD4s,36
102
120
  themis/generation/agentic_runner.py,sha256=armBQBk7qZDBEwT8HqjIWomYDQm57NfrP5CZJzay2uA,13669
@@ -113,22 +131,27 @@ themis/generation/types.py,sha256=MkJnZk6lMHmHzlJVEsuIC9ioRW8XhWcSk9AdDeb_aLE,33
113
131
  themis/generation/providers/litellm_provider.py,sha256=rlTuglIwhcvSakCo5G-ffgQtEHbCEX0ZeKk6M1MaWmU,8155
114
132
  themis/generation/providers/vllm_provider.py,sha256=0K4we6xDrRXlBXseC1ixLq2sJpRF4T8Ikv45dw-zNk4,4625
115
133
  themis/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
116
- themis/integrations/huggingface.py,sha256=oUkLVyacmukDwHrMUVayHJ4fWnVCA77ZyuEkC7aogmY,2032
117
- themis/integrations/wandb.py,sha256=VVVReLM-qjw4DyLVJcoaSH1rSTMNf3ReOKMxOFwJKu4,2247
118
- themis/interfaces/__init__.py,sha256=OSHATlp1fNBJrVq0lNM85MGjWmSj0NKtnLnp3fmYn1k,2330
134
+ themis/integrations/huggingface.py,sha256=vrLwYwn65pU4W3FUe0ImCOZxKKlpRshDqMoLFsclB3E,2370
135
+ themis/integrations/wandb.py,sha256=LJOPojjlqG05EIPxcjy3QmA15Gxgs1db3encDWVzYYw,2545
136
+ themis/interfaces/__init__.py,sha256=78dNE_eHfFmb9hXNy5sLZ1jOTGWS8TzdVE_eiYQPFVc,5967
137
+ themis/presets/__init__.py,sha256=hkoyODYiWFFSQAIKTpEbAIUuFIwTibBhzTOkiTbzhVQ,411
138
+ themis/presets/benchmarks.py,sha256=s9JxRogHwZs8oiuiI7Z7uiUBZXEp3gg7AQZnBvdGieA,12026
139
+ themis/presets/models.py,sha256=c6-I_drHa4vMLIajSkCcrFbsJOsauFjY8fU1leBxZLg,5173
119
140
  themis/project/__init__.py,sha256=vgLv2nS62yz1XsFSFzFf7eIo6FyQJXpOY9OPRUcTQLQ,465
120
141
  themis/project/definitions.py,sha256=vHARw0IjFOWE4RL4mGRwvke36A6GWQGep6cQFIRcpJg,3329
121
142
  themis/project/patterns.py,sha256=2J51Q9Jq7X-2N57uexvR191gaZKwusef5vIuIVUQY-E,7743
122
143
  themis/providers/__init__.py,sha256=K5nG0DsK_YPY0cT9MBLk5BLcLbBo0wBP0vQvLjpAw_Y,189
123
144
  themis/providers/registry.py,sha256=Za5Kg3-A-35wS_jiGpPXV2q1k6he_dRIWVqt36dKN-4,1056
145
+ themis/server/__init__.py,sha256=Hp0qGI5nvO4bhLAez3jQxim7H433l72EYE2IA8Xp2hA,731
146
+ themis/server/app.py,sha256=OZ39gCC47AXVqZxroC_4KtIYBYx_rfpde7C25AF3EI0,11166
124
147
  themis/utils/api_generator.py,sha256=3oQ7mGZlFx2Dpm45pMg3rNIqNK2Smj05PjOMXp5RIkQ,10776
125
148
  themis/utils/cost_tracking.py,sha256=9_Z2iTfNaQse9G_bnqn4hme4T0fG2W-fxOLEDeF_3VI,11545
126
149
  themis/utils/dashboard.py,sha256=2yiIu9_oENglTde_J3G1d5cpQ5VtSnfbUvdliw5Og1E,13008
127
150
  themis/utils/logging_utils.py,sha256=YNSiDfO4LsciSzUhHF1aTVI5rkfnWiVbn1NcGjjmJuQ,1019
128
151
  themis/utils/progress.py,sha256=b3YwHKV5x3Cvr5rBukqifJimK3Si4CGY2fpN6a_ZySI,1434
129
152
  themis/utils/tracing.py,sha256=VTeiRjcW_B5fOOoSeAp37nrmlwP1DiqPcoe6OtIQ7dk,8468
130
- themis_eval-0.1.1.dist-info/licenses/LICENSE,sha256=K5FLE7iqn5-_6k1sf3IGy7w-Wx_Vdx3t0sOVJByNlF0,1076
131
- themis_eval-0.1.1.dist-info/METADATA,sha256=7-oAglt5HH_AWi7yCzvTq_RUE07xTIVmZG02IcxHRjM,23516
132
- themis_eval-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
133
- themis_eval-0.1.1.dist-info/top_level.txt,sha256=QGIl4v-KB32upFS5UTXMJxHVX3vF7yBso82wJFI1Vbs,7
134
- themis_eval-0.1.1.dist-info/RECORD,,
153
+ themis_eval-0.2.0.dist-info/licenses/LICENSE,sha256=K5FLE7iqn5-_6k1sf3IGy7w-Wx_Vdx3t0sOVJByNlF0,1076
154
+ themis_eval-0.2.0.dist-info/METADATA,sha256=S4dy0AD2REsRtPfULUYMiYC2Zk8nWgz4BWjBBJz2gHU,15173
155
+ themis_eval-0.2.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
156
+ themis_eval-0.2.0.dist-info/top_level.txt,sha256=QGIl4v-KB32upFS5UTXMJxHVX3vF7yBso82wJFI1Vbs,7
157
+ themis_eval-0.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5