themis-eval 0.1.1__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- themis/__init__.py +12 -1
- themis/_version.py +2 -2
- themis/api.py +343 -0
- themis/backends/__init__.py +17 -0
- themis/backends/execution.py +197 -0
- themis/backends/storage.py +260 -0
- themis/cli/commands/results.py +252 -0
- themis/cli/main.py +427 -57
- themis/comparison/__init__.py +25 -0
- themis/comparison/engine.py +348 -0
- themis/comparison/reports.py +283 -0
- themis/comparison/statistics.py +402 -0
- themis/core/entities.py +23 -3
- themis/evaluation/metrics/code/__init__.py +19 -0
- themis/evaluation/metrics/code/codebleu.py +144 -0
- themis/evaluation/metrics/code/execution.py +280 -0
- themis/evaluation/metrics/code/pass_at_k.py +181 -0
- themis/evaluation/metrics/nlp/__init__.py +21 -0
- themis/evaluation/metrics/nlp/bertscore.py +138 -0
- themis/evaluation/metrics/nlp/bleu.py +129 -0
- themis/evaluation/metrics/nlp/meteor.py +153 -0
- themis/evaluation/metrics/nlp/rouge.py +136 -0
- themis/evaluation/pipelines/standard_pipeline.py +68 -8
- themis/experiment/cache_manager.py +8 -3
- themis/experiment/export.py +110 -2
- themis/experiment/orchestrator.py +48 -6
- themis/experiment/storage.py +1313 -110
- themis/integrations/huggingface.py +12 -1
- themis/integrations/wandb.py +13 -1
- themis/interfaces/__init__.py +86 -0
- themis/presets/__init__.py +10 -0
- themis/presets/benchmarks.py +354 -0
- themis/presets/models.py +190 -0
- themis/server/__init__.py +28 -0
- themis/server/app.py +337 -0
- themis_eval-0.2.0.dist-info/METADATA +596 -0
- {themis_eval-0.1.1.dist-info → themis_eval-0.2.0.dist-info}/RECORD +40 -17
- {themis_eval-0.1.1.dist-info → themis_eval-0.2.0.dist-info}/WHEEL +1 -1
- themis_eval-0.1.1.dist-info/METADATA +0 -758
- {themis_eval-0.1.1.dist-info → themis_eval-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {themis_eval-0.1.1.dist-info → themis_eval-0.2.0.dist-info}/top_level.txt +0 -0
|
@@ -1,9 +1,13 @@
|
|
|
1
|
-
themis/__init__.py,sha256=
|
|
2
|
-
themis/_version.py,sha256=
|
|
1
|
+
themis/__init__.py,sha256=Pswn5ZiXyU5ANoknjdBLkqouZQdeWMm3DoUMVzU_j8M,543
|
|
2
|
+
themis/_version.py,sha256=xRJB6N107oMsasuLYKaoIzuBo5Oe2hlK3-lGyTzxAC8,378
|
|
3
|
+
themis/api.py,sha256=myHeMaWQMnyjCUAlr9P6cX2Awt50q1XGtyKDCimJgCg,12077
|
|
3
4
|
themis/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
+
themis/backends/__init__.py,sha256=RWM5SnV5FrS_cVjpHHeZZM_b9CgqBu1rPS5DlT5YQTY,578
|
|
6
|
+
themis/backends/execution.py,sha256=RAFuB9ri8TMil5PcnsisypKO2ViyLFXj08P_vjNYguU,6095
|
|
7
|
+
themis/backends/storage.py,sha256=pQp20WagSCl8Vmd-Rgx0hDbpYFhCqARXtvGDw3DPgNQ,8021
|
|
4
8
|
themis/cli/__init__.py,sha256=An2DrMHRfmiee5BYJ6TGqvbG7sXWECjjyvEgcoGJ7cE,99
|
|
5
9
|
themis/cli/__main__.py,sha256=df2pOghoSuq18hZmVVikmGhaFSaRe-jeDOnrsu-1QDM,135
|
|
6
|
-
themis/cli/main.py,sha256=
|
|
10
|
+
themis/cli/main.py,sha256=AGBFxb1sPLQ-aUAq8RM3YI6gGNs6SdFmBzVSqwp_MSg,15482
|
|
7
11
|
themis/cli/new_project.py,sha256=D8asV4QbjgQNYvmXt_WhK4nPM-wKHe_K0VJiBdgtO_E,1121
|
|
8
12
|
themis/cli/utils.py,sha256=NAPyFiXspfpx5vBxA8aEcOMmWEDyt-R8ywoHo_8Nr4A,1307
|
|
9
13
|
themis/cli/commands/__init__.py,sha256=CTx7su3qTtq96qxLNclDsE6UM_86NhaS01M9-x9wFiw,287
|
|
@@ -16,8 +20,13 @@ themis/cli/commands/info.py,sha256=9maOaw-TFiBpuVhaqlMKukGuZ_zgESetqbMQ1Qdvjxs,2
|
|
|
16
20
|
themis/cli/commands/leaderboard.py,sha256=AVvsYIwZAY18jn3sOq3QD45yNtfdHUEl7eixM4aMCKw,10615
|
|
17
21
|
themis/cli/commands/math_benchmarks.py,sha256=nQ4TcPB7T9O3piAy4_TgrOQOQxh2Q8OyBreK_HoPCeQ,9946
|
|
18
22
|
themis/cli/commands/mcq_benchmarks.py,sha256=Cls5W1jGd7TKizmw07CnZWY5N6ywR8VhJ6jKDnY_cRk,7026
|
|
23
|
+
themis/cli/commands/results.py,sha256=rdN3SaMoFnSfAoAXlfpeCTt3V6MwIp0Dk7FIjvPNF7s,7774
|
|
19
24
|
themis/cli/commands/sample_run.py,sha256=r3Ymg5dVHg4IAVJvzoP0ZWUWWUE4Dia1t0062Yhdk9Q,9445
|
|
20
25
|
themis/cli/commands/visualize.py,sha256=ZECkB0NjIltuOeBE-Q1JnndZEMXVzc8KgcrbaP-GSXo,9740
|
|
26
|
+
themis/comparison/__init__.py,sha256=bRI8gDlcjMtnH77R7N5ARioq_V4daJcWWM4DXKsoE1k,679
|
|
27
|
+
themis/comparison/engine.py,sha256=UkzXKmEFI2JiX0y8534oc6JFySxgA5v1emzRcGj33Kk,12133
|
|
28
|
+
themis/comparison/reports.py,sha256=126VJbd-lxj8C2YJqul53Fyr-nrZgmbrBsRA6Qkh0ro,10117
|
|
29
|
+
themis/comparison/statistics.py,sha256=eLqKUtKFwSvXnbZax8S0lF8RiSepwYdhnmnDD7DcrZs,12929
|
|
21
30
|
themis/config/__init__.py,sha256=YMdFG1iLvOQUnSPlc_ZJVn5zCCTbIozML64b4qUtGR8,476
|
|
22
31
|
themis/config/loader.py,sha256=t_wcIDwekuy3EaLprQgWILSKH2h5lFkF7quvNfAHddM,746
|
|
23
32
|
themis/config/registry.py,sha256=sSrL1mTjUG86s30o-dhuRInY4YeumEICtmxdjS-PiiQ,1055
|
|
@@ -25,7 +34,7 @@ themis/config/runtime.py,sha256=hU69_oND7fJfAOIBJONENmsuf7Y8roO7n-w9OwxzoT8,7475
|
|
|
25
34
|
themis/config/schema.py,sha256=SMR9QHp8OBkSnb1dHyOgg-IJWSqpXfyAqywnBeMy46M,3196
|
|
26
35
|
themis/core/__init__.py,sha256=S8G1x-39sZ3_NQ5DJ6R1yBTWXp_gO0WxOtVjeB9sTwY,113
|
|
27
36
|
themis/core/conversation.py,sha256=wwO8RS4t4plDR0Sf1KjYv_ejonlvKe0ZwAD-4sfGak8,10155
|
|
28
|
-
themis/core/entities.py,sha256=
|
|
37
|
+
themis/core/entities.py,sha256=WV9kiYdZFGxn6oH0lPtqaViY4I8oq-hWi_SmCKjvRnc,4449
|
|
29
38
|
themis/core/serialization.py,sha256=cxfoSKwcZiNsnR8g_SAJAq1ZLrfLXM4S9_rVEDUT8qs,7071
|
|
30
39
|
themis/core/tools.py,sha256=v0_ctsBCtinZGNC_I4C-h0GUPNM5ZeTi7z-U4iCtyp4,11035
|
|
31
40
|
themis/core/types.py,sha256=I5rr9MMS0irX4lo-xlqGjosx-FjPgT64RzQAraM223A,3652
|
|
@@ -68,9 +77,18 @@ themis/evaluation/metrics/math_verify_accuracy.py,sha256=YhBhpONLmouLELfpcjNHiVS
|
|
|
68
77
|
themis/evaluation/metrics/pairwise_judge_metric.py,sha256=DEYKwt3smzXiSUhDV4lWxDFXWoHz-JMg3z5bMjlLPKo,4890
|
|
69
78
|
themis/evaluation/metrics/response_length.py,sha256=Xn2PQi4pMLhC_3bMmSbLEf-QVFOrMNm2ZJr0PiCDH-E,910
|
|
70
79
|
themis/evaluation/metrics/rubric_judge_metric.py,sha256=KSSqwpMHaXCK6krbb_A93nppZ_0xk6Or30u7csnw7rM,4796
|
|
80
|
+
themis/evaluation/metrics/code/__init__.py,sha256=meZYPwDZVdZhW4jVW-52kOkZaC6ItyGfEhRVX7jIkXg,599
|
|
81
|
+
themis/evaluation/metrics/code/codebleu.py,sha256=joZJH1VOTBmKhqW1YBvizribqO5rilqsDmvslEdB2as,4826
|
|
82
|
+
themis/evaluation/metrics/code/execution.py,sha256=ACvWuG3Fc4bWuISLeKmWajibZeDDZx1Le-shQgFDsc0,8543
|
|
83
|
+
themis/evaluation/metrics/code/pass_at_k.py,sha256=X4V0bK8uG9dh4vovW0GafzHctRQ-3bH28aFTI8FE9NE,5649
|
|
84
|
+
themis/evaluation/metrics/nlp/__init__.py,sha256=cop5o1tmMv21dNRrDyxrz17iRH9f4vIaKQZNzii4W7k,699
|
|
85
|
+
themis/evaluation/metrics/nlp/bertscore.py,sha256=czlIqYkOTBWsfHiE6U1vkq1KHRQm8pvUnQgTb-Fte1s,4807
|
|
86
|
+
themis/evaluation/metrics/nlp/bleu.py,sha256=o_aVkoFPSMmeOLYaHRMamIpSKlYSxrMA1OdntTIUe9g,4436
|
|
87
|
+
themis/evaluation/metrics/nlp/meteor.py,sha256=QZT09s4aiUcVvDJDVPZYjzi5SxXdS2gn2IaOTNmKp78,5076
|
|
88
|
+
themis/evaluation/metrics/nlp/rouge.py,sha256=YL05qluF-KsesHYFRfm5zELJlcvo6RvaKp7xKy6BuLI,4365
|
|
71
89
|
themis/evaluation/pipelines/__init__.py,sha256=5YI1xaUULHisctFxrumN4XRpWYneoonX7nd9zBtsjvQ,384
|
|
72
90
|
themis/evaluation/pipelines/composable_pipeline.py,sha256=nNP9MSvQQJvaSBw5_gO3FeyhGm9So2ZlGqh5qSvE8Ac,10905
|
|
73
|
-
themis/evaluation/pipelines/standard_pipeline.py,sha256=
|
|
91
|
+
themis/evaluation/pipelines/standard_pipeline.py,sha256=nDd_bkqAVQxgwG9RK6G_fsgqwZth3058uG3p4QM0Dck,14650
|
|
74
92
|
themis/evaluation/statistics/__init__.py,sha256=TTrScTLAW7EHNq0nbjuJs6iP3_HgDx1yy3EtYXx5JCk,1257
|
|
75
93
|
themis/evaluation/statistics/bootstrap.py,sha256=JUQ8rtzFvW2e41I2pLJ7pqgSEjuJ1r6McyYLI42At9g,2409
|
|
76
94
|
themis/evaluation/statistics/confidence_intervals.py,sha256=CN5EO2gWiSITQubuWuPryngnGXhGwczY9kO3mcG6JVc,3676
|
|
@@ -85,18 +103,18 @@ themis/evaluation/strategies/evaluation_strategy.py,sha256=YFF-bXkz4Z52GuCw52Fck
|
|
|
85
103
|
themis/evaluation/strategies/judge_evaluation_strategy.py,sha256=58pDB30y1VpM_1KPB6sGS0JImGZk5WTgnK9CKDF8N5k,2304
|
|
86
104
|
themis/experiment/__init__.py,sha256=dGranqpESugmmfbQlTU9efwspazW6j3vcmAKEtAoWZk,182
|
|
87
105
|
themis/experiment/builder.py,sha256=AEjCDeSOI2B0i0PBjkfY1GUDNrYGTGiqPvt0SxnDQFo,5618
|
|
88
|
-
themis/experiment/cache_manager.py,sha256=
|
|
106
|
+
themis/experiment/cache_manager.py,sha256=Fd8Qxifrmyn8f2zjAyPrLv-ZU4Dcp-MKo8-09BoW7tY,4361
|
|
89
107
|
themis/experiment/comparison.py,sha256=Mr1L5Zj7i87xk9XUQ_UueLTsC-sDZH8YGwLwg_gG0VI,21562
|
|
90
108
|
themis/experiment/cost.py,sha256=flhENfB5WKvyNWwPMDtygNZAv6y_yv4RoClsRz714Hc,10159
|
|
91
109
|
themis/experiment/definitions.py,sha256=oOZBFfEQkSBiZd9CMutCQ5luH6oeUT9yAZFd7fpVjnw,2015
|
|
92
|
-
themis/experiment/export.py,sha256=
|
|
110
|
+
themis/experiment/export.py,sha256=ujwiSvqQhLaO99WHyE8osdnmriHjyIM1C2zKf5o93Cw,29800
|
|
93
111
|
themis/experiment/export_csv.py,sha256=80w3gEGjeLjuiNq539rRP73k3MBtwrzJy90hgE91AKw,6030
|
|
94
112
|
themis/experiment/integration_manager.py,sha256=wTVTjDGcUkzz4tfnwSxa5nK1A4e2FKCPazDYGcdzYS8,3325
|
|
95
113
|
themis/experiment/math.py,sha256=P2E9F_UKI7pb-aXepSztGdr_g309WEMe83zqg1nWO7A,6973
|
|
96
114
|
themis/experiment/mcq.py,sha256=DDB99FHQsU_5vMIRDRhSZ7pReYvVf57wLmmo3OU_An4,6276
|
|
97
|
-
themis/experiment/orchestrator.py,sha256
|
|
115
|
+
themis/experiment/orchestrator.py,sha256=-6epspKnPoAJQPKzoNAxd54MrEX3lIhrKyqQ9dmD00A,16120
|
|
98
116
|
themis/experiment/pricing.py,sha256=fTM32yE3L8vahMP4sr1zr7dbp9zYCjiPN4D4VuZ8-q8,9346
|
|
99
|
-
themis/experiment/storage.py,sha256=
|
|
117
|
+
themis/experiment/storage.py,sha256=QS3fJD79bzgodM5x79yJ2A69O5hTL2r2ROAKSvtRnkI,49471
|
|
100
118
|
themis/experiment/visualization.py,sha256=dJYHrp3mntl8CPc5HPI3iKqPztVsddQB3ogRkd_FCNc,18473
|
|
101
119
|
themis/generation/__init__.py,sha256=6KVwCQYMpPIsXNuWDZOGuqHkUkA45lbSacIFn8ZbD4s,36
|
|
102
120
|
themis/generation/agentic_runner.py,sha256=armBQBk7qZDBEwT8HqjIWomYDQm57NfrP5CZJzay2uA,13669
|
|
@@ -113,22 +131,27 @@ themis/generation/types.py,sha256=MkJnZk6lMHmHzlJVEsuIC9ioRW8XhWcSk9AdDeb_aLE,33
|
|
|
113
131
|
themis/generation/providers/litellm_provider.py,sha256=rlTuglIwhcvSakCo5G-ffgQtEHbCEX0ZeKk6M1MaWmU,8155
|
|
114
132
|
themis/generation/providers/vllm_provider.py,sha256=0K4we6xDrRXlBXseC1ixLq2sJpRF4T8Ikv45dw-zNk4,4625
|
|
115
133
|
themis/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
116
|
-
themis/integrations/huggingface.py,sha256=
|
|
117
|
-
themis/integrations/wandb.py,sha256=
|
|
118
|
-
themis/interfaces/__init__.py,sha256=
|
|
134
|
+
themis/integrations/huggingface.py,sha256=vrLwYwn65pU4W3FUe0ImCOZxKKlpRshDqMoLFsclB3E,2370
|
|
135
|
+
themis/integrations/wandb.py,sha256=LJOPojjlqG05EIPxcjy3QmA15Gxgs1db3encDWVzYYw,2545
|
|
136
|
+
themis/interfaces/__init__.py,sha256=78dNE_eHfFmb9hXNy5sLZ1jOTGWS8TzdVE_eiYQPFVc,5967
|
|
137
|
+
themis/presets/__init__.py,sha256=hkoyODYiWFFSQAIKTpEbAIUuFIwTibBhzTOkiTbzhVQ,411
|
|
138
|
+
themis/presets/benchmarks.py,sha256=s9JxRogHwZs8oiuiI7Z7uiUBZXEp3gg7AQZnBvdGieA,12026
|
|
139
|
+
themis/presets/models.py,sha256=c6-I_drHa4vMLIajSkCcrFbsJOsauFjY8fU1leBxZLg,5173
|
|
119
140
|
themis/project/__init__.py,sha256=vgLv2nS62yz1XsFSFzFf7eIo6FyQJXpOY9OPRUcTQLQ,465
|
|
120
141
|
themis/project/definitions.py,sha256=vHARw0IjFOWE4RL4mGRwvke36A6GWQGep6cQFIRcpJg,3329
|
|
121
142
|
themis/project/patterns.py,sha256=2J51Q9Jq7X-2N57uexvR191gaZKwusef5vIuIVUQY-E,7743
|
|
122
143
|
themis/providers/__init__.py,sha256=K5nG0DsK_YPY0cT9MBLk5BLcLbBo0wBP0vQvLjpAw_Y,189
|
|
123
144
|
themis/providers/registry.py,sha256=Za5Kg3-A-35wS_jiGpPXV2q1k6he_dRIWVqt36dKN-4,1056
|
|
145
|
+
themis/server/__init__.py,sha256=Hp0qGI5nvO4bhLAez3jQxim7H433l72EYE2IA8Xp2hA,731
|
|
146
|
+
themis/server/app.py,sha256=OZ39gCC47AXVqZxroC_4KtIYBYx_rfpde7C25AF3EI0,11166
|
|
124
147
|
themis/utils/api_generator.py,sha256=3oQ7mGZlFx2Dpm45pMg3rNIqNK2Smj05PjOMXp5RIkQ,10776
|
|
125
148
|
themis/utils/cost_tracking.py,sha256=9_Z2iTfNaQse9G_bnqn4hme4T0fG2W-fxOLEDeF_3VI,11545
|
|
126
149
|
themis/utils/dashboard.py,sha256=2yiIu9_oENglTde_J3G1d5cpQ5VtSnfbUvdliw5Og1E,13008
|
|
127
150
|
themis/utils/logging_utils.py,sha256=YNSiDfO4LsciSzUhHF1aTVI5rkfnWiVbn1NcGjjmJuQ,1019
|
|
128
151
|
themis/utils/progress.py,sha256=b3YwHKV5x3Cvr5rBukqifJimK3Si4CGY2fpN6a_ZySI,1434
|
|
129
152
|
themis/utils/tracing.py,sha256=VTeiRjcW_B5fOOoSeAp37nrmlwP1DiqPcoe6OtIQ7dk,8468
|
|
130
|
-
themis_eval-0.
|
|
131
|
-
themis_eval-0.
|
|
132
|
-
themis_eval-0.
|
|
133
|
-
themis_eval-0.
|
|
134
|
-
themis_eval-0.
|
|
153
|
+
themis_eval-0.2.0.dist-info/licenses/LICENSE,sha256=K5FLE7iqn5-_6k1sf3IGy7w-Wx_Vdx3t0sOVJByNlF0,1076
|
|
154
|
+
themis_eval-0.2.0.dist-info/METADATA,sha256=S4dy0AD2REsRtPfULUYMiYC2Zk8nWgz4BWjBBJz2gHU,15173
|
|
155
|
+
themis_eval-0.2.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
|
|
156
|
+
themis_eval-0.2.0.dist-info/top_level.txt,sha256=QGIl4v-KB32upFS5UTXMJxHVX3vF7yBso82wJFI1Vbs,7
|
|
157
|
+
themis_eval-0.2.0.dist-info/RECORD,,
|