themis-eval 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. themis/__init__.py +12 -1
  2. themis/_version.py +2 -2
  3. themis/api.py +343 -0
  4. themis/backends/__init__.py +17 -0
  5. themis/backends/execution.py +197 -0
  6. themis/backends/storage.py +260 -0
  7. themis/cli/__init__.py +5 -0
  8. themis/cli/__main__.py +6 -0
  9. themis/cli/commands/__init__.py +19 -0
  10. themis/cli/commands/benchmarks.py +221 -0
  11. themis/cli/commands/comparison.py +394 -0
  12. themis/cli/commands/config_commands.py +244 -0
  13. themis/cli/commands/cost.py +214 -0
  14. themis/cli/commands/demo.py +68 -0
  15. themis/cli/commands/info.py +90 -0
  16. themis/cli/commands/leaderboard.py +362 -0
  17. themis/cli/commands/math_benchmarks.py +318 -0
  18. themis/cli/commands/mcq_benchmarks.py +207 -0
  19. themis/cli/commands/results.py +252 -0
  20. themis/cli/commands/sample_run.py +244 -0
  21. themis/cli/commands/visualize.py +299 -0
  22. themis/cli/main.py +463 -0
  23. themis/cli/new_project.py +33 -0
  24. themis/cli/utils.py +51 -0
  25. themis/comparison/__init__.py +25 -0
  26. themis/comparison/engine.py +348 -0
  27. themis/comparison/reports.py +283 -0
  28. themis/comparison/statistics.py +402 -0
  29. themis/config/__init__.py +19 -0
  30. themis/config/loader.py +27 -0
  31. themis/config/registry.py +34 -0
  32. themis/config/runtime.py +214 -0
  33. themis/config/schema.py +112 -0
  34. themis/core/__init__.py +5 -0
  35. themis/core/conversation.py +354 -0
  36. themis/core/entities.py +184 -0
  37. themis/core/serialization.py +231 -0
  38. themis/core/tools.py +393 -0
  39. themis/core/types.py +141 -0
  40. themis/datasets/__init__.py +273 -0
  41. themis/datasets/base.py +264 -0
  42. themis/datasets/commonsense_qa.py +174 -0
  43. themis/datasets/competition_math.py +265 -0
  44. themis/datasets/coqa.py +133 -0
  45. themis/datasets/gpqa.py +190 -0
  46. themis/datasets/gsm8k.py +123 -0
  47. themis/datasets/gsm_symbolic.py +124 -0
  48. themis/datasets/math500.py +122 -0
  49. themis/datasets/med_qa.py +179 -0
  50. themis/datasets/medmcqa.py +169 -0
  51. themis/datasets/mmlu_pro.py +262 -0
  52. themis/datasets/piqa.py +146 -0
  53. themis/datasets/registry.py +201 -0
  54. themis/datasets/schema.py +245 -0
  55. themis/datasets/sciq.py +150 -0
  56. themis/datasets/social_i_qa.py +151 -0
  57. themis/datasets/super_gpqa.py +263 -0
  58. themis/evaluation/__init__.py +1 -0
  59. themis/evaluation/conditional.py +410 -0
  60. themis/evaluation/extractors/__init__.py +19 -0
  61. themis/evaluation/extractors/error_taxonomy_extractor.py +80 -0
  62. themis/evaluation/extractors/exceptions.py +7 -0
  63. themis/evaluation/extractors/identity_extractor.py +29 -0
  64. themis/evaluation/extractors/json_field_extractor.py +45 -0
  65. themis/evaluation/extractors/math_verify_extractor.py +37 -0
  66. themis/evaluation/extractors/regex_extractor.py +43 -0
  67. themis/evaluation/math_verify_utils.py +87 -0
  68. themis/evaluation/metrics/__init__.py +21 -0
  69. themis/evaluation/metrics/code/__init__.py +19 -0
  70. themis/evaluation/metrics/code/codebleu.py +144 -0
  71. themis/evaluation/metrics/code/execution.py +280 -0
  72. themis/evaluation/metrics/code/pass_at_k.py +181 -0
  73. themis/evaluation/metrics/composite_metric.py +47 -0
  74. themis/evaluation/metrics/consistency_metric.py +80 -0
  75. themis/evaluation/metrics/exact_match.py +51 -0
  76. themis/evaluation/metrics/length_difference_tolerance.py +33 -0
  77. themis/evaluation/metrics/math_verify_accuracy.py +40 -0
  78. themis/evaluation/metrics/nlp/__init__.py +21 -0
  79. themis/evaluation/metrics/nlp/bertscore.py +138 -0
  80. themis/evaluation/metrics/nlp/bleu.py +129 -0
  81. themis/evaluation/metrics/nlp/meteor.py +153 -0
  82. themis/evaluation/metrics/nlp/rouge.py +136 -0
  83. themis/evaluation/metrics/pairwise_judge_metric.py +141 -0
  84. themis/evaluation/metrics/response_length.py +33 -0
  85. themis/evaluation/metrics/rubric_judge_metric.py +134 -0
  86. themis/evaluation/pipeline.py +49 -0
  87. themis/evaluation/pipelines/__init__.py +15 -0
  88. themis/evaluation/pipelines/composable_pipeline.py +357 -0
  89. themis/evaluation/pipelines/standard_pipeline.py +348 -0
  90. themis/evaluation/reports.py +293 -0
  91. themis/evaluation/statistics/__init__.py +53 -0
  92. themis/evaluation/statistics/bootstrap.py +79 -0
  93. themis/evaluation/statistics/confidence_intervals.py +121 -0
  94. themis/evaluation/statistics/distributions.py +207 -0
  95. themis/evaluation/statistics/effect_sizes.py +124 -0
  96. themis/evaluation/statistics/hypothesis_tests.py +305 -0
  97. themis/evaluation/statistics/types.py +139 -0
  98. themis/evaluation/strategies/__init__.py +13 -0
  99. themis/evaluation/strategies/attempt_aware_evaluation_strategy.py +51 -0
  100. themis/evaluation/strategies/default_evaluation_strategy.py +25 -0
  101. themis/evaluation/strategies/evaluation_strategy.py +24 -0
  102. themis/evaluation/strategies/judge_evaluation_strategy.py +64 -0
  103. themis/experiment/__init__.py +5 -0
  104. themis/experiment/builder.py +151 -0
  105. themis/experiment/cache_manager.py +134 -0
  106. themis/experiment/comparison.py +631 -0
  107. themis/experiment/cost.py +310 -0
  108. themis/experiment/definitions.py +62 -0
  109. themis/experiment/export.py +798 -0
  110. themis/experiment/export_csv.py +159 -0
  111. themis/experiment/integration_manager.py +104 -0
  112. themis/experiment/math.py +192 -0
  113. themis/experiment/mcq.py +169 -0
  114. themis/experiment/orchestrator.py +415 -0
  115. themis/experiment/pricing.py +317 -0
  116. themis/experiment/storage.py +1458 -0
  117. themis/experiment/visualization.py +588 -0
  118. themis/generation/__init__.py +1 -0
  119. themis/generation/agentic_runner.py +420 -0
  120. themis/generation/batching.py +254 -0
  121. themis/generation/clients.py +143 -0
  122. themis/generation/conversation_runner.py +236 -0
  123. themis/generation/plan.py +456 -0
  124. themis/generation/providers/litellm_provider.py +221 -0
  125. themis/generation/providers/vllm_provider.py +135 -0
  126. themis/generation/router.py +34 -0
  127. themis/generation/runner.py +207 -0
  128. themis/generation/strategies.py +98 -0
  129. themis/generation/templates.py +71 -0
  130. themis/generation/turn_strategies.py +393 -0
  131. themis/generation/types.py +9 -0
  132. themis/integrations/__init__.py +0 -0
  133. themis/integrations/huggingface.py +72 -0
  134. themis/integrations/wandb.py +77 -0
  135. themis/interfaces/__init__.py +169 -0
  136. themis/presets/__init__.py +10 -0
  137. themis/presets/benchmarks.py +354 -0
  138. themis/presets/models.py +190 -0
  139. themis/project/__init__.py +20 -0
  140. themis/project/definitions.py +98 -0
  141. themis/project/patterns.py +230 -0
  142. themis/providers/__init__.py +5 -0
  143. themis/providers/registry.py +39 -0
  144. themis/server/__init__.py +28 -0
  145. themis/server/app.py +337 -0
  146. themis/utils/api_generator.py +379 -0
  147. themis/utils/cost_tracking.py +376 -0
  148. themis/utils/dashboard.py +452 -0
  149. themis/utils/logging_utils.py +41 -0
  150. themis/utils/progress.py +58 -0
  151. themis/utils/tracing.py +320 -0
  152. themis_eval-0.2.0.dist-info/METADATA +596 -0
  153. themis_eval-0.2.0.dist-info/RECORD +157 -0
  154. {themis_eval-0.1.0.dist-info → themis_eval-0.2.0.dist-info}/WHEEL +1 -1
  155. themis_eval-0.1.0.dist-info/METADATA +0 -758
  156. themis_eval-0.1.0.dist-info/RECORD +0 -8
  157. {themis_eval-0.1.0.dist-info → themis_eval-0.2.0.dist-info}/licenses/LICENSE +0 -0
  158. {themis_eval-0.1.0.dist-info → themis_eval-0.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,157 @@
1
+ themis/__init__.py,sha256=Pswn5ZiXyU5ANoknjdBLkqouZQdeWMm3DoUMVzU_j8M,543
2
+ themis/_version.py,sha256=xRJB6N107oMsasuLYKaoIzuBo5Oe2hlK3-lGyTzxAC8,378
3
+ themis/api.py,sha256=myHeMaWQMnyjCUAlr9P6cX2Awt50q1XGtyKDCimJgCg,12077
4
+ themis/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ themis/backends/__init__.py,sha256=RWM5SnV5FrS_cVjpHHeZZM_b9CgqBu1rPS5DlT5YQTY,578
6
+ themis/backends/execution.py,sha256=RAFuB9ri8TMil5PcnsisypKO2ViyLFXj08P_vjNYguU,6095
7
+ themis/backends/storage.py,sha256=pQp20WagSCl8Vmd-Rgx0hDbpYFhCqARXtvGDw3DPgNQ,8021
8
+ themis/cli/__init__.py,sha256=An2DrMHRfmiee5BYJ6TGqvbG7sXWECjjyvEgcoGJ7cE,99
9
+ themis/cli/__main__.py,sha256=df2pOghoSuq18hZmVVikmGhaFSaRe-jeDOnrsu-1QDM,135
10
+ themis/cli/main.py,sha256=AGBFxb1sPLQ-aUAq8RM3YI6gGNs6SdFmBzVSqwp_MSg,15482
11
+ themis/cli/new_project.py,sha256=D8asV4QbjgQNYvmXt_WhK4nPM-wKHe_K0VJiBdgtO_E,1121
12
+ themis/cli/utils.py,sha256=NAPyFiXspfpx5vBxA8aEcOMmWEDyt-R8ywoHo_8Nr4A,1307
13
+ themis/cli/commands/__init__.py,sha256=CTx7su3qTtq96qxLNclDsE6UM_86NhaS01M9-x9wFiw,287
14
+ themis/cli/commands/benchmarks.py,sha256=HjCfmhu1FYUEGlaxUZGIZs0I-2gNG4tn_kF29NWPGcc,7885
15
+ themis/cli/commands/comparison.py,sha256=Ki1_MMFFR4vBJkZTeIMWLh-_zdjbtJZurI3YyrEs4vw,12364
16
+ themis/cli/commands/config_commands.py,sha256=eL6GtdIllOIHo8GbNN2jOqLn5VUPBuqhnro9ooPxDog,7387
17
+ themis/cli/commands/cost.py,sha256=fFdF6hKIzsbPsyrJ1nt6-2m43PpVGUj8jx5T90tBTNo,7233
18
+ themis/cli/commands/demo.py,sha256=akQqjG-hbUDfeB3bI8K4F5-S0ibJqhflGBFQ5nvdUgE,2135
19
+ themis/cli/commands/info.py,sha256=9maOaw-TFiBpuVhaqlMKukGuZ_zgESetqbMQ1Qdvjxs,2515
20
+ themis/cli/commands/leaderboard.py,sha256=AVvsYIwZAY18jn3sOq3QD45yNtfdHUEl7eixM4aMCKw,10615
21
+ themis/cli/commands/math_benchmarks.py,sha256=nQ4TcPB7T9O3piAy4_TgrOQOQxh2Q8OyBreK_HoPCeQ,9946
22
+ themis/cli/commands/mcq_benchmarks.py,sha256=Cls5W1jGd7TKizmw07CnZWY5N6ywR8VhJ6jKDnY_cRk,7026
23
+ themis/cli/commands/results.py,sha256=rdN3SaMoFnSfAoAXlfpeCTt3V6MwIp0Dk7FIjvPNF7s,7774
24
+ themis/cli/commands/sample_run.py,sha256=r3Ymg5dVHg4IAVJvzoP0ZWUWWUE4Dia1t0062Yhdk9Q,9445
25
+ themis/cli/commands/visualize.py,sha256=ZECkB0NjIltuOeBE-Q1JnndZEMXVzc8KgcrbaP-GSXo,9740
26
+ themis/comparison/__init__.py,sha256=bRI8gDlcjMtnH77R7N5ARioq_V4daJcWWM4DXKsoE1k,679
27
+ themis/comparison/engine.py,sha256=UkzXKmEFI2JiX0y8534oc6JFySxgA5v1emzRcGj33Kk,12133
28
+ themis/comparison/reports.py,sha256=126VJbd-lxj8C2YJqul53Fyr-nrZgmbrBsRA6Qkh0ro,10117
29
+ themis/comparison/statistics.py,sha256=eLqKUtKFwSvXnbZax8S0lF8RiSepwYdhnmnDD7DcrZs,12929
30
+ themis/config/__init__.py,sha256=YMdFG1iLvOQUnSPlc_ZJVn5zCCTbIozML64b4qUtGR8,476
31
+ themis/config/loader.py,sha256=t_wcIDwekuy3EaLprQgWILSKH2h5lFkF7quvNfAHddM,746
32
+ themis/config/registry.py,sha256=sSrL1mTjUG86s30o-dhuRInY4YeumEICtmxdjS-PiiQ,1055
33
+ themis/config/runtime.py,sha256=hU69_oND7fJfAOIBJONENmsuf7Y8roO7n-w9OwxzoT8,7475
34
+ themis/config/schema.py,sha256=SMR9QHp8OBkSnb1dHyOgg-IJWSqpXfyAqywnBeMy46M,3196
35
+ themis/core/__init__.py,sha256=S8G1x-39sZ3_NQ5DJ6R1yBTWXp_gO0WxOtVjeB9sTwY,113
36
+ themis/core/conversation.py,sha256=wwO8RS4t4plDR0Sf1KjYv_ejonlvKe0ZwAD-4sfGak8,10155
37
+ themis/core/entities.py,sha256=WV9kiYdZFGxn6oH0lPtqaViY4I8oq-hWi_SmCKjvRnc,4449
38
+ themis/core/serialization.py,sha256=cxfoSKwcZiNsnR8g_SAJAq1ZLrfLXM4S9_rVEDUT8qs,7071
39
+ themis/core/tools.py,sha256=v0_ctsBCtinZGNC_I4C-h0GUPNM5ZeTi7z-U4iCtyp4,11035
40
+ themis/core/types.py,sha256=I5rr9MMS0irX4lo-xlqGjosx-FjPgT64RzQAraM223A,3652
41
+ themis/datasets/__init__.py,sha256=r1FobxkALtaXuphz1wU6LSbXhDezpmh6AUvoT-Hv1As,9013
42
+ themis/datasets/base.py,sha256=HtgIIiCgaog9aRnjZ4kJV3Ta2Dhl-_wrD8PB2lvK810,8417
43
+ themis/datasets/commonsense_qa.py,sha256=1eT_SaehbHC8F1HOJDpMi4RnzsKznGuJrdoL8RLuHu4,5842
44
+ themis/datasets/competition_math.py,sha256=XWsKMAPICb66RMXZNA8AvtUJwWQav3HVDuvAby8QjZU,7346
45
+ themis/datasets/coqa.py,sha256=bsh1AUYtagYIa7d-eM30JftGBQ7_0xUZq9dW9Pkz3Ks,4575
46
+ themis/datasets/gpqa.py,sha256=VQEDbDywDGhRE69M-mXwY5AxUO5WQw2RF6ND7hOlv50,7110
47
+ themis/datasets/gsm8k.py,sha256=wYbCACnF560RTAJTU-SMHqVvwzj8QT0bbzfQGTuRKl0,3887
48
+ themis/datasets/gsm_symbolic.py,sha256=PeWRBJIt8zEcsWIIj4xw5iT2drBotv8jB3DrHkN-PDw,4055
49
+ themis/datasets/math500.py,sha256=F1xBSadcCdnKWs-WKoFVHBeOt1tu6Rb6U2Yi4Ien578,3840
50
+ themis/datasets/med_qa.py,sha256=Z6FET8UrDsQnsbuvPoVlO1thJ6jeMcmSnqUvnHY0GtI,6019
51
+ themis/datasets/medmcqa.py,sha256=9MjkhjIqNaP_sslPnid4GHiqaGjXZkWGvwLJGaUeHzg,5532
52
+ themis/datasets/mmlu_pro.py,sha256=51cZGLRe1BVuXhQ5vFzIk4TCq4p8iYUow9E9hSSNFAE,8384
53
+ themis/datasets/piqa.py,sha256=YV-c56ZUrvIqS0wpBq4LdC683hDqEJaZCfhh4Bn5HwU,4420
54
+ themis/datasets/registry.py,sha256=GCgfdr7dp0LpDiK0DLhOFg8tS18m1Rx6t7bOJlh10rU,5530
55
+ themis/datasets/schema.py,sha256=9nfM0ygdtvh1PPNLLz1OF2P8Z4sjTVk7gT5GIffJsf0,7100
56
+ themis/datasets/sciq.py,sha256=VXxO2_cDPhs08AnUBtT5NE6aKvnGMeNa8FXdQZqas2U,4677
57
+ themis/datasets/social_i_qa.py,sha256=y2uT2mWwo7ArZwjEUSo5841oITDcFII77OCjNVTkwwk,4765
58
+ themis/datasets/super_gpqa.py,sha256=Mr1ag_FyAk1haxg6_ONX5F84wQYtbSVjV-MlMNmaHlI,8452
59
+ themis/evaluation/__init__.py,sha256=2Jl8tcVxYAsmHNAZev2mPS_mEwZcRzebqSM3QDc2cyY,36
60
+ themis/evaluation/conditional.py,sha256=ayndI7FcwxdIMR8B4ddgcKZd5Jl5NQcBJUp7eXI6Djk,13881
61
+ themis/evaluation/math_verify_utils.py,sha256=vXMvL11-IH16UHZ-mbi_r5hOFz7aUfR1J1laa6qmLMk,2213
62
+ themis/evaluation/pipeline.py,sha256=OOowN59UdOMF2Hwy_G4ky5yzR4ajAnohil6xteWBHqE,1503
63
+ themis/evaluation/reports.py,sha256=9om7jzZUtmlMH7EeteXp_98gfHct4x09AyTFy3FSAdQ,8715
64
+ themis/evaluation/extractors/__init__.py,sha256=BanoC_8e0iam-VU7l7uhvhac_6w_JJZYoYE4xXPUrGk,566
65
+ themis/evaluation/extractors/error_taxonomy_extractor.py,sha256=RrRx-23l3LwTdG89kvSQJng438cfYI-IdtOGUD6gEDw,2462
66
+ themis/evaluation/extractors/exceptions.py,sha256=lI4HOU98FEUwqhBq2uEzY0ym0evK85O5V3t6ULoANtM,167
67
+ themis/evaluation/extractors/identity_extractor.py,sha256=bBRgcry94AtUrchzBdHK-a9fnWAQT0eJDaPXjBmgBis,675
68
+ themis/evaluation/extractors/json_field_extractor.py,sha256=5_ndU7NBw9CkfP8oXmeue3wkGZZauryvFt_GVjO7X44,1232
69
+ themis/evaluation/extractors/math_verify_extractor.py,sha256=f8B_IeexS2QHCf-UUhqfk7xpqWcm8SgobLQ7GXYrM58,1155
70
+ themis/evaluation/extractors/regex_extractor.py,sha256=xNo8YRy7JOnF9ohbK_xUHcbLGMUmE1qooUlS0ucrw44,1148
71
+ themis/evaluation/metrics/__init__.py,sha256=HrrTgEwdCmXiib0ohUOArpUC0qHii2A_mgPX0m4LRoU,652
72
+ themis/evaluation/metrics/composite_metric.py,sha256=nF4FXMWwEMt1Gfq4NhwEuNVD28d97cgV30kFgt1LdEg,1474
73
+ themis/evaluation/metrics/consistency_metric.py,sha256=ITu4qFwgVmEiRD_tuVOhe5B9qtdmRZI3Pj-p0NlXizE,2404
74
+ themis/evaluation/metrics/exact_match.py,sha256=bAH3QCddbD7s0Mp3-4VGvzwnOv7MB8nexwcFpO0yJuo,1596
75
+ themis/evaluation/metrics/length_difference_tolerance.py,sha256=_YjDNGEMlodLogcw64RshG1-i0pKH27S3UiMeK6SdQw,966
76
+ themis/evaluation/metrics/math_verify_accuracy.py,sha256=YhBhpONLmouLELfpcjNHiVSlwpxkE2wkStF6__du3_0,1329
77
+ themis/evaluation/metrics/pairwise_judge_metric.py,sha256=DEYKwt3smzXiSUhDV4lWxDFXWoHz-JMg3z5bMjlLPKo,4890
78
+ themis/evaluation/metrics/response_length.py,sha256=Xn2PQi4pMLhC_3bMmSbLEf-QVFOrMNm2ZJr0PiCDH-E,910
79
+ themis/evaluation/metrics/rubric_judge_metric.py,sha256=KSSqwpMHaXCK6krbb_A93nppZ_0xk6Or30u7csnw7rM,4796
80
+ themis/evaluation/metrics/code/__init__.py,sha256=meZYPwDZVdZhW4jVW-52kOkZaC6ItyGfEhRVX7jIkXg,599
81
+ themis/evaluation/metrics/code/codebleu.py,sha256=joZJH1VOTBmKhqW1YBvizribqO5rilqsDmvslEdB2as,4826
82
+ themis/evaluation/metrics/code/execution.py,sha256=ACvWuG3Fc4bWuISLeKmWajibZeDDZx1Le-shQgFDsc0,8543
83
+ themis/evaluation/metrics/code/pass_at_k.py,sha256=X4V0bK8uG9dh4vovW0GafzHctRQ-3bH28aFTI8FE9NE,5649
84
+ themis/evaluation/metrics/nlp/__init__.py,sha256=cop5o1tmMv21dNRrDyxrz17iRH9f4vIaKQZNzii4W7k,699
85
+ themis/evaluation/metrics/nlp/bertscore.py,sha256=czlIqYkOTBWsfHiE6U1vkq1KHRQm8pvUnQgTb-Fte1s,4807
86
+ themis/evaluation/metrics/nlp/bleu.py,sha256=o_aVkoFPSMmeOLYaHRMamIpSKlYSxrMA1OdntTIUe9g,4436
87
+ themis/evaluation/metrics/nlp/meteor.py,sha256=QZT09s4aiUcVvDJDVPZYjzi5SxXdS2gn2IaOTNmKp78,5076
88
+ themis/evaluation/metrics/nlp/rouge.py,sha256=YL05qluF-KsesHYFRfm5zELJlcvo6RvaKp7xKy6BuLI,4365
89
+ themis/evaluation/pipelines/__init__.py,sha256=5YI1xaUULHisctFxrumN4XRpWYneoonX7nd9zBtsjvQ,384
90
+ themis/evaluation/pipelines/composable_pipeline.py,sha256=nNP9MSvQQJvaSBw5_gO3FeyhGm9So2ZlGqh5qSvE8Ac,10905
91
+ themis/evaluation/pipelines/standard_pipeline.py,sha256=nDd_bkqAVQxgwG9RK6G_fsgqwZth3058uG3p4QM0Dck,14650
92
+ themis/evaluation/statistics/__init__.py,sha256=TTrScTLAW7EHNq0nbjuJs6iP3_HgDx1yy3EtYXx5JCk,1257
93
+ themis/evaluation/statistics/bootstrap.py,sha256=JUQ8rtzFvW2e41I2pLJ7pqgSEjuJ1r6McyYLI42At9g,2409
94
+ themis/evaluation/statistics/confidence_intervals.py,sha256=CN5EO2gWiSITQubuWuPryngnGXhGwczY9kO3mcG6JVc,3676
95
+ themis/evaluation/statistics/distributions.py,sha256=h-sJgtpB-KnTwgN1B2TXrugjCJp8fl9DmGy-ZxZVEek,5538
96
+ themis/evaluation/statistics/effect_sizes.py,sha256=EWFVDilczpR8rR3_YurWy7QcjYcNIEzGOvf931OYPww,3283
97
+ themis/evaluation/statistics/hypothesis_tests.py,sha256=MVlVsY8wXifbBG5aSwauFShsQtIKqYREJApbriojS2o,10042
98
+ themis/evaluation/statistics/types.py,sha256=hW0RYWs-G4C_njNl0ZGG9lJROgU2CfLWfnTQDWYmWuw,3685
99
+ themis/evaluation/strategies/__init__.py,sha256=3f5LQkzlu3pRbN7dgDbdYOUNZTRexcn6f8D8I5-C724,439
100
+ themis/evaluation/strategies/attempt_aware_evaluation_strategy.py,sha256=O3dlsQ2F0Ucv2Dhjz2Qf-jpPhwaVs3zrdQDRRu9du5w,1714
101
+ themis/evaluation/strategies/default_evaluation_strategy.py,sha256=LShW-3Nxg_W4Ln-4qUvHJZqe5YMt64gHoK3uNJYLQNo,693
102
+ themis/evaluation/strategies/evaluation_strategy.py,sha256=YFF-bXkz4Z52GuCw52FcklfEnf8dK8_z_I40DJRcmwE,669
103
+ themis/evaluation/strategies/judge_evaluation_strategy.py,sha256=58pDB30y1VpM_1KPB6sGS0JImGZk5WTgnK9CKDF8N5k,2304
104
+ themis/experiment/__init__.py,sha256=dGranqpESugmmfbQlTU9efwspazW6j3vcmAKEtAoWZk,182
105
+ themis/experiment/builder.py,sha256=AEjCDeSOI2B0i0PBjkfY1GUDNrYGTGiqPvt0SxnDQFo,5618
106
+ themis/experiment/cache_manager.py,sha256=Fd8Qxifrmyn8f2zjAyPrLv-ZU4Dcp-MKo8-09BoW7tY,4361
107
+ themis/experiment/comparison.py,sha256=Mr1L5Zj7i87xk9XUQ_UueLTsC-sDZH8YGwLwg_gG0VI,21562
108
+ themis/experiment/cost.py,sha256=flhENfB5WKvyNWwPMDtygNZAv6y_yv4RoClsRz714Hc,10159
109
+ themis/experiment/definitions.py,sha256=oOZBFfEQkSBiZd9CMutCQ5luH6oeUT9yAZFd7fpVjnw,2015
110
+ themis/experiment/export.py,sha256=ujwiSvqQhLaO99WHyE8osdnmriHjyIM1C2zKf5o93Cw,29800
111
+ themis/experiment/export_csv.py,sha256=80w3gEGjeLjuiNq539rRP73k3MBtwrzJy90hgE91AKw,6030
112
+ themis/experiment/integration_manager.py,sha256=wTVTjDGcUkzz4tfnwSxa5nK1A4e2FKCPazDYGcdzYS8,3325
113
+ themis/experiment/math.py,sha256=P2E9F_UKI7pb-aXepSztGdr_g309WEMe83zqg1nWO7A,6973
114
+ themis/experiment/mcq.py,sha256=DDB99FHQsU_5vMIRDRhSZ7pReYvVf57wLmmo3OU_An4,6276
115
+ themis/experiment/orchestrator.py,sha256=-6epspKnPoAJQPKzoNAxd54MrEX3lIhrKyqQ9dmD00A,16120
116
+ themis/experiment/pricing.py,sha256=fTM32yE3L8vahMP4sr1zr7dbp9zYCjiPN4D4VuZ8-q8,9346
117
+ themis/experiment/storage.py,sha256=QS3fJD79bzgodM5x79yJ2A69O5hTL2r2ROAKSvtRnkI,49471
118
+ themis/experiment/visualization.py,sha256=dJYHrp3mntl8CPc5HPI3iKqPztVsddQB3ogRkd_FCNc,18473
119
+ themis/generation/__init__.py,sha256=6KVwCQYMpPIsXNuWDZOGuqHkUkA45lbSacIFn8ZbD4s,36
120
+ themis/generation/agentic_runner.py,sha256=armBQBk7qZDBEwT8HqjIWomYDQm57NfrP5CZJzay2uA,13669
121
+ themis/generation/batching.py,sha256=ddpgpn1pq_EwipvTg-K4WcoSs3c2rbW37jEA5Pa_spo,7557
122
+ themis/generation/clients.py,sha256=6apXCp_VNQosnpnmohTHOhHGXw-VZgsUyLds8MwtYUE,4910
123
+ themis/generation/conversation_runner.py,sha256=kSZHwEvfqzxZ-eQYxmg5OkNZcgEHggZExjad6nBOeTM,7980
124
+ themis/generation/plan.py,sha256=RmPIdefXkQMHYv5EWiilpx91I9a-svw31imvG0wV3fE,15961
125
+ themis/generation/router.py,sha256=jZc0KFL483f8TrYtt9yxzFKs-T9CG2CoE2kfOQdHMEc,1082
126
+ themis/generation/runner.py,sha256=iHTE5vSMWMYRrv4PEWMaZflF939nv1wWccK8V0e092c,8009
127
+ themis/generation/strategies.py,sha256=hjqaVkNycFxJWh_edJ7ilBl7HS6bL-8pYm24zTfoAvg,2975
128
+ themis/generation/templates.py,sha256=ut_6akp8Y6Ey_9O3s64jDbwCB74pw62Zf8URlYcKHkA,2325
129
+ themis/generation/turn_strategies.py,sha256=w33qhzpQbGTsfeOgOgMDovV0wEeXeNZUUBm5yZy1naw,10973
130
+ themis/generation/types.py,sha256=MkJnZk6lMHmHzlJVEsuIC9ioRW8XhWcSk9AdDeb_aLE,338
131
+ themis/generation/providers/litellm_provider.py,sha256=rlTuglIwhcvSakCo5G-ffgQtEHbCEX0ZeKk6M1MaWmU,8155
132
+ themis/generation/providers/vllm_provider.py,sha256=0K4we6xDrRXlBXseC1ixLq2sJpRF4T8Ikv45dw-zNk4,4625
133
+ themis/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
134
+ themis/integrations/huggingface.py,sha256=vrLwYwn65pU4W3FUe0ImCOZxKKlpRshDqMoLFsclB3E,2370
135
+ themis/integrations/wandb.py,sha256=LJOPojjlqG05EIPxcjy3QmA15Gxgs1db3encDWVzYYw,2545
136
+ themis/interfaces/__init__.py,sha256=78dNE_eHfFmb9hXNy5sLZ1jOTGWS8TzdVE_eiYQPFVc,5967
137
+ themis/presets/__init__.py,sha256=hkoyODYiWFFSQAIKTpEbAIUuFIwTibBhzTOkiTbzhVQ,411
138
+ themis/presets/benchmarks.py,sha256=s9JxRogHwZs8oiuiI7Z7uiUBZXEp3gg7AQZnBvdGieA,12026
139
+ themis/presets/models.py,sha256=c6-I_drHa4vMLIajSkCcrFbsJOsauFjY8fU1leBxZLg,5173
140
+ themis/project/__init__.py,sha256=vgLv2nS62yz1XsFSFzFf7eIo6FyQJXpOY9OPRUcTQLQ,465
141
+ themis/project/definitions.py,sha256=vHARw0IjFOWE4RL4mGRwvke36A6GWQGep6cQFIRcpJg,3329
142
+ themis/project/patterns.py,sha256=2J51Q9Jq7X-2N57uexvR191gaZKwusef5vIuIVUQY-E,7743
143
+ themis/providers/__init__.py,sha256=K5nG0DsK_YPY0cT9MBLk5BLcLbBo0wBP0vQvLjpAw_Y,189
144
+ themis/providers/registry.py,sha256=Za5Kg3-A-35wS_jiGpPXV2q1k6he_dRIWVqt36dKN-4,1056
145
+ themis/server/__init__.py,sha256=Hp0qGI5nvO4bhLAez3jQxim7H433l72EYE2IA8Xp2hA,731
146
+ themis/server/app.py,sha256=OZ39gCC47AXVqZxroC_4KtIYBYx_rfpde7C25AF3EI0,11166
147
+ themis/utils/api_generator.py,sha256=3oQ7mGZlFx2Dpm45pMg3rNIqNK2Smj05PjOMXp5RIkQ,10776
148
+ themis/utils/cost_tracking.py,sha256=9_Z2iTfNaQse9G_bnqn4hme4T0fG2W-fxOLEDeF_3VI,11545
149
+ themis/utils/dashboard.py,sha256=2yiIu9_oENglTde_J3G1d5cpQ5VtSnfbUvdliw5Og1E,13008
150
+ themis/utils/logging_utils.py,sha256=YNSiDfO4LsciSzUhHF1aTVI5rkfnWiVbn1NcGjjmJuQ,1019
151
+ themis/utils/progress.py,sha256=b3YwHKV5x3Cvr5rBukqifJimK3Si4CGY2fpN6a_ZySI,1434
152
+ themis/utils/tracing.py,sha256=VTeiRjcW_B5fOOoSeAp37nrmlwP1DiqPcoe6OtIQ7dk,8468
153
+ themis_eval-0.2.0.dist-info/licenses/LICENSE,sha256=K5FLE7iqn5-_6k1sf3IGy7w-Wx_Vdx3t0sOVJByNlF0,1076
154
+ themis_eval-0.2.0.dist-info/METADATA,sha256=S4dy0AD2REsRtPfULUYMiYC2Zk8nWgz4BWjBBJz2gHU,15173
155
+ themis_eval-0.2.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
156
+ themis_eval-0.2.0.dist-info/top_level.txt,sha256=QGIl4v-KB32upFS5UTXMJxHVX3vF7yBso82wJFI1Vbs,7
157
+ themis_eval-0.2.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5