valor-lite 0.33.14__py3-none-any.whl → 0.33.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of valor-lite might be problematic. Click here for more details.

@@ -0,0 +1,381 @@
1
+ from dataclasses import dataclass
2
+ from enum import Enum
3
+
4
+ from valor_lite.schemas import BaseMetric
5
+
6
+
7
+ class MetricType(str, Enum):
8
+ AnswerCorrectness = "AnswerCorrectness"
9
+ AnswerRelevance = "AnswerRelevance"
10
+ Bias = "Bias"
11
+ BLEU = "BLEU"
12
+ ContextPrecision = "ContextPrecision"
13
+ ContextRecall = "ContextRecall"
14
+ ContextRelevance = "ContextRelevance"
15
+ Faithfulness = "Faithfulness"
16
+ Hallucination = "Hallucination"
17
+ ROUGE = "ROUGE"
18
+ SummaryCoherence = "SummaryCoherence"
19
+ Toxicity = "Toxicity"
20
+
21
+
22
+ @dataclass
23
+ class Metric(BaseMetric):
24
+ """
25
+ Text Generation Metric.
26
+
27
+ Attributes
28
+ ----------
29
+ type : str
30
+ The metric type.
31
+ value : int | float | dict
32
+ The metric value.
33
+ parameters : dict[str, Any]
34
+ A dictionary containing metric parameters.
35
+ """
36
+
37
+ def __post_init__(self):
38
+ if not isinstance(self.type, str):
39
+ raise TypeError(
40
+ f"Metric type should be of type 'str': {self.type}"
41
+ )
42
+ elif not isinstance(self.value, (int, float, dict)):
43
+ raise TypeError(
44
+ f"Metric value must be of type 'int', 'float' or 'dict': {self.value}"
45
+ )
46
+ elif not isinstance(self.parameters, dict):
47
+ raise TypeError(
48
+ f"Metric parameters must be of type 'dict[str, Any]': {self.parameters}"
49
+ )
50
+ elif not all([isinstance(k, str) for k in self.parameters.keys()]):
51
+ raise TypeError(
52
+ f"Metric parameter dictionary should only have keys with type 'str': {self.parameters}"
53
+ )
54
+
55
+ @classmethod
56
+ def error(
57
+ cls,
58
+ error_type: str,
59
+ error_message: str,
60
+ model_name: str,
61
+ retries: int,
62
+ ):
63
+ return cls(
64
+ type="Error",
65
+ value={
66
+ "type": error_type,
67
+ "message": error_message,
68
+ },
69
+ parameters={
70
+ "evaluator": model_name,
71
+ "retries": retries,
72
+ },
73
+ )
74
+
75
+ @classmethod
76
+ def answer_correctness(
77
+ cls,
78
+ value: float,
79
+ model_name: str,
80
+ retries: int,
81
+ ):
82
+ """
83
+ Defines an answer correctness metric.
84
+
85
+ Parameters
86
+ ----------
87
+ value : float
88
+ The answer correctness score between 0 and 1, with higher values indicating that the answer
89
+ is more correct. A score of 1 indicates that all statements in the prediction are supported
90
+ by the ground truth and all statements in the ground truth are present in the prediction.
91
+ """
92
+ return cls(
93
+ type=MetricType.AnswerCorrectness,
94
+ value=value,
95
+ parameters={
96
+ "evaluator": model_name,
97
+ "retries": retries,
98
+ },
99
+ )
100
+
101
+ @classmethod
102
+ def answer_relevance(
103
+ cls,
104
+ value: float,
105
+ model_name: str,
106
+ retries: int,
107
+ ):
108
+ """
109
+ Defines an answer relevance metric.
110
+
111
+ Parameters
112
+ ----------
113
+ value : float
114
+ The number of statements in the answer that are relevant to the query divided by the total
115
+ number of statements in the answer.
116
+ """
117
+ return cls(
118
+ type=MetricType.AnswerRelevance,
119
+ value=value,
120
+ parameters={
121
+ "evaluator": model_name,
122
+ "retries": retries,
123
+ },
124
+ )
125
+
126
+ @classmethod
127
+ def bleu(
128
+ cls,
129
+ value: float,
130
+ weights: list[float],
131
+ ):
132
+ """
133
+ Defines a BLEU metric.
134
+
135
+ Parameters
136
+ ----------
137
+ value : float
138
+ The BLEU score for an individual datapoint.
139
+ weights : list[float]
140
+ The list of weights that the score was calculated with.
141
+ """
142
+ return cls(
143
+ type=MetricType.BLEU,
144
+ value=value,
145
+ parameters={
146
+ "weights": weights,
147
+ },
148
+ )
149
+
150
+ @classmethod
151
+ def bias(
152
+ cls,
153
+ value: float,
154
+ model_name: str,
155
+ retries: int,
156
+ ):
157
+ """
158
+ Defines a bias metric.
159
+
160
+ Parameters
161
+ ----------
162
+ value : float
163
+ The bias score for a datum. This is a float between 0 and 1, with 1 indicating that all
164
+ opinions in the datum text are biased and 0 indicating that there is no bias.
165
+ """
166
+ return cls(
167
+ type=MetricType.Bias,
168
+ value=value,
169
+ parameters={
170
+ "evaluator": model_name,
171
+ "retries": retries,
172
+ },
173
+ )
174
+
175
+ @classmethod
176
+ def context_precision(
177
+ cls,
178
+ value: float,
179
+ model_name: str,
180
+ retries: int,
181
+ ):
182
+ """
183
+ Defines a context precision metric.
184
+
185
+ Parameters
186
+ ----------
187
+ value : float
188
+ The context precision score for a datum. This is a float between 0 and 1, with 0 indicating
189
+ that none of the contexts are useful to arrive at the ground truth answer to the query
190
+ and 1 indicating that all contexts are useful to arrive at the ground truth answer to the
191
+ query. The score is more heavily influenced by earlier contexts in the list of contexts
192
+ than later contexts.
193
+ """
194
+ return cls(
195
+ type=MetricType.ContextPrecision,
196
+ value=value,
197
+ parameters={
198
+ "evaluator": model_name,
199
+ "retries": retries,
200
+ },
201
+ )
202
+
203
+ @classmethod
204
+ def context_recall(
205
+ cls,
206
+ value: float,
207
+ model_name: str,
208
+ retries: int,
209
+ ):
210
+ """
211
+ Defines a context recall metric.
212
+
213
+ Parameters
214
+ ----------
215
+ value : float
216
+ The context recall score for a datum. This is a float between 0 and 1, with 1 indicating
217
+ that all ground truth statements are attributable to the context list.
218
+ """
219
+ return cls(
220
+ type=MetricType.ContextRecall,
221
+ value=value,
222
+ parameters={
223
+ "evaluator": model_name,
224
+ "retries": retries,
225
+ },
226
+ )
227
+
228
+ @classmethod
229
+ def context_relevance(
230
+ cls,
231
+ value: float,
232
+ model_name: str,
233
+ retries: int,
234
+ ):
235
+ """
236
+ Defines a context relevance metric.
237
+
238
+ Parameters
239
+ ----------
240
+ value : float
241
+ The context relevance score for a datum. This is a float between 0 and 1, with 0 indicating
242
+ that none of the contexts are relevant and 1 indicating that all of the contexts are relevant.
243
+ """
244
+ return cls(
245
+ type=MetricType.ContextRelevance,
246
+ value=value,
247
+ parameters={
248
+ "evaluator": model_name,
249
+ "retries": retries,
250
+ },
251
+ )
252
+
253
+ @classmethod
254
+ def faithfulness(
255
+ cls,
256
+ value: float,
257
+ model_name: str,
258
+ retries: int,
259
+ ):
260
+ """
261
+ Defines a faithfulness metric.
262
+
263
+ Parameters
264
+ ----------
265
+ value : float
266
+ The faithfulness score for a datum. This is a float between 0 and 1, with 1 indicating that
267
+ all claims in the text are implied by the contexts.
268
+ """
269
+ return cls(
270
+ type=MetricType.Faithfulness,
271
+ value=value,
272
+ parameters={
273
+ "evaluator": model_name,
274
+ "retries": retries,
275
+ },
276
+ )
277
+
278
+ @classmethod
279
+ def hallucination(
280
+ cls,
281
+ value: float,
282
+ model_name: str,
283
+ retries: int,
284
+ ):
285
+ """
286
+ Defines a hallucination metric.
287
+
288
+ Parameters
289
+ ----------
290
+ value : float
291
+ The hallucination score for a datum. This is a float between 0 and 1, with 1 indicating that
292
+ all contexts are contradicted by the text.
293
+ """
294
+ return cls(
295
+ type=MetricType.Hallucination,
296
+ value=value,
297
+ parameters={
298
+ "evaluator": model_name,
299
+ "retries": retries,
300
+ },
301
+ )
302
+
303
+ @classmethod
304
+ def rouge(
305
+ cls,
306
+ value: float,
307
+ rouge_type: str,
308
+ use_stemmer: bool,
309
+ ):
310
+ """
311
+ Defines a ROUGE metric.
312
+
313
+ Parameters
314
+ ----------
315
+ value : float
316
+ A ROUGE score.
317
+ rouge_type : ROUGEType
318
+ The ROUGE variation used to compute the value. `rouge1` is unigram-based scoring, `rouge2` is bigram-based
319
+ scoring, `rougeL` is scoring based on sentences (i.e., splitting on "." and ignoring "\n"), and `rougeLsum`
320
+ is scoring based on splitting the text using "\n".
321
+ use_stemmer: bool, default=False
322
+ If True, uses Porter stemmer to strip word suffixes. Defaults to False.
323
+ """
324
+ return cls(
325
+ type=MetricType.ROUGE,
326
+ value=value,
327
+ parameters={
328
+ "rouge_type": rouge_type,
329
+ "use_stemmer": use_stemmer,
330
+ },
331
+ )
332
+
333
+ @classmethod
334
+ def summary_coherence(
335
+ cls,
336
+ value: int,
337
+ model_name: str,
338
+ retries: int,
339
+ ):
340
+ """
341
+ Defines a summary coherence metric.
342
+
343
+ Parameters
344
+ ----------
345
+ value : int
346
+ The summary coherence score for a datum. This is an integer with 1 being the lowest summary coherence
347
+ and 5 the highest summary coherence.
348
+ """
349
+ return cls(
350
+ type=MetricType.SummaryCoherence,
351
+ value=value,
352
+ parameters={
353
+ "evaluator": model_name,
354
+ "retries": retries,
355
+ },
356
+ )
357
+
358
+ @classmethod
359
+ def toxicity(
360
+ cls,
361
+ value: float,
362
+ model_name: str,
363
+ retries: int,
364
+ ):
365
+ """
366
+ Defines a toxicity metric.
367
+
368
+ Parameters
369
+ ----------
370
+ value : float
371
+ The toxicity score for a datum. This is a value between 0 and 1, with 1 indicating that all opinions
372
+ in the datum text are toxic and 0 indicating that there is no toxicity.
373
+ """
374
+ return cls(
375
+ type=MetricType.Toxicity,
376
+ value=value,
377
+ parameters={
378
+ "evaluator": model_name,
379
+ "retries": retries,
380
+ },
381
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: valor-lite
3
- Version: 0.33.14
3
+ Version: 0.33.16
4
4
  Summary: Compute valor metrics locally.
5
5
  License: MIT License
6
6
 
@@ -28,15 +28,23 @@ Project-URL: homepage, https://www.striveworks.com
28
28
  Requires-Python: >=3.10
29
29
  Description-Content-Type: text/markdown
30
30
  License-File: LICENSE
31
+ Requires-Dist: evaluate
32
+ Requires-Dist: nltk
33
+ Requires-Dist: numpy
31
34
  Requires-Dist: Pillow >=9.1.0
32
- Requires-Dist: tqdm
33
35
  Requires-Dist: requests
34
- Requires-Dist: numpy
36
+ Requires-Dist: rouge-score
35
37
  Requires-Dist: shapely
38
+ Requires-Dist: tqdm
36
39
  Requires-Dist: importlib-metadata ; python_version < "3.8"
40
+ Provides-Extra: mistral
41
+ Requires-Dist: mistralai >=1.0 ; extra == 'mistral'
42
+ Provides-Extra: openai
43
+ Requires-Dist: openai ; extra == 'openai'
37
44
  Provides-Extra: test
38
45
  Requires-Dist: pytest ; extra == 'test'
39
46
  Requires-Dist: coverage ; extra == 'test'
47
+ Requires-Dist: pre-commit ; extra == 'test'
40
48
 
41
49
  # valor-lite: Fast, local machine learning evaluation.
42
50
 
@@ -0,0 +1,38 @@
1
+ valor_lite/LICENSE,sha256=M0L53VuwfEEqezhHb7NPeYcO_glw7-k4DMLZQ3eRN64,1068
2
+ valor_lite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ valor_lite/schemas.py,sha256=pB0MrPx5qFLbwBWDiOUUm-vmXdWvbJLFCBmKgbcbI5g,198
4
+ valor_lite/classification/__init__.py,sha256=8MI8bGwCxYGqRP7KxG7ezhYv4qQ5947XGvvlF8WPM5g,392
5
+ valor_lite/classification/annotation.py,sha256=0aUOvcwBAZgiNOJuyh-pXyNTG7vP7r8CUfnU3OmpUwQ,1113
6
+ valor_lite/classification/computation.py,sha256=qfBhhuDYCiY8h2RdBG3shzgJbHLXDVNujkYFg9xZa6U,12116
7
+ valor_lite/classification/manager.py,sha256=8GXZECSx4CBbG5NfPrA19BPENqmrjo-wZBmaulWHY20,16676
8
+ valor_lite/classification/metric.py,sha256=fkAo-_3s4EIRSkyn3owBSf4_Gp6lBK9xdToDYMWmT8A,12236
9
+ valor_lite/classification/utilities.py,sha256=PmQar06Vt-ew4Jvnn0IM63mq730QVTsdRtFdVu1HMFU,6885
10
+ valor_lite/object_detection/__init__.py,sha256=Ql8rju2q7y0Zd9zFvtBJDRhgQFDm1RSYkTsyH3ZE6pA,648
11
+ valor_lite/object_detection/annotation.py,sha256=o6VfiRobiB0ljqsNBLAYMXgi32RSIR7uTA-dgxq6zBI,8248
12
+ valor_lite/object_detection/computation.py,sha256=P5ijxEBuZ3mxYjBQy24TiQpGxRmPuS40Gwn44uv0J7M,28064
13
+ valor_lite/object_detection/manager.py,sha256=rHY6-aiPVOXKQk7e_MmKpZxn6wdLAhdlj_njaNdYG7Q,23299
14
+ valor_lite/object_detection/metric.py,sha256=8QhdauuaRrzE39idetkFYTPxA12wrBalQDIR4IUzEbg,24794
15
+ valor_lite/object_detection/utilities.py,sha256=98VSW-g8EYI8Cdd9KHLHdm6F4fI89jaX5I4z99zny4s,16271
16
+ valor_lite/semantic_segmentation/__init__.py,sha256=HQQkr3iBPQfdUrsu0uvx-Uyv9SYmumU1B3slbWOnpNY,245
17
+ valor_lite/semantic_segmentation/annotation.py,sha256=CujYFdHS3fgr4Y7mEDs_u1XBmbPJzNU2CdqvjCT_d_A,2938
18
+ valor_lite/semantic_segmentation/computation.py,sha256=rrql3zmpqt4Zygc2BD4SyUfNW_NXC93_kHB-lGBzjXU,5122
19
+ valor_lite/semantic_segmentation/manager.py,sha256=pMepH3zk_fApyFtC9tLrmEYuCbg1n5TLh1J8QRadE44,14287
20
+ valor_lite/semantic_segmentation/metric.py,sha256=aJv3wPEl6USLhZ3c4yz6prnBU-EaG4Kz16f0BXcodd4,7046
21
+ valor_lite/semantic_segmentation/utilities.py,sha256=vZM66YNMz9VJclhuKvcWp74nF65s6bscnnD5U9iDW7Q,2925
22
+ valor_lite/text_generation/__init__.py,sha256=pGhpWCSZjLM0pPHCtPykAfos55B8ie3mi9EzbNxfj-U,356
23
+ valor_lite/text_generation/annotation.py,sha256=O5aXiwCS4WjA-fqn4ly-O0MsTHoIOmqxqCaAp9IeI3M,1270
24
+ valor_lite/text_generation/computation.py,sha256=cG35qMpxNPEYHXN2fz8wcanESriSHoWMl1idpm9-ous,18638
25
+ valor_lite/text_generation/manager.py,sha256=C4QwvronGHXmYSkaRmUGy7TN0C0aeyDx9Hb-ClNYXK4,24810
26
+ valor_lite/text_generation/metric.py,sha256=C9gbWejjOJ23JVLecuUhYW5rkx30NUCfRtgsM46uMds,10409
27
+ valor_lite/text_generation/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
+ valor_lite/text_generation/llm/exceptions.py,sha256=w4eYSJIJQ_jWuCmquCB6ETr_st_LzbDRlhnlPeqwmfo,349
29
+ valor_lite/text_generation/llm/generation.py,sha256=XKPjCxPUZHiWInQSO7wLOb0YtMFLu50s8rHZe1Yz0s0,28954
30
+ valor_lite/text_generation/llm/instructions.py,sha256=fz2onBZZWcl5W8iy7zEWkPGU9N07ez6O7SxZA5M2xe4,34056
31
+ valor_lite/text_generation/llm/integrations.py,sha256=-rTfdAjq1zH-4ixwYuMQEOQ80pIFzMTe0BYfroVx3Pg,6974
32
+ valor_lite/text_generation/llm/utilities.py,sha256=bjqatGgtVTcl1PrMwiDKTYPGJXKrBrx7PDtzIblGSys,1178
33
+ valor_lite/text_generation/llm/validators.py,sha256=Wzr5RlfF58_2wOU-uTw7C8skan_fYdhy4Gfn0jSJ8HM,2700
34
+ valor_lite-0.33.16.dist-info/LICENSE,sha256=M0L53VuwfEEqezhHb7NPeYcO_glw7-k4DMLZQ3eRN64,1068
35
+ valor_lite-0.33.16.dist-info/METADATA,sha256=mpXXDWKiCL8OsCLqRevVH6AkWMsYBT4Qjqdum3ZYFos,5888
36
+ valor_lite-0.33.16.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
37
+ valor_lite-0.33.16.dist-info/top_level.txt,sha256=9ujykxSwpl2Hu0_R95UQTR_l07k9UUTSdrpiqmq6zc4,11
38
+ valor_lite-0.33.16.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.2.0)
2
+ Generator: setuptools (75.3.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,27 +0,0 @@
1
- valor_lite/LICENSE,sha256=M0L53VuwfEEqezhHb7NPeYcO_glw7-k4DMLZQ3eRN64,1068
2
- valor_lite/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- valor_lite/schemas.py,sha256=hcCFJ4ZywlFCqhx0om80Uf4xpASYPvs2vpP1yOUIqaE,403
4
- valor_lite/classification/__init__.py,sha256=8MI8bGwCxYGqRP7KxG7ezhYv4qQ5947XGvvlF8WPM5g,392
5
- valor_lite/classification/annotation.py,sha256=0aUOvcwBAZgiNOJuyh-pXyNTG7vP7r8CUfnU3OmpUwQ,1113
6
- valor_lite/classification/computation.py,sha256=qfBhhuDYCiY8h2RdBG3shzgJbHLXDVNujkYFg9xZa6U,12116
7
- valor_lite/classification/manager.py,sha256=8GXZECSx4CBbG5NfPrA19BPENqmrjo-wZBmaulWHY20,16676
8
- valor_lite/classification/metric.py,sha256=0ZGp7Wm4oc0h_EBiYfVEs39QEeL5xa-F27gig7smnq8,11409
9
- valor_lite/classification/utilities.py,sha256=PmQar06Vt-ew4Jvnn0IM63mq730QVTsdRtFdVu1HMFU,6885
10
- valor_lite/object_detection/__init__.py,sha256=Ql8rju2q7y0Zd9zFvtBJDRhgQFDm1RSYkTsyH3ZE6pA,648
11
- valor_lite/object_detection/annotation.py,sha256=o6VfiRobiB0ljqsNBLAYMXgi32RSIR7uTA-dgxq6zBI,8248
12
- valor_lite/object_detection/computation.py,sha256=xqV_KdYAGyq32VePW0pL8pO3YGRO46ZUVo0luwYD1P8,28024
13
- valor_lite/object_detection/manager.py,sha256=YjM9Kx3xrIt2VMjNZ-8guPchPq7YBABlams_7eZvYVY,23298
14
- valor_lite/object_detection/metric.py,sha256=QbxYTOykysshhpdVJjxMPnw8hvcAv4SM3sXDZj8OwnE,23967
15
- valor_lite/object_detection/utilities.py,sha256=98VSW-g8EYI8Cdd9KHLHdm6F4fI89jaX5I4z99zny4s,16271
16
- valor_lite/semantic_segmentation/__init__.py,sha256=HQQkr3iBPQfdUrsu0uvx-Uyv9SYmumU1B3slbWOnpNY,245
17
- valor_lite/semantic_segmentation/annotation.py,sha256=CujYFdHS3fgr4Y7mEDs_u1XBmbPJzNU2CdqvjCT_d_A,2938
18
- valor_lite/semantic_segmentation/computation.py,sha256=rrql3zmpqt4Zygc2BD4SyUfNW_NXC93_kHB-lGBzjXU,5122
19
- valor_lite/semantic_segmentation/manager.py,sha256=pMepH3zk_fApyFtC9tLrmEYuCbg1n5TLh1J8QRadE44,14287
20
- valor_lite/semantic_segmentation/metric.py,sha256=i8uTcalwvzK7CDHJ_8I-zplWe-qrMtXwH_5ZcTBi3M8,6219
21
- valor_lite/semantic_segmentation/utilities.py,sha256=vZM66YNMz9VJclhuKvcWp74nF65s6bscnnD5U9iDW7Q,2925
22
- valor_lite/text_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- valor_lite-0.33.14.dist-info/LICENSE,sha256=M0L53VuwfEEqezhHb7NPeYcO_glw7-k4DMLZQ3eRN64,1068
24
- valor_lite-0.33.14.dist-info/METADATA,sha256=G2j4SCVunrizKzJ9pBRH8h73RbzbFMbbMViHFuXG4pM,5632
25
- valor_lite-0.33.14.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
26
- valor_lite-0.33.14.dist-info/top_level.txt,sha256=9ujykxSwpl2Hu0_R95UQTR_l07k9UUTSdrpiqmq6zc4,11
27
- valor_lite-0.33.14.dist-info/RECORD,,