valor-lite 0.37.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of valor-lite might be problematic.
- valor_lite/LICENSE +21 -0
- valor_lite/__init__.py +0 -0
- valor_lite/cache/__init__.py +11 -0
- valor_lite/cache/compute.py +154 -0
- valor_lite/cache/ephemeral.py +302 -0
- valor_lite/cache/persistent.py +529 -0
- valor_lite/classification/__init__.py +14 -0
- valor_lite/classification/annotation.py +45 -0
- valor_lite/classification/computation.py +378 -0
- valor_lite/classification/evaluator.py +879 -0
- valor_lite/classification/loader.py +97 -0
- valor_lite/classification/metric.py +535 -0
- valor_lite/classification/numpy_compatibility.py +13 -0
- valor_lite/classification/shared.py +184 -0
- valor_lite/classification/utilities.py +314 -0
- valor_lite/exceptions.py +20 -0
- valor_lite/object_detection/__init__.py +17 -0
- valor_lite/object_detection/annotation.py +238 -0
- valor_lite/object_detection/computation.py +841 -0
- valor_lite/object_detection/evaluator.py +805 -0
- valor_lite/object_detection/loader.py +292 -0
- valor_lite/object_detection/metric.py +850 -0
- valor_lite/object_detection/shared.py +185 -0
- valor_lite/object_detection/utilities.py +396 -0
- valor_lite/schemas.py +11 -0
- valor_lite/semantic_segmentation/__init__.py +15 -0
- valor_lite/semantic_segmentation/annotation.py +123 -0
- valor_lite/semantic_segmentation/computation.py +165 -0
- valor_lite/semantic_segmentation/evaluator.py +414 -0
- valor_lite/semantic_segmentation/loader.py +205 -0
- valor_lite/semantic_segmentation/metric.py +275 -0
- valor_lite/semantic_segmentation/shared.py +149 -0
- valor_lite/semantic_segmentation/utilities.py +88 -0
- valor_lite/text_generation/__init__.py +15 -0
- valor_lite/text_generation/annotation.py +56 -0
- valor_lite/text_generation/computation.py +611 -0
- valor_lite/text_generation/llm/__init__.py +0 -0
- valor_lite/text_generation/llm/exceptions.py +14 -0
- valor_lite/text_generation/llm/generation.py +903 -0
- valor_lite/text_generation/llm/instructions.py +814 -0
- valor_lite/text_generation/llm/integrations.py +226 -0
- valor_lite/text_generation/llm/utilities.py +43 -0
- valor_lite/text_generation/llm/validators.py +68 -0
- valor_lite/text_generation/manager.py +697 -0
- valor_lite/text_generation/metric.py +381 -0
- valor_lite-0.37.1.dist-info/METADATA +174 -0
- valor_lite-0.37.1.dist-info/RECORD +49 -0
- valor_lite-0.37.1.dist-info/WHEEL +5 -0
- valor_lite-0.37.1.dist-info/top_level.txt +1 -0
valor_lite/text_generation/llm/integrations.py
@@ -0,0 +1,226 @@

import os
from typing import Protocol


def _validate_messages(messages: list[dict[str, str]]):
    """
    Validate that the input is a list of dictionaries with "role" and "content" keys.

    Parameters
    ----------
    messages : list[dict[str, str]]
        The messages formatted according to the OpenAI standard. Each message in messages is a dictionary with "role" and "content" keys.
    """
    if not isinstance(messages, list):
        raise TypeError(
            f"messages must be a list, got {type(messages)} instead."
        )

    if not all(isinstance(message, dict) for message in messages):
        raise TypeError("messages must be a list of dictionaries.")

    if not all(
        "role" in message and "content" in message for message in messages
    ):
        raise ValueError(
            'messages must be a list of dictionaries with "role" and "content" keys.'
        )

    if not all(isinstance(message["role"], str) for message in messages):
        raise TypeError("All roles in messages must be strings.")

    if not all(isinstance(message["content"], str) for message in messages):
        raise TypeError("All content in messages must be strings.")


class ClientWrapper(Protocol):
    def __call__(
        self,
        messages: list[dict[str, str]],
    ) -> str:
        ...
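Because ClientWrapper is a structural protocol, any callable with this signature can stand in for a real API client, which is handy for offline tests. A minimal sketch (the CannedWrapper name and its fixed reply are illustrative, not part of the package):

class CannedWrapper:
    """Illustrative ClientWrapper that returns a fixed reply; useful for tests."""

    def __init__(self, reply: str):
        self.reply = reply

    def __call__(self, messages: list[dict[str, str]]) -> str:
        # reuse the same input checks as the real wrappers
        _validate_messages(messages=messages)
        return self.reply


# canned = CannedWrapper('{"verdicts": []}')
# canned([{"role": "user", "content": "Evaluate this."}])  # -> '{"verdicts": []}'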
class OpenAIWrapper:
    """
    Wrapper for calls to OpenAI's API.

    Attributes
    ----------
    model_name : str
        The model to use (e.g. "gpt-3.5-turbo").
    api_key : str, optional
        The OpenAI API key to use. If not specified, the OPENAI_API_KEY environment variable is used.
    seed : int, optional
        An optional seed can be provided to get (best-effort) deterministic results.
    total_prompt_tokens : int
        A running count of tokens used for prompt inputs.
    total_completion_tokens : int
        A running count of tokens used to generate responses.
    """

    def __init__(
        self,
        model_name: str,
        api_key: str | None = None,
        seed: int | None = None,
    ):
        """
        Wrapper for calls to OpenAI's API.

        Parameters
        ----------
        model_name : str
            The model to use (e.g. "gpt-3.5-turbo").
        api_key : str, optional
            The OpenAI API key to use. If not specified, the OPENAI_API_KEY environment variable is used.
        seed : int, optional
            An optional seed can be provided to get (best-effort) deterministic results.
        """

        from openai import OpenAI

        if api_key is None:
            self.client = OpenAI()
        else:
            self.client = OpenAI(api_key=api_key)

        self.model_name = model_name
        self.seed = seed

        # token-usage logs
        self.total_prompt_tokens = 0
        self.total_completion_tokens = 0

    def __call__(
        self,
        messages: list[dict[str, str]],
    ) -> str:
        """
        Call to the API.

        Parameters
        ----------
        messages : list[dict[str, str]]
            The messages formatted according to the OpenAI standard. Each message in messages is a dictionary with "role" and "content" keys.

        Returns
        -------
        str
            The response from the API.
        """
        _validate_messages(messages=messages)

        openai_response = self.client.chat.completions.create(
            model=self.model_name,
            messages=messages,  # type: ignore - the SDK expects its own message types
            seed=self.seed,
        )

        response = openai_response.choices[0].message.content
        if openai_response.usage is not None:
            self.total_prompt_tokens += openai_response.usage.prompt_tokens
            self.total_completion_tokens += (
                openai_response.usage.completion_tokens
            )
        finish_reason = openai_response.choices[
            0
        ].finish_reason  # Enum: "stop" "length" "content_filter" "tool_calls" "function_call"

        if finish_reason == "length":
            raise ValueError(
                "OpenAI response reached the max token limit. The resulting evaluation is likely invalid or of low quality."
            )
        elif finish_reason == "content_filter":
            raise ValueError(
                "OpenAI response was flagged by the content filter. The resulting evaluation is likely invalid or of low quality."
            )

        if response is None:
            response = ""
        return response
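Usage is a single construction followed by calls; the token counters accumulate across calls. A sketch, assuming the openai package is installed and OPENAI_API_KEY is set (the model name and prompt below are illustrative):

client = OpenAIWrapper(model_name="gpt-4o-mini", seed=42)

answer = client(
    [
        {"role": "system", "content": "You are a strict JSON-only evaluator."},
        {"role": "user", "content": 'Return {"verdict": "yes"} or {"verdict": "no"}.'},
    ]
)

print(answer)
# running totals across all calls on this wrapper instance
print(client.total_prompt_tokens, client.total_completion_tokens)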
class MistralWrapper:
    """
    Wrapper for calls to Mistral's API.

    Attributes
    ----------
    model_name : str
        The model to use (e.g. "mistral-small-latest").
    api_key : str, optional
        The Mistral API key to use. If not specified, the MISTRAL_API_KEY environment variable is used.
    """

    def __init__(
        self,
        model_name: str,
        api_key: str | None = None,
    ):
        """
        Creates an instance of the Mistral interface.

        Parameters
        ----------
        model_name : str
            The model to use (e.g. "mistral-small-latest").
        api_key : str, optional
            The Mistral API key to use. If not specified, the MISTRAL_API_KEY environment variable is used.
        """

        from mistralai import Mistral

        if api_key is None:
            self.client = Mistral(api_key=os.getenv("MISTRAL_API_KEY"))
        else:
            self.client = Mistral(api_key=api_key)

        self.model_name = model_name

    def __call__(
        self,
        messages: list[dict[str, str]],
    ) -> str:
        """
        Call to the API.

        Parameters
        ----------
        messages : list[dict[str, str]]
            The messages formatted according to the OpenAI standard. Each message in messages is a dictionary with "role" and "content" keys.

        Returns
        -------
        str
            The response from the API.
        """
        _validate_messages(messages)

        mistral_response = self.client.chat.complete(
            model=self.model_name,
            messages=messages,  # type: ignore - the SDK complains about native types
        )
        if (
            mistral_response is None
            or mistral_response.choices is None
            or mistral_response.choices[0].message is None
            or mistral_response.choices[0].message.content is None
        ):
            return ""

        response = mistral_response.choices[0].message.content

        finish_reason = mistral_response.choices[
            0
        ].finish_reason  # Enum: "stop" "length" "model_length" "error" "tool_calls"

        if finish_reason == "length":
            raise ValueError(
                "Mistral response reached the max token limit. The resulting evaluation is likely invalid or of low quality."
            )

        if not isinstance(response, str):
            raise TypeError("Mistral AI response was not a string.")

        return response
valor_lite/text_generation/llm/utilities.py
@@ -0,0 +1,43 @@

import json
import re
from typing import Any

from valor_lite.text_generation.llm.exceptions import InvalidLLMResponseError


def trim_and_load_json(text: str) -> dict[str, Any]:
    """
    Trims the input text down to its JSON object and loads it as a dictionary. Adapted from DeepEval https://github.com/confident-ai/deepeval/blob/dc117a5ea2160dbb61909c537908a41f7da4dfe7/deepeval/metrics/utils.py#L50

    Parameters
    ----------
    text : str
        The input string to trim and load as JSON.

    Returns
    -------
    dict
        A dictionary.
    """

    pattern = r"\{[\s\S]*\}"
    match = re.search(pattern, text)
    if not match:
        raise InvalidLLMResponseError(
            f"LLM did not include valid brackets in its response: {text}"
        )
    extracted_text = match.group()

    try:
        return json.loads(extracted_text)
    except json.JSONDecodeError as e:
        raise InvalidLLMResponseError(
            f"Evaluation LLM responded with invalid JSON. JSONDecodeError: {str(e)}"
        ) from e


def find_first_signed_integer(text: str) -> int | None:
    match = re.search(r"-?\d+", text)
    if not match:
        return None
    return int(match.group())
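Both helpers are pure functions, so they can be exercised without an API key. A quick sketch of the expected behavior (the sample strings are illustrative):

noisy = 'Sure! Here is the result:\n{"verdicts": [{"verdict": "yes", "analysis": "ok"}]}\nHope that helps.'
# the greedy regex keeps everything from the first "{" to the last "}"
print(trim_and_load_json(noisy))  # -> {'verdicts': [{'verdict': 'yes', 'analysis': 'ok'}]}

print(find_first_signed_integer("score: -3 out of 5"))  # -> -3
print(find_first_signed_integer("no digits here"))      # -> None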
valor_lite/text_generation/llm/validators.py
@@ -0,0 +1,68 @@

from valor_lite.text_generation.llm.exceptions import InvalidLLMResponseError


def validate_statements(
    response: dict[str, list[dict[str, str]]],
    key: str,
    allowed_values: set[str] | None = None,
    enforce_length: int | None = None,
):
    if key not in response:
        raise InvalidLLMResponseError(
            f"LLM did not include key '{key}' in its response: {response}"
        )
    elif (
        not isinstance(key, str)
        or not isinstance(response[key], list)
        or not all([isinstance(v, str) for v in response[key]])
    ):
        raise InvalidLLMResponseError(
            f"LLM response should follow the format 'dict[str, list[str]]': {response}"
        )
    elif allowed_values is not None and not all(
        [v in allowed_values for v in response[key]]
    ):
        raise InvalidLLMResponseError(
            f"LLM response contains values from outside the allowed set {allowed_values}: {response}"
        )
    elif enforce_length is not None and enforce_length != len(response[key]):
        raise InvalidLLMResponseError(
            f"LLM response does not match input size of '{enforce_length}': {response}"
        )


def validate_verdicts(
    response: dict[str, list[dict[str, str]]],
    key: str,
    allowed_values: set[str] | None = None,
    enforce_length: int | None = None,
):
    if key not in response:
        raise InvalidLLMResponseError(
            f"LLM did not include key '{key}' in its response: {response}"
        )
    elif not isinstance(key, str) or not isinstance(response[key], list):
        raise InvalidLLMResponseError(
            f"LLM response should follow the format 'dict[str, list[dict[str, str]]]': {response}"
        )
    elif enforce_length is not None and enforce_length != len(response[key]):
        raise InvalidLLMResponseError(
            f"LLM response does not match input size of '{enforce_length}': {response}"
        )

    for value in response[key]:
        if not isinstance(value, dict):
            raise InvalidLLMResponseError(
                f"LLM response should follow the format 'dict[str, list[dict[str, str]]]': {response}"
            )
        elif set(value.keys()) != {"verdict", "analysis"}:
            raise InvalidLLMResponseError(
                f"LLM response is malformed. Inner dictionaries should only contain keys 'verdict' and 'analysis': {response}"
            )
        elif (
            allowed_values is not None
            and value["verdict"] not in allowed_values
        ):
            raise InvalidLLMResponseError(
                f"LLM response contains verdicts from outside the allowed set {allowed_values}: {response}"
            )
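The validators either return None silently or raise InvalidLLMResponseError, so they slot directly after trim_and_load_json in a parsing pipeline. A sketch of both outcomes (the sample payloads are illustrative):

good = {"verdicts": [{"verdict": "yes", "analysis": "grounded in context"}]}
# passes silently: key present, one dict with exactly the expected keys, verdict allowed
validate_verdicts(good, key="verdicts", allowed_values={"yes", "no"}, enforce_length=1)

bad = {"verdicts": [{"verdict": "maybe", "analysis": "unsure"}]}
try:
    validate_verdicts(bad, key="verdicts", allowed_values={"yes", "no"})
except InvalidLLMResponseError as e:
    print(e)  # verdict "maybe" is outside the allowed set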