unique_toolkit 0.7.11__py3-none-any.whl → 0.7.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,57 @@
- from unique_toolkit.language_model import LanguageModel, LanguageModelName
+ from typing import Annotated

+ from pydantic import BeforeValidator, PlainSerializer

- def validate_and_init_language_model(value: LanguageModelName | LanguageModel | str):
-     if isinstance(value, LanguageModel):
-         return value
+ from unique_toolkit.language_model import LanguageModelName
+ from unique_toolkit.language_model.infos import (
+     LanguageModelInfo,
+     LanguageModelProvider,
+ )

-     return LanguageModel(value)
+ # TODO @klcd: Inform on deprecation of str as input
+ LMI = Annotated[
+     LanguageModelInfo,
+     BeforeValidator(
+         lambda v: validate_and_init_language_model_info(v),
+         json_schema_input_type=str | LanguageModelName | LanguageModelInfo,
+     ),
+     PlainSerializer(
+         lambda v: serialize_lmi(v),
+         when_used="json",
+         return_type=str | LanguageModelInfo,
+     ),
+ ]
+
+
+ def serialize_lmi(model: LanguageModelInfo) -> str | LanguageModelInfo:
+     if model.provider == LanguageModelProvider.CUSTOM:
+         return model
+
+     return model.name
+
+
+ def validate_and_init_language_model_info(
+     v: str | LanguageModelName | LanguageModelInfo,
+ ) -> LanguageModelInfo:
+     """Validate and initialize a LanguageModelInfo object.
+
+     Args:
+         v: The input value to validate and initialize.
+
+     Returns:
+         LanguageModelInfo: The validated and initialized LanguageModelInfo object.
+
+     """
+     if isinstance(v, LanguageModelName):
+         return LanguageModelInfo.from_name(v)
+     if isinstance(v, str):
+         if v in [name.value for name in LanguageModelName]:
+             return LanguageModelInfo.from_name(LanguageModelName(v))
+
+         return LanguageModelInfo(
+             name=v,
+             version="custom",
+             provider=LanguageModelProvider.CUSTOM,
+         )
+
+     return v
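Taken together, this hunk (apparently unique_toolkit/_common/validators.py, judging by the RECORD section below) replaces the old `validate_and_init_language_model` helper with an `LMI` annotated type: the `BeforeValidator` coerces a `str`, `LanguageModelName`, or `LanguageModelInfo` into a `LanguageModelInfo`, and the `PlainSerializer` writes the model name back out on JSON dumps (or the full object for `CUSTOM` models). A minimal usage sketch, assuming only the toolkit imports shown above; the consumer model and the chosen model names are illustrative:

```python
from pydantic import BaseModel

from unique_toolkit._common.validators import LMI
from unique_toolkit.language_model import LanguageModelName
from unique_toolkit.language_model.infos import LanguageModelInfo, LanguageModelProvider


class ExampleConfig(BaseModel):  # hypothetical consumer of the LMI annotation
    language_model: LMI = LanguageModelInfo.from_name(
        LanguageModelName.AZURE_GPT_41_2025_0414
    )


# Enum members and known name strings are resolved via LanguageModelInfo.from_name ...
cfg = ExampleConfig(language_model="AZURE_GPT_41_2025_0414")

# ... while unknown strings fall back to a CUSTOM LanguageModelInfo entry.
custom = ExampleConfig(language_model="my-internal-model")
assert custom.language_model.provider == LanguageModelProvider.CUSTOM
```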
@@ -1,35 +1,26 @@
  from humps import camelize
- from pydantic import BaseModel, ConfigDict, field_validator
+ from pydantic import BaseModel, ConfigDict

- from unique_toolkit._common.validators import validate_and_init_language_model
+ from unique_toolkit._common.validators import LMI, LanguageModelInfo
  from unique_toolkit.evaluators.schemas import (
      EvaluationMetricName,
  )
  from unique_toolkit.language_model.infos import (
-     LanguageModel,
      LanguageModelName,
  )

- model_config = ConfigDict(
-     alias_generator=camelize,
-     populate_by_name=True,
-     arbitrary_types_allowed=True,
-     validate_default=True,
-     json_encoders={LanguageModel: lambda v: v.display_name},
- )
-

  class EvaluationMetricConfig(BaseModel):
-     model_config = model_config
+     model_config = ConfigDict(
+         alias_generator=camelize,
+         populate_by_name=True,
+         validate_default=True,
+     )

      enabled: bool = False
      name: EvaluationMetricName
-     language_model: LanguageModel = LanguageModel(
-         LanguageModelName.AZURE_GPT_35_TURBO_0125
+     language_model: LMI = LanguageModelInfo.from_name(
+         LanguageModelName.AZURE_GPT_35_TURBO_0125,
      )
      custom_prompts: dict[str, str] = {}
      score_to_emoji: dict[str, str] = {}
-
-     @field_validator("language_model", mode="before")
-     def validate_language_model(cls, value: LanguageModelName | LanguageModel):
-         return validate_and_init_language_model(value)
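With `language_model` typed as `LMI`, the class no longer needs an explicit `field_validator`; the annotation performs the coercion. A hedged sketch of what a caller can now pass (module path taken from the RECORD section below; values are illustrative):

```python
from unique_toolkit.evaluators.config import EvaluationMetricConfig
from unique_toolkit.evaluators.schemas import EvaluationMetricName
from unique_toolkit.language_model import LanguageModelName

# An enum member, a LanguageModelInfo, or a plain string should all be accepted;
# the LMI BeforeValidator normalizes each to a LanguageModelInfo.
config = EvaluationMetricConfig(
    enabled=True,
    name=EvaluationMetricName.HALLUCINATION,
    language_model=LanguageModelName.AZURE_GPT_4_0613,
)
```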
@@ -7,7 +7,7 @@ from unique_toolkit.evaluators.schemas import (
      EvaluationMetricInputFieldName,
      EvaluationMetricName,
  )
- from unique_toolkit.language_model.infos import LanguageModel
+ from unique_toolkit.language_model.infos import LanguageModelInfo
  from unique_toolkit.language_model.service import LanguageModelName

  SYSTEM_MSG_KEY = "systemPrompt"
@@ -23,7 +23,9 @@ context_relevancy_required_input_fields = [
  default_config = EvaluationMetricConfig(
      enabled=False,
      name=EvaluationMetricName.CONTEXT_RELEVANCY,
-     language_model=LanguageModel(LanguageModelName.AZURE_GPT_35_TURBO_0125),
+     language_model=LanguageModelInfo.from_name(
+         LanguageModelName.AZURE_GPT_35_TURBO_0125
+     ),
      score_to_emoji={"LOW": "🟢", "MEDIUM": "🟡", "HIGH": "🔴"},
      custom_prompts={
          SYSTEM_MSG_KEY: CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG,
@@ -22,6 +22,7 @@ from unique_toolkit.evaluators.schemas import (
      EvaluationMetricName,
      EvaluationMetricResult,
  )
+ from unique_toolkit.language_model import LanguageModelName
  from unique_toolkit.language_model.schemas import (
      LanguageModelMessages,
      LanguageModelSystemMessage,
@@ -34,12 +35,12 @@ logger = logging.getLogger(__name__)

  async def check_context_relevancy_async(
      company_id: str,
-     input: EvaluationMetricInput,
+     evaluation_metric_input: EvaluationMetricInput,
      config: EvaluationMetricConfig,
      logger: logging.Logger = logger,
  ) -> EvaluationMetricResult | None:
-     """
-     Analyzes the relevancy of the context provided for the given input and output.
+     """Analyzes the relevancy of the context provided for the given evaluation_metric_input and output.
+
      The analysis classifies the context relevancy level as:
      - low
      - medium
@@ -47,14 +48,14 @@ async def check_context_relevancy_async(

      This method performs the following steps:
      1. Logs the start of the analysis using the provided `logger`.
-     2. Validates the required fields in the `input` data.
+     2. Validates the required fields in the `evaluation_metric_input` data.
      3. Retrieves the messages using the `_get_msgs` method.
      4. Calls `LanguageModelService.complete_async_util` to get a completion result.
      5. Parses and returns the evaluation metric result based on the content of the completion result.

      Args:
          company_id (str): The company ID for the analysis.
-         input (EvaluationMetricInput): The input data used for evaluation, including the generated output and reference information.
+         evaluation_metric_input (EvaluationMetricInput): The evaluation_metric_input data used for evaluation, including the generated output and reference information.
          config (EvaluationMetricConfig): Configuration settings for the evaluation.
          logger (Optional[logging.Logger], optional): The logger used for logging information and errors. Defaults to the logger for the current module.

@@ -63,13 +64,23 @@ async def check_context_relevancy_async(

      Raises:
          EvaluatorException: If required fields are missing or an error occurs during the evaluation.
+
      """
-     model_name = config.language_model.name
-     logger.info(f"Analyzing context relevancy with {model_name}.")
+     model_group_name = (
+         config.language_model.name.value
+         if isinstance(config.language_model.name, LanguageModelName)
+         else config.language_model.name
+     )
+     logger.info(f"Analyzing context relevancy with {model_group_name}.")

-     input.validate_required_fields(context_relevancy_required_input_fields)
+     evaluation_metric_input.validate_required_fields(
+         context_relevancy_required_input_fields,
+     )

-     if input.context_texts and len(input.context_texts) == 0:
+     if (
+         evaluation_metric_input.context_texts
+         and len(evaluation_metric_input.context_texts) == 0
+     ):
          error_message = "No context texts provided."
          raise EvaluatorException(
              user_message=error_message,
@@ -77,11 +88,11 @@ )
          )

      try:
-         msgs = _get_msgs(input, config)
+         msgs = _get_msgs(evaluation_metric_input, config)
          result = await LanguageModelService.complete_async_util(
              company_id=company_id,
              messages=msgs,
-             model_name=model_name,
+             model_name=model_group_name,
          )
          result_content = result.choices[0].message.content
          if not result_content:
@@ -104,25 +115,28 @@


  def _get_msgs(
-     input: EvaluationMetricInput,
+     evaluation_metric_input: EvaluationMetricInput,
      config: EvaluationMetricConfig,
- ):
-     """
-     Composes the messages for context relevancy analysis based on the provided input and configuration.
+ ) -> LanguageModelMessages:
+     """Composes the messages for context relevancy analysis.
+
+     The messages are based on the provided evaluation_metric_input and configuration.

      Args:
-         input (EvaluationMetricInput): The input data that includes context texts for the analysis.
+         evaluation_metric_input (EvaluationMetricInput): The evaluation_metric_input data that includes context texts for the analysis.
          config (EvaluationMetricConfig): The configuration settings for composing messages.

      Returns:
-         LanguageModelMessages: The composed messages as per the provided input and configuration.
+         LanguageModelMessages: The composed messages as per the provided evaluation_metric_input and configuration.
+
      """
      system_msg_content = _get_system_prompt(config)
      system_msg = LanguageModelSystemMessage(content=system_msg_content)

      user_msg_templ = Template(_get_user_prompt(config))
      user_msg_content = user_msg_templ.substitute(
-         input_text=input.input_text, contexts_text=input.get_joined_context_texts()
+         evaluation_metric_input_text=evaluation_metric_input.evaluation_metric_input_text,
+         contexts_text=evaluation_metric_input.get_joined_context_texts(),
      )
      user_msg = LanguageModelUserMessage(content=user_msg_content)
      return LanguageModelMessages([system_msg, user_msg])
@@ -10,7 +10,7 @@ from unique_toolkit.evaluators.schemas import (
      EvaluationMetricName,
  )
  from unique_toolkit.language_model.infos import (
-     LanguageModel,
+     LanguageModelInfo,
      LanguageModelName,
  )

@@ -23,7 +23,7 @@ USER_MSG_DEFAULT_KEY = "userPromptDefault"
  hallucination_metric_default_config = EvaluationMetricConfig(
      enabled=False,
      name=EvaluationMetricName.HALLUCINATION,
-     language_model=LanguageModel(LanguageModelName.AZURE_GPT_4_0613),
+     language_model=LanguageModelInfo.from_name(LanguageModelName.AZURE_GPT_4_0613),
      score_to_emoji={"LOW": "🟢", "MEDIUM": "🟡", "HIGH": "🔴"},
      custom_prompts={
          SYSTEM_MSG_KEY: HALLUCINATION_METRIC_SYSTEM_MSG,
@@ -20,6 +20,7 @@ from unique_toolkit.evaluators.schemas import (
      EvaluationMetricName,
      EvaluationMetricResult,
  )
+ from unique_toolkit.language_model import LanguageModelName
  from unique_toolkit.language_model.schemas import (
      LanguageModelMessages,
      LanguageModelSystemMessage,
@@ -43,8 +44,9 @@ async def check_hallucination_async(
      config: EvaluationMetricConfig,
      logger: logging.Logger = logger,
  ) -> EvaluationMetricResult | None:
-     """
-     Analyzes the level of hallucination in the generated output by comparing it with the provided input
+     """Analyze the level of hallucination in the generated output.
+
+     by comparing it with the provided input
      and the contexts or history. The analysis classifies the hallucination level as:
      - low
      - medium
@@ -72,16 +74,23 @@ async def check_hallucination_async(

      Raises:
          EvaluatorException: If the context texts are empty, required fields are missing, or an error occurs during the evaluation.
+
      """
-     model_name = config.language_model.name
-     logger.info(f"Analyzing level of hallucination with {model_name}.")
+     model_group_name = (
+         config.language_model.name.value
+         if isinstance(config.language_model.name, LanguageModelName)
+         else config.language_model.name
+     )
+     logger.info(f"Analyzing level of hallucination with {model_group_name}.")

      input.validate_required_fields(hallucination_required_input_fields)

      try:
          msgs = _get_msgs(input, config, logger)
          result = await LanguageModelService.complete_async_util(
-             company_id=company_id, messages=msgs, model_name=model_name
+             company_id=company_id,
+             messages=msgs,
+             model_name=model_group_name,
          )
          result_content = result.choices[0].message.content
          if not result_content:
@@ -104,71 +113,72 @@


  def _get_msgs(
-     input: EvaluationMetricInput,
+     evaluation_metric_input: EvaluationMetricInput,
      config: EvaluationMetricConfig,
      logger: logging.Logger,
  ):
-     """
-     Composes the messages for hallucination analysis based on the provided input and configuration.
+     """Composes the messages for hallucination analysis based on the provided evaluation_metric_input and configuration.

      This method decides how to compose the messages based on the availability of context texts and history
-     message texts in the `input`
+     message texts in the `evaluation_metric_input`

      Args:
-         input (EvaluationMetricInput): The input data that includes context texts and history message texts
+         evaluation_metric_input (EvaluationMetricInput): The evaluation_metric_input data that includes context texts and history message texts
              for the analysis.
          config (EvaluationMetricConfig): The configuration settings for composing messages.
          logger (Optional[logging.Logger], optional): The logger used for logging debug information.
              Defaults to the logger for the current module.

      Returns:
-         The composed messages as per the provided input and configuration. The exact type and structure
+         The composed messages as per the provided evaluation_metric_input and configuration. The exact type and structure
          depend on the implementation of the `compose_msgs` and `compose_msgs_default` methods.

      """
-     if input.context_texts or input.history_messages:
+     if (
+         evaluation_metric_input.context_texts
+         or evaluation_metric_input.history_messages
+     ):
          logger.debug("Using context / history for hallucination evaluation.")
-         return _compose_msgs(input, config)
-     else:
-         logger.debug("No contexts and history provided for hallucination evaluation.")
-         return _compose_msgs_default(input, config)
+         return _compose_msgs(evaluation_metric_input, config)
+     logger.debug("No contexts and history provided for hallucination evaluation.")
+     return _compose_msgs_default(evaluation_metric_input, config)


  def _compose_msgs(
-     input: EvaluationMetricInput,
+     evaluation_metric_input: EvaluationMetricInput,
      config: EvaluationMetricConfig,
  ):
-     """
-     Composes the hallucination analysis messages.
-     """
+     """Composes the hallucination analysis messages."""
      system_msg_content = _get_system_prompt_with_contexts(config)
      system_msg = LanguageModelSystemMessage(content=system_msg_content)

      user_msg_templ = Template(_get_user_prompt_with_contexts(config))
      user_msg_content = user_msg_templ.substitute(
-         input_text=input.input_text,
-         contexts_text=input.get_joined_context_texts(tag_name="reference"),
-         history_messages_text=input.get_joined_history_texts(tag_name="conversation"),
-         output_text=input.output_text,
+         evaluation_metric_input_text=evaluation_metric_input.evaluation_metric_input_text,
+         contexts_text=evaluation_metric_input.get_joined_context_texts(
+             tag_name="reference",
+         ),
+         history_messages_text=evaluation_metric_input.get_joined_history_texts(
+             tag_name="conversation",
+         ),
+         output_text=evaluation_metric_input.output_text,
      )
      user_msg = LanguageModelUserMessage(content=user_msg_content)
      return LanguageModelMessages([system_msg, user_msg])


  def _compose_msgs_default(
-     input: EvaluationMetricInput,
+     evaluation_metric_input: EvaluationMetricInput,
      config: EvaluationMetricConfig,
  ):
-     """
-     Composes the hallucination analysis prompt without messages.
-     """
+     """Composes the hallucination analysis prompt without messages."""
      system_msg_content = _get_system_prompt_default(config)
      system_msg = LanguageModelSystemMessage(content=system_msg_content)

      user_msg_templ = Template(_get_user_prompt_default(config))
      user_msg_content = user_msg_templ.substitute(
-         input_text=input.input_text,
-         output_text=input.output_text,
+         evaluation_metric_input_text=evaluation_metric_input.evaluation_metric_input_text,
+         output_text=evaluation_metric_input.output_text,
      )
      user_msg = LanguageModelUserMessage(content=user_msg_content)
      return LanguageModelMessages([system_msg, user_msg])
@@ -1,5 +1,5 @@
  import logging
- from typing import Type, cast
+ from typing import cast

  import unique_sdk
  from pydantic import BaseModel
@@ -29,11 +29,10 @@ def complete(
      timeout: int = DEFAULT_COMPLETE_TIMEOUT,
      tools: list[LanguageModelTool] | None = None,
      other_options: dict | None = None,
-     structured_output_model: Type[BaseModel] | None = None,
+     structured_output_model: type[BaseModel] | None = None,
      structured_output_enforce_schema: bool = False,
  ) -> LanguageModelResponse:
-     """
-     Calls the completion endpoint synchronously without streaming the response.
+     """Call the completion endpoint synchronously without streaming the response.

      Args:
          company_id (str): The company ID associated with the request.
@@ -46,6 +45,7 @@ def complete(

      Returns:
          LanguageModelResponse: The response object containing the completed result.
+
      """
      options, model, messages_dict, _ = _prepare_completion_params_util(
          messages=messages,
@@ -62,7 +62,7 @@ def complete(
              company_id=company_id,
              model=model,
              messages=cast(
-                 list[unique_sdk.Integrated.ChatCompletionRequestMessage],
+                 "list[unique_sdk.Integrated.ChatCompletionRequestMessage]",
                  messages_dict,
              ),
              timeout=timeout,
@@ -82,11 +82,10 @@ async def complete_async(
      timeout: int = DEFAULT_COMPLETE_TIMEOUT,
      tools: list[LanguageModelTool] | None = None,
      other_options: dict | None = None,
-     structured_output_model: Type[BaseModel] | None = None,
+     structured_output_model: type[BaseModel] | None = None,
      structured_output_enforce_schema: bool = False,
  ) -> LanguageModelResponse:
-     """
-     Calls the completion endpoint asynchronously without streaming the response.
+     """Call the completion endpoint asynchronously without streaming the response.

      This method sends a request to the completion endpoint using the provided messages, model name,
      temperature, timeout, and optional tools. It returns a `LanguageModelResponse` object containing
@@ -105,7 +104,9 @@ async def complete_async(
          LanguageModelResponse: The response object containing the completed result.

      Raises:
-         Exception: If an error occurs during the request, an exception is raised and logged.
+         Exception: If an error occurs during the request, an exception is raised
+             and logged.
+
      """
      options, model, messages_dict, _ = _prepare_completion_params_util(
          messages=messages,
@@ -122,7 +123,7 @@
              company_id=company_id,
              model=model,
              messages=cast(
-                 list[unique_sdk.Integrated.ChatCompletionRequestMessage],
+                 "list[unique_sdk.Integrated.ChatCompletionRequestMessage]",
                  messages_dict,
              ),
              timeout=timeout,
@@ -130,7 +131,7 @@
          )
          return LanguageModelResponse(**response)
      except Exception as e:
-         logger.error(f"Error completing: {e}")  # type: ignore
+         logger.exception(f"Error completing: {e}")
          raise e


@@ -163,14 +164,14 @@ def _to_search_context(chunks: list[ContentChunk]) -> dict | None:
              endPage=chunk.end_page,
              order=chunk.order,
              object=chunk.object,
-         )  # type: ignore
+         )
          for chunk in chunks
      ]


  def _add_response_format_to_options(
      options: dict,
-     structured_output_model: Type[BaseModel],
+     structured_output_model: type[BaseModel],
      structured_output_enforce_schema: bool = False,
  ) -> dict:
      options["responseFormat"] = {
@@ -191,11 +192,10 @@ def _prepare_completion_params_util(
      tools: list[LanguageModelTool] | None = None,
      other_options: dict | None = None,
      content_chunks: list[ContentChunk] | None = None,
-     structured_output_model: Type[BaseModel] | None = None,
+     structured_output_model: type[BaseModel] | None = None,
      structured_output_enforce_schema: bool = False,
  ) -> tuple[dict, str, dict, dict | None]:
-     """
-     Prepares common parameters for completion requests.
+     """Prepare common parameters for completion requests.

      Returns:
          tuple containing:
@@ -203,18 +203,22 @@
          - model (str): Resolved model name
          - messages_dict (dict): Processed messages
          - search_context (dict | None): Processed content chunks if provided
-     """

+     """
      options = _add_tools_to_options({}, tools)
      if structured_output_model:
          options = _add_response_format_to_options(
-             options, structured_output_model, structured_output_enforce_schema
+             options,
+             structured_output_model,
+             structured_output_enforce_schema,
          )
      options["temperature"] = temperature
      if other_options:
          options.update(other_options)

-     model = model_name.name if isinstance(model_name, LanguageModelName) else model_name
+     model = (
+         model_name.value if isinstance(model_name, LanguageModelName) else model_name
+     )

      # Different methods need different message dump parameters
      messages_dict = messages.model_dump(
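The `model_name.name` → `model_name.value` change in `_prepare_completion_params_util` is the likely home of the 'Fix bug not selecting the correct llm' changelog entry: the two only diverge for enum members whose value is not the member name, as with the `litellm:` models added in the next hunks. A small illustration:

```python
from unique_toolkit.language_model import LanguageModelName

m = LanguageModelName.GEMINI_2_0_FLASH
print(m.name)   # "GEMINI_2_0_FLASH"         -- the string the old code resolved
print(m.value)  # "litellm:gemini-2-0-flash" -- the string the toolkit now passes as `model`
```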
@@ -23,6 +23,15 @@ class LanguageModelName(StrEnum):
      AZURE_o3_MINI_2025_0131 = "AZURE_o3_MINI_2025_0131"
      AZURE_GPT_45_PREVIEW_2025_0227 = "AZURE_GPT_45_PREVIEW_2025_0227"
      AZURE_GPT_41_2025_0414 = "AZURE_GPT_41_2025_0414"
+     AZURE_o3_2025_0416 = "AZURE_o3_2025_0416"
+     AZURE_o4_MINI_2025_0416 = "AZURE_o4_MINI_2025_0416"
+     ANTHROPIC_CLAUDE_3_7_SONNET = "litellm:anthropic-claude-3-7-sonnet"
+     ANTHROPIC_CLAUDE_3_7_SONNET_THINKING = (
+         "litellm:anthropic-claude-3-7-sonnet-thinking"
+     )
+     GEMINI_2_0_FLASH = "litellm:gemini-2-0-flash"
+     GEMINI_2_5_FLASH_PREVIEW_0417 = "litellm:gemini-2-5-flash-preview-04-17"
+     GEMINI_2_5_PRO_EXP_0325 = "litellm:gemini-2-5-pro-exp-03-25"


  class EncoderName(StrEnum):
@@ -57,6 +66,7 @@ def get_encoder_name(model_name: LanguageModelName) -> EncoderName:
  class LanguageModelProvider(StrEnum):
      AZURE = "AZURE"
      CUSTOM = "CUSTOM"
+     LITELLM = "LITELLM"


  class ModelCapabilities(StrEnum):
@@ -290,6 +300,44 @@ class LanguageModelInfo(BaseModel):
                      info_cutoff_at=date(2023, 10, 1),
                      published_at=date(2025, 1, 31),
                  )
+             case LanguageModelName.AZURE_o3_2025_0416:
+                 return cls(
+                     name=model_name,
+                     capabilities=[
+                         ModelCapabilities.STRUCTURED_OUTPUT,
+                         ModelCapabilities.FUNCTION_CALLING,
+                         ModelCapabilities.STREAMING,
+                         ModelCapabilities.REASONING,
+                         ModelCapabilities.VISION,
+                     ],
+                     provider=LanguageModelProvider.AZURE,
+                     version="2025-04-16",
+                     encoder_name=EncoderName.O200K_BASE,
+                     token_limits=LanguageModelTokenLimits(
+                         token_limit_input=200_000, token_limit_output=100_000
+                     ),
+                     info_cutoff_at=date(2024, 5, 31),
+                     published_at=date(2025, 4, 16),
+                 )
+             case LanguageModelName.AZURE_o4_MINI_2025_0416:
+                 return cls(
+                     name=model_name,
+                     capabilities=[
+                         ModelCapabilities.STRUCTURED_OUTPUT,
+                         ModelCapabilities.FUNCTION_CALLING,
+                         ModelCapabilities.STREAMING,
+                         ModelCapabilities.REASONING,
+                         ModelCapabilities.VISION,
+                     ],
+                     provider=LanguageModelProvider.AZURE,
+                     version="2025-04-16",
+                     encoder_name=EncoderName.O200K_BASE,
+                     token_limits=LanguageModelTokenLimits(
+                         token_limit_input=200_000, token_limit_output=100_000
+                     ),
+                     info_cutoff_at=date(2024, 5, 31),
+                     published_at=date(2025, 4, 16),
+                 )
              case LanguageModelName.AZURE_GPT_45_PREVIEW_2025_0227:
                  return cls(
                      name=model_name,
@@ -326,6 +374,98 @@ class LanguageModelInfo(BaseModel):
                      info_cutoff_at=date(2024, 5, 31),
                      published_at=date(2025, 4, 14),
                  )
+             case LanguageModelName.ANTHROPIC_CLAUDE_3_7_SONNET:
+                 return cls(
+                     name=model_name,
+                     capabilities=[
+                         ModelCapabilities.FUNCTION_CALLING,
+                         ModelCapabilities.STREAMING,
+                         ModelCapabilities.VISION,
+                     ],
+                     provider=LanguageModelProvider.LITELLM,
+                     version="claude-3-7-sonnet",
+                     encoder_name=EncoderName.O200K_BASE,  # TODO: Update encoder with litellm
+                     token_limits=LanguageModelTokenLimits(
+                         token_limit_input=200_000, token_limit_output=128_000
+                     ),
+                     info_cutoff_at=date(2024, 10, 31),
+                     published_at=date(2025, 2, 24),
+                 )
+             case LanguageModelName.ANTHROPIC_CLAUDE_3_7_SONNET_THINKING:
+                 return cls(
+                     name=model_name,
+                     capabilities=[
+                         ModelCapabilities.FUNCTION_CALLING,
+                         ModelCapabilities.STREAMING,
+                         ModelCapabilities.VISION,
+                         ModelCapabilities.REASONING,
+                     ],
+                     provider=LanguageModelProvider.LITELLM,
+                     version="claude-3-7-sonnet-thinking",
+                     encoder_name=EncoderName.O200K_BASE,  # TODO: Update encoder with litellm
+                     token_limits=LanguageModelTokenLimits(
+                         token_limit_input=200_000, token_limit_output=128_000
+                     ),
+                     info_cutoff_at=date(2024, 10, 31),
+                     published_at=date(2025, 2, 24),
+                 )
+             case LanguageModelName.GEMINI_2_0_FLASH:
+                 return cls(
+                     name=model_name,
+                     capabilities=[
+                         ModelCapabilities.FUNCTION_CALLING,
+                         ModelCapabilities.STREAMING,
+                         ModelCapabilities.VISION,
+                         ModelCapabilities.STRUCTURED_OUTPUT,
+                         ModelCapabilities.REASONING,
+                     ],
+                     provider=LanguageModelProvider.LITELLM,
+                     version="gemini-2-0-flash",
+                     encoder_name=EncoderName.O200K_BASE,  # TODO: Update encoder with litellm
+                     token_limits=LanguageModelTokenLimits(
+                         token_limit_input=1_048_576, token_limit_output=8_192
+                     ),
+                     info_cutoff_at=date(2024, 8, 1),
+                     published_at=date(2025, 2, 1),
+                 )
+             case LanguageModelName.GEMINI_2_5_FLASH_PREVIEW_0417:
+                 return cls(
+                     name=model_name,
+                     capabilities=[
+                         ModelCapabilities.FUNCTION_CALLING,
+                         ModelCapabilities.STREAMING,
+                         ModelCapabilities.VISION,
+                         ModelCapabilities.STRUCTURED_OUTPUT,
+                         ModelCapabilities.REASONING,
+                     ],
+                     provider=LanguageModelProvider.LITELLM,
+                     version="gemini-2-5-flash-preview-04-17",
+                     encoder_name=EncoderName.O200K_BASE,  # TODO:Replace with LLM tokenizer
+                     token_limits=LanguageModelTokenLimits(
+                         token_limit_input=1_048_576, token_limit_output=65_536
+                     ),
+                     info_cutoff_at=date(2025, 1, day=1),
+                     published_at=date(2025, 4, 1),
+                 )
+             case LanguageModelName.GEMINI_2_5_PRO_EXP_0325:
+                 return cls(
+                     name=model_name,
+                     capabilities=[
+                         ModelCapabilities.FUNCTION_CALLING,
+                         ModelCapabilities.STREAMING,
+                         ModelCapabilities.VISION,
+                         ModelCapabilities.STRUCTURED_OUTPUT,
+                         ModelCapabilities.REASONING,
+                     ],
+                     provider=LanguageModelProvider.LITELLM,
+                     version="gemini-2-5-pro-exp-0325",
+                     encoder_name=EncoderName.O200K_BASE,  # TODO: Update encoder with litellm
+                     token_limits=LanguageModelTokenLimits(
+                         token_limit_input=1_048_576, token_limit_output=65_536
+                     ),
+                     info_cutoff_at=date(2025, 1, day=1),
+                     published_at=date(2025, 3, 1),
+                 )
              case _:
                  if isinstance(model_name, LanguageModelName):
                      raise ValueError(
@@ -352,8 +492,10 @@

  @deprecated(
      """
-     Use `LanguageModelInfo` instead of `LanguageModel`
-     """
+     Use `LanguageModelInfo` instead of `LanguageModel`.
+
+     `LanguageModel` will be deprecated on 31.12.2025
+     """,
  )
  class LanguageModel:
      _info: ClassVar[LanguageModelInfo]
@@ -363,8 +505,8 @@ class LanguageModel:

      @property
      def info(self) -> LanguageModelInfo:
-         """
-         Returns all infos about the model:
+         """Return all infos about the model.
+
          - name
         - version
         - provider
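The new enum members resolve through `LanguageModelInfo.from_name` just like the existing Azure entries. A short sketch (model choice is illustrative; attribute values are taken from the case added above):

```python
from unique_toolkit.language_model import LanguageModelName
from unique_toolkit.language_model.infos import LanguageModelInfo, LanguageModelProvider

info = LanguageModelInfo.from_name(LanguageModelName.GEMINI_2_0_FLASH)
print(info.provider)  # LanguageModelProvider.LITELLM
print(info.version)   # "gemini-2-0-flash"
```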
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: unique_toolkit
- Version: 0.7.11
+ Version: 0.7.14
  Summary:
  License: Proprietary
  Author: Martin Fadler
@@ -111,6 +111,17 @@ All notable changes to this project will be documented in this file.
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+ ## [0.7.14] - 2025-05-08
+ - Fix bug not selecting the correct llm
+ - Add LMI type for flexible init of LanguageModelInfo
+ - Replace LanguageModel with LanguageModelInfo in hallucination check
+
+ ## [0.7.13] - 2025-05-07
+ - Adding litellm models `litellm:anthropic-claude-3-7-sonnet`, `litellm:anthropic-claude-3-7-sonnet-thinking`, `litellm:gemini-2-0-flash`, `gemini-2-5-flash-preview-04-17` , `litellm:gemini-2-5-pro-exp-03-25`
+
+ ## [0.7.12] - 2025-05-02
+ - add `AZURE_o3_2025_0416` and `AZURE_o4_MINI_2025_0416` as part of the models
+
  ## [0.7.11] - 2025-04-28
  - Removing `STRUCTURED_OUTPUT` capability from `AZURE_GPT_35_TURBO_0125`, `AZURE_GPT_4_TURBO_2024_0409` and `AZURE_GPT_4o_2024_0513`

@@ -3,7 +3,7 @@ unique_toolkit/_common/_base_service.py,sha256=S8H0rAebx7GsOldA7xInLp3aQJt9yEPDQ
  unique_toolkit/_common/_time_utils.py,sha256=ztmTovTvr-3w71Ns2VwXC65OKUUh-sQlzbHdKTQWm-w,135
  unique_toolkit/_common/exception.py,sha256=caQIE1btsQnpKCHqL2cgWUSbHup06enQu_Pt7uGUTTE,727
  unique_toolkit/_common/validate_required_values.py,sha256=Y_M1ub9gIKP9qZ45F6Zq3ZHtuIqhmOjl8Z2Vd3avg8w,588
- unique_toolkit/_common/validators.py,sha256=w5lzvRxl0sBTvv0CXLF9UwtJyKmmS2lez0KXaqapgBE,258
+ unique_toolkit/_common/validators.py,sha256=l7-hWyRTZ3aF_e73oTQFZdz93s06VhNWVpkERbg2a64,1569
  unique_toolkit/app/__init__.py,sha256=jgwWfu27U911kZE1yRq920ZULGLAQGycD3222YxUvsY,1182
  unique_toolkit/app/init_logging.py,sha256=Sh26SRxOj8i8dzobKhYha2lLrkrMTHfB1V4jR3h23gQ,678
  unique_toolkit/app/init_sdk.py,sha256=Nv4Now4pMfM0AgRhbtatLpm_39rKxn0WmRLwmPhRl-8,1285
@@ -31,24 +31,24 @@ unique_toolkit/embedding/schemas.py,sha256=1GvKCaSk4jixzVQ2PKq8yDqwGEVY_hWclYtoA
  unique_toolkit/embedding/service.py,sha256=ptwNNe2ji7FGqAb5VayedrB9T5b1T00XABwYtgvlGO8,4076
  unique_toolkit/embedding/utils.py,sha256=v86lo__bCJbxZBQ3OcLu5SuwT6NbFfWlcq8iyk6BuzQ,279
  unique_toolkit/evaluators/__init__.py,sha256=3Rfpnowm7MUXHWmeU4UV4s_3Hk-sw3V20oBwQCYlejQ,50
- unique_toolkit/evaluators/config.py,sha256=iYiBi7M6u5MG9nVgpxl9dKfoS4j72stA6Hl-MQHmYp8,1056
+ unique_toolkit/evaluators/config.py,sha256=_DIXToJ-hGNpDAdWa7Q6GMjAsxiC_DquLF-SS5s9rTE,717
  unique_toolkit/evaluators/constants.py,sha256=1oI93jsh0R_TjX_8OenliiiywVe3vTooSnaMqtq6R18,27
- unique_toolkit/evaluators/context_relevancy/constants.py,sha256=9mAGc23e5XjTYOBfeuZVbaqOyYrvRoXYjfUnsBOVShU,1126
+ unique_toolkit/evaluators/context_relevancy/constants.py,sha256=QG2x32LzV42kAkeWTPuLvOX9NlTSxJlsAgDyxomUBmY,1158
  unique_toolkit/evaluators/context_relevancy/prompts.py,sha256=gTlWP7fDuxhrXhCYNCqXMbCey_DalZMdi5l-a6RHgk0,713
  unique_toolkit/evaluators/context_relevancy/service.py,sha256=9hzdMuF4A4T97-3X3zcXgrDISLn1bleZ6tTL1bHa9dQ,1722
- unique_toolkit/evaluators/context_relevancy/utils.py,sha256=E9ljdRNbwYlx04fQDLvgF4SwxvlTJT0vE328PlUF6KA,5191
+ unique_toolkit/evaluators/context_relevancy/utils.py,sha256=qwTkKah6S2hkEGOHxVdQ6RvV6OcjKj4eyd09TcJZlho,5813
  unique_toolkit/evaluators/exception.py,sha256=7lcVbCyoN4Md1chNJDFxpUYyWbVrcr9dcc3TxWykJTc,115
- unique_toolkit/evaluators/hallucination/constants.py,sha256=DEycXlxY9h01D0iF3aU5LIdPrDJ-5OkF0VdXDLn_tSs,1440
+ unique_toolkit/evaluators/hallucination/constants.py,sha256=KDhmSlRBnUkfEAFQLaD80rKtj6p-ZJ3L98hqNmNL7xI,1458
  unique_toolkit/evaluators/hallucination/prompts.py,sha256=9yCpO_WGLDvYfPWKL1VuRA-jt0P_-A-qvLUOmuv-Nks,3320
  unique_toolkit/evaluators/hallucination/service.py,sha256=k8qro5Lw4Ak58m4HYp3G4HPLIaexeFySIIVvW6fAdeA,2408
- unique_toolkit/evaluators/hallucination/utils.py,sha256=4KTJH8low_fBzOcuVlcHB2FRrtIiN8TR6uuU8EGwjJM,7668
+ unique_toolkit/evaluators/hallucination/utils.py,sha256=gO2AOzDQwVTev2_5vDKgJ9A6A9e0himJyAta_wglVG8,8326
  unique_toolkit/evaluators/output_parser.py,sha256=eI72qkzK1dZyUvnfP2SOAQCGBj_-PwX5wy_aLPMsJMY,883
  unique_toolkit/evaluators/schemas.py,sha256=Jaue6Uhx75X1CyHKWj8sT3RE1JZXTqoLtfLt2xQNCX8,2507
  unique_toolkit/language_model/__init__.py,sha256=jWko_vQj48wjnpTtlkg8iNdef0SMI3FN2kGywXRTMzg,1880
  unique_toolkit/language_model/builder.py,sha256=aIAXWWUoB5G-HONJiAt3MdRGd4jdP8nA-HYX2D2WlSI,3048
  unique_toolkit/language_model/constants.py,sha256=B-topqW0r83dkC_25DeQfnPk3n53qzIHUCBS7YJ0-1U,119
- unique_toolkit/language_model/functions.py,sha256=I5jHhHsKoq7GwEQyTrM8LXB2n_6dvMAk7UklenjuHSY,7945
- unique_toolkit/language_model/infos.py,sha256=CwdyoHhq645DC2-Y2jwkHS3m5umjmpmb1EQJ70zOdjg,18967
+ unique_toolkit/language_model/functions.py,sha256=0oSkG4xpbxeaVTJide6g-zunBrsBRuvp7UQlKVbjpSk,7949
+ unique_toolkit/language_model/infos.py,sha256=qPf4Xlanet8jf0apZ6-qxS_6zmDd6p9D40it2TqmF3w,25910
  unique_toolkit/language_model/prompt.py,sha256=JSawaLjQg3VR-E2fK8engFyJnNdk21zaO8pPIodzN4Q,3991
  unique_toolkit/language_model/schemas.py,sha256=rrwzUgKANFOrdehCULW8Hh03uRW3tsE5dXpWqxmClfg,8618
  unique_toolkit/language_model/service.py,sha256=FUf-HTKNslrMAh8qFMco_ZpP-N0t_iAFWK3juldoUe8,8343
@@ -58,7 +58,7 @@ unique_toolkit/short_term_memory/constants.py,sha256=698CL6-wjup2MvU19RxSmQk3gX7
  unique_toolkit/short_term_memory/functions.py,sha256=3WiK-xatY5nh4Dr5zlDUye1k3E6kr41RiscwtTplw5k,4484
  unique_toolkit/short_term_memory/schemas.py,sha256=OhfcXyF6ACdwIXW45sKzjtZX_gkcJs8FEZXcgQTNenw,1406
  unique_toolkit/short_term_memory/service.py,sha256=vEKFxP1SScPrFniso492fVthWR1sosdFibhiNF3zRvI,8081
- unique_toolkit-0.7.11.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
- unique_toolkit-0.7.11.dist-info/METADATA,sha256=OLYyH8X2yEgAkauCTWjsCJvIiI6JjmJnElRSeqS-Pn8,21641
- unique_toolkit-0.7.11.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- unique_toolkit-0.7.11.dist-info/RECORD,,
+ unique_toolkit-0.7.14.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
+ unique_toolkit-0.7.14.dist-info/METADATA,sha256=PE6_LSa3whHD6727vPI4Wu2JEuHpPchisWC1RcuFNMA,22172
+ unique_toolkit-0.7.14.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ unique_toolkit-0.7.14.dist-info/RECORD,,