unique_toolkit 0.8.14__tar.gz → 0.8.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/CHANGELOG.md +6 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/PKG-INFO +7 -1
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/pyproject.toml +1 -1
- unique_toolkit-0.8.16/unique_toolkit/_common/default_language_model.py +6 -0
- unique_toolkit-0.8.16/unique_toolkit/_common/token/image_token_counting.py +67 -0
- unique_toolkit-0.8.16/unique_toolkit/_common/token/token_counting.py +196 -0
- unique_toolkit-0.8.16/unique_toolkit/evals/config.py +36 -0
- unique_toolkit-0.8.16/unique_toolkit/evals/context_relevancy/prompts.py +56 -0
- unique_toolkit-0.8.16/unique_toolkit/evals/context_relevancy/schema.py +88 -0
- unique_toolkit-0.8.16/unique_toolkit/evals/context_relevancy/service.py +241 -0
- unique_toolkit-0.8.16/unique_toolkit/evals/hallucination/constants.py +61 -0
- unique_toolkit-0.8.16/unique_toolkit/evals/hallucination/hallucination_evaluation.py +92 -0
- unique_toolkit-0.8.16/unique_toolkit/evals/hallucination/prompts.py +79 -0
- unique_toolkit-0.8.16/unique_toolkit/evals/hallucination/service.py +57 -0
- unique_toolkit-0.8.16/unique_toolkit/evals/hallucination/utils.py +213 -0
- unique_toolkit-0.8.16/unique_toolkit/evals/output_parser.py +48 -0
- unique_toolkit-0.8.16/unique_toolkit/evals/tests/test_context_relevancy_service.py +252 -0
- unique_toolkit-0.8.16/unique_toolkit/evals/tests/test_output_parser.py +80 -0
- unique_toolkit-0.8.16/unique_toolkit/history_manager/history_construction_with_contents.py +307 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/history_manager/history_manager.py +80 -111
- unique_toolkit-0.8.16/unique_toolkit/history_manager/loop_token_reducer.py +457 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/language_model/schemas.py +8 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/reference_manager/reference_manager.py +15 -2
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/LICENSE +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/README.md +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/__init__.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/_common/_base_service.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/_common/_time_utils.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/_common/exception.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/_common/validate_required_values.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/_common/validators.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/app/__init__.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/app/dev_util.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/app/init_logging.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/app/init_sdk.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/app/performance/async_tasks.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/app/performance/async_wrapper.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/app/schemas.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/app/unique_settings.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/app/verification.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/chat/__init__.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/chat/constants.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/chat/functions.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/chat/schemas.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/chat/service.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/chat/state.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/chat/utils.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/content/__init__.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/content/constants.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/content/functions.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/content/schemas.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/content/service.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/content/utils.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/embedding/__init__.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/embedding/constants.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/embedding/functions.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/embedding/schemas.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/embedding/service.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/embedding/utils.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/evals/evaluation_manager.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/evals/exception.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/evals/schemas.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/evaluators/__init__.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/evaluators/config.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/evaluators/constants.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/evaluators/context_relevancy/constants.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/evaluators/context_relevancy/prompts.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/evaluators/context_relevancy/service.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/evaluators/context_relevancy/utils.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/evaluators/exception.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/evaluators/hallucination/constants.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/evaluators/hallucination/prompts.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/evaluators/hallucination/service.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/evaluators/hallucination/utils.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/evaluators/output_parser.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/evaluators/schemas.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/framework_utilities/langchain/client.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/framework_utilities/langchain/history.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/framework_utilities/openai/client.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/framework_utilities/openai/message_builder.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/framework_utilities/utils.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/history_manager/utils.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/language_model/__init__.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/language_model/builder.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/language_model/constants.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/language_model/functions.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/language_model/infos.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/language_model/prompt.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/language_model/reference.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/language_model/service.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/language_model/utils.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/postprocessor/postprocessor_manager.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/protocols/support.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/short_term_memory/__init__.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/short_term_memory/constants.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/short_term_memory/functions.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/short_term_memory/persistent_short_term_memory_manager.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/short_term_memory/schemas.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/short_term_memory/service.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/smart_rules/__init__.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/smart_rules/compile.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/thinking_manager/thinking_manager.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/tools/agent_chunks_handler.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/tools/config.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/tools/factory.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/tools/schemas.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/tools/test/test_tool_progress_reporter.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/tools/tool.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/tools/tool_manager.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/tools/tool_progress_reporter.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/tools/utils/execution/execution.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/tools/utils/source_handling/schema.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/tools/utils/source_handling/source_formatting.py +0 -0
- {unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/unique_toolkit/tools/utils/source_handling/tests/test_source_formatting.py +0 -0
{unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/CHANGELOG.md

```diff
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.8.16] - 2025-08-19
+- moved Hallucination evaluator into toolkit
+
+## [0.8.15] - 2025-08-19
+- Added history loading from database for History Manager
+
 ## [0.8.14] - 2025-08-19
 - Including GPT-5 series deployed via LiteLLM into language model info
 
```
{unique_toolkit-0.8.14 → unique_toolkit-0.8.16}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unique_toolkit
-Version: 0.8.14
+Version: 0.8.16
 Summary: 
 License: Proprietary
 Author: Martin Fadler
@@ -114,6 +114,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.8.16] - 2025-08-19
+- moved Hallucination evaluator into toolkit
+
+## [0.8.15] - 2025-08-19
+- Added history loading from database for History Manager
+
 ## [0.8.14] - 2025-08-19
 - Including GPT-5 series deployed via LiteLLM into language model info
 
```
unique_toolkit-0.8.16/unique_toolkit/_common/default_language_model.py (new file)

```diff
@@ -0,0 +1,6 @@
+from unique_toolkit.language_model.infos import LanguageModelName
+
+DEFAULT_GPT_35_TURBO = LanguageModelName.AZURE_GPT_35_TURBO_0125
+DEFAULT_GPT_4o = LanguageModelName.AZURE_GPT_4o_2024_1120
+DEFAULT_GPT_4o_STRUCTURED_OUTPUT = LanguageModelName.AZURE_GPT_4o_2024_0806
+DEFAULT_GPT_4o_MINI = LanguageModelName.AZURE_GPT_4o_MINI_2024_0718
```
unique_toolkit-0.8.16/unique_toolkit/_common/token/image_token_counting.py (new file)

```diff
@@ -0,0 +1,67 @@
+import base64
+import math
+import re
+from enum import Enum
+from io import BytesIO
+
+from PIL import Image
+
+
+class DetailLevel(Enum):
+    LOW = "low"
+    HIGH = "high"
+
+
+# https://platform.openai.com/docs/guides/vision/calculating-costs#calculating-costs
+def calculate_image_tokens(width, height, detail: DetailLevel):
+    """
+    Calculate the token cost of an image based on its dimensions and detail level.
+    NOTE: While we followed the documentation provided by OpenAI to calculate image token cost,
+    in practice we noticed that this function overestimates the number of tokens consumed by the model.
+
+    Parameters:
+    - width (int): The width of the image in pixels.
+    - height (int): The height of the image in pixels.
+    - detail (DetailLevel): The detail level, either "low" or "high".
+
+    Returns:
+    - int: The token cost of the image.
+    """
+    # Base cost for low detail
+    if detail == DetailLevel.LOW:
+        return 85
+
+    # Scaling for high detail:
+    # scale down to fit within a 2048x2048 square
+    max_long_dim = 2048
+    long_dim = max(width, height)
+    if long_dim > max_long_dim:
+        scale_factor = long_dim / max_long_dim
+        width = int(width / scale_factor)
+        height = int(height / scale_factor)
+
+    # Scale down the shortest side to 768
+    max_short_dim = 768
+    short_dim = min(width, height)
+    if short_dim > max_short_dim:
+        scale_factor = short_dim / max_short_dim
+        width = int(width / scale_factor)
+        height = int(height / scale_factor)
+
+    # Count the 512x512 tiles covering the scaled image
+    tiles = math.ceil(width / 512) * math.ceil(height / 512)
+    # Compute token cost: 170 tokens per tile plus the 85-token base
+    token_cost = (tiles * 170) + 85
+    return token_cost
+
+
+def calculate_image_tokens_from_base64(base64_string: str):
+    base64_string = remove_base64_header(base64_string)
+    image = Image.open(BytesIO(base64.b64decode(base64_string)))
+    # Detail level HIGH is the default to be on the safe side
+    return calculate_image_tokens(image.width, image.height, DetailLevel.HIGH)
+
+
+def remove_base64_header(base64_string: str):
+    header_pattern = r"^data:image/\w+;base64,"
+    return re.sub(header_pattern, "", base64_string)
```
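The tiling arithmetic above is easy to sanity-check by hand. A minimal sketch (the import path assumes the new `unique_toolkit._common.token` package is importable as laid out in the file list): a 4096×2048 image at high detail is first capped to 2048×1024 on the long side, then to 1536×768 on the short side, giving ceil(1536/512) × ceil(768/512) = 3 × 2 = 6 tiles, i.e. 6 × 170 + 85 = 1105 tokens.

```python
# Sanity check of the tiling math above; values worked out by hand from
# the OpenAI cost guide the module cites.
from unique_toolkit._common.token.image_token_counting import (
    DetailLevel,
    calculate_image_tokens,
)

assert calculate_image_tokens(4096, 2048, DetailLevel.LOW) == 85    # flat low-detail cost
assert calculate_image_tokens(4096, 2048, DetailLevel.HIGH) == 1105  # 6 tiles * 170 + 85
```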
unique_toolkit-0.8.16/unique_toolkit/_common/token/token_counting.py (new file)

```diff
@@ -0,0 +1,196 @@
+# Original source
+# https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
+
+import json
+from typing import Any, Callable
+
+from pydantic import BaseModel
+from unique_toolkit.language_model import (
+    LanguageModelMessage,
+    LanguageModelMessages,
+    LanguageModelName,
+)
+
+from unique_toolkit._common.token.image_token_counting import (
+    calculate_image_tokens_from_base64,
+)
+
+
+class SpecialToolCallingTokens(BaseModel):
+    func_init: int = 0
+    prop_init: int = 0
+    prop_key: int = 0
+    enum_init: int = 0
+    enum_item: int = 0
+    func_end: int = 0
+
+
+def get_special_token(model: LanguageModelName) -> SpecialToolCallingTokens:
+    special_token = SpecialToolCallingTokens()
+
+    match model:
+        case (
+            LanguageModelName.AZURE_GPT_4o_2024_0513
+            | LanguageModelName.AZURE_GPT_4o_2024_0806
+            | LanguageModelName.AZURE_GPT_4o_MINI_2024_0718
+            | LanguageModelName.AZURE_GPT_4o_2024_1120
+        ):
+            special_token.func_init = 7
+            special_token.prop_init = 3
+            special_token.prop_key = 3
+            special_token.enum_init = -3
+            special_token.enum_item = 3
+            special_token.func_end = 12
+
+        case (
+            LanguageModelName.AZURE_GPT_35_TURBO_0125
+            | LanguageModelName.AZURE_GPT_4_0613
+            | LanguageModelName.AZURE_GPT_4_32K_0613
+            | LanguageModelName.AZURE_GPT_4_TURBO_2024_0409
+        ):
+            special_token.func_init = 10
+            special_token.prop_init = 3
+            special_token.prop_key = 3
+            special_token.enum_init = -3
+            special_token.enum_item = 3
+            special_token.func_end = 12
+
+        case _:
+            raise NotImplementedError(
+                f"num_tokens_for_tools() is not implemented for model {model}."
+            )
+    return special_token
+
+
+def num_tokens_per_messages(
+    messages: list[dict[str, str]], encode: Callable[[str], list[int]]
+) -> list[int]:
+    """Return the number of tokens used by each message in a list of messages."""
+
+    num_token_per_message = []
+    for message in messages:
+        num_tokens = 3  # extra_tokens_per_message
+        for key, value in message.items():
+            if isinstance(value, str):
+                num_tokens += len(encode(value))
+            elif isinstance(value, list):
+                # NOTE: The result returned by the function below is not 100% accurate.
+                num_tokens += handle_message_with_images(value, encode)
+            if key == "name":
+                num_tokens += 1  # extra_tokens_per_name
+
+        num_token_per_message.append(num_tokens)
+
+    return num_token_per_message
+
+
+def num_tokens_from_messages(
+    messages: list[dict[str, str]], encode: Callable[[str], list[int]]
+) -> int:
+    """Return the total number of tokens used by a list of messages."""
+
+    num_tokens_per_message = num_tokens_per_messages(messages, encode)
+    num_tokens = sum(num_tokens_per_message) + 3
+
+    return num_tokens
+
+
+def num_tokens_for_tools(
+    functions: list[dict[str, Any]],
+    special_token: SpecialToolCallingTokens,
+    encode: Callable[[str], list[int]],
+):
+    def num_token_function_enum(
+        properties: dict[str, Any], key: str, encode: Callable[[str], list[int]]
+    ):
+        enum_token_count = 0
+        enum_token_count += special_token.enum_init
+        for item in properties[key]["enum"]:
+            enum_token_count += special_token.enum_item
+            enum_token_count += len(encode(item))
+
+        return enum_token_count
+
+    func_token_count = 0
+    if len(functions) > 0:
+        for func in functions:
+            func_token_count += special_token.func_init
+            function = func.get("function", {})
+            func_token_count += len(
+                encode(
+                    function.get("name", "")
+                    + ":"
+                    + function.get("description", "").rstrip(".").rstrip()
+                )
+            )
+            if len(function.get("parameters", {}).get("properties", {})) > 0:
+                properties = function.get("parameters", {}).get(
+                    "properties", {}
+                )
+                func_token_count += special_token.prop_init
+
+                for key in list(properties.keys()):
+                    func_token_count += special_token.prop_key
+
+                    if "enum" in properties[key].keys():
+                        func_token_count += num_token_function_enum(
+                            properties, key, encode
+                        )
+
+                    func_token_count += len(
+                        encode(
+                            f"{key}:{properties[key]['type']}:{properties[key]['description'].rstrip('.').rstrip()}"
+                        )
+                    )
+
+            func_token_count += special_token.func_end
+
+    return func_token_count
+
+
+def handle_message_with_images(
+    message: list[dict], encode: Callable[[str], list[int]]
+):
+    token_count = 0
+    for item in message:
+        if item.get("type") == "image_url":
+            image_url = item.get("imageUrl", {}).get("url")
+            if image_url:
+                token_count += calculate_image_tokens_from_base64(image_url)
+        elif item.get("type") == "text":
+            token_count += len(encode(item.get("text", "")))
+    return token_count
+
+
+def messages_to_openai_messages(
+    messages: LanguageModelMessages | list[LanguageModelMessage],
+):
+    if isinstance(messages, list):
+        messages = LanguageModelMessages(messages)
+
+    return [
+        {
+            k: v
+            for k, v in m.items()
+            if (k in ["content", "role"] and v is not None)
+        }
+        for m in json.loads(messages.model_dump_json())
+    ]
+
+
+def num_tokens_per_language_model_message(
+    messages: LanguageModelMessages | list[LanguageModelMessage],
+    encode: Callable[[str], list[int]],
+) -> list[int]:
+    return num_tokens_per_messages(
+        messages=messages_to_openai_messages(messages), encode=encode
+    )
+
+
+def num_token_for_language_model_messages(
+    messages: LanguageModelMessages | list[LanguageModelMessage],
+    encode: Callable[[str], list[int]],
+) -> int:
+    return num_tokens_from_messages(
+        messages_to_openai_messages(messages), encode
+    )
```
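The `encode` callable threaded through these helpers is any `str -> list[int]` tokenizer, so tiktoken's encoders plug in directly. A minimal usage sketch, assuming tiktoken is installed; mapping a given Azure deployment to the right tiktoken encoding is left to the caller:

```python
import tiktoken

from unique_toolkit._common.token.token_counting import (
    num_tokens_from_messages,
    num_tokens_per_messages,
)

# tiktoken ships encoders keyed by OpenAI model names; "gpt-4o" resolves to o200k_base.
encode = tiktoken.encoding_for_model("gpt-4o").encode

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "How many tokens is this conversation?"},
]
print(num_tokens_per_messages(messages, encode))   # per-message counts, +3 each for framing
print(num_tokens_from_messages(messages, encode))  # total, +3 for the assistant reply priming
```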
unique_toolkit-0.8.16/unique_toolkit/evals/config.py (new file)

```diff
@@ -0,0 +1,36 @@
+from typing import Any
+
+from humps import camelize
+from pydantic import BaseModel, ConfigDict, Field
+
+from unique_toolkit._common.validators import LMI
+from unique_toolkit.language_model.infos import LanguageModelInfo, LanguageModelName
+
+
+from .schemas import (
+    EvaluationMetricName,
+)
+
+model_config = ConfigDict(
+    alias_generator=camelize,
+    populate_by_name=True,
+    arbitrary_types_allowed=True,
+    validate_default=True,
+)
+
+
+class EvaluationMetricConfig(BaseModel):
+    model_config = model_config
+
+    enabled: bool = False
+    name: EvaluationMetricName
+    language_model: LMI = LanguageModelInfo.from_name(
+        LanguageModelName.AZURE_GPT_35_TURBO_0125,
+    )
+    additional_llm_options: dict[str, Any] = Field(
+        default={},
+        description="Additional options to pass to the language model.",
+    )
+    custom_prompts: dict[str, str] = {}
+    score_to_label: dict[str, str] = {}
+    score_to_title: dict[str, str] = {}
```
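Because the config camelizes aliases and sets `populate_by_name=True`, the same object can be built from snake_case Python keywords or camelCase JSON. A sketch, assuming `EvaluationMetricName` exposes a `CONTEXT_RELEVANCY` member (the enum lives in the unchanged `evals/schemas.py`, so the member name here is a guess):

```python
from unique_toolkit.evals.config import EvaluationMetricConfig
from unique_toolkit.evals.schemas import EvaluationMetricName

config = EvaluationMetricConfig(
    name=EvaluationMetricName.CONTEXT_RELEVANCY,  # assumed enum member
    enabled=True,
)

# Equivalent, using the camelCase aliases generated by humps.camelize:
same = EvaluationMetricConfig.model_validate(
    {"name": config.name, "enabled": True, "additionalLlmOptions": {"temperature": 0}}
)
```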
unique_toolkit-0.8.16/unique_toolkit/evals/context_relevancy/prompts.py (new file)

```diff
@@ -0,0 +1,56 @@
+CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG = """
+You will receive an input and a set of contexts.
+Your task is to evaluate how relevant the contexts are to the input text.
+
+Use the following rating scale to generate a score:
+[low] - The contexts are not relevant to the input.
+[medium] - The contexts are somewhat relevant to the input.
+[high] - The contexts are highly relevant to the input.
+
+Your answer must be in JSON format:
+{
+    "reason": Your explanation of your judgement of the evaluation,
+    "value": decision, must be one of the following ["low", "medium", "high"]
+}
+"""
+
+CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG_STRUCTURED_OUTPUT = """
+You will receive an input and a set of contexts.
+Your task is to evaluate how relevant the contexts are to the input text.
+Further you should extract relevant facts from the contexts.
+
+# Output Format
+- Generate data according to the provided data schema.
+- Ensure the output adheres to the format required by the pydantic object.
+- All necessary fields should be populated as per the data schema guidelines.
+"""
+
+CONTEXT_RELEVANCY_METRIC_USER_MSG = """
+Here is the data:
+
+Input:
+'''
+$input_text
+'''
+
+Contexts:
+'''
+$context_texts
+'''
+
+Answer as JSON:
+"""
+
+CONTEXT_RELEVANCY_METRIC_USER_MSG_STRUCTURED_OUTPUT = """
+Here is the data:
+
+Input:
+'''
+$input_text
+'''
+
+Contexts:
+'''
+$context_texts
+'''
+"""
```
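The `$`-prefixed placeholders suggest `string.Template`-style substitution (the service module is what actually renders these in the package). A sketch with made-up input and contexts:

```python
from string import Template

from unique_toolkit.evals.context_relevancy.prompts import (
    CONTEXT_RELEVANCY_METRIC_USER_MSG,
)

# Fill the $input_text and $context_texts placeholders from the prompt above.
user_msg = Template(CONTEXT_RELEVANCY_METRIC_USER_MSG).substitute(
    input_text="What is the notice period in the contract?",
    context_texts="[1] The notice period is 30 days.\n[2] Either party may terminate in writing.",
)
```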
unique_toolkit-0.8.16/unique_toolkit/evals/context_relevancy/schema.py (new file)

```diff
@@ -0,0 +1,88 @@
+from pydantic import BaseModel, Field, create_model
+from pydantic.json_schema import SkipJsonSchema
+
+
+
+from pydantic import BaseModel, ConfigDict
+
+from unique_toolkit.tools.config import get_configuration_dict
+
+
+class StructuredOutputModel(BaseModel):
+    model_config = ConfigDict(extra="forbid")
+
+
+
+class StructuredOutputConfig(BaseModel):
+    model_config = get_configuration_dict()
+
+    enabled: bool = Field(
+        default=False,
+        description="Whether to use structured output for the evaluation.",
+    )
+    extract_fact_list: bool = Field(
+        default=False,
+        description="Whether to extract a list of relevant facts from context chunks with structured output.",
+    )
+    reason_description: str = Field(
+        default="A brief explanation justifying your evaluation decision.",
+        description="The description of the reason field for structured output.",
+    )
+    value_description: str = Field(
+        default="Assessment of how relevant the facts are to the query. Must be one of: ['low', 'medium', 'high'].",
+        description="The description of the value field for structured output.",
+    )
+
+    fact_description: str = Field(
+        default="A fact is a piece of information that directly answers the user's query. Make sure to emphasize the important information from the fact with bold text.",
+        description="The description of the fact field for structured output.",
+    )
+    fact_list_description: str = Field(
+        default="A list of relevant facts extracted from the source that supports or answers the user's query.",
+        description="The description of the fact list field for structured output.",
+    )
+
+
+class Fact(StructuredOutputModel):
+    fact: str
+
+
+class EvaluationSchemaStructuredOutput(StructuredOutputModel):
+    reason: str
+    value: str
+
+    fact_list: list[Fact] = Field(default_factory=list[Fact])
+
+    @classmethod
+    def get_with_descriptions(cls, config: StructuredOutputConfig):
+        if config.extract_fact_list:
+            FactWithDescription = create_model(
+                "Fact",
+                fact=(str, Field(..., description=config.fact_description)),
+                __base__=Fact,
+            )
+            fact_list_field = (
+                list[FactWithDescription],
+                Field(
+                    description=config.fact_list_description,
+                ),
+            )
+        else:
+            fact_list_field = (
+                SkipJsonSchema[list[Fact]],
+                Field(default_factory=list[Fact]),
+            )
+
+        return create_model(
+            "EvaluationSchemaStructuredOutputWithDescription",
+            reason=(
+                str,
+                Field(..., description=config.reason_description),
+            ),
+            value=(
+                str,
+                Field(..., description=config.value_description),
+            ),
+            fact_list=fact_list_field,
+            __base__=cls,
+        )
```
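A sketch of how this factory is presumably consumed: build a schema class whose field descriptions come from the config, then hand its JSON schema to a structured-output completion. With `extract_fact_list=False`, `fact_list` is wrapped in `SkipJsonSchema`, so it stays on the model but drops out of the emitted schema.

```python
from unique_toolkit.evals.context_relevancy.schema import (
    EvaluationSchemaStructuredOutput,
    StructuredOutputConfig,
)

config = StructuredOutputConfig(enabled=True, extract_fact_list=True)
schema_cls = EvaluationSchemaStructuredOutput.get_with_descriptions(config)

# The generated JSON schema carries the configured descriptions on
# reason, value, and the fact list; with extract_fact_list=False the
# fact_list property would be absent.
json_schema = schema_cls.model_json_schema()
assert "fact_list" in json_schema["properties"]
```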