unique_toolkit 0.5.24__py3-none-any.whl → 0.5.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
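Most of this release range introduces a new `unique_toolkit.evaluators` package. As a rough orientation, the sketch below shows how the new hallucination evaluator might be wired up; the event, texts, and the `model_copy` override are illustrative assumptions, not an example taken from the package documentation.

```python
# Hypothetical usage sketch for the evaluators package added in 0.5.25.
# The event is normally supplied by the unique_toolkit app runtime; the
# texts and the config override below are invented for illustration.
import logging

from unique_toolkit.app.schemas import Event
from unique_toolkit.evaluators.hallucination.constants import (
    hallucination_metric_default_config,
)
from unique_toolkit.evaluators.hallucination.service import HallucinationEvaluator
from unique_toolkit.evaluators.schemas import EvaluationMetricInput


async def evaluate_answer(event: Event) -> None:
    evaluator = HallucinationEvaluator(event, logger=logging.getLogger(__name__))
    metric_input = EvaluationMetricInput(
        input_text="What is the notice period?",
        context_texts=["The notice period is 30 days."],
        history_messages=[],
        output_text="The notice period is 30 days.",
    )
    # The default config ships disabled; enable it for this run.
    config = hallucination_metric_default_config.model_copy(update={"enabled": True})
    result = await evaluator.run(metric_input, config)
    if result is not None:
        print(result.name, result.value, result.reason)
```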
@@ -0,0 +1,33 @@
+ from typing import Optional
+
+
+ class CommonException(Exception):
+     def __init__(
+         self,
+         user_message: str,
+         error_message: str,
+         exception: Optional[Exception] = None,
+     ):
+         super().__init__(error_message)
+         self._user_message = user_message
+         self._error_message = error_message
+         self._exception = exception
+
+     @property
+     def user_message(self):
+         return self._user_message
+
+     @property
+     def error_message(self):
+         return self._error_message
+
+     @property
+     def name(self):
+         return self.__class__.__name__
+
+     @property
+     def exception(self):
+         return self._exception
+
+     def __str__(self):
+         return self._error_message
@@ -0,0 +1,8 @@
+ from unique_toolkit.language_model import LanguageModel, LanguageModelName
+
+
+ def validate_and_init_language_model(value: LanguageModelName | LanguageModel | str):
+     if isinstance(value, LanguageModel):
+         return value
+
+     return LanguageModel(value)
@@ -204,6 +204,13 @@ class ContentService(BaseService):

          return self._map_contents(contents)

+     def search_content_on_chat(
+         self,
+     ) -> list[Content]:
+         where = {"ownerId": {"equals": self.event.payload.chat_id}}
+
+         return self.search_contents(where)
+
      @staticmethod
      def _map_content_chunk(content_chunk: dict):
          return ContentChunk(
@@ -0,0 +1,35 @@
+ from humps import camelize
+ from pydantic import BaseModel, ConfigDict, field_validator
+
+ from unique_toolkit._common.validators import validate_and_init_language_model
+ from unique_toolkit.evaluators.schemas import (
+     EvaluationMetricName,
+ )
+ from unique_toolkit.language_model.infos import (
+     LanguageModel,
+     LanguageModelName,
+ )
+
+ model_config = ConfigDict(
+     alias_generator=camelize,
+     populate_by_name=True,
+     arbitrary_types_allowed=True,
+     validate_default=True,
+     json_encoders={LanguageModel: lambda v: v.display_name},
+ )
+
+
+ class EvaluationMetricConfig(BaseModel):
+     model_config = model_config
+
+     enabled: bool = False
+     name: EvaluationMetricName
+     language_model: LanguageModel = LanguageModel(
+         LanguageModelName.AZURE_GPT_35_TURBO_0613
+     )
+     custom_prompts: dict[str, str] = {}
+     score_to_emoji: dict[str, str] = {}
+
+     @field_validator("language_model", mode="before")
+     def validate_language_model(cls, value: LanguageModelName | LanguageModel):
+         return validate_and_init_language_model(value)
@@ -0,0 +1,32 @@
+ from unique_toolkit.evaluators.config import EvaluationMetricConfig
+ from unique_toolkit.evaluators.context_relevancy.prompts import (
+     CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG,
+     CONTEXT_RELEVANCY_METRIC_USER_MSG,
+ )
+ from unique_toolkit.evaluators.schemas import (
+     EvaluationMetricName,
+ )
+ from unique_toolkit.language_model.infos import LanguageModel
+ from unique_toolkit.language_model.service import LanguageModelName
+
+ SYSTEM_MSG_KEY = "systemPrompt"
+ USER_MSG_KEY = "userPrompt"
+
+ # Required input fields for context relevancy evaluation
+ context_relevancy_required_input_fields = [
+     "input_text",
+     "output_text",
+     "context_texts",
+ ]
+
+
+ default_config = EvaluationMetricConfig(
+     enabled=False,
+     name=EvaluationMetricName.CONTEXT_RELEVANCY,
+     language_model=LanguageModel(LanguageModelName.AZURE_GPT_35_TURBO_0613),
+     score_to_emoji={"LOW": "🟢", "MEDIUM": "🟡", "HIGH": "🔴"},
+     custom_prompts={
+         SYSTEM_MSG_KEY: CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG,
+         USER_MSG_KEY: CONTEXT_RELEVANCY_METRIC_USER_MSG,
+     },
+ )
@@ -0,0 +1,31 @@
+ CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG = """
+ You will receive an input and a set of contexts.
+ Your task is to evaluate how relevant the contexts are to the input text.
+
+ Use the following rating scale to generate a score:
+ [low] - The contexts are not relevant to the input.
+ [medium] - The contexts are somewhat relevant to the input.
+ [high] - The contexts are highly relevant to the input.
+
+ Your answer must be in JSON format:
+ {
+ "reason": Your explanation of your judgement of the evaluation,
+ "value": decision, must be one of the following ["low", "medium", "high"]
+ }
+ """
+
+ CONTEXT_RELEVANCY_METRIC_USER_MSG = """
+ Here is the data:
+
+ Input:
+ '''
+ $input_text
+ '''
+
+ Contexts:
+ '''
+ $context_texts
+ '''
+
+ Answer as JSON:
+ """
@@ -0,0 +1,53 @@
+ from logging import Logger
+
+ from unique_toolkit.app.schemas import Event
+ from unique_toolkit.evaluators.config import EvaluationMetricConfig
+ from unique_toolkit.evaluators.context_relevancy.constants import default_config
+ from unique_toolkit.evaluators.context_relevancy.utils import (
+     check_context_relevancy_async,
+ )
+ from unique_toolkit.evaluators.schemas import (
+     EvaluationMetricInput,
+     EvaluationMetricResult,
+ )
+
+
+ class ContextRelevancyEvaluator:
+     def __init__(
+         self,
+         event: Event,
+         logger: Logger,
+     ):
+         self.event = event
+         self.logger = logger
+
+     async def run(
+         self,
+         input: EvaluationMetricInput,
+         config: EvaluationMetricConfig = default_config,
+     ) -> EvaluationMetricResult | None:
+         """
+         Analyzes the level of relevancy of a context by comparing
+         it with the input text.
+
+         Args:
+             input (EvaluationMetricInput): The input for the metric.
+             config (EvaluationMetricConfig): The configuration for the metric.
+
+         Returns:
+             EvaluationMetricResult | None: The result of the evaluation, indicating the level of context relevancy.
+                 Returns None if the metric is not enabled.
+
+         Raises:
+             EvaluatorException: If required fields are missing or an error occurs during evaluation.
+         """
+         if config.enabled is False:
+             self.logger.info("Context relevancy metric is not enabled.")
+             return None
+
+         return await check_context_relevancy_async(
+             company_id=self.event.company_id,
+             input=input,
+             config=config,
+             logger=self.logger,
+         )
@@ -0,0 +1,139 @@
+ import logging
+ from string import Template
+
+ from unique_toolkit.evaluators.config import (
+     EvaluationMetricConfig,
+ )
+ from unique_toolkit.evaluators.context_relevancy.constants import (
+     SYSTEM_MSG_KEY,
+     USER_MSG_KEY,
+     context_relevancy_required_input_fields,
+ )
+ from unique_toolkit.evaluators.context_relevancy.prompts import (
+     CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG,
+     CONTEXT_RELEVANCY_METRIC_USER_MSG,
+ )
+ from unique_toolkit.evaluators.exception import EvaluatorException
+ from unique_toolkit.evaluators.output_parser import (
+     parse_eval_metric_result,
+ )
+ from unique_toolkit.evaluators.schemas import (
+     EvaluationMetricInput,
+     EvaluationMetricName,
+     EvaluationMetricResult,
+ )
+ from unique_toolkit.language_model.schemas import (
+     LanguageModelMessages,
+     LanguageModelSystemMessage,
+     LanguageModelUserMessage,
+ )
+ from unique_toolkit.language_model.service import LanguageModelService
+
+ logger = logging.getLogger(__name__)
+
+
+ async def check_context_relevancy_async(
+     company_id: str,
+     input: EvaluationMetricInput,
+     config: EvaluationMetricConfig,
+     logger: logging.Logger = logger,
+ ) -> EvaluationMetricResult | None:
+     """
+     Analyzes the relevancy of the context provided for the given input and output.
+     The analysis classifies the context relevancy level as:
+     - low
+     - medium
+     - high
+
+     This method performs the following steps:
+     1. Logs the start of the analysis using the provided `logger`.
+     2. Validates the required fields in the `input` data.
+     3. Retrieves the messages using the `_get_msgs` method.
+     4. Calls `LanguageModelService.complete_async_util` to get a completion result.
+     5. Parses and returns the evaluation metric result based on the content of the completion result.
+
+     Args:
+         company_id (str): The company ID for the analysis.
+         input (EvaluationMetricInput): The input data used for evaluation, including the generated output and reference information.
+         config (EvaluationMetricConfig): Configuration settings for the evaluation.
+         logger (logging.Logger, optional): The logger used for logging information and errors. Defaults to the logger for the current module.
+
+     Returns:
+         EvaluationMetricResult | None: The result of the evaluation, indicating the level of context relevancy. Returns `None` if an error occurs.
+
+     Raises:
+         EvaluatorException: If required fields are missing or an error occurs during the evaluation.
+     """
+     model_name = config.language_model.name
+     logger.info(f"Analyzing context relevancy with {model_name}.")
+
+     input.validate_required_fields(context_relevancy_required_input_fields)
+
+     if input.context_texts is not None and len(input.context_texts) == 0:
+         error_message = "No context texts provided."
+         raise EvaluatorException(
+             user_message=error_message,
+             error_message=error_message,
+         )
+
+     try:
+         msgs = _get_msgs(input, config)
+         result = await LanguageModelService.complete_async_util(
+             company_id=company_id, messages=msgs, model_name=model_name
+         )
+         result_content = result.choices[0].message.content
+         if not result_content:
+             error_message = "Context relevancy evaluation did not return a result."
+             raise EvaluatorException(
+                 error_message=error_message,
+                 user_message=error_message,
+             )
+         return parse_eval_metric_result(
+             result_content, EvaluationMetricName.CONTEXT_RELEVANCY
+         )
+     except Exception as e:
+         error_message = "Error occurred during context relevancy metric analysis"
+         raise EvaluatorException(
+             error_message=f"{error_message}: {e}",
+             user_message=error_message,
+             exception=e,
+         )
+
+
+ def _get_msgs(
+     input: EvaluationMetricInput,
+     config: EvaluationMetricConfig,
+ ):
+     """
+     Composes the messages for context relevancy analysis based on the provided input and configuration.
+
+     Args:
+         input (EvaluationMetricInput): The input data that includes context texts for the analysis.
+         config (EvaluationMetricConfig): The configuration settings for composing messages.
+
+     Returns:
+         LanguageModelMessages: The composed messages as per the provided input and configuration.
+     """
+     system_msg_content = _get_system_prompt(config)
+     system_msg = LanguageModelSystemMessage(content=system_msg_content)
+
+     user_msg_templ = Template(_get_user_prompt(config))
+     user_msg_content = user_msg_templ.substitute(
+         input_text=input.input_text, context_texts=input.get_joined_context_texts()
+     )
+     user_msg = LanguageModelUserMessage(content=user_msg_content)
+     return LanguageModelMessages([system_msg, user_msg])
+
+
+ def _get_system_prompt(config: EvaluationMetricConfig):
+     return config.custom_prompts.setdefault(
+         SYSTEM_MSG_KEY,
+         CONTEXT_RELEVANCY_METRIC_SYSTEM_MSG,
+     )
+
+
+ def _get_user_prompt(config: EvaluationMetricConfig):
+     return config.custom_prompts.setdefault(
+         USER_MSG_KEY,
+         CONTEXT_RELEVANCY_METRIC_USER_MSG,
+     )
@@ -0,0 +1,5 @@
1
+ from unique_toolkit._common.exception import CommonException
2
+
3
+
4
+ class EvaluatorException(CommonException):
5
+ pass
@@ -0,0 +1,41 @@
+ from unique_toolkit.evaluators.config import EvaluationMetricConfig
+ from unique_toolkit.evaluators.hallucination.prompts import (
+     HALLUCINATION_METRIC_SYSTEM_MSG,
+     HALLUCINATION_METRIC_SYSTEM_MSG_DEFAULT,
+     HALLUCINATION_METRIC_USER_MSG,
+     HALLUCINATION_METRIC_USER_MSG_DEFAULT,
+ )
+ from unique_toolkit.evaluators.schemas import (
+     EvaluationMetricInputFieldName,
+     EvaluationMetricName,
+ )
+ from unique_toolkit.language_model.infos import (
+     LanguageModel,
+     LanguageModelName,
+ )
+
+ SYSTEM_MSG_KEY = "systemPrompt"
+ USER_MSG_KEY = "userPrompt"
+ SYSTEM_MSG_DEFAULT_KEY = "systemPromptDefault"
+ USER_MSG_DEFAULT_KEY = "userPromptDefault"
+
+
+ hallucination_metric_default_config = EvaluationMetricConfig(
+     enabled=False,
+     name=EvaluationMetricName.HALLUCINATION,
+     language_model=LanguageModel(LanguageModelName.AZURE_GPT_4_0613),
+     score_to_emoji={"LOW": "🟢", "MEDIUM": "🟡", "HIGH": "🔴"},
+     custom_prompts={
+         SYSTEM_MSG_KEY: HALLUCINATION_METRIC_SYSTEM_MSG,
+         USER_MSG_KEY: HALLUCINATION_METRIC_USER_MSG,
+         SYSTEM_MSG_DEFAULT_KEY: HALLUCINATION_METRIC_SYSTEM_MSG_DEFAULT,
+         USER_MSG_DEFAULT_KEY: HALLUCINATION_METRIC_USER_MSG_DEFAULT,
+     },
+ )
+
+ hallucination_required_input_fields = [
+     EvaluationMetricInputFieldName.INPUT_TEXT,
+     EvaluationMetricInputFieldName.CONTEXT_TEXTS,
+     EvaluationMetricInputFieldName.HISTORY_MESSAGES,
+     EvaluationMetricInputFieldName.OUTPUT_TEXT,
+ ]
@@ -0,0 +1,79 @@
+ HALLUCINATION_METRIC_SYSTEM_MSG = """
+ You will receive a question, references, a conversation between a user and an agent, and an output.
+ The output is the answer to the question.
+ Your task is to evaluate if the output is fully supported by the information provided in the references and conversation, and provide explanations on your judgement in 2 sentences.
+
+ Use the following entailment scale to generate a score:
+ [low] - All information in output is supported by the references/conversation, or extractions from the references/conversation.
+ [medium] - The output is supported by the references/conversation to some extent, but there is at least some information in the output that is not discussed in the references/conversation. For example, if an instruction asks about two concepts and the references/conversation only discusses either of them, it should be considered a [medium] hallucination level.
+ [high] - The output contains information that is not part of the references/conversation, is unrelated to the references/conversation, or contradicts the references/conversation.
+
+ Make sure to not use any external information/knowledge to judge whether the output is true or not. Only check whether the output is supported by the references/conversation, and not whether the output is correct or not. Also do not evaluate if the references/conversation contain further information that is not part of the output but could be relevant to the question.
+
+ Your answer must be in JSON format:
+ {
+ "reason": Your explanation of your judgement of the evaluation,
+ "value": decision, must be one of the following: ["high", "medium", "low"]
+ }
+ """
+
+ HALLUCINATION_METRIC_USER_MSG = """
+ Here is the data:
+
+ Input:
+ '''
+ $input_text
+ '''
+
+ References:
+ '''
+ $contexts_text
+ '''
+
+ Conversation:
+ '''
+ $history_messages_text
+ '''
+
+ Output:
+ '''
+ $output_text
+ '''
+
+ Answer as JSON:
+ """
+
+ HALLUCINATION_METRIC_SYSTEM_MSG_DEFAULT = """
+ You will receive a question and an output.
+ The output is the answer to the question.
+ The situation is that no references could be found to answer the question. Your task is to evaluate if the output contains any information to answer the question,
+ and provide a short explanation of your reasoning in 2 sentences. Also mention in your explanation that no references were provided to answer the question.
+
+ Use the following entailment scale to generate a score:
+ [low] - The output does not contain any information to answer the question.
+ [medium] - The output contains some information to answer the question, but does not answer the question entirely.
+ [high] - The output answers the question.
+
+ It is not considered an answer when the output relates to the question's subject. Make sure to not use any external information/knowledge to judge whether the output is true or not. Only check that the output does not answer the question, and not whether the output is correct or not.
+ Your answer must be in JSON format:
+ {
+ "reason": Your explanation of your reasoning of the evaluation,
+ "value": decision, must be one of the following: ["low", "medium", "high"]
+ }
+ """
+
+ HALLUCINATION_METRIC_USER_MSG_DEFAULT = """
+ Here is the data:
+
+ Input:
+ '''
+ $input_text
+ '''
+
+ Output:
+ '''
+ $output_text
+ '''
+
+ Answer as JSON:
+ """
@@ -0,0 +1,58 @@
+ import logging
+
+ from unique_toolkit.app.schemas import Event
+ from unique_toolkit.evaluators.config import (
+     EvaluationMetricConfig,
+ )
+ from unique_toolkit.evaluators.hallucination.constants import (
+     hallucination_metric_default_config,
+ )
+ from unique_toolkit.evaluators.hallucination.utils import check_hallucination_async
+ from unique_toolkit.evaluators.schemas import (
+     EvaluationMetricInput,
+     EvaluationMetricResult,
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ class HallucinationEvaluator:
+     def __init__(self, event: Event, logger: logging.Logger = logger):
+         self.event = event
+         self.logger = logger
+
+     async def run(
+         self,
+         input: EvaluationMetricInput,
+         config: EvaluationMetricConfig = hallucination_metric_default_config,
+     ) -> EvaluationMetricResult | None:
+         """
+         Analyzes the level of hallucination in the generated output by comparing it with the input
+         and the provided contexts or history. The analysis classifies the hallucination level as:
+         - low
+         - medium
+         - high
+
+         If no contexts or history are referenced in the generated output, the method verifies
+         that the output does not contain any relevant information to answer the question.
+
+         This method calls `check_hallucination_async` to perform the actual analysis. The `check_hallucination_async`
+         function handles the evaluation using the company ID from the event, the provided input, and the configuration.
+
+         Args:
+             input (EvaluationMetricInput): The input data used for evaluation, including the generated output and reference information.
+             config (EvaluationMetricConfig, optional): Configuration settings for the evaluation. Defaults to `hallucination_metric_default_config`.
+
+         Returns:
+             EvaluationMetricResult | None: The result of the evaluation, indicating the level of hallucination. Returns `None` if the analysis cannot be performed.
+
+         Raises:
+             EvaluatorException: If the context texts are empty, required fields are missing, or an error occurs during the evaluation.
+         """
+         if config.enabled is False:
+             self.logger.info("Hallucination metric is not enabled.")
+             return None
+
+         return await check_hallucination_async(
+             company_id=self.event.company_id, input=input, config=config
+         )
@@ -0,0 +1,201 @@
+ import logging
+ from string import Template
+
+ from unique_toolkit.evaluators.config import (
+     EvaluationMetricConfig,
+ )
+ from unique_toolkit.evaluators.exception import EvaluatorException
+ from unique_toolkit.evaluators.hallucination.constants import (
+     SYSTEM_MSG_DEFAULT_KEY,
+     SYSTEM_MSG_KEY,
+     USER_MSG_DEFAULT_KEY,
+     USER_MSG_KEY,
+     hallucination_required_input_fields,
+ )
+ from unique_toolkit.evaluators.output_parser import (
+     parse_eval_metric_result,
+ )
+ from unique_toolkit.evaluators.schemas import (
+     EvaluationMetricInput,
+     EvaluationMetricName,
+     EvaluationMetricResult,
+ )
+ from unique_toolkit.language_model.schemas import (
+     LanguageModelMessages,
+     LanguageModelSystemMessage,
+     LanguageModelUserMessage,
+ )
+ from unique_toolkit.language_model.service import LanguageModelService
+
+ from .prompts import (
+     HALLUCINATION_METRIC_SYSTEM_MSG,
+     HALLUCINATION_METRIC_SYSTEM_MSG_DEFAULT,
+     HALLUCINATION_METRIC_USER_MSG,
+     HALLUCINATION_METRIC_USER_MSG_DEFAULT,
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ async def check_hallucination_async(
+     company_id: str,
+     input: EvaluationMetricInput,
+     config: EvaluationMetricConfig,
+     logger: logging.Logger = logger,
+ ) -> EvaluationMetricResult | None:
+     """
+     Analyzes the level of hallucination in the generated output by comparing it with the provided input
+     and the contexts or history. The analysis classifies the hallucination level as:
+     - low
+     - medium
+     - high
+
+     If no contexts or history are referenced in the generated output, the method checks that the output
+     does not contain any relevant information to answer the question.
+
+     This method performs the following steps:
+     1. Checks if the hallucination metric is enabled using the provided `config`.
+     2. Logs the start of the analysis using the provided `logger`.
+     3. Validates the required fields in the `input` data.
+     4. Retrieves the messages using the `_get_msgs` method.
+     5. Calls `LanguageModelService.complete_async_util` to get a completion result.
+     6. Parses and returns the evaluation metric result based on the content of the completion result.
+
+     Args:
+         company_id (str): The company ID for the analysis.
+         input (EvaluationMetricInput): The input data used for evaluation, including the generated output and reference information.
+         config (EvaluationMetricConfig): Configuration settings for the evaluation.
+         logger (logging.Logger, optional): The logger used for logging information and errors. Defaults to the logger for the current module.
+
+     Returns:
+         EvaluationMetricResult | None: The result of the evaluation, indicating the level of hallucination. Returns `None` if the metric is not enabled or if an error occurs.
+
+     Raises:
+         EvaluatorException: If the context texts are empty, required fields are missing, or an error occurs during the evaluation.
+     """
+     model_name = config.language_model.name
+     logger.info(f"Analyzing level of hallucination with {model_name}.")
+
+     input.validate_required_fields(hallucination_required_input_fields)
+
+     try:
+         msgs = _get_msgs(input, config, logger)
+         result = await LanguageModelService.complete_async_util(
+             company_id=company_id, messages=msgs, model_name=model_name
+         )
+         result_content = result.choices[0].message.content
+         if not result_content:
+             error_message = "Hallucination evaluation did not return a result."
+             raise EvaluatorException(
+                 error_message=error_message,
+                 user_message=error_message,
+             )
+         return parse_eval_metric_result(
+             result_content, EvaluationMetricName.HALLUCINATION
+         )
+     except Exception as e:
+         error_message = "Error occurred during hallucination metric analysis"
+         raise EvaluatorException(
+             error_message=f"{error_message}: {e}",
+             user_message=error_message,
+             exception=e,
+         )
+
+
+ def _get_msgs(
+     input: EvaluationMetricInput,
+     config: EvaluationMetricConfig,
+     logger: logging.Logger,
+ ):
+     """
+     Composes the messages for hallucination analysis based on the provided input and configuration.
+
+     This method decides how to compose the messages based on the availability of context texts and history
+     message texts in the `input`.
+
+     Args:
+         input (EvaluationMetricInput): The input data that includes context texts and history message texts
+             for the analysis.
+         config (EvaluationMetricConfig): The configuration settings for composing messages.
+         logger (logging.Logger): The logger used for logging debug information.
+
+     Returns:
+         The composed messages as per the provided input and configuration. The exact type and structure
+         depend on the implementation of the `_compose_msgs` and `_compose_msgs_default` methods.
+
+     """
+     if input.context_texts or input.history_messages:
+         logger.debug("Using context / history for hallucination evaluation.")
+         return _compose_msgs(input, config)
+     else:
+         logger.debug("No contexts and history provided for hallucination evaluation.")
+         return _compose_msgs_default(input, config)
+
+
+ def _compose_msgs(
+     input: EvaluationMetricInput,
+     config: EvaluationMetricConfig,
+ ):
+     """
+     Composes the hallucination analysis messages.
+     """
+     system_msg_content = _get_system_prompt_with_contexts(config)
+     system_msg = LanguageModelSystemMessage(content=system_msg_content)
+
+     user_msg_templ = Template(_get_user_prompt_with_contexts(config))
+     user_msg_content = user_msg_templ.substitute(
+         input_text=input.input_text,
+         contexts_text=input.get_joined_context_texts(tag_name="reference"),
+         history_messages_text=input.get_joined_history_texts(tag_name="conversation"),
+         output_text=input.output_text,
+     )
+     user_msg = LanguageModelUserMessage(content=user_msg_content)
+     return LanguageModelMessages([system_msg, user_msg])
+
+
+ def _compose_msgs_default(
+     input: EvaluationMetricInput,
+     config: EvaluationMetricConfig,
+ ):
+     """
+     Composes the hallucination analysis prompt without contexts or history.
+     """
+     system_msg_content = _get_system_prompt_default(config)
+     system_msg = LanguageModelSystemMessage(content=system_msg_content)
+
+     user_msg_templ = Template(_get_user_prompt_default(config))
+     user_msg_content = user_msg_templ.substitute(
+         input_text=input.input_text,
+         output_text=input.output_text,
+     )
+     user_msg = LanguageModelUserMessage(content=user_msg_content)
+     return LanguageModelMessages([system_msg, user_msg])
+
+
+ def _get_system_prompt_with_contexts(config: EvaluationMetricConfig):
+     return config.custom_prompts.setdefault(
+         SYSTEM_MSG_KEY,
+         HALLUCINATION_METRIC_SYSTEM_MSG,
+     )
+
+
+ def _get_user_prompt_with_contexts(config: EvaluationMetricConfig):
+     return config.custom_prompts.setdefault(
+         USER_MSG_KEY,
+         HALLUCINATION_METRIC_USER_MSG,
+     )
+
+
+ def _get_system_prompt_default(config: EvaluationMetricConfig):
+     return config.custom_prompts.setdefault(
+         SYSTEM_MSG_DEFAULT_KEY,
+         HALLUCINATION_METRIC_SYSTEM_MSG_DEFAULT,
+     )
+
+
+ def _get_user_prompt_default(config: EvaluationMetricConfig):
+     return config.custom_prompts.setdefault(
+         USER_MSG_DEFAULT_KEY,
+         HALLUCINATION_METRIC_USER_MSG_DEFAULT,
+     )
@@ -0,0 +1,30 @@
1
+ from unique_toolkit.evaluators.exception import EvaluatorException
2
+ from unique_toolkit.evaluators.schemas import (
3
+ EvaluationMetricName,
4
+ EvaluationMetricResult,
5
+ )
6
+ from unique_toolkit.language_model.utils import convert_string_to_json
7
+
8
+
9
+ def parse_eval_metric_result(
10
+ result: str,
11
+ metric_name: EvaluationMetricName,
12
+ ):
13
+ """
14
+ Parses the evaluation metric result.
15
+ """
16
+
17
+ try:
18
+ parsed_result = convert_string_to_json(result)
19
+ except Exception as e:
20
+ error_message = "Error occurred during parsing the evaluation metric result"
21
+ raise EvaluatorException(
22
+ user_message=f"{error_message}.",
23
+ error_message=f"{error_message}: {str(e)}",
24
+ )
25
+
26
+ return EvaluationMetricResult(
27
+ name=metric_name,
28
+ value=parsed_result.get("value", "None"),
29
+ reason=parsed_result.get("reason", "None"),
30
+ )
@@ -0,0 +1,82 @@
1
+ from enum import Enum
2
+ from typing import Optional
3
+
4
+ from pydantic import BaseModel
5
+
6
+ from unique_toolkit.chat import ChatMessage
7
+ from unique_toolkit.evaluators.exception import EvaluatorException
8
+
9
+
10
+ class EvaluationMetricName(Enum):
11
+ HALLUCINATION = "hallucination"
12
+ CONTEXT_RELEVANCY = "relevancy"
13
+
14
+
15
+ class EvaluationMetricInputFieldName(str, Enum):
16
+ INPUT_TEXT = "input_text"
17
+ CONTEXT_TEXTS = "context_texts"
18
+ HISTORY_MESSAGES = "history_messages"
19
+ OUTPUT_TEXT = "output_text"
20
+
21
+
22
+ class EvaluationMetricInput(BaseModel):
23
+ """
24
+ Input for any metric evaluation. Depending on the metric, the input can be different.
25
+ """
26
+
27
+ input_text: Optional[str] = None
28
+ context_texts: Optional[list[str]] = None
29
+ history_messages: Optional[list[ChatMessage]] = None
30
+ output_text: Optional[str] = None
31
+
32
+ def get_joined_context_texts(self, tag_name: str = "reference") -> str:
33
+ """
34
+ Concatenates context_texts.
35
+ """
36
+ if not self.context_texts:
37
+ return f"<No {tag_name} texts provided>"
38
+
39
+ return "\n".join(
40
+ [
41
+ f"<{tag_name}-{index}>{text}</{tag_name}-{index}>"
42
+ for index, text in enumerate(self.context_texts)
43
+ ]
44
+ )
45
+
46
+ def get_history_message_text(self, chat_message: ChatMessage):
47
+ return f"{chat_message.role.value}: {chat_message.content}"
48
+
49
+ def get_history_message_texts(self):
50
+ if not self.history_messages:
51
+ return []
52
+ return [self.get_history_message_text(msg) for msg in self.history_messages]
53
+
54
+ def get_joined_history_texts(self, tag_name: str = "conversation") -> str:
55
+ """
56
+ Concatenates history message texts.
57
+ """
58
+ if not self.history_messages:
59
+ return f"<No {tag_name} texts provided>"
60
+
61
+ return "\n".join(self.get_history_message_texts())
62
+
63
+ def validate_required_fields(
64
+ self, required_fields: list[EvaluationMetricInputFieldName]
65
+ ):
66
+ """
67
+ Validates the input fields for the hallucination metric.
68
+ """
69
+ for field in required_fields:
70
+ value = getattr(self, field)
71
+ if value is None:
72
+ error_message = f"Missing required input field: {field}"
73
+ raise EvaluatorException(
74
+ user_message=error_message,
75
+ error_message=error_message,
76
+ )
77
+
78
+
79
+ class EvaluationMetricResult(BaseModel):
80
+ name: EvaluationMetricName
81
+ value: str
82
+ reason: str
@@ -20,6 +20,35 @@ class LanguageModelName(StrEnum):
      AZURE_GPT_4o_MINI_2024_0718 = "AZURE_GPT_4o_MINI_2024_0718"


+ class EncoderName(StrEnum):
+     O200K_BASE = "o200k_base"
+     CL100K_BASE = "cl100k_base"
+
+
+ def get_encoder_name(model_name: LanguageModelName) -> Optional[EncoderName]:
+     LMN = LanguageModelName
+     match model_name:
+         case (
+             LMN.AZURE_GPT_35_TURBO
+             | LMN.AZURE_GPT_35_TURBO_16K
+             | LMN.AZURE_GPT_35_TURBO_0613
+         ):
+             return EncoderName.CL100K_BASE
+         case (
+             LMN.AZURE_GPT_4_0613
+             | LMN.AZURE_GPT_4_TURBO_1106
+             | LMN.AZURE_GPT_4_VISION_PREVIEW
+             | LMN.AZURE_GPT_4_32K_0613
+             | LMN.AZURE_GPT_4_TURBO_2024_0409
+         ):
+             return EncoderName.CL100K_BASE
+         case LMN.AZURE_GPT_4o_2024_0513 | LMN.AZURE_GPT_4o_MINI_2024_0718:
+             return EncoderName.O200K_BASE
+         case _:
+             print(f"{model_name} is not supported. Please add encoder information.")
+             return None
+
+
  class LanguageModelProvider(StrEnum):
      AZURE = "AZURE"
      CUSTOM = "CUSTOM"
@@ -30,6 +59,7 @@ class LanguageModelInfo(BaseModel):
      version: str
      provider: LanguageModelProvider

+     encoder_name: Optional[EncoderName] = None
      token_limits: Optional[LanguageModelTokenLimits] = None

      info_cutoff_at: Optional[date] = None
@@ -53,6 +83,7 @@ class LanguageModel:
      - name
      - version
      - provider
+     - encoder_name
      - token_limits
      - info_cutoff_at
      - published_at
@@ -86,6 +117,13 @@ class LanguageModel:
          """
          return self._model_info.version

+     @property
+     def encoder_name(self) -> Optional[EncoderName]:
+         """
+         Returns the encoder_name used for the model.
+         """
+         return self._model_info.encoder_name
+
      @property
      def token_limit(self) -> Optional[int]:
          """
@@ -191,6 +229,7 @@ def create_language_model(
      provider: LanguageModelProvider,
      info_cutoff_at: date,
      published_at: date,
+     encoder_name: Optional[EncoderName] = None,
      token_limit: Optional[int] = None,
      token_limit_input: Optional[int] = None,
      token_limit_output: Optional[int] = None,
@@ -202,6 +241,7 @@
          name=name,
          version=version,
          provider=provider,
+         encoder_name=encoder_name,
          token_limits=LanguageModelTokenLimits(
              token_limit=token_limit,
              token_limit_input=token_limit_input,
@@ -229,6 +269,7 @@ AzureGpt35Turbo0613 = create_language_model(
      name=LanguageModelName.AZURE_GPT_35_TURBO_0613,
      provider=LanguageModelProvider.AZURE,
      version="0613",
+     encoder_name=get_encoder_name(LanguageModelName.AZURE_GPT_35_TURBO_0613),
      token_limit=8192,
      info_cutoff_at=date(2021, 9, 1),
      published_at=date(2023, 6, 13),
@@ -239,6 +280,7 @@ AzureGpt35Turbo = create_language_model(
      name=LanguageModelName.AZURE_GPT_35_TURBO,
      provider=LanguageModelProvider.AZURE,
      version="0301",
+     encoder_name=get_encoder_name(LanguageModelName.AZURE_GPT_35_TURBO),
      token_limit=4096,
      info_cutoff_at=date(2021, 9, 1),
      published_at=date(2023, 3, 1),
@@ -249,6 +291,7 @@ AzureGpt35Turbo16k = create_language_model(
      name=LanguageModelName.AZURE_GPT_35_TURBO_16K,
      provider=LanguageModelProvider.AZURE,
      version="0613",
+     encoder_name=get_encoder_name(LanguageModelName.AZURE_GPT_35_TURBO_16K),
      token_limit=16382,
      info_cutoff_at=date(2021, 9, 1),
      published_at=date(2023, 6, 13),
@@ -260,6 +303,7 @@ AzureGpt40613 = create_language_model(
      name=LanguageModelName.AZURE_GPT_4_0613,
      provider=LanguageModelProvider.AZURE,
      version="0613",
+     encoder_name=get_encoder_name(LanguageModelName.AZURE_GPT_4_0613),
      token_limit=8192,
      info_cutoff_at=date(2021, 9, 1),
      published_at=date(2023, 6, 13),
@@ -272,6 +316,7 @@ AzureGpt4Turbo1106 = create_language_model(
      name=LanguageModelName.AZURE_GPT_4_TURBO_1106,
      provider=LanguageModelProvider.AZURE,
      version="1106-preview",
+     encoder_name=get_encoder_name(LanguageModelName.AZURE_GPT_4_TURBO_1106),
      token_limit_input=128000,
      token_limit_output=4096,
      info_cutoff_at=date(2023, 4, 1),
@@ -283,6 +328,7 @@ AzureGpt4VisionPreview = create_language_model(
      name=LanguageModelName.AZURE_GPT_4_VISION_PREVIEW,
      provider=LanguageModelProvider.AZURE,
      version="vision-preview",
+     encoder_name=get_encoder_name(LanguageModelName.AZURE_GPT_4_VISION_PREVIEW),
      token_limit_input=128000,
      token_limit_output=4096,
      info_cutoff_at=date(2023, 4, 1),
@@ -293,6 +339,7 @@ AzureGpt432k0613 = create_language_model(
      name=LanguageModelName.AZURE_GPT_4_32K_0613,
      provider=LanguageModelProvider.AZURE,
      version="1106-preview",
+     encoder_name=get_encoder_name(LanguageModelName.AZURE_GPT_4_32K_0613),
      token_limit=32768,
      info_cutoff_at=date(2021, 9, 1),
      published_at=date(2023, 6, 13),
@@ -302,6 +349,7 @@ AzureGpt432k0613 = create_language_model(

  AzureGpt4Turbo20240409 = create_language_model(
      name=LanguageModelName.AZURE_GPT_4_TURBO_2024_0409,
+     encoder_name=get_encoder_name(LanguageModelName.AZURE_GPT_4_TURBO_2024_0409),
      provider=LanguageModelProvider.AZURE,
      version="turbo-2024-04-09",
      token_limit_input=128000,
@@ -312,6 +360,7 @@ AzureGpt4Turbo20240409 = create_language_model(

  AzureGpt4o20240513 = create_language_model(
      name=LanguageModelName.AZURE_GPT_4o_2024_0513,
+     encoder_name=get_encoder_name(LanguageModelName.AZURE_GPT_4o_2024_0513),
      provider=LanguageModelProvider.AZURE,
      version="2024-05-13",
      token_limit_input=128000,
@@ -324,6 +373,7 @@ AzureGpt4oMini20240718 = create_language_model(
      name=LanguageModelName.AZURE_GPT_4o_MINI_2024_0718,
      provider=LanguageModelProvider.AZURE,
      version="2024-07-18",
+     encoder_name=get_encoder_name(LanguageModelName.AZURE_GPT_4o_MINI_2024_0718),
      token_limit_input=128000,
      token_limit_output=16384,
      info_cutoff_at=date(2023, 10, 1),
@@ -3,7 +3,14 @@ from enum import StrEnum
  from typing import Any, Optional, Self

  from humps import camelize
- from pydantic import BaseModel, ConfigDict, RootModel, field_validator, model_validator
+ from pydantic import (
+     BaseModel,
+     ConfigDict,
+     Field,
+     RootModel,
+     field_validator,
+     model_validator,
+ )

  # set config to convert camelCase to snake_case
  model_config = ConfigDict(
@@ -173,7 +180,11 @@ class LanguageModelToolParameters(BaseModel):


  class LanguageModelTool(BaseModel):
-     name: str
+     name: str = Field(
+         ...,
+         pattern=r"^[a-zA-Z_-]+$",
+         description="Name must adhere to the pattern ^[a-zA-Z_-]+$",
+     )
      description: str
      parameters: LanguageModelToolParameters
      returns: LanguageModelToolParameterProperty | LanguageModelToolParameters | None = (
@@ -196,6 +196,7 @@ class LanguageModelService(BaseService):
              The LanguageModelStreamResponse object once the stream has finished.
          """
          options = self._add_tools_to_options({}, tools)
+         options["temperature"] = temperature
          search_context = self._to_search_context(content_chunks)
          messages = messages.model_dump(exclude_none=True)
          model = (
@@ -217,7 +218,6 @@ class LanguageModelService(BaseService):
              # TODO change or extend types in unique_sdk
              model=model,
              timeout=timeout,
-             temperature=temperature,
              assistantId=self.event.payload.assistant_id,
              debugInfo=debug_info,
              options=options,  # type: ignore
@@ -257,6 +257,7 @@ class LanguageModelService(BaseService):
          """

          options = self._add_tools_to_options({}, tools)
+         options["temperature"] = temperature
          search_context = self._to_search_context(content_chunks)
          messages = messages.model_dump(exclude_none=True, exclude=["tool_calls"])
          model = (
@@ -277,7 +278,6 @@ class LanguageModelService(BaseService):
              searchContext=search_context,
              model=model,
              timeout=timeout,
-             temperature=temperature,
              assistantId=self.event.payload.assistant_id,
              debugInfo=debug_info,
              # TODO change or extend types in unique_sdk
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: unique_toolkit
- Version: 0.5.24
+ Version: 0.5.28
  Summary:
  License: Proprietary
  Author: Martin Fadler
@@ -17,7 +17,7 @@ Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
  Requires-Dist: regex (>=2024.5.15,<2025.0.0)
  Requires-Dist: tiktoken (>=0.7.0,<0.8.0)
  Requires-Dist: typing-extensions (>=4.9.0,<5.0.0)
- Requires-Dist: unique-sdk (>=0.9.7,<0.10.0)
+ Requires-Dist: unique-sdk (>=0.9.8,<0.10.0)
  Description-Content-Type: text/markdown

  # Unique Toolkit
@@ -100,6 +100,20 @@ All notable changes to this project will be documented in this file.
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+ ## [0.5.28] - 2024-10-23
+ - Correctly use the `temperature` parameter in the `LanguageModelService.complete`, `LanguageModelService.complete_async`, `LanguageModelService.stream_complete` and `LanguageModelService.stream_complete_async` methods
+
+ ## [0.5.27] - 2024-10-22
+ - Add encoder_name to the language model info
+ - Verify the tool name for `LanguageModelTool` to conform with frontend requirements
+ - Add `search_content_on_chat` to `ContentService`
+
+ ## [0.5.26] - 2024-10-16
+ - Bump `unique_sdk` version
+
+ ## [0.5.25] - 2024-09-26
+ - Add `evaluators` for hallucination and context relevancy evaluation
+
  ## [0.5.24] - 2024-09-26
  - Add `originalText` to `_construct_message_modify_params` and `_construct_message_create_params`. This addition makes sure that the `originalText` on the database is populated with the `text`

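To illustrate the 0.5.27 entry above: `LanguageModelTool.name` is now constrained to the pattern `^[a-zA-Z_-]+$`. A minimal sketch of what that pattern accepts and rejects; the example names are invented.

```python
# Sketch of the tool-name constraint added to LanguageModelTool in 0.5.27.
import re

TOOL_NAME_PATTERN = re.compile(r"^[a-zA-Z_-]+$")

assert TOOL_NAME_PATTERN.match("lookup_hr-policies")    # letters, "_" and "-" are fine
assert not TOOL_NAME_PATTERN.match("lookup docs")       # spaces are rejected
assert not TOOL_NAME_PATTERN.match("lookup_docs_v2")    # digits are rejected
```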
@@ -1,6 +1,8 @@
  unique_toolkit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  unique_toolkit/_common/_base_service.py,sha256=S8H0rAebx7GsOldA7xInLp3aQJt9yEPDQdsGSFRJsGg,276
  unique_toolkit/_common/_time_utils.py,sha256=ztmTovTvr-3w71Ns2VwXC65OKUUh-sQlzbHdKTQWm-w,135
+ unique_toolkit/_common/exception.py,sha256=caQIE1btsQnpKCHqL2cgWUSbHup06enQu_Pt7uGUTTE,727
+ unique_toolkit/_common/validators.py,sha256=w5lzvRxl0sBTvv0CXLF9UwtJyKmmS2lez0KXaqapgBE,258
  unique_toolkit/app/__init__.py,sha256=sZyGrz74jBlAjv6OcHgcp6VtP6-AKKpaVYjakr1Xk60,735
  unique_toolkit/app/init_logging.py,sha256=Sh26SRxOj8i8dzobKhYha2lLrkrMTHfB1V4jR3h23gQ,678
  unique_toolkit/app/init_sdk.py,sha256=Nv4Now4pMfM0AgRhbtatLpm_39rKxn0WmRLwmPhRl-8,1285
@@ -15,18 +17,30 @@ unique_toolkit/chat/state.py,sha256=Cjgwv_2vhDFbV69xxsn7SefhaoIAEqLx3ferdVFCnOg,
  unique_toolkit/chat/utils.py,sha256=ihm-wQykBWhB4liR3LnwPVPt_qGW6ETq21Mw4HY0THE,854
  unique_toolkit/content/__init__.py,sha256=MSH2sxjQyKD2Sef92fzE5Dt9SihdzivB6yliSwJfTmQ,890
  unique_toolkit/content/schemas.py,sha256=zks_Pkki2VhxICJJgHZyc-LPmRuj5dLbw3pgcUT7SW8,2362
- unique_toolkit/content/service.py,sha256=AHyMJTXm5IpYbg1uINzjGqvSL_5aJwEHwSH7Y5pkXBg,14028
+ unique_toolkit/content/service.py,sha256=ZGYWYTphXpcByXyMqr1VOVUHdmdnsR-XIS_YRX0Wyv4,14211
  unique_toolkit/content/utils.py,sha256=Lake671plRsqNvO3pN_rmyVcpwbdED_KQpLcCnc4lv4,6902
  unique_toolkit/embedding/__init__.py,sha256=dr8M9jvslQTxPpxgaGwzxY0FildiWf-DidN_cahPAWw,191
  unique_toolkit/embedding/schemas.py,sha256=1GvKCaSk4jixzVQ2PKq8yDqwGEVY_hWclYtoAr6CC2g,96
  unique_toolkit/embedding/service.py,sha256=Iiw-sbdkjuWlWMfLM9qyC4GNTJOotQAaVjkYvh5Su4Y,2370
  unique_toolkit/embedding/utils.py,sha256=v86lo__bCJbxZBQ3OcLu5SuwT6NbFfWlcq8iyk6BuzQ,279
+ unique_toolkit/evaluators/config.py,sha256=JRSHJvIjioXDMgd9hodK10J-52j3LMgJFvG0Vy7ePa8,1056
+ unique_toolkit/evaluators/context_relevancy/constants.py,sha256=YErC92sqsY31cmBUG3dFQw78mUjbcpjMG7TLfYuLYmw,1051
+ unique_toolkit/evaluators/context_relevancy/prompts.py,sha256=gTlWP7fDuxhrXhCYNCqXMbCey_DalZMdi5l-a6RHgk0,713
+ unique_toolkit/evaluators/context_relevancy/service.py,sha256=9hzdMuF4A4T97-3X3zcXgrDISLn1bleZ6tTL1bHa9dQ,1722
+ unique_toolkit/evaluators/context_relevancy/utils.py,sha256=DCFaoxZT_qDMKirjy3hTo1DIE7HpZ7-XR5P-rHuAoHQ,5137
+ unique_toolkit/evaluators/exception.py,sha256=7lcVbCyoN4Md1chNJDFxpUYyWbVrcr9dcc3TxWykJTc,115
+ unique_toolkit/evaluators/hallucination/constants.py,sha256=DEycXlxY9h01D0iF3aU5LIdPrDJ-5OkF0VdXDLn_tSs,1440
+ unique_toolkit/evaluators/hallucination/prompts.py,sha256=9yCpO_WGLDvYfPWKL1VuRA-jt0P_-A-qvLUOmuv-Nks,3320
+ unique_toolkit/evaluators/hallucination/service.py,sha256=k8qro5Lw4Ak58m4HYp3G4HPLIaexeFySIIVvW6fAdeA,2408
+ unique_toolkit/evaluators/hallucination/utils.py,sha256=507BsX1mFTEne1-LdRCNMgBj-IXSFvBj1t3BPe1UkGs,7639
+ unique_toolkit/evaluators/output_parser.py,sha256=eI72qkzK1dZyUvnfP2SOAQCGBj_-PwX5wy_aLPMsJMY,883
+ unique_toolkit/evaluators/schemas.py,sha256=Jaue6Uhx75X1CyHKWj8sT3RE1JZXTqoLtfLt2xQNCX8,2507
  unique_toolkit/language_model/__init__.py,sha256=YuhyczGPj6w9xX-sOVUhmozvzIFxcckHFEkeMBecr5s,1784
- unique_toolkit/language_model/infos.py,sha256=ETAUV0YTs6BjwuiTdhKz247CtL0W8Jwo3-c0ZQ2HdXs,9962
- unique_toolkit/language_model/schemas.py,sha256=sLpE29Ks0zEfhZUQrYOt1Cak2xzQcr9fpTXFDHkfURA,4868
- unique_toolkit/language_model/service.py,sha256=CvVo5CBa5Ia_fQD3DtJRsVChybuUfGFV5ml2_78_p1I,13395
+ unique_toolkit/language_model/infos.py,sha256=Oxkr9_6s8gFubxjox-iCm1GSs1RCAQQ5t8oh20izlC0,12002
+ unique_toolkit/language_model/schemas.py,sha256=LO3QHsyFuJXG3HxXWFf44QV28JJzW8YW5TeIYhVzZTI,5035
+ unique_toolkit/language_model/service.py,sha256=R8j2cr-lDbR96Vl5LVQIdtscS0gfscezKMXNMM2AZHM,13403
  unique_toolkit/language_model/utils.py,sha256=WBPj1XKkDgxy_-T8HCZvsfkkSzj_1w4UZzNmyvdbBLY,1081
- unique_toolkit-0.5.24.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
- unique_toolkit-0.5.24.dist-info/METADATA,sha256=N63Q5_PfoinaFgt94EqZ3lfCOdKRCEcCbz1vivjBRyg,11938
- unique_toolkit-0.5.24.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- unique_toolkit-0.5.24.dist-info/RECORD,,
+ unique_toolkit-0.5.28.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
+ unique_toolkit-0.5.28.dist-info/METADATA,sha256=mvFUKA2gYyPWx19cex2Y6YIdoZ-WSwx0LeTGymenwVo,12521
+ unique_toolkit-0.5.28.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ unique_toolkit-0.5.28.dist-info/RECORD,,
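For orientation on the new `encoder_name` property (0.5.27), a minimal sketch; it assumes the `EncoderName` values ("o200k_base", "cl100k_base") map directly onto tiktoken encoding names, as the enum values suggest, and that tiktoken (already a declared dependency) is installed.

```python
# Sketch: resolving a tiktoken tokenizer from the new encoder_name property.
import tiktoken

from unique_toolkit.language_model.infos import LanguageModel, LanguageModelName

model = LanguageModel(LanguageModelName.AZURE_GPT_4o_2024_0513)
if model.encoder_name is not None:
    encoding = tiktoken.get_encoding(model.encoder_name.value)
    print(len(encoding.encode("How many tokens is this sentence?")))
```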