unique_toolkit 1.45.5__py3-none-any.whl → 1.45.7__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- unique_toolkit/agentic/evaluation/config.py +25 -6
- unique_toolkit/agentic/evaluation/context_relevancy/prompts/__init__.py +13 -0
- unique_toolkit/agentic/evaluation/context_relevancy/{prompts.py → prompts/system_prompt.j2} +11 -43
- unique_toolkit/agentic/evaluation/context_relevancy/prompts/user_prompt.j2 +15 -0
- unique_toolkit/agentic/evaluation/context_relevancy/service.py +24 -56
- unique_toolkit/agentic/evaluation/hallucination/constants.py +26 -15
- unique_toolkit/agentic/evaluation/hallucination/prompts/__init__.py +13 -0
- unique_toolkit/agentic/evaluation/hallucination/prompts/system_prompt.j2 +35 -0
- unique_toolkit/agentic/evaluation/hallucination/prompts/user_prompt.j2 +27 -0
- unique_toolkit/agentic/evaluation/hallucination/utils.py +153 -102
- unique_toolkit/agentic/evaluation/tests/fixtures.py +102 -0
- unique_toolkit/agentic/evaluation/tests/test_config.py +247 -0
- unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py +141 -121
- unique_toolkit/agentic/evaluation/tests/test_hallucination_constants.py +600 -0
- unique_toolkit/agentic/evaluation/tests/test_hallucination_utils.py +1009 -0
- unique_toolkit/agentic/evaluation/tests/test_output_parser.py +82 -23
- unique_toolkit/agentic/evaluation/tests/test_prompt_loaders.py +348 -0
- unique_toolkit/agentic/evaluation/utils.py +8 -0
- unique_toolkit/chat/responses_api.py +49 -45
- {unique_toolkit-1.45.5.dist-info → unique_toolkit-1.45.7.dist-info}/METADATA +9 -1
- {unique_toolkit-1.45.5.dist-info → unique_toolkit-1.45.7.dist-info}/RECORD +23 -13
- unique_toolkit/agentic/evaluation/hallucination/prompts.py +0 -79
- {unique_toolkit-1.45.5.dist-info → unique_toolkit-1.45.7.dist-info}/LICENSE +0 -0
- {unique_toolkit-1.45.5.dist-info → unique_toolkit-1.45.7.dist-info}/WHEEL +0 -0
|
@@ -69,21 +69,31 @@ unique_toolkit/_common/validators.py,sha256=ElnkMsyEY24TfzfTVHvireyT39EnZgW5N40T
|
|
|
69
69
|
unique_toolkit/agentic/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
70
70
|
unique_toolkit/agentic/debug_info_manager/debug_info_manager.py,sha256=30ZZaw0vffjZjiu9AYdO1Sm8G9FN6XR2ehdOEUCKqh0,891
|
|
71
71
|
unique_toolkit/agentic/debug_info_manager/test/test_debug_info_manager.py,sha256=_fIS6_DHA8A3AB64-LPgHgUGa1w0CFUWwtgV-ZbhkzA,10535
|
|
72
|
-
unique_toolkit/agentic/evaluation/config.py,sha256=
|
|
73
|
-
unique_toolkit/agentic/evaluation/context_relevancy/prompts.py,sha256=
|
|
72
|
+
unique_toolkit/agentic/evaluation/config.py,sha256=u5-iuT-4mfA2_9UZjZ-TN3YutVf35aR4EsF9l61Odnk,1696
|
|
73
|
+
unique_toolkit/agentic/evaluation/context_relevancy/prompts/__init__.py,sha256=IKGvHW2viBylTUjEJNKvIXO4YrYwdJXBHSxA9rKtiRI,355
|
|
74
|
+
unique_toolkit/agentic/evaluation/context_relevancy/prompts/system_prompt.j2,sha256=PFT9Y7QQ85h-ronwK8sq6zJJrxfrZhvZkYHXspMdDHU,985
|
|
75
|
+
unique_toolkit/agentic/evaluation/context_relevancy/prompts/user_prompt.j2,sha256=9HXm0Qg0xAPIE5mnaUiHrscolD_qO7f83qlIjXiP1n4,150
|
|
74
76
|
unique_toolkit/agentic/evaluation/context_relevancy/schema.py,sha256=lZd0TPzH43ifgWWGg3WO6b1AQX8aK2R9y51yH0d1DHM,2919
|
|
75
|
-
unique_toolkit/agentic/evaluation/context_relevancy/service.py,sha256=
|
|
77
|
+
unique_toolkit/agentic/evaluation/context_relevancy/service.py,sha256=dsgpfKRSg9B4kjLhHJD_Kath4GVhHE-ZOVAGRkiCz20,8729
|
|
76
78
|
unique_toolkit/agentic/evaluation/evaluation_manager.py,sha256=wDN_Uuut9kEGek8JY3QeInKpF-ukbvOSKOVd7DHFT3Q,8121
|
|
77
79
|
unique_toolkit/agentic/evaluation/exception.py,sha256=7lcVbCyoN4Md1chNJDFxpUYyWbVrcr9dcc3TxWykJTc,115
|
|
78
|
-
unique_toolkit/agentic/evaluation/hallucination/constants.py,sha256
|
|
80
|
+
unique_toolkit/agentic/evaluation/hallucination/constants.py,sha256=-PnZ3N9VpwgbIe6hcUye40nvJa-JIRuTidCZAQwZ3GA,2473
|
|
79
81
|
unique_toolkit/agentic/evaluation/hallucination/hallucination_evaluation.py,sha256=x5ta2Fum4fE5ySgIXPKlnbTtmV140z0IazSATd0-REg,4092
|
|
80
|
-
unique_toolkit/agentic/evaluation/hallucination/prompts.py,sha256=
|
|
82
|
+
unique_toolkit/agentic/evaluation/hallucination/prompts/__init__.py,sha256=4KFYMZsB3fJUKzoiUJE1npZ0gueWgvceB32EUrN-v7A,343
|
|
83
|
+
unique_toolkit/agentic/evaluation/hallucination/prompts/system_prompt.j2,sha256=sDUX6G645Ba40D_qKu4cUI8g-sJOfG8JpZreTNFgf7M,2616
|
|
84
|
+
unique_toolkit/agentic/evaluation/hallucination/prompts/user_prompt.j2,sha256=mD_qE9fOkyc1XXrebFt097ddx8bTlA6lbY04hKSQmWs,273
|
|
81
85
|
unique_toolkit/agentic/evaluation/hallucination/service.py,sha256=WJF1f45uHnYLx1S4TW31bSFobFpV-YlOS3G_zMhuBVU,2512
|
|
82
|
-
unique_toolkit/agentic/evaluation/hallucination/utils.py,sha256=
|
|
86
|
+
unique_toolkit/agentic/evaluation/hallucination/utils.py,sha256=fxT7H1PQ6xANNvtViuhhR_9ac5ggDmFx-YfjcKUZRcg,12013
|
|
83
87
|
unique_toolkit/agentic/evaluation/output_parser.py,sha256=0FDo8YY_Dc4qlTNeYyQkznzIFj9aX9wMrLOTbhhTl6g,1418
|
|
84
88
|
unique_toolkit/agentic/evaluation/schemas.py,sha256=m9JMCUmeqP8KhsJOVEzsz6dRXUe1uKw-bxRDtn5qwvM,3156
|
|
85
|
-
unique_toolkit/agentic/evaluation/tests/
|
|
86
|
-
unique_toolkit/agentic/evaluation/tests/
|
|
89
|
+
unique_toolkit/agentic/evaluation/tests/fixtures.py,sha256=Q-ughTfDiAdsMKbBVGzFiBucFdAx-FXgJ9iqp5xMyPs,2801
|
|
90
|
+
unique_toolkit/agentic/evaluation/tests/test_config.py,sha256=p7xFQ7KE_yU8jGpqYA7ntAYe5Vln33wd6nwv3FM9XfI,8327
|
|
91
|
+
unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py,sha256=NcSOyBJ_lqYehtlraZPo9RLutCitTP76kvkuyogSD2A,9477
|
|
92
|
+
unique_toolkit/agentic/evaluation/tests/test_hallucination_constants.py,sha256=jT61WxKic-jDUJT1BeVjzhck02EnaMi1ng2H82-Aq_Q,19348
|
|
93
|
+
unique_toolkit/agentic/evaluation/tests/test_hallucination_utils.py,sha256=PKyGR073HxT0J_g8626kCURbMSlrMgkg-xPP7dPHD-0,31838
|
|
94
|
+
unique_toolkit/agentic/evaluation/tests/test_output_parser.py,sha256=KfltytmvqnPWLhmZpBXqcRmnlYorw_USwM5rkLVv8so,5179
|
|
95
|
+
unique_toolkit/agentic/evaluation/tests/test_prompt_loaders.py,sha256=zBREdlKf5tdDyB8XSaNgpQv3-tuZJoYteeJrp6WMWDM,11897
|
|
96
|
+
unique_toolkit/agentic/evaluation/utils.py,sha256=HmyPaDV8wdW-_gOjjW-wDaMKgdrsP5-SHP7OqTmGI_A,264
|
|
87
97
|
unique_toolkit/agentic/feature_flags/__init__.py,sha256=LhE2cHoa9AYBOR7TjiIToOn46sttm9paKcrzE7gnDPM,149
|
|
88
98
|
unique_toolkit/agentic/feature_flags/feature_flags.py,sha256=4jPH0GGGt5-tQ6PJWNpMBIlYzNrQIIqBLx8W02lwxD0,1140
|
|
89
99
|
unique_toolkit/agentic/history_manager/history_construction_with_contents.py,sha256=TwamOOnYTYZMQdY1mAzj6_MZOe3T5RsjFDarT1tCtYo,8150
|
|
@@ -185,7 +195,7 @@ unique_toolkit/chat/constants.py,sha256=05kq6zjqUVB2d6_P7s-90nbljpB3ryxwCI-CAz0r
|
|
|
185
195
|
unique_toolkit/chat/deprecated/service.py,sha256=CYwzXi7OB0RjHd73CO2jq8SlpdBmDYLatzPFkb5sA0k,6529
|
|
186
196
|
unique_toolkit/chat/functions.py,sha256=rF-WGMW1TXqeatHJSD5uGKwO7BGSEIFlS0VJwfE60O0,47874
|
|
187
197
|
unique_toolkit/chat/rendering.py,sha256=c8YiV9oADRrJQ5A_QBJ4_UFc0NZ-2vVaa7tupoMusso,880
|
|
188
|
-
unique_toolkit/chat/responses_api.py,sha256=
|
|
198
|
+
unique_toolkit/chat/responses_api.py,sha256=r_jywsfVI-Kyzm6R-jFmSQQoFdUSel9mmoNnQYe9cls,15156
|
|
189
199
|
unique_toolkit/chat/schemas.py,sha256=FJGFkTw7SKU_R9l_vWbcANMfBo978KH2X9psq6OIFfg,7048
|
|
190
200
|
unique_toolkit/chat/service.py,sha256=6D00OL4QrGafbOhTaC5zNXaNgg7gS5W_2ePVa4LhqpE,4439
|
|
191
201
|
unique_toolkit/chat/state.py,sha256=Cjgwv_2vhDFbV69xxsn7SefhaoIAEqLx3ferdVFCnOg,1445
|
|
@@ -244,7 +254,7 @@ unique_toolkit/short_term_memory/service.py,sha256=5PeVBu1ZCAfyDb2HLVvlmqSbyzBBu
|
|
|
244
254
|
unique_toolkit/smart_rules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
245
255
|
unique_toolkit/smart_rules/compile.py,sha256=Ozhh70qCn2yOzRWr9d8WmJeTo7AQurwd3tStgBMPFLA,1246
|
|
246
256
|
unique_toolkit/test_utilities/events.py,sha256=_mwV2bs5iLjxS1ynDCjaIq-gjjKhXYCK-iy3dRfvO3g,6410
|
|
247
|
-
unique_toolkit-1.45.
|
|
248
|
-
unique_toolkit-1.45.
|
|
249
|
-
unique_toolkit-1.45.
|
|
250
|
-
unique_toolkit-1.45.
|
|
257
|
+
unique_toolkit-1.45.7.dist-info/LICENSE,sha256=GlN8wHNdh53xwOPg44URnwag6TEolCjoq3YD_KrWgss,193
|
|
258
|
+
unique_toolkit-1.45.7.dist-info/METADATA,sha256=XahMDmU86Br2OPEJtdcQ6-6y65jFGzFj7QuD2SZGbSE,49534
|
|
259
|
+
unique_toolkit-1.45.7.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
260
|
+
unique_toolkit-1.45.7.dist-info/RECORD,,
|
|
@@ -1,79 +0,0 @@
|
|
|
1
|
-
HALLUCINATION_METRIC_SYSTEM_MSG = """
|
|
2
|
-
You will receive a question, references, a conversation between a user and an agent, and an output.
|
|
3
|
-
The output is the answer to the question.
|
|
4
|
-
Your task is to evaluate if the output is fully supported by the information provided in the references and conversation, and provide explanations on your judgement in 2 sentences.
|
|
5
|
-
|
|
6
|
-
Use the following entailment scale to generate a score:
|
|
7
|
-
[low] - All information in output is supported by the references/conversation, or extractions from the references/conversation.
|
|
8
|
-
[medium] - The output is supported by the references/conversation to some extent, but there is at least some information in the output that is not discussed in the references/conversation. For example, if an instruction asks about two concepts and the references/conversation only discusses either of them, it should be considered a [medium] hallucination level.
|
|
9
|
-
[high] - The output contains information that is not part of the references/conversation, is unrelated to the references/conversation, or contradicts the references/conversation.
|
|
10
|
-
|
|
11
|
-
Make sure to not use any external information/knowledge to judge whether the output is true or not. Only check whether the output is supported by the references/conversation, and not whether the output is correct or not. Also do not evaluate if the references/conversation contain further information that is not part of the output but could be relevant to the question. If the output mentions a plot or chart, ignore this information in your evaluation.
|
|
12
|
-
|
|
13
|
-
Your answer must be in JSON format:
|
|
14
|
-
{
|
|
15
|
-
"reason": Your explanation of your judgement of the evaluation,
|
|
16
|
-
"value": decision, must be one of the following: ["high", "medium", "low"]
|
|
17
|
-
}
|
|
18
|
-
"""
|
|
19
|
-
|
|
20
|
-
HALLUCINATION_METRIC_USER_MSG = """
|
|
21
|
-
Here is the data:
|
|
22
|
-
|
|
23
|
-
Input:
|
|
24
|
-
'''
|
|
25
|
-
$input_text
|
|
26
|
-
'''
|
|
27
|
-
|
|
28
|
-
References:
|
|
29
|
-
'''
|
|
30
|
-
$contexts_text
|
|
31
|
-
'''
|
|
32
|
-
|
|
33
|
-
Conversation:
|
|
34
|
-
'''
|
|
35
|
-
$history_messages_text
|
|
36
|
-
'''
|
|
37
|
-
|
|
38
|
-
Output:
|
|
39
|
-
'''
|
|
40
|
-
$output_text
|
|
41
|
-
'''
|
|
42
|
-
|
|
43
|
-
Answer as JSON:
|
|
44
|
-
"""
|
|
45
|
-
|
|
46
|
-
HALLUCINATION_METRIC_SYSTEM_MSG_DEFAULT = """
|
|
47
|
-
You will receive a question and an output.
|
|
48
|
-
The output is the answer to the question.
|
|
49
|
-
The situation is that no references could be found to answer the question. Your task is to evaluate if the output contains any information to answer the question,
|
|
50
|
-
and provide a short explanations of your reasoning in 2 sentences. Also mention in your explanation that no references were provided to answer the question.
|
|
51
|
-
|
|
52
|
-
Use the following entailment scale to generate a score:
|
|
53
|
-
[low] - The output does not contain any information to answer the question.
|
|
54
|
-
[medium] - The output contains some information to answer the question, but does not answer the question entirely.
|
|
55
|
-
[high] - The output answers the question.
|
|
56
|
-
|
|
57
|
-
It is not considered an answer when the output relates to the questions subject. Make sure to not use any external information/knowledge to judge whether the output is true or not. Only check that the output does not answer the question, and not whether the output is correct or not.
|
|
58
|
-
Your answer must be in JSON format:
|
|
59
|
-
{
|
|
60
|
-
"reason": Your explanation of your reasoning of the evaluation,
|
|
61
|
-
"value": decision, must be one of the following: ["low", "medium", "high"]
|
|
62
|
-
}
|
|
63
|
-
"""
|
|
64
|
-
|
|
65
|
-
HALLUCINATION_METRIC_USER_MSG_DEFAULT = """
|
|
66
|
-
Here is the data:
|
|
67
|
-
|
|
68
|
-
Input:
|
|
69
|
-
'''
|
|
70
|
-
$input_text
|
|
71
|
-
'''
|
|
72
|
-
|
|
73
|
-
Output:
|
|
74
|
-
'''
|
|
75
|
-
$output_text
|
|
76
|
-
'''
|
|
77
|
-
|
|
78
|
-
Answer as JSON:
|
|
79
|
-
"""
|
|
File without changes
|
|
File without changes
|