unique_toolkit 1.45.5__py3-none-any.whl → 1.45.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unique_toolkit/agentic/evaluation/config.py +25 -6
- unique_toolkit/agentic/evaluation/context_relevancy/prompts/__init__.py +13 -0
- unique_toolkit/agentic/evaluation/context_relevancy/{prompts.py → prompts/system_prompt.j2} +11 -43
- unique_toolkit/agentic/evaluation/context_relevancy/prompts/user_prompt.j2 +15 -0
- unique_toolkit/agentic/evaluation/context_relevancy/service.py +24 -56
- unique_toolkit/agentic/evaluation/hallucination/constants.py +26 -15
- unique_toolkit/agentic/evaluation/hallucination/prompts/__init__.py +13 -0
- unique_toolkit/agentic/evaluation/hallucination/prompts/system_prompt.j2 +35 -0
- unique_toolkit/agentic/evaluation/hallucination/prompts/user_prompt.j2 +27 -0
- unique_toolkit/agentic/evaluation/hallucination/utils.py +153 -102
- unique_toolkit/agentic/evaluation/tests/fixtures.py +102 -0
- unique_toolkit/agentic/evaluation/tests/test_config.py +247 -0
- unique_toolkit/agentic/evaluation/tests/test_context_relevancy_service.py +141 -121
- unique_toolkit/agentic/evaluation/tests/test_hallucination_constants.py +600 -0
- unique_toolkit/agentic/evaluation/tests/test_hallucination_utils.py +1009 -0
- unique_toolkit/agentic/evaluation/tests/test_output_parser.py +82 -23
- unique_toolkit/agentic/evaluation/tests/test_prompt_loaders.py +348 -0
- unique_toolkit/agentic/evaluation/utils.py +8 -0
- unique_toolkit/chat/responses_api.py +49 -45
- {unique_toolkit-1.45.5.dist-info → unique_toolkit-1.45.7.dist-info}/METADATA +9 -1
- {unique_toolkit-1.45.5.dist-info → unique_toolkit-1.45.7.dist-info}/RECORD +23 -13
- unique_toolkit/agentic/evaluation/hallucination/prompts.py +0 -79
- {unique_toolkit-1.45.5.dist-info → unique_toolkit-1.45.7.dist-info}/LICENSE +0 -0
- {unique_toolkit-1.45.5.dist-info → unique_toolkit-1.45.7.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,600 @@
|
|
|
1
|
+
"""Tests for hallucination constants and configuration."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from unique_toolkit.agentic.evaluation.hallucination.constants import (
|
|
8
|
+
HallucinationConfig,
|
|
9
|
+
HallucinationPromptsConfig,
|
|
10
|
+
SourceSelectionMode,
|
|
11
|
+
hallucination_metric_default_config,
|
|
12
|
+
hallucination_required_input_fields,
|
|
13
|
+
)
|
|
14
|
+
from unique_toolkit.agentic.evaluation.schemas import (
|
|
15
|
+
EvaluationMetricInputFieldName,
|
|
16
|
+
EvaluationMetricName,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@pytest.mark.ai
|
|
21
|
+
def test_source_selection_mode__has_from_ids_mode__as_enum_value() -> None:
|
|
22
|
+
"""
|
|
23
|
+
Purpose: Verify that FROM_IDS mode exists in SourceSelectionMode enum.
|
|
24
|
+
Why this matters: FROM_IDS is a core selection mode for chunk identification.
|
|
25
|
+
Setup summary: Check enum attribute exists and has correct value.
|
|
26
|
+
"""
|
|
27
|
+
# Arrange - No setup needed
|
|
28
|
+
|
|
29
|
+
# Act & Assert
|
|
30
|
+
assert hasattr(SourceSelectionMode, "FROM_IDS")
|
|
31
|
+
assert SourceSelectionMode.FROM_IDS == "FROM_IDS"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@pytest.mark.ai
|
|
35
|
+
def test_source_selection_mode__has_from_order_mode__as_enum_value() -> None:
|
|
36
|
+
"""
|
|
37
|
+
Purpose: Verify that FROM_ORDER mode exists in SourceSelectionMode enum.
|
|
38
|
+
Why this matters: FROM_ORDER enables index-based chunk selection.
|
|
39
|
+
Setup summary: Check enum attribute exists and has correct value.
|
|
40
|
+
"""
|
|
41
|
+
# Arrange - No setup needed
|
|
42
|
+
|
|
43
|
+
# Act & Assert
|
|
44
|
+
assert hasattr(SourceSelectionMode, "FROM_ORDER")
|
|
45
|
+
assert SourceSelectionMode.FROM_ORDER == "FROM_ORDER"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@pytest.mark.ai
|
|
49
|
+
def test_source_selection_mode__has_from_original_response_mode__as_enum_value() -> (
|
|
50
|
+
None
|
|
51
|
+
):
|
|
52
|
+
"""
|
|
53
|
+
Purpose: Verify that FROM_ORIGINAL_RESPONSE mode exists in SourceSelectionMode enum.
|
|
54
|
+
Why this matters: FROM_ORIGINAL_RESPONSE enables text-based reference extraction.
|
|
55
|
+
Setup summary: Check enum attribute exists and has correct value.
|
|
56
|
+
"""
|
|
57
|
+
# Arrange - No setup needed
|
|
58
|
+
|
|
59
|
+
# Act & Assert
|
|
60
|
+
assert hasattr(SourceSelectionMode, "FROM_ORIGINAL_RESPONSE")
|
|
61
|
+
assert SourceSelectionMode.FROM_ORIGINAL_RESPONSE == "FROM_ORIGINAL_RESPONSE"
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@pytest.mark.ai
|
|
65
|
+
def test_source_selection_mode__uses_strings__for_all_mode_values() -> None:
|
|
66
|
+
"""
|
|
67
|
+
Purpose: Verify that all SourceSelectionMode enum values are strings.
|
|
68
|
+
Why this matters: String values enable easy serialization and comparison.
|
|
69
|
+
Setup summary: Iterate all modes, assert each value is string type.
|
|
70
|
+
"""
|
|
71
|
+
# Arrange - No setup needed
|
|
72
|
+
|
|
73
|
+
# Act & Assert
|
|
74
|
+
for mode in SourceSelectionMode:
|
|
75
|
+
assert isinstance(mode.value, str)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@pytest.mark.ai
|
|
79
|
+
def test_source_selection_mode__uses_uppercase__for_all_mode_values() -> None:
|
|
80
|
+
"""
|
|
81
|
+
Purpose: Verify that all SourceSelectionMode values are uppercase.
|
|
82
|
+
Why this matters: Consistent naming convention for enum values.
|
|
83
|
+
Setup summary: Iterate all modes, assert each value is uppercase.
|
|
84
|
+
"""
|
|
85
|
+
# Arrange - No setup needed
|
|
86
|
+
|
|
87
|
+
# Act & Assert
|
|
88
|
+
for mode in SourceSelectionMode:
|
|
89
|
+
assert mode.value.isupper()
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@pytest.mark.ai
|
|
93
|
+
def test_hallucination_prompts_config__loads_templates_from_files__on_default_initialization() -> (
|
|
94
|
+
None
|
|
95
|
+
):
|
|
96
|
+
"""
|
|
97
|
+
Purpose: Verify that default initialization loads template files automatically.
|
|
98
|
+
Why this matters: Templates must be loaded for hallucination evaluation to work.
|
|
99
|
+
Setup summary: Create config with defaults, assert templates are non-empty.
|
|
100
|
+
"""
|
|
101
|
+
# Arrange - No setup needed
|
|
102
|
+
|
|
103
|
+
# Act
|
|
104
|
+
config: HallucinationPromptsConfig = HallucinationPromptsConfig()
|
|
105
|
+
|
|
106
|
+
# Assert
|
|
107
|
+
assert len(config.system_prompt_template) > 0
|
|
108
|
+
assert len(config.user_prompt_template) > 0
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@pytest.mark.ai
|
|
112
|
+
def test_hallucination_prompts_config__contains_jinja_syntax__in_loaded_templates() -> (
|
|
113
|
+
None
|
|
114
|
+
):
|
|
115
|
+
"""
|
|
116
|
+
Purpose: Verify that loaded templates contain Jinja2 template syntax.
|
|
117
|
+
Why this matters: Templates must support dynamic content rendering.
|
|
118
|
+
Setup summary: Load default config, assert Jinja2 syntax present.
|
|
119
|
+
"""
|
|
120
|
+
# Arrange - No setup needed
|
|
121
|
+
|
|
122
|
+
# Act
|
|
123
|
+
config: HallucinationPromptsConfig = HallucinationPromptsConfig()
|
|
124
|
+
|
|
125
|
+
# Assert
|
|
126
|
+
assert "{%" in config.system_prompt_template
|
|
127
|
+
assert "{{" in config.user_prompt_template
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@pytest.mark.ai
|
|
131
|
+
def test_hallucination_prompts_config__accepts_custom_templates__on_initialization() -> (
|
|
132
|
+
None
|
|
133
|
+
):
|
|
134
|
+
"""
|
|
135
|
+
Purpose: Verify that templates can be overridden during initialization.
|
|
136
|
+
Why this matters: Allows customization of hallucination detection prompts.
|
|
137
|
+
Setup summary: Initialize with custom prompts, assert they override defaults.
|
|
138
|
+
"""
|
|
139
|
+
# Arrange
|
|
140
|
+
custom_system: str = "Custom system prompt"
|
|
141
|
+
custom_user: str = "Custom user prompt"
|
|
142
|
+
|
|
143
|
+
# Act
|
|
144
|
+
config: HallucinationPromptsConfig = HallucinationPromptsConfig(
|
|
145
|
+
system_prompt_template=custom_system,
|
|
146
|
+
user_prompt_template=custom_user,
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
# Assert
|
|
150
|
+
assert config.system_prompt_template == custom_system
|
|
151
|
+
assert config.user_prompt_template == custom_user
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@pytest.mark.ai
|
|
155
|
+
def test_hallucination_prompts_config__allows_modification__after_initialization() -> (
|
|
156
|
+
None
|
|
157
|
+
):
|
|
158
|
+
"""
|
|
159
|
+
Purpose: Verify that templates can be modified after config creation.
|
|
160
|
+
Why this matters: Enables runtime template customization.
|
|
161
|
+
Setup summary: Create config, modify templates, assert new values.
|
|
162
|
+
"""
|
|
163
|
+
# Arrange
|
|
164
|
+
config: HallucinationPromptsConfig = HallucinationPromptsConfig()
|
|
165
|
+
|
|
166
|
+
# Act
|
|
167
|
+
config.system_prompt_template = "New system"
|
|
168
|
+
config.user_prompt_template = "New user"
|
|
169
|
+
|
|
170
|
+
# Assert
|
|
171
|
+
assert config.system_prompt_template == "New system"
|
|
172
|
+
assert config.user_prompt_template == "New user"
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
@pytest.mark.ai
|
|
176
|
+
def test_hallucination_config__defaults_to_from_original_response__for_source_selection() -> (
|
|
177
|
+
None
|
|
178
|
+
):
|
|
179
|
+
"""
|
|
180
|
+
Purpose: Verify that default source selection mode is FROM_ORIGINAL_RESPONSE.
|
|
181
|
+
Why this matters: This is the most accurate mode for extracting used sources.
|
|
182
|
+
Setup summary: Create default config, assert source selection mode.
|
|
183
|
+
"""
|
|
184
|
+
# Arrange - No setup needed
|
|
185
|
+
|
|
186
|
+
# Act
|
|
187
|
+
config: HallucinationConfig = HallucinationConfig()
|
|
188
|
+
|
|
189
|
+
# Assert
|
|
190
|
+
assert config.source_selection_mode == SourceSelectionMode.FROM_ORIGINAL_RESPONSE
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
@pytest.mark.ai
|
|
194
|
+
def test_hallucination_config__has_default_regex_pattern__for_source_references() -> (
|
|
195
|
+
None
|
|
196
|
+
):
|
|
197
|
+
"""
|
|
198
|
+
Purpose: Verify that default reference_pattern is correctly configured.
|
|
199
|
+
Why this matters: Pattern must match common source reference formats.
|
|
200
|
+
Setup summary: Create default config, assert reference_pattern value.
|
|
201
|
+
"""
|
|
202
|
+
# Arrange - No setup needed
|
|
203
|
+
|
|
204
|
+
# Act
|
|
205
|
+
config: HallucinationConfig = HallucinationConfig()
|
|
206
|
+
|
|
207
|
+
# Assert
|
|
208
|
+
assert config.reference_pattern == r"[\[<]?source(\d+)[>\]]?"
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
@pytest.mark.ai
|
|
212
|
+
def test_hallucination_config__uses_valid_regex__for_reference_pattern() -> None:
|
|
213
|
+
"""
|
|
214
|
+
Purpose: Verify that reference_pattern is a valid regular expression.
|
|
215
|
+
Why this matters: Invalid regex would cause runtime errors during extraction.
|
|
216
|
+
Setup summary: Create config, compile reference_pattern, assert no errors.
|
|
217
|
+
"""
|
|
218
|
+
# Arrange
|
|
219
|
+
config: HallucinationConfig = HallucinationConfig()
|
|
220
|
+
|
|
221
|
+
# Act & Assert
|
|
222
|
+
try:
|
|
223
|
+
re.compile(config.reference_pattern)
|
|
224
|
+
except re.error:
|
|
225
|
+
pytest.fail("reference_pattern is not a valid regex")
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
@pytest.mark.ai
|
|
229
|
+
def test_hallucination_config__is_disabled_by_default__for_safety() -> None:
|
|
230
|
+
"""
|
|
231
|
+
Purpose: Verify that hallucination metric is disabled by default.
|
|
232
|
+
Why this matters: Prevents unexpected evaluation costs and behavior.
|
|
233
|
+
Setup summary: Create default config, assert enabled is False.
|
|
234
|
+
"""
|
|
235
|
+
# Arrange - No setup needed
|
|
236
|
+
|
|
237
|
+
# Act
|
|
238
|
+
config: HallucinationConfig = HallucinationConfig()
|
|
239
|
+
|
|
240
|
+
# Assert
|
|
241
|
+
assert config.enabled is False
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
@pytest.mark.ai
|
|
245
|
+
def test_hallucination_config__has_hallucination_metric_name__by_default() -> None:
|
|
246
|
+
"""
|
|
247
|
+
Purpose: Verify that metric name is HALLUCINATION.
|
|
248
|
+
Why this matters: Correct metric identification for evaluation system.
|
|
249
|
+
Setup summary: Create default config, assert name field.
|
|
250
|
+
"""
|
|
251
|
+
# Arrange - No setup needed
|
|
252
|
+
|
|
253
|
+
# Act
|
|
254
|
+
config: HallucinationConfig = HallucinationConfig()
|
|
255
|
+
|
|
256
|
+
# Assert
|
|
257
|
+
assert config.name == EvaluationMetricName.HALLUCINATION
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
@pytest.mark.ai
|
|
261
|
+
def test_hallucination_config__includes_prompts_config__on_initialization() -> None:
|
|
262
|
+
"""
|
|
263
|
+
Purpose: Verify that config has prompts_config field with loaded templates.
|
|
264
|
+
Why this matters: Prompts are required for hallucination evaluation.
|
|
265
|
+
Setup summary: Create config, assert prompts_config exists and is correct type.
|
|
266
|
+
"""
|
|
267
|
+
# Arrange - No setup needed
|
|
268
|
+
|
|
269
|
+
# Act
|
|
270
|
+
config: HallucinationConfig = HallucinationConfig()
|
|
271
|
+
|
|
272
|
+
# Assert
|
|
273
|
+
assert hasattr(config, "prompts_config")
|
|
274
|
+
assert isinstance(config.prompts_config, HallucinationPromptsConfig)
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
@pytest.mark.ai
|
|
278
|
+
def test_hallucination_config__loads_templates_in_prompts_config__by_default() -> None:
|
|
279
|
+
"""
|
|
280
|
+
Purpose: Verify that prompts_config is initialized with loaded templates.
|
|
281
|
+
Why this matters: Templates must be available for evaluation without extra setup.
|
|
282
|
+
Setup summary: Create config, assert prompts_config templates are non-empty.
|
|
283
|
+
"""
|
|
284
|
+
# Arrange - No setup needed
|
|
285
|
+
|
|
286
|
+
# Act
|
|
287
|
+
config: HallucinationConfig = HallucinationConfig()
|
|
288
|
+
|
|
289
|
+
# Assert
|
|
290
|
+
assert len(config.prompts_config.system_prompt_template) > 0
|
|
291
|
+
assert len(config.prompts_config.user_prompt_template) > 0
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
@pytest.mark.ai
|
|
295
|
+
def test_hallucination_config__has_score_mapping_dictionaries__for_labels_and_titles() -> (
|
|
296
|
+
None
|
|
297
|
+
):
|
|
298
|
+
"""
|
|
299
|
+
Purpose: Verify that config has score_to_label and score_to_title mappings.
|
|
300
|
+
Why this matters: Score mappings enable UI display of evaluation results.
|
|
301
|
+
Setup summary: Create config, assert mapping attributes exist.
|
|
302
|
+
"""
|
|
303
|
+
# Arrange - No setup needed
|
|
304
|
+
|
|
305
|
+
# Act
|
|
306
|
+
config: HallucinationConfig = HallucinationConfig()
|
|
307
|
+
|
|
308
|
+
# Assert
|
|
309
|
+
assert hasattr(config, "score_to_label")
|
|
310
|
+
assert hasattr(config, "score_to_title")
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
@pytest.mark.ai
|
|
314
|
+
def test_hallucination_config__maps_scores_to_color_labels__correctly() -> None:
|
|
315
|
+
"""
|
|
316
|
+
Purpose: Verify that score_to_label has expected color mappings.
|
|
317
|
+
Why this matters: Color coding provides intuitive hallucination severity indication.
|
|
318
|
+
Setup summary: Create config, assert score to color mappings.
|
|
319
|
+
"""
|
|
320
|
+
# Arrange - No setup needed
|
|
321
|
+
|
|
322
|
+
# Act
|
|
323
|
+
config: HallucinationConfig = HallucinationConfig()
|
|
324
|
+
|
|
325
|
+
# Assert
|
|
326
|
+
assert config.score_to_label["LOW"] == "GREEN"
|
|
327
|
+
assert config.score_to_label["MEDIUM"] == "YELLOW"
|
|
328
|
+
assert config.score_to_label["HIGH"] == "RED"
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
@pytest.mark.ai
|
|
332
|
+
def test_hallucination_config__includes_hallucination_in_titles__for_all_scores() -> (
|
|
333
|
+
None
|
|
334
|
+
):
|
|
335
|
+
"""
|
|
336
|
+
Purpose: Verify that score_to_title contains "Hallucination" in all titles.
|
|
337
|
+
Why this matters: Titles should clearly identify the metric being evaluated.
|
|
338
|
+
Setup summary: Create config, assert "Hallucination" present in all titles.
|
|
339
|
+
"""
|
|
340
|
+
# Arrange - No setup needed
|
|
341
|
+
|
|
342
|
+
# Act
|
|
343
|
+
config: HallucinationConfig = HallucinationConfig()
|
|
344
|
+
|
|
345
|
+
# Assert
|
|
346
|
+
assert "Hallucination" in config.score_to_title["LOW"]
|
|
347
|
+
assert "Hallucination" in config.score_to_title["MEDIUM"]
|
|
348
|
+
assert "Hallucination" in config.score_to_title["HIGH"]
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
@pytest.mark.ai
|
|
352
|
+
def test_hallucination_config__accepts_custom_source_selection_mode__on_initialization() -> (
|
|
353
|
+
None
|
|
354
|
+
):
|
|
355
|
+
"""
|
|
356
|
+
Purpose: Verify that source_selection_mode can be customized.
|
|
357
|
+
Why this matters: Different use cases may require different selection strategies.
|
|
358
|
+
Setup summary: Initialize with custom mode, assert it's set correctly.
|
|
359
|
+
"""
|
|
360
|
+
# Arrange
|
|
361
|
+
custom_mode: SourceSelectionMode = SourceSelectionMode.FROM_IDS
|
|
362
|
+
|
|
363
|
+
# Act
|
|
364
|
+
config: HallucinationConfig = HallucinationConfig(source_selection_mode=custom_mode)
|
|
365
|
+
|
|
366
|
+
# Assert
|
|
367
|
+
assert config.source_selection_mode == SourceSelectionMode.FROM_IDS
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
@pytest.mark.ai
|
|
371
|
+
def test_hallucination_config__accepts_custom_reference_pattern__on_initialization() -> (
|
|
372
|
+
None
|
|
373
|
+
):
|
|
374
|
+
"""
|
|
375
|
+
Purpose: Verify that reference_pattern can be customized during initialization.
|
|
376
|
+
Why this matters: Allows support for different reference citation formats.
|
|
377
|
+
Setup summary: Initialize with custom pattern, assert it's stored.
|
|
378
|
+
"""
|
|
379
|
+
# Arrange
|
|
380
|
+
custom_pattern: str = r"ref:(\d+)"
|
|
381
|
+
|
|
382
|
+
# Act
|
|
383
|
+
config: HallucinationConfig = HallucinationConfig(reference_pattern=custom_pattern)
|
|
384
|
+
|
|
385
|
+
# Assert
|
|
386
|
+
assert config.reference_pattern == custom_pattern
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
@pytest.mark.ai
|
|
390
|
+
def test_hallucination_config__can_be_enabled__via_initialization() -> None:
|
|
391
|
+
"""
|
|
392
|
+
Purpose: Verify that metric can be enabled during config creation.
|
|
393
|
+
Why this matters: Allows explicit opt-in to hallucination evaluation.
|
|
394
|
+
Setup summary: Initialize with enabled=True, assert enabled state.
|
|
395
|
+
"""
|
|
396
|
+
# Arrange - No setup needed
|
|
397
|
+
|
|
398
|
+
# Act
|
|
399
|
+
config: HallucinationConfig = HallucinationConfig(enabled=True)
|
|
400
|
+
|
|
401
|
+
# Assert
|
|
402
|
+
assert config.enabled is True
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
@pytest.mark.ai
|
|
406
|
+
def test_hallucination_config__serializes_to_dict__with_all_fields() -> None:
|
|
407
|
+
"""
|
|
408
|
+
Purpose: Verify that config can be serialized to dictionary format.
|
|
409
|
+
Why this matters: Required for persistence and API serialization.
|
|
410
|
+
Setup summary: Create config with custom values, serialize, assert structure.
|
|
411
|
+
"""
|
|
412
|
+
# Arrange - No setup needed
|
|
413
|
+
|
|
414
|
+
# Act
|
|
415
|
+
config: HallucinationConfig = HallucinationConfig(
|
|
416
|
+
enabled=True,
|
|
417
|
+
source_selection_mode=SourceSelectionMode.FROM_ORDER,
|
|
418
|
+
)
|
|
419
|
+
config_dict: dict = config.model_dump()
|
|
420
|
+
|
|
421
|
+
# Assert
|
|
422
|
+
assert "source_selection_mode" in config_dict
|
|
423
|
+
assert config_dict["source_selection_mode"] == "FROM_ORDER"
|
|
424
|
+
assert "reference_pattern" in config_dict
|
|
425
|
+
assert "prompts_config" in config_dict
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
@pytest.mark.ai
|
|
429
|
+
def test_hallucination_metric_default_config__exists__as_module_constant() -> None:
|
|
430
|
+
"""
|
|
431
|
+
Purpose: Verify that hallucination_metric_default_config constant exists.
|
|
432
|
+
Why this matters: Provides easy access to default configuration.
|
|
433
|
+
Setup summary: Check module constant exists and is not None.
|
|
434
|
+
"""
|
|
435
|
+
# Arrange - No setup needed
|
|
436
|
+
|
|
437
|
+
# Act & Assert
|
|
438
|
+
assert hallucination_metric_default_config is not None
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
@pytest.mark.ai
|
|
442
|
+
def test_hallucination_metric_default_config__is_hallucination_config_instance__for_type_safety() -> (
|
|
443
|
+
None
|
|
444
|
+
):
|
|
445
|
+
"""
|
|
446
|
+
Purpose: Verify that default config is an instance of HallucinationConfig.
|
|
447
|
+
Why this matters: Ensures type safety and correct configuration structure.
|
|
448
|
+
Setup summary: Check instance type of module constant.
|
|
449
|
+
"""
|
|
450
|
+
# Arrange - No setup needed
|
|
451
|
+
|
|
452
|
+
# Act & Assert
|
|
453
|
+
assert isinstance(hallucination_metric_default_config, HallucinationConfig)
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
@pytest.mark.ai
|
|
457
|
+
def test_hallucination_metric_default_config__is_disabled__for_safety() -> None:
|
|
458
|
+
"""
|
|
459
|
+
Purpose: Verify that default config has metric disabled.
|
|
460
|
+
Why this matters: Prevents accidental evaluation costs on startup.
|
|
461
|
+
Setup summary: Check enabled field of default config.
|
|
462
|
+
"""
|
|
463
|
+
# Arrange - No setup needed
|
|
464
|
+
|
|
465
|
+
# Act & Assert
|
|
466
|
+
assert hallucination_metric_default_config.enabled is False
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
@pytest.mark.ai
|
|
470
|
+
def test_hallucination_metric_default_config__has_expected_default_settings__configured() -> (
|
|
471
|
+
None
|
|
472
|
+
):
|
|
473
|
+
"""
|
|
474
|
+
Purpose: Verify that default config has expected field values.
|
|
475
|
+
Why this matters: Ensures consistent default behavior across deployments.
|
|
476
|
+
Setup summary: Check key configuration fields of default config.
|
|
477
|
+
"""
|
|
478
|
+
# Arrange
|
|
479
|
+
config: HallucinationConfig = hallucination_metric_default_config
|
|
480
|
+
|
|
481
|
+
# Act & Assert
|
|
482
|
+
assert config.name == EvaluationMetricName.HALLUCINATION
|
|
483
|
+
assert config.source_selection_mode == SourceSelectionMode.FROM_ORIGINAL_RESPONSE
|
|
484
|
+
assert config.reference_pattern == r"[\[<]?source(\d+)[>\]]?"
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
@pytest.mark.ai
|
|
488
|
+
def test_hallucination_required_input_fields__exists__as_list() -> None:
|
|
489
|
+
"""
|
|
490
|
+
Purpose: Verify that hallucination_required_input_fields list exists.
|
|
491
|
+
Why this matters: Defines required inputs for hallucination evaluation.
|
|
492
|
+
Setup summary: Check module constant exists and is list type.
|
|
493
|
+
"""
|
|
494
|
+
# Arrange - No setup needed
|
|
495
|
+
|
|
496
|
+
# Act & Assert
|
|
497
|
+
assert hallucination_required_input_fields is not None
|
|
498
|
+
assert isinstance(hallucination_required_input_fields, list)
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
@pytest.mark.ai
|
|
502
|
+
def test_hallucination_required_input_fields__includes_input_text__as_required() -> (
|
|
503
|
+
None
|
|
504
|
+
):
|
|
505
|
+
"""
|
|
506
|
+
Purpose: Verify that INPUT_TEXT is in required fields list.
|
|
507
|
+
Why this matters: Input text is essential for hallucination detection.
|
|
508
|
+
Setup summary: Check INPUT_TEXT enum value in required fields.
|
|
509
|
+
"""
|
|
510
|
+
# Arrange - No setup needed
|
|
511
|
+
|
|
512
|
+
# Act & Assert
|
|
513
|
+
assert (
|
|
514
|
+
EvaluationMetricInputFieldName.INPUT_TEXT in hallucination_required_input_fields
|
|
515
|
+
)
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
@pytest.mark.ai
|
|
519
|
+
def test_hallucination_required_input_fields__includes_context_texts__as_required() -> (
|
|
520
|
+
None
|
|
521
|
+
):
|
|
522
|
+
"""
|
|
523
|
+
Purpose: Verify that CONTEXT_TEXTS is in required fields list.
|
|
524
|
+
Why this matters: Context texts provide grounding source for hallucination check.
|
|
525
|
+
Setup summary: Check CONTEXT_TEXTS enum value in required fields.
|
|
526
|
+
"""
|
|
527
|
+
# Arrange - No setup needed
|
|
528
|
+
|
|
529
|
+
# Act & Assert
|
|
530
|
+
assert (
|
|
531
|
+
EvaluationMetricInputFieldName.CONTEXT_TEXTS
|
|
532
|
+
in hallucination_required_input_fields
|
|
533
|
+
)
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
@pytest.mark.ai
|
|
537
|
+
def test_hallucination_required_input_fields__includes_history_messages__as_required() -> (
|
|
538
|
+
None
|
|
539
|
+
):
|
|
540
|
+
"""
|
|
541
|
+
Purpose: Verify that HISTORY_MESSAGES is in required fields list.
|
|
542
|
+
Why this matters: Message history provides conversation context for evaluation.
|
|
543
|
+
Setup summary: Check HISTORY_MESSAGES enum value in required fields.
|
|
544
|
+
"""
|
|
545
|
+
# Arrange - No setup needed
|
|
546
|
+
|
|
547
|
+
# Act & Assert
|
|
548
|
+
assert (
|
|
549
|
+
EvaluationMetricInputFieldName.HISTORY_MESSAGES
|
|
550
|
+
in hallucination_required_input_fields
|
|
551
|
+
)
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
@pytest.mark.ai
|
|
555
|
+
def test_hallucination_required_input_fields__includes_output_text__as_required() -> (
|
|
556
|
+
None
|
|
557
|
+
):
|
|
558
|
+
"""
|
|
559
|
+
Purpose: Verify that OUTPUT_TEXT is in required fields list.
|
|
560
|
+
Why this matters: Output text is the target of hallucination evaluation.
|
|
561
|
+
Setup summary: Check OUTPUT_TEXT enum value in required fields.
|
|
562
|
+
"""
|
|
563
|
+
# Arrange - No setup needed
|
|
564
|
+
|
|
565
|
+
# Act & Assert
|
|
566
|
+
assert (
|
|
567
|
+
EvaluationMetricInputFieldName.OUTPUT_TEXT
|
|
568
|
+
in hallucination_required_input_fields
|
|
569
|
+
)
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
@pytest.mark.ai
|
|
573
|
+
def test_hallucination_required_input_fields__has_four_fields__for_complete_evaluation() -> (
|
|
574
|
+
None
|
|
575
|
+
):
|
|
576
|
+
"""
|
|
577
|
+
Purpose: Verify that required fields list contains exactly 4 fields.
|
|
578
|
+
Why this matters: Hallucination evaluation requires all 4 input components.
|
|
579
|
+
Setup summary: Check length of required fields list.
|
|
580
|
+
"""
|
|
581
|
+
# Arrange - No setup needed
|
|
582
|
+
|
|
583
|
+
# Act & Assert
|
|
584
|
+
assert len(hallucination_required_input_fields) == 4
|
|
585
|
+
|
|
586
|
+
|
|
587
|
+
@pytest.mark.ai
|
|
588
|
+
def test_hallucination_required_input_fields__contains_only_valid_enum_values__for_type_safety() -> (
|
|
589
|
+
None
|
|
590
|
+
):
|
|
591
|
+
"""
|
|
592
|
+
Purpose: Verify that all fields are valid EvaluationMetricInputFieldName enum values.
|
|
593
|
+
Why this matters: Ensures type safety and prevents invalid field references.
|
|
594
|
+
Setup summary: Iterate fields, assert each is enum instance.
|
|
595
|
+
"""
|
|
596
|
+
# Arrange - No setup needed
|
|
597
|
+
|
|
598
|
+
# Act & Assert
|
|
599
|
+
for field in hallucination_required_input_fields:
|
|
600
|
+
assert isinstance(field, EvaluationMetricInputFieldName)
|