vllm-judge 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vllm_judge/__init__.py +16 -2
- vllm_judge/metrics.py +714 -389
- vllm_judge/models.py +3 -2
- vllm_judge/prompts.py +9 -7
- {vllm_judge-0.1.4.dist-info → vllm_judge-0.1.5.dist-info}/METADATA +1 -1
- {vllm_judge-0.1.4.dist-info → vllm_judge-0.1.5.dist-info}/RECORD +9 -9
- {vllm_judge-0.1.4.dist-info → vllm_judge-0.1.5.dist-info}/WHEEL +0 -0
- {vllm_judge-0.1.4.dist-info → vllm_judge-0.1.5.dist-info}/entry_points.txt +0 -0
- {vllm_judge-0.1.4.dist-info → vllm_judge-0.1.5.dist-info}/top_level.txt +0 -0
vllm_judge/__init__.py
CHANGED
@@ -5,7 +5,7 @@ A lightweight library for evaluating text responses using self-hosted language m
|
|
5
5
|
via vLLM's OpenAI-compatible API.
|
6
6
|
"""
|
7
7
|
|
8
|
-
__version__ = "0.1.4"
|
8
|
+
__version__ = "0.1.5"
|
9
9
|
|
10
10
|
from vllm_judge.judge import Judge
|
11
11
|
from vllm_judge.models import (
|
@@ -24,10 +24,11 @@ from vllm_judge.metrics import (
|
|
24
24
|
CLARITY,
|
25
25
|
CONCISENESS,
|
26
26
|
RELEVANCE,
|
27
|
-
|
27
|
+
COHERENCE,
|
28
28
|
# Safety metrics
|
29
29
|
SAFETY,
|
30
30
|
TOXICITY,
|
31
|
+
BIAS_DETECTION,
|
31
32
|
LLAMA_GUARD_3_SAFETY,
|
32
33
|
|
33
34
|
# Code metrics
|
@@ -61,6 +62,12 @@ from vllm_judge.metrics import (
|
|
61
62
|
PRODUCT_REVIEW_TEMPLATE,
|
62
63
|
MEDICAL_INFO_TEMPLATE,
|
63
64
|
API_DOCS_TEMPLATE,
|
65
|
+
RAG_EVALUATION_TEMPLATE,
|
66
|
+
AGENT_PERFORMANCE_TEMPLATE,
|
67
|
+
|
68
|
+
# NLP metrics
|
69
|
+
TRANSLATION_QUALITY,
|
70
|
+
SUMMARIZATION_QUALITY,
|
64
71
|
|
65
72
|
)
|
66
73
|
from vllm_judge.exceptions import (
|
@@ -91,8 +98,10 @@ __all__ = [
|
|
91
98
|
"CLARITY",
|
92
99
|
"CONCISENESS",
|
93
100
|
"RELEVANCE",
|
101
|
+
"COHERENCE",
|
94
102
|
"SAFETY",
|
95
103
|
"TOXICITY",
|
104
|
+
"BIAS_DETECTION",
|
96
105
|
"LLAMA_GUARD_3_SAFETY",
|
97
106
|
"CODE_QUALITY",
|
98
107
|
"CODE_SECURITY",
|
@@ -112,6 +121,11 @@ __all__ = [
|
|
112
121
|
"PRODUCT_REVIEW_TEMPLATE",
|
113
122
|
"MEDICAL_INFO_TEMPLATE",
|
114
123
|
"API_DOCS_TEMPLATE",
|
124
|
+
"RAG_EVALUATION_TEMPLATE",
|
125
|
+
"AGENT_PERFORMANCE_TEMPLATE",
|
126
|
+
"TRANSLATION_QUALITY",
|
127
|
+
"SUMMARIZATION_QUALITY",
|
128
|
+
|
115
129
|
# Exceptions
|
116
130
|
"VLLMJudgeError",
|
117
131
|
"ConfigurationError",
|