vllm-judge 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- vllm_judge/__init__.py +16 -2
- vllm_judge/metrics.py +714 -389
- vllm_judge/models.py +3 -2
- vllm_judge/prompts.py +9 -7
- {vllm_judge-0.1.4.dist-info → vllm_judge-0.1.5.dist-info}/METADATA +1 -1
- {vllm_judge-0.1.4.dist-info → vllm_judge-0.1.5.dist-info}/RECORD +9 -9
- {vllm_judge-0.1.4.dist-info → vllm_judge-0.1.5.dist-info}/WHEEL +0 -0
- {vllm_judge-0.1.4.dist-info → vllm_judge-0.1.5.dist-info}/entry_points.txt +0 -0
- {vllm_judge-0.1.4.dist-info → vllm_judge-0.1.5.dist-info}/top_level.txt +0 -0
vllm_judge/models.py
CHANGED
@@ -99,7 +99,8 @@ class Metric:
|
|
99
99
|
system_prompt: Optional[str] = None,
|
100
100
|
template_vars: Optional[Dict[str, Any]] = None,
|
101
101
|
required_vars: Optional[List[str]] = None,
|
102
|
-
template_engine: Union[str, TemplateEngine] = TemplateEngine.FORMAT
|
102
|
+
template_engine: Union[str, TemplateEngine] = TemplateEngine.FORMAT,
|
103
|
+
additional_instructions: Optional[str] = None
|
103
104
|
):
|
104
105
|
"""
|
105
106
|
Initialize a reusable metric.
|
@@ -125,7 +126,7 @@ class Metric:
|
|
125
126
|
self.template_vars = template_vars or {}
|
126
127
|
self.required_vars = required_vars or []
|
127
128
|
self.template_engine = TemplateEngine(template_engine)
|
128
|
-
|
129
|
+
self.additional_instructions = additional_instructions
|
129
130
|
# Auto-detect required variables if not specified
|
130
131
|
if not self.required_vars and self.template_engine == TemplateEngine.FORMAT:
|
131
132
|
self._auto_detect_required_vars()
|
vllm_judge/prompts.py
CHANGED
@@ -48,7 +48,7 @@ class PromptBuilder:
|
|
48
48
|
# Output format instructions
|
49
49
|
system_prompt+="\nYou must respond in JSON format:\n"
|
50
50
|
system_prompt+="""{
|
51
|
-
"decision": <your judgment - string|
|
51
|
+
"decision": <your judgment - string|boolean>,
|
52
52
|
"reasoning": "<concise explanation of your judgment>",
|
53
53
|
"score": <numeric score if requested, otherwise null>
|
54
54
|
}"""
|
@@ -105,16 +105,18 @@ class PromptBuilder:
|
|
105
105
|
parts.append(f"\nResponse B:\n{content['b']}")
|
106
106
|
else:
|
107
107
|
if input:
|
108
|
-
parts.append(f"Evaluate how well this
|
108
|
+
parts.append(f"Evaluate how well this content addresses the input for: {criteria}")
|
109
109
|
else:
|
110
|
-
parts.append(f"Evaluate the following
|
110
|
+
parts.append(f"Evaluate the following content based on: {criteria}")
|
111
111
|
if context:
|
112
112
|
parts.append(f"\nContext: {context}")
|
113
|
-
parts.append(f"\
|
113
|
+
parts.append(f"\nContent to evaluate:\n{content}")
|
114
114
|
|
115
|
+
parts.append(f"\nYou must return a decision label/class (your judgement) for the `decision` field and a concise explanation for the `reasoning` field.")
|
116
|
+
|
115
117
|
# Add scale and rubric
|
116
118
|
if scale:
|
117
|
-
parts.append(f"\
|
119
|
+
parts.append(f"\nIn addition to these, provide a score from {scale[0]} to {scale[1]}")
|
118
120
|
|
119
121
|
if isinstance(rubric, dict):
|
120
122
|
parts.append("\nScoring guide:")
|
@@ -137,7 +139,7 @@ class PromptBuilder:
|
|
137
139
|
if "input" in ex:
|
138
140
|
parts.append(f"Input: {ex['input']}")
|
139
141
|
if "content" in ex:
|
140
|
-
parts.append(f"
|
142
|
+
parts.append(f"Content: {ex['content']}")
|
141
143
|
elif "text" in ex:
|
142
144
|
parts.append(f"Text: {ex['text']}")
|
143
145
|
|
@@ -156,7 +158,7 @@ class PromptBuilder:
|
|
156
158
|
# Output format instructions
|
157
159
|
parts.append("\nYou must respond in JSON format:")
|
158
160
|
parts.append("""{
|
159
|
-
"decision": <your judgment - string|
|
161
|
+
"decision": <your judgment - string|boolean>,
|
160
162
|
"reasoning": "<concise explanation of your judgment>",
|
161
163
|
"score": <numeric score if requested, otherwise null>
|
162
164
|
}""")
|
@@ -1,20 +1,20 @@
|
|
1
|
-
vllm_judge/__init__.py,sha256=
|
1
|
+
vllm_judge/__init__.py,sha256=6OKo_RbNOov83pZIPfg12ITxiE6UZh2_UOTjQsgWbFY,2792
|
2
2
|
vllm_judge/batch.py,sha256=3zkatZxQESCjYz99qfLhxl2Dq2tHAfhtdTiXxjVqUxE,4836
|
3
3
|
vllm_judge/cli.py,sha256=tnMqJ2RvCFaXUY4ok4IO-d9IRNJhEck60AJNzdCaqhg,13679
|
4
4
|
vllm_judge/client.py,sha256=QPz64q9-7XEOOJiKQU7FBkGFWocJ-WGUmpETKSLQYDI,8386
|
5
5
|
vllm_judge/exceptions.py,sha256=X9YxnukDuI3RwJPkabj3pl6v0JIbflvhUaWrdAW4RTM,1066
|
6
6
|
vllm_judge/judge.py,sha256=SDT_cGDZzHu8NOjG6eqHQsYqIuXR12j7ocpyrVDhHrQ,16939
|
7
|
-
vllm_judge/metrics.py,sha256=
|
8
|
-
vllm_judge/models.py,sha256=
|
9
|
-
vllm_judge/prompts.py,sha256=
|
7
|
+
vllm_judge/metrics.py,sha256=WwtR6Bb4cc0gDplhZnysNzD1EfOMCEzFc8-3hJMqnJs,48709
|
8
|
+
vllm_judge/models.py,sha256=o4OdRtRdsz9n5RhHrz-uA9ylG0cGQg99NJYay0RaeDE,7998
|
9
|
+
vllm_judge/prompts.py,sha256=KC8AfiIgKKxQuhT1bnnyYXrSBbcU2-RnkSLqDJfrt8o,7251
|
10
10
|
vllm_judge/templating.py,sha256=LjVFXFcwHl8xnBLLVr_IIqtN-EbLp0HZ5ndNbBpcJTQ,6998
|
11
11
|
vllm_judge/utils.py,sha256=lhByBIMS_1EwvxEe31jFgVcTwcFwm5mWoJDXG4TnbvQ,509
|
12
12
|
vllm_judge/api/__init__.py,sha256=aPQ1o7_ZzbJJpm2UyX3H35snbOGbgQJoglJjzdnc1LU,762
|
13
13
|
vllm_judge/api/client.py,sha256=l46IpQHJxmbDfXpyCOXfir70c_3hPaIr6OEiOzOMk5Q,12449
|
14
14
|
vllm_judge/api/models.py,sha256=GXj3slwytJWg5M4f5MPZ8Ft_hrkEEAZh0qgpYDy-Qe4,5102
|
15
15
|
vllm_judge/api/server.py,sha256=1UQMV6MRdlqHS6NYdrQI41bi_wNb0QC8RZD4jCEeTkU,17888
|
16
|
-
vllm_judge-0.1.
|
17
|
-
vllm_judge-0.1.
|
18
|
-
vllm_judge-0.1.
|
19
|
-
vllm_judge-0.1.
|
20
|
-
vllm_judge-0.1.
|
16
|
+
vllm_judge-0.1.5.dist-info/METADATA,sha256=5UXUqyckWp9fGLQXcBxkI6ejmFfWpCjjpyIeMx96zTI,4251
|
17
|
+
vllm_judge-0.1.5.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
18
|
+
vllm_judge-0.1.5.dist-info/entry_points.txt,sha256=F3plmbMXOQ0pBIh0clqWPVIJWl20_1LZ7QHxC2XF5Lg,51
|
19
|
+
vllm_judge-0.1.5.dist-info/top_level.txt,sha256=bqtMvn2y13cHSz_1-HKCBMzYSTfDHsTQBG6U5STHvwM,11
|
20
|
+
vllm_judge-0.1.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|