whatap-python 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- whatap/LICENSE +0 -0
- whatap/README.rst +49 -0
- whatap/__init__.py +923 -0
- whatap/__main__.py +4 -0
- whatap/agent/darwin/amd64/whatap_python +0 -0
- whatap/agent/darwin/arm64/whatap_python +0 -0
- whatap/agent/linux/amd64/whatap_python +0 -0
- whatap/agent/linux/arm64/whatap_python +0 -0
- whatap/agent/windows/whatap_python.exe +0 -0
- whatap/bootstrap/__init__.py +0 -0
- whatap/bootstrap/sitecustomize.py +19 -0
- whatap/build.py +4 -0
- whatap/conf/__init__.py +0 -0
- whatap/conf/configuration.py +280 -0
- whatap/conf/configure.py +105 -0
- whatap/conf/license.py +49 -0
- whatap/control/__init__.py +0 -0
- whatap/counter/__init__.py +14 -0
- whatap/counter/counter_manager.py +45 -0
- whatap/counter/tasks/__init__.py +3 -0
- whatap/counter/tasks/base_task.py +26 -0
- whatap/counter/tasks/llm_evaluator_task.py +501 -0
- whatap/counter/tasks/llm_log_sink_task.py +309 -0
- whatap/counter/tasks/llm_stat_task.py +78 -0
- whatap/counter/tasks/openfiledescriptor.py +67 -0
- whatap/io/__init__.py +1 -0
- whatap/io/data_inputx.py +161 -0
- whatap/io/data_outputx.py +262 -0
- whatap/llm/__init__.py +17 -0
- whatap/llm/definitions.py +43 -0
- whatap/llm/evaluators/__init__.py +136 -0
- whatap/llm/evaluators/base.py +114 -0
- whatap/llm/evaluators/builtins/__init__.py +91 -0
- whatap/llm/evaluators/builtins/answer_relevance.py +46 -0
- whatap/llm/evaluators/builtins/combined_judge.py +271 -0
- whatap/llm/evaluators/builtins/factuality.py +71 -0
- whatap/llm/evaluators/builtins/hallucination.py +97 -0
- whatap/llm/evaluators/builtins/llm_judge.py +516 -0
- whatap/llm/evaluators/builtins/pii_leak.py +214 -0
- whatap/llm/evaluators/builtins/prompt_injection.py +71 -0
- whatap/llm/evaluators/builtins/toxicity.py +53 -0
- whatap/llm/evaluators/builtins/url_scan.py +194 -0
- whatap/llm/evaluators/registry.py +192 -0
- whatap/llm/evaluators/sampler.py +83 -0
- whatap/llm/evaluators/scope.py +334 -0
- whatap/llm/features.py +66 -0
- whatap/llm/log_sink_packs/__init__.py +9 -0
- whatap/llm/log_sink_packs/llm_input_message.py +16 -0
- whatap/llm/log_sink_packs/llm_log_sink_pack.py +72 -0
- whatap/llm/log_sink_packs/llm_output_message.py +19 -0
- whatap/llm/log_sink_packs/llm_step_eval_status.py +94 -0
- whatap/llm/log_sink_packs/llm_step_status.py +118 -0
- whatap/llm/log_sink_packs/llm_system_message.py +16 -0
- whatap/llm/log_sink_packs/llm_tool_calls.py +44 -0
- whatap/llm/log_sink_packs/llm_tool_results.py +16 -0
- whatap/llm/log_sink_packs/llm_tx_status.py +108 -0
- whatap/llm/pricing.py +236 -0
- whatap/llm/prompt_meta.py +288 -0
- whatap/llm/providers/__init__.py +0 -0
- whatap/llm/providers/anthropic/__init__.py +37 -0
- whatap/llm/providers/anthropic/messages/__init__.py +0 -0
- whatap/llm/providers/anthropic/messages/messages.py +70 -0
- whatap/llm/providers/anthropic/messages/messages_context.py +76 -0
- whatap/llm/providers/anthropic/messages/messages_extractor.py +126 -0
- whatap/llm/providers/interceptor.py +182 -0
- whatap/llm/providers/openai/__init__.py +133 -0
- whatap/llm/providers/openai/chat/__init__.py +0 -0
- whatap/llm/providers/openai/chat/chat.py +82 -0
- whatap/llm/providers/openai/chat/chat_context.py +78 -0
- whatap/llm/providers/openai/chat/chat_extractor.py +127 -0
- whatap/llm/providers/openai/completions/__init__.py +0 -0
- whatap/llm/providers/openai/completions/completions.py +70 -0
- whatap/llm/providers/openai/completions/completions_context.py +31 -0
- whatap/llm/providers/openai/completions/completions_extractor.py +61 -0
- whatap/llm/providers/openai/content_parser.py +41 -0
- whatap/llm/providers/openai/embeddings/__init__.py +0 -0
- whatap/llm/providers/openai/embeddings/embeddings.py +59 -0
- whatap/llm/providers/openai/embeddings/embeddings_context.py +25 -0
- whatap/llm/providers/openai/embeddings/embeddings_extractor.py +26 -0
- whatap/llm/providers/openai/responses/__init__.py +0 -0
- whatap/llm/providers/openai/responses/responses.py +70 -0
- whatap/llm/providers/openai/responses/responses_context.py +88 -0
- whatap/llm/providers/openai/responses/responses_extractor.py +126 -0
- whatap/llm/providers/stream_accumulator.py +73 -0
- whatap/llm/stats/__init__.py +35 -0
- whatap/llm/stats/active_stat.py +86 -0
- whatap/llm/stats/answer_relevance_eval_stat.py +10 -0
- whatap/llm/stats/api_status_stat.py +35 -0
- whatap/llm/stats/base_stat.py +107 -0
- whatap/llm/stats/combined_judge_eval_stat.py +11 -0
- whatap/llm/stats/error_stat.py +59 -0
- whatap/llm/stats/eval_stat.py +225 -0
- whatap/llm/stats/factuality_eval_stat.py +10 -0
- whatap/llm/stats/feature_stat.py +104 -0
- whatap/llm/stats/finish_stat.py +105 -0
- whatap/llm/stats/hallucination_eval_stat.py +10 -0
- whatap/llm/stats/meter.py +18 -0
- whatap/llm/stats/perf_stat.py +117 -0
- whatap/llm/stats/pii_leak_eval_stat.py +12 -0
- whatap/llm/stats/prompt_injection_eval_stat.py +10 -0
- whatap/llm/stats/token_usage_stat.py +133 -0
- whatap/llm/stats/toxicity_eval_stat.py +10 -0
- whatap/llm/stats/url_scan_eval_stat.py +12 -0
- whatap/net/__init__.py +0 -0
- whatap/net/async_sender.py +107 -0
- whatap/net/packet_enum.py +44 -0
- whatap/net/packet_type_enum.py +31 -0
- whatap/net/param_def.py +69 -0
- whatap/net/stackhelper.py +87 -0
- whatap/net/udp_session.py +394 -0
- whatap/net/udp_thread.py +54 -0
- whatap/pack/__init__.py +0 -0
- whatap/pack/logSinkPack.py +77 -0
- whatap/pack/pack.py +34 -0
- whatap/pack/pack_enum.py +41 -0
- whatap/pack/tagCountPack.py +61 -0
- whatap/scripts/__init__.py +208 -0
- whatap/trace/__init__.py +12 -0
- whatap/trace/mod/__init__.py +0 -0
- whatap/trace/mod/amqp/__init__.py +0 -0
- whatap/trace/mod/amqp/kombu.py +122 -0
- whatap/trace/mod/amqp/pika.py +62 -0
- whatap/trace/mod/application/__init__.py +0 -0
- whatap/trace/mod/application/bottle.py +34 -0
- whatap/trace/mod/application/celery.py +81 -0
- whatap/trace/mod/application/cherrypy.py +30 -0
- whatap/trace/mod/application/django.py +287 -0
- whatap/trace/mod/application/django_asgi.py +266 -0
- whatap/trace/mod/application/django_py3.py +251 -0
- whatap/trace/mod/application/fastapi/__init__.py +31 -0
- whatap/trace/mod/application/fastapi/endpoint.py +73 -0
- whatap/trace/mod/application/fastapi/exception_log.py +63 -0
- whatap/trace/mod/application/fastapi/instrumentation.py +204 -0
- whatap/trace/mod/application/fastapi/scope.py +115 -0
- whatap/trace/mod/application/fastapi/transaction.py +67 -0
- whatap/trace/mod/application/flask.py +52 -0
- whatap/trace/mod/application/frappe.py +224 -0
- whatap/trace/mod/application/graphql.py +170 -0
- whatap/trace/mod/application/nameko.py +39 -0
- whatap/trace/mod/application/odoo.py +63 -0
- whatap/trace/mod/application/starlette.py +126 -0
- whatap/trace/mod/application/tornado.py +163 -0
- whatap/trace/mod/application/wsgi.py +195 -0
- whatap/trace/mod/database/__init__.py +0 -0
- whatap/trace/mod/database/cxoracle.py +49 -0
- whatap/trace/mod/database/mongo.py +169 -0
- whatap/trace/mod/database/mysql.py +80 -0
- whatap/trace/mod/database/neo4j.py +90 -0
- whatap/trace/mod/database/psycopg2.py +45 -0
- whatap/trace/mod/database/psycopg3.py +359 -0
- whatap/trace/mod/database/redis.py +122 -0
- whatap/trace/mod/database/sqlalchemy.py +213 -0
- whatap/trace/mod/database/sqlite3.py +130 -0
- whatap/trace/mod/database/util.py +630 -0
- whatap/trace/mod/email/__init__.py +0 -0
- whatap/trace/mod/email/smtp.py +78 -0
- whatap/trace/mod/httpc/__init__.py +0 -0
- whatap/trace/mod/httpc/django.py +31 -0
- whatap/trace/mod/httpc/httplib.py +70 -0
- whatap/trace/mod/httpc/httpx.py +62 -0
- whatap/trace/mod/httpc/requests.py +20 -0
- whatap/trace/mod/httpc/urllib3.py +27 -0
- whatap/trace/mod/httpc/util.py +388 -0
- whatap/trace/mod/logging.py +161 -0
- whatap/trace/mod/plugin.py +84 -0
- whatap/trace/mod/standalone/__init__.py +0 -0
- whatap/trace/mod/standalone/multiple.py +293 -0
- whatap/trace/mod/standalone/single.py +135 -0
- whatap/trace/simple_trace_context.py +18 -0
- whatap/trace/trace_context.py +212 -0
- whatap/trace/trace_context_manager.py +244 -0
- whatap/trace/trace_error.py +84 -0
- whatap/trace/trace_handler.py +89 -0
- whatap/trace/trace_import.py +91 -0
- whatap/trace/trace_module_definition.py +156 -0
- whatap/util/__init__.py +0 -0
- whatap/util/bit_util.py +49 -0
- whatap/util/cardinality/__init__.py +0 -0
- whatap/util/cardinality/hyperloglog.py +84 -0
- whatap/util/cardinality/murmurhash.py +20 -0
- whatap/util/cardinality/registerset.py +60 -0
- whatap/util/compare_util.py +19 -0
- whatap/util/date_util.py +55 -0
- whatap/util/debug_util.py +73 -0
- whatap/util/escape_literal_sql.py +233 -0
- whatap/util/frame_util.py +20 -0
- whatap/util/hash_util.py +103 -0
- whatap/util/hexa32.py +66 -0
- whatap/util/int_set.py +199 -0
- whatap/util/ip_util.py +63 -0
- whatap/util/keygen.py +11 -0
- whatap/util/linked_list.py +113 -0
- whatap/util/linked_map.py +359 -0
- whatap/util/metering_util.py +103 -0
- whatap/util/request_double_queue.py +68 -0
- whatap/util/request_queue.py +60 -0
- whatap/util/string_util.py +20 -0
- whatap/util/throttle_util.py +99 -0
- whatap/util/userid_util.py +134 -0
- whatap/value/__init__.py +1 -0
- whatap/value/blob_value.py +38 -0
- whatap/value/boolean_value.py +33 -0
- whatap/value/decimal_value.py +36 -0
- whatap/value/double_summary.py +86 -0
- whatap/value/double_value.py +33 -0
- whatap/value/float_array.py +42 -0
- whatap/value/float_value.py +34 -0
- whatap/value/int_array.py +42 -0
- whatap/value/ip4_value.py +50 -0
- whatap/value/list_value.py +105 -0
- whatap/value/long_array.py +44 -0
- whatap/value/long_summary.py +83 -0
- whatap/value/map_value.py +154 -0
- whatap/value/null_value.py +21 -0
- whatap/value/number_value.py +33 -0
- whatap/value/summary_value.py +39 -0
- whatap/value/text_array.py +58 -0
- whatap/value/text_hash_value.py +37 -0
- whatap/value/text_value.py +43 -0
- whatap/value/value.py +26 -0
- whatap/value/value_enum.py +80 -0
- whatap/whatap.conf +14 -0
- whatap_python-2.1.0.dist-info/METADATA +87 -0
- whatap_python-2.1.0.dist-info/RECORD +227 -0
- whatap_python-2.1.0.dist-info/WHEEL +5 -0
- whatap_python-2.1.0.dist-info/entry_points.txt +6 -0
- whatap_python-2.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import math
|
|
3
|
+
import struct
|
|
4
|
+
|
|
5
|
+
from whatap import PY2, PY3
|
|
6
|
+
from whatap.pack.pack import Pack
|
|
7
|
+
|
|
8
|
+
from whatap.value.null_value import NullValue
|
|
9
|
+
|
|
10
|
+
# Inclusive signed ranges for each integer width that writeDecimal can emit.
# A width of N bytes covers [-(2 ** (8N - 1)), 2 ** (8N - 1) - 1].
BYTE_MIN_VALUE = -(1 << 7)
BYTE_MAX_VALUE = (1 << 7) - 1
SHORT_MIN_VALUE = -(1 << 15)
SHORT_MAX_VALUE = (1 << 15) - 1
INT3_MIN_VALUE = -(1 << 23)
INT3_MAX_VALUE = (1 << 23) - 1
INT_MIN_VALUE = -(1 << 31)
INT_MAX_VALUE = (1 << 31) - 1
LONG5_MIN_VALUE = -(1 << 39)
LONG5_MAX_VALUE = (1 << 39) - 1
LONG_MIN_VALUE = -(1 << 63)
LONG_MAX_VALUE = (1 << 63) - 1
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class DataOutputX(object):
    """Big-endian binary writer backed by an in-memory ``io.BytesIO``.

    Most ``write*`` methods return ``self`` so calls can be chained;
    ``flush`` and ``writeToPos`` return ``None``.
    """

    def __init__(self, size=None):
        """Create the writer.

        :param size: when truthy, the buffer starts out holding ``size`` zero
            bytes. The stream position still starts at 0, so subsequent
            writes overwrite those bytes instead of appending after them.
        """
        if size:
            self.buffer = io.BytesIO(bytearray(size))
        else:
            self.buffer = io.BytesIO()

    @staticmethod
    def toInt(buf, pos):
        """Decode ``buf[pos:pos + 4]`` as a big-endian unsigned 32-bit integer.

        ``buf`` must yield integers when indexed (e.g. ``bytearray``).
        """
        value = 0
        for offset in range(4):
            value = (value << 8) + (buf[pos + offset] & 0xff)
        return value

    def writePack(self, v, ln_fmt):
        """Write the pack type (short) followed by the pack body.

        :param v: object exposing ``getPackType()`` and ``write(out)``.
        :param ln_fmt: when truthy, the buffer is zero-padded so that its
            total length becomes a multiple of ``ln_fmt``.
        """
        self.writeShort(v.getPackType())
        v.write(self)
        if ln_fmt:
            remainder = len(self.buffer.getvalue()) % ln_fmt
            if remainder:
                self.write(bytearray(int(math.floor(ln_fmt - remainder))))
        return self

    def writeStep(self, step):
        """Write the step type (one byte) followed by the step body."""
        self.writeByte(step.getStepType())
        step.write(self)
        return self

    @classmethod
    def toBytes(cls, v, ln_fmt=None):
        """Serialize ``v``.

        A ``Pack`` is serialized through :meth:`writePack`; any other value
        is treated as an integer and encoded as four big-endian bytes
        (two's complement, wrapped modulo 2**32).
        """
        if isinstance(v, Pack):
            return cls().writePack(v, ln_fmt).toByteArray()

        word = v % 0x100000000
        return bytearray((word >> shift) & 0xff for shift in (24, 16, 8, 0))

    @classmethod
    def toBytesLong(cls, v):
        """Encode ``v`` as eight big-endian bytes (two's complement).

        The value is wrapped modulo 2**64. Wrapping modulo 2**32, as the
        previous implementation did, zeroed the four most significant bytes
        for every input.
        """
        word = v % 0x10000000000000000
        return bytearray((word >> shift) & 0xff for shift in range(56, -8, -8))

    def set(self, dest, pos, src):
        """Copy ``src`` into ``dest`` starting at ``pos`` and return ``dest``."""
        dest[pos:pos + len(src)] = src[0:]
        return dest

    def writeIntBytes(self, b):
        """Write ``b`` prefixed by its length as a 4-byte int (0 when empty)."""
        if not b or not len(b):
            self.writeInt(0)
        else:
            self.writeInt(len(b))
            self.write(b)
        return self

    def writeBoolean(self, v):
        """Write ``v`` as a single boolean byte."""
        self.buffer.write(struct.pack('>?', v))
        return self

    def writeByte(self, v):
        """Write the low 8 bits of ``v``."""
        self.buffer.write(struct.pack('>B', v & 0xFF))
        return self

    def writeShort(self, v):
        """Write the low 16 bits of ``v``, big-endian."""
        self.buffer.write(struct.pack('>H', v & 0xFFFF))
        return self

    def writeInt3(self, v):
        """Write the low 24 bits of ``v``, big-endian."""
        octets = [(v >> shift) & 0xFF for shift in (16, 8, 0)]
        self.buffer.write(struct.pack('>BBB', *octets))
        return self

    def writeInt(self, v):
        """Write the low 32 bits of ``v``, big-endian."""
        self.buffer.write(struct.pack('>I', v & 0xFFFFFFFF))
        return self

    def writeLong5(self, v):
        """Write the low 40 bits of ``v``, big-endian."""
        octets = [(v >> shift) & 0xFF for shift in (32, 24, 16, 8, 0)]
        self.buffer.write(struct.pack('>BBBBB', *octets))
        return self

    def writeLong(self, v):
        """Write the low 64 bits of ``v``, big-endian."""
        self.buffer.write(struct.pack('>Q', v & 0xFFFFFFFFFFFFFFFF))
        return self

    def writeFloat(self, v):
        """Write ``v`` as a 4-byte big-endian float."""
        self.buffer.write(struct.pack('>f', v))
        return self

    def writeDouble(self, v):
        """Write ``v`` as an 8-byte big-endian double."""
        self.buffer.write(struct.pack('>d', v))
        return self

    def writeDecimal(self, v):
        """Write ``v`` using the narrowest width that can hold it.

        A tag byte (0, 1, 2, 3, 4, 5 or 8) is written first; it is the number
        of payload bytes that follow. Zero is encoded as the tag alone.

        NOTE: a value outside the signed 64-bit range matches no branch, so
        nothing at all is written for it.
        """
        if v == 0:
            self.writeByte(0)
        elif BYTE_MIN_VALUE <= v <= BYTE_MAX_VALUE:
            self.writeByte(1)
            self.writeByte(v)
        elif SHORT_MIN_VALUE <= v <= SHORT_MAX_VALUE:
            self.writeByte(2)
            self.writeShort(v)
        elif INT3_MIN_VALUE <= v <= INT3_MAX_VALUE:
            self.writeByte(3)
            self.writeInt3(v)
        elif INT_MIN_VALUE <= v <= INT_MAX_VALUE:
            self.writeByte(4)
            self.writeInt(v)
        elif LONG5_MIN_VALUE <= v <= LONG5_MAX_VALUE:
            self.writeByte(5)
            self.writeLong5(v)
        elif LONG_MIN_VALUE <= v <= LONG_MAX_VALUE:
            self.writeByte(8)
            self.writeLong(v)
        return self

    def write(self, v):
        """Write the raw bytes ``v`` unchanged."""
        self.buffer.write(v)
        return self

    def writeBlob(self, v):
        """Write ``v`` with a variable-width length prefix.

        * empty or ``None``: a single ``0`` byte;
        * up to 253 bytes: one length byte;
        * up to 65535 bytes: marker ``255`` plus a 2-byte length;
        * otherwise: marker ``254`` plus a 4-byte length.
        """
        # Test emptiness before len() so that None is written as an empty
        # blob instead of raising TypeError.
        if not v:
            self.writeByte(0)
            return self

        ln = len(v)
        if ln <= 253:
            self.writeByte(ln)
        elif ln <= 65535:
            self.writeByte(255)
            self.writeShort(ln)
        else:
            self.writeByte(254)
            self.writeInt(ln)
        self.write(v)
        return self

    def writeText(self, v):
        """Write ``v`` as a UTF-8 blob (see :meth:`writeBlob`)."""
        if not v:
            self.writeByte(0)
        else:
            self.writeBlob(v.encode("utf-8"))
        return self

    def writeUTF(self, v):
        """Write ``v`` as UTF-8 prefixed by a 2-byte length.

        The payload is capped at 65535 bytes. If the cap falls inside a
        multi-byte character, that partial character is dropped so the
        payload remains valid UTF-8.
        """
        data = v.encode('utf-8')
        if len(data) > 65535:
            # 'ignore' discards only the incomplete sequence at the cut point;
            # everything before it was produced by a valid encode.
            data = data[:65535].decode('utf-8', 'ignore').encode('utf-8')

        self.buffer.write(struct.pack('>H', len(data)))
        self.buffer.write(data)
        return self

    def writeValue(self, v):
        """Write the value type (one byte) followed by the value body.

        A falsy ``v`` is replaced by ``NullValue()``.
        NOTE(review): the check uses truthiness, so a value object that
        evaluates as false would also be replaced - confirm this is intended.
        """
        if not v:
            v = NullValue()

        self.writeByte(v.getValueType())
        v.write(self)
        return self

    def toByteArray(self):
        """Return the full contents of the buffer as ``bytes``."""
        return self.buffer.getvalue()

    def flush(self):
        """Flush the underlying buffer."""
        self.buffer.flush()

    def writeFloatArray(self, vv):
        """Write a 2-byte element count followed by each element as a float."""
        if not vv:
            self.writeShort(0)
        else:
            self.writeShort(len(vv))
            for item in vv:
                self.writeFloat(item)
        return self

    def writeIntArray(self, vv):
        """Write a 2-byte element count followed by each element as an int."""
        if not vv:
            self.writeShort(0)
        else:
            self.writeShort(len(vv))
            for item in vv:
                self.writeInt(item)
        return self

    def writeLongArray(self, vv):
        """Write a 2-byte element count followed by each element as a long."""
        if not vv:
            self.writeShort(0)
        else:
            self.writeShort(len(vv))
            for item in vv:
                self.writeLong(item)
        return self

    def writeDecimalArray(self, vv):
        """Write a 2-byte element count followed by each element as a decimal."""
        if not vv:
            self.writeShort(0)
        else:
            self.writeShort(len(vv))
            for item in vv:
                self.writeDecimal(item)
        return self

    def writeToPos(self, pos, v):
        """Overwrite four bytes at ``pos`` with the low 32 bits of ``v``.

        ``v`` is masked on both interpreter branches so that negative values
        are stored as two's complement instead of raising on Python 3. The
        stream position is left where it was.
        """
        word = v & 0xFFFFFFFF
        if PY3:
            # Release the exported view immediately: a BytesIO cannot be
            # resized while a view of it is still alive.
            with self.buffer.getbuffer() as view:
                struct.pack_into('>I', view, pos, word)
        else:
            # The buffer is rebuilt on this branch, so restore the stream
            # position; otherwise later writes would overwrite the start.
            cursor = self.buffer.tell()
            buffer_arr = bytearray(self.buffer.getvalue())
            buffer_arr[pos:pos + 4] = struct.pack('>I', word)
            self.buffer = io.BytesIO(buffer_arr)
            self.buffer.seek(cursor)

    def size(self):
        """Return the number of bytes currently held by the buffer."""
        return self.buffer.getbuffer().nbytes
|
|
262
|
+
|
whatap/llm/__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from whatap.llm.log_sink_packs.llm_step_status import LlmStepStatus
|
|
2
|
+
|
|
3
|
+
# Public API: prompt 메타 (이름/버전) 데코레이터 + 컨텍스트 매니저.
|
|
4
|
+
# LLM API 호출 시 메트릭/pack 의 ``operation_type`` 태그에 name 이, ``prompt_version``
|
|
5
|
+
# 태그에 version 이 자동 인라인. 미적용 시 default/v1.
|
|
6
|
+
from whatap.llm.prompt_meta import (
|
|
7
|
+
prompt_meta,
|
|
8
|
+
prompt_meta_scope,
|
|
9
|
+
get_prompt_meta,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
'LlmStepStatus',
|
|
14
|
+
'prompt_meta',
|
|
15
|
+
'prompt_meta_scope',
|
|
16
|
+
'get_prompt_meta',
|
|
17
|
+
]
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""LLM 패키지에서 사용하는 상수 및 매핑 데이터 정의."""
|
|
2
|
+
LOG_SINK_CATEGORY = '#LlmCallLog'

# Token fields used when a provider has no dedicated entry.
DEFAULT_TOKEN_FIELDS = ['input_tokens', 'output_tokens', 'total_tokens_count']

# Per-provider token fields: the common fields followed by provider extras.
# Each value is a fresh list, so none of them aliases DEFAULT_TOKEN_FIELDS.
PROVIDER_TOKEN_FIELDS = {
    'openai': DEFAULT_TOKEN_FIELDS + [
        'cached_tokens',
        'reasoning_tokens',
        'audio_input_tokens',
        'audio_output_tokens',
        'accepted_prediction_tokens',
        'rejected_prediction_tokens',
        'embedding_count',
        'dimensions',
        'similarity',
    ],
    'anthropic': DEFAULT_TOKEN_FIELDS + [
        'cache_creation_input_tokens',
        'cache_read_input_tokens',
    ],
}

# (URL path, operation name) pairs for the OpenAI API.
OPENAI_URL_OPERATION_MAP = [
    ("/v1/chat/completions", "chat"),
    ("/v1/responses", "response"),
    ("/v1/completions", "completion"),
    ("/v1/embeddings", "embedding"),
    ("/v1/images/generations", "image_generation"),
    ("/v1/images/edits", "image_edit"),
    ("/v1/audio/transcriptions", "audio_transcription"),
    ("/v1/audio/translations", "audio_translation"),
    ("/v1/audio/speech", "audio_speech"),
    ("/v1/moderations", "moderation"),
    ("/v1/fine_tuning", "fine_tuning"),
    ("/v1/files", "file"),
    ("/v1/assistants", "assistant"),
    ("/v1/threads", "thread"),
    ("/v1/vector_stores", "vector_store"),
    ("/v1/batches", "batch"),
]

# (URL path, operation name) pairs for the Anthropic API.
ANTHROPIC_URL_OPERATION_MAP = [("/v1/messages", "chat")]

# Combined table: OpenAI entries first, then Anthropic.
URL_OPERATION_MAP = list(OPENAI_URL_OPERATION_MAP)
URL_OPERATION_MAP.extend(ANTHROPIC_URL_OPERATION_MAP)
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""WhatAP LLM Evaluation SDK 공개 API.
|
|
2
|
+
|
|
3
|
+
권장 사용 패턴 — 데코레이터 / 컨텍스트 매니저로 트랜잭션·함수 단위 적용:
|
|
4
|
+
from whatap.llm.evaluators import evaluate_with, evaluation_scope
|
|
5
|
+
from whatap.llm.evaluators.builtins import HallucinationEvaluator, PIILeakEvaluator
|
|
6
|
+
|
|
7
|
+
@evaluate_with(HallucinationEvaluator(judge_fn=judge), PIILeakEvaluator())
|
|
8
|
+
def chat(question):
|
|
9
|
+
return client.chat.completions.create(...).choices[0].message.content
|
|
10
|
+
|
|
11
|
+
# 또는
|
|
12
|
+
def chat(question):
|
|
13
|
+
with evaluation_scope(HallucinationEvaluator(judge_fn=judge)):
|
|
14
|
+
return client.chat.completions.create(...).choices[0].message.content
|
|
15
|
+
|
|
16
|
+
전역 always-on (앱 전체 LLM 호출에 무조건 적용 — PII 검사 등에 적합):
|
|
17
|
+
from whatap.llm.evaluators import register_evaluator
|
|
18
|
+
register_evaluator(PIILeakEvaluator()) # 앱 시작 시 한 번
|
|
19
|
+
|
|
20
|
+
명시 평가 (사용자 피드백 등 시점이 다른 평가):
|
|
21
|
+
from whatap.llm.evaluators import submit_evaluation
|
|
22
|
+
submit_evaluation(label='user_thumbs', value=1.0, txid=req.txid, step_id=req.step_id)
|
|
23
|
+
|
|
24
|
+
활성화:
|
|
25
|
+
whatap.conf 에 ``llm_eval_enabled=true`` 또는
|
|
26
|
+
환경변수 ``WHATAP_LLM_EVAL_ENABLED=true``.
|
|
27
|
+
"""
|
|
28
|
+
import time
|
|
29
|
+
|
|
30
|
+
from whatap import logging
|
|
31
|
+
|
|
32
|
+
from whatap.llm.evaluators.base import (
|
|
33
|
+
BaseEvaluator,
|
|
34
|
+
EvaluatorContext,
|
|
35
|
+
EvaluatorResult,
|
|
36
|
+
)
|
|
37
|
+
from whatap.llm.evaluators.registry import (
|
|
38
|
+
EvaluatorRegistry,
|
|
39
|
+
register_evaluator,
|
|
40
|
+
unregister_evaluator,
|
|
41
|
+
)
|
|
42
|
+
from whatap.llm.evaluators.scope import (
|
|
43
|
+
evaluate_with,
|
|
44
|
+
evaluation_scope,
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def submit_evaluation(label, value, metric_type=None, reasoning=None,
                      metadata=None, txid=None, step_id=None, index=None):
    """Emit an evaluation result supplied explicitly by the caller.

    This is the counterpart of Datadog's ``LLMObs.submit_evaluation()``.
    When ``txid`` or ``step_id`` is omitted, the missing join keys
    (``txid``, ``step_id``, ``index``) are read from the current
    TraceContext, so a call made inside the LLM transaction is linked to it
    automatically. Pass the keys explicitly when calling from another
    thread or task.

    :param label: evaluation label (required, e.g. 'user_satisfaction');
        must not contain ".".
    :param value: evaluation value (bool/int/float/str/dict).
    :param metric_type: 'score'|'categorical'|'boolean'|'json'. Inferred
        from the type of ``value`` when omitted.
    :param reasoning: rationale for the evaluation (optional). Accepted but
        not read by this function body.
    :param metadata: arbitrary dict (optional). Accepted but not read by
        this function body.
    :param txid, step_id, index: explicit join keys.
    """
    if not label:
        logging.warning('[LLM] submit_evaluation: missing label', extra={'id': 'LLM050'})
        return
    if '.' in label:
        logging.warning('[LLM] submit_evaluation: label must not contain "." (got %s)' % label,
                        extra={'id': 'LLM051'})
        return

    # Context binding: fill in whichever join keys the caller left out.
    keys_incomplete = txid is None or step_id is None
    if keys_incomplete:
        try:
            from whatap.trace.trace_context_manager import TraceContextManager
            current = TraceContextManager.getLocalContext()
            if current is not None:
                txid = str(current.id) if txid is None else txid
                step_id = (str(getattr(current, '_llm_step_id', 0))
                           if step_id is None else step_id)
                index = (getattr(current, '_llm_call_index', 0)
                         if index is None else index)
        except Exception:
            # Best effort: a failed lookup is reported below as a missing txid.
            pass

    if txid is None:
        logging.warning('[LLM] submit_evaluation: no active trace context (label=%s)' % label,
                        extra={'id': 'LLM052'})
        return

    # Infer metric_type from the value. bool is tested first because it is a
    # subclass of int.
    if metric_type is None:
        inference_table = (
            (bool, 'boolean'),
            ((int, float), 'score'),
            (dict, 'json'),
        )
        for accepted_types, inferred in inference_table:
            if isinstance(value, accepted_types):
                metric_type = inferred
                break
        else:
            metric_type = 'categorical'
            if value is not None and not isinstance(value, str):
                value = str(value)

    try:
        from whatap.llm.log_sink_packs.llm_step_eval_status import LlmStepEvalStatus
        from whatap.counter.tasks.llm_evaluator_task import _LABEL_TO_FIELD
        from whatap.counter.tasks.llm_log_sink_task import dispatch_llm_evaluation_pack

        pack = LlmStepEvalStatus()
        pack.txid = str(txid)
        pack.step_id = None if step_id is None else str(step_id)
        pack.index = 0 if index is None else int(index)
        # Late-binding user feedback: the metadata of the original LLM call
        # (model/tokens/cost) is unknown here, so only eval_success and the
        # score field mapped to this label (if any) are populated.
        pack.eval_success = metric_type != 'error'
        field_name = _LABEL_TO_FIELD.get(label)
        is_plain_number = isinstance(value, (int, float)) and not isinstance(value, bool)
        if field_name is not None and is_plain_number:
            setattr(pack, field_name, float(value))
        dispatch_llm_evaluation_pack(pack)
    except Exception as e:
        logging.warning('[LLM] submit_evaluation failed: %s' % e, extra={'id': 'LLM053'})
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
__all__ = [
|
|
125
|
+
'BaseEvaluator',
|
|
126
|
+
'EvaluatorContext',
|
|
127
|
+
'EvaluatorResult',
|
|
128
|
+
# primary: per-scope application
|
|
129
|
+
'evaluate_with',
|
|
130
|
+
'evaluation_scope',
|
|
131
|
+
# secondary: app-wide always-on
|
|
132
|
+
'register_evaluator',
|
|
133
|
+
'unregister_evaluator',
|
|
134
|
+
# late-binding (e.g. user feedback)
|
|
135
|
+
'submit_evaluation',
|
|
136
|
+
]
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""LLM 평가자 베이스 타입 정의.
|
|
2
|
+
|
|
3
|
+
EvaluatorContext : 평가자에 전달되는 읽기전용 입력 (LLM 호출의 결과 + 결합키).
|
|
4
|
+
EvaluatorResult : 평가자가 반환하는 표준 결과 컨테이너.
|
|
5
|
+
BaseEvaluator : 사용자가 상속하여 evaluate() 를 구현하는 베이스 클래스.
|
|
6
|
+
|
|
7
|
+
데이터독 LLMObs 의 BaseEvaluator + EvaluatorContext + EvaluatorResult 와 같은 역할이며,
|
|
8
|
+
WhatAP 의 (txid, step_id, index) 를 결합키로 사용한다.
|
|
9
|
+
"""
|
|
10
|
+
from collections import namedtuple
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# frozen 컨테이너로 evaluator가 입력을 변경하지 못하도록 한다.
|
|
14
|
+
EvaluatorContext = namedtuple(
    'EvaluatorContext',
    (
        # -- join keys (same as LlmStepStatus) --
        'txid',               # str - TraceContext.id
        'step_id',            # str - ctx._llm_step_id
        'index',              # int - ordinal of the LLM call within the transaction

        # -- LLM metadata --
        'provider',           # str - e.g. 'api.openai.com'
        'url',                # str - e.g. '/v1/chat/completions'
        'model',              # str
        'operation_type',     # str - 'chat'|'embedding'|'completion'|...

        # -- I/O text (as extracted by the interceptor) --
        'input_text',         # str - prompt body
        'output_text',        # str - completion body
        'system_text',        # str - system message (joined with \n if several)
        'reasoning_text',     # str - reasoning body ('' when absent)
        'tool_calls_text',    # str
        'tool_results_text',  # str

        # -- result metadata --
        'success',            # bool
        'finish_reason',      # str|None
        'latency_ms',         # int|None
        'input_tokens',       # int|None
        'output_tokens',      # int|None

        # -- evaluation infrastructure, propagated automatically --
        # The user's LLM client object exactly as captured by the interceptor.
        # Judge calls reuse this instance; no new client / httpx is created.
        'client',             # OpenAI|AsyncOpenAI|Anthropic|...|None

        # The user's running event loop, captured by the interceptor at the
        # time of an async call. An async client is bound to its loop, so a
        # sync worker must dispatch to that same loop with
        # ``run_coroutine_threadsafe``. None for sync calls or when the
        # capture failed; the client is then sync and can be called directly.
        'event_loop',         # asyncio.AbstractEventLoop|None
    ),
)
# Only the last field (event_loop) has a default.
EvaluatorContext.__new__.__defaults__ = (None,)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class EvaluatorResult(object):
    """Standard container for an evaluation result.

    An evaluator's ``evaluate()`` may also return a raw value
    (bool/int/float/str/dict); LlmEvaluatorTask then wraps it in an
    ``EvaluatorResult`` automatically.

    ``extras`` lets a single ``evaluate()`` call emit metrics for several
    labels at once. For example, CombinedJudgeEvaluator derives
    hallucination, answer_relevance and toxicity from one LLM judge call and
    attaches ``extras={'hallucination': ..., ...}`` to its primary result
    (combined_judge) so that all of them are inlined into the same
    LlmStepStatus pack.

    Each ``extras`` value is either an ``EvaluatorResult`` instance or a
    dict of the form ``(value, metric_type, reasoning, metadata)``.
    """

    __slots__ = ('value', 'metric_type', 'reasoning', 'metadata', 'extras')

    def __init__(self, value, metric_type=None, reasoning=None, metadata=None,
                 extras=None):
        self.value = value
        # When left as None, LlmEvaluatorTask infers metric_type from the
        # type of ``value``.
        self.metric_type = metric_type
        self.reasoning = reasoning
        # Falsy arguments are replaced by a fresh dict for each instance.
        self.metadata = metadata if metadata else {}
        # extras: {label: EvaluatorResult or dict}
        self.extras = extras if extras else {}
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class BaseEvaluator(object):
    """Base class of every evaluator; subclasses set LABEL and implement evaluate().

    Example:
        class OutputLengthEvaluator(BaseEvaluator):
            LABEL = 'output_length'
            METRIC_TYPE = 'score'

            def evaluate(self, context):
                return float(len(context.output_text))

    Evaluators run asynchronously on the evaluation worker thread pool, and
    each result is emitted as a separate ``LlmStepEvalStatus`` pack
    (llm_log_type=llm_step_eval_status). That pack has the same structure as
    the original LlmStepStatus (model/tokens/cost/...) plus five evaluation
    score fields.
    """

    # Required: sampler matching key and metric label. LlmEvaluatorTask
    # rejects evaluators that leave it as None.
    LABEL = None
    # Optional: 'score'|'categorical'|'boolean'|'json'. Inferred when None.
    METRIC_TYPE = None

    def evaluate(self, context):
        """Evaluate a single LLM call.

        :param context: EvaluatorContext - read-only input.
        :return: an EvaluatorResult, or a raw value (bool/int/float/str/dict).
                 A raw value is wrapped in an EvaluatorResult by
                 LlmEvaluatorTask, with metric_type inferred from its type.
        :raises NotImplementedError: always, until a subclass overrides it.
        """
        raise NotImplementedError()
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""WhatAP LLM Evaluation 빌트인 평가자.
|
|
2
|
+
|
|
3
|
+
LLM judge 기반 평가자는 별도 LLM 호출(judge call)로 점수를 산출하므로 비용이
|
|
4
|
+
발생한다. ``whatap.conf`` 의 ``llm_eval_sample_rate`` (0.0~1.0) 로 샘플링 비율을
|
|
5
|
+
조정해 비용을 통제한다. 기본 1.0 (항상 실행) — 비용 통제가 필요하면 0.1 같은
|
|
6
|
+
값으로 낮춰서 judge 호출의 1/10 만 실제로 발생시킬 수 있음.
|
|
7
|
+
|
|
8
|
+
규칙 기반 평가자 (PIILeak / URLScan) 는 LLM 호출이 없어 비용 0 — 매 호출 평가해도 됨.
|
|
9
|
+
|
|
10
|
+
사용 예 — 함수 데코레이터로 트랜잭션 단위 적용 (권장):
|
|
11
|
+
|
|
12
|
+
import openai
|
|
13
|
+
from whatap.llm.evaluators import evaluate_with
|
|
14
|
+
from whatap.llm.evaluators.builtins import (
|
|
15
|
+
CombinedJudgeEvaluator, PIILeakEvaluator, URLScanEvaluator,
|
|
16
|
+
make_openai_judge,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
client = openai.OpenAI()
|
|
20
|
+
judge = make_openai_judge(client=client, model='gpt-4o-mini')
|
|
21
|
+
|
|
22
|
+
@evaluate_with(
|
|
23
|
+
CombinedJudgeEvaluator(judge_fn=judge), # 5 의미 aspect 1번 호출
|
|
24
|
+
PIILeakEvaluator(), # 정규식, 비용 0
|
|
25
|
+
URLScanEvaluator(), # 정규식, 비용 0
|
|
26
|
+
)
|
|
27
|
+
def chat(question: str) -> str:
|
|
28
|
+
return client.chat.completions.create(
|
|
29
|
+
model='gpt-4o',
|
|
30
|
+
messages=[{'role': 'user', 'content': question}],
|
|
31
|
+
).choices[0].message.content
|
|
32
|
+
|
|
33
|
+
평가자 종류:
|
|
34
|
+
|
|
35
|
+
LLM judge 기반 (judge call 발생) ─────────────────────────────────
|
|
36
|
+
HallucinationEvaluator 할루시네이션 0.0~1.0 (faithfulness / self-consistency)
|
|
37
|
+
AnswerRelevanceEvaluator 질문↔답변 관련성 0.0~1.0
|
|
38
|
+
ToxicityEvaluator 유해성 0.0~1.0 + 카테고리
|
|
39
|
+
PromptInjectionEvaluator prompt injection / system prompt leak 0.0~1.0
|
|
40
|
+
FactualityEvaluator 검증 가능한 사실 오류 0.0~1.0
|
|
41
|
+
CombinedJudgeEvaluator 위 5 개를 1번의 judge 호출로 모두 산출 (비용 1/5)
|
|
42
|
+
|
|
43
|
+
규칙 기반 (judge call 0회, 비용 0) ───────────────────────────────
|
|
44
|
+
PIILeakEvaluator 정규식+chksum 으로 PII 노출 탐지
|
|
45
|
+
URLScanEvaluator URL 추출 + suspicious 패턴 매칭
|
|
46
|
+
|
|
47
|
+
Judge LLM 호출 헬퍼:
|
|
48
|
+
|
|
49
|
+
make_openai_judge(client=..., model='gpt-4o-mini')
|
|
50
|
+
make_anthropic_judge(client=..., model='claude-3-5-haiku-latest')
|
|
51
|
+
|
|
52
|
+
커스텀 judge 작성 시 ``LLMJudgeBase`` 상속하고 ``make_prompt`` / ``parse_judgment`` 만
|
|
53
|
+
구현하면 된다.
|
|
54
|
+
"""
|
|
55
|
+
from whatap.llm.evaluators.builtins.llm_judge import (
|
|
56
|
+
LLMJudgeBase,
|
|
57
|
+
make_openai_judge,
|
|
58
|
+
make_anthropic_judge,
|
|
59
|
+
parse_json_response,
|
|
60
|
+
set_default_judge_fn,
|
|
61
|
+
get_default_judge_fn,
|
|
62
|
+
)
|
|
63
|
+
from whatap.llm.evaluators.builtins.hallucination import HallucinationEvaluator
|
|
64
|
+
from whatap.llm.evaluators.builtins.answer_relevance import AnswerRelevanceEvaluator
|
|
65
|
+
from whatap.llm.evaluators.builtins.toxicity import ToxicityEvaluator
|
|
66
|
+
from whatap.llm.evaluators.builtins.prompt_injection import PromptInjectionEvaluator
|
|
67
|
+
from whatap.llm.evaluators.builtins.factuality import FactualityEvaluator
|
|
68
|
+
from whatap.llm.evaluators.builtins.combined_judge import CombinedJudgeEvaluator
|
|
69
|
+
from whatap.llm.evaluators.builtins.pii_leak import PIILeakEvaluator
|
|
70
|
+
from whatap.llm.evaluators.builtins.url_scan import URLScanEvaluator
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
__all__ = [
|
|
74
|
+
# judge base + helpers
|
|
75
|
+
'LLMJudgeBase',
|
|
76
|
+
'make_openai_judge',
|
|
77
|
+
'make_anthropic_judge',
|
|
78
|
+
'parse_json_response',
|
|
79
|
+
'set_default_judge_fn',
|
|
80
|
+
'get_default_judge_fn',
|
|
81
|
+
# LLM judge 기반
|
|
82
|
+
'HallucinationEvaluator',
|
|
83
|
+
'AnswerRelevanceEvaluator',
|
|
84
|
+
'ToxicityEvaluator',
|
|
85
|
+
'PromptInjectionEvaluator',
|
|
86
|
+
'FactualityEvaluator',
|
|
87
|
+
'CombinedJudgeEvaluator',
|
|
88
|
+
# 규칙 기반 (LLM 호출 X)
|
|
89
|
+
'PIILeakEvaluator',
|
|
90
|
+
'URLScanEvaluator',
|
|
91
|
+
]
|