whatap-python 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. whatap/LICENSE +0 -0
  2. whatap/README.rst +49 -0
  3. whatap/__init__.py +923 -0
  4. whatap/__main__.py +4 -0
  5. whatap/agent/darwin/amd64/whatap_python +0 -0
  6. whatap/agent/darwin/arm64/whatap_python +0 -0
  7. whatap/agent/linux/amd64/whatap_python +0 -0
  8. whatap/agent/linux/arm64/whatap_python +0 -0
  9. whatap/agent/windows/whatap_python.exe +0 -0
  10. whatap/bootstrap/__init__.py +0 -0
  11. whatap/bootstrap/sitecustomize.py +19 -0
  12. whatap/build.py +4 -0
  13. whatap/conf/__init__.py +0 -0
  14. whatap/conf/configuration.py +280 -0
  15. whatap/conf/configure.py +105 -0
  16. whatap/conf/license.py +49 -0
  17. whatap/control/__init__.py +0 -0
  18. whatap/counter/__init__.py +14 -0
  19. whatap/counter/counter_manager.py +45 -0
  20. whatap/counter/tasks/__init__.py +3 -0
  21. whatap/counter/tasks/base_task.py +26 -0
  22. whatap/counter/tasks/llm_evaluator_task.py +501 -0
  23. whatap/counter/tasks/llm_log_sink_task.py +309 -0
  24. whatap/counter/tasks/llm_stat_task.py +78 -0
  25. whatap/counter/tasks/openfiledescriptor.py +67 -0
  26. whatap/io/__init__.py +1 -0
  27. whatap/io/data_inputx.py +161 -0
  28. whatap/io/data_outputx.py +262 -0
  29. whatap/llm/__init__.py +17 -0
  30. whatap/llm/definitions.py +43 -0
  31. whatap/llm/evaluators/__init__.py +136 -0
  32. whatap/llm/evaluators/base.py +114 -0
  33. whatap/llm/evaluators/builtins/__init__.py +91 -0
  34. whatap/llm/evaluators/builtins/answer_relevance.py +46 -0
  35. whatap/llm/evaluators/builtins/combined_judge.py +271 -0
  36. whatap/llm/evaluators/builtins/factuality.py +71 -0
  37. whatap/llm/evaluators/builtins/hallucination.py +97 -0
  38. whatap/llm/evaluators/builtins/llm_judge.py +516 -0
  39. whatap/llm/evaluators/builtins/pii_leak.py +214 -0
  40. whatap/llm/evaluators/builtins/prompt_injection.py +71 -0
  41. whatap/llm/evaluators/builtins/toxicity.py +53 -0
  42. whatap/llm/evaluators/builtins/url_scan.py +194 -0
  43. whatap/llm/evaluators/registry.py +192 -0
  44. whatap/llm/evaluators/sampler.py +83 -0
  45. whatap/llm/evaluators/scope.py +334 -0
  46. whatap/llm/features.py +66 -0
  47. whatap/llm/log_sink_packs/__init__.py +9 -0
  48. whatap/llm/log_sink_packs/llm_input_message.py +16 -0
  49. whatap/llm/log_sink_packs/llm_log_sink_pack.py +72 -0
  50. whatap/llm/log_sink_packs/llm_output_message.py +19 -0
  51. whatap/llm/log_sink_packs/llm_step_eval_status.py +94 -0
  52. whatap/llm/log_sink_packs/llm_step_status.py +118 -0
  53. whatap/llm/log_sink_packs/llm_system_message.py +16 -0
  54. whatap/llm/log_sink_packs/llm_tool_calls.py +44 -0
  55. whatap/llm/log_sink_packs/llm_tool_results.py +16 -0
  56. whatap/llm/log_sink_packs/llm_tx_status.py +108 -0
  57. whatap/llm/pricing.py +236 -0
  58. whatap/llm/prompt_meta.py +288 -0
  59. whatap/llm/providers/__init__.py +0 -0
  60. whatap/llm/providers/anthropic/__init__.py +37 -0
  61. whatap/llm/providers/anthropic/messages/__init__.py +0 -0
  62. whatap/llm/providers/anthropic/messages/messages.py +70 -0
  63. whatap/llm/providers/anthropic/messages/messages_context.py +76 -0
  64. whatap/llm/providers/anthropic/messages/messages_extractor.py +126 -0
  65. whatap/llm/providers/interceptor.py +182 -0
  66. whatap/llm/providers/openai/__init__.py +133 -0
  67. whatap/llm/providers/openai/chat/__init__.py +0 -0
  68. whatap/llm/providers/openai/chat/chat.py +82 -0
  69. whatap/llm/providers/openai/chat/chat_context.py +78 -0
  70. whatap/llm/providers/openai/chat/chat_extractor.py +127 -0
  71. whatap/llm/providers/openai/completions/__init__.py +0 -0
  72. whatap/llm/providers/openai/completions/completions.py +70 -0
  73. whatap/llm/providers/openai/completions/completions_context.py +31 -0
  74. whatap/llm/providers/openai/completions/completions_extractor.py +61 -0
  75. whatap/llm/providers/openai/content_parser.py +41 -0
  76. whatap/llm/providers/openai/embeddings/__init__.py +0 -0
  77. whatap/llm/providers/openai/embeddings/embeddings.py +59 -0
  78. whatap/llm/providers/openai/embeddings/embeddings_context.py +25 -0
  79. whatap/llm/providers/openai/embeddings/embeddings_extractor.py +26 -0
  80. whatap/llm/providers/openai/responses/__init__.py +0 -0
  81. whatap/llm/providers/openai/responses/responses.py +70 -0
  82. whatap/llm/providers/openai/responses/responses_context.py +88 -0
  83. whatap/llm/providers/openai/responses/responses_extractor.py +126 -0
  84. whatap/llm/providers/stream_accumulator.py +73 -0
  85. whatap/llm/stats/__init__.py +35 -0
  86. whatap/llm/stats/active_stat.py +86 -0
  87. whatap/llm/stats/answer_relevance_eval_stat.py +10 -0
  88. whatap/llm/stats/api_status_stat.py +35 -0
  89. whatap/llm/stats/base_stat.py +107 -0
  90. whatap/llm/stats/combined_judge_eval_stat.py +11 -0
  91. whatap/llm/stats/error_stat.py +59 -0
  92. whatap/llm/stats/eval_stat.py +225 -0
  93. whatap/llm/stats/factuality_eval_stat.py +10 -0
  94. whatap/llm/stats/feature_stat.py +104 -0
  95. whatap/llm/stats/finish_stat.py +105 -0
  96. whatap/llm/stats/hallucination_eval_stat.py +10 -0
  97. whatap/llm/stats/meter.py +18 -0
  98. whatap/llm/stats/perf_stat.py +117 -0
  99. whatap/llm/stats/pii_leak_eval_stat.py +12 -0
  100. whatap/llm/stats/prompt_injection_eval_stat.py +10 -0
  101. whatap/llm/stats/token_usage_stat.py +133 -0
  102. whatap/llm/stats/toxicity_eval_stat.py +10 -0
  103. whatap/llm/stats/url_scan_eval_stat.py +12 -0
  104. whatap/net/__init__.py +0 -0
  105. whatap/net/async_sender.py +107 -0
  106. whatap/net/packet_enum.py +44 -0
  107. whatap/net/packet_type_enum.py +31 -0
  108. whatap/net/param_def.py +69 -0
  109. whatap/net/stackhelper.py +87 -0
  110. whatap/net/udp_session.py +394 -0
  111. whatap/net/udp_thread.py +54 -0
  112. whatap/pack/__init__.py +0 -0
  113. whatap/pack/logSinkPack.py +77 -0
  114. whatap/pack/pack.py +34 -0
  115. whatap/pack/pack_enum.py +41 -0
  116. whatap/pack/tagCountPack.py +61 -0
  117. whatap/scripts/__init__.py +208 -0
  118. whatap/trace/__init__.py +12 -0
  119. whatap/trace/mod/__init__.py +0 -0
  120. whatap/trace/mod/amqp/__init__.py +0 -0
  121. whatap/trace/mod/amqp/kombu.py +122 -0
  122. whatap/trace/mod/amqp/pika.py +62 -0
  123. whatap/trace/mod/application/__init__.py +0 -0
  124. whatap/trace/mod/application/bottle.py +34 -0
  125. whatap/trace/mod/application/celery.py +81 -0
  126. whatap/trace/mod/application/cherrypy.py +30 -0
  127. whatap/trace/mod/application/django.py +287 -0
  128. whatap/trace/mod/application/django_asgi.py +266 -0
  129. whatap/trace/mod/application/django_py3.py +251 -0
  130. whatap/trace/mod/application/fastapi/__init__.py +31 -0
  131. whatap/trace/mod/application/fastapi/endpoint.py +73 -0
  132. whatap/trace/mod/application/fastapi/exception_log.py +63 -0
  133. whatap/trace/mod/application/fastapi/instrumentation.py +204 -0
  134. whatap/trace/mod/application/fastapi/scope.py +115 -0
  135. whatap/trace/mod/application/fastapi/transaction.py +67 -0
  136. whatap/trace/mod/application/flask.py +52 -0
  137. whatap/trace/mod/application/frappe.py +224 -0
  138. whatap/trace/mod/application/graphql.py +170 -0
  139. whatap/trace/mod/application/nameko.py +39 -0
  140. whatap/trace/mod/application/odoo.py +63 -0
  141. whatap/trace/mod/application/starlette.py +126 -0
  142. whatap/trace/mod/application/tornado.py +163 -0
  143. whatap/trace/mod/application/wsgi.py +195 -0
  144. whatap/trace/mod/database/__init__.py +0 -0
  145. whatap/trace/mod/database/cxoracle.py +49 -0
  146. whatap/trace/mod/database/mongo.py +169 -0
  147. whatap/trace/mod/database/mysql.py +80 -0
  148. whatap/trace/mod/database/neo4j.py +90 -0
  149. whatap/trace/mod/database/psycopg2.py +45 -0
  150. whatap/trace/mod/database/psycopg3.py +359 -0
  151. whatap/trace/mod/database/redis.py +122 -0
  152. whatap/trace/mod/database/sqlalchemy.py +213 -0
  153. whatap/trace/mod/database/sqlite3.py +130 -0
  154. whatap/trace/mod/database/util.py +630 -0
  155. whatap/trace/mod/email/__init__.py +0 -0
  156. whatap/trace/mod/email/smtp.py +78 -0
  157. whatap/trace/mod/httpc/__init__.py +0 -0
  158. whatap/trace/mod/httpc/django.py +31 -0
  159. whatap/trace/mod/httpc/httplib.py +70 -0
  160. whatap/trace/mod/httpc/httpx.py +62 -0
  161. whatap/trace/mod/httpc/requests.py +20 -0
  162. whatap/trace/mod/httpc/urllib3.py +27 -0
  163. whatap/trace/mod/httpc/util.py +388 -0
  164. whatap/trace/mod/logging.py +161 -0
  165. whatap/trace/mod/plugin.py +84 -0
  166. whatap/trace/mod/standalone/__init__.py +0 -0
  167. whatap/trace/mod/standalone/multiple.py +293 -0
  168. whatap/trace/mod/standalone/single.py +135 -0
  169. whatap/trace/simple_trace_context.py +18 -0
  170. whatap/trace/trace_context.py +212 -0
  171. whatap/trace/trace_context_manager.py +244 -0
  172. whatap/trace/trace_error.py +84 -0
  173. whatap/trace/trace_handler.py +89 -0
  174. whatap/trace/trace_import.py +91 -0
  175. whatap/trace/trace_module_definition.py +156 -0
  176. whatap/util/__init__.py +0 -0
  177. whatap/util/bit_util.py +49 -0
  178. whatap/util/cardinality/__init__.py +0 -0
  179. whatap/util/cardinality/hyperloglog.py +84 -0
  180. whatap/util/cardinality/murmurhash.py +20 -0
  181. whatap/util/cardinality/registerset.py +60 -0
  182. whatap/util/compare_util.py +19 -0
  183. whatap/util/date_util.py +55 -0
  184. whatap/util/debug_util.py +73 -0
  185. whatap/util/escape_literal_sql.py +233 -0
  186. whatap/util/frame_util.py +20 -0
  187. whatap/util/hash_util.py +103 -0
  188. whatap/util/hexa32.py +66 -0
  189. whatap/util/int_set.py +199 -0
  190. whatap/util/ip_util.py +63 -0
  191. whatap/util/keygen.py +11 -0
  192. whatap/util/linked_list.py +113 -0
  193. whatap/util/linked_map.py +359 -0
  194. whatap/util/metering_util.py +103 -0
  195. whatap/util/request_double_queue.py +68 -0
  196. whatap/util/request_queue.py +60 -0
  197. whatap/util/string_util.py +20 -0
  198. whatap/util/throttle_util.py +99 -0
  199. whatap/util/userid_util.py +134 -0
  200. whatap/value/__init__.py +1 -0
  201. whatap/value/blob_value.py +38 -0
  202. whatap/value/boolean_value.py +33 -0
  203. whatap/value/decimal_value.py +36 -0
  204. whatap/value/double_summary.py +86 -0
  205. whatap/value/double_value.py +33 -0
  206. whatap/value/float_array.py +42 -0
  207. whatap/value/float_value.py +34 -0
  208. whatap/value/int_array.py +42 -0
  209. whatap/value/ip4_value.py +50 -0
  210. whatap/value/list_value.py +105 -0
  211. whatap/value/long_array.py +44 -0
  212. whatap/value/long_summary.py +83 -0
  213. whatap/value/map_value.py +154 -0
  214. whatap/value/null_value.py +21 -0
  215. whatap/value/number_value.py +33 -0
  216. whatap/value/summary_value.py +39 -0
  217. whatap/value/text_array.py +58 -0
  218. whatap/value/text_hash_value.py +37 -0
  219. whatap/value/text_value.py +43 -0
  220. whatap/value/value.py +26 -0
  221. whatap/value/value_enum.py +80 -0
  222. whatap/whatap.conf +14 -0
  223. whatap_python-2.1.0.dist-info/METADATA +87 -0
  224. whatap_python-2.1.0.dist-info/RECORD +227 -0
  225. whatap_python-2.1.0.dist-info/WHEEL +5 -0
  226. whatap_python-2.1.0.dist-info/entry_points.txt +6 -0
  227. whatap_python-2.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,262 @@
1
+ import io
2
+ import math
3
+ import struct
4
+
5
+ from whatap import PY2, PY3
6
+ from whatap.pack.pack import Pack
7
+
8
+ from whatap.value.null_value import NullValue
9
+
10
# Inclusive signed ranges of the fixed-width integer encodings, written as
# powers of two: an N-byte signed integer spans -2**(8N-1) .. 2**(8N-1) - 1.
BYTE_MIN_VALUE = -(1 << 7)
BYTE_MAX_VALUE = (1 << 7) - 1
SHORT_MIN_VALUE = -(1 << 15)
SHORT_MAX_VALUE = (1 << 15) - 1
INT3_MIN_VALUE = -(1 << 23)
INT3_MAX_VALUE = (1 << 23) - 1
INT_MIN_VALUE = -(1 << 31)
INT_MAX_VALUE = (1 << 31) - 1
LONG5_MIN_VALUE = -(1 << 39)
LONG5_MAX_VALUE = (1 << 39) - 1
LONG_MIN_VALUE = -(1 << 63)
LONG_MAX_VALUE = (1 << 63) - 1
22
+
23
+
24
class DataOutputX(object):
    """Big-endian binary writer backed by an in-memory ``io.BytesIO`` buffer.

    Most ``write*`` methods return ``self`` so calls can be chained. The
    fixed-width integer writers mask their argument to the target width, so
    negative values are stored in two's-complement form.
    """

    def __init__(self, size=None):
        """Create the writer.

        :param size: optional number of zero bytes used as the initial buffer
            content. ``io.BytesIO`` starts at position 0, so subsequent
            writes overwrite those bytes instead of appending after them.
        """
        if size:
            self.buffer = io.BytesIO(bytearray(size))
        else:
            self.buffer = io.BytesIO()

    @staticmethod
    def toInt(buf, pos):
        """Combine ``buf[pos:pos + 4]`` into one big-endian unsigned integer.

        :param buf: indexable sequence of integer byte values.
        :param pos: index of the most significant byte.
        :return: non-negative int built from the four bytes.
        """
        ch1 = buf[pos] & 0xff
        ch2 = buf[pos + 1] & 0xff
        ch3 = buf[pos + 2] & 0xff
        ch4 = buf[pos + 3] & 0xff
        return ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0))

    def writePack(self, v, ln_fmt):
        """Write a pack as its 2-byte type code followed by its own payload.

        :param v: object providing ``getPackType()`` and ``write(out)``.
        :param ln_fmt: when truthy, zero bytes are appended until the total
            buffer length is a multiple of this value.
        :return: ``self``.
        """
        self.writeShort(v.getPackType())
        v.write(self)
        if ln_fmt:
            remainder = len(self.buffer.getvalue()) % ln_fmt
            if remainder:
                self.write(bytearray(int(math.floor(ln_fmt - remainder))))
        return self

    def writeStep(self, step):
        """Write a step as its 1-byte type code followed by its own payload.

        :param step: object providing ``getStepType()`` and ``write(out)``.
        :return: ``self``.
        """
        self.writeByte(step.getStepType())
        step.write(self)
        return self

    @classmethod
    def toBytes(cls, v, ln_fmt=None):
        """Serialize a ``Pack`` or a 32-bit integer.

        :param v: a ``Pack`` instance (serialized through :meth:`writePack`)
            or an int (reduced modulo 2**32).
        :param ln_fmt: padding unit forwarded to :meth:`writePack`; unused
            for integers.
        :return: the serialized pack bytes, or a 4-byte big-endian
            ``bytearray`` for an integer.
        """
        if isinstance(v, Pack):
            return cls().writePack(v, ln_fmt).toByteArray()

        return bytearray(struct.pack('>I', v & 0xFFFFFFFF))

    @classmethod
    def toBytesLong(cls, v):
        """Return ``v`` as an 8-byte big-endian ``bytearray``.

        The value is reduced modulo 2**64 so negative numbers come out in
        two's-complement form. (Reducing modulo 2**32, as the 4-byte variant
        does, would always zero the four most significant bytes.)

        :param v: int to encode.
        :return: ``bytearray`` of length 8.
        """
        return bytearray(struct.pack('>Q', v & 0xFFFFFFFFFFFFFFFF))

    def set(self, dest, pos, src):
        """Copy ``src`` into ``dest`` starting at ``pos`` via slice assignment.

        :param dest: mutable sequence that receives the data (modified in
            place).
        :param pos: start index inside ``dest``.
        :param src: sequence to copy.
        :return: ``dest``.
        """
        dest[pos:pos + len(src)] = src[0:]
        return dest

    def writeIntBytes(self, b):
        """Write a 4-byte length followed by the raw bytes.

        ``None`` and empty input are written as a length of 0 with no data.

        :param b: bytes-like object or ``None``.
        :return: ``self``.
        """
        if not b:
            self.writeInt(0)
        else:
            self.writeInt(len(b))
            self.write(b)
        return self

    def writeBoolean(self, v):
        """Write ``v`` as a single byte using the ``struct`` ``?`` format."""
        self.buffer.write(struct.pack('>?', v))
        return self

    def writeByte(self, v):
        """Write the low 8 bits of ``v`` as one byte."""
        v = v & 0xFF
        self.buffer.write(struct.pack('>B', v))
        return self

    def writeShort(self, v):
        """Write the low 16 bits of ``v`` as two big-endian bytes."""
        v = v & 0xFFFF
        self.buffer.write(struct.pack('>H', v))
        return self

    def writeInt3(self, v):
        """Write the low 24 bits of ``v`` as three big-endian bytes."""
        v1 = (v >> 16) & 0xFF
        v2 = (v >> 8) & 0xFF
        v3 = (v >> 0) & 0xFF
        self.buffer.write(struct.pack('>BBB', v1, v2, v3))
        return self

    def writeInt(self, v):
        """Write the low 32 bits of ``v`` as four big-endian bytes."""
        v = v & 0xFFFFFFFF
        self.buffer.write(struct.pack('>I', v))
        return self

    def writeLong5(self, v):
        """Write the low 40 bits of ``v`` as five big-endian bytes."""
        v1 = ((v >> 32) & 0xFF)
        v2 = ((v >> 24) & 0xFF)
        v3 = ((v >> 16) & 0xFF)
        v4 = ((v >> 8) & 0xFF)
        v5 = ((v >> 0) & 0xFF)
        self.buffer.write(struct.pack('>BBBBB', v1, v2, v3, v4, v5))
        return self

    def writeLong(self, v):
        """Write the low 64 bits of ``v`` as eight big-endian bytes."""
        v = v & 0xFFFFFFFFFFFFFFFF
        self.buffer.write(struct.pack('>Q', v))
        return self

    def writeFloat(self, v):
        """Write ``v`` as a 4-byte big-endian float (``struct`` ``>f``)."""
        self.buffer.write(struct.pack('>f', v))
        return self

    def writeDouble(self, v):
        """Write ``v`` as an 8-byte big-endian double (``struct`` ``>d``)."""
        self.buffer.write(struct.pack('>d', v))
        return self

    def writeDecimal(self, v):
        """Write an integer using the smallest encoding that can hold it.

        A one-byte tag (0, 1, 2, 3, 4, 5 or 8) gives the number of payload
        bytes that follow; zero is encoded as the tag alone.

        NOTE(review): a value outside the signed 64-bit range matches no
        branch, so nothing at all is written for it.

        :param v: int to encode.
        :return: ``self``.
        """
        if v == 0:
            self.writeByte(0)
        elif BYTE_MIN_VALUE <= v <= BYTE_MAX_VALUE:
            self.writeByte(1)
            self.writeByte(v)
        elif SHORT_MIN_VALUE <= v <= SHORT_MAX_VALUE:
            self.writeByte(2)
            self.writeShort(v)
        elif INT3_MIN_VALUE <= v <= INT3_MAX_VALUE:
            self.writeByte(3)
            self.writeInt3(v)
        elif INT_MIN_VALUE <= v <= INT_MAX_VALUE:
            self.writeByte(4)
            self.writeInt(v)
        elif LONG5_MIN_VALUE <= v <= LONG5_MAX_VALUE:
            self.writeByte(5)
            self.writeLong5(v)
        elif LONG_MIN_VALUE <= v <= LONG_MAX_VALUE:
            self.writeByte(8)
            self.writeLong(v)
        return self

    def write(self, v):
        """Append the bytes-like object ``v`` to the buffer unchanged."""
        self.buffer.write(v)
        return self

    def writeBlob(self, v):
        """Write a bytes-like object with a variable-width length prefix.

        Layout by length: empty/``None`` -> single byte 0; up to 253 -> one
        length byte; up to 65535 -> marker 255 plus 2-byte length; larger ->
        marker 254 plus 4-byte length. The data follows the prefix.

        :param v: bytes-like object or ``None``.
        :return: ``self``.
        """
        # Test emptiness before taking len() so that None is written as an
        # empty blob instead of raising TypeError.
        if not v:
            self.writeByte(0)
            return self

        ln = len(v)
        if ln <= 253:
            self.writeByte(ln)
        elif ln <= 65535:
            self.writeByte(255)
            self.writeShort(ln)
        else:
            self.writeByte(254)
            self.writeInt(ln)
        self.write(v)
        return self

    def writeText(self, v):
        """Write a string as a UTF-8 blob; empty/``None`` becomes byte 0.

        :param v: text to encode, or a falsy value.
        :return: ``self``.
        """
        if not v:
            self.writeByte(0)
        else:
            self.writeBlob(v.encode("utf-8"))
        return self

    def writeUTF(self, v):
        """Write a string as a 2-byte length followed by its UTF-8 bytes.

        Encodings longer than 65535 bytes are truncated to fit the 16-bit
        length field.

        :param v: text to encode.
        :return: ``self``.
        """
        data = v.encode('utf-8')
        if len(data) > 65535:
            # Cut at the length limit, then drop a multi-byte character that
            # the cut split in half so the payload remains valid UTF-8.
            data = data[:65535].decode('utf-8', 'ignore').encode('utf-8')

        self.buffer.write(struct.pack('>H', len(data)))
        self.buffer.write(data)
        return self

    def writeValue(self, v):
        """Write a value as its 1-byte type code followed by its payload.

        Falsy input is replaced by ``NullValue()``.

        NOTE(review): the check is truthiness, not ``is None`` - a value
        object that evaluates as false would also be written as NullValue;
        confirm that is intended.

        :param v: object providing ``getValueType()`` and ``write(out)``.
        :return: ``self``.
        """
        if not v:
            v = NullValue()

        self.writeByte(v.getValueType())
        v.write(self)
        return self

    def toByteArray(self):
        """Return the entire buffer content as bytes."""
        return self.buffer.getvalue()

    def flush(self):
        """Flush the underlying ``BytesIO`` object."""
        self.buffer.flush()

    def writeFloatArray(self, vv):
        """Write a 2-byte element count followed by each item as a float.

        Empty/``None`` input is written as a count of 0.
        """
        if not vv:
            self.writeShort(0)
        else:
            self.writeShort(len(vv))
            for v in vv:
                self.writeFloat(v)
        return self

    def writeIntArray(self, vv):
        """Write a 2-byte element count followed by each item as a 4-byte int.

        Empty/``None`` input is written as a count of 0.
        """
        if not vv:
            self.writeShort(0)
        else:
            self.writeShort(len(vv))
            for v in vv:
                self.writeInt(v)
        return self

    def writeLongArray(self, vv):
        """Write a 2-byte element count followed by each item as an 8-byte int.

        Empty/``None`` input is written as a count of 0.
        """
        if not vv:
            self.writeShort(0)
        else:
            self.writeShort(len(vv))
            for v in vv:
                self.writeLong(v)
        return self

    def writeDecimalArray(self, vv):
        """Write a 2-byte element count followed by each item via writeDecimal.

        Empty/``None`` input is written as a count of 0.
        """
        if not vv:
            self.writeShort(0)
        else:
            self.writeShort(len(vv))
            for v in vv:
                self.writeDecimal(v)
        return self

    def writeToPos(self, pos, v):
        """Overwrite four bytes at offset ``pos`` with ``v`` (big-endian).

        ``v`` is masked to 32 bits exactly like :meth:`writeInt`, so negative
        values are accepted. The stream position is restored afterwards, so
        later writes continue where they left off. The same seek/write path
        is used on every interpreter version.

        :param pos: byte offset of the field to patch; expected to address
            bytes that have already been written.
        :param v: int to store.
        """
        resume_at = self.buffer.tell()
        self.buffer.seek(pos)
        try:
            self.buffer.write(struct.pack('>I', v & 0xFFFFFFFF))
        finally:
            self.buffer.seek(resume_at)

    def size(self):
        """Return the total number of bytes held by the buffer.

        Measured by seeking to the end rather than through ``getbuffer()``,
        which is not available on every supported interpreter; the stream
        position is left unchanged.
        """
        resume_at = self.buffer.tell()
        try:
            self.buffer.seek(0, io.SEEK_END)
            return self.buffer.tell()
        finally:
            self.buffer.seek(resume_at)
262
+
whatap/llm/__init__.py ADDED
@@ -0,0 +1,17 @@
1
+ from whatap.llm.log_sink_packs.llm_step_status import LlmStepStatus
2
+
3
+ # Public API: prompt 메타 (이름/버전) 데코레이터 + 컨텍스트 매니저.
4
+ # LLM API 호출 시 메트릭/pack 의 ``operation_type`` 태그에 name 이, ``prompt_version``
5
+ # 태그에 version 이 자동 인라인. 미적용 시 default/v1.
6
+ from whatap.llm.prompt_meta import (
7
+ prompt_meta,
8
+ prompt_meta_scope,
9
+ get_prompt_meta,
10
+ )
11
+
12
+ __all__ = [
13
+ 'LlmStepStatus',
14
+ 'prompt_meta',
15
+ 'prompt_meta_scope',
16
+ 'get_prompt_meta',
17
+ ]
@@ -0,0 +1,43 @@
1
+ """LLM 패키지에서 사용하는 상수 및 매핑 데이터 정의."""
2
LOG_SINK_CATEGORY = '#LlmCallLog'

# Token fields that open every per-provider list below.
DEFAULT_TOKEN_FIELDS = ['input_tokens', 'output_tokens', 'total_tokens_count']

# Token field names per provider: the shared fields first, then the
# provider-specific ones.
PROVIDER_TOKEN_FIELDS = {
    'openai': DEFAULT_TOKEN_FIELDS + [
        'cached_tokens',
        'reasoning_tokens',
        'audio_input_tokens',
        'audio_output_tokens',
        'accepted_prediction_tokens',
        'rejected_prediction_tokens',
        'embedding_count',
        'dimensions',
        'similarity',
    ],
    'anthropic': DEFAULT_TOKEN_FIELDS + [
        'cache_creation_input_tokens',
        'cache_read_input_tokens',
    ],
}

# (URL path, operation type) pairs; the order of entries is kept as-is.
OPENAI_URL_OPERATION_MAP = [
    ('/v1/chat/completions', 'chat'),
    ('/v1/responses', 'response'),
    ('/v1/completions', 'completion'),
    ('/v1/embeddings', 'embedding'),
    ('/v1/images/generations', 'image_generation'),
    ('/v1/images/edits', 'image_edit'),
    ('/v1/audio/transcriptions', 'audio_transcription'),
    ('/v1/audio/translations', 'audio_translation'),
    ('/v1/audio/speech', 'audio_speech'),
    ('/v1/moderations', 'moderation'),
    ('/v1/fine_tuning', 'fine_tuning'),
    ('/v1/files', 'file'),
    ('/v1/assistants', 'assistant'),
    ('/v1/threads', 'thread'),
    ('/v1/vector_stores', 'vector_store'),
    ('/v1/batches', 'batch'),
]

ANTHROPIC_URL_OPERATION_MAP = [
    ('/v1/messages', 'chat'),
]

# Combined lookup table: OpenAI entries first, then Anthropic.
URL_OPERATION_MAP = OPENAI_URL_OPERATION_MAP + ANTHROPIC_URL_OPERATION_MAP
@@ -0,0 +1,136 @@
1
+ """WhatAP LLM Evaluation SDK 공개 API.
2
+
3
+ 권장 사용 패턴 — 데코레이터 / 컨텍스트 매니저로 트랜잭션·함수 단위 적용:
4
+ from whatap.llm.evaluators import evaluate_with, evaluation_scope
5
+ from whatap.llm.evaluators.builtins import HallucinationEvaluator, PIILeakEvaluator
6
+
7
+ @evaluate_with(HallucinationEvaluator(judge_fn=judge), PIILeakEvaluator())
8
+ def chat(question):
9
+ return client.chat.completions.create(...).choices[0].message.content
10
+
11
+ # 또는
12
+ def chat(question):
13
+ with evaluation_scope(HallucinationEvaluator(judge_fn=judge)):
14
+ return client.chat.completions.create(...).choices[0].message.content
15
+
16
+ 전역 always-on (앱 전체 LLM 호출에 무조건 적용 — PII 검사 등에 적합):
17
+ from whatap.llm.evaluators import register_evaluator
18
+ register_evaluator(PIILeakEvaluator()) # 앱 시작 시 한 번
19
+
20
+ 명시 평가 (사용자 피드백 등 시점이 다른 평가):
21
+ from whatap.llm.evaluators import submit_evaluation
22
+ submit_evaluation(label='user_thumbs', value=1.0, txid=req.txid, step_id=req.step_id)
23
+
24
+ 활성화:
25
+ whatap.conf 에 ``llm_eval_enabled=true`` 또는
26
+ 환경변수 ``WHATAP_LLM_EVAL_ENABLED=true``.
27
+ """
28
+ import time
29
+
30
+ from whatap import logging
31
+
32
+ from whatap.llm.evaluators.base import (
33
+ BaseEvaluator,
34
+ EvaluatorContext,
35
+ EvaluatorResult,
36
+ )
37
+ from whatap.llm.evaluators.registry import (
38
+ EvaluatorRegistry,
39
+ register_evaluator,
40
+ unregister_evaluator,
41
+ )
42
+ from whatap.llm.evaluators.scope import (
43
+ evaluate_with,
44
+ evaluation_scope,
45
+ )
46
+
47
+
48
def submit_evaluation(label, value, metric_type=None, reasoning=None,
                      metadata=None, txid=None, step_id=None, index=None):
    """Explicitly submit one evaluation result (e.g. late user feedback).

    The join keys ``txid`` / ``step_id`` / ``index`` are taken from the
    arguments; when ``txid`` or ``step_id`` is missing, the local
    TraceContext (if any) supplies whatever was not passed. Failures are
    logged as warnings and never raised from the dispatch step.

    As implemented here, only a numeric (non-bool) ``value`` whose ``label``
    has an entry in ``_LABEL_TO_FIELD`` is stored on the pack; ``reasoning``
    and ``metadata`` are accepted but not attached to it.

    :param label: evaluation label (required, must not contain ".").
    :param value: evaluation value (bool/int/float/str/dict).
    :param metric_type: 'score'|'categorical'|'boolean'|'json'; inferred
        from the type of ``value`` when omitted.
    :param reasoning: optional rationale for the evaluation.
    :param metadata: optional arbitrary dict.
    :param txid, step_id, index: explicit join keys; when omitted they are
        read from the current TraceContext.
    :return: always ``None``.
    """
    # -- validate the label --
    if not label:
        logging.warning('[LLM] submit_evaluation: missing label', extra={'id': 'LLM050'})
        return
    if '.' in label:
        logging.warning('[LLM] submit_evaluation: label must not contain "." (got %s)' % label,
                        extra={'id': 'LLM051'})
        return

    # -- context binding: fill in join keys that were not passed explicitly --
    needs_context = txid is None or step_id is None
    if needs_context:
        try:
            from whatap.trace.trace_context_manager import TraceContextManager
            active_ctx = TraceContextManager.getLocalContext()
            if active_ctx is not None:
                txid = str(active_ctx.id) if txid is None else txid
                if step_id is None:
                    step_id = str(getattr(active_ctx, '_llm_step_id', 0))
                index = getattr(active_ctx, '_llm_call_index', 0) if index is None else index
        except Exception:
            # Best effort: a missing txid is reported right below.
            pass

    if txid is None:
        logging.warning('[LLM] submit_evaluation: no active trace context (label=%s)' % label,
                        extra={'id': 'LLM052'})
        return

    # -- infer metric_type from the value when the caller gave none --
    if metric_type is None:
        # bool must be tested before int/float because bool is an int subclass.
        if isinstance(value, bool):
            metric_type = 'boolean'
        elif isinstance(value, (int, float)):
            metric_type = 'score'
        elif isinstance(value, dict):
            metric_type = 'json'
        else:
            metric_type = 'categorical'
            if not (value is None or isinstance(value, str)):
                value = str(value)

    # -- build and dispatch the pack --
    try:
        from whatap.llm.log_sink_packs.llm_step_eval_status import LlmStepEvalStatus
        from whatap.counter.tasks.llm_evaluator_task import _LABEL_TO_FIELD
        from whatap.counter.tasks.llm_log_sink_task import dispatch_llm_evaluation_pack

        eval_pack = LlmStepEvalStatus()
        eval_pack.txid = str(txid)
        eval_pack.step_id = None if step_id is None else str(step_id)
        eval_pack.index = 0 if index is None else int(index)
        # Late-binding feedback: the metadata of the original LLM call
        # (model/tokens/cost) is unknown here, so only eval_success and the
        # score field mapped from the label (if any) are populated.
        eval_pack.eval_success = bool(metric_type != 'error')
        field_name = _LABEL_TO_FIELD.get(label)
        is_numeric = isinstance(value, (int, float)) and not isinstance(value, bool)
        if field_name is not None and is_numeric:
            setattr(eval_pack, field_name, float(value))
        dispatch_llm_evaluation_pack(eval_pack)
    except Exception as e:
        logging.warning('[LLM] submit_evaluation failed: %s' % e, extra={'id': 'LLM053'})
122
+
123
+
124
+ __all__ = [
125
+ 'BaseEvaluator',
126
+ 'EvaluatorContext',
127
+ 'EvaluatorResult',
128
+ # primary: per-scope application
129
+ 'evaluate_with',
130
+ 'evaluation_scope',
131
+ # secondary: app-wide always-on
132
+ 'register_evaluator',
133
+ 'unregister_evaluator',
134
+ # late-binding (e.g. user feedback)
135
+ 'submit_evaluation',
136
+ ]
@@ -0,0 +1,114 @@
1
+ """LLM 평가자 베이스 타입 정의.
2
+
3
+ EvaluatorContext : 평가자에 전달되는 읽기전용 입력 (LLM 호출의 결과 + 결합키).
4
+ EvaluatorResult : 평가자가 반환하는 표준 결과 컨테이너.
5
+ BaseEvaluator : 사용자가 상속하여 evaluate() 를 구현하는 베이스 클래스.
6
+
7
+ 데이터독 LLMObs 의 BaseEvaluator + EvaluatorContext + EvaluatorResult 와 같은 역할이며,
8
+ WhatAP 의 (txid, step_id, index) 를 결합키로 사용한다.
9
+ """
10
+ from collections import namedtuple
11
+
12
+
13
# Immutable container (namedtuple) so that an evaluator cannot modify its input.
_EVALUATOR_CONTEXT_FIELDS = (
    # -- join keys (same as LlmStepStatus) --
    'txid',               # str - TraceContext.id
    'step_id',            # str - ctx._llm_step_id
    'index',              # int - ordinal of the LLM call within the transaction

    # -- LLM metadata --
    'provider',           # str - e.g. 'api.openai.com'
    'url',                # str - e.g. '/v1/chat/completions'
    'model',              # str
    'operation_type',     # str - 'chat'|'embedding'|'completion'|...

    # -- I/O text (original text extracted by the interceptor) --
    'input_text',         # str - prompt body
    'output_text',        # str - completion body
    'system_text',        # str - system message (joined with \n when several)
    'reasoning_text',     # str - reasoning body ('' when absent)
    'tool_calls_text',    # str
    'tool_results_text',  # str

    # -- result metadata --
    'success',            # bool
    'finish_reason',      # str|None
    'latency_ms',         # int|None
    'input_tokens',       # int|None
    'output_tokens',      # int|None

    # -- evaluation infrastructure, propagated automatically --
    # The user's LLM client object exactly as captured by the interceptor.
    # Judge calls reuse this client instance - no new client / httpx is created.
    'client',             # OpenAI|AsyncOpenAI|Anthropic|...|None

    # The user's running event loop, captured by the interceptor at the time
    # of an async call. An AsyncClient is bound to its loop, so calling it
    # from a sync worker requires dispatching onto that same loop with
    # ``run_coroutine_threadsafe``. None for sync calls or when the capture
    # failed - in that case the client is sync and can be called directly.
    'event_loop',         # asyncio.AbstractEventLoop|None
)

EvaluatorContext = namedtuple('EvaluatorContext', _EVALUATOR_CONTEXT_FIELDS)
# Defaults align to the right: only the last field, event_loop, is optional.
EvaluatorContext.__new__.__defaults__ = (None,)
56
+
57
+
58
class EvaluatorResult(object):
    """Standard container for the outcome of one evaluation.

    An evaluator's ``evaluate()`` may also return a raw value
    (bool/int/float/str/dict); LlmEvaluatorTask then wraps it in an
    ``EvaluatorResult`` automatically.

    ``extras`` lets a single ``evaluate()`` call emit metrics for several
    labels at once. For example, CombinedJudgeEvaluator derives
    hallucination + answer_relevance + toxicity from one LLM judge call and
    attaches them as ``extras={'hallucination': ..., ...}`` to its primary
    result (combined_judge) so that all of them are inlined into the same
    LlmStepStatus pack.

    Each ``extras`` value is either an ``EvaluatorResult`` instance or a dict
    of the form ``(value, metric_type, reasoning, metadata)``.
    """

    __slots__ = ('value', 'metric_type', 'reasoning', 'metadata', 'extras')

    def __init__(self, value, metric_type=None, reasoning=None, metadata=None,
                 extras=None):
        """Store the result fields.

        :param value: the evaluation value.
        :param metric_type: left as ``None`` to have LlmEvaluatorTask infer
            it from the type of ``value``.
        :param reasoning: optional rationale.
        :param metadata: optional dict; a falsy value becomes a new ``{}``.
        :param extras: optional ``{label: EvaluatorResult or dict}``; a falsy
            value becomes a new ``{}``.
        """
        self.value = value
        self.metric_type = metric_type
        self.reasoning = reasoning
        self.metadata = metadata if metadata else {}
        self.extras = extras if extras else {}
85
+
86
+
87
class BaseEvaluator(object):
    """Base class of every evaluator; subclasses set LABEL and implement evaluate().

    Example:
        class OutputLengthEvaluator(BaseEvaluator):
            LABEL = 'output_length'
            METRIC_TYPE = 'score'

            def evaluate(self, context):
                return float(len(context.output_text))

    Every evaluator runs asynchronously on the evaluation worker thread pool,
    and its result is sent as a separate ``LlmStepEvalStatus`` pack
    (llm_log_type=llm_step_eval_status). That pack has the same structure as
    the original LlmStepStatus (model/tokens/cost/...) plus 5 evaluation
    score fields.
    """

    # Required: sampler matching key and metric label. LlmEvaluatorTask
    # rejects an evaluator whose LABEL is None.
    LABEL = None
    # Optional: 'score'|'categorical'|'boolean'|'json'. Inferred automatically
    # when None.
    METRIC_TYPE = None

    def evaluate(self, context):
        """Evaluate a single LLM call.

        :param context: EvaluatorContext - read-only input.
        :return: an EvaluatorResult, or a raw value (bool/int/float/str/dict).
            A raw value is wrapped into an EvaluatorResult by
            LlmEvaluatorTask, with metric_type inferred from the value type.
        :raises NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError()
@@ -0,0 +1,91 @@
1
+ """WhatAP LLM Evaluation 빌트인 평가자.
2
+
3
+ LLM judge 기반 평가자는 별도 LLM 호출(judge call)로 점수를 산출하므로 비용이
4
+ 발생한다. ``whatap.conf`` 의 ``llm_eval_sample_rate`` (0.0~1.0) 로 샘플링 비율을
5
+ 조정해 비용을 통제한다. 기본 1.0 (항상 실행) — 비용 통제가 필요하면 0.1 같은
6
+ 값으로 낮춰서 judge 호출의 1/10 만 실제로 발생시킬 수 있음.
7
+
8
+ 규칙 기반 평가자 (PIILeak / URLScan) 는 LLM 호출이 없어 비용 0 — 매 호출 평가해도 됨.
9
+
10
+ 사용 예 — 함수 데코레이터로 트랜잭션 단위 적용 (권장):
11
+
12
+ import openai
13
+ from whatap.llm.evaluators import evaluate_with
14
+ from whatap.llm.evaluators.builtins import (
15
+ CombinedJudgeEvaluator, PIILeakEvaluator, URLScanEvaluator,
16
+ make_openai_judge,
17
+ )
18
+
19
+ client = openai.OpenAI()
20
+ judge = make_openai_judge(client=client, model='gpt-4o-mini')
21
+
22
+ @evaluate_with(
23
+ CombinedJudgeEvaluator(judge_fn=judge), # 5 의미 aspect 1번 호출
24
+ PIILeakEvaluator(), # 정규식, 비용 0
25
+ URLScanEvaluator(), # 정규식, 비용 0
26
+ )
27
+ def chat(question: str) -> str:
28
+ return client.chat.completions.create(
29
+ model='gpt-4o',
30
+ messages=[{'role': 'user', 'content': question}],
31
+ ).choices[0].message.content
32
+
33
+ 평가자 종류:
34
+
35
+ LLM judge 기반 (judge call 발생) ─────────────────────────────────
36
+ HallucinationEvaluator 할루시네이션 0.0~1.0 (faithfulness / self-consistency)
37
+ AnswerRelevanceEvaluator 질문↔답변 관련성 0.0~1.0
38
+ ToxicityEvaluator 유해성 0.0~1.0 + 카테고리
39
+ PromptInjectionEvaluator prompt injection / system prompt leak 0.0~1.0
40
+ FactualityEvaluator 검증 가능한 사실 오류 0.0~1.0
41
+ CombinedJudgeEvaluator 위 5 개를 1번의 judge 호출로 모두 산출 (비용 1/5)
42
+
43
+ 규칙 기반 (judge call 0회, 비용 0) ───────────────────────────────
44
+ PIILeakEvaluator 정규식+chksum 으로 PII 노출 탐지
45
+ URLScanEvaluator URL 추출 + suspicious 패턴 매칭
46
+
47
+ Judge LLM 호출 헬퍼:
48
+
49
+ make_openai_judge(client=..., model='gpt-4o-mini')
50
+ make_anthropic_judge(client=..., model='claude-3-5-haiku-latest')
51
+
52
+ 커스텀 judge 작성 시 ``LLMJudgeBase`` 상속하고 ``make_prompt`` / ``parse_judgment`` 만
53
+ 구현하면 된다.
54
+ """
55
+ from whatap.llm.evaluators.builtins.llm_judge import (
56
+ LLMJudgeBase,
57
+ make_openai_judge,
58
+ make_anthropic_judge,
59
+ parse_json_response,
60
+ set_default_judge_fn,
61
+ get_default_judge_fn,
62
+ )
63
+ from whatap.llm.evaluators.builtins.hallucination import HallucinationEvaluator
64
+ from whatap.llm.evaluators.builtins.answer_relevance import AnswerRelevanceEvaluator
65
+ from whatap.llm.evaluators.builtins.toxicity import ToxicityEvaluator
66
+ from whatap.llm.evaluators.builtins.prompt_injection import PromptInjectionEvaluator
67
+ from whatap.llm.evaluators.builtins.factuality import FactualityEvaluator
68
+ from whatap.llm.evaluators.builtins.combined_judge import CombinedJudgeEvaluator
69
+ from whatap.llm.evaluators.builtins.pii_leak import PIILeakEvaluator
70
+ from whatap.llm.evaluators.builtins.url_scan import URLScanEvaluator
71
+
72
+
73
+ __all__ = [
74
+ # judge base + helpers
75
+ 'LLMJudgeBase',
76
+ 'make_openai_judge',
77
+ 'make_anthropic_judge',
78
+ 'parse_json_response',
79
+ 'set_default_judge_fn',
80
+ 'get_default_judge_fn',
81
+ # LLM judge 기반
82
+ 'HallucinationEvaluator',
83
+ 'AnswerRelevanceEvaluator',
84
+ 'ToxicityEvaluator',
85
+ 'PromptInjectionEvaluator',
86
+ 'FactualityEvaluator',
87
+ 'CombinedJudgeEvaluator',
88
+ # 규칙 기반 (LLM 호출 X)
89
+ 'PIILeakEvaluator',
90
+ 'URLScanEvaluator',
91
+ ]