whatap-python 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. whatap/LICENSE +0 -0
  2. whatap/README.rst +49 -0
  3. whatap/__init__.py +923 -0
  4. whatap/__main__.py +4 -0
  5. whatap/agent/darwin/amd64/whatap_python +0 -0
  6. whatap/agent/darwin/arm64/whatap_python +0 -0
  7. whatap/agent/linux/amd64/whatap_python +0 -0
  8. whatap/agent/linux/arm64/whatap_python +0 -0
  9. whatap/agent/windows/whatap_python.exe +0 -0
  10. whatap/bootstrap/__init__.py +0 -0
  11. whatap/bootstrap/sitecustomize.py +19 -0
  12. whatap/build.py +4 -0
  13. whatap/conf/__init__.py +0 -0
  14. whatap/conf/configuration.py +280 -0
  15. whatap/conf/configure.py +105 -0
  16. whatap/conf/license.py +49 -0
  17. whatap/control/__init__.py +0 -0
  18. whatap/counter/__init__.py +14 -0
  19. whatap/counter/counter_manager.py +45 -0
  20. whatap/counter/tasks/__init__.py +3 -0
  21. whatap/counter/tasks/base_task.py +26 -0
  22. whatap/counter/tasks/llm_evaluator_task.py +501 -0
  23. whatap/counter/tasks/llm_log_sink_task.py +309 -0
  24. whatap/counter/tasks/llm_stat_task.py +78 -0
  25. whatap/counter/tasks/openfiledescriptor.py +67 -0
  26. whatap/io/__init__.py +1 -0
  27. whatap/io/data_inputx.py +161 -0
  28. whatap/io/data_outputx.py +262 -0
  29. whatap/llm/__init__.py +17 -0
  30. whatap/llm/definitions.py +43 -0
  31. whatap/llm/evaluators/__init__.py +136 -0
  32. whatap/llm/evaluators/base.py +114 -0
  33. whatap/llm/evaluators/builtins/__init__.py +91 -0
  34. whatap/llm/evaluators/builtins/answer_relevance.py +46 -0
  35. whatap/llm/evaluators/builtins/combined_judge.py +271 -0
  36. whatap/llm/evaluators/builtins/factuality.py +71 -0
  37. whatap/llm/evaluators/builtins/hallucination.py +97 -0
  38. whatap/llm/evaluators/builtins/llm_judge.py +516 -0
  39. whatap/llm/evaluators/builtins/pii_leak.py +214 -0
  40. whatap/llm/evaluators/builtins/prompt_injection.py +71 -0
  41. whatap/llm/evaluators/builtins/toxicity.py +53 -0
  42. whatap/llm/evaluators/builtins/url_scan.py +194 -0
  43. whatap/llm/evaluators/registry.py +192 -0
  44. whatap/llm/evaluators/sampler.py +83 -0
  45. whatap/llm/evaluators/scope.py +334 -0
  46. whatap/llm/features.py +66 -0
  47. whatap/llm/log_sink_packs/__init__.py +9 -0
  48. whatap/llm/log_sink_packs/llm_input_message.py +16 -0
  49. whatap/llm/log_sink_packs/llm_log_sink_pack.py +72 -0
  50. whatap/llm/log_sink_packs/llm_output_message.py +19 -0
  51. whatap/llm/log_sink_packs/llm_step_eval_status.py +94 -0
  52. whatap/llm/log_sink_packs/llm_step_status.py +118 -0
  53. whatap/llm/log_sink_packs/llm_system_message.py +16 -0
  54. whatap/llm/log_sink_packs/llm_tool_calls.py +44 -0
  55. whatap/llm/log_sink_packs/llm_tool_results.py +16 -0
  56. whatap/llm/log_sink_packs/llm_tx_status.py +108 -0
  57. whatap/llm/pricing.py +236 -0
  58. whatap/llm/prompt_meta.py +288 -0
  59. whatap/llm/providers/__init__.py +0 -0
  60. whatap/llm/providers/anthropic/__init__.py +37 -0
  61. whatap/llm/providers/anthropic/messages/__init__.py +0 -0
  62. whatap/llm/providers/anthropic/messages/messages.py +70 -0
  63. whatap/llm/providers/anthropic/messages/messages_context.py +76 -0
  64. whatap/llm/providers/anthropic/messages/messages_extractor.py +126 -0
  65. whatap/llm/providers/interceptor.py +182 -0
  66. whatap/llm/providers/openai/__init__.py +133 -0
  67. whatap/llm/providers/openai/chat/__init__.py +0 -0
  68. whatap/llm/providers/openai/chat/chat.py +82 -0
  69. whatap/llm/providers/openai/chat/chat_context.py +78 -0
  70. whatap/llm/providers/openai/chat/chat_extractor.py +127 -0
  71. whatap/llm/providers/openai/completions/__init__.py +0 -0
  72. whatap/llm/providers/openai/completions/completions.py +70 -0
  73. whatap/llm/providers/openai/completions/completions_context.py +31 -0
  74. whatap/llm/providers/openai/completions/completions_extractor.py +61 -0
  75. whatap/llm/providers/openai/content_parser.py +41 -0
  76. whatap/llm/providers/openai/embeddings/__init__.py +0 -0
  77. whatap/llm/providers/openai/embeddings/embeddings.py +59 -0
  78. whatap/llm/providers/openai/embeddings/embeddings_context.py +25 -0
  79. whatap/llm/providers/openai/embeddings/embeddings_extractor.py +26 -0
  80. whatap/llm/providers/openai/responses/__init__.py +0 -0
  81. whatap/llm/providers/openai/responses/responses.py +70 -0
  82. whatap/llm/providers/openai/responses/responses_context.py +88 -0
  83. whatap/llm/providers/openai/responses/responses_extractor.py +126 -0
  84. whatap/llm/providers/stream_accumulator.py +73 -0
  85. whatap/llm/stats/__init__.py +35 -0
  86. whatap/llm/stats/active_stat.py +86 -0
  87. whatap/llm/stats/answer_relevance_eval_stat.py +10 -0
  88. whatap/llm/stats/api_status_stat.py +35 -0
  89. whatap/llm/stats/base_stat.py +107 -0
  90. whatap/llm/stats/combined_judge_eval_stat.py +11 -0
  91. whatap/llm/stats/error_stat.py +59 -0
  92. whatap/llm/stats/eval_stat.py +225 -0
  93. whatap/llm/stats/factuality_eval_stat.py +10 -0
  94. whatap/llm/stats/feature_stat.py +104 -0
  95. whatap/llm/stats/finish_stat.py +105 -0
  96. whatap/llm/stats/hallucination_eval_stat.py +10 -0
  97. whatap/llm/stats/meter.py +18 -0
  98. whatap/llm/stats/perf_stat.py +117 -0
  99. whatap/llm/stats/pii_leak_eval_stat.py +12 -0
  100. whatap/llm/stats/prompt_injection_eval_stat.py +10 -0
  101. whatap/llm/stats/token_usage_stat.py +133 -0
  102. whatap/llm/stats/toxicity_eval_stat.py +10 -0
  103. whatap/llm/stats/url_scan_eval_stat.py +12 -0
  104. whatap/net/__init__.py +0 -0
  105. whatap/net/async_sender.py +107 -0
  106. whatap/net/packet_enum.py +44 -0
  107. whatap/net/packet_type_enum.py +31 -0
  108. whatap/net/param_def.py +69 -0
  109. whatap/net/stackhelper.py +87 -0
  110. whatap/net/udp_session.py +394 -0
  111. whatap/net/udp_thread.py +54 -0
  112. whatap/pack/__init__.py +0 -0
  113. whatap/pack/logSinkPack.py +77 -0
  114. whatap/pack/pack.py +34 -0
  115. whatap/pack/pack_enum.py +41 -0
  116. whatap/pack/tagCountPack.py +61 -0
  117. whatap/scripts/__init__.py +208 -0
  118. whatap/trace/__init__.py +12 -0
  119. whatap/trace/mod/__init__.py +0 -0
  120. whatap/trace/mod/amqp/__init__.py +0 -0
  121. whatap/trace/mod/amqp/kombu.py +122 -0
  122. whatap/trace/mod/amqp/pika.py +62 -0
  123. whatap/trace/mod/application/__init__.py +0 -0
  124. whatap/trace/mod/application/bottle.py +34 -0
  125. whatap/trace/mod/application/celery.py +81 -0
  126. whatap/trace/mod/application/cherrypy.py +30 -0
  127. whatap/trace/mod/application/django.py +287 -0
  128. whatap/trace/mod/application/django_asgi.py +266 -0
  129. whatap/trace/mod/application/django_py3.py +251 -0
  130. whatap/trace/mod/application/fastapi/__init__.py +31 -0
  131. whatap/trace/mod/application/fastapi/endpoint.py +73 -0
  132. whatap/trace/mod/application/fastapi/exception_log.py +63 -0
  133. whatap/trace/mod/application/fastapi/instrumentation.py +204 -0
  134. whatap/trace/mod/application/fastapi/scope.py +115 -0
  135. whatap/trace/mod/application/fastapi/transaction.py +67 -0
  136. whatap/trace/mod/application/flask.py +52 -0
  137. whatap/trace/mod/application/frappe.py +224 -0
  138. whatap/trace/mod/application/graphql.py +170 -0
  139. whatap/trace/mod/application/nameko.py +39 -0
  140. whatap/trace/mod/application/odoo.py +63 -0
  141. whatap/trace/mod/application/starlette.py +126 -0
  142. whatap/trace/mod/application/tornado.py +163 -0
  143. whatap/trace/mod/application/wsgi.py +195 -0
  144. whatap/trace/mod/database/__init__.py +0 -0
  145. whatap/trace/mod/database/cxoracle.py +49 -0
  146. whatap/trace/mod/database/mongo.py +169 -0
  147. whatap/trace/mod/database/mysql.py +80 -0
  148. whatap/trace/mod/database/neo4j.py +90 -0
  149. whatap/trace/mod/database/psycopg2.py +45 -0
  150. whatap/trace/mod/database/psycopg3.py +359 -0
  151. whatap/trace/mod/database/redis.py +122 -0
  152. whatap/trace/mod/database/sqlalchemy.py +213 -0
  153. whatap/trace/mod/database/sqlite3.py +130 -0
  154. whatap/trace/mod/database/util.py +630 -0
  155. whatap/trace/mod/email/__init__.py +0 -0
  156. whatap/trace/mod/email/smtp.py +78 -0
  157. whatap/trace/mod/httpc/__init__.py +0 -0
  158. whatap/trace/mod/httpc/django.py +31 -0
  159. whatap/trace/mod/httpc/httplib.py +70 -0
  160. whatap/trace/mod/httpc/httpx.py +62 -0
  161. whatap/trace/mod/httpc/requests.py +20 -0
  162. whatap/trace/mod/httpc/urllib3.py +27 -0
  163. whatap/trace/mod/httpc/util.py +388 -0
  164. whatap/trace/mod/logging.py +161 -0
  165. whatap/trace/mod/plugin.py +84 -0
  166. whatap/trace/mod/standalone/__init__.py +0 -0
  167. whatap/trace/mod/standalone/multiple.py +293 -0
  168. whatap/trace/mod/standalone/single.py +135 -0
  169. whatap/trace/simple_trace_context.py +18 -0
  170. whatap/trace/trace_context.py +212 -0
  171. whatap/trace/trace_context_manager.py +244 -0
  172. whatap/trace/trace_error.py +84 -0
  173. whatap/trace/trace_handler.py +89 -0
  174. whatap/trace/trace_import.py +91 -0
  175. whatap/trace/trace_module_definition.py +156 -0
  176. whatap/util/__init__.py +0 -0
  177. whatap/util/bit_util.py +49 -0
  178. whatap/util/cardinality/__init__.py +0 -0
  179. whatap/util/cardinality/hyperloglog.py +84 -0
  180. whatap/util/cardinality/murmurhash.py +20 -0
  181. whatap/util/cardinality/registerset.py +60 -0
  182. whatap/util/compare_util.py +19 -0
  183. whatap/util/date_util.py +55 -0
  184. whatap/util/debug_util.py +73 -0
  185. whatap/util/escape_literal_sql.py +233 -0
  186. whatap/util/frame_util.py +20 -0
  187. whatap/util/hash_util.py +103 -0
  188. whatap/util/hexa32.py +66 -0
  189. whatap/util/int_set.py +199 -0
  190. whatap/util/ip_util.py +63 -0
  191. whatap/util/keygen.py +11 -0
  192. whatap/util/linked_list.py +113 -0
  193. whatap/util/linked_map.py +359 -0
  194. whatap/util/metering_util.py +103 -0
  195. whatap/util/request_double_queue.py +68 -0
  196. whatap/util/request_queue.py +60 -0
  197. whatap/util/string_util.py +20 -0
  198. whatap/util/throttle_util.py +99 -0
  199. whatap/util/userid_util.py +134 -0
  200. whatap/value/__init__.py +1 -0
  201. whatap/value/blob_value.py +38 -0
  202. whatap/value/boolean_value.py +33 -0
  203. whatap/value/decimal_value.py +36 -0
  204. whatap/value/double_summary.py +86 -0
  205. whatap/value/double_value.py +33 -0
  206. whatap/value/float_array.py +42 -0
  207. whatap/value/float_value.py +34 -0
  208. whatap/value/int_array.py +42 -0
  209. whatap/value/ip4_value.py +50 -0
  210. whatap/value/list_value.py +105 -0
  211. whatap/value/long_array.py +44 -0
  212. whatap/value/long_summary.py +83 -0
  213. whatap/value/map_value.py +154 -0
  214. whatap/value/null_value.py +21 -0
  215. whatap/value/number_value.py +33 -0
  216. whatap/value/summary_value.py +39 -0
  217. whatap/value/text_array.py +58 -0
  218. whatap/value/text_hash_value.py +37 -0
  219. whatap/value/text_value.py +43 -0
  220. whatap/value/value.py +26 -0
  221. whatap/value/value_enum.py +80 -0
  222. whatap/whatap.conf +14 -0
  223. whatap_python-2.1.0.dist-info/METADATA +87 -0
  224. whatap_python-2.1.0.dist-info/RECORD +227 -0
  225. whatap_python-2.1.0.dist-info/WHEEL +5 -0
  226. whatap_python-2.1.0.dist-info/entry_points.txt +6 -0
  227. whatap_python-2.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,192 @@
1
+ """평가자 등록소 (전역 싱글톤).
2
+
3
+ 사용자가 등록한 BaseEvaluator 인스턴스 리스트를 보관하고,
4
+ LlmEvaluatorTask 의 dispatcher 스레드가 매 큐 처리 시 이 리스트를 순회한다.
5
+
6
+ 또한 ``bootstrap_from_conf()`` 가 ``conf.llm_eval_evaluators`` (csv) 를 읽어
7
+ 빌트인 평가자를 자동 등록한다. LlmEvaluatorTask 시작 시 한 번 호출.
8
+ """
9
+ import threading
10
+
11
+ from whatap import logging
12
+ from whatap.conf.configure import Configure as conf
13
+
14
+
15
# Labels of the built-in evaluators that ``conf.llm_eval_evaluators`` may name.
# A label is resolved to its evaluator class lazily, at instantiation time,
# because the builtins package may not have been loaded yet.
_BUILTIN_LABELS = (
    'combined_judge', 'hallucination', 'answer_relevance', 'toxicity',
    'prompt_injection', 'factuality', 'pii_leak', 'url_scan',
)
27
+
28
# Labels enabled automatically when conf does not name any.
# combined_judge handles the five semantic aspects (hallucination,
# answer_relevance, toxicity, prompt_injection, factuality) in a single judge
# call, so switching the individual evaluators on next to it would duplicate
# work. The default is therefore combined_judge plus the two rule-based ones.
_DEFAULT_LABELS = (
    'combined_judge',
    'pii_leak',
    'url_scan',
)
33
+
34
+
35
def _instantiate(label):
    """Create the built-in evaluator registered under ``label``.

    The evaluator class is constructed without arguments, i.e. no judge_fn is
    passed here.
    NOTE(review): the original comment says an unset judge_fn is resolved at
    evaluate time by ``_resolve_judge_fn`` (provider detection from
    ctx.provider plus a global default); that helper is not visible in this
    module - confirm.

    Only the class for the requested label is looked up on the builtins
    package, so a class missing for some *other* label cannot break this call.

    :param label: evaluator label, one of ``_BUILTIN_LABELS``.
    :return: a new evaluator instance, or ``None`` (after logging a warning)
        when the builtins package cannot be imported, the label is unknown,
        the class is missing from the builtins package, or the constructor
        raises.
    """
    try:
        from whatap.llm.evaluators import builtins as B
    except Exception as e:
        logging.warning('[LLM] failed to import builtin evaluators: %s' % e,
                        extra={'id': 'LLM044'})
        return None

    # label -> attribute name on the builtins package. Names instead of
    # attribute reads keep the lookup lazy and per-label.
    class_names = {
        'combined_judge': 'CombinedJudgeEvaluator',
        'hallucination': 'HallucinationEvaluator',
        'answer_relevance': 'AnswerRelevanceEvaluator',
        'toxicity': 'ToxicityEvaluator',
        'prompt_injection': 'PromptInjectionEvaluator',
        'factuality': 'FactualityEvaluator',
        'pii_leak': 'PIILeakEvaluator',
        'url_scan': 'URLScanEvaluator',
    }
    class_name = class_names.get(label)
    if class_name is None:
        logging.warning('[LLM] unknown evaluator label in conf.llm_eval_evaluators: %r '
                        '(valid: %s)' % (label, ', '.join(_BUILTIN_LABELS)),
                        extra={'id': 'LLM045'})
        return None

    try:
        cls = getattr(B, class_name)
    except AttributeError as e:
        # The package imported but does not expose this evaluator; treat it
        # like an import failure instead of letting AttributeError escape.
        logging.warning('[LLM] failed to import builtin evaluators: %s' % e,
                        extra={'id': 'LLM044'})
        return None

    try:
        return cls()
    except Exception as e:
        logging.warning('[LLM] failed to instantiate evaluator %s: %s' % (label, e),
                        extra={'id': 'LLM046'})
        return None
68
+
69
+
70
def _parse_labels(raw):
    """Parse a comma-separated label string into an ordered list.

    Each token is stripped and lower-cased; empty tokens are dropped and only
    the first occurrence of a label is kept.

    :param raw: csv string (any other non-None value is passed through
        ``str()`` first), or ``None``.
    :return: list of labels. ``None``, a blank string, or input that yields no
        tokens gives a fresh copy of ``_DEFAULT_LABELS``.
    """
    text = '' if raw is None else str(raw).strip()

    labels = []
    if text:
        for piece in text.split(','):
            name = piece.strip().lower()
            if not name or name in labels:
                continue
            labels.append(name)

    if labels:
        return labels
    return list(_DEFAULT_LABELS)
83
+
84
+
85
class EvaluatorRegistry(object):
    """Global registry of evaluator instances (lazily created singleton)."""

    # Shared instance and the lock that guards its one-time creation.
    _instance = None
    _lock = threading.Lock()

    def __init__(self):
        self._evaluators = []
        # Guards every replacement / snapshot of ``_evaluators``.
        self._reg_lock = threading.Lock()

    @classmethod
    def get_instance(cls):
        """Return the shared registry, creating it on first use."""
        if cls._instance is None:
            with cls._lock:
                # Re-check under the lock: another thread may have created it.
                if cls._instance is None:
                    cls._instance = cls()
        return cls._instance

    def register(self, evaluator):
        """Register an evaluator instance, replacing any entry with the same LABEL.

        Objects that are not ``BaseEvaluator`` instances, or whose ``LABEL`` is
        falsy, are rejected with a warning and the registry is left unchanged.

        :param evaluator: BaseEvaluator instance.
        """
        from whatap.llm.evaluators.base import BaseEvaluator
        if not isinstance(evaluator, BaseEvaluator):
            # Wrap in a 1-tuple: a bare ``% evaluator`` raises TypeError when
            # the rejected object is itself a tuple.
            logging.warning('[LLM] register_evaluator: not a BaseEvaluator: %r' % (evaluator,),
                            extra={'id': 'LLM040'})
            return
        if not evaluator.LABEL:
            logging.warning('[LLM] register_evaluator: missing LABEL on %r' % (evaluator,),
                            extra={'id': 'LLM041'})
            return
        with self._reg_lock:
            kept = [e for e in self._evaluators if e.LABEL != evaluator.LABEL]
            kept.append(evaluator)
            self._evaluators = kept

    def unregister(self, label):
        """Remove every evaluator whose LABEL equals ``label``."""
        with self._reg_lock:
            self._evaluators = [e for e in self._evaluators if e.LABEL != label]

    def all(self):
        """Return a snapshot (copy) of the currently registered evaluators."""
        with self._reg_lock:
            return list(self._evaluators)

    def is_empty(self):
        """Return True when no evaluator is registered (read without the lock)."""
        return not self._evaluators

    def clear(self):
        """Drop every registered evaluator."""
        with self._reg_lock:
            self._evaluators = []
137
+
138
+
139
def register_evaluator(evaluator):
    """Public API: register ``evaluator`` with the global registry.

    Delegates to ``EvaluatorRegistry.register`` on the shared instance.
    """
    registry = EvaluatorRegistry.get_instance()
    registry.register(evaluator)
142
+
143
+
144
def unregister_evaluator(label):
    """Public API: remove the evaluator(s) registered under ``label``.

    Delegates to ``EvaluatorRegistry.unregister`` on the shared instance.
    """
    registry = EvaluatorRegistry.get_instance()
    registry.unregister(label)
147
+
148
+
149
# One-shot guard for bootstrap_from_conf(): the flag records that the conf
# driven registration already ran, the lock serializes the first run.
_bootstrap_done = False
_bootstrap_lock = threading.Lock()
151
+
152
+
153
def bootstrap_from_conf():
    """Auto-register built-in evaluators named in ``conf.llm_eval_evaluators``.

    The setting is a csv list of labels (parsed by ``_parse_labels``). A label
    that is already present in the registry - e.g. one the user registered
    through ``register_evaluator()`` - is skipped here, so the existing entry
    wins. (``register()`` itself would replace it; the skip is what preserves
    the user's evaluator.)

    The work runs at most once per process; later calls return immediately.
    NOTE(review): the original note says this is called from
    ``LlmEvaluatorTask._ensure_started()`` on the first enqueue - the caller
    is not visible here.
    """
    global _bootstrap_done
    if _bootstrap_done:
        return

    with _bootstrap_lock:
        # Another thread may have finished the bootstrap while we waited.
        if _bootstrap_done:
            return

        labels = _parse_labels(getattr(conf, 'llm_eval_evaluators', ''))
        registry = EvaluatorRegistry.get_instance()
        already = set(e.LABEL for e in registry.all())

        added = []
        for label in labels:
            # Labels already registered are left untouched.
            inst = None if label in already else _instantiate(label)
            if inst is not None:
                registry.register(inst)
                added.append(label)

        if added:
            logging.info('[LLM] auto-registered builtin evaluators from conf: %s'
                         % ', '.join(added), extra={'id': 'LLM047'})
        _bootstrap_done = True
187
+
188
+
189
def _reset_bootstrap_for_test():
    """Test hook: clear the one-shot flag so bootstrap_from_conf() can run again."""
    global _bootstrap_done
    _bootstrap_done = False
@@ -0,0 +1,83 @@
1
+ """평가자 샘플링 — judge LLM 호출 비용 통제.
2
+
3
+ ``whatap.conf`` 의 ``llm_eval_sample_rate`` (float 0.0~1.0) 하나로 동작:
4
+
5
+ - 1.0 (기본) — 모든 judge 평가자 항상 실행
6
+ - 0.5 — judge 평가자 50% 만 실행
7
+ - 0.0 — judge 평가자 전부 skip
8
+
9
+ 규칙 기반 평가자 (``USES_LLM_JUDGE=False`` — PIILeak / URLScan / 사용자 custom 등)
10
+ 는 비용 0 이라 항상 100% 실행 — rate 영향 받지 않음.
11
+
12
+ 샘플링은 txid 해시 기반 결정론적이라 같은 트랜잭션은 항상 같은 결정을 받는다.
13
+ """
14
+ import threading
15
+
16
+ from whatap import logging
17
+ from whatap.conf.configure import Configure as conf
18
+
19
+
20
class EvaluatorSampler(object):
    """Sampling decision engine for evaluators.

    ``conf.llm_eval_sample_rate`` is read again on every decision, so a change
    of the setting at runtime is picked up.
    """

    def __init__(self):
        self._lock = threading.Lock()
        # Last raw conf value seen and the rate parsed from it.
        self._cached_raw = None
        self._cached_rate = 1.0

    def _get_rate(self):
        """Return ``conf.llm_eval_sample_rate`` as a float within [0.0, 1.0].

        The parsed rate is cached against the raw conf value, so parsing (and
        the invalid-value warning) only happens when the raw value changes.
        A value that cannot be converted to float falls back to 1.0 (keep
        all); a numeric value outside the range is clamped to the nearer
        bound.
        """
        current = getattr(conf, 'llm_eval_sample_rate', 1.0)
        if current == self._cached_raw:
            return self._cached_rate

        try:
            rate = float(current)
        except (TypeError, ValueError):
            logging.warning('[LLM] invalid llm_eval_sample_rate: %r — fallback to 1.0' % current,
                            extra={'id': 'LLM042'})
            rate = 1.0

        capped = min(1.0, rate)
        rate = max(0.0, capped)
        with self._lock:
            self._cached_raw = current
            self._cached_rate = rate
        return rate

    def should_run(self, evaluator, txid):
        """Decide whether ``evaluator`` should run for transaction ``txid``.

        :param evaluator: evaluator instance; only its ``USES_LLM_JUDGE`` flag
            is consulted (a missing flag counts as False).
        :param txid: transaction id. The same txid always gets the same
            decision for a given rate.
        :return: True to run the evaluation, False to skip it.
        """
        uses_judge = getattr(evaluator, 'USES_LLM_JUDGE', False)
        if not uses_judge:
            # Rule-based evaluators make no judge call, so they are never sampled out.
            return True

        rate = self._get_rate()
        if rate >= 1.0:
            return True
        return rate > 0.0 and _deterministic_unit(txid) < rate
68
+
69
+
70
def _deterministic_unit(txid):
    """Map ``txid`` to a reproducible value in [0, 1).

    The same txid always yields the same value, also across processes and
    restarts: ids that convert to ``int`` are used directly, anything else is
    reduced with CRC-32 of its text form. (The built-in ``hash()`` is salted
    per process for str/bytes, so it would make sampling decisions for string
    ids differ between processes.)

    :param txid: transaction id (int, numeric string, or any other object);
        ``None`` maps to 0.0.
    :return: float ``h / 2**32`` with ``0 <= h < 2**32``.
    """
    if txid is None:
        return 0.0
    try:
        seed = abs(int(txid))
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers int(float('inf')).
        import zlib
        if isinstance(txid, (bytes, bytearray)):
            data = bytes(txid)
        else:
            data = str(txid).encode('utf-8', 'backslashreplace')
        seed = zlib.crc32(data) & 0xFFFFFFFF
    # Knuth multiplicative hash, reduced to 32 bits.
    h = (seed * 2654435761) & 0xFFFFFFFF
    return h / float(1 << 32)
@@ -0,0 +1,334 @@
1
+ """스코프 기반 평가자 적용 — 데코레이터 + 컨텍스트 매니저.
2
+
3
+ 전역 ``register_evaluator()`` 는 모든 LLM 호출에 평가자를 적용한다.
4
+ 대부분의 경우 사용자는 특정 트랜잭션/함수에만 평가를 붙이고 싶어한다.
5
+ 이 모듈이 그 자연스러운 패턴을 제공한다.
6
+
7
+ 사용 예 (데코레이터):
8
+ from whatap.llm.evaluators import evaluate_with
9
+ from whatap.llm.evaluators.builtins import HallucinationEvaluator, make_openai_judge
10
+
11
+ judge = make_openai_judge(client=openai.OpenAI(), model='gpt-4o-mini')
12
+
13
+ @evaluate_with(HallucinationEvaluator(judge_fn=judge))
14
+ def chat(question: str) -> str:
15
+ return client.chat.completions.create(...).choices[0].message.content
16
+
17
+ 사용 예 (컨텍스트 매니저):
18
+ from whatap.llm.evaluators import evaluation_scope
19
+
20
+ def chat(question):
21
+ with evaluation_scope(HallucinationEvaluator(judge_fn=judge)):
22
+ return client.chat.completions.create(...).choices[0].message.content
23
+
24
+ 스코프 안에서 일어난 LLM 호출에만 해당 평가자가 적용된다. 전역 ``register_evaluator``
25
+ 로 등록된 평가자가 있다면 함께 실행되며 (merge), 같은 LABEL 인 경우 스코프 평가자가 우선.
26
+
27
+ Storage:
28
+ 1) trace context (있으면) — interceptor 의 hot path 에서 같은 ctx 로 빠르게 lookup
29
+ 2) ContextVar (항상) — asyncio.create_task / TaskGroup 으로 분기된 sub-task 도
30
+ inherit. 같은 task chain 이면 ContextVar 가 같은 list 를
31
+ 가리켜 trace ctx 와 자동 sync.
32
+
33
+ 이전엔 fallback 이 ``threading.local()`` 이라 같은 스레드의 다른 asyncio task 들이 같은
34
+ 스코프를 공유 (cross-task contamination 위험) + ``asyncio.create_task`` 로 분기된 task
35
+ 는 trace ctx 가 다를 수 있어 못 보는 hole 이 있었음. ContextVar 로 둘 다 해결.
36
+ """
37
+ import asyncio
38
+ import contextvars
39
+ import functools
40
+ import inspect
41
+
42
+ from whatap import logging
43
+
44
+
45
# Context-local storage for scope evaluators. A task started through
# asyncio.create_task / TaskGroup inherits a snapshot of the ContextVar taken
# when it starts, so LLM calls made inside such a sub-task still see the
# scope. The default is None - the list is created when first needed.
_scope_cv = contextvars.ContextVar(
    'whatap_llm_scope_evaluators',
    default=None,
)
49
+
50
+
51
def _ensure_cv_list():
    """Return the evaluator list held by ``_scope_cv``, creating it if unset."""
    existing = _scope_cv.get()
    if existing is not None:
        return existing
    fresh = []
    _scope_cv.set(fresh)
    return fresh
57
+
58
+
59
def _ensure_ctx_list(ctx):
    """Return ``ctx._llm_scope_evaluators``, attaching an empty list if absent."""
    try:
        return ctx._llm_scope_evaluators
    except AttributeError:
        fresh = []
        ctx._llm_scope_evaluators = fresh
        return fresh
63
+
64
+
65
def _get_active_storages():
    """Return the storage lists active right now: trace ctx (if any) + ContextVar.

    A scope adds its evaluators to every returned list on enter and removes
    them from every list on exit, so that:
      - the lookup keyed on the trace ctx at LLM-call time finds them, and
      - sub-tasks (asyncio.create_task etc.) see them through the inherited
        ContextVar.

    Any failure while resolving the trace context is ignored (best effort);
    the ContextVar list is always included, as the last element.
    """
    ctx_list = None
    try:
        from whatap.trace.trace_context_manager import TraceContextManager
        ctx = TraceContextManager.getLocalContext()
        if ctx is not None:
            ctx_list = _ensure_ctx_list(ctx)
    except Exception:
        # No usable trace context - fall back to the ContextVar alone.
        ctx_list = None

    if ctx_list is None:
        return [_ensure_cv_list()]
    return [ctx_list, _ensure_cv_list()]
82
+
83
+
84
def get_scope_evaluators():
    """Return the evaluators of the currently active scope(s).

    Entries from the trace ctx list come first, followed by entries from the
    ContextVar list. The same object is reported once (identity-based
    de-duplication), since an evaluator normally sits in both storages.
    Any failure while reading the trace context is ignored (best effort).

    NOTE(review): the original docstring says this is called from
    ``LlmEvaluatorTask.enqueue`` - the caller is not visible here.
    """
    merged = []
    seen_ids = set()

    def _absorb(candidates):
        # Append each not-yet-seen object, preserving order.
        for ev in candidates or ():
            key = id(ev)
            if key in seen_ids:
                continue
            seen_ids.add(key)
            merged.append(ev)

    try:
        from whatap.trace.trace_context_manager import TraceContextManager
        ctx = TraceContextManager.getLocalContext()
        if ctx is not None:
            _absorb(getattr(ctx, '_llm_scope_evaluators', None))
    except Exception:
        pass

    _absorb(_scope_cv.get())
    return merged
111
+
112
+
113
class evaluation_scope(object):
    """Context manager that applies evaluators to the LLM calls made inside it.

    Example:
        with evaluation_scope(HallucinationEvaluator(judge_fn=judge), RefusalEvaluator()):
            response = client.chat.completions.create(...)

    Scopes nest:
        with evaluation_scope(EvalA()):
            with evaluation_scope(EvalB()):
                # calls made here get both EvalA and EvalB

    Leaving a scope removes exactly the entries that scope added; entries of
    enclosing scopes stay in place.
    """

    def __init__(self, *evaluators):
        # Keep only BaseEvaluator instances that carry a truthy LABEL.
        from whatap.llm.evaluators.base import BaseEvaluator
        self._evaluators = [
            e for e in evaluators
            if isinstance(e, BaseEvaluator) and getattr(e, 'LABEL', None)
        ]
        ignored = len(evaluators) - len(self._evaluators)
        if ignored:
            logging.warning(
                '[LLM] evaluation_scope: %d non-evaluator/no-LABEL items ignored'
                % ignored,
                extra={'id': 'LLM045'},
            )
        self._storages = None
        self._inserted_count = 0
        self._inserted_ids = None

    def __enter__(self):
        """Append this scope's evaluators to every active storage list."""
        if not self._evaluators:
            return self
        self._storages = _get_active_storages()
        # Identities of what we pushed. Removal on exit matches by identity
        # rather than slicing the tail, so entries appended to the same
        # storage by other code are never cut off by mistake.
        self._inserted_ids = set(id(e) for e in self._evaluators)
        for s in self._storages:
            s.extend(self._evaluators)
        self._inserted_count = len(self._evaluators)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Remove this scope's evaluators again; never suppresses exceptions."""
        if not self._storages or self._inserted_count == 0:
            return False
        try:
            for s in self._storages:
                self._remove_inserted(s)
        except Exception as e:
            logging.warning('[LLM] evaluation_scope cleanup failed: %s' % e,
                            extra={'id': 'LLM046'})
        finally:
            self._storages = None
            self._inserted_count = 0
            self._inserted_ids = None
        return False  # let any in-flight exception propagate

    def _remove_inserted(self, storage):
        """Remove from ``storage`` exactly the entries this scope pushed.

        The storage is scanned from the end (LIFO) and an entry is deleted
        when its identity matches one we inserted. Occurrences are counted:
        if the same evaluator instance was passed to this scope more than
        once, that many entries are removed, so no stale copy is left behind
        after the scope exits. Entries other code appended are untouched.
        """
        pending = {}
        for e in self._evaluators:
            pending[id(e)] = pending.get(id(e), 0) + 1

        for i in range(len(storage) - 1, -1, -1):
            if not pending:
                break
            key = id(storage[i])
            left = pending.get(key)
            if left is None:
                continue
            if left == 1:
                del pending[key]
            else:
                pending[key] = left - 1
            del storage[i]
187
+
188
+
189
def _is_streaming_response(obj):
    """Return True when ``obj`` exposes a ``body_iterator`` attribute.

    That attribute is what the original comment describes as the
    Starlette/FastAPI StreamingResponse marker; any object providing it is
    treated as a streaming response.
    """
    missing = object()
    return getattr(obj, 'body_iterator', missing) is not missing
193
+
194
+
195
def _wrap_async_iter_with_scope(orig_iter, scope):
    """Relay an async iterator and close ``scope`` once iteration ends.

    The returned async generator yields every item of ``orig_iter``. When the
    source is exhausted, raises, or the generator is closed after it has been
    started, ``scope.__exit__(None, None, None)`` is called.
    """
    async def _relay():
        try:
            async for item in orig_iter:
                yield item
        finally:
            scope.__exit__(None, None, None)

    return _relay()
204
+
205
+
206
def _wrap_sync_iter_with_scope(orig_iter, scope):
    """Relay a sync iterable and close ``scope`` once iteration ends.

    The returned generator delegates to ``orig_iter`` with ``yield from``, so
    besides plain iteration it also forwards ``send()`` values, ``throw()``
    and ``close()`` to a wrapped generator and passes its return value
    through. (A plain ``for``/``yield`` relay silently drops sent values,
    which breaks generators that are driven with ``send()``.)

    When the source is exhausted, raises, or the generator is closed after it
    has been started, ``scope.__exit__(None, None, None)`` is called.
    """
    def wrapped():
        try:
            return (yield from orig_iter)
        finally:
            scope.__exit__(None, None, None)

    return wrapped()
215
+
216
+
217
def _wrap_result_keep_scope(result, scope):
    """Keep ``scope`` open for as long as a function result is being consumed.

    Dispatch by result type:
      - object with ``body_iterator`` (streaming response): the iterator is
        replaced in place by a scope-closing wrapper and the same object is
        returned
      - async generator object: wrapped, scope stays open until it finishes
      - sync generator object: wrapped, scope stays open until it finishes
      - anything else: the scope is closed immediately

    :return: the (possibly wrapped) result. After this call the caller must
        not close ``scope`` itself.
    """
    if _is_streaming_response(result):
        body = result.body_iterator
        is_async = inspect.isasyncgen(body) or hasattr(body, '__aiter__')
        wrap = _wrap_async_iter_with_scope if is_async else _wrap_sync_iter_with_scope
        result.body_iterator = wrap(body, scope)
        return result

    if inspect.isasyncgen(result):
        return _wrap_async_iter_with_scope(result, scope)
    if inspect.isgenerator(result):
        return _wrap_sync_iter_with_scope(result, scope)

    # Ordinary return value - nothing left to stream, close right away.
    scope.__exit__(None, None, None)
    return result
244
+
245
+
246
+ def evaluate_with(*evaluators):
247
+ """함수에 평가자를 스코프 단위로 묶는 데코레이터. sync / async / generator /
248
+ async generator / FastAPI StreamingResponse 모두 지원.
249
+
250
+ Example:
251
+ @evaluate_with(HallucinationEvaluator(), RefusalEvaluator())
252
+ def chat(question):
253
+ return client.chat.completions.create(...).choices[0].message.content
254
+
255
+ @evaluate_with(ToxicityEvaluator())
256
+ async def achat(question):
257
+ return await aclient.chat.completions.create(...)
258
+
259
+ Streaming response (FastAPI/Starlette) 도 자동 처리:
260
+
261
+ @evaluate_with(CombinedJudgeEvaluator())
262
+ async def api_chat(request):
263
+ async def generate():
264
+ stream = await client.chat.completions.create(..., stream=True)
265
+ async for chunk in stream:
266
+ yield ...
267
+ return StreamingResponse(generate(), ...)
268
+
269
+ 이 경우 데코레이터는 함수가 StreamingResponse 를 리턴해도 scope 를 즉시 닫지 않고,
270
+ body_iterator 가 모두 소진되거나 예외가 날 때까지 유지한다. generate() 안의 LLM
271
+ 호출에도 평가자가 적용됨.
272
+
273
+ 함수가 직접 async generator / sync generator 를 리턴하는 경우, 그리고 sync 함수가
274
+ generator 객체를 return 하는 경우 (e.g. ``def fn(): return _make_gen()``) 도 모두
275
+ 감싸진다.
276
+
277
+ asyncio.create_task / TaskGroup 으로 분기된 sub-task 도 ContextVar inherit 으로
278
+ scope 가 보임. 단 ``run_in_executor`` 등 다른 thread 로 dispatch 시 ContextVar 는
279
+ propagate 안 되므로 그쪽은 명시적으로 ``with evaluation_scope(...)`` 사용 권장.
280
+ """
281
+ def decorator(fn):
282
+ if asyncio.iscoroutinefunction(fn):
283
+ @functools.wraps(fn)
284
+ async def async_wrapper(*args, **kwargs):
285
+ scope = evaluation_scope(*evaluators)
286
+ scope.__enter__()
287
+ try:
288
+ result = await fn(*args, **kwargs)
289
+ except BaseException:
290
+ scope.__exit__(None, None, None)
291
+ raise
292
+ return _wrap_result_keep_scope(result, scope)
293
+ return async_wrapper
294
+
295
+ if inspect.isasyncgenfunction(fn):
296
+ @functools.wraps(fn)
297
+ def async_gen_wrapper(*args, **kwargs):
298
+ scope = evaluation_scope(*evaluators)
299
+ scope.__enter__()
300
+ try:
301
+ gen = fn(*args, **kwargs)
302
+ except BaseException:
303
+ scope.__exit__(None, None, None)
304
+ raise
305
+ return _wrap_async_iter_with_scope(gen, scope)
306
+ return async_gen_wrapper
307
+
308
+ if inspect.isgeneratorfunction(fn):
309
+ @functools.wraps(fn)
310
+ def gen_wrapper(*args, **kwargs):
311
+ scope = evaluation_scope(*evaluators)
312
+ scope.__enter__()
313
+ try:
314
+ gen = fn(*args, **kwargs)
315
+ except BaseException:
316
+ scope.__exit__(None, None, None)
317
+ raise
318
+ return _wrap_sync_iter_with_scope(gen, scope)
319
+ return gen_wrapper
320
+
321
+ # 일반 sync 함수 — 단, return 이 generator/asyncgen/StreamingResponse 일 수도
322
+ # 있으니 result 분기 적용.
323
+ @functools.wraps(fn)
324
+ def sync_wrapper(*args, **kwargs):
325
+ scope = evaluation_scope(*evaluators)
326
+ scope.__enter__()
327
+ try:
328
+ result = fn(*args, **kwargs)
329
+ except BaseException:
330
+ scope.__exit__(None, None, None)
331
+ raise
332
+ return _wrap_result_keep_scope(result, scope)
333
+ return sync_wrapper
334
+ return decorator
whatap/llm/features.py ADDED
@@ -0,0 +1,66 @@
1
+ """LLM feature / finish_reason 상수 — producer 측 typo 방지용.
2
+
3
+ 이 모듈의 역할은 producer 코드 (provider extractor / context) 에서 string literal
4
+ 을 직접 쓰는 대신 ``LlmFeature.VISION`` 같은 상수로 참조해 typo / IDE 자동완성 /
5
+ 검색을 돕는 것. **closed set 강제는 아님** — consumer (FeatureStat / FinishStat) 는
6
+ producer 가 보내는 모든 raw 값을 그대로 누적한다.
7
+
8
+ 이렇게 한 이유:
9
+ - logsink (LlmStepStatus) 는 pack.features / pack.finish_reason 의 raw 값을
10
+ 그대로 보존한다.
11
+ - metric (FeatureStat / FinishStat) 도 logsink 와 같은 데이터를 다뤄야 한다 —
12
+ metric 측에서 미리 정의한 set 에 없는 값을 silently drop 하면 두 layer 가
13
+ 어긋난다.
14
+ - feature/reason 이름은 list-field (@id / features / features_count) 안의 entry
15
+ 이지 dimension (tag) 이 아니므로 새 값이 들어와도 row cardinality 폭주는 없다.
16
+
17
+ Producer 가 새 케이스를 잡고 싶으면:
18
+ 1. (선택) ``LlmFeature`` 클래스에 상수 추가 — 코드 가독성/검색용
19
+ 2. ``features.append(LlmFeature.NEW)`` 또는 raw 값 ``features.append('new_feat')``
20
+ 둘 다 동일하게 metric 에 노출된다.
21
+ """
22
+
23
+
24
class LlmFeature(object):
    """Constants for the values producers put into ``pack.features``.

    The list is a reference of the cases the provider code is known to emit
    today. It is not a validation set for FeatureStat: when a new case shows
    up, a producer may send the raw value as-is.
    """

    # input: image / multimodal content
    VISION = 'vision'
    # output: function / tool call
    TOOL_USE = 'tool_use'
    # input reasoning_effort, or output thinking
    REASONING = 'reasoning'
    # input response_format=json_schema
    STRUCTURED_OUTPUT = 'structured_output'
    # output: web-search tool call
    WEBSEARCH = 'websearch'
    # output: computer-control tool
    COMPUTER_USE = 'computer_use'
    # input: PDF / document attachment
    DOCUMENT = 'document'
39
+
40
+
41
class LlmFinishReason(object):
    """Reference list of known raw values of ``pack.finish_reason``.

    Per the original note, providers read the value straight from the raw
    response object and set it on the pack, so producers rarely need these
    constants; they are meant for code that compares or searches for reasons.
    """

    # --- OpenAI Chat / Completions ---
    STOP = 'stop'
    LENGTH = 'length'
    TOOL_CALLS = 'tool_calls'
    FUNCTION_CALL = 'function_call'
    CONTENT_FILTER = 'content_filter'

    # --- OpenAI Responses (response.status is stored as pack.finish_reason) ---
    COMPLETED = 'completed'
    FAILED = 'failed'
    INCOMPLETE = 'incomplete'
    CANCELLED = 'cancelled'
    IN_PROGRESS = 'in_progress'

    # --- Anthropic Messages ---
    END_TURN = 'end_turn'
    MAX_TOKENS = 'max_tokens'
    STOP_SEQUENCE = 'stop_sequence'
    TOOL_USE = 'tool_use'
@@ -0,0 +1,9 @@
1
+ from whatap.llm.log_sink_packs.llm_log_sink_pack import LlmLogSinkPack
2
+ from whatap.llm.log_sink_packs.llm_step_status import LlmStepStatus
3
+ from whatap.llm.log_sink_packs.llm_step_eval_status import LlmStepEvalStatus
4
+ from whatap.llm.log_sink_packs.llm_system_message import LlmSystemMessage
5
+ from whatap.llm.log_sink_packs.llm_input_message import LlmInputMessage
6
+ from whatap.llm.log_sink_packs.llm_output_message import LlmOutputMessage
7
+ from whatap.llm.log_sink_packs.llm_tool_calls import LlmToolCalls
8
+ from whatap.llm.log_sink_packs.llm_tool_results import LlmToolResults
9
+ from whatap.llm.log_sink_packs.llm_tx_status import LlmTxStatus