whatap-python 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- whatap/LICENSE +0 -0
- whatap/README.rst +49 -0
- whatap/__init__.py +923 -0
- whatap/__main__.py +4 -0
- whatap/agent/darwin/amd64/whatap_python +0 -0
- whatap/agent/darwin/arm64/whatap_python +0 -0
- whatap/agent/linux/amd64/whatap_python +0 -0
- whatap/agent/linux/arm64/whatap_python +0 -0
- whatap/agent/windows/whatap_python.exe +0 -0
- whatap/bootstrap/__init__.py +0 -0
- whatap/bootstrap/sitecustomize.py +19 -0
- whatap/build.py +4 -0
- whatap/conf/__init__.py +0 -0
- whatap/conf/configuration.py +280 -0
- whatap/conf/configure.py +105 -0
- whatap/conf/license.py +49 -0
- whatap/control/__init__.py +0 -0
- whatap/counter/__init__.py +14 -0
- whatap/counter/counter_manager.py +45 -0
- whatap/counter/tasks/__init__.py +3 -0
- whatap/counter/tasks/base_task.py +26 -0
- whatap/counter/tasks/llm_evaluator_task.py +501 -0
- whatap/counter/tasks/llm_log_sink_task.py +309 -0
- whatap/counter/tasks/llm_stat_task.py +78 -0
- whatap/counter/tasks/openfiledescriptor.py +67 -0
- whatap/io/__init__.py +1 -0
- whatap/io/data_inputx.py +161 -0
- whatap/io/data_outputx.py +262 -0
- whatap/llm/__init__.py +17 -0
- whatap/llm/definitions.py +43 -0
- whatap/llm/evaluators/__init__.py +136 -0
- whatap/llm/evaluators/base.py +114 -0
- whatap/llm/evaluators/builtins/__init__.py +91 -0
- whatap/llm/evaluators/builtins/answer_relevance.py +46 -0
- whatap/llm/evaluators/builtins/combined_judge.py +271 -0
- whatap/llm/evaluators/builtins/factuality.py +71 -0
- whatap/llm/evaluators/builtins/hallucination.py +97 -0
- whatap/llm/evaluators/builtins/llm_judge.py +516 -0
- whatap/llm/evaluators/builtins/pii_leak.py +214 -0
- whatap/llm/evaluators/builtins/prompt_injection.py +71 -0
- whatap/llm/evaluators/builtins/toxicity.py +53 -0
- whatap/llm/evaluators/builtins/url_scan.py +194 -0
- whatap/llm/evaluators/registry.py +192 -0
- whatap/llm/evaluators/sampler.py +83 -0
- whatap/llm/evaluators/scope.py +334 -0
- whatap/llm/features.py +66 -0
- whatap/llm/log_sink_packs/__init__.py +9 -0
- whatap/llm/log_sink_packs/llm_input_message.py +16 -0
- whatap/llm/log_sink_packs/llm_log_sink_pack.py +72 -0
- whatap/llm/log_sink_packs/llm_output_message.py +19 -0
- whatap/llm/log_sink_packs/llm_step_eval_status.py +94 -0
- whatap/llm/log_sink_packs/llm_step_status.py +118 -0
- whatap/llm/log_sink_packs/llm_system_message.py +16 -0
- whatap/llm/log_sink_packs/llm_tool_calls.py +44 -0
- whatap/llm/log_sink_packs/llm_tool_results.py +16 -0
- whatap/llm/log_sink_packs/llm_tx_status.py +108 -0
- whatap/llm/pricing.py +236 -0
- whatap/llm/prompt_meta.py +288 -0
- whatap/llm/providers/__init__.py +0 -0
- whatap/llm/providers/anthropic/__init__.py +37 -0
- whatap/llm/providers/anthropic/messages/__init__.py +0 -0
- whatap/llm/providers/anthropic/messages/messages.py +70 -0
- whatap/llm/providers/anthropic/messages/messages_context.py +76 -0
- whatap/llm/providers/anthropic/messages/messages_extractor.py +126 -0
- whatap/llm/providers/interceptor.py +182 -0
- whatap/llm/providers/openai/__init__.py +133 -0
- whatap/llm/providers/openai/chat/__init__.py +0 -0
- whatap/llm/providers/openai/chat/chat.py +82 -0
- whatap/llm/providers/openai/chat/chat_context.py +78 -0
- whatap/llm/providers/openai/chat/chat_extractor.py +127 -0
- whatap/llm/providers/openai/completions/__init__.py +0 -0
- whatap/llm/providers/openai/completions/completions.py +70 -0
- whatap/llm/providers/openai/completions/completions_context.py +31 -0
- whatap/llm/providers/openai/completions/completions_extractor.py +61 -0
- whatap/llm/providers/openai/content_parser.py +41 -0
- whatap/llm/providers/openai/embeddings/__init__.py +0 -0
- whatap/llm/providers/openai/embeddings/embeddings.py +59 -0
- whatap/llm/providers/openai/embeddings/embeddings_context.py +25 -0
- whatap/llm/providers/openai/embeddings/embeddings_extractor.py +26 -0
- whatap/llm/providers/openai/responses/__init__.py +0 -0
- whatap/llm/providers/openai/responses/responses.py +70 -0
- whatap/llm/providers/openai/responses/responses_context.py +88 -0
- whatap/llm/providers/openai/responses/responses_extractor.py +126 -0
- whatap/llm/providers/stream_accumulator.py +73 -0
- whatap/llm/stats/__init__.py +35 -0
- whatap/llm/stats/active_stat.py +86 -0
- whatap/llm/stats/answer_relevance_eval_stat.py +10 -0
- whatap/llm/stats/api_status_stat.py +35 -0
- whatap/llm/stats/base_stat.py +107 -0
- whatap/llm/stats/combined_judge_eval_stat.py +11 -0
- whatap/llm/stats/error_stat.py +59 -0
- whatap/llm/stats/eval_stat.py +225 -0
- whatap/llm/stats/factuality_eval_stat.py +10 -0
- whatap/llm/stats/feature_stat.py +104 -0
- whatap/llm/stats/finish_stat.py +105 -0
- whatap/llm/stats/hallucination_eval_stat.py +10 -0
- whatap/llm/stats/meter.py +18 -0
- whatap/llm/stats/perf_stat.py +117 -0
- whatap/llm/stats/pii_leak_eval_stat.py +12 -0
- whatap/llm/stats/prompt_injection_eval_stat.py +10 -0
- whatap/llm/stats/token_usage_stat.py +133 -0
- whatap/llm/stats/toxicity_eval_stat.py +10 -0
- whatap/llm/stats/url_scan_eval_stat.py +12 -0
- whatap/net/__init__.py +0 -0
- whatap/net/async_sender.py +107 -0
- whatap/net/packet_enum.py +44 -0
- whatap/net/packet_type_enum.py +31 -0
- whatap/net/param_def.py +69 -0
- whatap/net/stackhelper.py +87 -0
- whatap/net/udp_session.py +394 -0
- whatap/net/udp_thread.py +54 -0
- whatap/pack/__init__.py +0 -0
- whatap/pack/logSinkPack.py +77 -0
- whatap/pack/pack.py +34 -0
- whatap/pack/pack_enum.py +41 -0
- whatap/pack/tagCountPack.py +61 -0
- whatap/scripts/__init__.py +208 -0
- whatap/trace/__init__.py +12 -0
- whatap/trace/mod/__init__.py +0 -0
- whatap/trace/mod/amqp/__init__.py +0 -0
- whatap/trace/mod/amqp/kombu.py +122 -0
- whatap/trace/mod/amqp/pika.py +62 -0
- whatap/trace/mod/application/__init__.py +0 -0
- whatap/trace/mod/application/bottle.py +34 -0
- whatap/trace/mod/application/celery.py +81 -0
- whatap/trace/mod/application/cherrypy.py +30 -0
- whatap/trace/mod/application/django.py +287 -0
- whatap/trace/mod/application/django_asgi.py +266 -0
- whatap/trace/mod/application/django_py3.py +251 -0
- whatap/trace/mod/application/fastapi/__init__.py +31 -0
- whatap/trace/mod/application/fastapi/endpoint.py +73 -0
- whatap/trace/mod/application/fastapi/exception_log.py +63 -0
- whatap/trace/mod/application/fastapi/instrumentation.py +204 -0
- whatap/trace/mod/application/fastapi/scope.py +115 -0
- whatap/trace/mod/application/fastapi/transaction.py +67 -0
- whatap/trace/mod/application/flask.py +52 -0
- whatap/trace/mod/application/frappe.py +224 -0
- whatap/trace/mod/application/graphql.py +170 -0
- whatap/trace/mod/application/nameko.py +39 -0
- whatap/trace/mod/application/odoo.py +63 -0
- whatap/trace/mod/application/starlette.py +126 -0
- whatap/trace/mod/application/tornado.py +163 -0
- whatap/trace/mod/application/wsgi.py +195 -0
- whatap/trace/mod/database/__init__.py +0 -0
- whatap/trace/mod/database/cxoracle.py +49 -0
- whatap/trace/mod/database/mongo.py +169 -0
- whatap/trace/mod/database/mysql.py +80 -0
- whatap/trace/mod/database/neo4j.py +90 -0
- whatap/trace/mod/database/psycopg2.py +45 -0
- whatap/trace/mod/database/psycopg3.py +359 -0
- whatap/trace/mod/database/redis.py +122 -0
- whatap/trace/mod/database/sqlalchemy.py +213 -0
- whatap/trace/mod/database/sqlite3.py +130 -0
- whatap/trace/mod/database/util.py +630 -0
- whatap/trace/mod/email/__init__.py +0 -0
- whatap/trace/mod/email/smtp.py +78 -0
- whatap/trace/mod/httpc/__init__.py +0 -0
- whatap/trace/mod/httpc/django.py +31 -0
- whatap/trace/mod/httpc/httplib.py +70 -0
- whatap/trace/mod/httpc/httpx.py +62 -0
- whatap/trace/mod/httpc/requests.py +20 -0
- whatap/trace/mod/httpc/urllib3.py +27 -0
- whatap/trace/mod/httpc/util.py +388 -0
- whatap/trace/mod/logging.py +161 -0
- whatap/trace/mod/plugin.py +84 -0
- whatap/trace/mod/standalone/__init__.py +0 -0
- whatap/trace/mod/standalone/multiple.py +293 -0
- whatap/trace/mod/standalone/single.py +135 -0
- whatap/trace/simple_trace_context.py +18 -0
- whatap/trace/trace_context.py +212 -0
- whatap/trace/trace_context_manager.py +244 -0
- whatap/trace/trace_error.py +84 -0
- whatap/trace/trace_handler.py +89 -0
- whatap/trace/trace_import.py +91 -0
- whatap/trace/trace_module_definition.py +156 -0
- whatap/util/__init__.py +0 -0
- whatap/util/bit_util.py +49 -0
- whatap/util/cardinality/__init__.py +0 -0
- whatap/util/cardinality/hyperloglog.py +84 -0
- whatap/util/cardinality/murmurhash.py +20 -0
- whatap/util/cardinality/registerset.py +60 -0
- whatap/util/compare_util.py +19 -0
- whatap/util/date_util.py +55 -0
- whatap/util/debug_util.py +73 -0
- whatap/util/escape_literal_sql.py +233 -0
- whatap/util/frame_util.py +20 -0
- whatap/util/hash_util.py +103 -0
- whatap/util/hexa32.py +66 -0
- whatap/util/int_set.py +199 -0
- whatap/util/ip_util.py +63 -0
- whatap/util/keygen.py +11 -0
- whatap/util/linked_list.py +113 -0
- whatap/util/linked_map.py +359 -0
- whatap/util/metering_util.py +103 -0
- whatap/util/request_double_queue.py +68 -0
- whatap/util/request_queue.py +60 -0
- whatap/util/string_util.py +20 -0
- whatap/util/throttle_util.py +99 -0
- whatap/util/userid_util.py +134 -0
- whatap/value/__init__.py +1 -0
- whatap/value/blob_value.py +38 -0
- whatap/value/boolean_value.py +33 -0
- whatap/value/decimal_value.py +36 -0
- whatap/value/double_summary.py +86 -0
- whatap/value/double_value.py +33 -0
- whatap/value/float_array.py +42 -0
- whatap/value/float_value.py +34 -0
- whatap/value/int_array.py +42 -0
- whatap/value/ip4_value.py +50 -0
- whatap/value/list_value.py +105 -0
- whatap/value/long_array.py +44 -0
- whatap/value/long_summary.py +83 -0
- whatap/value/map_value.py +154 -0
- whatap/value/null_value.py +21 -0
- whatap/value/number_value.py +33 -0
- whatap/value/summary_value.py +39 -0
- whatap/value/text_array.py +58 -0
- whatap/value/text_hash_value.py +37 -0
- whatap/value/text_value.py +43 -0
- whatap/value/value.py +26 -0
- whatap/value/value_enum.py +80 -0
- whatap/whatap.conf +14 -0
- whatap_python-2.1.0.dist-info/METADATA +87 -0
- whatap_python-2.1.0.dist-info/RECORD +227 -0
- whatap_python-2.1.0.dist-info/WHEEL +5 -0
- whatap_python-2.1.0.dist-info/entry_points.txt +6 -0
- whatap_python-2.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""OpenAI Responses API 응답에서 토큰 사용량 및 출력 텍스트를 추출하는 모듈."""
|
|
2
|
+
import json
|
|
3
|
+
|
|
4
|
+
from whatap import logging
|
|
5
|
+
from whatap.llm.features import LlmFeature
|
|
6
|
+
from whatap.llm.providers.stream_accumulator import StreamAccumulator
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def extract_usage(usage):
    """Summarise a Responses API ``usage`` object as a plain dict.

    Args:
        usage: Object exposing ``input_tokens`` / ``output_tokens`` and,
            optionally, ``input_tokens_details`` / ``output_tokens_details``.

    Returns:
        ``{}`` when ``usage`` is falsy. Otherwise a dict with
        ``input_tokens``, ``output_tokens`` and ``total_tokens_count`` (the
        sum of the two), plus ``cached_tokens`` / ``reasoning_tokens`` when
        the matching details object is present and truthy.
    """
    if not usage:
        return {}

    def _count(holder, attr):
        # A missing attribute and an explicit None are both reported as 0.
        return getattr(holder, attr, 0) or 0

    prompt_tokens = _count(usage, "input_tokens")
    completion_tokens = _count(usage, "output_tokens")
    summary = {
        "input_tokens": prompt_tokens,
        "output_tokens": completion_tokens,
        "total_tokens_count": prompt_tokens + completion_tokens,
    }

    # (details attribute on usage, counter copied out of it)
    optional_counters = (
        ('input_tokens_details', 'cached_tokens'),
        ('output_tokens_details', 'reasoning_tokens'),
    )
    for details_attr, counter in optional_counters:
        details = getattr(usage, details_attr, None)
        if details:
            summary[counter] = _count(details, counter)
    return summary
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _extract_output_text(response):
|
|
26
|
+
"""응답 객체에서 출력 텍스트를 추출한다."""
|
|
27
|
+
try:
|
|
28
|
+
if hasattr(response, 'output_text'):
|
|
29
|
+
return response.output_text
|
|
30
|
+
parts = []
|
|
31
|
+
for item in response.output:
|
|
32
|
+
if hasattr(item, 'type') and item.type == 'message':
|
|
33
|
+
for block in item.content:
|
|
34
|
+
if hasattr(block, 'type') and block.type == 'output_text':
|
|
35
|
+
parts.append(block.text)
|
|
36
|
+
return "".join(parts)
|
|
37
|
+
except Exception as e:
|
|
38
|
+
logging.warning('[LLM] _extract_responses_output_text failed: %s' % e, extra={'id': 'LLM015'})
|
|
39
|
+
return ""
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _extract_tool_calls(response):
|
|
43
|
+
"""응답에서 function_call 타입의 도구 호출 목록을 추출한다."""
|
|
44
|
+
return [
|
|
45
|
+
{
|
|
46
|
+
"id": getattr(item, 'call_id', '') or getattr(item, 'id', ''),
|
|
47
|
+
"function": getattr(item, 'name', ''),
|
|
48
|
+
"arguments": getattr(item, 'arguments', ''),
|
|
49
|
+
}
|
|
50
|
+
for item in getattr(response, 'output', [])
|
|
51
|
+
if getattr(item, 'type', '') == 'function_call'
|
|
52
|
+
]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def finalize(response, pack, features):
|
|
56
|
+
"""비스트리밍 Responses 응답에서 출력 텍스트, 토큰, 도구 호출 정보를 팩에 기록한다."""
|
|
57
|
+
for item in getattr(response, 'output', []):
|
|
58
|
+
t = getattr(item, 'type', '')
|
|
59
|
+
if t == 'function_call' and LlmFeature.TOOL_USE not in features:
|
|
60
|
+
features.append(LlmFeature.TOOL_USE)
|
|
61
|
+
elif 'web_search' in t and LlmFeature.WEBSEARCH not in features:
|
|
62
|
+
features.append(LlmFeature.WEBSEARCH)
|
|
63
|
+
pack.features = ",".join(features)
|
|
64
|
+
|
|
65
|
+
tool_calls = _extract_tool_calls(response)
|
|
66
|
+
if tool_calls:
|
|
67
|
+
pack.tool_calls_text = json.dumps(tool_calls, ensure_ascii=False)
|
|
68
|
+
|
|
69
|
+
pack.set_tokens(extract_usage(getattr(response, "usage", None)))
|
|
70
|
+
pack.completion_text = _extract_output_text(response)
|
|
71
|
+
pack.finish_reason = getattr(response, 'status', None)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class ResponsesStream(StreamAccumulator):
    """Accumulator for OpenAI Responses streaming events.

    ``on_chunk`` collects output text, tool calls, detected features and the
    final usage/status; ``_apply`` copies the collected data onto the pack.
    """

    # Events that close a stream and carry the final response snapshot.
    # ``response.incomplete`` (e.g. output-token limit reached) is handled
    # like ``response.completed`` so usage and status are not lost.
    _FINAL_EVENTS = ('response.completed', 'response.incomplete')

    def __init__(self, pack, active_key):
        super().__init__(pack, active_key)
        self.text = ""
        self.usage = {}
        self.output_features = set()
        # Keyed by call_id (or the item id when no call_id was given).
        self.tool_calls = {}
        # Output item id -> key in ``tool_calls``. Argument delta/done events
        # identify their call by ``item_id``, not by ``call_id``.
        self._call_keys = {}

    def _find_call(self, event):
        """Return the tool-call record an argument event refers to, or None."""
        for attr in ('call_id', 'item_id'):
            ref = getattr(event, attr, None)
            if not ref:
                continue
            if ref in self.tool_calls:
                return self.tool_calls[ref]
            key = self._call_keys.get(ref)
            if key in self.tool_calls:
                return self.tool_calls[key]
        return None

    def _on_item_added(self, item):
        """Register features and tool calls announced by a new output item."""
        item_type = (getattr(item, 'type', '') or '') if item else ''
        if not isinstance(item_type, str):
            return
        if item_type == 'function_call':
            self.output_features.add(LlmFeature.TOOL_USE)
            item_id = getattr(item, 'id', '')
            call_id = getattr(item, 'call_id', '') or item_id
            if call_id and call_id not in self.tool_calls:
                self.tool_calls[call_id] = {"id": call_id, "function": getattr(item, 'name', ''), "arguments": ""}
            if call_id and item_id:
                self._call_keys[item_id] = call_id
        elif 'web_search' in item_type:
            self.output_features.add(LlmFeature.WEBSEARCH)

    def on_chunk(self, event):
        """Fold one streaming event into the accumulated state."""
        event_type = getattr(event, 'type', '')
        if event_type == 'response.output_text.delta':
            delta = getattr(event, 'delta', '') or ''
            if delta:
                # Only a non-empty text delta counts as the first token.
                self.on_first_token()
                self.text += delta
        elif event_type == 'response.output_item.added':
            self._on_item_added(getattr(event, 'item', None))
        elif event_type == 'response.function_call_arguments.delta':
            call = self._find_call(event)
            if call is not None:
                call["arguments"] += getattr(event, 'delta', '') or ''
        elif event_type == 'response.function_call_arguments.done':
            call = self._find_call(event)
            if call is not None:
                # Prefer the complete argument string; keep the accumulated
                # deltas when the event does not carry one.
                call["arguments"] = getattr(event, 'arguments', '') or call["arguments"]
        elif event_type in self._FINAL_EVENTS:
            resp = getattr(event, 'response', None)
            if resp:
                self.usage = extract_usage(getattr(resp, 'usage', None))
                self.pack.finish_reason = getattr(resp, 'status', None)

    def _apply(self):
        """Copy the accumulated features, tool calls, usage and text to the pack."""
        pack = self.pack
        if self.output_features:
            # ``features`` is a comma-separated string; tolerate None/empty.
            merged = [name for name in (pack.features or "").split(",") if name]
            for feature in self.output_features:
                if feature not in merged:
                    merged.append(feature)
            pack.features = ",".join(merged)
        if self.tool_calls:
            pack.tool_calls_text = json.dumps(list(self.tool_calls.values()), ensure_ascii=False)
        pack.set_tokens(self.usage)
        pack.completion_text = self.text
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""스트리밍 응답 누적 처리.
|
|
2
|
+
|
|
3
|
+
각 프로바이더의 Stream 클래스가 StreamAccumulator를 상속하여
|
|
4
|
+
on_chunk()로 청크 데이터를 누적하고, _apply()로 pack에 반영한다.
|
|
5
|
+
"""
|
|
6
|
+
import time
|
|
7
|
+
|
|
8
|
+
from whatap.llm.providers.interceptor import _dispatch, _ensure_end
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class StreamAccumulator(object):
    """Base class for accumulating a streamed LLM response.

    Subclasses implement ``on_chunk()`` to collect data and ``_apply()`` to
    copy it onto the pack. ``finalize()`` computes latency/TTFT, calls
    ``_apply()``, dispatches the pack and ends the active-call tracking.
    """

    def __init__(self, pack, active_key=None):
        self.first_token_time = None
        self.active_key = active_key
        self.pack = pack

    def __del__(self):
        """Safety net: end active tracking if the object dies un-finalized."""
        if not self.active_key:
            return
        _ensure_end(self.pack, self.active_key)

    def on_chunk(self, chunk):
        """Handle one stream chunk. Must be provided by subclasses."""
        raise NotImplementedError

    def on_first_token(self):
        """Remember when the first token arrived (used for TTFT)."""
        if self.first_token_time is not None:
            # Only the very first call is recorded.
            return
        self.first_token_time = time.monotonic()

    def finalize(self):
        """Finish the stream: timings, ``_apply()``, dispatch, end tracking.

        Timings are stored on the pack in whole milliseconds relative to
        ``pack._start_time``. ``_ensure_end`` runs even if a step raises.
        """
        try:
            finished_at = time.monotonic()
            started_at = self.pack._start_time
            self.pack.latency = round((finished_at - started_at) * 1000)
            first_at = self.first_token_time
            if first_at is not None:
                self.pack.ttft = round((first_at - started_at) * 1000)
            self._apply()
            self.pack.success = True
            _dispatch(self.pack)
        finally:
            if self.active_key:
                _ensure_end(self.pack, self.active_key)

    def _apply(self):
        """Copy accumulated data onto the pack. No-op unless overridden."""
        return None
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def sync_stream(response, acc):
    """Wrap a synchronous stream, feeding every chunk to ``acc``.

    Each chunk is passed to ``acc.on_chunk()`` and then yielded unchanged.
    ``acc.finalize()`` runs when the generator finishes for any reason:
    exhaustion, an exception, or the consumer closing it early.
    """
    try:
        for piece in response:
            acc.on_chunk(piece)
            yield piece
    finally:
        acc.finalize()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
async def async_stream(response, acc):
    """Wrap an asynchronous stream, feeding every chunk to ``acc``.

    Each chunk is passed to ``acc.on_chunk()`` and then yielded unchanged.
    ``acc.finalize()`` runs when the async generator finishes for any reason.
    """
    try:
        async for piece in response:
            acc.on_chunk(piece)
            yield piece
    finally:
        acc.finalize()
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from whatap.llm.stats.token_usage_stat import TokenUsageStat
|
|
2
|
+
from whatap.llm.stats.active_stat import ActiveStat
|
|
3
|
+
from whatap.llm.stats.perf_stat import PerfStat
|
|
4
|
+
from whatap.llm.stats.error_stat import ErrorStat
|
|
5
|
+
from whatap.llm.stats.feature_stat import FeatureStat
|
|
6
|
+
from whatap.llm.stats.finish_stat import FinishStat
|
|
7
|
+
from whatap.llm.stats.api_status_stat import ApiStatusStat
|
|
8
|
+
from whatap.llm.stats.eval_stat import EvalStat
|
|
9
|
+
from whatap.llm.stats.hallucination_eval_stat import HallucinationEvalStat
|
|
10
|
+
from whatap.llm.stats.answer_relevance_eval_stat import AnswerRelevanceEvalStat
|
|
11
|
+
from whatap.llm.stats.toxicity_eval_stat import ToxicityEvalStat
|
|
12
|
+
from whatap.llm.stats.combined_judge_eval_stat import CombinedJudgeEvalStat
|
|
13
|
+
from whatap.llm.stats.prompt_injection_eval_stat import PromptInjectionEvalStat
|
|
14
|
+
from whatap.llm.stats.factuality_eval_stat import FactualityEvalStat
|
|
15
|
+
from whatap.llm.stats.pii_leak_eval_stat import PIILeakEvalStat
|
|
16
|
+
from whatap.llm.stats.url_scan_eval_stat import URLScanEvalStat
|
|
17
|
+
|
|
18
|
+
LLM_STAT_CLASSES = [
|
|
19
|
+
TokenUsageStat,
|
|
20
|
+
ActiveStat,
|
|
21
|
+
PerfStat,
|
|
22
|
+
ErrorStat,
|
|
23
|
+
FeatureStat,
|
|
24
|
+
FinishStat,
|
|
25
|
+
ApiStatusStat,
|
|
26
|
+
EvalStat,
|
|
27
|
+
HallucinationEvalStat,
|
|
28
|
+
AnswerRelevanceEvalStat,
|
|
29
|
+
ToxicityEvalStat,
|
|
30
|
+
CombinedJudgeEvalStat,
|
|
31
|
+
PromptInjectionEvalStat,
|
|
32
|
+
FactualityEvalStat,
|
|
33
|
+
PIILeakEvalStat,
|
|
34
|
+
URLScanEvalStat,
|
|
35
|
+
]
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
|
|
3
|
+
from whatap.llm.stats.base_stat import BaseStat
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ActiveStat(BaseStat):
    """Count of in-flight LLM calls per (model, operation_type, prompt_version).

    ``on_start`` / ``on_end`` raise and lower the counters. ``process`` sends
    the current values without resetting them, one ``TagCountPack`` per key.
    """

    _category = "llm_active_stat"
    _conf_enabled_key = 'llm_enabled'

    def __init__(self):
        super().__init__()
        # Last host / url seen for each model; used as the provider/url tags.
        self._model_hosts = {}
        self._model_urls = {}

    def _empty_stats(self):
        return defaultdict(int)

    def interval(self):
        return 5

    def on_start(self, model, operation_type, prompt_version='v1'):
        """Increment the in-flight counter for the given call."""
        counter_key = (model or 'unknown', operation_type or 'default', prompt_version or 'v1')
        with self._lock:
            self._stats[counter_key] = self._stats[counter_key] + 1

    def set_host(self, model, host, url=''):
        """Remember the host and/or url last used for ``model``."""
        model_name = model or 'unknown'
        with self._lock:
            if host:
                self._model_hosts[model_name] = host
            if url:
                self._model_urls[model_name] = url

    def on_end(self, model, operation_type, prompt_version='v1', host='', url=''):
        """Decrement the in-flight counter; drop it once it reaches zero."""
        counter_key = (model or 'unknown', operation_type or 'default', prompt_version or 'v1')
        model_name = model or 'unknown'
        with self._lock:
            remaining = self._stats[counter_key] - 1
            if remaining <= 0:
                # Counters that hit zero (or below) are removed, not reported.
                del self._stats[counter_key]
            else:
                self._stats[counter_key] = remaining
            if host:
                self._model_hosts[model_name] = host
            if url:
                self._model_urls[model_name] = url

    def process(self):
        """Send one pack per active key, if the feature flag is enabled."""
        from whatap.conf.configure import Configure as conf
        if not getattr(conf, self._conf_enabled_key, False):
            return

        # Copy under the lock, build and send packs outside of it.
        with self._lock:
            active = dict(self._stats)
            host_by_model = dict(self._model_hosts)
            url_by_model = dict(self._model_urls)

        if not active:
            return

        for counter_key, in_flight in active.items():
            try:
                # Keys are (model, operation_type, prompt_version); the
                # older two-element form is still accepted.
                if len(counter_key) != 3:
                    model, op_type = counter_key
                    prompt_version = 'v1'
                else:
                    model, op_type, prompt_version = counter_key

                from whatap.pack.tagCountPack import TagCountPack
                from whatap import DateUtil
                from whatap.llm.stats.base_stat import currentpid

                out = TagCountPack()
                # Timestamp rounded down to a multiple of 5000.
                out.time = DateUtil.now() // 5000 * 5000
                out.Category = self._category
                out.tags.putAuto("pid", currentpid)
                out.tags.putAuto("model", model)
                out.tags.putAuto("provider", host_by_model.get(model, ''))
                out.tags.putAuto("operation_type", op_type)
                out.tags.putAuto("url", url_by_model.get(model, ''))
                out.tags.putAuto("prompt_version", prompt_version)

                out.fields.putAuto("count", in_flight)

                self.send_pack(out)
            except Exception:
                import traceback
                traceback.print_exc()
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""llm_eval_answer_relevance 카테고리 — Answer Relevance 점수 히스토그램.
|
|
2
|
+
|
|
3
|
+
11-bucket 분포 (value0~value10). 자세한 동작은 ``eval_stat.ScoreHistogramStat``
|
|
4
|
+
docstring 참고.
|
|
5
|
+
"""
|
|
6
|
+
from whatap.llm.stats.eval_stat import ScoreHistogramStat
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class AnswerRelevanceEvalStat(ScoreHistogramStat):
    """Score histogram reported under the ``llm_eval_answer_relevance`` category.

    All behaviour is inherited from ``ScoreHistogramStat``; only the
    category name is set here.
    """

    _category = 'llm_eval_answer_relevance'
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
|
|
3
|
+
from whatap.llm.stats.base_stat import BaseStat
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ApiStatusStat(BaseStat):
    """Counts 4xx and 5xx API responses per key.

    Keys are ``(model, provider, operation_type, url, prompt_version)``.
    """

    _category = "llm_api_status"

    def _empty_stats(self):
        return {
            '4xx': defaultdict(int),
            '5xx': defaultdict(int),
        }

    def _get_keys(self, stats):
        return set(stats['4xx'].keys()) | set(stats['5xx'].keys())

    def update_stats(self, model_name, provider, operation_type, status_code, url='',
                     prompt_version='v1'):
        """Count one response with the given HTTP ``status_code``.

        Only codes in 400-599 are counted. A ``status_code`` that cannot be
        read as an integer (for example ``None`` when no HTTP response was
        received) is ignored instead of raising ``TypeError``.
        """
        try:
            code = int(status_code)
        except (TypeError, ValueError):
            return

        if 400 <= code < 500:
            bucket = '4xx'
        elif 500 <= code < 600:
            bucket = '5xx'
        else:
            return

        key = (model_name or 'unknown', provider or '', operation_type or 'unknown',
               url or '', prompt_version or 'v1')
        with self._lock:
            self._stats[bucket][key] += 1

    def update_from_pack(self, pack):
        # Callers invoke update_stats() directly; nothing is derived from the
        # pack here at present.
        pass

    def _build_fields(self, pack, stats, key):
        pack.fields.putAuto("4xx_total_count", stats['4xx'][key])
        pack.fields.putAuto("5xx_total_count", stats['5xx'][key])
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import threading
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
|
|
5
|
+
from whatap.counter.tasks.base_task import BaseTask
|
|
6
|
+
from whatap.pack.tagCountPack import TagCountPack
|
|
7
|
+
from whatap import DateUtil
|
|
8
|
+
|
|
9
|
+
currentpid = os.getpid()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class BaseStat(BaseTask):
    """Base class for periodically flushed LLM statistics.

    A subclass keeps its counters in ``self._stats`` (created by
    ``_empty_stats()``) and updates them under ``self._lock``. ``process()``
    swaps the counters for a fresh set and sends them as ``TagCountPack``s.
    """

    _llm_task = True
    _category = None
    _conf_enabled_key = 'llm_enabled'

    # List-type metrics (``!rectype=2``) override this with True. One
    # TagCountPack is then built per flush and the subclass fills it through
    # ``_build_fields_listed(pack, stats, keys, pid)``, writing the key
    # dimensions as row-aligned parallel array fields instead of tags.
    _is_listed = False

    def __init__(self):
        self._lock = threading.Lock()
        self._stats = self._empty_stats()

    def _empty_stats(self):
        """Return a fresh, empty counter container. Subclass hook."""
        raise NotImplementedError

    def interval(self):
        return 5

    def process(self):
        """Flush the counters collected since the previous call.

        Does nothing unless the ``_conf_enabled_key`` flag is set on the
        configuration. Failures while building or sending a pack are printed
        and do not stop the remaining keys from being sent.
        """
        from whatap.conf.configure import Configure as conf
        if not getattr(conf, self._conf_enabled_key, False):
            return

        # Swap under the lock so updates arriving during the flush go to the
        # next window.
        with self._lock:
            stats = self._stats
            self._stats = self._empty_stats()

        keys = list(self._get_keys(stats))
        if not keys:
            return

        # List-type: one pack carrying every key.
        if self._is_listed:
            try:
                p = TagCountPack()
                p.time = DateUtil.now() // 5000 * 5000
                p.Category = self._category
                p.tags.putAuto("!rectype", 2)
                self._build_fields_listed(p, stats, keys, currentpid)
                self.send_pack(p)
            except Exception:
                import traceback
                traceback.print_exc()
            return

        # Scalar metrics: one pack per key, key dimensions sent as tags.
        for key in keys:
            try:
                # Key unpacking is inside the guarded region: the counters
                # were already swapped out, so an exception escaping here
                # would lose every key still waiting to be sent.
                try:
                    # (model, provider, operation_type, url, prompt_version)
                    model, provider, op_type, url, prompt_version = key
                except ValueError:
                    # Older four-element keys carry no prompt_version.
                    model, provider, op_type, url = key
                    prompt_version = 'v1'

                p = TagCountPack()
                p.time = DateUtil.now() // 5000 * 5000
                p.Category = self._category
                p.tags.putAuto("pid", currentpid)
                p.tags.putAuto("model", model)
                p.tags.putAuto("provider", provider)
                p.tags.putAuto("operation_type", op_type)
                p.tags.putAuto("url", url)
                p.tags.putAuto("prompt_version", prompt_version)

                self._build_fields(p, stats, key)
                self.send_pack(p)

            except Exception:
                import traceback
                traceback.print_exc()

    def _get_keys(self, stats):
        """Return the keys present in ``stats``. Subclass hook."""
        raise NotImplementedError

    def _build_fields(self, pack, stats, key):
        """Fill the fields of a scalar metric. Called only when ``_is_listed`` is False."""
        raise NotImplementedError

    def _build_fields_listed(self, pack, stats, keys, pid):
        """Fill the fields of a list-type metric. Called only when ``_is_listed`` is True.

        The single ``pack`` must receive the data of all ``keys`` as parallel
        array columns. The six key dimensions (pid / model / provider /
        operation_type / url / prompt_version) are expanded into row-aligned
        arrays. A key-level scalar metric (call_count and the like) is
        written on the first row of its key only and 0 on the other rows, so
        that sums stay correct.
        """
        raise NotImplementedError

    def update_from_pack(self, pack):
        """Update the counters from a finished pack. No-op by default."""
        pass
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""llm_eval_combined_judge 카테고리 — Combined Judge 종합 위험도 히스토그램.
|
|
2
|
+
|
|
3
|
+
CombinedJudgeEvaluator 가 hallucination/answer_relevance/toxicity 의 worst-aspect
|
|
4
|
+
risk 를 primary 로 송출. 11-bucket 분포 (value0~value10). 자세한 동작은
|
|
5
|
+
``eval_stat.ScoreHistogramStat`` docstring 참고.
|
|
6
|
+
"""
|
|
7
|
+
from whatap.llm.stats.eval_stat import ScoreHistogramStat
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CombinedJudgeEvalStat(ScoreHistogramStat):
    """Score histogram reported under the ``llm_eval_combined_judge`` category.

    All behaviour is inherited from ``ScoreHistogramStat``; only the
    category name is set here.
    """

    _category = 'llm_eval_combined_judge'
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
|
|
3
|
+
from whatap.llm.stats.base_stat import BaseStat
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ErrorStat(BaseStat):
    """Counts API errors and program errors per key.

    Keys are ``(model, provider, operation_type, url, prompt_version)``.
    """

    _category = "llm_error_stat"

    def _empty_stats(self):
        counters = {}
        for name in ('api_error_count', 'program_error_count', 'last_api_error_count'):
            counters[name] = defaultdict(int)
        return counters

    def _get_keys(self, stats):
        seen = set(stats['api_error_count'].keys())
        seen = seen | set(stats['program_error_count'].keys())
        seen = seen | set(stats['last_api_error_count'].keys())
        return seen

    def update_stats(self, model_name, provider, operation_type, url='',
                     error_type='', prompt_version='v1'):
        """Count one error; any ``error_type`` other than the two known ones is ignored."""
        stat_key = (model_name or 'unknown', provider or '', operation_type or 'unknown',
                    url or '', prompt_version or 'v1')
        if error_type == 'api_error':
            counter = 'api_error_count'
        elif error_type == 'program_error':
            counter = 'program_error_count'
        else:
            return
        with self._lock:
            self._stats[counter][stat_key] += 1

    def update_last_error(self, model_name, provider, operation_type, url='',
                          error_type='', prompt_version='v1'):
        """Increment ``last_api_error_count``; only ``api_error`` is counted."""
        if error_type == 'api_error':
            stat_key = (model_name or 'unknown', provider or '', operation_type or 'unknown',
                        url or '', prompt_version or 'v1')
            with self._lock:
                self._stats['last_api_error_count'][stat_key] += 1

    def update_from_pack(self, pack):
        """Count the error described by a failed pack; successful packs are skipped."""
        if not pack.success:
            version = getattr(pack, 'prompt_version', 'v1') or 'v1'
            self.update_stats(
                pack.model or 'unknown',
                pack.provider or '',
                pack.operation_type or 'unknown',
                url=pack.url or '',
                error_type=pack.error_type or '',
                prompt_version=version,
            )

    def _build_fields(self, pack, stats, key):
        api_errors = stats['api_error_count'][key]
        program_errors = stats['program_error_count'][key]

        fields = pack.fields
        # error_count is the sum of the two error kinds.
        fields.putAuto("error_count", api_errors + program_errors)
        fields.putAuto("api_error_count", api_errors)
        fields.putAuto("program_error_count", program_errors)
        fields.putAuto("last_api_error_count", stats['last_api_error_count'][key])
|