PyPI - whatap-python - Versions diffs - 2.1.0__py3-none-any.whl - Mend

whatap-python 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (227) hide show

whatap/LICENSE +0 -0
whatap/README.rst +49 -0
whatap/__init__.py +923 -0
whatap/__main__.py +4 -0
whatap/agent/darwin/amd64/whatap_python +0 -0
whatap/agent/darwin/arm64/whatap_python +0 -0
whatap/agent/linux/amd64/whatap_python +0 -0
whatap/agent/linux/arm64/whatap_python +0 -0
whatap/agent/windows/whatap_python.exe +0 -0
whatap/bootstrap/__init__.py +0 -0
whatap/bootstrap/sitecustomize.py +19 -0
whatap/build.py +4 -0
whatap/conf/__init__.py +0 -0
whatap/conf/configuration.py +280 -0
whatap/conf/configure.py +105 -0
whatap/conf/license.py +49 -0
whatap/control/__init__.py +0 -0
whatap/counter/__init__.py +14 -0
whatap/counter/counter_manager.py +45 -0
whatap/counter/tasks/__init__.py +3 -0
whatap/counter/tasks/base_task.py +26 -0
whatap/counter/tasks/llm_evaluator_task.py +501 -0
whatap/counter/tasks/llm_log_sink_task.py +309 -0
whatap/counter/tasks/llm_stat_task.py +78 -0
whatap/counter/tasks/openfiledescriptor.py +67 -0
whatap/io/__init__.py +1 -0
whatap/io/data_inputx.py +161 -0
whatap/io/data_outputx.py +262 -0
whatap/llm/__init__.py +17 -0
whatap/llm/definitions.py +43 -0
whatap/llm/evaluators/__init__.py +136 -0
whatap/llm/evaluators/base.py +114 -0
whatap/llm/evaluators/builtins/__init__.py +91 -0
whatap/llm/evaluators/builtins/answer_relevance.py +46 -0
whatap/llm/evaluators/builtins/combined_judge.py +271 -0
whatap/llm/evaluators/builtins/factuality.py +71 -0
whatap/llm/evaluators/builtins/hallucination.py +97 -0
whatap/llm/evaluators/builtins/llm_judge.py +516 -0
whatap/llm/evaluators/builtins/pii_leak.py +214 -0
whatap/llm/evaluators/builtins/prompt_injection.py +71 -0
whatap/llm/evaluators/builtins/toxicity.py +53 -0
whatap/llm/evaluators/builtins/url_scan.py +194 -0
whatap/llm/evaluators/registry.py +192 -0
whatap/llm/evaluators/sampler.py +83 -0
whatap/llm/evaluators/scope.py +334 -0
whatap/llm/features.py +66 -0
whatap/llm/log_sink_packs/__init__.py +9 -0
whatap/llm/log_sink_packs/llm_input_message.py +16 -0
whatap/llm/log_sink_packs/llm_log_sink_pack.py +72 -0
whatap/llm/log_sink_packs/llm_output_message.py +19 -0
whatap/llm/log_sink_packs/llm_step_eval_status.py +94 -0
whatap/llm/log_sink_packs/llm_step_status.py +118 -0
whatap/llm/log_sink_packs/llm_system_message.py +16 -0
whatap/llm/log_sink_packs/llm_tool_calls.py +44 -0
whatap/llm/log_sink_packs/llm_tool_results.py +16 -0
whatap/llm/log_sink_packs/llm_tx_status.py +108 -0
whatap/llm/pricing.py +236 -0
whatap/llm/prompt_meta.py +288 -0
whatap/llm/providers/__init__.py +0 -0
whatap/llm/providers/anthropic/__init__.py +37 -0
whatap/llm/providers/anthropic/messages/__init__.py +0 -0
whatap/llm/providers/anthropic/messages/messages.py +70 -0
whatap/llm/providers/anthropic/messages/messages_context.py +76 -0
whatap/llm/providers/anthropic/messages/messages_extractor.py +126 -0
whatap/llm/providers/interceptor.py +182 -0
whatap/llm/providers/openai/__init__.py +133 -0
whatap/llm/providers/openai/chat/__init__.py +0 -0
whatap/llm/providers/openai/chat/chat.py +82 -0
whatap/llm/providers/openai/chat/chat_context.py +78 -0
whatap/llm/providers/openai/chat/chat_extractor.py +127 -0
whatap/llm/providers/openai/completions/__init__.py +0 -0
whatap/llm/providers/openai/completions/completions.py +70 -0
whatap/llm/providers/openai/completions/completions_context.py +31 -0
whatap/llm/providers/openai/completions/completions_extractor.py +61 -0
whatap/llm/providers/openai/content_parser.py +41 -0
whatap/llm/providers/openai/embeddings/__init__.py +0 -0
whatap/llm/providers/openai/embeddings/embeddings.py +59 -0
whatap/llm/providers/openai/embeddings/embeddings_context.py +25 -0
whatap/llm/providers/openai/embeddings/embeddings_extractor.py +26 -0
whatap/llm/providers/openai/responses/__init__.py +0 -0
whatap/llm/providers/openai/responses/responses.py +70 -0
whatap/llm/providers/openai/responses/responses_context.py +88 -0
whatap/llm/providers/openai/responses/responses_extractor.py +126 -0
whatap/llm/providers/stream_accumulator.py +73 -0
whatap/llm/stats/__init__.py +35 -0
whatap/llm/stats/active_stat.py +86 -0
whatap/llm/stats/answer_relevance_eval_stat.py +10 -0
whatap/llm/stats/api_status_stat.py +35 -0
whatap/llm/stats/base_stat.py +107 -0
whatap/llm/stats/combined_judge_eval_stat.py +11 -0
whatap/llm/stats/error_stat.py +59 -0
whatap/llm/stats/eval_stat.py +225 -0
whatap/llm/stats/factuality_eval_stat.py +10 -0
whatap/llm/stats/feature_stat.py +104 -0
whatap/llm/stats/finish_stat.py +105 -0
whatap/llm/stats/hallucination_eval_stat.py +10 -0
whatap/llm/stats/meter.py +18 -0
whatap/llm/stats/perf_stat.py +117 -0
whatap/llm/stats/pii_leak_eval_stat.py +12 -0
whatap/llm/stats/prompt_injection_eval_stat.py +10 -0
whatap/llm/stats/token_usage_stat.py +133 -0
whatap/llm/stats/toxicity_eval_stat.py +10 -0
whatap/llm/stats/url_scan_eval_stat.py +12 -0
whatap/net/__init__.py +0 -0
whatap/net/async_sender.py +107 -0
whatap/net/packet_enum.py +44 -0
whatap/net/packet_type_enum.py +31 -0
whatap/net/param_def.py +69 -0
whatap/net/stackhelper.py +87 -0
whatap/net/udp_session.py +394 -0
whatap/net/udp_thread.py +54 -0
whatap/pack/__init__.py +0 -0
whatap/pack/logSinkPack.py +77 -0
whatap/pack/pack.py +34 -0
whatap/pack/pack_enum.py +41 -0
whatap/pack/tagCountPack.py +61 -0
whatap/scripts/__init__.py +208 -0
whatap/trace/__init__.py +12 -0
whatap/trace/mod/__init__.py +0 -0
whatap/trace/mod/amqp/__init__.py +0 -0
whatap/trace/mod/amqp/kombu.py +122 -0
whatap/trace/mod/amqp/pika.py +62 -0
whatap/trace/mod/application/__init__.py +0 -0
whatap/trace/mod/application/bottle.py +34 -0
whatap/trace/mod/application/celery.py +81 -0
whatap/trace/mod/application/cherrypy.py +30 -0
whatap/trace/mod/application/django.py +287 -0
whatap/trace/mod/application/django_asgi.py +266 -0
whatap/trace/mod/application/django_py3.py +251 -0
whatap/trace/mod/application/fastapi/__init__.py +31 -0
whatap/trace/mod/application/fastapi/endpoint.py +73 -0
whatap/trace/mod/application/fastapi/exception_log.py +63 -0
whatap/trace/mod/application/fastapi/instrumentation.py +204 -0
whatap/trace/mod/application/fastapi/scope.py +115 -0
whatap/trace/mod/application/fastapi/transaction.py +67 -0
whatap/trace/mod/application/flask.py +52 -0
whatap/trace/mod/application/frappe.py +224 -0
whatap/trace/mod/application/graphql.py +170 -0
whatap/trace/mod/application/nameko.py +39 -0
whatap/trace/mod/application/odoo.py +63 -0
whatap/trace/mod/application/starlette.py +126 -0
whatap/trace/mod/application/tornado.py +163 -0
whatap/trace/mod/application/wsgi.py +195 -0
whatap/trace/mod/database/__init__.py +0 -0
whatap/trace/mod/database/cxoracle.py +49 -0
whatap/trace/mod/database/mongo.py +169 -0
whatap/trace/mod/database/mysql.py +80 -0
whatap/trace/mod/database/neo4j.py +90 -0
whatap/trace/mod/database/psycopg2.py +45 -0
whatap/trace/mod/database/psycopg3.py +359 -0
whatap/trace/mod/database/redis.py +122 -0
whatap/trace/mod/database/sqlalchemy.py +213 -0
whatap/trace/mod/database/sqlite3.py +130 -0
whatap/trace/mod/database/util.py +630 -0
whatap/trace/mod/email/__init__.py +0 -0
whatap/trace/mod/email/smtp.py +78 -0
whatap/trace/mod/httpc/__init__.py +0 -0
whatap/trace/mod/httpc/django.py +31 -0
whatap/trace/mod/httpc/httplib.py +70 -0
whatap/trace/mod/httpc/httpx.py +62 -0
whatap/trace/mod/httpc/requests.py +20 -0
whatap/trace/mod/httpc/urllib3.py +27 -0
whatap/trace/mod/httpc/util.py +388 -0
whatap/trace/mod/logging.py +161 -0
whatap/trace/mod/plugin.py +84 -0
whatap/trace/mod/standalone/__init__.py +0 -0
whatap/trace/mod/standalone/multiple.py +293 -0
whatap/trace/mod/standalone/single.py +135 -0
whatap/trace/simple_trace_context.py +18 -0
whatap/trace/trace_context.py +212 -0
whatap/trace/trace_context_manager.py +244 -0
whatap/trace/trace_error.py +84 -0
whatap/trace/trace_handler.py +89 -0
whatap/trace/trace_import.py +91 -0
whatap/trace/trace_module_definition.py +156 -0
whatap/util/__init__.py +0 -0
whatap/util/bit_util.py +49 -0
whatap/util/cardinality/__init__.py +0 -0
whatap/util/cardinality/hyperloglog.py +84 -0
whatap/util/cardinality/murmurhash.py +20 -0
whatap/util/cardinality/registerset.py +60 -0
whatap/util/compare_util.py +19 -0
whatap/util/date_util.py +55 -0
whatap/util/debug_util.py +73 -0
whatap/util/escape_literal_sql.py +233 -0
whatap/util/frame_util.py +20 -0
whatap/util/hash_util.py +103 -0
whatap/util/hexa32.py +66 -0
whatap/util/int_set.py +199 -0
whatap/util/ip_util.py +63 -0
whatap/util/keygen.py +11 -0
whatap/util/linked_list.py +113 -0
whatap/util/linked_map.py +359 -0
whatap/util/metering_util.py +103 -0
whatap/util/request_double_queue.py +68 -0
whatap/util/request_queue.py +60 -0
whatap/util/string_util.py +20 -0
whatap/util/throttle_util.py +99 -0
whatap/util/userid_util.py +134 -0
whatap/value/__init__.py +1 -0
whatap/value/blob_value.py +38 -0
whatap/value/boolean_value.py +33 -0
whatap/value/decimal_value.py +36 -0
whatap/value/double_summary.py +86 -0
whatap/value/double_value.py +33 -0
whatap/value/float_array.py +42 -0
whatap/value/float_value.py +34 -0
whatap/value/int_array.py +42 -0
whatap/value/ip4_value.py +50 -0
whatap/value/list_value.py +105 -0
whatap/value/long_array.py +44 -0
whatap/value/long_summary.py +83 -0
whatap/value/map_value.py +154 -0
whatap/value/null_value.py +21 -0
whatap/value/number_value.py +33 -0
whatap/value/summary_value.py +39 -0
whatap/value/text_array.py +58 -0
whatap/value/text_hash_value.py +37 -0
whatap/value/text_value.py +43 -0
whatap/value/value.py +26 -0
whatap/value/value_enum.py +80 -0
whatap/whatap.conf +14 -0
whatap_python-2.1.0.dist-info/METADATA +87 -0
whatap_python-2.1.0.dist-info/RECORD +227 -0
whatap_python-2.1.0.dist-info/WHEEL +5 -0
whatap_python-2.1.0.dist-info/entry_points.txt +6 -0
whatap_python-2.1.0.dist-info/top_level.txt +1 -0

whatap/llm/stats/eval_stat.py ADDED Viewed

@@ -0,0 +1,225 @@
+"""llm_eval_stat category — LLM evaluation call statistics + common base for evaluation score stats.
+This module is the main entry point for LLM evaluation metrics:
+  EvalStat               — llm_eval_stat category (call statistics)
+  ScoreHistogramStat     — common base of the per-label score stats
+                           (hallucination / answer_relevance / toxicity /
+                           combined_judge / prompt_injection / factuality /
+                           pii_leak / url_scan)
+  update_eval_metrics()  — helper that updates EvalStat and the matching
+                           score stats after one evaluator run
+Tags (common to all categories):
+  pid / model / provider / operation_type / url / prompt_version
+Fields:
+  llm_eval_stat:
+    call_count     — number of judge LLM API calls (actual judge_fn calls only)
+    failures       — number of failed evaluations (judge HTTP error / parse error)
+    latency_sum    — sum of judge call durations (ms)
+    latency_sketch — KLL sketch (when datasketches is installed) — for p50/p95/p99 quantiles
+  llm_eval_hallucination / llm_eval_answer_relevance /
+  llm_eval_toxicity      / llm_eval_combined_judge /
+  llm_eval_prompt_injection / llm_eval_factuality /
+  llm_eval_pii_leak       / llm_eval_url_scan:
+    value0 ~ value10 — 11-bucket score counts.
+      value0  : 0.0 ≤ s < 0.1
+      value1  : 0.1 ≤ s < 0.2
+      ...
+      value9  : 0.9 ≤ s < 1.0
+      value10 : s == 1.0           (perfect score only; values above 1.0 are clamped here)
+"""
+from collections import defaultdict
+from whatap.llm.stats.base_stat import BaseStat
+try:
+    from datasketches import kll_doubles_sketch
+    HAS_DATASKETCHES = True
+except ImportError:
+    HAS_DATASKETCHES = False
+_SKETCH_K = 200
+# ─────────────────────────────────────────────────────────────────────────
+# llm_eval_stat — 평가 호출 통계 (call_count / failures / latency)
+# ─────────────────────────────────────────────────────────────────────────
+class EvalStat(BaseStat):
+    """Call statistics for LLM evaluation runs.
+    Tracks judge call count, failure count and latency per
+    (model, provider, operation_type, url, prompt_version) key. Score
+    distributions are handled by separate ScoreHistogramStat subclasses.
+    """
+    _category = 'llm_eval_stat'
+    _conf_enabled_key = 'llm_eval_enabled'
+    def _use_sketch(self):
+        """Return a truthy value when latency sketches should be kept."""
+        # The configuration class is imported inside the method, not at module level.
+        from whatap.conf.configure import Configure as conf
+        # Requires the optional datasketches import to have succeeded; the
+        # config flag counts as enabled when the attribute is absent.
+        # NOTE(review): this reads the 'llm_perf_sketch_*' settings, the same
+        # names PerfStat reads — confirm that sharing them is intended.
+        return (HAS_DATASKETCHES
+                and getattr(conf, 'llm_perf_sketch_enabled', True))
+    def _get_sketch_k(self):
+        """Return the sketch parameter k as an int (falls back to _SKETCH_K)."""
+        from whatap.conf.configure import Configure as conf
+        return int(getattr(conf, 'llm_perf_sketch_k', _SKETCH_K))
+    def _empty_stats(self):
+        """Build a fresh accumulator: one defaultdict per field, indexed by key."""
+        stats = {
+            'call_count': defaultdict(int),
+            'failures': defaultdict(int),
+            'latency_sum': defaultdict(float),
+        }
+        # The sketch field only exists when sketches are enabled; the other
+        # methods test for its presence with "'latency_sketch' in ...".
+        if self._use_sketch():
+            k = self._get_sketch_k()
+            stats['latency_sketch'] = defaultdict(lambda: kll_doubles_sketch(k))
+        return stats
+    def _get_keys(self, stats):
+        """Return the keys seen so far (every counted call touches call_count)."""
+        return stats['call_count'].keys()
+    def update_call(self, model, provider, operation_type, url, prompt_version,
+                    called_judge, success, latency_ms):
+        """Record one completed evaluation call.
+        Nothing is counted when ``called_judge`` is false (the case where
+        there was no judge_fn at all). A falsy ``success`` also increments
+        ``failures``. ``latency_ms`` is converted with ``float()``; None or an
+        unconvertible value is treated as 0.0.
+        """
+        if not called_judge:
+            return
+        # Missing/empty tag values are replaced by fixed defaults.
+        key = (model or 'unknown', provider or '', operation_type or 'default',
+               url or '', prompt_version or 'v1')
+        # self._lock / self._stats are not defined in this class — presumably
+        # provided by BaseStat; confirm in base_stat.py.
+        with self._lock:
+            self._stats['call_count'][key] += 1
+            if not success:
+                self._stats['failures'][key] += 1
+            try:
+                lat = float(latency_ms) if latency_ms is not None else 0.0
+            except (TypeError, ValueError):
+                lat = 0.0
+            # Only strictly positive latencies reach the sum and the sketch.
+            # NOTE(review): PerfStat.update_stats accepts latency >= 0 —
+            # confirm that skipping 0 here is intended.
+            if lat > 0:
+                self._stats['latency_sum'][key] += lat
+                if 'latency_sketch' in self._stats:
+                    self._stats['latency_sketch'][key].update(lat)
+    def _build_fields(self, pack, stats, key):
+        """Write the accumulated values for ``key`` into ``pack.fields``."""
+        pack.fields.putAuto('call_count', stats['call_count'][key])
+        pack.fields.putAuto('failures', stats['failures'][key])
+        pack.fields.putAuto('latency_sum', stats['latency_sum'][key])
+        if 'latency_sketch' in stats:
+            from whatap.value.blob_value import BlobValue
+            # .get() avoids creating an empty sketch for keys that never
+            # recorded a latency; empty sketches are not emitted.
+            sketch = stats['latency_sketch'].get(key)
+            if sketch is not None and not sketch.is_empty():
+                pack.fields.putValue('latency_sketch', BlobValue(sketch.serialize()))
+# ─────────────────────────────────────────────────────────────────────────
+# Common score-histogram base — each evaluator-label stat inherits it and only sets _category
+# ─────────────────────────────────────────────────────────────────────────
+_BUCKET_COUNT = 11   # value0 ~ value10
+def _bucket_idx(score):
+    """Convert a score to an 11-bucket index (0-10).
+    Bucket definition:
+      value0  : 0.0 ≤ s < 0.1
+      value1  : 0.1 ≤ s < 0.2
+      ...
+      value9  : 0.9 ≤ s < 1.0
+      value10 : s == 1.0           (perfect score gets its own bucket)
+    Out-of-range values are clamped (s <= 0 -> 0, s >= 1.0 -> 10). If
+    ``float(score)`` raises TypeError/ValueError the result is 0 (defensive).
+    """
+    try:
+        s = float(score)
+    except (TypeError, ValueError):
+        return 0
+    if s <= 0.0:
+        return 0
+    if s >= 1.0:
+        return 10
+    # NOTE(review): a NaN score passes both range checks above (comparisons
+    # with NaN are false) and int(nan * 10) raises ValueError, which is not
+    # caught here — confirm whether callers can ever pass NaN.
+    idx = int(s * 10)
+    # Defensive cap; s < 1.0 at this point, so this branch looks unreachable.
+    if idx > 10:
+        idx = 10
+    return idx
+class ScoreHistogramStat(BaseStat):
+    """Base class for per-label evaluation score histograms.
+    Subclasses only specify ``_category``. For the concrete category stats see
+    the hallucination_eval_stat / answer_relevance_eval_stat /
+    toxicity_eval_stat / combined_judge_eval_stat / prompt_injection_eval_stat /
+    factuality_eval_stat / pii_leak_eval_stat / url_scan_eval_stat modules.
+    Fields: value0 ~ value10 (11 buckets — see ``_bucket_idx`` for the boundaries).
+    """
+    _conf_enabled_key = 'llm_eval_enabled'
+    def _empty_stats(self):
+        """Build a fresh accumulator mapping key -> list of 11 bucket counters."""
+        # key -> [int] * 11
+        return defaultdict(lambda: [0] * _BUCKET_COUNT)
+    def _get_keys(self, stats):
+        """Return the keys that have received at least one score."""
+        return stats.keys()
+    def update_score(self, model, provider, operation_type, url, prompt_version, score):
+        """Count one score in its bucket; a ``None`` score is ignored."""
+        if score is None:
+            return
+        # Missing/empty tag values are replaced by fixed defaults.
+        key = (model or 'unknown', provider or '', operation_type or 'default',
+               url or '', prompt_version or 'v1')
+        # The bucket index is computed before taking the lock.
+        idx = _bucket_idx(score)
+        # self._lock / self._stats are not defined in this class — presumably
+        # provided by BaseStat; confirm in base_stat.py.
+        with self._lock:
+            self._stats[key][idx] += 1
+    def _build_fields(self, pack, stats, key):
+        """Write value0..value10 for ``key`` into ``pack.fields``."""
+        # .get() does not create an entry; a missing key yields all-zero buckets.
+        buckets = stats.get(key) or [0] * _BUCKET_COUNT
+        for i, v in enumerate(buckets):
+            pack.fields.putAuto('value%d' % i, v)
+# ─────────────────────────────────────────────────────────────────────────
+# Routing helper — updates EvalStat and the matching score stats right after one evaluator run
+# ─────────────────────────────────────────────────────────────────────────
+# Mapping from evaluator label to the name of its score-stat class.
+# LlmEvaluatorTask._run_one routes each score to the appropriate stat.
+_LABEL_TO_STAT_NAME = {
+    'hallucination': 'HallucinationEvalStat',
+    'answer_relevance': 'AnswerRelevanceEvalStat',
+    'toxicity': 'ToxicityEvalStat',
+    'prompt_injection': 'PromptInjectionEvalStat',
+    'factuality': 'FactualityEvalStat',
+    'pii_leak': 'PIILeakEvalStat',
+    'url_scan': 'URLScanEvalStat',
+    'combined_judge': 'CombinedJudgeEvalStat',
+}
+def update_eval_metrics(model, provider, operation_type, url, prompt_version,
+                        called_judge, success, latency_ms, scores=None):
+    """Update the call statistics and the score histograms after one evaluation run.
+    ``LlmEvaluatorTask._run_one`` calls this right after evaluator.evaluate()
+    finishes.
+    ``scores`` is iterated with ``.items()`` as label -> score pairs. Labels
+    without an entry in ``_LABEL_TO_STAT_NAME`` are skipped, as is any stat
+    for which ``LlmStatTask.get_stat`` returns None.
+    """
+    # Imported inside the function rather than at module level.
+    from whatap.counter.tasks.llm_stat_task import LlmStatTask
+    # 1. EvalStat — call statistics
+    eval_stat = LlmStatTask.get_stat('EvalStat')
+    if eval_stat is not None:
+        eval_stat.update_call(
+            model=model, provider=provider, operation_type=operation_type,
+            url=url, prompt_version=prompt_version,
+            called_judge=called_judge, success=success, latency_ms=latency_ms,
+        )
+    # 2. Per-score ScoreHistogramStat — only labels present in the mapping are routed
+    if scores:
+        for label, score in scores.items():
+            stat_name = _LABEL_TO_STAT_NAME.get(label)
+            if stat_name is None:
+                continue
+            stat = LlmStatTask.get_stat(stat_name)
+            if stat is None:
+                continue
+            stat.update_score(
+                model=model, provider=provider, operation_type=operation_type,
+                url=url, prompt_version=prompt_version, score=score,
+            )

whatap/llm/stats/factuality_eval_stat.py ADDED Viewed

@@ -0,0 +1,10 @@
+"""llm_eval_factuality 카테고리 — Factuality 평가 점수 히스토그램.
+11-bucket 분포 (value0~value10). 자세한 동작은 ``eval_stat.ScoreHistogramStat``
+docstring 참고.
+"""
+from whatap.llm.stats.eval_stat import ScoreHistogramStat
+class FactualityEvalStat(ScoreHistogramStat):
+    """Score histogram stat for the 'llm_eval_factuality' category; only the category is set here."""
+    _category = 'llm_eval_factuality'

whatap/llm/stats/feature_stat.py ADDED Viewed

@@ -0,0 +1,104 @@
+"""llm_feature_stat 카테고리 — LLM 호출 feature 분포.
+pack.features (csv) 의 raw 값을 그대로 누적. closed-set 검증 안 함 —
+logsink 와 metric 이 동일한 데이터를 다룬다는 원칙. feature 이름은 list field
+(@id / features / features_count) 안의 entry 이지 dimension (tag) 이 아니므로
+새 값이 들어와도 row cardinality 폭주는 없음.
+feature 는 호출당 multi-valued (vision,tool_use,... / 0개 가능) 라 finish_stat 과
+달리 sum(features_count) != 호출 수다. features_count 는 "그 feature 를 쓴 호출
+수" 이고, 전체 호출 수(adoption rate 분모) 는 동일 5-tuple 키의 llm_perf_stat.
+call_count 를 쓴다. feature 가 없는 호출은 어떤 feature 에도 안 잡히므로 (none
+버킷 없음), feature 를 하나도 안 쓴 key 는 이 메트릭에 등장하지 않는다.
+Tags : pid / model / provider / operation_type / url / prompt_version  (+ !rectype=2)
+Fields:
+  @id              — (model, provider, op_type, prompt_version, feature) 해시 list
+  features         — feature 이름 list (raw value 그대로)
+  features_count   — feature 별 카운트 list
+"""
+from collections import defaultdict
+from whatap.llm.stats.base_stat import BaseStat
+from whatap.util.hash_util import HashUtil
+class FeatureStat(BaseStat):
+    """Per-key counts of feature names for the 'llm_feature_stat' category.
+    Keys are (model, provider/host, operation_type, url, prompt_version)
+    tuples; each key maps feature name -> count.
+    """
+    _category = "llm_feature_stat"
+    # Presumably makes BaseStat call _build_fields_listed instead of a
+    # per-key builder — confirm in base_stat.py.
+    _is_listed = True
+    def _empty_stats(self):
+        """Build a fresh accumulator."""
+        return {
+            # key → {feature_name: count}
+            'features': defaultdict(lambda: defaultdict(int)),
+        }
+    def _get_keys(self, stats):
+        """Return the keys that accumulated at least one feature."""
+        return stats['features'].keys()
+    def update_stats(self, model_name, host, operation_type, url='',
+                     features='', prompt_version='v1'):
+        """Add one count for each feature name in the comma-separated ``features``.
+        An empty/None ``features`` value is ignored, so such calls never
+        create a key. Names are stripped of surrounding whitespace and empty
+        entries are dropped.
+        """
+        if not features:
+            return
+        # Missing/empty tag values are replaced by fixed defaults.
+        key = (model_name or 'unknown', host or '', operation_type or 'default',
+               url or '', prompt_version or 'v1')
+        # self._lock / self._stats are not defined in this class — presumably
+        # provided by BaseStat; confirm in base_stat.py.
+        with self._lock:
+            # NOTE(review): a name repeated within one csv is counted once per
+            # occurrence — confirm callers never pass duplicates.
+            for feat in features.split(','):
+                feat = feat.strip()
+                if feat:
+                    self._stats['features'][key][feat] += 1
+    def update_from_pack(self, pack):
+        """Feed ``update_stats`` from a pack's model/provider/operation_type/url/features."""
+        self.update_stats(
+            pack.model or 'unknown',
+            pack.provider or '',
+            pack.operation_type or 'default',
+            url=pack.url or '',
+            features=pack.features or '',
+            # prompt_version may be absent on the pack; default to 'v1'.
+            prompt_version=getattr(pack, 'prompt_version', 'v1') or 'v1',
+        )
+    def _build_fields_listed(self, pack, stats, keys, pid):
+        """List-type metric: flatten the (feature) rows of every key into parallel arrays.
+        Layout:
+          - key dimension (per row): pid / model / provider / operation_type / url / prompt_version
+          - feature dimension (per row): features / features_count / @id
+        Keys that used no feature are never accumulated in update_stats, so
+        they do not appear in ``keys`` here. call_count is kept only in
+        llm_perf_stat.
+        """
+        id_list = pack.fields.newList("@id")
+        pid_list = pack.fields.newList("pid")
+        model_list = pack.fields.newList("model")
+        provider_list = pack.fields.newList("provider")
+        op_type_list = pack.fields.newList("operation_type")
+        url_list = pack.fields.newList("url")
+        prompt_version_list = pack.fields.newList("prompt_version")
+        features_list = pack.fields.newList("features")
+        features_count_list = pack.fields.newList("features_count")
+        for key in keys:
+            # Fallback for 4-element keys without prompt_version.
+            try:
+                model, provider, op_type, url, prompt_version = key
+            except ValueError:
+                model, provider, op_type, url = key
+                prompt_version = 'v1'
+            feat_counts = stats['features'].get(key) or {}
+            # Sort feature names so the emitted row order is deterministic
+            # (stable for dashboard debugging/comparison).
+            for feat in sorted(feat_counts.keys()):
+                # NOTE(review): the @id hash omits url although url is part of
+                # the key, so keys differing only by url share an @id —
+                # confirm this is intended.
+                id_list.addLong(HashUtil.hashFromString(
+                    "{}:{}:{}:{}:{}".format(model, provider, op_type, prompt_version, feat)))
+                features_list.addString(feat)
+                features_count_list.addLong(feat_counts[feat])
+                # Key-dimension values are repeated on every row.
+                pid_list.addLong(pid)
+                model_list.addString(model)
+                provider_list.addString(provider)
+                op_type_list.addString(op_type)
+                url_list.addString(url)
+                prompt_version_list.addString(prompt_version)

whatap/llm/stats/finish_stat.py ADDED Viewed

@@ -0,0 +1,105 @@
+"""llm_finish_stat 카테고리 — LLM 호출 finish_reason 분포.
+pack.finish_reason 의 raw 값을 그대로 누적. closed-set 검증 안 함 — logsink 와
+metric 이 동일한 데이터를 다룬다는 원칙. reason 이름은 list field 안의 entry
+이지 dimension (tag) 이 아니므로 cardinality 폭주 없음.
+Provider 별로 들어오는 raw 값 예:
+  OpenAI Chat/Completions    : stop / length / tool_calls / function_call / content_filter
+  OpenAI Responses (status)  : completed / failed / incomplete / cancelled / in_progress
+  Anthropic Messages         : end_turn / max_tokens / stop_sequence / tool_use
+finish_reason 이 비는 호출(에러/중단/미완료 등 raw 값이 None/빈문자열) 은 "none"
+버킷으로 누적한다. 따라서 finish_reasons_count 의 합 == 해당 key 의 전체 호출 수
+이고, 배열 한 칸 = 그 finish_reason 으로 끝난 실제 호출 수로 유의미하다.
+별도 call_count 필드는 두지 않는다(llm_perf_stat 으로 일원화).
+Tags : pid / model / provider / operation_type / url / prompt_version  (+ !rectype=2)
+Fields:
+  @id                  — (model, provider, op_type, prompt_version, reason) 해시 list
+  finish_reasons       — reason 이름 list (raw value 그대로, 빈 값은 "none")
+  finish_reasons_count — reason 별 카운트 list
+"""
+from collections import defaultdict
+from whatap.llm.stats.base_stat import BaseStat
+from whatap.util.hash_util import HashUtil
+class FinishStat(BaseStat):
+    """Per-key counts of finish_reason values for the 'llm_finish_stat' category.
+    Keys are (model, provider/host, operation_type, url, prompt_version)
+    tuples; each key maps finish_reason -> count.
+    """
+    _category = "llm_finish_stat"
+    # Presumably makes BaseStat call _build_fields_listed instead of a
+    # per-key builder — confirm in base_stat.py.
+    _is_listed = True
+    def _empty_stats(self):
+        """Build a fresh accumulator."""
+        return {
+            # key → {finish_reason: count}
+            'reasons': defaultdict(lambda: defaultdict(int)),
+        }
+    def _get_keys(self, stats):
+        """Return the keys that accumulated at least one call."""
+        return stats['reasons'].keys()
+    def update_stats(self, model_name, host, operation_type, url='',
+                     finish_reason='', prompt_version='v1'):
+        """Count one call under its finish_reason.
+        The reason is stringified and stripped; a falsy or blank value is
+        counted under the "none" bucket, so every call increments exactly one
+        counter.
+        """
+        # Missing/empty tag values are replaced by fixed defaults.
+        key = (model_name or 'unknown', host or '', operation_type or 'default',
+               url or '', prompt_version or 'v1')
+        reason = str(finish_reason).strip() if finish_reason else ''
+        if not reason:
+            reason = 'none'
+        # self._lock / self._stats are not defined in this class — presumably
+        # provided by BaseStat; confirm in base_stat.py.
+        with self._lock:
+            self._stats['reasons'][key][reason] += 1
+    def update_from_pack(self, pack):
+        """Feed ``update_stats`` from a pack's model/provider/operation_type/url/finish_reason."""
+        self.update_stats(
+            pack.model or 'unknown',
+            pack.provider or '',
+            pack.operation_type or 'default',
+            url=pack.url or '',
+            # finish_reason / prompt_version may be absent on the pack.
+            finish_reason=getattr(pack, 'finish_reason', '') or '',
+            prompt_version=getattr(pack, 'prompt_version', 'v1') or 'v1',
+        )
+    def _build_fields_listed(self, pack, stats, keys, pid):
+        """List-type metric: flatten the (finish_reason) rows of every key into parallel arrays.
+        Layout:
+          - key dimension (per row): pid / model / provider / operation_type / url / prompt_version
+          - reason dimension (per row): finish_reasons / finish_reasons_count / @id
+        An empty finish_reason is accumulated under the "none" bucket in
+        update_stats, so every key has at least one reason row and the sum of
+        finish_reasons_count equals the number of calls.
+        """
+        id_list = pack.fields.newList("@id")
+        pid_list = pack.fields.newList("pid")
+        model_list = pack.fields.newList("model")
+        provider_list = pack.fields.newList("provider")
+        op_type_list = pack.fields.newList("operation_type")
+        url_list = pack.fields.newList("url")
+        prompt_version_list = pack.fields.newList("prompt_version")
+        reasons_list = pack.fields.newList("finish_reasons")
+        reasons_count_list = pack.fields.newList("finish_reasons_count")
+        for key in keys:
+            # Fallback for 4-element keys without prompt_version.
+            try:
+                model, provider, op_type, url, prompt_version = key
+            except ValueError:
+                model, provider, op_type, url = key
+                prompt_version = 'v1'
+            reason_counts = stats['reasons'].get(key) or {}
+            # Sorted for stable dashboard debugging/comparison output.
+            for reason in sorted(reason_counts.keys()):
+                # NOTE(review): the @id hash omits url although url is part of
+                # the key, so keys differing only by url share an @id —
+                # confirm this is intended.
+                id_list.addLong(HashUtil.hashFromString(
+                    "{}:{}:{}:{}:{}".format(model, provider, op_type, prompt_version, reason)))
+                reasons_list.addString(reason)
+                reasons_count_list.addLong(reason_counts[reason])
+                # Key-dimension values are repeated on every row.
+                pid_list.addLong(pid)
+                model_list.addString(model)
+                provider_list.addString(provider)
+                op_type_list.addString(op_type)
+                url_list.addString(url)
+                prompt_version_list.addString(prompt_version)

whatap/llm/stats/hallucination_eval_stat.py ADDED Viewed

@@ -0,0 +1,10 @@
+"""llm_eval_hallucination 카테고리 — Hallucination 평가 점수 히스토그램.
+11-bucket 분포 (value0~value10). 자세한 동작은 ``eval_stat.ScoreHistogramStat``
+docstring 참고.
+"""
+from whatap.llm.stats.eval_stat import ScoreHistogramStat
+class HallucinationEvalStat(ScoreHistogramStat):
+    """Score histogram stat for the 'llm_eval_hallucination' category; only the category is set here."""
+    _category = 'llm_eval_hallucination'

whatap/llm/stats/meter.py ADDED Viewed

@@ -0,0 +1,18 @@
+import threading
+class Meter:
+    """Class-level counter guarded by a lock.
+    State lives in class attributes and is accessed through classmethods, so
+    no instance is needed and all callers share a single count.
+    """
+    # One lock and one counter shared by every caller.
+    _lock = threading.Lock()
+    _count = 0
+    @classmethod
+    def increment(cls):
+        """Add one to the shared counter while holding the lock."""
+        with cls._lock:
+            cls._count += 1
+    @classmethod
+    def get_and_reset(cls):
+        """Return the current count and reset it to zero under the same lock."""
+        with cls._lock:
+            count = cls._count
+            cls._count = 0
+            return count

whatap/llm/stats/perf_stat.py ADDED Viewed

@@ -0,0 +1,117 @@
+from collections import defaultdict
+from whatap.llm.stats.base_stat import BaseStat
+try:
+    from datasketches import kll_doubles_sketch
+    HAS_DATASKETCHES = True
+except ImportError:
+    HAS_DATASKETCHES = False
+_SKETCH_K = 200
+class PerfStat(BaseStat):
+    """Per-key call performance counters for the 'llm_perf_stat' category.
+    Accumulates call/error/stream counts and latency, ttft and tpot sums per
+    (model, provider/host, operation_type, url, prompt_version) key, plus
+    optional KLL sketches for the three timing measures.
+    """
+    _category = "llm_perf_stat"
+    def _use_sketch(self):
+        """Return a truthy value when timing sketches should be kept."""
+        from whatap.conf.configure import Configure as conf
+        # Requires the optional datasketches import to have succeeded; the
+        # config flag counts as enabled when the attribute is absent.
+        return (HAS_DATASKETCHES
+                and getattr(conf, 'llm_perf_sketch_enabled', True))
+    def _get_sketch_k(self):
+        """Return the sketch parameter k as an int (falls back to _SKETCH_K)."""
+        from whatap.conf.configure import Configure as conf
+        return int(getattr(conf, 'llm_perf_sketch_k', _SKETCH_K))
+    def _empty_stats(self):
+        """Build a fresh accumulator: one defaultdict per field, indexed by key."""
+        stats = {
+            'call_count': defaultdict(int),
+            'error_count': defaultdict(int),
+            'stream_count': defaultdict(int),
+            'latency_sum': defaultdict(float),
+            'ttft_sum': defaultdict(float),
+            'ttft_count': defaultdict(int),
+            'tpot_sum': defaultdict(float),
+            'tpot_count': defaultdict(int),
+        }
+        # The three sketch fields are created together, so the presence of
+        # 'latency_sketch' implies the other two exist as well.
+        if self._use_sketch():
+            k = self._get_sketch_k()
+            stats['latency_sketch'] = defaultdict(lambda: kll_doubles_sketch(k))
+            stats['ttft_sketch'] = defaultdict(lambda: kll_doubles_sketch(k))
+            stats['tpot_sketch'] = defaultdict(lambda: kll_doubles_sketch(k))
+        return stats
+    def _get_keys(self, stats):
+        """Return the keys seen so far (every call touches call_count)."""
+        return stats['call_count'].keys()
+    def update_stats(self, model_name, host, operation_type, url='',
+                     latency=None, ttft=None, tpot=None, prompt_version='v1',
+                     error_count=0, stream=False):
+        """Record one call.
+        ``call_count`` is always incremented and ``error_count`` is added as
+        given. Each of latency / ttft / tpot is recorded only when it is not
+        None and is >= 0; ttft and tpot also keep their own sample counts.
+        """
+        # Missing/empty tag values are replaced by fixed defaults.
+        key = (model_name or 'unknown', host or '', operation_type or 'default',
+               url or '', prompt_version or 'v1')
+        # self._lock / self._stats are not defined in this class — presumably
+        # provided by BaseStat; confirm in base_stat.py.
+        with self._lock:
+            self._stats['call_count'][key] += 1
+            self._stats['error_count'][key] += error_count
+            if stream:
+                self._stats['stream_count'][key] += 1
+            if latency is not None and latency >= 0:
+                self._stats['latency_sum'][key] += latency
+                if 'latency_sketch' in self._stats:
+                    self._stats['latency_sketch'][key].update(latency)
+            if ttft is not None and ttft >= 0:
+                self._stats['ttft_sum'][key] += ttft
+                self._stats['ttft_count'][key] += 1
+                if 'ttft_sketch' in self._stats:
+                    self._stats['ttft_sketch'][key].update(ttft)
+            if tpot is not None and tpot >= 0:
+                self._stats['tpot_sum'][key] += tpot
+                self._stats['tpot_count'][key] += 1
+                if 'tpot_sketch' in self._stats:
+                    self._stats['tpot_sketch'][key].update(tpot)
+    def update_from_pack(self, pack):
+        """Derive tpot from the pack and forward everything to ``update_stats``."""
+        tpot = None
+        output_tokens = getattr(pack, 'output_tokens', None) or 0
+        # tpot (presumably time per output token — confirm) is the time after
+        # the first token divided by the remaining tokens; it needs both
+        # timings and more than one output token.
+        if pack.ttft is not None and pack.latency is not None and output_tokens > 1:
+            tpot = (pack.latency - pack.ttft) / (output_tokens - 1)
+        self.update_stats(
+            pack.model or 'unknown',
+            pack.provider or '',
+            pack.operation_type or 'default',
+            url=pack.url or '',
+            latency=pack.latency,
+            ttft=pack.ttft,
+            tpot=tpot,
+            prompt_version=getattr(pack, 'prompt_version', 'v1') or 'v1',
+            # A failed call contributes exactly one error.
+            error_count=0 if pack.success else 1,
+            stream=pack.stream,
+        )
+    def _build_fields(self, pack, stats, key):
+        """Write the accumulated values for ``key`` into ``pack.fields``."""
+        pack.fields.putAuto("call_count", stats['call_count'][key])
+        pack.fields.putAuto("error_count", stats['error_count'][key])
+        pack.fields.putAuto("stream_count", stats['stream_count'][key])
+        pack.fields.putAuto("latency_sum", stats['latency_sum'][key])
+        pack.fields.putAuto("ttft_sum", stats['ttft_sum'][key])
+        pack.fields.putAuto("ttft_count", stats['ttft_count'][key])
+        pack.fields.putAuto("tpot_sum", stats['tpot_sum'][key])
+        pack.fields.putAuto("tpot_count", stats['tpot_count'][key])
+        use_sketch = 'latency_sketch' in stats
+        if use_sketch:
+            from whatap.value.blob_value import BlobValue
+            # .get() avoids creating empty sketches; empty ones are not emitted.
+            # NOTE(review): presence is tested by truthiness here, whereas
+            # EvalStat._build_fields uses "is not None" — confirm the sketch
+            # object is never falsy when it holds data.
+            sketch = stats['latency_sketch'].get(key)
+            if sketch and not sketch.is_empty():
+                pack.fields.putValue("latency_sketch", BlobValue(sketch.serialize()))
+            sketch = stats['ttft_sketch'].get(key)
+            if sketch and not sketch.is_empty():
+                pack.fields.putValue("ttft_sketch", BlobValue(sketch.serialize()))
+            sketch = stats['tpot_sketch'].get(key)
+            if sketch and not sketch.is_empty():
+                pack.fields.putValue("tpot_sketch", BlobValue(sketch.serialize()))

whatap/llm/stats/pii_leak_eval_stat.py ADDED Viewed

@@ -0,0 +1,12 @@
+"""llm_eval_pii_leak 카테고리 — PII 노출 탐지 점수 히스토그램.
+11-bucket 분포 (value0~value10). 자세한 동작은 ``eval_stat.ScoreHistogramStat``
+docstring 참고.
+값 의미: 0.0 = PII 미탐지, 1.0 = 다수 탐지 (output_text 길이 대비 정규화).
+"""
+from whatap.llm.stats.eval_stat import ScoreHistogramStat
+class PIILeakEvalStat(ScoreHistogramStat):
+    """Score histogram stat for the 'llm_eval_pii_leak' category; only the category is set here."""
+    _category = 'llm_eval_pii_leak'

whatap/llm/stats/prompt_injection_eval_stat.py ADDED Viewed

@@ -0,0 +1,10 @@
+"""llm_eval_prompt_injection 카테고리 — Prompt Injection 평가 점수 히스토그램.
+11-bucket 분포 (value0~value10). 자세한 동작은 ``eval_stat.ScoreHistogramStat``
+docstring 참고.
+"""
+from whatap.llm.stats.eval_stat import ScoreHistogramStat
+class PromptInjectionEvalStat(ScoreHistogramStat):
+    """Score histogram stat for the 'llm_eval_prompt_injection' category; only the category is set here."""
+    _category = 'llm_eval_prompt_injection'