whatap-python 2.0.3rc1__tar.gz → 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/PKG-INFO +1 -1
  2. whatap_python-2.1.0/whatap/build.py +4 -0
  3. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/conf/configuration.py +45 -1
  4. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/conf/configure.py +1 -1
  5. whatap_python-2.1.0/whatap/counter/tasks/llm_evaluator_task.py +501 -0
  6. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/counter/tasks/llm_log_sink_task.py +65 -0
  7. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/counter/tasks/llm_stat_task.py +1 -0
  8. whatap_python-2.1.0/whatap/llm/__init__.py +17 -0
  9. whatap_python-2.1.0/whatap/llm/evaluators/__init__.py +136 -0
  10. whatap_python-2.1.0/whatap/llm/evaluators/base.py +114 -0
  11. whatap_python-2.1.0/whatap/llm/evaluators/builtins/__init__.py +91 -0
  12. whatap_python-2.1.0/whatap/llm/evaluators/builtins/answer_relevance.py +46 -0
  13. whatap_python-2.1.0/whatap/llm/evaluators/builtins/combined_judge.py +271 -0
  14. whatap_python-2.1.0/whatap/llm/evaluators/builtins/factuality.py +71 -0
  15. whatap_python-2.1.0/whatap/llm/evaluators/builtins/hallucination.py +97 -0
  16. whatap_python-2.1.0/whatap/llm/evaluators/builtins/llm_judge.py +516 -0
  17. whatap_python-2.1.0/whatap/llm/evaluators/builtins/pii_leak.py +214 -0
  18. whatap_python-2.1.0/whatap/llm/evaluators/builtins/prompt_injection.py +71 -0
  19. whatap_python-2.1.0/whatap/llm/evaluators/builtins/toxicity.py +53 -0
  20. whatap_python-2.1.0/whatap/llm/evaluators/builtins/url_scan.py +194 -0
  21. whatap_python-2.1.0/whatap/llm/evaluators/registry.py +192 -0
  22. whatap_python-2.1.0/whatap/llm/evaluators/sampler.py +83 -0
  23. whatap_python-2.1.0/whatap/llm/evaluators/scope.py +334 -0
  24. whatap_python-2.1.0/whatap/llm/features.py +66 -0
  25. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/log_sink_packs/__init__.py +1 -0
  26. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/log_sink_packs/llm_log_sink_pack.py +11 -3
  27. whatap_python-2.1.0/whatap/llm/log_sink_packs/llm_step_eval_status.py +94 -0
  28. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/log_sink_packs/llm_step_status.py +7 -3
  29. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/log_sink_packs/llm_tx_status.py +8 -2
  30. whatap_python-2.1.0/whatap/llm/prompt_meta.py +288 -0
  31. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/providers/anthropic/messages/messages.py +5 -2
  32. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/providers/anthropic/messages/messages_context.py +5 -5
  33. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/providers/anthropic/messages/messages_extractor.py +5 -2
  34. whatap_python-2.1.0/whatap/llm/providers/interceptor.py +182 -0
  35. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/providers/openai/chat/chat.py +5 -2
  36. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/providers/openai/chat/chat_context.py +5 -5
  37. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/providers/openai/chat/chat_extractor.py +9 -8
  38. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/providers/openai/completions/completions.py +5 -2
  39. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/providers/openai/completions/completions_context.py +0 -1
  40. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/providers/openai/embeddings/embeddings.py +8 -3
  41. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/providers/openai/embeddings/embeddings_context.py +0 -1
  42. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/providers/openai/responses/responses.py +5 -2
  43. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/providers/openai/responses/responses_context.py +4 -4
  44. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/providers/openai/responses/responses_extractor.py +7 -6
  45. whatap_python-2.1.0/whatap/llm/stats/__init__.py +35 -0
  46. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/stats/active_stat.py +14 -5
  47. whatap_python-2.1.0/whatap/llm/stats/answer_relevance_eval_stat.py +10 -0
  48. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/stats/api_status_stat.py +9 -2
  49. whatap_python-2.1.0/whatap/llm/stats/base_stat.py +107 -0
  50. whatap_python-2.1.0/whatap/llm/stats/combined_judge_eval_stat.py +11 -0
  51. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/stats/error_stat.py +7 -4
  52. whatap_python-2.1.0/whatap/llm/stats/eval_stat.py +225 -0
  53. whatap_python-2.1.0/whatap/llm/stats/factuality_eval_stat.py +10 -0
  54. whatap_python-2.1.0/whatap/llm/stats/feature_stat.py +104 -0
  55. whatap_python-2.1.0/whatap/llm/stats/finish_stat.py +105 -0
  56. whatap_python-2.1.0/whatap/llm/stats/hallucination_eval_stat.py +10 -0
  57. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/stats/perf_stat.py +15 -3
  58. whatap_python-2.1.0/whatap/llm/stats/pii_leak_eval_stat.py +12 -0
  59. whatap_python-2.1.0/whatap/llm/stats/prompt_injection_eval_stat.py +10 -0
  60. whatap_python-2.1.0/whatap/llm/stats/token_usage_stat.py +133 -0
  61. whatap_python-2.1.0/whatap/llm/stats/toxicity_eval_stat.py +10 -0
  62. whatap_python-2.1.0/whatap/llm/stats/url_scan_eval_stat.py +12 -0
  63. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/httpc/util.py +42 -19
  64. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/trace_handler.py +28 -2
  65. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap_python.egg-info/PKG-INFO +1 -1
  66. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap_python.egg-info/SOURCES.txt +29 -0
  67. whatap_python-2.0.3rc1/whatap/build.py +0 -4
  68. whatap_python-2.0.3rc1/whatap/llm/__init__.py +0 -1
  69. whatap_python-2.0.3rc1/whatap/llm/providers/interceptor.py +0 -85
  70. whatap_python-2.0.3rc1/whatap/llm/stats/__init__.py +0 -15
  71. whatap_python-2.0.3rc1/whatap/llm/stats/base_stat.py +0 -67
  72. whatap_python-2.0.3rc1/whatap/llm/stats/feature_stat.py +0 -63
  73. whatap_python-2.0.3rc1/whatap/llm/stats/token_usage_stat.py +0 -104
  74. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/README.md +0 -0
  75. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/pyproject.toml +0 -0
  76. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/setup.cfg +0 -0
  77. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/setup.py +0 -0
  78. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/LICENSE +0 -0
  79. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/README.rst +0 -0
  80. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/__init__.py +0 -0
  81. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/__main__.py +0 -0
  82. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/agent/darwin/amd64/whatap_python +0 -0
  83. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/agent/darwin/arm64/whatap_python +0 -0
  84. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/agent/linux/amd64/whatap_python +0 -0
  85. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/agent/linux/arm64/whatap_python +0 -0
  86. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/agent/windows/whatap_python.exe +0 -0
  87. {whatap_python-2.0.3rc1/whatap/util/cardinality → whatap_python-2.1.0/whatap/bootstrap}/__init__.py +0 -0
  88. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/bootstrap/sitecustomize.py +0 -0
  89. {whatap_python-2.0.3rc1/whatap/util → whatap_python-2.1.0/whatap/conf}/__init__.py +0 -0
  90. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/conf/license.py +0 -0
  91. {whatap_python-2.0.3rc1/whatap/trace/mod/standalone → whatap_python-2.1.0/whatap/control}/__init__.py +0 -0
  92. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/counter/__init__.py +0 -0
  93. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/counter/counter_manager.py +0 -0
  94. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/counter/tasks/__init__.py +0 -0
  95. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/counter/tasks/base_task.py +0 -0
  96. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/counter/tasks/openfiledescriptor.py +0 -0
  97. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/io/__init__.py +0 -0
  98. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/io/data_inputx.py +0 -0
  99. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/io/data_outputx.py +0 -0
  100. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/definitions.py +0 -0
  101. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/log_sink_packs/llm_input_message.py +0 -0
  102. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/log_sink_packs/llm_output_message.py +0 -0
  103. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/log_sink_packs/llm_system_message.py +0 -0
  104. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/log_sink_packs/llm_tool_calls.py +0 -0
  105. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/log_sink_packs/llm_tool_results.py +0 -0
  106. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/pricing.py +0 -0
  107. {whatap_python-2.0.3rc1/whatap/trace/mod/httpc → whatap_python-2.1.0/whatap/llm/providers}/__init__.py +0 -0
  108. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/providers/anthropic/__init__.py +0 -0
  109. {whatap_python-2.0.3rc1/whatap/trace/mod/email → whatap_python-2.1.0/whatap/llm/providers/anthropic/messages}/__init__.py +0 -0
  110. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/providers/openai/__init__.py +0 -0
  111. {whatap_python-2.0.3rc1/whatap/trace/mod/database → whatap_python-2.1.0/whatap/llm/providers/openai/chat}/__init__.py +0 -0
  112. {whatap_python-2.0.3rc1/whatap/trace/mod/application → whatap_python-2.1.0/whatap/llm/providers/openai/completions}/__init__.py +0 -0
  113. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/providers/openai/completions/completions_extractor.py +0 -0
  114. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/providers/openai/content_parser.py +0 -0
  115. {whatap_python-2.0.3rc1/whatap/trace/mod/amqp → whatap_python-2.1.0/whatap/llm/providers/openai/embeddings}/__init__.py +0 -0
  116. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/providers/openai/embeddings/embeddings_extractor.py +0 -0
  117. {whatap_python-2.0.3rc1/whatap/trace/mod → whatap_python-2.1.0/whatap/llm/providers/openai/responses}/__init__.py +0 -0
  118. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/providers/stream_accumulator.py +0 -0
  119. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/llm/stats/meter.py +0 -0
  120. {whatap_python-2.0.3rc1/whatap/pack → whatap_python-2.1.0/whatap/net}/__init__.py +0 -0
  121. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/net/async_sender.py +0 -0
  122. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/net/packet_enum.py +0 -0
  123. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/net/packet_type_enum.py +0 -0
  124. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/net/param_def.py +0 -0
  125. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/net/stackhelper.py +0 -0
  126. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/net/udp_session.py +0 -0
  127. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/net/udp_thread.py +0 -0
  128. {whatap_python-2.0.3rc1/whatap/net → whatap_python-2.1.0/whatap/pack}/__init__.py +0 -0
  129. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/pack/logSinkPack.py +0 -0
  130. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/pack/pack.py +0 -0
  131. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/pack/pack_enum.py +0 -0
  132. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/pack/tagCountPack.py +0 -0
  133. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/scripts/__init__.py +0 -0
  134. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/__init__.py +0 -0
  135. {whatap_python-2.0.3rc1/whatap/llm/providers/openai/responses → whatap_python-2.1.0/whatap/trace/mod}/__init__.py +0 -0
  136. {whatap_python-2.0.3rc1/whatap/llm/providers/openai/embeddings → whatap_python-2.1.0/whatap/trace/mod/amqp}/__init__.py +0 -0
  137. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/amqp/kombu.py +0 -0
  138. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/amqp/pika.py +0 -0
  139. {whatap_python-2.0.3rc1/whatap/llm/providers/openai/completions → whatap_python-2.1.0/whatap/trace/mod/application}/__init__.py +0 -0
  140. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/bottle.py +0 -0
  141. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/celery.py +0 -0
  142. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/cherrypy.py +0 -0
  143. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/django.py +0 -0
  144. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/django_asgi.py +0 -0
  145. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/django_py3.py +0 -0
  146. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/fastapi/__init__.py +0 -0
  147. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/fastapi/endpoint.py +0 -0
  148. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/fastapi/exception_log.py +0 -0
  149. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/fastapi/instrumentation.py +0 -0
  150. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/fastapi/scope.py +0 -0
  151. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/fastapi/transaction.py +0 -0
  152. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/flask.py +0 -0
  153. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/frappe.py +0 -0
  154. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/graphql.py +0 -0
  155. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/nameko.py +0 -0
  156. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/odoo.py +0 -0
  157. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/starlette.py +0 -0
  158. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/tornado.py +0 -0
  159. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/application/wsgi.py +0 -0
  160. {whatap_python-2.0.3rc1/whatap/llm/providers/openai/chat → whatap_python-2.1.0/whatap/trace/mod/database}/__init__.py +0 -0
  161. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/database/cxoracle.py +0 -0
  162. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/database/mongo.py +0 -0
  163. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/database/mysql.py +0 -0
  164. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/database/neo4j.py +0 -0
  165. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/database/psycopg2.py +0 -0
  166. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/database/psycopg3.py +0 -0
  167. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/database/redis.py +0 -0
  168. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/database/sqlalchemy.py +0 -0
  169. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/database/sqlite3.py +0 -0
  170. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/database/util.py +0 -0
  171. {whatap_python-2.0.3rc1/whatap/llm/providers/anthropic/messages → whatap_python-2.1.0/whatap/trace/mod/email}/__init__.py +0 -0
  172. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/email/smtp.py +0 -0
  173. {whatap_python-2.0.3rc1/whatap/llm/providers → whatap_python-2.1.0/whatap/trace/mod/httpc}/__init__.py +0 -0
  174. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/httpc/django.py +0 -0
  175. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/httpc/httplib.py +0 -0
  176. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/httpc/httpx.py +0 -0
  177. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/httpc/requests.py +0 -0
  178. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/httpc/urllib3.py +0 -0
  179. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/logging.py +0 -0
  180. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/plugin.py +0 -0
  181. {whatap_python-2.0.3rc1/whatap/control → whatap_python-2.1.0/whatap/trace/mod/standalone}/__init__.py +0 -0
  182. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/standalone/multiple.py +0 -0
  183. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/mod/standalone/single.py +0 -0
  184. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/simple_trace_context.py +0 -0
  185. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/trace_context.py +0 -0
  186. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/trace_context_manager.py +0 -0
  187. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/trace_error.py +0 -0
  188. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/trace_import.py +0 -0
  189. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/trace/trace_module_definition.py +0 -0
  190. {whatap_python-2.0.3rc1/whatap/conf → whatap_python-2.1.0/whatap/util}/__init__.py +0 -0
  191. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/bit_util.py +0 -0
  192. {whatap_python-2.0.3rc1/whatap/bootstrap → whatap_python-2.1.0/whatap/util/cardinality}/__init__.py +0 -0
  193. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/cardinality/hyperloglog.py +0 -0
  194. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/cardinality/murmurhash.py +0 -0
  195. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/cardinality/registerset.py +0 -0
  196. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/compare_util.py +0 -0
  197. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/date_util.py +0 -0
  198. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/debug_util.py +0 -0
  199. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/escape_literal_sql.py +0 -0
  200. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/frame_util.py +0 -0
  201. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/hash_util.py +0 -0
  202. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/hexa32.py +0 -0
  203. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/int_set.py +0 -0
  204. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/ip_util.py +0 -0
  205. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/keygen.py +0 -0
  206. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/linked_list.py +0 -0
  207. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/linked_map.py +0 -0
  208. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/metering_util.py +0 -0
  209. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/request_double_queue.py +0 -0
  210. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/request_queue.py +0 -0
  211. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/string_util.py +0 -0
  212. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/throttle_util.py +0 -0
  213. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/util/userid_util.py +0 -0
  214. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/__init__.py +0 -0
  215. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/blob_value.py +0 -0
  216. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/boolean_value.py +0 -0
  217. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/decimal_value.py +0 -0
  218. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/double_summary.py +0 -0
  219. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/double_value.py +0 -0
  220. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/float_array.py +0 -0
  221. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/float_value.py +0 -0
  222. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/int_array.py +0 -0
  223. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/ip4_value.py +0 -0
  224. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/list_value.py +0 -0
  225. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/long_array.py +0 -0
  226. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/long_summary.py +0 -0
  227. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/map_value.py +0 -0
  228. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/null_value.py +0 -0
  229. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/number_value.py +0 -0
  230. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/summary_value.py +0 -0
  231. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/text_array.py +0 -0
  232. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/text_hash_value.py +0 -0
  233. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/text_value.py +0 -0
  234. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/value.py +0 -0
  235. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/value/value_enum.py +0 -0
  236. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap/whatap.conf +0 -0
  237. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap_python.egg-info/dependency_links.txt +0 -0
  238. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap_python.egg-info/entry_points.txt +0 -0
  239. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap_python.egg-info/not-zip-safe +0 -0
  240. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap_python.egg-info/requires.txt +0 -0
  241. {whatap_python-2.0.3rc1 → whatap_python-2.1.0}/whatap_python.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: whatap-python
3
- Version: 2.0.3rc1
3
+ Version: 2.1.0
4
4
  Summary: Monitoring and Profiling Service
5
5
  Home-page: https://www.whatap.io
6
6
  Author: whatap
@@ -0,0 +1,4 @@
1
# Build metadata for the WhaTap Python agent distribution.
app = 'Python'
name = 'whatap-python'

# Release identity. NOTE(review): release_date looks like YYYYMMDD — confirm.
version = '2.1.0'
release_date = '20260610'
@@ -232,5 +232,49 @@ Configuration = {
232
232
  "force_llm_net_udp_port": False,
233
233
  "llm_model_pricing": "",
234
234
  "llm_perf_sketch_enabled": True,
235
- "llm_perf_sketch_k": 200
235
+ "llm_perf_sketch_k": 200,
236
+
237
+ # ── LLM Evaluation ──
238
+ # 평가 파이프라인 마스터 토글. false면 enqueue/worker가 동작하지 않음.
239
+ "llm_eval_enabled": False,
240
+ # judge LLM 호출 평가자의 샘플링 비율 (0.0 ~ 1.0).
241
+ # 1.0 (기본) — 모든 judge 평가자 항상 실행
242
+ # 0.1 — judge 평가자 10% 만 실행 (비용 1/10)
243
+ # 0.0 — judge 평가자 전부 skip
244
+ # 규칙 기반 평가자 (PIILeak / URLScan 등 USES_LLM_JUDGE=False) 는 영향 X.
245
+ # 결정론적 샘플링 — 같은 txid 는 항상 같은 결정.
246
+ "llm_eval_sample_rate": 1.0,
247
+ # 평가 큐 최대 크기. 초과 시 drop + LLM030 경고.
248
+ "llm_eval_buffer_limit": 1000,
249
+ # 평가자 실행에 쓸 ThreadPoolExecutor worker 수.
250
+ "llm_eval_workers": 4,
251
+ # judge LLM 1회 호출의 최대 대기 시간 (초). 초과 시 TimeoutError → judge_error
252
+ # 로 graceful degrade. user app 의 event loop 가 hang/backpressure 일 때
253
+ # 평가 워커가 무기한 블록되는 것을 차단. 0 또는 음수면 무제한 (legacy 동작).
254
+ "llm_eval_judge_timeout_sec": 30,
255
+ # judge LLM 호출도 기존 인스트루멘테이션 (intercept) 으로 추적할지 여부.
256
+ # False (기본): judge 호출은 intercept 우회 → 메트릭/logsink 에 안 잡힘.
257
+ # (eval 결과 점수만 LlmStepEvalStatus + llm_eval_stat 로 송출)
258
+ # True : judge 호출이 llm_step_status pack + 메트릭에 잡힘 (사용자 호출과 동일).
259
+ # cost/token/latency 가시화. 메트릭 카운트 2배 증가 가능 (user + judge).
260
+ "llm_eval_track_judge_calls": False,
261
+ # 활성 평가자 csv. 미지정 (빈 값) 이면 default 3 종 자동 활성:
262
+ # combined_judge, pii_leak, url_scan
263
+ # 개별 aspect evaluator (hallucination / answer_relevance / toxicity /
264
+ # prompt_injection / factuality) 는 combined_judge 가 1회 judge 호출로 이미
265
+ # 모두 산출하므로 default 에서 제외 — 명시 활성 시에만 별도 evaluator 가 추가
266
+ # judge 호출을 발생시킴.
267
+ #
268
+ # 가용 라벨 (각 라벨이 evaluator 1개 + 동명 stat 카테고리에 매핑):
269
+ # combined_judge — 1번의 LLM judge 호출로 5 의미 aspect 동시 평가
270
+ # (hallucination / answer_relevance / toxicity /
271
+ # prompt_injection / factuality)
272
+ # pii_leak — 정규식 + Luhn 으로 PII 노출 탐지 (LLM 호출 X)
273
+ # url_scan — URL 추출 + suspicious 패턴 매칭 (LLM 호출 X)
274
+ # hallucination / answer_relevance / toxicity / prompt_injection / factuality
275
+ # — combined_judge 대신 개별 evaluator 만 쓰고 싶을 때
276
+ # 예: "combined_judge,pii_leak,url_scan" (default 와 동일)
277
+ # 예: "pii_leak,url_scan" (정형 평가만, LLM judge 0회)
278
+ # 예: "hallucination,toxicity,pii_leak" (개별 aspect — combined 대신)
279
+ "llm_eval_evaluators": "",
236
280
  }
@@ -60,7 +60,7 @@ class Configure(object):
60
60
  except Exception as e:
61
61
  print('WHATAP: ', e)
62
62
  continue
63
- if not getattr(cls, "license") and getattr(cls, "accesskey"):
63
+ if not getattr(cls, "license", None) and getattr(cls, "accesskey", None):
64
64
  setattr(cls, "license", getattr(cls, "accesskey"))
65
65
  for callback in cls.observers:
66
66
  callback()
@@ -0,0 +1,501 @@
1
+ """LLM 평가자 큐잉 + 비동기 실행 + 결과 송출 태스크.
2
+
3
+ 데이터 흐름:
4
+ [Producer] LlmLogSinkTask.dispatch(LlmStepStatus)
5
+ └─ enqueue_evaluation(pack)
6
+ └─ self._q.put((pack, scope_evs)) ← 큐 적재
7
+
8
+ [Dispatcher Thread] daemon, q.get() blocking loop
9
+ └─ for evaluator in registry + scope:
10
+ if sampler.should_run(evaluator, pack.txid):
11
+ executor.submit(_run_one, evaluator, pack)
12
+
13
+ [Worker Thread (ThreadPoolExecutor)]
14
+ └─ evaluator.evaluate(ctx) → EvaluatorResult (+extras)
15
+ └─ LlmStepEvalStatus = LlmStepStatus.from_step_status(pack)
16
+ └─ score 5 필드 채움 (eval_hallucination.n 등)
17
+ └─ dispatch_llm_evaluation_pack(eval_pack)
18
+ └─ LlmLogSinkTask._send_log_sink(eval_pack)
19
+
20
+ 평가 결과 pack 은 원본 LlmStepStatus 와 동일한 구조 (model/tokens/cost/latency/...) +
21
+ ``llm_log_type=llm_step_eval_status`` 로 차별 + 평가 점수 5 필드 추가.
22
+
23
+ 평가는 fire-and-forget. LlmStepStatus 송출과 완전 독립 — 사용자 트랜잭션 영향 0.
24
+ 모든 예외는 swallow 후 [LLM] 로그로만 기록.
25
+ """
26
+ import contextvars
27
+ import queue
28
+ import threading
29
+ import time
30
+
31
+ from whatap import logging
32
+ from whatap.conf.configure import Configure as conf
33
+
34
+ from whatap.llm.evaluators.base import EvaluatorContext, EvaluatorResult
35
+ from whatap.llm.evaluators.registry import EvaluatorRegistry
36
+ from whatap.llm.evaluators.sampler import EvaluatorSampler
37
+ from whatap.llm.log_sink_packs.llm_step_eval_status import LlmStepEvalStatus
38
+
39
+
40
# Fallback for the ``llm_eval_buffer_limit`` option (evaluation queue size).
_DEFAULT_BUFFER_LIMIT = 1000
# Fallback for the ``llm_eval_workers`` option (evaluation worker count).
_DEFAULT_WORKERS = 4
42
+
43
+
44
def _conf_truthy(name, default=False):
    """Return the Configure attribute ``name`` coerced to a bool.

    Per the original author's note, values reloaded from the config file are
    stored on Configure as raw strings, so the text ``'false'`` would be
    truthy if tested directly. Strings are therefore matched against an
    explicit allow-list instead.

    :param name: attribute name to read from Configure.
    :param default: value used when the attribute is missing or unreadable.
    :return: ``True`` / ``False``.
    """
    try:
        raw = getattr(conf, name, default)
    except Exception:
        # Any failure while reading the option falls back to the default.
        return bool(default)

    if isinstance(raw, str):
        # Only these spellings (case-insensitive, surrounding space ignored)
        # count as "on"; every other string is "off".
        return raw.strip().lower() in ('true', 'yes', '1', 'on')
    return raw if isinstance(raw, bool) else bool(raw)
59
+
60
# Attribute name that receives each evaluation score.
# Looked up by evaluator.LABEL (or by the label carried in
# EvaluatorResult.extras). Every field is simply the label prefixed
# with ``eval_``.
_LABEL_TO_FIELD = {
    label: 'eval_' + label
    for label in (
        'hallucination',
        'answer_relevance',
        'toxicity',
        'prompt_injection',
        'factuality',
        'pii_leak',
        'url_scan',
        'combined_judge',
    )
}
72
+
73
# Flag telling whether the current code runs inside an evaluation worker.
# It is kept in two stores at the same time:
#   - thread-local : recognises LLM calls made on the sync worker thread itself
#   - ContextVar   : propagates the flag into the context of a task that an
#                    async judge dispatched to the user loop through
#                    ``run_coroutine_threadsafe``
# Two purposes: block infinite recursion (a judge call must not be queued for
# evaluation again), while still letting intercept track judge calls normally
# (the recursion guard keeps them from being re-enqueued).
_eval_worker_local = threading.local()
_eval_worker_cv = contextvars.ContextVar('whatap_eval_worker', default=False)
81
+
82
+
83
def _is_in_evaluator_worker():
    """Tell whether the caller is running inside an evaluation worker.

    The thread-local flag is checked first; otherwise the ContextVar decides.
    The ContextVar may hold either a plain truthy/falsy value or a state dict
    whose ``'in_eval'`` entry carries the flag.

    :return: ``True`` when either store marks an evaluation worker.
    """
    if getattr(_eval_worker_local, 'in_eval', False):
        return True
    marker = _eval_worker_cv.get()
    return bool(marker.get('in_eval')) if isinstance(marker, dict) else bool(marker)
91
+
92
+
93
def _get_eval_worker_state():
    """Return the current evaluation-worker state, or ``None`` outside a worker.

    The state has the keys ``'in_eval'``, ``'parent_txid'``,
    ``'parent_step_id'`` and ``'parent_index'``. The ContextVar wins when it
    holds an active state dict (that same dict object is returned); otherwise
    a fresh dict is assembled from the thread-local attributes.
    """
    state = _eval_worker_cv.get()
    if isinstance(state, dict) and state.get('in_eval'):
        return state

    if not getattr(_eval_worker_local, 'in_eval', False):
        return None

    # Thread-local fallback: attributes that were never set read as None.
    snapshot = {'in_eval': True}
    for key in ('parent_txid', 'parent_step_id', 'parent_index'):
        snapshot[key] = getattr(_eval_worker_local, key, None)
    return snapshot
110
+
111
+
112
# Single-slot cache for the EvalStat instance. A one-element list is used so
# that the lookup helper can fill slot 0 in place.
_eval_stat_cache = [None]
114
+
115
+
116
def _eval_stat():
    """Return the ``EvalStat`` stat object, caching it after the first hit.

    The stat is looked up through ``LlmStatTask.get_stat('EvalStat')``. Only a
    non-None result is cached, so a miss is retried on the next call. Any
    exception (including a failed import) is swallowed and yields ``None``.
    """
    cached = _eval_stat_cache[0]
    if cached is not None:
        return cached
    try:
        # Imported lazily, at call time, as in the original.
        from whatap.counter.tasks.llm_stat_task import LlmStatTask
        found = LlmStatTask.get_stat('EvalStat')
        if found is None:
            return None
        _eval_stat_cache[0] = found
        return found
    except Exception:
        return None
127
+
128
+
129
+ class LlmEvaluatorTask(object):
130
+ """LLM 평가 파이프라인 (큐 + dispatcher + worker pool)."""
131
+
132
+ _instance = None
133
+ _lock = threading.Lock()
134
+
135
def __init__(self):
    """Build the evaluation queue and bookkeeping; no thread is started here.

    Queue capacity and worker count come from the ``llm_eval_buffer_limit``
    and ``llm_eval_workers`` options, falling back to the module defaults.
    """
    capacity = _coerce_int(
        getattr(conf, 'llm_eval_buffer_limit', _DEFAULT_BUFFER_LIMIT),
        _DEFAULT_BUFFER_LIMIT,
    )
    # NOTE: queue.Queue treats maxsize <= 0 as an unbounded queue.
    self._q = queue.Queue(maxsize=capacity)

    pool_size = _coerce_int(
        getattr(conf, 'llm_eval_workers', _DEFAULT_WORKERS),
        _DEFAULT_WORKERS,
    )
    self._workers = pool_size

    # The executor is created lazily by _ensure_started().
    self._executor = None
    self._sampler = EvaluatorSampler()
    self._started = False
    self._start_lock = threading.Lock()
145
+
146
+ @classmethod
147
+ def get_instance(cls):
148
+ if cls._instance is None:
149
+ with cls._lock:
150
+ if cls._instance is None:
151
+ cls._instance = cls()
152
+ return cls._instance
153
+
154
+ # ── public producers ──
155
+
156
def enqueue(self, pack):
    """Queue one finished LLM call for asynchronous evaluation.

    Per the original note, LlmLogSinkTask.dispatch calls this at the end of
    every LLM call, on the user's thread. That is why scope evaluators are
    captured here rather than on the dispatcher thread.

    :param pack: the original ``LlmStepStatus`` pack; a worker later copies
        it into an LlmStepEvalStatus and fills in the scores. ``None`` is
        ignored.
    :return: ``None``. A full queue drops the pack with an LLM030 warning.
    """
    if not _conf_truthy('llm_eval_enabled'):
        return
    if _is_in_evaluator_worker():
        # A judge evaluator's own LLM call must not be queued again,
        # otherwise evaluation would recurse without end.
        return
    if pack is None:
        return

    # Register built-in evaluators from conf.llm_eval_evaluators. This must
    # run before the is_empty() check below, or the first enqueue would be
    # skipped (the original note says the call has a one-time fast path).
    try:
        from whatap.llm.evaluators.registry import bootstrap_from_conf
        bootstrap_from_conf()
    except Exception as e:
        logging.warning('[LLM] evaluator bootstrap failed: %s' % e,
                        extra={'id': 'LLM048'})

    # Capture scope evaluators on the caller's thread; the dispatcher
    # thread cannot see them.
    try:
        from whatap.llm.evaluators.scope import get_scope_evaluators
        scope_evs = list(get_scope_evaluators())
    except Exception:
        scope_evs = []

    if EvaluatorRegistry.get_instance().is_empty() and not scope_evs:
        return

    try:
        self._q.put_nowait((pack, scope_evs))
    except queue.Full:
        # getattr keeps a pack without ``txid`` from raising into user code.
        logging.warning('[LLM] eval queue full, pack dropped: txid=%s'
                        % getattr(pack, 'txid', None),
                        extra={'id': 'LLM030'})
    # Reached on the drop path too: if an earlier start attempt failed, a
    # full queue would otherwise never be drained because the start would
    # never be retried.
    self._ensure_started()
195
+
196
+ # ── internal ──
197
+
198
+ def _ensure_started(self):
199
+ if self._started:
200
+ return
201
+ with self._start_lock:
202
+ if self._started:
203
+ return
204
+ try:
205
+ from concurrent.futures import ThreadPoolExecutor
206
+ self._executor = ThreadPoolExecutor(
207
+ max_workers=self._workers,
208
+ thread_name_prefix='whatap-llm-eval',
209
+ )
210
+ t = threading.Thread(target=self._run, daemon=True,
211
+ name='whatap-llm-eval-dispatch')
212
+ t.start()
213
+ self._started = True
214
+ except Exception as e:
215
+ logging.warning('[LLM] eval task start failed: %s' % e,
216
+ extra={'id': 'LLM035'})
217
+
218
+ def _run(self):
219
+ """Dispatcher 스레드 본체. 큐에서 (pack, scope_evs) 받아 evaluator × pack fan-out."""
220
+ while True:
221
+ try:
222
+ item = self._q.get()
223
+ except Exception:
224
+ continue
225
+ if item is None:
226
+ continue
227
+ try:
228
+ pack, scope_evs = item
229
+ except (ValueError, TypeError):
230
+ continue
231
+ try:
232
+ merged = self._merge_evaluators(scope_evs)
233
+ ctx_txid = pack.txid
234
+ # 샘플링 통과한 평가자만 모아서 한 워커 task 로 묶음 — 한 LLM 호출당
235
+ # LlmStepEvalStatus pack 1개만 송출되도록.
236
+ active = []
237
+ for evaluator in merged:
238
+ if not evaluator.LABEL:
239
+ continue
240
+ if not self._sampler.should_run(evaluator, ctx_txid):
241
+ continue
242
+ active.append(evaluator)
243
+ if active:
244
+ self._executor.submit(self._safe_run_pack, active, pack)
245
+ except Exception as e:
246
+ logging.warning('[LLM] eval dispatch failed: %s' % e,
247
+ extra={'id': 'LLM031'})
248
+
249
+ @staticmethod
250
+ def _merge_evaluators(scope_evs=None):
251
+ """전역 registry + 캡처된 scope 평가자 합치기. 같은 LABEL 이면 scope 가 우선."""
252
+ base_evs = EvaluatorRegistry.get_instance().all()
253
+ merged = {}
254
+ for e in base_evs:
255
+ merged[e.LABEL] = e
256
+ if scope_evs:
257
+ for e in scope_evs:
258
+ if getattr(e, 'LABEL', None):
259
+ merged[e.LABEL] = e
260
+ return list(merged.values())
261
+
262
+ def _safe_run_pack(self, evaluators, pack):
263
+ """워커 스레드 진입점 — 한 pack 에 대해 활성 evaluator 들을 순차 실행 + 단일
264
+ LlmStepEvalStatus pack 송출.
265
+
266
+ 모든 예외 swallow + 재귀 방지 + 부모 step 키 propagation.
267
+
268
+ TL/CV 에 다음 stash:
269
+ - in_eval=True (재귀 가드)
270
+ - parent_txid / parent_step_id / parent_index : 원본 user step 의 키
271
+ ``llm_eval_track_judge_calls=true`` 일 때 judge LLM 호출 pack 의 결합 키를
272
+ user step 과 동일하게 override 해서 dashboard 에서 같은 transaction 으로
273
+ 보이도록 함.
274
+
275
+ TL 은 워커 스레드 자체에서 발생한 LLM 호출 (sync judge), CV 는 dispatched coro
276
+ (async judge, user loop 으로 dispatch) 에서 propagate 되도록.
277
+ """
278
+ state = {
279
+ 'in_eval': True,
280
+ 'parent_txid': pack.txid,
281
+ 'parent_step_id': pack.step_id,
282
+ 'parent_index': pack.index,
283
+ }
284
+ _eval_worker_local.in_eval = True
285
+ _eval_worker_local.parent_txid = pack.txid
286
+ _eval_worker_local.parent_step_id = pack.step_id
287
+ _eval_worker_local.parent_index = pack.index
288
+ cv_token = _eval_worker_cv.set(state)
289
+ try:
290
+ self._run_pack(evaluators, pack)
291
+ except Exception as e:
292
+ logging.warning('[LLM] evaluator worker crashed: count=%d err=%s'
293
+ % (len(evaluators), e),
294
+ extra={'id': 'LLM036'})
295
+ finally:
296
+ _eval_worker_cv.reset(cv_token)
297
+ _eval_worker_local.in_eval = False
298
+ _eval_worker_local.parent_txid = None
299
+ _eval_worker_local.parent_step_id = None
300
+ _eval_worker_local.parent_index = None
301
+
302
+ def _run_pack(self, evaluators, pack):
303
+ """한 pack 의 모든 활성 evaluator 를 순차 실행 + 점수 모아서 단일
304
+ LlmStepEvalStatus pack 으로 송출.
305
+
306
+ 한 LLM 호출 → 한 LlmStepEvalStatus pack 이라는 1:1 invariant. 평가자가 8개여도
307
+ pack 은 1개. 점수가 없는 필드 (해당 evaluator 가 등록 안 됐거나 실패) 는 None
308
+ 이라 fields() 송출 시 누락.
309
+
310
+ Metric 갱신은 evaluator 별 ``update_eval_metrics()`` 가 따로 처리:
311
+ - llm_eval_stat (호출 통계 — call_count/failures/latency_sum/latency_sketch).
312
+ ``USES_LLM_JUDGE=True`` 인 평가자만 카운트 — 규칙 기반 (PII/URL) 은 skip.
313
+ - llm_eval_<label> (점수 히스토그램 value0~value10).
314
+ """
315
+ from whatap.counter.tasks.llm_log_sink_task import dispatch_llm_evaluation_pack
316
+
317
+ ctx = build_eval_context_from_pack(pack)
318
+
319
+ all_scores = {} # {label: float} — pack 에 합쳐 채울 모든 점수
320
+ any_score_emitted = False # 한 evaluator 라도 점수를 냈으면 True
321
+ all_succeeded = True # 모두 성공했을 때만 eval_success=True
322
+
323
+ for evaluator in evaluators:
324
+ scores, success = self._evaluate_one(evaluator, ctx, pack)
325
+ if scores:
326
+ all_scores.update(scores)
327
+ any_score_emitted = True
328
+ if not success:
329
+ all_succeeded = False
330
+
331
+ # 원본 pack 의 모든 attr 복제 (model/tokens/cost/latency/txid/...) → eval pack
332
+ eval_pack = LlmStepEvalStatus.from_step_status(pack)
333
+ # 한 evaluator 라도 점수를 냈고 모두 성공했으면 True. 모두 실패면 False.
334
+ eval_pack.eval_success = bool(any_score_emitted and all_succeeded)
335
+
336
+ for label, score in all_scores.items():
337
+ self._assign_score(eval_pack, label, score)
338
+
339
+ try:
340
+ dispatch_llm_evaluation_pack(eval_pack)
341
+ except Exception as e:
342
+ logging.warning('[LLM] eval pack send failed: %s' % e,
343
+ extra={'id': 'LLM033'})
344
+
345
+ def _evaluate_one(self, evaluator, ctx, pack):
346
+ """1 evaluator 실행 → ({label: score}, success bool) 반환 + metric 갱신.
347
+
348
+ pack 송출은 안 함 (호출자 ``_run_pack`` 이 모든 evaluator 끝나면 1 pack 으로 송출).
349
+ """
350
+ start = time.monotonic()
351
+ success = True
352
+ result = None
353
+ try:
354
+ raw = evaluator.evaluate(ctx)
355
+ result = _coerce_result(raw, getattr(evaluator, 'METRIC_TYPE', None))
356
+ except Exception as e:
357
+ success = False
358
+ logging.warning('[LLM] evaluator %s failed: %s'
359
+ % (getattr(evaluator, 'LABEL', '?'), e),
360
+ extra={'id': 'LLM032'})
361
+
362
+ duration_ms = int((time.monotonic() - start) * 1000)
363
+
364
+ # 점수 추출 (primary + extras) — pack 채움 + EvalStat 업데이트 둘 다에 사용
365
+ scores = {} # {label: float}
366
+ if result is not None:
367
+ v = result.value
368
+ if isinstance(v, (int, float)) and not isinstance(v, bool):
369
+ scores[evaluator.LABEL] = float(v)
370
+ extras = getattr(result, 'extras', None) or {}
371
+ for ex_label, extra in extras.items():
372
+ if not ex_label:
373
+ continue
374
+ if isinstance(extra, EvaluatorResult):
375
+ val = extra.value
376
+ elif isinstance(extra, dict):
377
+ val = extra.get('value')
378
+ else:
379
+ val = extra
380
+ if isinstance(val, (int, float)) and not isinstance(val, bool):
381
+ scores[ex_label] = float(val)
382
+
383
+ # judge HTTP 가 실제로 호출됐는지 / 평가 결과 유효성:
384
+ # USES_LLM_JUDGE=False → 규칙 기반 평가자 (PIILeak / URLScan) → 호출 X
385
+ # no_judge_configured → judge_fn 자체 없음 → 호출 X
386
+ # judge_error → 호출 시도, HTTP/parse 실패 → 호출 O, 점수 X (failure)
387
+ # numeric score → 호출 성공 (success)
388
+ # called_judge=False 면 EvalStat (호출 통계) 갱신 skip — 점수 히스토그램은
389
+ # 영향 없음.
390
+ called_judge = bool(getattr(evaluator, 'USES_LLM_JUDGE', False))
391
+ if called_judge and result is not None and result.value == 'no_judge_configured':
392
+ called_judge = False
393
+ eval_success = success and bool(scores) # 점수 받았으면 성공
394
+
395
+ # 평가 메트릭 갱신 — llm_eval_stat + llm_eval_<label> N종 한번에.
396
+ try:
397
+ from whatap.llm.stats.eval_stat import update_eval_metrics
398
+ judge_model = evaluator._model if getattr(evaluator, '_model', None) else ctx.model
399
+ update_eval_metrics(
400
+ model=judge_model,
401
+ provider=ctx.provider,
402
+ operation_type=ctx.operation_type,
403
+ url=ctx.url,
404
+ prompt_version=getattr(pack, 'prompt_version', 'v1') or 'v1',
405
+ called_judge=called_judge,
406
+ success=eval_success,
407
+ latency_ms=duration_ms,
408
+ scores=scores,
409
+ )
410
+ except Exception as e:
411
+ logging.warning('[LLM] eval metrics update failed: %s' % e,
412
+ extra={'id': 'LLM038'})
413
+
414
+ return scores, eval_success
415
+
416
+ @staticmethod
417
+ def _assign_score(eval_pack, label, value):
418
+ """label 에 매핑되는 eval_<x> 어트리뷰트에 score 값 세팅. 매핑 없으면 무시."""
419
+ attr = _LABEL_TO_FIELD.get(label)
420
+ if attr is None:
421
+ return
422
+ if isinstance(value, bool) or not isinstance(value, (int, float)):
423
+ return # 숫자 점수만 (judge_error 같은 categorical 은 eval_success=False 로 표현)
424
+ setattr(eval_pack, attr, float(value))
425
+
426
+
427
+ # ── helpers ──
428
+
429
+ def _coerce_int(value, default):
430
+ try:
431
+ v = int(value)
432
+ return v if v > 0 else default
433
+ except (TypeError, ValueError):
434
+ return default
435
+
436
+
437
def _coerce_result(raw, metric_type_hint=None):
    """Normalize the raw return value of ``evaluator.evaluate()`` into an
    ``EvaluatorResult``.

    A value that already is an ``EvaluatorResult`` is used as is; anything
    else is wrapped. When the result has no ``metric_type`` yet, it is taken
    from ``metric_type_hint`` if given, otherwise inferred from the value:
    bool -> 'boolean', int/float -> 'score', dict -> 'json', everything else
    -> 'categorical' (in which case a non-string, non-None value is replaced
    by its ``str()`` form).
    """
    result = raw if isinstance(raw, EvaluatorResult) else EvaluatorResult(value=raw)

    # An explicit metric type on the result always wins.
    if result.metric_type:
        return result

    if metric_type_hint:
        result.metric_type = metric_type_hint
        return result

    value = result.value
    # bool is tested first because bool is a subclass of int.
    if isinstance(value, bool):
        result.metric_type = 'boolean'
    elif isinstance(value, (int, float)):
        result.metric_type = 'score'
    elif isinstance(value, dict):
        result.metric_type = 'json'
    else:
        result.metric_type = 'categorical'
        needs_text = not (value is None or isinstance(value, str))
        if needs_text:
            result.value = str(value)
    return result
460
+
461
+
462
+ # ── module-level API (LlmLogSinkTask 가 호출) ──
463
+
464
def enqueue_evaluation(pack):
    """Module-level entry point called at the end of ``LlmLogSinkTask.dispatch()``.

    Hands ``pack`` to the ``LlmEvaluatorTask`` singleton.

    :param pack: the original ``LlmStepStatus``; a worker clones it into an
        ``LlmStepEvalStatus`` and sends that.
    """
    task = LlmEvaluatorTask.get_instance()
    task.enqueue(pack)
470
+
471
+
472
def build_eval_context_from_pack(pack):
    """Build the ``EvaluatorContext`` (input of ``evaluator.evaluate``) from an
    ``LlmStepStatus`` pack.

    ``txid``, ``step_id``, ``index``, ``provider`` and ``url`` are read
    directly from the pack; every other field is optional and falls back to
    an empty string, ``None``, ``False`` or ``'unknown'`` as shown below.
    """
    def text_of(attr_name):
        # Optional text attribute: missing or falsy becomes ''.
        return getattr(pack, attr_name, '') or ''

    raw_system = getattr(pack, 'system_texts', None) or []
    if isinstance(raw_system, (list, tuple)):
        # Join the non-empty system messages, one per line.
        system_text = '\n'.join([str(part) for part in raw_system if part])
    elif raw_system:
        system_text = str(raw_system)
    else:
        system_text = ''

    fields = {
        'txid': pack.txid,
        'step_id': pack.step_id,
        'index': pack.index,
        'provider': pack.provider or '',
        'url': pack.url or '',
        'model': getattr(pack, 'model', None),
        'operation_type': getattr(pack, 'operation_type', '') or 'unknown',
        'input_text': text_of('prompt_text'),
        'output_text': text_of('completion_text'),
        'system_text': system_text,
        'reasoning_text': text_of('reasoning_text'),
        'tool_calls_text': text_of('tool_calls_text'),
        'tool_results_text': text_of('tool_results_text'),
        'success': bool(getattr(pack, 'success', False)),
        'finish_reason': getattr(pack, 'finish_reason', None),
        'latency_ms': getattr(pack, 'latency', None),
        'input_tokens': getattr(pack, 'input_tokens', None),
        'output_tokens': getattr(pack, 'output_tokens', None),
        'client': getattr(pack, '_llm_client', None),
        'event_loop': getattr(pack, '_llm_event_loop', None),
    }
    return EvaluatorContext(**fields)
@@ -23,6 +23,16 @@ _MAX_CONTENT_BYTES = 20000
23
23
 
24
24
 
25
25
  class LlmLogSinkTask(object):
26
+ """LLM logsink pack 송출 파이프라인.
27
+
28
+ LLM 호출 결과 pack 을 받아 즉시 송출. 평가 (LLM-judge 등) 는 별도 파이프라인
29
+ (``LlmEvaluatorTask``) 에서 비동기로 처리되며, 평가 결과는 ``LlmStepEvalStatus``
30
+ pack (``llm_log_type=llm_step_eval_status``) 으로 송출된다 — 원본 LlmStepStatus 와
31
+ 동일한 fields/tags + 평가 점수 5 필드. (txid, step_id) 결합 키로 백엔드 사후 결합.
32
+
33
+ 이 단순한 모델 덕분에 hold/timeout/callback 등의 복잡도가 모두 제거됨.
34
+ """
35
+
26
36
  _instance = None
27
37
  _lock = threading.Lock()
28
38
 
@@ -39,6 +49,14 @@ class LlmLogSinkTask(object):
39
49
  return cls._instance
40
50
 
41
51
  def dispatch(self, pack):
52
+ """LLM 호출 결과 pack 을 즉시 송출 + 평가 큐 트리거.
53
+
54
+ 평가 워커 안에서 호출된 judge LLM call 인 경우 (intercept 가 fire 한 케이스):
55
+ - operation_type='whatap_evaluation' / prompt_version='v1' 고정
56
+ - parent (user) step 의 (txid, step_id, index) 그대로 propagate
57
+ → dashboard 에서 같은 transaction 으로 묶여 보임
58
+ - eval enqueue 는 재귀 가드가 자동 차단
59
+ """
42
60
  if not isinstance(pack, LlmStepStatus):
43
61
  return
44
62
 
@@ -55,6 +73,25 @@ class LlmLogSinkTask(object):
55
73
  % (pack.model, pack.input_tokens, pack.output_tokens, pack.cached_tokens, e),
56
74
  extra={'id': 'LLM024'})
57
75
 
76
+ # ── judge LLM call (eval 워커 안에서 발생) — 고정 라벨 + parent step ids ──
77
+ try:
78
+ from whatap.counter.tasks.llm_evaluator_task import (
79
+ _is_in_evaluator_worker, _get_eval_worker_state,
80
+ )
81
+ if _is_in_evaluator_worker():
82
+ state = _get_eval_worker_state() or {}
83
+ pack.operation_type = 'whatap_evaluation'
84
+ pack.prompt_version = 'v1'
85
+ if state.get('parent_txid'):
86
+ pack.txid = state['parent_txid']
87
+ if state.get('parent_step_id'):
88
+ pack.step_id = state['parent_step_id']
89
+ if state.get('parent_index') is not None:
90
+ pack.index = state['parent_index']
91
+ except Exception:
92
+ pass
93
+
94
+ # tx_summary 누적
58
95
  self._accumulate_tx_summary(ctx, pack)
59
96
 
60
97
  if ctx and not pack.success and pack.error_type:
@@ -63,7 +100,22 @@ class LlmLogSinkTask(object):
63
100
  ctx._llm_last_error_provider = pack.provider or ''
64
101
  ctx._llm_last_error_op_type = pack.operation_type or 'unknown'
65
102
  ctx._llm_last_error_url = pack.url or ''
103
+ ctx._llm_last_error_prompt_version = (
104
+ getattr(pack, 'prompt_version', 'v1') or 'v1')
105
+
106
+ # pack 즉시 송출 — eval 결과 기다리지 않음
107
+ self._enqueue_for_send(pack)
66
108
 
109
+ # 평가 큐 트리거 — worker 가 pack 을 복제해 LlmStepEvalStatus 로 만들어 점수 채워 송출
110
+ # judge 호출 인 경우 enqueue_evaluation 의 재귀 가드가 차단
111
+ try:
112
+ from whatap.counter.tasks.llm_evaluator_task import enqueue_evaluation
113
+ enqueue_evaluation(pack)
114
+ except Exception as e:
115
+ logging.warning('[LLM] eval enqueue failed: %s' % e, extra={'id': 'LLM034'})
116
+
117
+ def _enqueue_for_send(self, pack):
118
+ """완성된 pack 을 송출 큐에 적재. queue full 이면 drop + LLM025."""
67
119
  self._ensure_started()
68
120
  if self._q.full():
69
121
  logging.warning('[LLM] send queue full, pack dropped: model=%s' % pack.model,
@@ -242,3 +294,16 @@ def send_llm_pack(metadata):
242
294
  if hasattr(pack, key):
243
295
  setattr(pack, key, val)
244
296
  dispatch_llm_pack(pack)
297
+
298
+
299
def dispatch_llm_evaluation_pack(pack):
    """Send an ``LlmStepEvalStatus`` pack through the LogSink infrastructure.

    The pack is handed to ``LlmLogSinkTask._send_log_sink``. Per the original
    notes it is a clone of the original ``LlmStepStatus`` built by
    ``LlmEvaluatorTask``, and the backend joins the two afterwards on
    ``(txid, step_id)``.

    Never raises: any failure is logged with id ``LLM037``.
    """
    try:
        sink = LlmLogSinkTask.get_instance()
        sink._send_log_sink(pack)
    except Exception as exc:
        message = '[LLM] dispatch_llm_evaluation_pack failed: %s' % exc
        logging.warning(message, extra={'id': 'LLM037'})
@@ -68,6 +68,7 @@ class LlmStatTask(Thread):
68
68
  getattr(ctx, '_llm_last_error_op_type', 'unknown'),
69
69
  url=getattr(ctx, '_llm_last_error_url', ''),
70
70
  error_type=last_error_type,
71
+ prompt_version=getattr(ctx, '_llm_last_error_prompt_version', 'v1'),
71
72
  )
72
73
 
73
74
  @classmethod