statis-kit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statis_kit-0.1.0/.buildvenv/lib/python3.9/site-packages/pkg_resources/tests/data/my-test-package-source/setup.py +6 -0
- statis_kit-0.1.0/.gitignore +71 -0
- statis_kit-0.1.0/PKG-INFO +12 -0
- statis_kit-0.1.0/data/pricing.yaml +35 -0
- statis_kit-0.1.0/pyproject.toml +28 -0
- statis_kit-0.1.0/src/statis_kit/__init__.py +94 -0
- statis_kit-0.1.0/src/statis_kit/_cli.py +193 -0
- statis_kit-0.1.0/src/statis_kit/_patterns.py +105 -0
- statis_kit-0.1.0/src/statis_kit/_pricing.py +49 -0
- statis_kit-0.1.0/src/statis_kit/_types.py +158 -0
- statis_kit-0.1.0/src/statis_kit/compressor.py +215 -0
- statis_kit-0.1.0/src/statis_kit/cost_meter.py +91 -0
- statis_kit-0.1.0/src/statis_kit/guard.py +108 -0
- statis_kit-0.1.0/src/statis_kit/py.typed +0 -0
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# OS
|
|
2
|
+
.DS_Store
|
|
3
|
+
Thumbs.db
|
|
4
|
+
|
|
5
|
+
# Python
|
|
6
|
+
__pycache__/
|
|
7
|
+
*.py[cod]
|
|
8
|
+
*$py.class
|
|
9
|
+
*.so
|
|
10
|
+
.env
|
|
11
|
+
.env.*
|
|
12
|
+
.venv
|
|
13
|
+
env/
|
|
14
|
+
venv/
|
|
15
|
+
pip-log.txt
|
|
16
|
+
pip-delete-this-directory.txt
|
|
17
|
+
|
|
18
|
+
# Node / Next.js
|
|
19
|
+
node_modules/
|
|
20
|
+
**/.next/
|
|
21
|
+
out/
|
|
22
|
+
build/
|
|
23
|
+
.pnp.*
|
|
24
|
+
.yarn/*
|
|
25
|
+
!.yarn/patches
|
|
26
|
+
!.yarn/plugins
|
|
27
|
+
!.yarn/releases
|
|
28
|
+
!.yarn/sdks
|
|
29
|
+
!.yarn/versions
|
|
30
|
+
*.tsbuildinfo
|
|
31
|
+
|
|
32
|
+
# Logs
|
|
33
|
+
logs
|
|
34
|
+
*.log
|
|
35
|
+
npm-debug.log*
|
|
36
|
+
yarn-debug.log*
|
|
37
|
+
yarn-error.log*
|
|
38
|
+
|
|
39
|
+
# Databases
|
|
40
|
+
*.sqlite3
|
|
41
|
+
*.db
|
|
42
|
+
api/statis.db
|
|
43
|
+
|
|
44
|
+
# IDEs / Editors
|
|
45
|
+
.idea/
|
|
46
|
+
.vscode/
|
|
47
|
+
.cursor/
|
|
48
|
+
*.swp
|
|
49
|
+
*.swo
|
|
50
|
+
|
|
51
|
+
# Claude Code
|
|
52
|
+
.claude/
|
|
53
|
+
|
|
54
|
+
# Tests
|
|
55
|
+
.pytest_cache/
|
|
56
|
+
coverage/
|
|
57
|
+
.tox/
|
|
58
|
+
htmlcov/
|
|
59
|
+
|
|
60
|
+
# Cache
|
|
61
|
+
.cache/
|
|
62
|
+
**/.cache/
|
|
63
|
+
|
|
64
|
+
# Build outputs
|
|
65
|
+
sdk/dist/
|
|
66
|
+
sdk-ts/dist/
|
|
67
|
+
|
|
68
|
+
# Archived / unused files
|
|
69
|
+
landing/public/_archive/
|
|
70
|
+
landing/src/components/_archive/
|
|
71
|
+
.vercel
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: statis-kit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Offline context processing for LLM message arrays — compress, guard, meter
|
|
5
|
+
Project-URL: Homepage, https://statis.dev
|
|
6
|
+
Project-URL: Repository, https://github.com/statis-ai/statis-core
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Requires-Dist: pyyaml>=6.0
|
|
9
|
+
Provides-Extra: dev
|
|
10
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
11
|
+
Provides-Extra: tiktoken
|
|
12
|
+
Requires-Dist: tiktoken>=0.5.0; extra == 'tiktoken'
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
version: "2026-04"
|
|
2
|
+
models:
|
|
3
|
+
gpt-4o:
|
|
4
|
+
input_per_1k: 0.0025
|
|
5
|
+
output_per_1k: 0.01
|
|
6
|
+
gpt-4o-mini:
|
|
7
|
+
input_per_1k: 0.00015
|
|
8
|
+
output_per_1k: 0.0006
|
|
9
|
+
gpt-4.1:
|
|
10
|
+
input_per_1k: 0.002
|
|
11
|
+
output_per_1k: 0.008
|
|
12
|
+
gpt-4.1-mini:
|
|
13
|
+
input_per_1k: 0.0004
|
|
14
|
+
output_per_1k: 0.0016
|
|
15
|
+
gpt-4.1-nano:
|
|
16
|
+
input_per_1k: 0.0001
|
|
17
|
+
output_per_1k: 0.0004
|
|
18
|
+
claude-opus-4:
|
|
19
|
+
input_per_1k: 0.015
|
|
20
|
+
output_per_1k: 0.075
|
|
21
|
+
claude-sonnet-4:
|
|
22
|
+
input_per_1k: 0.003
|
|
23
|
+
output_per_1k: 0.015
|
|
24
|
+
claude-haiku-4-5:
|
|
25
|
+
input_per_1k: 0.0008
|
|
26
|
+
output_per_1k: 0.004
|
|
27
|
+
gemini-2.5-pro:
|
|
28
|
+
input_per_1k: 0.00125
|
|
29
|
+
output_per_1k: 0.01
|
|
30
|
+
gemini-2.5-flash:
|
|
31
|
+
input_per_1k: 0.00015
|
|
32
|
+
output_per_1k: 0.0006
|
|
33
|
+
gemini-2.0-flash:
|
|
34
|
+
input_per_1k: 0.0001
|
|
35
|
+
output_per_1k: 0.0004
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "statis-kit"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Offline context processing for LLM message arrays — compress, guard, meter"
|
|
9
|
+
requires-python = ">=3.9"
|
|
10
|
+
dependencies = ["pyyaml>=6.0"]
|
|
11
|
+
|
|
12
|
+
[project.optional-dependencies]
|
|
13
|
+
tiktoken = ["tiktoken>=0.5.0"]
|
|
14
|
+
dev = ["pytest>=7.0"]
|
|
15
|
+
|
|
16
|
+
[project.scripts]
|
|
17
|
+
statis-kit = "statis_kit._cli:main"
|
|
18
|
+
|
|
19
|
+
[project.urls]
|
|
20
|
+
Homepage = "https://statis.dev"
|
|
21
|
+
Repository = "https://github.com/statis-ai/statis-core"
|
|
22
|
+
|
|
23
|
+
[tool.hatch.build.targets.wheel]
|
|
24
|
+
packages = ["src/statis_kit"]
|
|
25
|
+
artifacts = ["data/*"]
|
|
26
|
+
|
|
27
|
+
[tool.hatch.build.targets.sdist]
|
|
28
|
+
include = ["src/statis_kit", "data"]
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""statis-kit — Offline context processing for LLM message arrays."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import Any, Optional
|
|
5
|
+
|
|
6
|
+
from ._types import (
|
|
7
|
+
CompressorConfig,
|
|
8
|
+
CostEstimate,
|
|
9
|
+
GuardConfig,
|
|
10
|
+
GuardDetection,
|
|
11
|
+
KitConfig,
|
|
12
|
+
MeterConfig,
|
|
13
|
+
Message,
|
|
14
|
+
ProcessedContext,
|
|
15
|
+
Report,
|
|
16
|
+
TurnCost,
|
|
17
|
+
messages_from_dicts,
|
|
18
|
+
messages_to_dicts,
|
|
19
|
+
)
|
|
20
|
+
from .compressor import Compressor
|
|
21
|
+
from .cost_meter import CostMeter
|
|
22
|
+
from .guard import Guard, GuardHaltError, GuardResult
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
"process",
|
|
26
|
+
"KitConfig",
|
|
27
|
+
"CompressorConfig",
|
|
28
|
+
"MeterConfig",
|
|
29
|
+
"GuardConfig",
|
|
30
|
+
"ProcessedContext",
|
|
31
|
+
"Report",
|
|
32
|
+
"Message",
|
|
33
|
+
"CostEstimate",
|
|
34
|
+
"TurnCost",
|
|
35
|
+
"GuardDetection",
|
|
36
|
+
"GuardHaltError",
|
|
37
|
+
"GuardResult",
|
|
38
|
+
"Guard",
|
|
39
|
+
"CostMeter",
|
|
40
|
+
"Compressor",
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def process(
    messages: list[dict[str, Any]],
    config: Optional[KitConfig] = None,
) -> ProcessedContext:
    """Unified entry point. Runs guard -> compressor -> meter in sequence.

    Accepts raw message dicts (OpenAI format) for convenience.

    Args:
        messages: Conversation in OpenAI chat format ({"role": ..., "content": ...}).
        config: Optional KitConfig; None uses all defaults.

    Returns:
        ProcessedContext with the (possibly modified) messages and a Report.
    """
    cfg = config or KitConfig()
    msgs = messages_from_dicts(messages)

    # --- Phase 1: Guard ---
    # The guard always runs; Guard(None) uses its default configuration.
    # (The original `if cfg.guard is not None or cfg.guard is None:` was a
    # tautology — always true — so the branch was dead logic; removed.)
    guard = Guard(cfg.guard)
    guard_result = guard.scan(msgs)
    guard_detections: list[GuardDetection] = guard_result.detections
    stripped_payloads: list[int] = [d.turn_index for d in guard_detections]
    msgs = guard_result.messages

    # --- Phase 2: Compressor ---
    # Unlike the guard, compression is opt-in: it only runs when configured.
    compressed_ranges: list[tuple[int, int]] = []
    if cfg.compressor is not None:
        compressor = Compressor(cfg.compressor, cfg.summarizer)
        msgs, compressed_ranges = compressor.process(msgs)

    # --- Phase 3: Cost Meter ---
    meter = CostMeter(cfg.meter)
    # Original tokens are computed from the raw input, not the guarded/
    # compressed messages, so token_delta reflects the full pipeline savings.
    original_tokens = sum(
        meter.count_tokens(m.get("content", "")) for m in messages
    )
    processed_tokens, per_turn_costs = meter.count_messages(msgs)
    cost_estimate = meter.estimate_cost(processed_tokens)

    return ProcessedContext(
        messages=messages_to_dicts(msgs),
        report=Report(
            original_tokens=original_tokens,
            processed_tokens=processed_tokens,
            token_delta=original_tokens - processed_tokens,
            cost_estimate=cost_estimate,
            per_turn_costs=per_turn_costs,
            guard_detections=guard_detections,
            compressed_ranges=compressed_ranges,
            # Deduplicate and sort so the report is deterministic
            # (list(set(...)) had arbitrary ordering).
            stripped_payloads=sorted(set(stripped_payloads)),
        ),
    )
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
"""statis-kit CLI — context diff viewer."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import json
|
|
6
|
+
import sys
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
# ANSI color codes
|
|
10
|
+
_RESET = "\033[0m"
|
|
11
|
+
_RED = "\033[31m"
|
|
12
|
+
_GREEN = "\033[32m"
|
|
13
|
+
_YELLOW = "\033[33m"
|
|
14
|
+
_CYAN = "\033[36m"
|
|
15
|
+
_DIM = "\033[2m"
|
|
16
|
+
_BOLD = "\033[1m"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _load_json(path: str) -> list[dict[str, Any]]:
|
|
20
|
+
try:
|
|
21
|
+
with open(path) as f:
|
|
22
|
+
data = json.load(f)
|
|
23
|
+
except (json.JSONDecodeError, OSError) as e:
|
|
24
|
+
sys.exit(f"Error reading {path}: {e}")
|
|
25
|
+
if not isinstance(data, list):
|
|
26
|
+
sys.exit(f"Error: {path} must contain a JSON array of messages.")
|
|
27
|
+
return data
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _truncate(text: str, max_len: int = 80) -> str:
|
|
31
|
+
if len(text) <= max_len:
|
|
32
|
+
return text
|
|
33
|
+
return text[:max_len - 3] + "..."
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def cmd_diff(args: argparse.Namespace) -> None:
    """Load both message files and render their diff (colored or JSON)."""
    before = _load_json(args.before)
    after = _load_json(args.after)
    renderer = _diff_json if args.json else _diff_colored
    renderer(before, after)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _diff_colored(
    before: list[dict[str, Any]],
    after: list[dict[str, Any]],
) -> None:
    """Print a human-readable, ANSI-colored diff of two message arrays.

    Shows token totals, the delta, then each message: removed lines in red,
    added lines in green, unchanged lines dimmed.
    """
    # Lazy import — presumably to keep CLI startup light; confirm no cycle.
    from .cost_meter import CostMeter

    meter = CostMeter()

    before_tokens = sum(meter.count_tokens(m.get("content", "")) for m in before)
    after_tokens = sum(meter.count_tokens(m.get("content", "")) for m in after)
    delta = before_tokens - after_tokens
    # Guard against division by zero when the "before" file is empty.
    pct = (delta / before_tokens * 100) if before_tokens > 0 else 0

    print(f"\n{_BOLD}Context Diff{_RESET}")
    print(f" Before: {len(before)} messages, {before_tokens} tokens")
    print(f" After: {len(after)} messages, {after_tokens} tokens")

    if delta > 0:
        print(f" Delta: {_GREEN}-{delta} tokens ({pct:.1f}% reduction){_RESET}")
    elif delta < 0:
        print(f" Delta: {_RED}+{-delta} tokens ({-pct:.1f}% increase){_RESET}")
    else:
        print(f" Delta: {_DIM}no change{_RESET}")

    print()

    # Build content maps for comparison.
    # Membership is by (role, content) pair: duplicates collapse and pure
    # reordering is NOT reported — only additions and removals are.
    before_set = {(m.get("role", ""), m.get("content", "")) for m in before}
    after_set = {(m.get("role", ""), m.get("content", "")) for m in after}

    removed = before_set - after_set
    added = after_set - before_set

    # Walk "before" in order, marking removed entries; context lines are dimmed.
    for m in before:
        key = (m.get("role", ""), m.get("content", ""))
        role = m.get("role", "?")
        content = _truncate(m.get("content", ""))
        if key in removed:
            print(f" {_RED}- [{role}] {content}{_RESET}")
        else:
            print(f" {_DIM} [{role}] {content}{_RESET}")

    # Then list additions from "after" (unchanged lines were already shown).
    for m in after:
        key = (m.get("role", ""), m.get("content", ""))
        if key in added:
            role = m.get("role", "?")
            content = _truncate(m.get("content", ""))
            print(f" {_GREEN}+ [{role}] {content}{_RESET}")

    print()
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _diff_json(
    before: list[dict[str, Any]],
    after: list[dict[str, Any]],
) -> None:
    """Print a machine-readable (JSON) diff of two message arrays.

    Intended for CI: emits counts, token totals, the delta, and the lists of
    removed/added (role, content) pairs.
    """
    # Lazy import — presumably to keep CLI startup light; confirm no cycle.
    from .cost_meter import CostMeter

    meter = CostMeter()

    before_tokens = sum(meter.count_tokens(m.get("content", "")) for m in before)
    after_tokens = sum(meter.count_tokens(m.get("content", "")) for m in after)

    before_set = {(m.get("role", ""), m.get("content", "")) for m in before}
    after_set = {(m.get("role", ""), m.get("content", "")) for m in after}

    # Hoisted out of the comprehension conditions below: previously the set
    # differences were recomputed for every element (accidental O(n*m)).
    removed_keys = before_set - after_set
    added_keys = after_set - before_set

    removed = [
        {"role": m.get("role", ""), "content": m.get("content", "")}
        for m in before
        if (m.get("role", ""), m.get("content", "")) in removed_keys
    ]
    added = [
        {"role": m.get("role", ""), "content": m.get("content", "")}
        for m in after
        if (m.get("role", ""), m.get("content", "")) in added_keys
    ]

    output = {
        "before_messages": len(before),
        "after_messages": len(after),
        "before_tokens": before_tokens,
        "after_tokens": after_tokens,
        "token_delta": before_tokens - after_tokens,
        "removed": removed,
        "added": added,
    }
    print(json.dumps(output, indent=2))
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def cmd_process(args: argparse.Namespace) -> None:
    """Process a message file through the kit pipeline."""
    # Lazy import of the package-level pipeline to avoid importing the whole
    # kit (guard/compressor/meter) for unrelated subcommands.
    from . import process

    messages = _load_json(args.file)
    result = process(messages)

    if args.json:
        # Machine-readable summary; note json.dumps serializes the
        # (start, end) tuples in compressed_ranges as JSON arrays.
        output = {
            "messages": result.messages,
            "report": {
                "original_tokens": result.report.original_tokens,
                "processed_tokens": result.report.processed_tokens,
                "token_delta": result.report.token_delta,
                "guard_detections": len(result.report.guard_detections),
                "compressed_ranges": result.report.compressed_ranges,
            },
        }
        print(json.dumps(output, indent=2))
    else:
        # Human-readable report; optional sections only print when non-empty.
        r = result.report
        print(f"\n{_BOLD}Processing Report{_RESET}")
        print(f" Messages: {len(messages)} -> {len(result.messages)}")
        print(f" Tokens: {r.original_tokens} -> {r.processed_tokens} ({_GREEN}-{r.token_delta}{_RESET})")
        if r.guard_detections:
            print(f" Detections: {_YELLOW}{len(r.guard_detections)} injection pattern(s){_RESET}")
        if r.compressed_ranges:
            print(f" Compressed: {len(r.compressed_ranges)} range(s)")
        if r.cost_estimate:
            print(f" Est. cost: ${r.cost_estimate.total_cost_usd:.6f} ({r.cost_estimate.model})")
        print()
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def main() -> None:
    """CLI entry point (registered as the `statis-kit` console script)."""
    parser = argparse.ArgumentParser(
        prog="statis-kit",
        description="Offline context processing for LLM message arrays",
    )
    # required=True makes argparse reject a bare `statis-kit` invocation,
    # so args.command below is guaranteed to be one of the subcommands.
    sub = parser.add_subparsers(dest="command", required=True)

    p_diff = sub.add_parser("diff", help="Compare two message arrays")
    p_diff.add_argument("before", help="Path to before messages JSON")
    p_diff.add_argument("after", help="Path to after messages JSON")
    p_diff.add_argument("--json", action="store_true", help="JSON output for CI")

    p_proc = sub.add_parser("process", help="Process a message array through the kit")
    p_proc.add_argument("file", help="Path to messages JSON")
    p_proc.add_argument("--json", action="store_true", help="JSON output")

    args = parser.parse_args()

    if args.command == "diff":
        cmd_diff(args)
    elif args.command == "process":
        cmd_process(args)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
if __name__ == "__main__":
|
|
193
|
+
main()
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Built-in guard patterns for prompt injection detection.
|
|
2
|
+
|
|
3
|
+
Each pattern is a dict with:
|
|
4
|
+
id: unique identifier
|
|
5
|
+
category: one of instruction_override, authority_impersonation,
|
|
6
|
+
external_anomalies, hidden_text
|
|
7
|
+
pattern: regex string (case-insensitive by default)
|
|
8
|
+
flags: optional regex flags override
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
BUILTIN_PATTERNS: list[dict[str, str]] = [
|
|
13
|
+
# -----------------------------------------------------------------------
|
|
14
|
+
# instruction_override — attempts to reset or replace system instructions
|
|
15
|
+
# -----------------------------------------------------------------------
|
|
16
|
+
{
|
|
17
|
+
"id": "io_ignore_previous",
|
|
18
|
+
"category": "instruction_override",
|
|
19
|
+
"pattern": r"(?:ignore|disregard|forget|override|bypass)\s+(?:all\s+)?(?:previous|above|prior|earlier|preceding)\s+(?:instructions?|prompts?|rules?|guidelines?|directives?|context)",
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"id": "io_new_instructions",
|
|
23
|
+
"category": "instruction_override",
|
|
24
|
+
"pattern": r"(?:new|updated|revised|replacement)\s+(?:system\s+)?(?:instructions?|prompt|directive|task|role)\s*[:.]",
|
|
25
|
+
},
|
|
26
|
+
{
|
|
27
|
+
"id": "io_you_are_now",
|
|
28
|
+
"category": "instruction_override",
|
|
29
|
+
"pattern": r"you\s+are\s+now\s+(?:a|an|the|my)\s+\w+",
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
"id": "io_do_not_follow",
|
|
33
|
+
"category": "instruction_override",
|
|
34
|
+
"pattern": r"(?:do\s+not|don'?t|stop)\s+follow(?:ing)?\s+(?:your|the|any)\s+(?:previous|original|initial|system)\s+(?:instructions?|rules?|guidelines?|prompt)",
|
|
35
|
+
},
|
|
36
|
+
{
|
|
37
|
+
"id": "io_jailbreak_keywords",
|
|
38
|
+
"category": "instruction_override",
|
|
39
|
+
"pattern": r"(?:DAN|STAN|DUDE|AIM)\s+mode|(?:developer|maintenance|god|sudo|root)\s+mode|jailbreak",
|
|
40
|
+
},
|
|
41
|
+
# -----------------------------------------------------------------------
|
|
42
|
+
# authority_impersonation — pretending to be system / admin / developer
|
|
43
|
+
# -----------------------------------------------------------------------
|
|
44
|
+
{
|
|
45
|
+
"id": "ai_system_prefix",
|
|
46
|
+
"category": "authority_impersonation",
|
|
47
|
+
"pattern": r"^(?:SYSTEM|ADMIN|DEVELOPER|OPERATOR|ENGINEER(?:ING)?)\s*:\s*",
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
"id": "ai_authorization_code",
|
|
51
|
+
"category": "authority_impersonation",
|
|
52
|
+
"pattern": r"(?:authorization|auth|access)\s+(?:code|token|key)\s*:\s*\S+",
|
|
53
|
+
},
|
|
54
|
+
{
|
|
55
|
+
"id": "ai_im_the_developer",
|
|
56
|
+
"category": "authority_impersonation",
|
|
57
|
+
"pattern": r"(?:i\s+am|i'?m)\s+(?:the|your|a)\s+(?:developer|admin(?:istrator)?|system\s+admin|operator|engineer|owner|creator)",
|
|
58
|
+
},
|
|
59
|
+
{
|
|
60
|
+
"id": "ai_new_directive",
|
|
61
|
+
"category": "authority_impersonation",
|
|
62
|
+
"pattern": r"(?:new|updated?)\s+directive\s+from\s+(?:engineering|development|admin|management|security)\s+team",
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
"id": "ai_override_command",
|
|
66
|
+
"category": "authority_impersonation",
|
|
67
|
+
"pattern": r"(?:admin|system|security|master)\s*[-_]?\s*override",
|
|
68
|
+
},
|
|
69
|
+
# -----------------------------------------------------------------------
|
|
70
|
+
# external_anomalies — suspicious URLs, emails, data exfiltration
|
|
71
|
+
# -----------------------------------------------------------------------
|
|
72
|
+
{
|
|
73
|
+
"id": "ea_data_exfil_email",
|
|
74
|
+
"category": "external_anomalies",
|
|
75
|
+
"pattern": r"(?:forward|send|email|transmit|exfiltrate|leak)\s+(?:all\s+)?(?:customer|user|private|internal|sensitive|confidential)?\s*(?:data|information|records|credentials|passwords|keys|tokens)\s+to\s+\S+@\S+",
|
|
76
|
+
},
|
|
77
|
+
{
|
|
78
|
+
"id": "ea_data_exfil_url",
|
|
79
|
+
"category": "external_anomalies",
|
|
80
|
+
"pattern": r"(?:post|send|upload|transmit|forward)\s+(?:all\s+)?(?:data|information|records|responses?)\s+to\s+https?://",
|
|
81
|
+
},
|
|
82
|
+
{
|
|
83
|
+
"id": "ea_grant_access",
|
|
84
|
+
"category": "external_anomalies",
|
|
85
|
+
"pattern": r"(?:grant|give|assign|add)\s+(?:admin|root|full|elevated)\s+(?:access|permissions?|privileges?|rights?)\s+to\s+(?:user|account|id)\s+",
|
|
86
|
+
},
|
|
87
|
+
{
|
|
88
|
+
"id": "ea_encoded_instructions",
|
|
89
|
+
"category": "external_anomalies",
|
|
90
|
+
"pattern": r"(?:execute|run|eval|decode)\s+(?:the\s+)?(?:following\s+)?(?:base64|encoded|hex)\s*:",
|
|
91
|
+
},
|
|
92
|
+
# -----------------------------------------------------------------------
|
|
93
|
+
# hidden_text — zero-width characters, homoglyphs, steganographic tricks
|
|
94
|
+
# -----------------------------------------------------------------------
|
|
95
|
+
{
|
|
96
|
+
"id": "ht_zero_width",
|
|
97
|
+
"category": "hidden_text",
|
|
98
|
+
"pattern": r"[\u200b\u200c\u200d\u200e\u200f\u2060\u2061\u2062\u2063\u2064\ufeff]{2,}",
|
|
99
|
+
},
|
|
100
|
+
{
|
|
101
|
+
"id": "ht_tag_like_injection",
|
|
102
|
+
"category": "hidden_text",
|
|
103
|
+
"pattern": r"<\s*(?:system|admin|instruction|prompt|override|ignore)\s*>",
|
|
104
|
+
},
|
|
105
|
+
]
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Load and query the model pricing table."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
from typing import Any, Optional
|
|
6
|
+
|
|
7
|
+
import yaml # type: ignore[import]
|
|
8
|
+
|
|
9
|
+
_DEFAULT_PRICING_PATH = os.path.join(
|
|
10
|
+
os.path.dirname(__file__), "..", "..", "data", "pricing.yaml",
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
_cache: Optional[dict[str, Any]] = None
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def load_pricing(path: Optional[str] = None) -> dict[str, Any]:
    """Load a pricing table from YAML.

    The default table is loaded once and cached at module level. An explicit
    *path* is loaded fresh on every call and is NOT cached — the original
    implementation stored it in the shared cache, so a one-off custom load
    silently replaced the default table for all later callers.

    NOTE(review): _DEFAULT_PRICING_PATH resolves relative to this file via
    "../../data", which only works from a source checkout — verify the data
    file actually ships at that location in the installed wheel.

    Args:
        path: Optional explicit pricing YAML path; None uses the bundled table.

    Returns:
        Parsed pricing mapping (expected shape: {"models": {...}, ...}).

    Raises:
        OSError: if the file cannot be opened.
    """
    global _cache
    if path is not None:
        # Explicit path: bypass the cache entirely so the default table
        # is never poisoned by a custom load.
        with open(path) as f:
            return yaml.safe_load(f)
    if _cache is None:
        with open(_DEFAULT_PRICING_PATH) as f:
            _cache = yaml.safe_load(f)
    return _cache
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_model_pricing(
    model: str,
    pricing: Optional[dict[str, Any]] = None,
) -> tuple[float, float]:
    """Return (input_per_1k, output_per_1k) for a model.

    Exact key match wins; otherwise a bidirectional prefix match is tried
    (dated name vs short key in either direction). Falls back to gpt-4o
    pricing if nothing matches.

    Args:
        model: Model identifier, e.g. "gpt-4o" or "gpt-4o-2024-05-13".
        pricing: Optional pre-loaded pricing table; None loads the default.
    """
    table = pricing or load_pricing()
    models = table.get("models", {})

    entry = models.get(model)
    if entry is None:
        # Bidirectional prefix match — handles both directions:
        #   "gpt-4o-2024-05-13" against key "gpt-4o" (dated -> short)
        #   "claude-sonnet-4" against key "claude-sonnet-4-20250514" (short -> dated)
        # Prefer the LONGEST matching key: first-match iteration could resolve
        # "gpt-4.1-mini-2025..." to "gpt-4.1" instead of "gpt-4.1-mini"
        # depending on dict insertion order.
        candidates = [
            key for key in models
            if model.startswith(key) or key.startswith(model)
        ]
        matched = max(candidates, key=len) if candidates else None
        entry = models.get(
            matched or "gpt-4o",
            {"input_per_1k": 0.0025, "output_per_1k": 0.01},
        )

    return (entry["input_per_1k"], entry["output_per_1k"])
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Any, Callable, Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# ---------------------------------------------------------------------------
|
|
9
|
+
# Message
|
|
10
|
+
# ---------------------------------------------------------------------------
|
|
11
|
+
|
|
12
|
+
@dataclass
class Message:
    """A single chat message in normalized (OpenAI-style) form."""
    # Message role, e.g. "system" / "user" / "assistant" / "tool" — not validated here.
    role: str
    content: str
    # Optional OpenAI-format extras; round-tripped by messages_to_dicts
    # only when not None.
    name: Optional[str] = None
    tool_call_id: Optional[str] = None
    metadata: Optional[dict[str, Any]] = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def messages_from_dicts(raw: list[dict[str, Any]]) -> list[Message]:
    """Convert a list of OpenAI-format dicts to Message dataclasses."""
    # "role" is required (KeyError if missing); everything else is optional.
    return [
        Message(
            role=item["role"],
            content=item.get("content", ""),
            name=item.get("name"),
            tool_call_id=item.get("tool_call_id"),
            metadata=item.get("metadata"),
        )
        for item in raw
    ]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def messages_to_dicts(msgs: list[Message]) -> list[dict[str, Any]]:
    """Convert Message dataclasses back to OpenAI-format dicts."""
    result: list[dict[str, Any]] = []
    for msg in msgs:
        entry: dict[str, Any] = {"role": msg.role, "content": msg.content}
        # Optional fields are emitted only when set, preserving round-trip shape.
        for attr in ("name", "tool_call_id", "metadata"):
            value = getattr(msg, attr)
            if value is not None:
                entry[attr] = value
        result.append(entry)
    return result
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# ---------------------------------------------------------------------------
|
|
51
|
+
# Classification
|
|
52
|
+
# ---------------------------------------------------------------------------
|
|
53
|
+
|
|
54
|
+
class Bucket(str, Enum):
|
|
55
|
+
PINNED = "pinned"
|
|
56
|
+
RECENT = "recent"
|
|
57
|
+
COMPRESSIBLE = "compressible"
|
|
58
|
+
PRUNABLE = "prunable"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass
|
|
62
|
+
class ClassifiedMessage:
|
|
63
|
+
message: Message
|
|
64
|
+
index: int
|
|
65
|
+
bucket: Bucket
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# ---------------------------------------------------------------------------
|
|
69
|
+
# Cost / metering
|
|
70
|
+
# ---------------------------------------------------------------------------
|
|
71
|
+
|
|
72
|
+
@dataclass
|
|
73
|
+
class CostEstimate:
|
|
74
|
+
input_cost_usd: float
|
|
75
|
+
output_cost_usd: float
|
|
76
|
+
total_cost_usd: float
|
|
77
|
+
model: str
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@dataclass
|
|
81
|
+
class TurnCost:
|
|
82
|
+
turn_index: int
|
|
83
|
+
role: str
|
|
84
|
+
tokens: int
|
|
85
|
+
cost_usd: float
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# ---------------------------------------------------------------------------
|
|
89
|
+
# Guard
|
|
90
|
+
# ---------------------------------------------------------------------------
|
|
91
|
+
|
|
92
|
+
@dataclass
|
|
93
|
+
class GuardDetection:
|
|
94
|
+
turn_index: int
|
|
95
|
+
pattern_id: str
|
|
96
|
+
category: str
|
|
97
|
+
matched_text: str
|
|
98
|
+
action_taken: str
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# ---------------------------------------------------------------------------
|
|
102
|
+
# Report + ProcessedContext (top-level output)
|
|
103
|
+
# ---------------------------------------------------------------------------
|
|
104
|
+
|
|
105
|
+
@dataclass
class Report:
    """Aggregate metrics produced by one pipeline run."""
    original_tokens: int
    processed_tokens: int
    # original_tokens - processed_tokens; negative if processing grew the context.
    token_delta: int
    # May be None — presumably when no pricing is available; confirm against CostMeter.
    cost_estimate: Optional[CostEstimate]
    per_turn_costs: list[TurnCost] = field(default_factory=list)
    guard_detections: list[GuardDetection] = field(default_factory=list)
    # (start, end) message-index ranges replaced by summaries — assumed
    # inclusive; verify against the compressor's prune pass.
    compressed_ranges: list[tuple[int, int]] = field(default_factory=list)
    # Turn indices where the guard stripped content (deduplicated).
    stripped_payloads: list[int] = field(default_factory=list)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@dataclass
|
|
118
|
+
class ProcessedContext:
|
|
119
|
+
messages: list[dict[str, Any]]
|
|
120
|
+
report: Report
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# ---------------------------------------------------------------------------
|
|
124
|
+
# Configs
|
|
125
|
+
# ---------------------------------------------------------------------------
|
|
126
|
+
|
|
127
|
+
@dataclass
|
|
128
|
+
class CompressorConfig:
|
|
129
|
+
pin_top: int = 1
|
|
130
|
+
recent_turns: int = 4
|
|
131
|
+
summary_max_tokens: int = 200
|
|
132
|
+
prune_older_than_turns: int = 20
|
|
133
|
+
prune_if_superseded: bool = True
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
@dataclass
|
|
137
|
+
class MeterConfig:
|
|
138
|
+
model: str = "gpt-4o"
|
|
139
|
+
pricing_path: Optional[str] = None
|
|
140
|
+
on_turn: Optional[Callable[[TurnCost], None]] = None
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
@dataclass
|
|
144
|
+
class GuardConfig:
|
|
145
|
+
on_detect: str = "strip" # "strip" | "halt"
|
|
146
|
+
extra_patterns: Optional[list[dict[str, Any]]] = None
|
|
147
|
+
disabled_categories: Optional[list[str]] = None
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
SummarizerFn = Callable[[str], str]
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
@dataclass
class KitConfig:
    """Top-level configuration consumed by `process()`.

    Note: in `process()`, a None `compressor` disables compression entirely,
    while the guard and meter run regardless (None means "use defaults").
    """
    compressor: Optional[CompressorConfig] = None
    meter: Optional[MeterConfig] = None
    guard: Optional[GuardConfig] = None
    # Optional callable(text) -> summary used by the compressor's summarize pass.
    summarizer: Optional[SummarizerFn] = None
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
"""Three-pass context compressor: classify -> summarize -> prune."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import re
|
|
5
|
+
from typing import Callable, Optional
|
|
6
|
+
|
|
7
|
+
from ._types import Bucket, ClassifiedMessage, CompressorConfig, Message
|
|
8
|
+
|
|
9
|
+
SummarizerFn = Callable[[str], str]
|
|
10
|
+
|
|
11
|
+
# Heuristics for detecting superseded content
|
|
12
|
+
_CORRECTION_PATTERNS = [
|
|
13
|
+
re.compile(r"^(?:actually|correction|sorry|wait|no,)\b", re.IGNORECASE),
|
|
14
|
+
re.compile(r"\b(?:instead|rather|update|revised|changed? (?:to|my))\b", re.IGNORECASE),
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Compressor:
|
|
19
|
+
    def __init__(
        self,
        config: Optional[CompressorConfig] = None,
        summarizer: Optional[SummarizerFn] = None,
    ) -> None:
        """Snapshot config fields onto the instance.

        Args:
            config: Compression thresholds; None uses CompressorConfig defaults.
            summarizer: Optional callable(text) -> summary used in pass 2;
                None presumably falls back to a built-in summarizer — confirm
                in the summarize pass.
        """
        cfg = config or CompressorConfig()
        self._pin_top = cfg.pin_top
        self._recent_turns = cfg.recent_turns
        self._prune_older_than = cfg.prune_older_than_turns
        self._prune_if_superseded = cfg.prune_if_superseded
        self._summary_max_tokens = cfg.summary_max_tokens
        self._summarizer = summarizer
|
|
31
|
+
|
|
32
|
+
    def process(
        self, messages: list[Message],
    ) -> tuple[list[Message], list[tuple[int, int]]]:
        """Run the three-pass pipeline. Returns (processed_messages, compressed_ranges).

        Pass order: classify into buckets, summarize compressible spans,
        then prune. The input list is not returned as-is; callers should use
        the returned list.
        """
        classified = self._classify(messages)
        classified = self._summarize(classified)
        return self._prune(classified)
|
|
39
|
+
|
|
40
|
+
# ------------------------------------------------------------------
|
|
41
|
+
# Pass 1: Classify
|
|
42
|
+
# ------------------------------------------------------------------
|
|
43
|
+
|
|
44
|
+
def _classify(self, messages: list[Message]) -> list[ClassifiedMessage]:
|
|
45
|
+
n = len(messages)
|
|
46
|
+
classified: list[ClassifiedMessage] = []
|
|
47
|
+
|
|
48
|
+
# Count conversational turns (user/assistant pairs) from the end
|
|
49
|
+
# to determine "recent" boundary
|
|
50
|
+
turn_count = 0
|
|
51
|
+
recent_boundary = self._pin_top # default: everything after pinned is recent
|
|
52
|
+
for i in range(n - 1, -1, -1):
|
|
53
|
+
if messages[i].role in ("user", "assistant"):
|
|
54
|
+
turn_count += 1
|
|
55
|
+
if turn_count >= self._recent_turns * 2: # 2 messages per turn
|
|
56
|
+
recent_boundary = i
|
|
57
|
+
break
|
|
58
|
+
|
|
59
|
+
for i, msg in enumerate(messages):
|
|
60
|
+
if i < self._pin_top:
|
|
61
|
+
bucket = Bucket.PINNED
|
|
62
|
+
elif i >= recent_boundary:
|
|
63
|
+
bucket = Bucket.RECENT
|
|
64
|
+
else:
|
|
65
|
+
# Check age (by turn count from start)
|
|
66
|
+
turns_from_start = sum(
|
|
67
|
+
1 for m in messages[:i + 1]
|
|
68
|
+
if m.role in ("user", "assistant")
|
|
69
|
+
) // 2
|
|
70
|
+
if turns_from_start > self._prune_older_than:
|
|
71
|
+
bucket = Bucket.PRUNABLE
|
|
72
|
+
else:
|
|
73
|
+
bucket = Bucket.COMPRESSIBLE
|
|
74
|
+
|
|
75
|
+
classified.append(ClassifiedMessage(
|
|
76
|
+
message=msg,
|
|
77
|
+
index=i,
|
|
78
|
+
bucket=bucket,
|
|
79
|
+
))
|
|
80
|
+
|
|
81
|
+
# Superseded detection
|
|
82
|
+
if self._prune_if_superseded:
|
|
83
|
+
classified = self._mark_superseded(classified)
|
|
84
|
+
|
|
85
|
+
return classified
|
|
86
|
+
|
|
87
|
+
def _mark_superseded(
|
|
88
|
+
self, classified: list[ClassifiedMessage],
|
|
89
|
+
) -> list[ClassifiedMessage]:
|
|
90
|
+
"""Mark messages as prunable if superseded by later content."""
|
|
91
|
+
# Track tool_call_ids: if a later tool result shares the same
|
|
92
|
+
# tool_call_id pattern (same tool called again), earlier is superseded
|
|
93
|
+
tool_results: dict[str, int] = {} # name -> latest index
|
|
94
|
+
for cm in classified:
|
|
95
|
+
if cm.message.role == "tool" and cm.message.name:
|
|
96
|
+
prev = tool_results.get(cm.message.name)
|
|
97
|
+
if prev is not None:
|
|
98
|
+
# Mark the earlier one as prunable
|
|
99
|
+
if classified[prev].bucket == Bucket.COMPRESSIBLE:
|
|
100
|
+
classified[prev] = ClassifiedMessage(
|
|
101
|
+
message=classified[prev].message,
|
|
102
|
+
index=classified[prev].index,
|
|
103
|
+
bucket=Bucket.PRUNABLE,
|
|
104
|
+
)
|
|
105
|
+
tool_results[cm.message.name] = cm.index
|
|
106
|
+
|
|
107
|
+
# Mark user messages that are corrections of earlier messages
|
|
108
|
+
for cm in classified:
|
|
109
|
+
if cm.bucket != Bucket.COMPRESSIBLE or cm.message.role != "user":
|
|
110
|
+
continue
|
|
111
|
+
for pat in _CORRECTION_PATTERNS:
|
|
112
|
+
if pat.search(cm.message.content):
|
|
113
|
+
# The message this corrects is likely the previous user message
|
|
114
|
+
# in the compressible zone — mark it prunable
|
|
115
|
+
for prev in reversed(classified[:cm.index]):
|
|
116
|
+
if prev.message.role == "user" and prev.bucket == Bucket.COMPRESSIBLE:
|
|
117
|
+
classified[prev.index] = ClassifiedMessage(
|
|
118
|
+
message=prev.message,
|
|
119
|
+
index=prev.index,
|
|
120
|
+
bucket=Bucket.PRUNABLE,
|
|
121
|
+
)
|
|
122
|
+
break
|
|
123
|
+
break
|
|
124
|
+
|
|
125
|
+
return classified
|
|
126
|
+
|
|
127
|
+
# ------------------------------------------------------------------
|
|
128
|
+
# Pass 2: Summarize
|
|
129
|
+
# ------------------------------------------------------------------
|
|
130
|
+
|
|
131
|
+
def _summarize(
|
|
132
|
+
self, classified: list[ClassifiedMessage],
|
|
133
|
+
) -> list[ClassifiedMessage]:
|
|
134
|
+
"""Summarize compressible messages. Without a summarizer, moves them to prunable."""
|
|
135
|
+
if self._summarizer is None:
|
|
136
|
+
# No summarizer provided — compressible becomes prunable
|
|
137
|
+
return [
|
|
138
|
+
ClassifiedMessage(
|
|
139
|
+
message=cm.message,
|
|
140
|
+
index=cm.index,
|
|
141
|
+
bucket=Bucket.PRUNABLE if cm.bucket == Bucket.COMPRESSIBLE else cm.bucket,
|
|
142
|
+
)
|
|
143
|
+
for cm in classified
|
|
144
|
+
]
|
|
145
|
+
|
|
146
|
+
# Group consecutive compressible messages for batch summarization
|
|
147
|
+
result: list[ClassifiedMessage] = []
|
|
148
|
+
i = 0
|
|
149
|
+
while i < len(classified):
|
|
150
|
+
cm = classified[i]
|
|
151
|
+
if cm.bucket != Bucket.COMPRESSIBLE:
|
|
152
|
+
result.append(cm)
|
|
153
|
+
i += 1
|
|
154
|
+
continue
|
|
155
|
+
|
|
156
|
+
# Collect consecutive compressible block
|
|
157
|
+
block: list[ClassifiedMessage] = []
|
|
158
|
+
while i < len(classified) and classified[i].bucket == Bucket.COMPRESSIBLE:
|
|
159
|
+
block.append(classified[i])
|
|
160
|
+
i += 1
|
|
161
|
+
|
|
162
|
+
# Build text to summarize
|
|
163
|
+
text_parts = [f"[{m.message.role}]: {m.message.content}" for m in block]
|
|
164
|
+
combined = "\n".join(text_parts)
|
|
165
|
+
|
|
166
|
+
try:
|
|
167
|
+
summary = self._summarizer(combined)
|
|
168
|
+
except Exception:
|
|
169
|
+
# If summarizer fails, keep original messages
|
|
170
|
+
result.extend(block)
|
|
171
|
+
continue
|
|
172
|
+
|
|
173
|
+
# Replace block with a single summarized assistant message
|
|
174
|
+
summary_msg = Message(
|
|
175
|
+
role="assistant",
|
|
176
|
+
content=f"[Summary of turns {block[0].index}-{block[-1].index}]: {summary}",
|
|
177
|
+
metadata={"statis_kit_summary": True},
|
|
178
|
+
)
|
|
179
|
+
result.append(ClassifiedMessage(
|
|
180
|
+
message=summary_msg,
|
|
181
|
+
index=block[0].index,
|
|
182
|
+
bucket=Bucket.RECENT, # Keep summaries
|
|
183
|
+
))
|
|
184
|
+
|
|
185
|
+
return result
|
|
186
|
+
|
|
187
|
+
# ------------------------------------------------------------------
|
|
188
|
+
# Pass 3: Prune
|
|
189
|
+
# ------------------------------------------------------------------
|
|
190
|
+
|
|
191
|
+
def _prune(
|
|
192
|
+
self, classified: list[ClassifiedMessage],
|
|
193
|
+
) -> tuple[list[Message], list[tuple[int, int]]]:
|
|
194
|
+
"""Drop prunable messages. Returns (messages, compressed_ranges)."""
|
|
195
|
+
output: list[Message] = []
|
|
196
|
+
compressed_ranges: list[tuple[int, int]] = []
|
|
197
|
+
|
|
198
|
+
# Track consecutive pruned ranges
|
|
199
|
+
prune_start: Optional[int] = None
|
|
200
|
+
|
|
201
|
+
for cm in classified:
|
|
202
|
+
if cm.bucket == Bucket.PRUNABLE:
|
|
203
|
+
if prune_start is None:
|
|
204
|
+
prune_start = cm.index
|
|
205
|
+
else:
|
|
206
|
+
if prune_start is not None:
|
|
207
|
+
compressed_ranges.append((prune_start, cm.index - 1))
|
|
208
|
+
prune_start = None
|
|
209
|
+
output.append(cm.message)
|
|
210
|
+
|
|
211
|
+
# Close any trailing prune range
|
|
212
|
+
if prune_start is not None and classified:
|
|
213
|
+
compressed_ranges.append((prune_start, classified[-1].index))
|
|
214
|
+
|
|
215
|
+
return output, compressed_ranges
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Token counting and cost estimation — tiktoken optional."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import Callable, Optional
|
|
5
|
+
|
|
6
|
+
from ._pricing import get_model_pricing, load_pricing
|
|
7
|
+
from ._types import CostEstimate, MeterConfig, Message, TurnCost
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _get_counter(model: str) -> Callable[[str], int]:
|
|
11
|
+
"""Return a token-counting function. Uses tiktoken when available."""
|
|
12
|
+
try:
|
|
13
|
+
import tiktoken # type: ignore[import]
|
|
14
|
+
|
|
15
|
+
# Map model names to tiktoken encodings
|
|
16
|
+
try:
|
|
17
|
+
enc = tiktoken.encoding_for_model(model)
|
|
18
|
+
except KeyError:
|
|
19
|
+
enc = tiktoken.get_encoding("cl100k_base")
|
|
20
|
+
|
|
21
|
+
def _count(text: str) -> int:
|
|
22
|
+
return len(enc.encode(text))
|
|
23
|
+
|
|
24
|
+
return _count
|
|
25
|
+
except ImportError:
|
|
26
|
+
pass
|
|
27
|
+
|
|
28
|
+
# Fallback: chars / 4 (rough approximation for English text)
|
|
29
|
+
def _fallback(text: str) -> int:
|
|
30
|
+
return max(1, len(text) // 4)
|
|
31
|
+
|
|
32
|
+
return _fallback
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class CostMeter:
    """Per-turn token counting and USD cost estimation for a chat session."""

    def __init__(self, config: Optional[MeterConfig] = None) -> None:
        cfg = config if config is not None else MeterConfig()
        self._model = cfg.model
        self._pricing = load_pricing(cfg.pricing_path)
        self._on_turn = cfg.on_turn
        self._counter = _get_counter(cfg.model)

    def count_tokens(self, text: str) -> int:
        """Count tokens in a single string."""
        return self._counter(text)

    def count_messages(
        self, messages: list[Message],
    ) -> tuple[int, list[TurnCost]]:
        """Count tokens per turn and session total.

        Returns (total_tokens, per_turn_costs).
        """
        # Input-side pricing only: message content counts as prompt tokens.
        input_per_1k, _ = get_model_pricing(self._model, self._pricing)
        per_turn: list[TurnCost] = []
        total = 0

        for turn_index, message in enumerate(messages):
            token_count = self._counter(message.content)
            turn = TurnCost(
                turn_index=turn_index,
                role=message.role,
                tokens=token_count,
                cost_usd=(token_count / 1000.0) * input_per_1k,
            )
            per_turn.append(turn)
            total += token_count

            # Optional live-metering callback, invoked once per turn.
            if self._on_turn is not None:
                self._on_turn(turn)

        return total, per_turn

    def estimate_cost(
        self,
        input_tokens: int,
        output_tokens: int = 0,
    ) -> CostEstimate:
        """Estimate cost based on model pricing table."""
        input_per_1k, output_per_1k = get_model_pricing(self._model, self._pricing)
        input_cost = input_per_1k * (input_tokens / 1000.0)
        output_cost = output_per_1k * (output_tokens / 1000.0)
        return CostEstimate(
            input_cost_usd=round(input_cost, 8),
            output_cost_usd=round(output_cost, 8),
            total_cost_usd=round(input_cost + output_cost, 8),
            model=self._model,
        )
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Pattern-based injection guard — pure local, zero latency budget."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import re
|
|
5
|
+
from typing import Any, Optional
|
|
6
|
+
|
|
7
|
+
from ._patterns import BUILTIN_PATTERNS
|
|
8
|
+
from ._types import GuardConfig, GuardDetection, Message
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class GuardHaltError(Exception):
    """Raised when on_detect='halt' and an injection is found."""

    def __init__(self, detections: list[GuardDetection]) -> None:
        super().__init__(f"Guard halted: {len(detections)} injection(s) detected")
        # Keep the full detection list for callers to inspect after catching.
        self.detections = detections
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class GuardResult:
    """Outcome of a guard scan: verdict, matches, and pass-through messages."""

    __slots__ = ("clean", "detections", "messages")

    def __init__(
        self,
        clean: bool,
        detections: list[GuardDetection],
        messages: list[Message],
    ) -> None:
        # messages: the scanned messages (content may have been stripped).
        self.messages = messages
        # detections: every pattern match found across all messages.
        self.detections = detections
        # clean: True when no injection pattern matched.
        self.clean = clean
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class Guard:
    """Pattern-based injection scanner for user/tool messages.

    Compiles the enabled built-in patterns plus any extra patterns from the
    config. On a match, behavior follows ``on_detect``: "halt" raises
    GuardHaltError; otherwise the matched text is stripped from the message.
    """

    def __init__(self, config: Optional[GuardConfig] = None) -> None:
        cfg = config or GuardConfig()
        self._on_detect = cfg.on_detect
        disabled = set(cfg.disabled_categories or [])

        # Build the list of enabled patterns (built-ins plus extras).
        raw_patterns: list[dict[str, Any]] = [
            p for p in BUILTIN_PATTERNS if p["category"] not in disabled
        ]
        raw_patterns.extend(
            p for p in (cfg.extra_patterns or [])
            if p.get("category", "") not in disabled
        )

        # Compile once; flags are identical for every pattern, so hoist them.
        flags = re.IGNORECASE | re.MULTILINE
        self._patterns: list[tuple[str, str, re.Pattern[str]]] = [
            # Bug fix: extra patterns may omit "category" (the filter above
            # already tolerates that with .get); indexing p["category"] here
            # raised KeyError for such patterns.
            (p["id"], p.get("category", ""), re.compile(p["pattern"], flags))
            for p in raw_patterns
        ]

    def scan(self, messages: list[Message]) -> GuardResult:
        """Scan all messages for injection patterns.

        Only user and tool messages are scanned — system/assistant messages
        are treated as trusted and passed through untouched.

        Raises:
            GuardHaltError: when on_detect == "halt" and any pattern matches.
        """
        detections: list[GuardDetection] = []
        cleaned: list[Message] = []

        for i, msg in enumerate(messages):
            if msg.role not in ("user", "tool"):
                cleaned.append(msg)
                continue

            content = msg.content
            msg_detections: list[GuardDetection] = [
                GuardDetection(
                    turn_index=i,
                    pattern_id=pat_id,
                    category=cat,
                    matched_text=match.group(),
                    action_taken=self._on_detect,
                )
                for pat_id, cat, regex in self._patterns
                for match in regex.finditer(content)
            ]

            if msg_detections:
                detections.extend(msg_detections)

                if self._on_detect == "halt":
                    raise GuardHaltError(detections)

                # Strip mode: remove matched content, then collapse the
                # leftover whitespace runs.
                # NOTE(review): str.replace removes ALL occurrences of the
                # matched text, not only the matched span — confirm intended.
                stripped_content = content
                for det in msg_detections:
                    stripped_content = stripped_content.replace(det.matched_text, "")
                stripped_content = re.sub(r"\s{2,}", " ", stripped_content).strip()

                cleaned.append(Message(
                    role=msg.role,
                    content=stripped_content,
                    name=msg.name,
                    tool_call_id=msg.tool_call_id,
                    metadata=msg.metadata,
                ))
            else:
                cleaned.append(msg)

        return GuardResult(
            clean=len(detections) == 0,
            detections=detections,
            messages=cleaned,
        )
|
|
File without changes
|