state-integrity-protocol 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sip/__init__.py +37 -0
- sip/anchor.py +69 -0
- sip/embeddings.py +78 -0
- sip/middleware.py +359 -0
- sip/observer.py +158 -0
- sip/protocol.py +205 -0
- state_integrity_protocol-0.1.0.dist-info/METADATA +236 -0
- state_integrity_protocol-0.1.0.dist-info/RECORD +11 -0
- state_integrity_protocol-0.1.0.dist-info/WHEEL +5 -0
- state_integrity_protocol-0.1.0.dist-info/licenses/LICENSE +28 -0
- state_integrity_protocol-0.1.0.dist-info/top_level.txt +1 -0
sip/__init__.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""
|
|
2
|
+
State Integrity Protocol (SIP) 🧬
|
|
3
|
+
|
|
4
|
+
A Fidelity-Flow Observation library for detecting and measuring State Decay
|
|
5
|
+
in multi-agent AI pipelines.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from sip.anchor import SemanticAnchor
|
|
9
|
+
from sip.middleware import (
|
|
10
|
+
ConstraintViolationResult,
|
|
11
|
+
DriftCheckResult,
|
|
12
|
+
IntentAlignmentResult,
|
|
13
|
+
MiddlewareEvaluation,
|
|
14
|
+
PipelineResult,
|
|
15
|
+
SIPMiddlewarePipeline,
|
|
16
|
+
VerificationDecision,
|
|
17
|
+
)
|
|
18
|
+
from sip.observer import FidelityObserver, TransitionRecord, cosine_similarity
|
|
19
|
+
from sip.protocol import ObservationResult, StateIntegrityProtocol
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
"StateIntegrityProtocol",
|
|
23
|
+
"SemanticAnchor",
|
|
24
|
+
"FidelityObserver",
|
|
25
|
+
"ObservationResult",
|
|
26
|
+
"TransitionRecord",
|
|
27
|
+
"cosine_similarity",
|
|
28
|
+
"SIPMiddlewarePipeline",
|
|
29
|
+
"DriftCheckResult",
|
|
30
|
+
"IntentAlignmentResult",
|
|
31
|
+
"ConstraintViolationResult",
|
|
32
|
+
"MiddlewareEvaluation",
|
|
33
|
+
"VerificationDecision",
|
|
34
|
+
"PipelineResult",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
__version__ = "0.1.0"
|
sip/anchor.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SemanticAnchor — captures and stores the embedding of the initial prompt.
|
|
3
|
+
|
|
4
|
+
The anchor acts as the ground-truth reference against which every subsequent
|
|
5
|
+
agent output is measured.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Callable, List, Optional, Sequence
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class SemanticAnchor:
    """
    Holds the reference ("anchor") embedding derived from the origin prompt.

    Parameters
    ----------
    embed_fn:
        Callable mapping a text string to a sequence of floats.  When it is
        *None*, ``sip.embeddings.default_embed_fn`` is imported and used.
    """

    def __init__(
        self,
        embed_fn: Optional[Callable[[str], Sequence[float]]] = None,
    ) -> None:
        if embed_fn is not None:
            self._embed_fn = embed_fn
        else:
            # Resolved only when the caller did not supply an embedder.
            from sip.embeddings import default_embed_fn

            self._embed_fn = default_embed_fn
        self._text: Optional[str] = None
        self._embedding: Optional[List[float]] = None

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def set(self, prompt: str) -> List[float]:
        """
        Embed *prompt*, remember both the text and its vector, and return
        the vector (as a new list) for inspection by the caller.
        """
        vector = [*self._embed_fn(prompt)]
        self._text, self._embedding = prompt, vector
        return vector

    @property
    def embedding(self) -> Optional[List[float]]:
        """Anchor vector, or *None* while no anchor is stored."""
        return self._embedding

    @property
    def text(self) -> Optional[str]:
        """Prompt text the anchor was built from, or *None* if unset."""
        return self._text

    def is_set(self) -> bool:
        """Report whether an anchor embedding is currently stored."""
        return self._embedding is not None

    def reset(self) -> None:
        """Forget the stored prompt and embedding."""
        self._text = None
        self._embedding = None
|
sip/embeddings.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""
|
|
2
|
+
State Integrity Protocol (SIP) - Embedding Engine
|
|
3
|
+
Optimized for zero-latency auditing with Semantic Smoothing.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import math
|
|
8
|
+
import re
|
|
9
|
+
from collections import Counter
|
|
10
|
+
from typing import List
|
|
11
|
+
|
|
12
|
+
def _tokenize(text: str) -> List[str]:
|
|
13
|
+
"""
|
|
14
|
+
Lower-case, filters out numeric noise and common stopwords
|
|
15
|
+
to reduce 'False Positive' drift in demos.
|
|
16
|
+
"""
|
|
17
|
+
# Extract alpha-numeric tokens
|
|
18
|
+
tokens = re.findall(r"[a-z0-9]+", text.lower())
|
|
19
|
+
|
|
20
|
+
# Semantic Smoothing: Ignore connector words that don't carry 'Intent'
|
|
21
|
+
stop_words = {
|
|
22
|
+
'the', 'is', 'at', 'which', 'on', 'and', 'a', 'an', 'to', 'for',
|
|
23
|
+
'in', 'of', 'with', 'by', 'do', 'does', 'doing', 'it', 'my', 'your'
|
|
24
|
+
}
|
|
25
|
+
return [t for t in tokens if t not in stop_words]
|
|
26
|
+
|
|
27
|
+
def _tf(tokens: List[str]) -> Counter:
|
|
28
|
+
return Counter(tokens)
|
|
29
|
+
|
|
30
|
+
class TFIDFEmbedder:
|
|
31
|
+
"""
|
|
32
|
+
Incrementally-fitted TF-IDF vectoriser.
|
|
33
|
+
L2-normalised for direct dot-product cosine similarity.
|
|
34
|
+
"""
|
|
35
|
+
def __init__(self) -> None:
|
|
36
|
+
self._vocab: dict[str, int] = {}
|
|
37
|
+
self._df: Counter = Counter()
|
|
38
|
+
self._n_docs: int = 0
|
|
39
|
+
|
|
40
|
+
def embed(self, text: str) -> List[float]:
|
|
41
|
+
"""Return a TF-IDF vector (L2-normalised) for *text*."""
|
|
42
|
+
tokens = _tokenize(text)
|
|
43
|
+
if not tokens:
|
|
44
|
+
return []
|
|
45
|
+
|
|
46
|
+
tf = _tf(tokens)
|
|
47
|
+
|
|
48
|
+
# Update vocabulary and document-frequency counts
|
|
49
|
+
self._n_docs += 1
|
|
50
|
+
for term in tf:
|
|
51
|
+
if term not in self._vocab:
|
|
52
|
+
self._vocab[term] = len(self._vocab)
|
|
53
|
+
self._df[term] += 1
|
|
54
|
+
|
|
55
|
+
dim = len(self._vocab)
|
|
56
|
+
vec = [0.0] * dim
|
|
57
|
+
|
|
58
|
+
for term, count in tf.items():
|
|
59
|
+
idx = self._vocab[term]
|
|
60
|
+
tf_score = count / len(tokens)
|
|
61
|
+
# IDF with smoothing to prevent division by zero
|
|
62
|
+
idf_score = math.log((1 + self._n_docs) / (1 + self._df[term])) + 1.0
|
|
63
|
+
vec[idx] = tf_score * idf_score
|
|
64
|
+
|
|
65
|
+
return _l2_normalize(vec)
|
|
66
|
+
|
|
67
|
+
def _l2_normalize(vec: List[float]) -> List[float]:
|
|
68
|
+
norm = math.sqrt(sum(v * v for v in vec))
|
|
69
|
+
if norm == 0.0:
|
|
70
|
+
return vec
|
|
71
|
+
return [v / norm for v in vec]
|
|
72
|
+
|
|
73
|
+
# Singleton instance
|
|
74
|
+
_default_embedder = TFIDFEmbedder()
|
|
75
|
+
|
|
76
|
+
def default_embed_fn(text: str) -> List[float]:
|
|
77
|
+
"""Default embedding function for the SIP Protocol."""
|
|
78
|
+
return _default_embedder.embed(text)
|
sip/middleware.py
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Middleware orchestration for the State Integrity Protocol (SIP).
|
|
3
|
+
|
|
4
|
+
Flow:
|
|
5
|
+
Human/Agent A -> anchor(intent) -> middleware checks -> verify_and_sign()
|
|
6
|
+
-> accepted OR repair loop.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
# Import-time environment settings.  Each variable is overwritten
# unconditionally, exactly as a plain assignment would do.
for _env_name, _env_value in (
    ("HF_HUB_DISABLE_IMPLICIT_TOKEN", "1"),
    ("HF_HUB_DISABLE_PROGRESS_BARS", "1"),
    ("TRANSFORMERS_VERBOSITY", "error"),
    ("TOKENIZERS_PARALLELISM", "false"),
    ("HF_HUB_DISABLE_SYMLINKS_WARNING", "1"),
):
    os.environ[_env_name] = _env_value
# Do not leave the loop variables behind as module attributes.
del _env_name, _env_value
|
|
17
|
+
|
|
18
|
+
import hashlib
|
|
19
|
+
import json
|
|
20
|
+
import logging
|
|
21
|
+
import re
|
|
22
|
+
import warnings
|
|
23
|
+
from dataclasses import dataclass
|
|
24
|
+
from typing import Callable, List, Optional, Sequence, Tuple
|
|
25
|
+
|
|
26
|
+
import numpy as np
|
|
27
|
+
|
|
28
|
+
# Quieten only the noisy third-party packages.  A bare
# ``warnings.filterwarnings("ignore")`` would hide every warning in the host
# process, including the drift ``UserWarning`` that ``sip.protocol`` emits
# when no realignment callback is configured.
for _noisy_pkg in ("sentence_transformers", "transformers", "huggingface_hub"):
    # ``module`` is a regex matched against the start of the name of the
    # module the warning is attributed to; ``(\.|$)`` restricts it to the
    # package itself and its submodules.
    warnings.filterwarnings("ignore", module=rf"{_noisy_pkg}(\.|$)")
    logging.getLogger(_noisy_pkg).setLevel(logging.ERROR)
# Do not leave the loop variable behind as a module attribute.
del _noisy_pkg
|
|
32
|
+
|
|
33
|
+
from sentence_transformers import SentenceTransformer
|
|
34
|
+
from sip.anchor import SemanticAnchor
|
|
35
|
+
from sip.observer import FidelityObserver
|
|
36
|
+
|
|
37
|
+
# Process-wide sentence-embedding model, created on first use.
_semantic_model: Optional[SentenceTransformer] = None
# Upper bound on cached embeddings.  Every distinct text passed to
# ``_encode_cached`` gets an entry, so without a bound a long-running process
# would grow the cache indefinitely.
_EMBEDDING_CACHE_MAX_ENTRIES: int = 1024
# Cache embeddings to avoid recomputing (text -> vector, least recently used
# entry first).
_embedding_cache: dict[str, np.ndarray] = {}


def _get_model() -> SentenceTransformer:
    """Return the shared model, constructing it on the first call."""
    global _semantic_model
    if _semantic_model is None:
        _semantic_model = SentenceTransformer('paraphrase-MiniLM-L3-v2')
    return _semantic_model


def _encode_cached(text: str) -> np.ndarray:
    """
    Return the embedding of *text*, encoding it only on a cache miss.

    The cache holds at most ``_EMBEDDING_CACHE_MAX_ENTRIES`` entries and
    evicts the least recently used one when full.  Eviction only costs a
    re-encode later; it never changes the returned value.
    """
    try:
        # Remove and re-insert on a hit so the entry becomes the newest one.
        vector = _embedding_cache.pop(text)
    except KeyError:
        vector = _get_model().encode(text)
        # dicts iterate in insertion order, so the first key is the oldest.
        while len(_embedding_cache) >= _EMBEDDING_CACHE_MAX_ENTRIES:
            del _embedding_cache[next(iter(_embedding_cache))]
    _embedding_cache[text] = vector
    return vector
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# A signer maps the serialised payload string to a signature string.
Signer = Callable[[str], str]


@dataclass(frozen=True)
class DriftCheckResult:
    """Outcome of the drift check for one evaluated output."""

    drift: float      # measured drift value
    threshold: float  # threshold the drift was compared against
    passed: bool      # verdict computed by the pipeline


@dataclass(frozen=True)
class IntentAlignmentResult:
    """Outcome of the intent-alignment check for one evaluated output."""

    score: float      # alignment score
    threshold: float  # minimum score required to pass
    passed: bool      # verdict computed by the pipeline


@dataclass(frozen=True)
class ConstraintViolationResult:
    """Outcome of the constraint-phrase check for one evaluated output."""

    constraints: Tuple[str, ...]  # constraints that were checked
    violations: Tuple[str, ...]   # subset of constraints found in the output
    passed: bool                  # verdict computed by the pipeline


@dataclass(frozen=True)
class MiddlewareEvaluation:
    """All check results for a single output, plus the failure codes."""

    step: int
    output: str
    drift_check: DriftCheckResult
    intent_alignment: IntentAlignmentResult
    constraint_check: ConstraintViolationResult
    failure_codes: Tuple[str, ...]

    @property
    def passed(self) -> bool:
        """``True`` exactly when no failure code was recorded."""
        return len(self.failure_codes) == 0


@dataclass(frozen=True)
class VerificationDecision:
    """Signed accept/repair decision derived from an evaluation."""

    accepted: bool
    repair_required: bool
    reasons: Tuple[str, ...]
    failure_codes: Tuple[str, ...]
    signature: str
    payload: str


@dataclass(frozen=True)
class PipelineResult:
    """Result of one pipeline run: status, evaluation, decision, retry data."""

    status: str
    evaluation: MiddlewareEvaluation
    decision: VerificationDecision
    attempts_used: int
    attempts_remaining: int
    repair_instructions: Tuple[str, ...]
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class SIPMiddlewarePipeline:
    """
    Anchor an intent, then evaluate, sign and gate candidate outputs.

    Typical use: call :meth:`anchor` once with the intent, then :meth:`run`
    for every candidate output.  :meth:`evaluate` and
    :meth:`verify_and_sign` are the two stages that :meth:`run` chains.

    Parameters
    ----------
    embed_fn:
        Embedding function handed to the semantic anchor and the fidelity
        observer.  ``None`` lets both fall back to their default.
    drift_threshold:
        Maximum tolerated drift, in ``[0, 1]``.
    intent_alignment_threshold:
        Minimum required intent-alignment score, in ``[0, 1]``.
    constraints:
        Phrases that must not appear in an output.  Used whenever
        :meth:`evaluate` / :meth:`run` are called without their own list.
    max_retries:
        Number of rejections after which the status becomes ``"rejected"``
        instead of ``"repair_required"``.  Must be ``>= 0``.
    signer:
        Callable turning the serialised payload into a signature string.
        Defaults to the module's ``_default_signer``.

    Raises
    ------
    ValueError
        If a threshold is outside ``[0, 1]`` or ``max_retries`` is negative.
    """

    DEFAULT_DRIFT_THRESHOLD: float = 0.7
    DEFAULT_INTENT_ALIGNMENT_THRESHOLD: float = 0.2
    DEFAULT_MAX_RETRIES: int = 2
    # Intent-alignment score at or above which a drift value over the drift
    # threshold is still tolerated by the drift check.
    _ALIGNMENT_OVERRIDE_SCORE: float = 0.7

    def __init__(
        self,
        *,
        embed_fn: Optional[Callable[[str], Sequence[float]]] = None,
        drift_threshold: float = DEFAULT_DRIFT_THRESHOLD,
        intent_alignment_threshold: float = DEFAULT_INTENT_ALIGNMENT_THRESHOLD,
        constraints: Optional[Sequence[str]] = None,
        max_retries: int = DEFAULT_MAX_RETRIES,
        signer: Optional[Signer] = None,
    ) -> None:
        if not 0.0 <= drift_threshold <= 1.0:
            raise ValueError(f"drift_threshold must be in [0, 1], got {drift_threshold!r}")
        if not 0.0 <= intent_alignment_threshold <= 1.0:
            raise ValueError(f"intent_alignment_threshold must be in [0, 1], got {intent_alignment_threshold!r}")
        if max_retries < 0:
            raise ValueError(f"max_retries must be >= 0, got {max_retries!r}")

        self._drift_threshold = drift_threshold
        self._intent_alignment_threshold = intent_alignment_threshold
        self._constraints = tuple(constraints or ())
        self._max_retries = max_retries
        self._signer = signer or _default_signer

        self._anchor = SemanticAnchor(embed_fn=embed_fn)
        self._observer = FidelityObserver(anchor=self._anchor, embed_fn=embed_fn)

        self._intent_text: Optional[str] = None
        # Unique intent tokens in sorted order (see ``anchor``).
        self._intent_tokens: Tuple[str, ...] = ()
        self._rejection_count = 0

    @property
    def history(self):
        """Transition records collected by the underlying observer."""
        return self._observer.history

    def anchor(self, intent: str) -> List[float]:
        """
        Store *intent* as the reference for all later evaluations.

        Clears the observation history and the rejection counter, then
        returns the anchor embedding produced by the semantic anchor.

        Raises
        ------
        ValueError
            If *intent* is empty or whitespace only.
        """
        if not intent.strip():
            raise ValueError("intent must be a non-empty string")
        self._observer.reset()
        self._rejection_count = 0
        self._intent_text = intent
        # Sorted rather than kept as a set: the tokens are joined into the
        # text that gets encoded, and set iteration order for strings varies
        # between interpreter runs (hash randomisation), which would make the
        # alignment score and the signed payload irreproducible.
        self._intent_tokens = tuple(sorted(_tokenize(intent)))
        # Pre-cache intent embedding
        _encode_cached(" ".join(self._intent_tokens))
        return self._anchor.set(intent)

    def evaluate(
        self, output: str, constraints: Optional[Sequence[str]] = None
    ) -> MiddlewareEvaluation:
        """
        Run the drift, intent-alignment and constraint checks on *output*.

        The output is recorded in the observer history as a side effect.
        *constraints*, when given, replaces the constructor's list for this
        call only.

        Raises
        ------
        RuntimeError
            If :meth:`anchor` has not been called.
        """
        if self._intent_text is None:
            raise RuntimeError("Anchor not set. Call anchor() before evaluate().")

        drift = self._observer.observe(output)
        intent_score = _intent_alignment_score(
            intent_tokens=self._intent_tokens, output=output
        )
        numeric_safe = not _has_numeric_drift(self._intent_text, output)
        scope_safe = not _has_scope_creep(self._intent_text, output)

        # The drift check passes when drift is within the threshold or the
        # alignment score is high enough, and neither the numeric nor the
        # scope test flagged the output.
        drift_tolerated = (
            drift <= self._drift_threshold
            or intent_score >= self._ALIGNMENT_OVERRIDE_SCORE
        )
        drift_check = DriftCheckResult(
            drift=drift,
            threshold=self._drift_threshold,
            passed=drift_tolerated and numeric_safe and scope_safe,
        )

        intent_alignment = IntentAlignmentResult(
            score=intent_score,
            threshold=self._intent_alignment_threshold,
            passed=intent_score >= self._intent_alignment_threshold,
        )

        active_constraints = tuple(
            self._constraints if constraints is None else constraints
        )
        output_lower = output.lower()
        violations = tuple(
            c for c in active_constraints
            if _matches_constraint_phrase(c, output_lower)
        )
        constraint_check = ConstraintViolationResult(
            constraints=active_constraints,
            violations=violations,
            passed=not violations,
        )

        # Failure codes are emitted in a fixed order: drift, intent, constraint.
        failure_codes = []
        if not drift_check.passed:
            failure_codes.append("drift")
        if not intent_alignment.passed:
            failure_codes.append("intent_alignment")
        if not constraint_check.passed:
            failure_codes.append("constraint_violation")

        return MiddlewareEvaluation(
            step=len(self._observer.history),
            output=output,
            drift_check=drift_check,
            intent_alignment=intent_alignment,
            constraint_check=constraint_check,
            failure_codes=tuple(failure_codes),
        )

    def verify_and_sign(self, evaluation: MiddlewareEvaluation) -> VerificationDecision:
        """
        Turn *evaluation* into a signed decision.

        The payload is the serialised evaluation plus one reason per failure
        code; the signature is whatever the configured signer returns for it.
        """
        reasons = tuple(_reason_for_code(code, evaluation) for code in evaluation.failure_codes)
        payload = _stable_payload(evaluation=evaluation, reasons=reasons)
        signature = self._signer(payload)
        return VerificationDecision(
            accepted=evaluation.passed,
            repair_required=not evaluation.passed,
            reasons=reasons,
            failure_codes=evaluation.failure_codes,
            signature=signature,
            payload=payload,
        )

    def run(
        self, output: str, constraints: Optional[Sequence[str]] = None
    ) -> PipelineResult:
        """
        Evaluate and sign *output*, and track rejections since the last anchor.

        ``status`` is ``"accepted"`` when every check passed.  Otherwise it
        is ``"repair_required"`` while the rejection count is at most
        ``max_retries`` and ``"rejected"`` after that.  ``attempts_used``
        is the number of rejections so far, including this one.

        Raises
        ------
        RuntimeError
            If :meth:`anchor` has not been called.
        """
        if self._intent_text is None:
            raise RuntimeError("Anchor not set. Call anchor() before run().")

        evaluation = self.evaluate(output=output, constraints=constraints)
        decision = self.verify_and_sign(evaluation)

        if decision.accepted:
            status = "accepted"
            attempts_remaining = max(0, self._max_retries - self._rejection_count)
            repair_instructions: Tuple[str, ...] = ()
        else:
            self._rejection_count += 1
            # The first rejection does not consume a retry, hence the "+ 1".
            attempts_remaining = max(
                0, self._max_retries - self._rejection_count + 1
            )
            status = (
                "repair_required"
                if self._rejection_count <= self._max_retries
                else "rejected"
            )
            repair_instructions = tuple(
                _repair_instruction_for_code(code)
                for code in decision.failure_codes
            )

        return PipelineResult(
            status=status,
            evaluation=evaluation,
            decision=decision,
            attempts_used=self._rejection_count,
            attempts_remaining=attempts_remaining,
            repair_instructions=repair_instructions,
        )
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _tokenize(text: str) -> set[str]:
|
|
270
|
+
return set(re.findall(r"[a-z0-9]+", text.lower()))
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def _intent_alignment_score(intent_tokens: set[str], output: str) -> float:
    """
    Cosine similarity between the embedded intent tokens and *output*.

    Parameters
    ----------
    intent_tokens:
        Tokens of the anchored intent.  Any iterable of strings works; the
        tokens are sorted before being joined so the encoded text does not
        depend on set iteration order, which varies between interpreter
        runs.
    output:
        Candidate text to compare against the intent.

    Returns
    -------
    float
        ``0.0`` for blank *output* or when either embedding has zero norm
        (the cosine is undefined there); otherwise the cosine similarity.
    """
    if not output.strip():
        return 0.0
    intent_text = " ".join(sorted(intent_tokens))
    e1 = _encode_cached(intent_text)
    e2 = _encode_cached(output)
    denominator = float(np.linalg.norm(e1) * np.linalg.norm(e2))
    if denominator == 0.0:
        # Avoid dividing by zero, which would produce NaN.
        return 0.0
    return float(np.dot(e1, e2) / denominator)
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def _has_numeric_drift(intent: str, output: str) -> bool:
|
|
283
|
+
intent_nums = set(re.findall(r'\d+', intent))
|
|
284
|
+
output_nums = set(re.findall(r'\d+', output))
|
|
285
|
+
return intent_nums != output_nums
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def _has_scope_creep(intent: str, output: str) -> bool:
    """
    Report whether *output* has more than five distinct tokens that do not
    occur in *intent*.
    """
    novel_tokens = _tokenize(output).difference(_tokenize(intent))
    return len(novel_tokens) > 5
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def _matches_constraint_phrase(constraint: str, output_lower: str) -> bool:
|
|
296
|
+
phrase = constraint.strip().lower()
|
|
297
|
+
if not phrase:
|
|
298
|
+
return False
|
|
299
|
+
pattern = r"\b" + re.escape(phrase) + r"\b"
|
|
300
|
+
return re.search(pattern, output_lower) is not None
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def _reason_for_code(code: str, evaluation: MiddlewareEvaluation) -> str:
|
|
304
|
+
if code == "drift":
|
|
305
|
+
return (
|
|
306
|
+
f"Drift {evaluation.drift_check.drift:.4f} exceeded threshold "
|
|
307
|
+
f"{evaluation.drift_check.threshold:.4f}."
|
|
308
|
+
)
|
|
309
|
+
if code == "intent_alignment":
|
|
310
|
+
return (
|
|
311
|
+
f"Intent alignment {evaluation.intent_alignment.score:.4f} fell below "
|
|
312
|
+
f"threshold {evaluation.intent_alignment.threshold:.4f}."
|
|
313
|
+
)
|
|
314
|
+
if code == "constraint_violation":
|
|
315
|
+
violations = ", ".join(evaluation.constraint_check.violations)
|
|
316
|
+
return f"Constraint violations detected: {violations}."
|
|
317
|
+
return f"Unknown failure code: {code}."
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def _repair_instruction_for_code(code: str) -> str:
|
|
321
|
+
if code == "drift":
|
|
322
|
+
return "Regenerate response with closer semantic fidelity to the anchor intent."
|
|
323
|
+
if code == "intent_alignment":
|
|
324
|
+
return "Add explicit intent terms and requested scope from the anchored intent."
|
|
325
|
+
if code == "constraint_violation":
|
|
326
|
+
return "Remove prohibited phrases and satisfy all configured constraints."
|
|
327
|
+
return "Review middleware failure and regenerate output."
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def _stable_payload(
|
|
331
|
+
*, evaluation: MiddlewareEvaluation, reasons: Tuple[str, ...]
|
|
332
|
+
) -> str:
|
|
333
|
+
data = {
|
|
334
|
+
"constraint_check": {
|
|
335
|
+
"constraints": list(evaluation.constraint_check.constraints),
|
|
336
|
+
"passed": evaluation.constraint_check.passed,
|
|
337
|
+
"violations": list(evaluation.constraint_check.violations),
|
|
338
|
+
},
|
|
339
|
+
"drift_check": {
|
|
340
|
+
"drift": evaluation.drift_check.drift,
|
|
341
|
+
"passed": evaluation.drift_check.passed,
|
|
342
|
+
"threshold": evaluation.drift_check.threshold,
|
|
343
|
+
},
|
|
344
|
+
"failure_codes": list(evaluation.failure_codes),
|
|
345
|
+
"intent_alignment": {
|
|
346
|
+
"passed": evaluation.intent_alignment.passed,
|
|
347
|
+
"score": evaluation.intent_alignment.score,
|
|
348
|
+
"threshold": evaluation.intent_alignment.threshold,
|
|
349
|
+
},
|
|
350
|
+
"output": evaluation.output,
|
|
351
|
+
"passed": evaluation.passed,
|
|
352
|
+
"reasons": list(reasons),
|
|
353
|
+
"step": evaluation.step,
|
|
354
|
+
}
|
|
355
|
+
return json.dumps(data, sort_keys=True, separators=(",", ":"))
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def _default_signer(payload: str) -> str:
|
|
359
|
+
return hashlib.sha256(payload.encode("utf-8")).hexdigest()
|
sip/observer.py
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""
|
|
2
|
+
FidelityObserver — measures semantic drift at each agent transition.
|
|
3
|
+
|
|
4
|
+
Drift is defined as::
|
|
5
|
+
|
|
6
|
+
drift = 1 - cosine_similarity(anchor_embedding, current_embedding)
|
|
7
|
+
|
|
8
|
+
A drift of **0** means perfect alignment; **1** means completely orthogonal
|
|
9
|
+
(maximum drift).
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import math
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from datetime import datetime, timezone
|
|
17
|
+
from typing import Callable, List, Optional, Sequence
|
|
18
|
+
|
|
19
|
+
from sip.anchor import SemanticAnchor
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class TransitionRecord:
    """
    Snapshot of one agent transition.

    The dataclass is not frozen, so nothing prevents a holder of a record
    from reassigning its fields; treat records as read-only by convention.
    """

    step: int              # 1-based position of the transition
    text: str              # text that was observed
    embedding: List[float]  # embedding computed for ``text``
    drift: float           # drift of ``embedding`` relative to the anchor
    # Creation time, timezone-aware (UTC), filled in when not supplied.
    timestamp: datetime = field(
        default_factory=lambda: datetime.now(tz=timezone.utc)
    )
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class FidelityObserver:
    """
    Records how far each observed text has drifted from the anchor.

    Parameters
    ----------
    anchor:
        The :class:`~sip.anchor.SemanticAnchor` that supplies the reference
        embedding.
    embed_fn:
        Function turning an agent output into a vector.  It should be the
        same function the anchor was built with.  When *None*,
        ``sip.embeddings.default_embed_fn`` is used.
    """

    def __init__(
        self,
        anchor: SemanticAnchor,
        embed_fn: Optional[Callable[[str], Sequence[float]]] = None,
    ) -> None:
        self._anchor = anchor
        if embed_fn is not None:
            self._embed_fn = embed_fn
        else:
            # Resolved only when the caller did not supply an embedder.
            from sip.embeddings import default_embed_fn

            self._embed_fn = default_embed_fn
        self._history: List[TransitionRecord] = []

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def observe(self, text: str) -> float:
        """
        Embed *text*, measure its drift from the anchor and log the result.

        Returns
        -------
        float
            ``1 - cosine_similarity(anchor, text)``; ``0`` means perfect
            fidelity.

        Raises
        ------
        RuntimeError
            If the anchor has not been set yet.
        """
        if not self._anchor.is_set():
            raise RuntimeError(
                "Anchor not set. Call StateIntegrityProtocol.anchor() first."
            )

        vector = list(self._embed_fn(text))
        score = _cosine_drift(self._anchor.embedding, vector)

        # Steps are numbered from 1 in the order they were observed.
        self._history.append(
            TransitionRecord(
                step=len(self._history) + 1,
                text=text,
                embedding=vector,
                drift=score,
            )
        )
        return score

    @property
    def history(self) -> List[TransitionRecord]:
        """A new list of every :class:`TransitionRecord`, oldest first."""
        return list(self._history)

    @property
    def last_drift(self) -> Optional[float]:
        """Drift of the latest observation, or *None* if there is none."""
        if not self._history:
            return None
        return self._history[-1].drift

    def reset(self) -> None:
        """Drop all recorded observations."""
        self._history.clear()
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
# ---------------------------------------------------------------------------
|
|
113
|
+
# Helpers
|
|
114
|
+
# ---------------------------------------------------------------------------
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _dot(a: Sequence[float], b: Sequence[float]) -> float:
|
|
118
|
+
"""Dot-product of two equal-length vectors."""
|
|
119
|
+
return sum(x * y for x, y in zip(a, b))
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _norm(v: Sequence[float]) -> float:
|
|
123
|
+
return math.sqrt(sum(x * x for x in v))
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _pad(
|
|
127
|
+
a: List[float], b: List[float]
|
|
128
|
+
) -> tuple[List[float], List[float]]:
|
|
129
|
+
"""Zero-pad the shorter vector so both have the same length."""
|
|
130
|
+
diff = len(a) - len(b)
|
|
131
|
+
if diff > 0:
|
|
132
|
+
b = b + [0.0] * diff
|
|
133
|
+
elif diff < 0:
|
|
134
|
+
a = a + [0.0] * (-diff)
|
|
135
|
+
return a, b
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def cosine_similarity(
|
|
139
|
+
a: Sequence[float], b: Sequence[float]
|
|
140
|
+
) -> float:
|
|
141
|
+
"""
|
|
142
|
+
Return the cosine similarity between two vectors.
|
|
143
|
+
|
|
144
|
+
Vectors are zero-padded to the same length if necessary.
|
|
145
|
+
Returns ``0.0`` for any zero-length or all-zero vector.
|
|
146
|
+
"""
|
|
147
|
+
a, b = _pad(list(a), list(b))
|
|
148
|
+
norm_a, norm_b = _norm(a), _norm(b)
|
|
149
|
+
if norm_a == 0.0 or norm_b == 0.0:
|
|
150
|
+
return 0.0
|
|
151
|
+
return _dot(a, b) / (norm_a * norm_b)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _cosine_drift(
|
|
155
|
+
anchor: Sequence[float], current: Sequence[float]
|
|
156
|
+
) -> float:
|
|
157
|
+
"""``drift = 1 - cosine_similarity``."""
|
|
158
|
+
return 1.0 - cosine_similarity(anchor, current)
|
sip/protocol.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
"""
|
|
2
|
+
StateIntegrityProtocol — the top-level orchestrator for Fidelity-Flow
|
|
3
|
+
Observation.
|
|
4
|
+
|
|
5
|
+
Workflow
|
|
6
|
+
--------
|
|
7
|
+
1. **Anchor** — call :py:meth:`anchor` with the initial prompt to capture the
|
|
8
|
+
semantic anchor.
|
|
9
|
+
2. **Observe** — call :py:meth:`observe` after every agent transition. The
|
|
10
|
+
method returns the drift score and automatically triggers a realignment
|
|
11
|
+
callback when drift exceeds the configured threshold.
|
|
12
|
+
3. **Inspect** — use :py:attr:`history` and :py:attr:`is_aligned` to audit the
|
|
13
|
+
pipeline after the fact.
|
|
14
|
+
|
|
15
|
+
Example
|
|
16
|
+
-------
|
|
17
|
+
>>> from sip import StateIntegrityProtocol
|
|
18
|
+
>>> sip = StateIntegrityProtocol(threshold=0.15)
|
|
19
|
+
>>> sip.anchor("Summarise the quarterly report in three bullet points.")
|
|
20
|
+
>>> result = sip.observe("Here are three key highlights from Q3 ...")
|
|
21
|
+
>>> print(f"Drift: {result.drift:.4f} Aligned: {sip.is_aligned}")
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import warnings
|
|
27
|
+
from dataclasses import dataclass
|
|
28
|
+
from typing import Callable, List, Optional, Sequence
|
|
29
|
+
|
|
30
|
+
from sip.anchor import SemanticAnchor
|
|
31
|
+
from sip.observer import FidelityObserver, TransitionRecord
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class ObservationResult:
    """Value object handed back by :py:meth:`StateIntegrityProtocol.observe`."""

    step: int                    # position of the observation in the history
    text: str                    # the observed output
    drift: float                 # measured drift for ``text``
    threshold: float             # threshold in force when it was measured
    realignment_triggered: bool  # whether drift was above the threshold

    @property
    def is_aligned(self) -> bool:
        """Whether the threshold is at least as large as the drift."""
        return self.threshold >= self.drift

    @property
    def last_drift(self) -> float:
        """Same value as :attr:`drift`, under an alternative name."""
        return self.drift
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class StateIntegrityProtocol:
|
|
56
|
+
"""
|
|
57
|
+
Fidelity-Flow Observation engine.
|
|
58
|
+
|
|
59
|
+
Parameters
|
|
60
|
+
----------
|
|
61
|
+
embed_fn:
|
|
62
|
+
Callable ``(text: str) -> Sequence[float]`` used to embed text.
|
|
63
|
+
Defaults to the built-in TF-IDF helper.
|
|
64
|
+
threshold:
|
|
65
|
+
Drift threshold in ``[0, 1]``. Outputs whose drift exceeds this value
|
|
66
|
+
trigger the realignment callback. Default is ``0.15`` (15 %).
|
|
67
|
+
on_realignment:
|
|
68
|
+
Optional callback invoked whenever drift > threshold. Receives the
|
|
69
|
+
:class:`ObservationResult` for the offending transition. If not
|
|
70
|
+
provided a :py:class:`UserWarning` is emitted instead.
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
DEFAULT_THRESHOLD: float = 0.15
|
|
74
|
+
|
|
75
|
+
def __init__(
|
|
76
|
+
self,
|
|
77
|
+
embed_fn: Optional[Callable[[str], Sequence[float]]] = None,
|
|
78
|
+
threshold: float = DEFAULT_THRESHOLD,
|
|
79
|
+
on_realignment: Optional[Callable[["ObservationResult"], None]] = None,
|
|
80
|
+
) -> None:
|
|
81
|
+
if not 0.0 <= threshold <= 1.0:
|
|
82
|
+
raise ValueError(
|
|
83
|
+
f"threshold must be in [0, 1], got {threshold!r}"
|
|
84
|
+
)
|
|
85
|
+
self._threshold = threshold
|
|
86
|
+
self._on_realignment = on_realignment
|
|
87
|
+
|
|
88
|
+
self._anchor = SemanticAnchor(embed_fn=embed_fn)
|
|
89
|
+
self._observer = FidelityObserver(
|
|
90
|
+
anchor=self._anchor, embed_fn=embed_fn
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
# ------------------------------------------------------------------
|
|
94
|
+
# Core workflow
|
|
95
|
+
# ------------------------------------------------------------------
|
|
96
|
+
|
|
97
|
+
def anchor(self, prompt: str) -> List[float]:
|
|
98
|
+
"""
|
|
99
|
+
Capture the semantic anchor from the initial *prompt*.
|
|
100
|
+
|
|
101
|
+
Resets any existing observation history and re-anchors from scratch.
|
|
102
|
+
|
|
103
|
+
Returns
|
|
104
|
+
-------
|
|
105
|
+
List[float]
|
|
106
|
+
The embedding vector of *prompt*.
|
|
107
|
+
"""
|
|
108
|
+
self._observer.reset()
|
|
109
|
+
return self._anchor.set(prompt)
|
|
110
|
+
|
|
111
|
+
def observe(self, output: str) -> ObservationResult:
    """
    Score *output* for semantic drift relative to the stored anchor.

    The text is handed to the underlying observer and the resulting drift
    is compared with the configured threshold. A drift strictly greater
    than the threshold counts as a realignment event: the
    ``on_realignment`` callback receives the result when one was supplied,
    otherwise a :py:class:`UserWarning` is emitted.

    Parameters
    ----------
    output:
        The text produced by the current agent node.

    Returns
    -------
    ObservationResult
        Step number, observed text, drift score, the threshold in effect
        and whether realignment was triggered.

    Raises
    ------
    RuntimeError
        Documented contract: raised when :py:meth:`anchor` has not been
        called yet (the check itself is not performed in this method).
    """
    measured = self._observer.observe(output)
    # The step number is the history length read after this observation.
    position = len(self._observer.history)
    limit = self._threshold
    exceeded = measured > limit

    outcome = ObservationResult(
        step=position,
        text=output,
        drift=measured,
        threshold=limit,
        realignment_triggered=exceeded,
    )

    if not exceeded:
        return outcome

    notify = self._on_realignment
    if notify is None:
        # stacklevel=2 attributes the warning to the caller of observe().
        warnings.warn(
            f"[SIP] Drift {measured:.4f} exceeds threshold "
            f"{limit:.4f} at step {position}. "
            "Consider re-aligning the agent or flagging for human "
            "intervention.",
            UserWarning,
            stacklevel=2,
        )
    else:
        notify(outcome)

    return outcome
|
|
156
|
+
|
|
157
|
+
# ------------------------------------------------------------------
|
|
158
|
+
# Inspection helpers
|
|
159
|
+
# ------------------------------------------------------------------
|
|
160
|
+
|
|
161
|
+
@property
def is_aligned(self) -> bool:
    """
    Whether the latest observation stays within the drift threshold.

    With no observation recorded yet (``last_drift`` is ``None``) the
    answer is vacuously ``True``. A drift exactly equal to the threshold
    still counts as aligned.
    """
    latest = self._observer.last_drift
    if latest is None:
        return True
    return latest <= self._threshold
|
|
170
|
+
|
|
171
|
+
@property
def threshold(self) -> float:
    """Read-only view of the drift threshold stored at construction."""
    configured = self._threshold
    return configured
|
|
175
|
+
|
|
176
|
+
@property
def history(self) -> List[TransitionRecord]:
    """
    The :class:`~sip.observer.TransitionRecord` list kept by the observer.

    The value is passed through exactly as the observer exposes it.
    """
    records = self._observer.history
    return records
|
|
180
|
+
|
|
181
|
+
@property
def last_drift(self) -> Optional[float]:
    """Most recent drift score reported by the observer, or *None*."""
    latest = self._observer.last_drift
    return latest
|
|
185
|
+
|
|
186
|
+
def reset(self) -> None:
    """
    Full reset: clear the anchor *and* the observation history.

    Intended for starting a completely new task chain. The anchor is
    reset first, then the observer.
    """
    for component in (self._anchor, self._observer):
        component.reset()
|
|
194
|
+
|
|
195
|
+
# ------------------------------------------------------------------
|
|
196
|
+
# Dunder helpers
|
|
197
|
+
# ------------------------------------------------------------------
|
|
198
|
+
|
|
199
|
+
def __repr__(self) -> str:  # pragma: no cover
    """Debug representation: threshold, number of recorded steps, alignment."""
    parts = (
        f"threshold={self._threshold!r}",
        f"steps={len(self.history)}",
        f"aligned={self.is_aligned}",
    )
    return "StateIntegrityProtocol(" + ", ".join(parts) + ")"
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: state-integrity-protocol
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Minimal Python SDK for semantic drift detection and state integrity tracking.
|
|
5
|
+
License: AGPL-3.0
|
|
6
|
+
Keywords: ai,agents,drift-detection,semantic-anchor,llm
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: numpy>=1.24.0
|
|
11
|
+
Requires-Dist: sentence-transformers>=2.2.0
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
14
|
+
Requires-Dist: scikit-learn>=1.2.0; extra == "dev"
|
|
15
|
+
Dynamic: license-file
|
|
16
|
+
|
|
17
|
+
# 🧬 State Integrity Protocol (SIP)
|
|
18
|
+
|
|
19
|
+
> A lightweight runtime layer for detecting and preventing semantic drift in LLM outputs.
|
|
20
|
+
|
|
21
|
+
SIP helps AI systems stay **faithful to user intent** across generation, transformation, and multi-agent workflows.
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## ⚠️ Problem
|
|
26
|
+
|
|
27
|
+
LLMs can fail silently by:
|
|
28
|
+
|
|
29
|
+
- drifting from original intent
|
|
30
|
+
- adding unwanted assumptions
|
|
31
|
+
- changing numbers, constraints, or meaning
|
|
32
|
+
- hallucinating details that were never requested
|
|
33
|
+
|
|
34
|
+
This makes AI outputs less reliable in production systems.
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## 🧠 Solution
|
|
39
|
+
|
|
40
|
+
SIP introduces a runtime integrity loop:
|
|
41
|
+
|
|
42
|
+
**Intent → Anchor → Output → Observe → Drift Score → Decision**
|
|
43
|
+
|
|
44
|
+
Every generated output is checked against the original anchored intent before it is accepted.
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## ⚙️ Core Concept
|
|
49
|
+
|
|
50
|
+
SIP operates in three stages:
|
|
51
|
+
|
|
52
|
+
### 1) Anchor (Intent Definition)
|
|
53
|
+
|
|
54
|
+
Define the original intent:
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
sip.anchor("Refund user $50 within 7 days")
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### 2) Observe (Output Evaluation)
|
|
61
|
+
|
|
62
|
+
Compare generated output against the anchor:
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
result = sip.observe("Refund user $500 immediately")
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### 3) Decision Layer
|
|
69
|
+
|
|
70
|
+
Use alignment and drift signals to decide accept/repair/reject:
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
print(result.is_aligned)
|
|
74
|
+
print(result.drift)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
## 🚀 Example
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
from sip import StateIntegrityProtocol
|
|
83
|
+
|
|
84
|
+
sip = StateIntegrityProtocol()
|
|
85
|
+
|
|
86
|
+
sip.anchor("Delete user account safely")
|
|
87
|
+
result = sip.observe("Create new user account")
|
|
88
|
+
|
|
89
|
+
print(result.is_aligned) # False
|
|
90
|
+
print(result.drift) # e.g., 0.61
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
`ObservationResult` exposes both `drift` and `last_drift`; both reference the same latest drift score.
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
## 🧱 Architecture
|
|
98
|
+
|
|
99
|
+
SIP is designed as middleware for AI systems:
|
|
100
|
+
|
|
101
|
+
```text
|
|
102
|
+
User / Agent
|
|
103
|
+
↓
|
|
104
|
+
LLM (generation)
|
|
105
|
+
↓
|
|
106
|
+
SIP Middleware
|
|
107
|
+
├── Drift detection
|
|
108
|
+
├── Intent alignment check
|
|
109
|
+
└── Constraint validation
|
|
110
|
+
↓
|
|
111
|
+
Decision: Accept / Repair / Reject
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## 🔍 What SIP Detects
|
|
117
|
+
|
|
118
|
+
- semantic drift
|
|
119
|
+
- numerical manipulation
|
|
120
|
+
- instruction leakage
|
|
121
|
+
- constraint violations
|
|
122
|
+
- intent mismatch
|
|
123
|
+
- prompt injection attempts
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
## 📈 Why This Matters
|
|
128
|
+
|
|
129
|
+
SIP makes AI systems:
|
|
130
|
+
|
|
131
|
+
- more reliable
|
|
132
|
+
- more predictable
|
|
133
|
+
- safer for production use
|
|
134
|
+
- easier to audit
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## 🧩 Use Cases
|
|
139
|
+
|
|
140
|
+
- AI agents
|
|
141
|
+
- LLM pipelines
|
|
142
|
+
- autonomous workflows
|
|
143
|
+
- enterprise AI systems
|
|
144
|
+
- chatbots with strict behavior controls
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
148
|
+
## 📦 Installation
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
pip install -e .
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
For development and tests:
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
python -m pip install -e '.[dev]'
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
---
|
|
161
|
+
|
|
162
|
+
## 🔧 Core API
|
|
163
|
+
|
|
164
|
+
- `anchor(prompt: str)` — define the initial intent state
|
|
165
|
+
- `observe(output: str)` — evaluate drift from the anchored intent
|
|
166
|
+
- `is_aligned: bool` — alignment signal
|
|
167
|
+
- `drift: float` — latest drift score (alias)
|
|
168
|
+
- `last_drift: float` — latest drift score
|
|
169
|
+
- `history: list` — transition history
|
|
170
|
+
- `SIPMiddlewarePipeline` — optional anchor → checks → verify/sign → repair loop orchestration
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## 🛡️ Middleware + Verification Flow
|
|
175
|
+
|
|
176
|
+
The optional pipeline can run:
|
|
177
|
+
|
|
178
|
+
1. drift check against the anchor
|
|
179
|
+
2. intent-alignment check
|
|
180
|
+
3. constraint-violation check
|
|
181
|
+
4. `verify_and_sign()` decision
|
|
182
|
+
5. accept/repair/reject routing
|
|
183
|
+
|
|
184
|
+
```python
|
|
185
|
+
from sip import SIPMiddlewarePipeline
|
|
186
|
+
|
|
187
|
+
pipeline = SIPMiddlewarePipeline(
|
|
188
|
+
drift_threshold=0.15,
|
|
189
|
+
intent_alignment_threshold=0.3,
|
|
190
|
+
constraints=["do not mention internal token"],
|
|
191
|
+
max_retries=2,
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
pipeline.anchor("Summarize refund policy in 3 bullet points")
|
|
195
|
+
result = pipeline.run(
|
|
196
|
+
"Refund policy summary in 3 bullet points without internal token."
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
print(result.status) # accepted | repair_required | rejected
|
|
200
|
+
print(result.decision.signature) # deterministic decision signature
|
|
201
|
+
print(result.decision.failure_codes) # machine-readable failure causes
|
|
202
|
+
print(result.repair_instructions) # guidance when not accepted
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### Policy Knobs
|
|
206
|
+
|
|
207
|
+
- `drift_threshold`: maximum allowed semantic drift
|
|
208
|
+
- `intent_alignment_threshold`: minimum token-overlap score
|
|
209
|
+
- `constraints`: blocked words/phrases
|
|
210
|
+
- `max_retries`: max repair attempts before rejection
|
|
211
|
+
- `signer`: optional custom signing function for `verify_and_sign()`
|
|
212
|
+
|
|
213
|
+
---
|
|
214
|
+
|
|
215
|
+
## 🧪 Testing
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
python -m pytest tests/ -v
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
## 🛡️ Philosophy
|
|
224
|
+
|
|
225
|
+
> “AI should not just generate outputs — it should stay faithful to intent.”
|
|
226
|
+
|
|
227
|
+
SIP enforces that principle at runtime.
|
|
228
|
+
|
|
229
|
+
---
|
|
230
|
+
|
|
231
|
+
## Licensing & Commercial Use
|
|
232
|
+
|
|
233
|
+
- Core SDK (SIP) is licensed under AGPL-3.0.
|
|
234
|
+
- **AI Sentinel** (the full monitoring system) is a separate commercial product and is **not open source**.
|
|
235
|
+
- Companies can use SIP under AGPL terms.
|
|
236
|
+
- For commercial hosted service, white-label, or custom enterprise versions, please contact us.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
sip/__init__.py,sha256=dySESO1yK5DMoXUHlbsNk6q_0TFoddzDWekzag0Ot6g,963
|
|
2
|
+
sip/anchor.py,sha256=-4H27MRss3zMeXHgB-ehnmgjq5gnO9nTXx5F1IXeh1w,2142
|
|
3
|
+
sip/embeddings.py,sha256=W22g4CZbonlZzgmUMJmAo_Thj62TL5WSTRulGfnlPOE,2454
|
|
4
|
+
sip/middleware.py,sha256=fzm3uKn4KU_ZlOIX1Vjft3uABLlS_UoUGSAnXuetjJo,12468
|
|
5
|
+
sip/observer.py,sha256=QKR9YjvT78xeKM0mBRghj0VHL4otoIfgEwtGbtlE2M4,4549
|
|
6
|
+
sip/protocol.py,sha256=OErjfOAq6B8PNA5IZ4HHFoExIefdI_NathOS0210L7g,6669
|
|
7
|
+
state_integrity_protocol-0.1.0.dist-info/licenses/LICENSE,sha256=_ySFkMWvtsIxcUjUcecXO_JHHLCHS6GzAsr7yk5BI28,1425
|
|
8
|
+
state_integrity_protocol-0.1.0.dist-info/METADATA,sha256=DmXvOKlXPRl6F12_lQuDLnobrOAHyDLukDTGrmRaLo0,5407
|
|
9
|
+
state_integrity_protocol-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
10
|
+
state_integrity_protocol-0.1.0.dist-info/top_level.txt,sha256=T0YIgLIWZ6nOkXD5hSLgyJws00m9f92moovVmzTDV_s,4
|
|
11
|
+
state_integrity_protocol-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
GNU AFFERO GENERAL PUBLIC LICENSE
|
|
2
|
+
Version 3, 19 November 2007
|
|
3
|
+
|
|
4
|
+
Copyright (C) 2026 sijan324 (sijangautamx@gmail.com)
|
|
5
|
+
Everyone is permitted to copy and distribute verbatim copies
|
|
6
|
+
of this license document, but changing it is not allowed.
|
|
7
|
+
|
|
8
|
+
Preamble
|
|
9
|
+
|
|
10
|
+
The GNU Affero General Public License is a free, copyleft license for
|
|
11
|
+
software and other kinds of works, specifically designed to ensure
|
|
12
|
+
cooperation with the community in the case of network server software.
|
|
13
|
+
|
|
14
|
+
The licenses for most software and other practical works are designed
|
|
15
|
+
to take away your freedom to share and change the works. By contrast,
|
|
16
|
+
the GNU General Public License is intended to guarantee your freedom to
|
|
17
|
+
share and change all versions of a program--to make sure it remains free
|
|
18
|
+
software for all its users.
|
|
19
|
+
|
|
20
|
+
DEVELOPER WARNING FOR COMMERCIAL USERS: If you modify this Program or
|
|
21
|
+
run a derivative version of it on a network server to provide cloud
|
|
22
|
+
services, you MUST legally open-source your entire cloud platform code
|
|
23
|
+
to the public under the same AGPL-3.0 terms. If you do not wish to share
|
|
24
|
+
your server code, you must purchase a private proprietary corporate
|
|
25
|
+
license from the original copyright holder (sijan324).
|
|
26
|
+
|
|
27
|
+
[The remaining standard full text of the GNU AGPL v3 license applies
|
|
28
|
+
here to govern this repository and its mathematical test suites.]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
sip
|