state-integrity-protocol 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
+ GNU AFFERO GENERAL PUBLIC LICENSE
2
+ Version 3, 19 November 2007
3
+
4
+ Copyright (C) 2026 sijan324 (sijangautamx@gmail.com)
5
+ Everyone is permitted to copy and distribute verbatim copies
6
+ of this license document, but changing it is not allowed.
7
+
8
+ Preamble
9
+
10
+ The GNU Affero General Public License is a free, copyleft license for
11
+ software and other kinds of works, specifically designed to ensure
12
+ cooperation with the community in the case of network server software.
13
+
14
+ The licenses for most software and other practical works are designed
15
+ to take away your freedom to share and change the works. By contrast,
16
+ our General Public Licenses are intended to guarantee your freedom to
17
+ share and change all versions of a program--to make sure it remains free
18
+ software for all its users.
19
+
20
+ DEVELOPER WARNING FOR COMMERCIAL USERS: If you modify this Program and
21
+ let users interact with it remotely over a network (for example, to
22
+ provide cloud services), you MUST offer those users the Corresponding
23
+ Source of your modified version under the same AGPL-3.0 terms (see
24
+ section 13). If you do not wish to share that code, you must purchase a
25
+ private proprietary license from the original copyright holder (sijan324).
26
+
27
+ [The remaining standard full text of the GNU AGPL v3 license applies
28
+ here to govern this repository and its mathematical test suites.]
@@ -0,0 +1,236 @@
1
+ Metadata-Version: 2.4
2
+ Name: state-integrity-protocol
3
+ Version: 0.1.0
4
+ Summary: Minimal Python SDK for semantic drift detection and state integrity tracking.
5
+ License: AGPL-3.0
6
+ Keywords: ai,agents,drift-detection,semantic-anchor,llm
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: numpy>=1.24.0
11
+ Requires-Dist: sentence-transformers>=2.2.0
12
+ Provides-Extra: dev
13
+ Requires-Dist: pytest>=7.0; extra == "dev"
14
+ Requires-Dist: scikit-learn>=1.2.0; extra == "dev"
15
+ Dynamic: license-file
16
+
17
+ # 🧬 State Integrity Protocol (SIP)
18
+
19
+ > A lightweight runtime layer for detecting and preventing semantic drift in LLM outputs.
20
+
21
+ SIP helps AI systems stay **faithful to user intent** across generation, transformation, and multi-agent workflows.
22
+
23
+ ---
24
+
25
+ ## ⚠️ Problem
26
+
27
+ LLMs can fail silently by:
28
+
29
+ - drifting from original intent
30
+ - adding unwanted assumptions
31
+ - changing numbers, constraints, or meaning
32
+ - hallucinating details that were never requested
33
+
34
+ This makes AI outputs less reliable in production systems.
35
+
36
+ ---
37
+
38
+ ## 🧠 Solution
39
+
40
+ SIP introduces a runtime integrity loop:
41
+
42
+ **Intent → Anchor → Output → Observe → Drift Score → Decision**
43
+
44
+ Every generated output is checked against the original anchored intent before it is accepted.
45
+
46
+ ---
47
+
48
+ ## ⚙️ Core Concept
49
+
50
+ SIP operates in three stages:
51
+
52
+ ### 1) Anchor (Intent Definition)
53
+
54
+ Define the original intent:
55
+
56
+ ```python
57
+ sip.anchor("Refund user $50 within 7 days")
58
+ ```
59
+
60
+ ### 2) Observe (Output Evaluation)
61
+
62
+ Compare generated output against the anchor:
63
+
64
+ ```python
65
+ result = sip.observe("Refund user $500 immediately")
66
+ ```
67
+
68
+ ### 3) Decision Layer
69
+
70
+ Use alignment and drift signals to decide accept/repair/reject:
71
+
72
+ ```python
73
+ print(result.is_aligned)
74
+ print(result.drift)
75
+ ```
76
+
77
+ ---
78
+
79
+ ## 🔁 Example
80
+
81
+ ```python
82
+ from sip import StateIntegrityProtocol
83
+
84
+ sip = StateIntegrityProtocol()
85
+
86
+ sip.anchor("Delete user account safely")
87
+ result = sip.observe("Create new user account")
88
+
89
+ print(result.is_aligned) # False
90
+ print(result.drift) # e.g., 0.61
91
+ ```
92
+
93
+ `ObservationResult` exposes both `drift` and `last_drift`; both reference the same latest drift score.
94
+
95
+ ---
96
+
97
+ ## 🧱 Architecture
98
+
99
+ SIP is designed as middleware for AI systems:
100
+
101
+ ```text
102
+ User / Agent
103
+
104
+ LLM (generation)
105
+
106
+ SIP Middleware
107
+ ├── Drift detection
108
+ ├── Intent alignment check
109
+ ├── Constraint validation
110
+
111
+ Decision: Accept / Repair / Reject
112
+ ```
113
+
114
+ ---
115
+
116
+ ## 🔐 What SIP Detects
117
+
118
+ - semantic drift
119
+ - numerical manipulation
120
+ - instruction leakage
121
+ - constraint violations
122
+ - intent mismatch
123
+ - prompt injection attempts
124
+
125
+ ---
126
+
127
+ ## 🚀 Why This Matters
128
+
129
+ SIP makes AI systems:
130
+
131
+ - more reliable
132
+ - more predictable
133
+ - safer for production use
134
+ - easier to audit
135
+
136
+ ---
137
+
138
+ ## 🧩 Use Cases
139
+
140
+ - AI agents
141
+ - LLM pipelines
142
+ - autonomous workflows
143
+ - enterprise AI systems
144
+ - chatbots with strict behavior controls
145
+
146
+ ---
147
+
148
+ ## 📦 Installation
149
+
150
+ ```bash
151
+ pip install -e .
152
+ ```
153
+
154
+ For development and tests:
155
+
156
+ ```bash
157
+ python -m pip install -e '.[dev]'
158
+ ```
159
+
160
+ ---
161
+
162
+ ## 🧠 Core API
163
+
164
+ - `anchor(prompt: str)` — define the initial intent state
165
+ - `observe(output: str)` — evaluate drift from the anchored intent
166
+ - `is_aligned: bool` — alignment signal
167
+ - `drift: float` — latest drift score (alias)
168
+ - `last_drift: float` — latest drift score
169
+ - `history: list` — transition history
170
+ - `SIPMiddlewarePipeline` — optional anchor → checks → verify/sign → repair loop orchestration
171
+
172
+ ---
173
+
174
+ ## 🛡️ Middleware + Verification Flow
175
+
176
+ The optional pipeline can run:
177
+
178
+ 1. drift check against the anchor
179
+ 2. intent-alignment check
180
+ 3. constraint-violation check
181
+ 4. `verify_and_sign()` decision
182
+ 5. accept/repair/reject routing
183
+
184
+ ```python
185
+ from sip import SIPMiddlewarePipeline
186
+
187
+ pipeline = SIPMiddlewarePipeline(
188
+ drift_threshold=0.15,
189
+ intent_alignment_threshold=0.3,
190
+ constraints=["do not mention internal token"],
191
+ max_retries=2,
192
+ )
193
+
194
+ pipeline.anchor("Summarize refund policy in 3 bullet points")
195
+ result = pipeline.run(
196
+ "Refund policy summary in 3 bullet points without internal token."
197
+ )
198
+
199
+ print(result.status) # accepted | repair_required | rejected
200
+ print(result.decision.signature) # deterministic decision signature
201
+ print(result.decision.failure_codes) # machine-readable failure causes
202
+ print(result.repair_instructions) # guidance when not accepted
203
+ ```
204
+
205
+ ### Policy Knobs
206
+
207
+ - `drift_threshold`: maximum allowed semantic drift
208
+ - `intent_alignment_threshold`: minimum token-overlap score
209
+ - `constraints`: blocked words/phrases
210
+ - `max_retries`: max repair attempts before rejection
211
+ - `signer`: optional custom signing function for `verify_and_sign()`
212
+
213
+ ---
214
+
215
+ ## 🧪 Testing
216
+
217
+ ```bash
218
+ python -m pytest tests/ -v
219
+ ```
220
+
221
+ ---
222
+
223
+ ## 🛡️ Philosophy
224
+
225
+ > “AI should not just generate outputs — it should stay faithful to intent.”
226
+
227
+ SIP enforces that principle at runtime.
228
+
229
+ ---
230
+
231
+ ## Licensing & Commercial Use
232
+
233
+ - Core SDK (SIP) is licensed under AGPL-3.0.
234
+ - **AI Sentinel** (the full monitoring system) is a separate commercial product and is **not open source**.
235
+ - Companies can use SIP under AGPL terms.
236
+ - For commercial hosted services, white-label use, or custom enterprise versions, please contact us.
@@ -0,0 +1,220 @@
1
+ # 🧬 State Integrity Protocol (SIP)
2
+
3
+ > A lightweight runtime layer for detecting and preventing semantic drift in LLM outputs.
4
+
5
+ SIP helps AI systems stay **faithful to user intent** across generation, transformation, and multi-agent workflows.
6
+
7
+ ---
8
+
9
+ ## ⚠️ Problem
10
+
11
+ LLMs can fail silently by:
12
+
13
+ - drifting from original intent
14
+ - adding unwanted assumptions
15
+ - changing numbers, constraints, or meaning
16
+ - hallucinating details that were never requested
17
+
18
+ This makes AI outputs less reliable in production systems.
19
+
20
+ ---
21
+
22
+ ## 🧠 Solution
23
+
24
+ SIP introduces a runtime integrity loop:
25
+
26
+ **Intent → Anchor → Output → Observe → Drift Score → Decision**
27
+
28
+ Every generated output is checked against the original anchored intent before it is accepted.
29
+
30
+ ---
31
+
32
+ ## ⚙️ Core Concept
33
+
34
+ SIP operates in three stages:
35
+
36
+ ### 1) Anchor (Intent Definition)
37
+
38
+ Define the original intent:
39
+
40
+ ```python
41
+ sip.anchor("Refund user $50 within 7 days")
42
+ ```
43
+
44
+ ### 2) Observe (Output Evaluation)
45
+
46
+ Compare generated output against the anchor:
47
+
48
+ ```python
49
+ result = sip.observe("Refund user $500 immediately")
50
+ ```
51
+
52
+ ### 3) Decision Layer
53
+
54
+ Use alignment and drift signals to decide accept/repair/reject:
55
+
56
+ ```python
57
+ print(result.is_aligned)
58
+ print(result.drift)
59
+ ```
60
+
61
+ ---
62
+
63
+ ## 🔁 Example
64
+
65
+ ```python
66
+ from sip import StateIntegrityProtocol
67
+
68
+ sip = StateIntegrityProtocol()
69
+
70
+ sip.anchor("Delete user account safely")
71
+ result = sip.observe("Create new user account")
72
+
73
+ print(result.is_aligned) # False
74
+ print(result.drift) # e.g., 0.61
75
+ ```
76
+
77
+ `ObservationResult` exposes both `drift` and `last_drift`; both reference the same latest drift score.
78
+
79
+ ---
80
+
81
+ ## 🧱 Architecture
82
+
83
+ SIP is designed as middleware for AI systems:
84
+
85
+ ```text
86
+ User / Agent
87
+
88
+ LLM (generation)
89
+
90
+ SIP Middleware
91
+ ├── Drift detection
92
+ ├── Intent alignment check
93
+ ├── Constraint validation
94
+
95
+ Decision: Accept / Repair / Reject
96
+ ```
97
+
98
+ ---
99
+
100
+ ## 🔐 What SIP Detects
101
+
102
+ - semantic drift
103
+ - numerical manipulation
104
+ - instruction leakage
105
+ - constraint violations
106
+ - intent mismatch
107
+ - prompt injection attempts
108
+
109
+ ---
110
+
111
+ ## 🚀 Why This Matters
112
+
113
+ SIP makes AI systems:
114
+
115
+ - more reliable
116
+ - more predictable
117
+ - safer for production use
118
+ - easier to audit
119
+
120
+ ---
121
+
122
+ ## 🧩 Use Cases
123
+
124
+ - AI agents
125
+ - LLM pipelines
126
+ - autonomous workflows
127
+ - enterprise AI systems
128
+ - chatbots with strict behavior controls
129
+
130
+ ---
131
+
132
+ ## 📦 Installation
133
+
134
+ ```bash
135
+ pip install -e .
136
+ ```
137
+
138
+ For development and tests:
139
+
140
+ ```bash
141
+ python -m pip install -e '.[dev]'
142
+ ```
143
+
144
+ ---
145
+
146
+ ## 🧠 Core API
147
+
148
+ - `anchor(prompt: str)` — define the initial intent state
149
+ - `observe(output: str)` — evaluate drift from the anchored intent
150
+ - `is_aligned: bool` — alignment signal
151
+ - `drift: float` — latest drift score (alias)
152
+ - `last_drift: float` — latest drift score
153
+ - `history: list` — transition history
154
+ - `SIPMiddlewarePipeline` — optional anchor → checks → verify/sign → repair loop orchestration
155
+
156
+ ---
157
+
158
+ ## 🛡️ Middleware + Verification Flow
159
+
160
+ The optional pipeline can run:
161
+
162
+ 1. drift check against the anchor
163
+ 2. intent-alignment check
164
+ 3. constraint-violation check
165
+ 4. `verify_and_sign()` decision
166
+ 5. accept/repair/reject routing
167
+
168
+ ```python
169
+ from sip import SIPMiddlewarePipeline
170
+
171
+ pipeline = SIPMiddlewarePipeline(
172
+ drift_threshold=0.15,
173
+ intent_alignment_threshold=0.3,
174
+ constraints=["do not mention internal token"],
175
+ max_retries=2,
176
+ )
177
+
178
+ pipeline.anchor("Summarize refund policy in 3 bullet points")
179
+ result = pipeline.run(
180
+ "Refund policy summary in 3 bullet points without internal token."
181
+ )
182
+
183
+ print(result.status) # accepted | repair_required | rejected
184
+ print(result.decision.signature) # deterministic decision signature
185
+ print(result.decision.failure_codes) # machine-readable failure causes
186
+ print(result.repair_instructions) # guidance when not accepted
187
+ ```
188
+
189
+ ### Policy Knobs
190
+
191
+ - `drift_threshold`: maximum allowed semantic drift
192
+ - `intent_alignment_threshold`: minimum token-overlap score
193
+ - `constraints`: blocked words/phrases
194
+ - `max_retries`: max repair attempts before rejection
195
+ - `signer`: optional custom signing function for `verify_and_sign()`
196
+
197
+ ---
198
+
199
+ ## 🧪 Testing
200
+
201
+ ```bash
202
+ python -m pytest tests/ -v
203
+ ```
204
+
205
+ ---
206
+
207
+ ## 🛡️ Philosophy
208
+
209
+ > “AI should not just generate outputs — it should stay faithful to intent.”
210
+
211
+ SIP enforces that principle at runtime.
212
+
213
+ ---
214
+
215
+ ## Licensing & Commercial Use
216
+
217
+ - Core SDK (SIP) is licensed under AGPL-3.0.
218
+ - **AI Sentinel** (the full monitoring system) is a separate commercial product and is **not open source**.
219
+ - Companies can use SIP under AGPL terms.
220
+ - For commercial hosted services, white-label use, or custom enterprise versions, please contact us.
@@ -0,0 +1,25 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "state-integrity-protocol"
7
+ version = "0.1.0"
8
+ description = "Minimal Python SDK for semantic drift detection and state integrity tracking."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { text = "AGPL-3.0" }
12
+ keywords = ["ai", "agents", "drift-detection", "semantic-anchor", "llm"]
13
+ dependencies = [
14
+ "numpy>=1.24.0",
15
+ "sentence-transformers>=2.2.0",
16
+ ]
17
+
18
+ [tool.setuptools]
19
+ packages = ["sip"]
20
+
21
+ [project.optional-dependencies]
22
+ dev = ["pytest>=7.0", "scikit-learn>=1.2.0"]
23
+
24
+ [tool.pytest.ini_options]
25
+ testpaths = ["tests"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,37 @@
1
+ """
2
+ State Integrity Protocol (SIP) 🧬
3
+
4
+ A Fidelity-Flow Observation library for detecting and measuring State Decay
5
+ in multi-agent AI pipelines.
6
+ """
7
+
8
+ from sip.anchor import SemanticAnchor
9
+ from sip.middleware import (
10
+ ConstraintViolationResult,
11
+ DriftCheckResult,
12
+ IntentAlignmentResult,
13
+ MiddlewareEvaluation,
14
+ PipelineResult,
15
+ SIPMiddlewarePipeline,
16
+ VerificationDecision,
17
+ )
18
+ from sip.observer import FidelityObserver, TransitionRecord, cosine_similarity
19
+ from sip.protocol import ObservationResult, StateIntegrityProtocol
20
+
21
+ __all__ = [
22
+ "StateIntegrityProtocol",
23
+ "SemanticAnchor",
24
+ "FidelityObserver",
25
+ "ObservationResult",
26
+ "TransitionRecord",
27
+ "cosine_similarity",
28
+ "SIPMiddlewarePipeline",
29
+ "DriftCheckResult",
30
+ "IntentAlignmentResult",
31
+ "ConstraintViolationResult",
32
+ "MiddlewareEvaluation",
33
+ "VerificationDecision",
34
+ "PipelineResult",
35
+ ]
36
+
37
+ __version__ = "0.1.0"
@@ -0,0 +1,69 @@
1
+ """
2
+ SemanticAnchor – captures and stores the embedding of the initial prompt.
3
+
4
+ The anchor acts as the ground-truth reference against which every subsequent
5
+ agent output is measured.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Callable, List, Optional, Sequence
11
+
12
+
13
class SemanticAnchor:
    """
    Stores the *semantic anchor* – the embedding of the origin prompt.

    The anchor is the fixed reference that later outputs are measured
    against, so the stored vector is never handed out by reference:
    :meth:`set` and :attr:`embedding` both return copies, and mutating a
    returned list cannot alter the anchor.

    Parameters
    ----------
    embed_fn:
        A callable ``(text: str) -> Sequence[float]`` that converts a piece
        of text into a numeric vector.  If *None*, ``default_embed_fn`` from
        ``sip.embeddings`` is used.
    """

    def __init__(
        self,
        embed_fn: Optional[Callable[[str], Sequence[float]]] = None,
    ) -> None:
        if embed_fn is None:
            # Imported lazily: only needed when no custom embedder is given.
            from sip.embeddings import default_embed_fn

            embed_fn = default_embed_fn
        self._embed_fn = embed_fn
        self._embedding: Optional[List[float]] = None
        self._text: Optional[str] = None

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def set(self, prompt: str) -> List[float]:
        """
        Embed *prompt* and store it as the anchor.

        Any previously stored anchor is replaced.

        Returns
        -------
        List[float]
            A copy of the stored embedding, so callers can inspect (or even
            modify) it without affecting the anchor.
        """
        # Materialise whatever sequence the embedder returns into a list.
        embedding = list(self._embed_fn(prompt))
        self._embedding = embedding
        self._text = prompt
        return list(embedding)

    @property
    def embedding(self) -> Optional[List[float]]:
        """A copy of the stored anchor embedding, or *None* if not yet set."""
        if self._embedding is None:
            return None
        return list(self._embedding)

    @property
    def text(self) -> Optional[str]:
        """The original anchor text, or *None* if not yet set."""
        return self._text

    def is_set(self) -> bool:
        """Return *True* if an anchor has been established."""
        return self._embedding is not None

    def reset(self) -> None:
        """Clear the anchor (useful when starting a new task chain)."""
        self._embedding = None
        self._text = None
@@ -0,0 +1,78 @@
1
+ """
2
+ State Integrity Protocol (SIP) - Embedding Engine
3
+ Dependency-free TF-IDF embeddings with stop-word filtering ("semantic smoothing").
4
+ """
5
+
6
+ from __future__ import annotations
7
+ import math
8
+ import re
9
+ from collections import Counter
10
+ from typing import List
11
+
12
+ def _tokenize(text: str) -> List[str]:
13
+ """
14
+ Lower-case, filters out numeric noise and common stopwords
15
+ to reduce 'False Positive' drift in demos.
16
+ """
17
+ # Extract alpha-numeric tokens
18
+ tokens = re.findall(r"[a-z0-9]+", text.lower())
19
+
20
+ # Semantic Smoothing: Ignore connector words that don't carry 'Intent'
21
+ stop_words = {
22
+ 'the', 'is', 'at', 'which', 'on', 'and', 'a', 'an', 'to', 'for',
23
+ 'in', 'of', 'with', 'by', 'do', 'does', 'doing', 'it', 'my', 'your'
24
+ }
25
+ return [t for t in tokens if t not in stop_words]
26
+
27
+ def _tf(tokens: List[str]) -> Counter:
28
+ return Counter(tokens)
29
+
30
class TFIDFEmbedder:
    """
    TF-IDF vectoriser that is fitted incrementally, one text per call.

    Every :meth:`embed` call that yields at least one token counts as a new
    document: unseen terms are appended to the vocabulary and the
    document-frequency counts are updated before the vector is built.  The
    returned vector has one slot per vocabulary term known *at that moment*,
    so vectors produced by different calls may differ in length.  Vectors
    are L2-normalised.
    """

    def __init__(self) -> None:
        self._vocab: dict[str, int] = {}   # term -> vector index
        self._df: Counter = Counter()      # term -> number of texts containing it
        self._n_docs: int = 0              # number of non-empty texts embedded

    def embed(self, text: str) -> List[float]:
        """
        Return the L2-normalised TF-IDF vector for *text*.

        Returns an empty list, without touching the statistics, when *text*
        contains no tokens after tokenisation.
        """
        tokens = _tokenize(text)
        if not tokens:
            return []

        term_counts = _tf(tokens)

        # Register this text as a document and fold its terms into the
        # vocabulary / document-frequency tables.
        self._n_docs += 1
        for term in term_counts:
            # New terms take the next free index (the current vocab size).
            self._vocab.setdefault(term, len(self._vocab))
            self._df[term] += 1

        total_tokens = len(tokens)
        weights = [0.0] * len(self._vocab)

        for term, count in term_counts.items():
            # Smoothed IDF: the +1 terms keep the ratio finite and the
            # trailing +1.0 keeps every weight strictly positive.
            idf_score = math.log((1 + self._n_docs) / (1 + self._df[term])) + 1.0
            weights[self._vocab[term]] = (count / total_tokens) * idf_score

        return _l2_normalize(weights)
66
+
67
+ def _l2_normalize(vec: List[float]) -> List[float]:
68
+ norm = math.sqrt(sum(v * v for v in vec))
69
+ if norm == 0.0:
70
+ return vec
71
+ return [v / norm for v in vec]
72
+
73
# Module-wide embedder instance shared by every default_embed_fn call.
_default_embedder = TFIDFEmbedder()


def default_embed_fn(text: str) -> List[float]:
    """
    Embed *text* with the module-level :class:`TFIDFEmbedder` singleton.

    The singleton is shared, so each call whose text contains at least one
    token also updates the vocabulary and document-frequency statistics
    used by every later call in the same process.
    """
    vector = _default_embedder.embed(text)
    return vector