t1-t2-protocol 2.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- t1_t2_mcp_server.py +783 -0
- t1_t2_protocol-2.5.2.dist-info/METADATA +250 -0
- t1_t2_protocol-2.5.2.dist-info/RECORD +7 -0
- t1_t2_protocol-2.5.2.dist-info/WHEEL +5 -0
- t1_t2_protocol-2.5.2.dist-info/entry_points.txt +2 -0
- t1_t2_protocol-2.5.2.dist-info/licenses/LICENSE +21 -0
- t1_t2_protocol-2.5.2.dist-info/top_level.txt +1 -0
t1_t2_mcp_server.py
ADDED
|
@@ -0,0 +1,783 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
T1/T2 Protocol MCP Server — v2.5.2
|
|
4
|
+
|
|
5
|
+
MCP server exposing T1 (structured prompt translation), T2 (confidence evaluation),
|
|
6
|
+
and checksum (structural validation) as standard MCP tools. Compatible with MCP v1.0.
|
|
7
|
+
|
|
8
|
+
v2.5.0 changes (2026-06-12):
|
|
9
|
+
- Bumped version from v2.0.0 → v2.5.0 (Tentative: pragmatic grounding protocol)
|
|
10
|
+
- Enhanced L3 template: added explicit evidence status tracking field
|
|
11
|
+
- New tool: checksum — deterministic structural pre-validation (semantic ECC管线)
|
|
12
|
+
- Refined tool descriptions to reflect protocol lifecycle (0→1 stage)
|
|
13
|
+
- Added "core link exemption" clarification to T1 output requirements
|
|
14
|
+
|
|
15
|
+
v2.0.0 changes (2026-06-12):
|
|
16
|
+
- Added evaluation criteria weight assumption preamble to all outputs
|
|
17
|
+
- Added recursion depth detection and automatic termination
|
|
18
|
+
- Changed confidence from precise % to qualitative descriptors (high/medium/low)
|
|
19
|
+
- Added recursion depth label to all T2 outputs
|
|
20
|
+
- Clarified pure T2 vs T2+RAG scope
|
|
21
|
+
- Added recursion stop signal after 3 layers (diminishing returns threshold)
|
|
22
|
+
- Added version tag in output footer
|
|
23
|
+
|
|
24
|
+
Standard component — no user-specific information included.
|
|
25
|
+
All references use generic terms (user/user's) for portability.
|
|
26
|
+
|
|
27
|
+
Usage:
|
|
28
|
+
python3 t1_t2_mcp_server.py
|
|
29
|
+
|
|
30
|
+
Registration example (in MCP client config):
|
|
31
|
+
mcp_servers:
|
|
32
|
+
t1-t2-protocol:
|
|
33
|
+
command: "python3"
|
|
34
|
+
args: ["/path/to/t1_t2_mcp_server.py"]
|
|
35
|
+
"""
|
|
36
|
+
import json
|
|
37
|
+
import os
|
|
38
|
+
import re
|
|
39
|
+
import sys
|
|
40
|
+
from datetime import datetime, timezone
|
|
41
|
+
from pathlib import Path
|
|
42
|
+
from typing import Any, Optional
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# ─── Qualitative confidence descriptors ───

# Maps an inclusive (low, high) score range to a (label, description) pair.
# Neighbouring ranges share their boundary value (e.g. 85 appears in two
# ranges); qualitative_confidence() scans the ranges from highest to lowest,
# so a shared boundary resolves to the higher band.
CONFIDENCE_MAP = {
    (85, 100): ("高", "证据充分,逻辑自洽,跨条件检验一致"),
    (65, 85): ("中偏高", "方向合理,但精确数字或部分断言待验证"),
    (45, 65): ("中", "方向判断可信,但依赖未验证的假设或来源"),
    (25, 45): ("中偏低", "部分断言有依据,但整体可靠性不足"),
    (0, 25): ("低", "缺乏可验证依据,或存在显式逻辑断裂"),
}

# Short description of each premise tier (L1-L4) used by the T1 templates.
# NOTE(review): not referenced by any function visible in this module.
LEVEL_MAP = {
    "L1": "物理事实/可独立验证",
    "L2": "合理假设/在上下文中高度可能",
    "L3": "科学假说/可证伪但未充分验证",
    "L4": "未知领域/不在推理范围内",
}

# Canonical (Chinese) weight names; validate_weight_hint() normalizes to these.
WEIGHT_OPTIONS = ["事实优先", "效率优先", "成本优先", "鲁棒性优先", "通用优先"]

# Lower-case English spellings accepted as input -> canonical weight name.
WEIGHT_ALIASES = {
    "fact-first": "事实优先",
    "facts-first": "事实优先",
    "factual": "事实优先",
    "efficiency-first": "效率优先",
    "efficiency": "效率优先",
    "cost-first": "成本优先",
    "cost": "成本优先",
    "robustness-first": "鲁棒性优先",
    "robustness": "鲁棒性优先",
    "general-first": "通用优先",
    "general": "通用优先",
}

# Canonical weight name -> English display label (used when locale == "en").
WEIGHT_LABEL_EN = {
    "事实优先": "fact-first",
    "效率优先": "efficiency-first",
    "成本优先": "cost-first",
    "鲁棒性优先": "robustness-first",
    "通用优先": "general-first",
}

# Locale returned by normalize_locale() for missing or unrecognized input.
DEFAULT_LOCALE = "en"
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def qualitative_confidence(score: int) -> dict:
    """Translate a numeric confidence score into a qualitative band.

    The bands of CONFIDENCE_MAP are scanned from the highest range down, and
    both ends of each range are inclusive, so a score sitting on a shared
    boundary lands in the higher band.

    Returns:
        A dict with "label", "description" and "score". A score outside every
        band yields the "未评估" placeholder with score 0.
    """
    for bounds in sorted(CONFIDENCE_MAP, reverse=True):
        floor, cap = bounds
        if floor <= score <= cap:
            name, rationale = CONFIDENCE_MAP[bounds]
            return {"label": name, "description": rationale, "score": score}
    return {"label": "未评估", "description": "无法确定置信度", "score": 0}
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def detect_recursion_depth(answer: str) -> int:
    """Estimate how many evaluation layers deep *answer* already is.

    The text is lower-cased and tested against an ordered list of marker
    patterns. Every matching rule overwrites the depth set by earlier rules,
    so the result is the depth of the LAST rule that matched (0 if none did):

    - meta-response / meta-evaluation markers           -> 1
    - "three/four-layer recursion" style markers        -> 2
    - explicit "layer 4" / "depth 4" markers            -> 3
    - recursion halt / termination markers              -> 3
    """
    # Order matters: later entries take precedence over earlier ones.
    marker_rules = (
        (r'元级回应|元级递归|meta-response|meta-evaluation', 1),
        (r'三层递归|四层递归|n=4.*递归|递归评估.*第.*层', 2),
        (r'第4层|第四层|layer 4|depth 4', 3),
        (r'递归终止|停止递归|recursion.*halt|terminate.*recursion', 3),
    )

    lowered = answer.lower()
    level = 0
    for pattern, value in marker_rules:
        if re.search(pattern, lowered):
            level = value
    return level
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def should_terminate_recursion(depth: int) -> bool:
    """Return True once the detected recursion depth reaches the cut-off of 3."""
    cutoff = 3
    return depth >= cutoff
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# Chinese banner appended to T2 output when recursion is terminated.
RECURSION_TERMINATION_NOTICE = """
══════════════════════════════════════════════
⚠️ 递归终止通知

递归深度已达阈值(≥3层)。边际信息回报率已降至<5%。
方向性结论已收敛至稳定状态。继续递归将指数级衰减回报。

建议:停止递归评估,将已收敛的方向结论作为最终输出。
══════════════════════════════════════════════
"""

# Chinese default-weights preamble, used when no weight_hint is supplied.
WEIGHT_ASSUMPTION_PREAMBLE = """
[权重假设声明]
当前评估默认采用以下权重排序(若未明确指定):
1. 事实准确性/幻觉抑制(最高权重)
2. 逻辑自洽性
3. 可验证性
4. 效率/成本
5. 通用性

若你的评价标准权重与上述不同,请重新指定评估参数。
(例如:若效率 > 事实准确性,请声明"效率优先")
"""

# English counterpart of WEIGHT_ASSUMPTION_PREAMBLE.
WEIGHT_ASSUMPTION_PREAMBLE_EN = """
[Weight Assumption Declaration]
Default evaluation weights (unless specified):
1. Factual accuracy / hallucination suppression (highest)
2. Logical consistency
3. Verifiability
4. Efficiency / cost
5. Generality

If your criteria differ, restate weights explicitly
(e.g. declare "efficiency-first" when efficiency > factual accuracy).
"""

# Chinese recursion-depth section; filled via str.format with the
# placeholders {depth}, {note} and {ceiling}.
RECURSION_DEPTH_TEMPLATE = """
[递归深度] Layer {depth}({note})
递归层数警告:深度每增加1层,自指偏差累积增加约15-20%。
第{depth}层评估的置信度上限 = {ceiling}%(理论最大值)
"""

# English counterpart of RECURSION_DEPTH_TEMPLATE (same placeholders).
RECURSION_DEPTH_TEMPLATE_EN = """
[Recursion Depth] Layer {depth} ({note})
Warning: each additional layer adds ~15-20% self-reference bias.
Layer {depth} confidence ceiling = {ceiling}% (theoretical maximum)
"""

# English counterpart of RECURSION_TERMINATION_NOTICE.
RECURSION_TERMINATION_NOTICE_EN = """
══════════════════════════════════════════════
⚠️ Recursion Termination Notice

Recursion depth reached threshold (≥3). Marginal information gain <5%.
Directional conclusions have converged. Further recursion yields diminishing returns.

Recommendation: stop recursive evaluation; treat converged conclusions as final.
══════════════════════════════════════════════
"""

# Version label written into the footer of every T1/T2 output.
VERSION_TAG = "T1/T2 Protocol v2.5.2"
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def normalize_locale(locale: Any) -> str:
    """Collapse a user-supplied locale into 'en' or 'zh'.

    Matching ignores case and surrounding whitespace. Anything that is not a
    string, is blank, or is not a recognized spelling falls back to
    DEFAULT_LOCALE.
    """
    if isinstance(locale, str):
        tag = locale.strip().lower()
        if tag in {"en", "english"}:
            return "en"
        if tag in {"zh", "cn", "chinese", "zh-cn", "zh-hans"}:
            return "zh"
    return DEFAULT_LOCALE
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def weight_display(canonical: str, locale: str) -> str:
    """Pick the label shown to the user for a canonical weight name.

    For locale 'en' the English label from WEIGHT_LABEL_EN is used (falling
    back to the canonical name when there is no entry); every other locale
    gets the canonical name unchanged.
    """
    return canonical if locale != "en" else WEIGHT_LABEL_EN.get(canonical, canonical)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def validate_weight_hint(hint: str) -> tuple[bool, str]:
    """Validate and normalize a user-supplied evaluation weight hint.

    Matching is deliberately lenient: after trimming whitespace, the hint is
    accepted when it equals, contains, or is contained in one of the English
    aliases (case-insensitive) or one of the canonical Chinese options.

    Args:
        hint: Raw weight hint. An empty/falsy hint means "not specified".

    Returns:
        ``(True, "")`` when no hint was given, ``(True, canonical_name)`` for
        a recognized hint, or ``(False, error_message)`` otherwise.
    """
    if not hint:
        return True, ""

    cleaned = hint.strip()
    if cleaned:
        lowered = cleaned.lower()
        if lowered in WEIGHT_ALIASES:
            return True, WEIGHT_ALIASES[lowered]
        for alias, canonical in WEIGHT_ALIASES.items():
            if alias in lowered or lowered in alias:
                return True, canonical
        # Compare the trimmed hint so padded input such as " 事实 " still matches.
        for opt in WEIGHT_OPTIONS:
            if opt in cleaned or cleaned in opt:
                return True, opt

    # A whitespace-only hint also ends up here. It must not reach the
    # substring checks above: "" is a substring of every alias, which would
    # silently accept blank input as the first alias's weight.
    allowed = list(WEIGHT_LABEL_EN.values()) + WEIGHT_OPTIONS
    return False, f"Unknown weight: '{hint}'. Allowed values: {allowed}"
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
# ─── Checksum validation (v2.5 new) ───
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def _record_blocked_checksum() -> None:
    """Best-effort bump of the per-day "blocked checksum" counter file.

    The counter lives at ~/.t1-t2-protocol/var/counters/checksum_blocked.json
    and holds {"date": "YYYY-MM-DD", "count": N} for the current local date.
    It is bookkeeping only, so every failure (unresolvable home directory,
    unreadable/unwritable path, malformed file) is swallowed rather than
    allowed to break validation.
    """
    try:
        counter_path = (
            Path.home() / ".t1-t2-protocol" / "var" / "counters" / "checksum_blocked.json"
        )
        today = datetime.now().strftime("%Y-%m-%d")
        try:
            state = json.loads(counter_path.read_text(encoding="utf-8"))
        except (FileNotFoundError, json.JSONDecodeError):
            state = None

        # Only trust a well-formed record for today; anything else starts over.
        if (
            isinstance(state, dict)
            and state.get("date") == today
            and isinstance(state.get("count"), int)
        ):
            state["count"] += 1
        else:
            state = {"date": today, "count": 1}

        counter_path.parent.mkdir(parents=True, exist_ok=True)
        counter_path.write_text(json.dumps(state, ensure_ascii=False), encoding="utf-8")
    except (OSError, RuntimeError, ValueError):
        # OSError: filesystem problems; RuntimeError: Path.home() cannot be
        # resolved; ValueError: undecodable file contents.
        return


def structural_checksum(text: str) -> dict:
    """Deterministic structural pre-validation of an AI answer (pre-T2 checksum).

    Pure regex/string checks with no LLM dependency. The text passes when it
    contains at least two ``[L1``..``[L4`` level tags, ends (ignoring trailing
    whitespace) with ``---``, and contains none of the flagged code patterns
    (``rm -rf``, ``os.system``, ``eval(``, ``exec(``).

    Side effect: on failure a per-day counter file under the user's home
    directory is updated on a best-effort basis, unless the environment
    variable T1T2_DISABLE_COUNTERS is "1".

    Args:
        text: The answer text to validate.

    Returns:
        A dict with "checksum_passed", the individual diagnostic flags and
        counts, the list of "errors", and an "_info" note.
    """
    lines = text.splitlines()
    section_count = len(re.findall(r'\[L[1-4]', text))
    has_level_tag = section_count > 0
    has_close_delimiter = text.rstrip().endswith("---")
    lowered = text.lower()
    has_failure_keywords = any(
        kw in lowered for kw in ["未知", "边界", "不适用", "伪命题"]
    )
    has_dangerous_patterns = bool(re.search(
        r'\brm\s+-rf\b|\bos\.system\b|\beval\(|\bexec\(',
        text
    ))

    errors = []
    if not has_level_tag:
        errors.append("Missing [L1]/[L2]/[L3]/[L4] level tags")
    if not has_close_delimiter:
        errors.append("Missing trailing delimiter ---")
    if section_count < 2:
        errors.append(f"Insufficient level tags (found {section_count}, expected ≥2)")
    if has_dangerous_patterns:
        errors.append("Dangerous pattern detected; structural validation failed")

    passed = not errors

    # Optional counter for blocked checksums (disable in tests via T1T2_DISABLE_COUNTERS=1)
    if not passed and os.environ.get("T1T2_DISABLE_COUNTERS") != "1":
        _record_blocked_checksum()

    return {
        "checksum_passed": passed,
        "section_count": section_count,
        "line_count": len(lines),
        "has_level_tag": has_level_tag,
        "has_close_delimiter": has_close_delimiter,
        "has_failure_keywords": has_failure_keywords,
        "has_dangerous_patterns": has_dangerous_patterns,
        "errors": errors,
        "_info": "Deterministic struct validation, zero LLM dependency. Checks format completeness, not semantic correctness.",
    }
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def json_rpc_error(id_: Any, code: int, message: str, data: Any = None):
    """Build a JSON-RPC 2.0 error response.

    The "data" member is attached to the error object only when *data* is
    not None.
    """
    error = {"code": code, "message": message}
    if data is not None:
        error["data"] = data
    return {"jsonrpc": "2.0", "id": id_, "error": error}
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def json_rpc_result(id_: Any, result: Any):
    """Build a JSON-RPC 2.0 success response carrying *result* for request *id_*."""
    return dict(jsonrpc="2.0", id=id_, result=result)
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def validate_tool_arguments(arguments: Any, required: list[str]) -> Optional[str]:
    """Check MCP tool arguments before a tool is dispatched.

    Checks run in this order: *arguments* is a dict, every required name is
    present, every required value is a string, and the optional
    "weight_hint" (when present and not None) is a string.

    Returns:
        None when everything is acceptable, otherwise a message describing
        the first problem found.
    """
    if not isinstance(arguments, dict):
        return "params.arguments must be an object"

    absent = list(filter(lambda key: key not in arguments, required))
    if absent:
        return "Missing required argument(s): " + ", ".join(absent)

    non_string = next(
        (key for key in required if not isinstance(arguments[key], str)),
        None,
    )
    if non_string is not None:
        return f"Argument '{non_string}' must be a string"

    hint = arguments.get("weight_hint")
    if not (hint is None or isinstance(hint, str)):
        return "Argument 'weight_hint' must be a string"

    return None
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
class T1T2Server:
    """Minimal MCP server for T1/T2 Protocol v2.5.

    Speaks newline-delimited JSON-RPC 2.0 over stdin/stdout and exposes three
    tools: ``t1_protocol`` (structured prompt template), ``t2_protocol``
    (confidence-evaluation template) and ``checksum`` (structural validation).
    """

    def __init__(self):
        # Returned verbatim as "serverInfo" in the initialize response.
        self.server_info = {
            "name": "t1-t2-protocol",
            "version": "2.5.2",
            "description": (
                "T1/T2 Protocol: structured prompt translation, "
                "confidence evaluation, and structural checksum validation "
                "(v2.5: pragmatic grounding + semantic ECC pipeline)"
            ),
        }

    def handle_initialize(self, req: dict) -> dict:
        """Answer the MCP ``initialize`` handshake, advertising the tools capability."""
        return json_rpc_result(req.get("id"), {
            "protocolVersion": "2025-03-26",
            "capabilities": {
                "tools": {},
            },
            "serverInfo": self.server_info,
        })

    def handle_list_tools(self, req: dict) -> dict:
        """Answer ``tools/list`` with the name, description and schema of each tool."""
        tools = [
            {
                "name": "t1_protocol",
                "description": (
                    "Translates ambiguous questions into T1 structured prompts. "
                    "Output: L1 facts / L2 assumptions / L3 hypotheses / L4 unknowns / "
                    "core question / output requirements. v2.5+: L3 evidence status, core link exemption."
                ),
                "inputSchema": {
                    "type": "object",
                    "properties": {
                        "question": {
                            "type": "string",
                            "description": "The original question to translate",
                        },
                        "weight_hint": {
                            "type": "string",
                            "description": (
                                "Optional evaluation weight hint. "
                                "English: fact-first, efficiency-first, cost-first, "
                                "robustness-first, general-first. "
                                "Chinese: 事实优先, 效率优先, 成本优先, 鲁棒性优先, 通用优先."
                            ),
                        },
                        "locale": {
                            "type": "string",
                            "description": "Output language: 'en' (default) or 'zh'",
                        },
                    },
                    "required": ["question"],
                },
            },
            {
                "name": "t2_protocol",
                "description": (
                    "Confidence evaluation for AI answers. "
                    "Output: qualitative confidence (high/medium/low) + adoption recommendations. "
                    "Run checksum before T2 when format integrity matters."
                ),
                "inputSchema": {
                    "type": "object",
                    "properties": {
                        "answer": {
                            "type": "string",
                            "description": "The AI answer to evaluate",
                        },
                        "weight_hint": {
                            "type": "string",
                            "description": (
                                "Optional evaluation weight hint "
                                "(default: factual accuracy > efficiency). "
                                "Same values as t1_protocol weight_hint."
                            ),
                        },
                        "locale": {
                            "type": "string",
                            "description": "Output language: 'en' (default) or 'zh'",
                        },
                    },
                    "required": ["answer"],
                },
            },
            {
                "name": "checksum",
                "description": (
                    "Deterministic structural pre-validation (semantic ECC Pre-T2 Checksum)\n"
                    "Input: AI answer text\n"
                    "Output: structural integrity assessment (pass/fail) + diagnostics\n"
                    "Zero LLM dependency - pure regex/string ops. Run before T2 to filter corrupted output."
                ),
                "inputSchema": {
                    "type": "object",
                    "properties": {
                        "text": {
                            "type": "string",
                            "description": "The AI answer text to validate",
                        },
                    },
                    "required": ["text"],
                },
            },
        ]
        return json_rpc_result(req.get("id"), {"tools": tools})

    @staticmethod
    def _text_result(id_: Any, text: str) -> dict:
        """Wrap *text* in the single-item text content envelope of a tool result."""
        return json_rpc_result(id_, {"content": [{"type": "text", "text": text}]})

    def handle_tools_call(self, req: dict) -> dict:
        """Validate and execute a ``tools/call`` request.

        Returns a JSON-RPC error with code -32602 for malformed params or
        arguments and -32601 for an unknown tool name.
        """
        request_id = req.get("id")
        params = req.get("params", {})
        if not isinstance(params, dict):
            return json_rpc_error(request_id, -32602, "params must be an object")

        tool_name = params.get("name")
        arguments = params.get("arguments", {})

        if tool_name == "t1_protocol":
            err = validate_tool_arguments(arguments, ["question"])
            if err:
                return json_rpc_error(request_id, -32602, err)
            locale = normalize_locale(arguments.get("locale", DEFAULT_LOCALE))
            return self._text_result(
                request_id,
                self._handle_t1(
                    arguments.get("question", ""),
                    arguments.get("weight_hint", ""),
                    locale,
                ),
            )

        if tool_name == "t2_protocol":
            err = validate_tool_arguments(arguments, ["answer"])
            if err:
                return json_rpc_error(request_id, -32602, err)
            locale = normalize_locale(arguments.get("locale", DEFAULT_LOCALE))
            return self._text_result(
                request_id,
                self._handle_t2(
                    arguments.get("answer", ""),
                    arguments.get("weight_hint", ""),
                    locale,
                ),
            )

        if tool_name == "checksum":
            err = validate_tool_arguments(arguments, ["text"])
            if err:
                return json_rpc_error(request_id, -32602, err)
            report = structural_checksum(arguments.get("text", ""))
            return self._text_result(
                request_id,
                json.dumps(report, ensure_ascii=True, indent=2),
            )

        return json_rpc_error(request_id, -32601, f"Tool not found: {tool_name}")

    def _t1_template_zh(self, weight_section: str, question: str) -> list:
        """Return the Chinese T1 prompt as a list of lines (footer not included)."""
        return [
            "根据T1协议 v2.5(层级化前提分级 + 权重假设 + L3 evidence status),"
            "对以下问题做L1/L2/L3/L4前提分级后,输出格式化提示词。\n",
            weight_section,
            f"\n原始问题:{question}\n",
            "---",
            "请按以下格式输出:\n",
            "[L1事实]",
            "1. (可独立验证的物理事实)",
            "2. ...\n",
            "[L2假设]",
            "1. (合理但未严格验证的假设)",
            "2. ...\n",
            "[L3假说](如有)",
            "1. (可证伪但未充分验证的理论/假说)",
            " ├─ evidence status:{已被实验验证 / 有间接证据支持 / 纯理论推演 / 已被证伪}",
            " └─ 关键争议点:(如果存在不同派系,标注分歧的核心)",
            "2. ...\n",
            "[L4未知/伪命题](不在当前工程考虑范围内,或受限于物理定律的伪命题)",
            "1. ...\n",
            "[核心问题]",
            "(在L1-L3交集内,附当前权重假设下的精确定义)\n",
            "[输出要求]",
            "1. 标注每条结论的L1/L2/L3/L4层级",
            "2. 标注已知边界:什么条件下此结论可能失效",
            "3. 标注未覆盖的问题",
            "4. 标注当前评估遵循的权重假设",
            "5. (v2.5)核心链路声明:若本分析涉及核心链路(资金结算/状态机/权限控制),"
            "精度不可降级。非核心链路适用精度匹配原则。",
        ]

    def _t1_template_en(self, weight_section: str, question: str) -> list:
        """Return the English T1 prompt as a list of lines (footer not included)."""
        return [
            "According to T1 Protocol v2.5 (tiered premise decomposition + weight assumptions + L3 evidence status), "
            "decompose the following question into L1/L2/L3/L4 tiers and output a formatted prompt.\n",
            weight_section,
            f"\nOriginal question: {question}\n",
            "---",
            "Output format:\n",
            "[L1 Facts]",
            "1. (Independently verifiable facts)",
            "2. ...\n",
            "[L2 Assumptions]",
            "1. (Reasonable but unverified assumptions)",
            "2. ...\n",
            "[L3 Hypotheses] (if applicable)",
            "1. (Falsifiable claim that is not yet fully verified)",
            " ├─ evidence status: {experimentally verified / indirect evidence / theoretical / falsified}",
            " └─ key debate: (if competing views exist, note the core disagreement)",
            "2. ...\n",
            "[L4 Unknown / Pseudo-proposition] (outside current scope or physically impossible)",
            "1. ...\n",
            "[Core Question]",
            "(Precise definition at the intersection of L1-L3, under current weight assumptions)\n",
            "[Output Requirements]",
            "1. Label each conclusion with L1/L2/L3/L4 tier",
            "2. Note boundary conditions: when might this conclusion be invalid",
            "3. Note uncovered questions",
            "4. State the weight assumption this evaluation follows",
            "5. (v2.5) Core link exemption: if this analysis involves core systems "
            "(settlement/state machine/access control), precision is not negotiable. "
            "Non-core links follow the precision-matching principle.",
        ]

    @staticmethod
    def _weight_section(weight_hint: str, locale: str) -> str:
        """Build the weight paragraph shared by the T1 and T2 templates.

        Yields an error line for an unrecognized hint, a declaration line for
        a recognized one, and the default-weights preamble when no hint was
        supplied.
        """
        valid, result = validate_weight_hint(weight_hint)
        if not valid:
            if locale == "en":
                return f"[Weight Error] {result} Using default weights.\n"
            return f"[权重错误] {result} 使用默认权重。\n"
        if weight_hint:
            label = weight_display(result, locale)
            if locale == "en":
                return f"[Weight Declaration] User-specified weight: {label}\n"
            return f"[权重声明] 用户指定权重:{label}\n"
        preamble = (
            WEIGHT_ASSUMPTION_PREAMBLE_EN if locale == "en" else WEIGHT_ASSUMPTION_PREAMBLE
        )
        return preamble.strip()

    @staticmethod
    def _footer() -> list:
        """Return the closing delimiter plus the version/UTC-timestamp line."""
        stamp = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
        return ["\n---", f"{VERSION_TAG} | {stamp}"]

    def _handle_t1(self, question: str, weight_hint: str = "", locale: str = DEFAULT_LOCALE) -> str:
        """Generate T1 template from question. Supports locale='zh' or 'en'."""
        weight_section = self._weight_section(weight_hint, locale)

        if locale == "en":
            parts = self._t1_template_en(weight_section, question)
        else:
            parts = self._t1_template_zh(weight_section, question)

        parts.extend(self._footer())
        return "\n".join(parts)

    def _handle_t2(self, answer: str, weight_hint: str = "", locale: str = DEFAULT_LOCALE) -> str:
        """Generate T2 evaluation template. Supports locale='zh' or 'en'.

        The recursion depth detected in *answer* drives the depth section,
        the confidence ceiling (100 minus 15 per layer, never below 40) and,
        at depth >= 3, the termination notices.
        """
        depth = detect_recursion_depth(answer)
        terminate = should_terminate_recursion(depth)

        ceiling = max(100 - depth * 15, 40)
        if locale == "en":
            depth_note = {
                0: "first evaluation",
                1: "meta-response",
                2: "meta-meta response",
                3: "recursion threshold",
            }.get(depth, "recursion depth exceeded")
        else:
            depth_note = {
                0: "首次评估",
                1: "元级回应",
                2: "元-元级回应",
                3: "递归终止阈值",
            }.get(depth, "递归深度超过阈值")

        weight_section = self._weight_section(weight_hint, locale)

        depth_template = (
            RECURSION_DEPTH_TEMPLATE_EN if locale == "en" else RECURSION_DEPTH_TEMPLATE
        )
        termination_notice = (
            RECURSION_TERMINATION_NOTICE_EN if locale == "en" else RECURSION_TERMINATION_NOTICE
        )

        if locale == "en":
            parts = [
                "According to T2 Protocol v2.5 (10-step reasoning pipeline + weight assumptions + recursion depth detection + pre-checksum filter), "
                "evaluate the confidence of the following answer.\n",
                weight_section,
                depth_template.format(depth=depth, note=depth_note, ceiling=ceiling),
            ]
            if terminate:
                parts.append(termination_notice.strip())
            parts.extend([
                "\nAnswer to evaluate:",
                answer,
                "---",
                "v2.5 pre-check: run checksum before T2 to filter malformed output.\n",
                "Evaluation steps:",
                "Step 0: Recursion depth detection + weight declaration",
                "Step 1: First principles — identify essential claims",
                "Step 2: Systems thinking — decompose structure",
                "Step 3: Critical thinking — identify biases",
                "Step 4-5: Meta-task decomposition + algorithmic execution",
                "Step 6: Strict logical verification — identify breaks",
                "Step 7: Incorporate boundaries and rethink",
                "Step 8: Native framework reasoning (assume true)",
                "Step 9: Final critical pass",
                "Step 10: Synthesis\n",
                "Output format (v2.5 — qualitative confidence, no precise percentages):",
                "**Confidence**: {high/medium-high/medium/medium-low/low}",
                " - Description: (one-line rationale)",
                " - Recursion depth: Layer N",
                " - Weight assumption: (current evaluation weight)\n",
                "**Adoption Recommendations**:",
                "| Adopt | Content | Correction | Evidence Tier |",
                "|-------|---------|------------|---------------|",
                "| ✅ Adopt | ... | — | L1/L2/L3 |",
                "| ⚠️ Reserved | ... | ... | L1/L2/L3 |",
                "| ❌ N/A | ... | ... | L1/L2/L3 |",
                "\n**Key notes (v2.5 conclusions)**:",
                "1. T2's core value is structured exposure of uncertainty, not hallucination elimination",
                "2. All precise numbers (including source citations) are L3 hypotheses until independently verified",
                "3. At recursion depth >= 3, incremental info < 5% — terminate recursion",
                "4. Pure T2 (no external retrieval) should not be the default confidence method",
                "5. T2+RAG (with external verification) recommended only for high-risk + non-real-time scenarios",
                "6. (v2.5) Core link precision is not negotiable; non-core links follow precision-matching",
            ])
        else:
            parts = [
                "根据T2协议 v2.5(10步推理管道 + 权重假设 + 递归深度检测 + 校验和前置过滤),"
                "对以下回答做置信度评估。\n",
                weight_section,
                depth_template.format(depth=depth, note=depth_note, ceiling=ceiling),
            ]
            if terminate:
                parts.append(termination_notice.strip())
            parts.extend([
                "\n被评估回答:",
                answer,
                "---",
                "v2.5前置建议:在调用T2之前,建议先运行 checksum 工具做结构预校验。"
                "如果 checksum_passed == false,先修复格式错误再评估。\n",
                "评估步骤:",
                "Step 0: 递归深度检测 + 权重假设声明",
                "Step 1: 第一性原理 — 识别回答的本质判断",
                "Step 2: 系统思维 — 拆解组件结构",
                "Step 3: 批判思维 — 识别偏差方向",
                "Step 4-5: 元任务分解 + 算法思维执行",
                "Step 6: 严格逻辑验证 — 识别断裂",
                "Step 7: 边界纳入并重新思考",
                "Step 8: 原生框架推理(假定为真)",
                "Step 9: 最后一轮批判",
                "Step 10: 统合\n",
                "输出格式(v2.5 — 定性置信度,禁用精确百分比):",
                "**置信度**:{高/中偏高/中/中偏低/低}",
                " - 描述:(一句话支撑理由)",
                " - 递归深度:Layer N",
                " - 权重假设:(当前评估遵循的权重)\n",
                "**采纳建议**:",
                "| 采纳 | 内容 | 修正 | 证据层级 |",
                "|------|------|------|----------|",
                "| ✅ 采纳 | ... | — | L1/L2/L3 |",
                "| ⚠️ 有保留 | ... | ... | L1/L2/L3 |",
                "| ❌ 不适用 | ... | ... | L1/L2/L3 |",
                "\n**重要说明(v2.5收敛结论)**:",
                "1. T2协议的核心价值是结构化暴露不确定性,而非消除幻觉",
                "2. 所有精确数字(含来源引用)在独立验证前视为L3假说",
                "3. 递归深度≥3时,增量信息<5%,建议终止递归",
                "4. 纯T2(无外部检索)不应作为默认置信度方法",
                "5. T2+RAG(绑定外部验证)仅推荐于高风险+非实时场景",
                "6. (v2.5)核心链路精度不可降级,非核心链路适用精度匹配原则",
            ])

        parts.extend(self._footer())

        if terminate:
            if locale == "en":
                parts.append("\n⚠️ Recursion termination triggered. Final converged conclusion above.")
            else:
                parts.append("\n⚠️ 递归终止已触发。以上方向性结论视为最终输出。")

        return "\n".join(parts)

    def dispatch(self, req: dict) -> Optional[dict]:
        """Route one decoded JSON-RPC message to its handler.

        Returns:
            The response dict, or None when nothing must be written back
            (``notifications/initialized`` and any other id-less
            ``notifications/*`` message).
        """
        # json.loads may yield a list or scalar; calling .get() on it would
        # raise, so reject it as an Invalid Request instead.
        if not isinstance(req, dict):
            return json_rpc_error(None, -32600, "Invalid Request: expected a JSON object")

        method = req.get("method", "")
        if method == "initialize":
            return self.handle_initialize(req)
        if method == "tools/list":
            return self.handle_list_tools(req)
        if method == "tools/call":
            return self.handle_tools_call(req)
        if method == "ping":
            # Liveness check: answered with an empty result.
            return json_rpc_result(req.get("id"), {})
        if method == "notifications/initialized":
            return None
        if "id" not in req and isinstance(method, str) and method.startswith("notifications/"):
            # JSON-RPC notifications must never be answered, even when the
            # method is one this server does not implement.
            return None
        return json_rpc_error(req.get("id"), -32601, f"Method not found: {method}")

    def run(self):
        """Read JSON-RPC from stdin, write responses to stdout.

        One message per line; blank lines are skipped. Undecodable lines get
        a -32700 response, and an unexpected handler failure is reported as
        -32603 so a single bad request cannot end the session.
        """
        for raw in sys.stdin:
            raw = raw.strip()
            if not raw:
                continue
            try:
                req = json.loads(raw)
            except json.JSONDecodeError as e:
                resp = json_rpc_error(None, -32700, f"Parse error: {e}")
            else:
                try:
                    resp = self.dispatch(req)
                except Exception as e:  # top-level boundary: report, keep serving
                    req_id = req.get("id") if isinstance(req, dict) else None
                    resp = json_rpc_error(req_id, -32603, f"Internal error: {e}")

            if resp is not None:
                sys.stdout.write(json.dumps(resp, ensure_ascii=True) + "\n")
                sys.stdout.flush()
|
|
775
|
+
|
|
776
|
+
|
|
777
|
+
def main() -> None:
    """Start the stdio server (entry point of the ``t1-t2-protocol`` console script)."""
    server = T1T2Server()
    server.run()


# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: t1-t2-protocol
|
|
3
|
+
Version: 2.5.2
|
|
4
|
+
Summary: Heterogeneous validation protocol for MCP — structured reasoning (T1) + cross-model confidence evaluation (T2) + deterministic checksum
|
|
5
|
+
Author: Fauxetine
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Fauxetine/t1-t2-protocol
|
|
8
|
+
Project-URL: PyPI, https://pypi.org/project/t1-t2-protocol/
|
|
9
|
+
Project-URL: Source, https://github.com/Fauxetine/t1-t2-protocol
|
|
10
|
+
Project-URL: Documentation, https://github.com/Fauxetine/t1-t2-protocol#readme
|
|
11
|
+
Project-URL: Issues, https://github.com/Fauxetine/t1-t2-protocol/issues
|
|
12
|
+
Keywords: mcp,model-context-protocol,ai-safety,llm-validation,cross-validation,structured-reasoning,ai-agent,heterogeneous
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Provides-Extra: dev
|
|
25
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
26
|
+
Dynamic: license-file
|
|
27
|
+
|
|
28
|
+
[Python 3.10+](https://www.python.org/)
|
|
29
|
+
[PyPI package](https://pypi.org/project/t1-t2-protocol/)
|
|
30
|
+
[License: MIT](LICENSE)
|
|
31
|
+
[Model Context Protocol](https://modelcontextprotocol.io/)
|
|
32
|
+
|
|
33
|
+
# T1/T2 Protocol — Heterogeneous Validation for MCP
|
|
34
|
+
|
|
35
|
+
<!-- mcp-name: io.github.fauxetine/t1-t2-protocol -->
|
|
36
|
+
|
|
37
|
+
**T1/T2 is an MCP server that makes AI reasoning verifiable, auditable, and trustworthy** — by decomposing ambiguous questions into structured tiers (T1), then validating answers through cross-model evaluation (T2), with a deterministic checksum layer that doesn't depend on any LLM.
|
|
38
|
+
|
|
39
|
+
## Why?
|
|
40
|
+
|
|
41
|
+
When an LLM checks its own answer, it uses the same training data, the same reasoning preferences, and the same systematic biases. **Self-reflection cannot catch its own blind spots.**
|
|
42
|
+
|
|
43
|
+
T1/T2 introduces **heterogeneous validation**: the model that produces the answer and the model that evaluates it should be different. Their different training distributions cover each other's blind spots.
|
|
44
|
+
|
|
45
|
+
## Tools
|
|
46
|
+
|
|
47
|
+
| Tool | Function | Why it matters |
|
|
48
|
+
|------|----------|---------------|
|
|
49
|
+
| **t1_protocol** | Decomposes ambiguous questions into L1 (facts) / L2 (assumptions) / L3 (hypotheses) / L4 (unknowns) | Forces structured reasoning before answering |
|
|
50
|
+
| **t2_protocol** | Evaluates answer quality from another model's perspective | Catches blind spots self-reflection misses |
|
|
51
|
+
| **checksum** | Deterministic structural validation — pure regex, zero LLM dependency | Safety that doesn't scale with intelligence |
|
|
52
|
+
|
|
53
|
+
## Quick Start
|
|
54
|
+
|
|
55
|
+
### Requirements
|
|
56
|
+
|
|
57
|
+
- Python 3.10+
|
|
58
|
+
- An MCP client: [Cursor](https://cursor.sh/), [Claude Desktop](https://claude.ai/download), [Windsurf](https://codeium.com/windsurf), or any MCP-compatible host
|
|
59
|
+
|
|
60
|
+
### Install
|
|
61
|
+
|
|
62
|
+
From PyPI (recommended):
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
pip install t1-t2-protocol
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
From source (development):
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
git clone https://github.com/Fauxetine/t1-t2-protocol.git
|
|
72
|
+
cd t1-t2-protocol
|
|
73
|
+
pip install -e ".[dev]"
|
|
74
|
+
python -m pytest tests/ -v
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Or run directly without installing:
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
python3 src/t1_t2_mcp_server.py
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
After `pip install`, register the console script in your MCP client configuration:
|
|
84
|
+
|
|
85
|
+
```json
|
|
86
|
+
{
|
|
87
|
+
"mcpServers": {
|
|
88
|
+
"t1-t2-protocol": {
|
|
89
|
+
"type": "stdio",
|
|
90
|
+
"command": "t1-t2-protocol"
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Configure
|
|
97
|
+
|
|
98
|
+
**Cursor** — add to `.cursor/mcp.json`:
|
|
99
|
+
|
|
100
|
+
```json
|
|
101
|
+
{
|
|
102
|
+
"mcpServers": {
|
|
103
|
+
"t1-t2-protocol": {
|
|
104
|
+
"type": "stdio",
|
|
105
|
+
"command": "python3",
|
|
106
|
+
"args": ["/path/to/src/t1_t2_mcp_server.py"]
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
**Claude Desktop** — add to `claude_desktop_config.json`:
|
|
113
|
+
|
|
114
|
+
```json
|
|
115
|
+
{
|
|
116
|
+
"mcpServers": {
|
|
117
|
+
"t1-t2-protocol": {
|
|
118
|
+
"command": "python3",
|
|
119
|
+
"args": ["/path/to/src/t1_t2_mcp_server.py"]
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Usage
|
|
126
|
+
|
|
127
|
+
### T1: Structure a vague question
|
|
128
|
+
|
|
129
|
+
Call `t1_protocol` with your question. It returns a structured prompt with four tiers:
|
|
130
|
+
|
|
131
|
+
```
|
|
132
|
+
Input: "Should we migrate our monolith to microservices?"
|
|
133
|
+
|
|
134
|
+
Output: Structured prompt with:
|
|
135
|
+
[L1 Facts] Team size, codebase size, current stack
|
|
136
|
+
[L2 Assumptions] Expected benefits that need verification
|
|
137
|
+
[L3 Hypotheses] Testable claims about migration risk
|
|
138
|
+
[L4 Unknown] Future growth trajectory
|
|
139
|
+
[Core Question] The precise feasibility question
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### T2: Cross-validate a decision
|
|
143
|
+
|
|
144
|
+
Call `t2_protocol` with a decision or answer text. It returns a confidence assessment and adoption recommendations:
|
|
145
|
+
|
|
146
|
+
```
|
|
147
|
+
Input: "Decision text for approach A..."
|
|
148
|
+
|
|
149
|
+
Output:
|
|
150
|
+
Confidence: Medium-High
|
|
151
|
+
Adoption table with:
|
|
152
|
+
✅ Adopt — verified conclusions (L1)
|
|
153
|
+
⚠️ Reserved — needs more evidence (L2)
|
|
154
|
+
❌ N/A — blind spots to address
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### checksum: Validate output structure
|
|
158
|
+
|
|
159
|
+
Call `checksum` with structured text. It returns pass/fail based on deterministic rules:
|
|
160
|
+
|
|
161
|
+
```
|
|
162
|
+
Input: "[L1 Facts]\n1. ...\n[L2 Assumptions]\n1. ...\n---"
|
|
163
|
+
Output: {"checksum_passed": true, "errors": []}
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Full pipeline
|
|
167
|
+
|
|
168
|
+
```
|
|
169
|
+
Vague question → T1 structured decomposition → Decision based on structure → checksum (optional) → T2 validation → Refined decision
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
For time-sensitive factual claims, **search on the caller side before T2** — see [Caller-side web verification (v2.6)](docs/caller-protocol.md).
|
|
173
|
+
|
|
174
|
+
## Configuration
|
|
175
|
+
|
|
176
|
+
### Locale
|
|
177
|
+
|
|
178
|
+
Both `t1_protocol` and `t2_protocol` accept an optional `locale` parameter:
|
|
179
|
+
|
|
180
|
+
| Value | Output |
|
|
181
|
+
|-------|--------|
|
|
182
|
+
| `en` (default) | English templates |
|
|
183
|
+
| `zh` | Chinese templates |
|
|
184
|
+
|
|
185
|
+
Example: `{"question": "...", "locale": "zh"}`
|
|
186
|
+
|
|
187
|
+
### Weight hints
|
|
188
|
+
|
|
189
|
+
Both `t1_protocol` and `t2_protocol` accept an optional `weight_hint` parameter to bias evaluation criteria:
|
|
190
|
+
|
|
191
|
+
| Weight | Effect |
|
|
192
|
+
|--------|--------|
|
|
193
|
+
| `事实优先` / `fact-first` | Prioritizes factual accuracy |
|
|
194
|
+
| `效率优先` / `efficiency-first` | Prioritizes efficiency |
|
|
195
|
+
| `成本优先` / `cost-first` | Prioritizes cost |
|
|
196
|
+
| `鲁棒性优先` / `robustness-first` | Prioritizes robustness |
|
|
197
|
+
| `通用优先` / `general-first` | No specific bias |
|
|
198
|
+
|
|
199
|
+
### Recursion protection
|
|
200
|
+
|
|
201
|
+
T2 automatically detects recursion depth and terminates at depth >= 3, where marginal information gain drops below 5%.
|
|
202
|
+
|
|
203
|
+
## Design Philosophy
|
|
204
|
+
|
|
205
|
+
See [docs/philosophy.md](docs/philosophy.md) for the full design rationale.
|
|
206
|
+
|
|
207
|
+
Core tenets:
|
|
208
|
+
|
|
209
|
+
1. **Separate intelligence from trust** — AI capability and AI safety should be guaranteed by different systems
|
|
210
|
+
2. **Heterogeneous over self-referential** — Cross-model validation is more reliable than self-reflection
|
|
211
|
+
3. **Deterministic over probabilistic** — What can be checked by code should not be left to model judgment
|
|
212
|
+
|
|
213
|
+
## Examples
|
|
214
|
+
|
|
215
|
+
See [examples/](examples/) for step-by-step walkthroughs:
|
|
216
|
+
|
|
217
|
+
- [T1: Structure a vague question](examples/t1-basic.md)
|
|
218
|
+
- [T2: Cross-validate a decision](examples/t2-basic.md)
|
|
219
|
+
- [Full pipeline: T1 → decision → T2](examples/full-pipeline.md)
|
|
220
|
+
|
|
221
|
+
## Positioning
|
|
222
|
+
|
|
223
|
+
| Project | Layer | What it does | T1/T2 relationship |
|
|
224
|
+
|---------|-------|--------------|-------------------|
|
|
225
|
+
| [Sequential Thinking](https://github.com/modelcontextprotocol/servers/tree/main/src/sequentialthinking) (official MCP) | Caller-side chain-of-thought | One model logs iterative steps | Complementary — T1 adds L1–L4 tiers + T2 cross-model review |
|
|
226
|
+
| [ThoughtProof](https://github.com/modelcontextprotocol/modelcontextprotocol/discussions/2574) / verdict APIs | Server-side verification | `APPROVE`/`DENY`/`UNCERTAIN` with confidence | Complementary — T1/T2 structures reasoning *before* verdict APIs act |
|
|
227
|
+
| Self-reflection / prompt chains | Same model | Re-reads or re-prompts its own output | Replaced — heterogeneous validation catches shared blind spots |
|
|
228
|
+
| Tool integrity (e.g. Phionyx) | Transport / tool schema | Detects tool poisoning, schema drift | Orthogonal — T1/T2 does not secure tool definitions |
|
|
229
|
+
|
|
230
|
+
T1/T2 is a **stdlib reference implementation** for [MCP Discussion #2574](https://github.com/modelcontextprotocol/modelcontextprotocol/discussions/2574)-style reasoning discipline: structure first (T1), cross-validate second (T2), checksum what code can verify. It is not a signed verdict API and not a security scanner.
|
|
231
|
+
|
|
232
|
+
## License
|
|
233
|
+
|
|
234
|
+
MIT — see [LICENSE](LICENSE).
|
|
235
|
+
|
|
236
|
+
---
|
|
237
|
+
|
|
238
|
+
*Built for the MCP ecosystem. Part of a broader exploration into AI safety through deterministic architecture.*
|
|
239
|
+
|
|
240
|
+
---
|
|
241
|
+
|
|
242
|
+
## Links
|
|
243
|
+
|
|
244
|
+
- [Contributing](CONTRIBUTING.md)
|
|
245
|
+
- [MCP Registry](https://registry.modelcontextprotocol.io/)
|
|
246
|
+
- [Security policy](SECURITY.md)
|
|
247
|
+
- [Changelog](CHANGELOG.md)
|
|
248
|
+
- [Design philosophy](docs/philosophy.md)
|
|
249
|
+
- [Caller-side web verification v2.6](docs/caller-protocol.md)
|
|
250
|
+
- [Agent / MCP host instructions](AGENTS.md)
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
t1_t2_mcp_server.py,sha256=QGMR6EwGSZgM6-iSekNKBbMhCl0cdOWRVCtwkJnmqHA,32818
|
|
2
|
+
t1_t2_protocol-2.5.2.dist-info/licenses/LICENSE,sha256=0fZZFPhk95f7Sj3KhVORRvwQB8ZniUBFcLNfSP-hews,1066
|
|
3
|
+
t1_t2_protocol-2.5.2.dist-info/METADATA,sha256=GUP8r7KpfF6PcRtTSm9WcSdiPoDVLW9Qaq7HF-q4lTY,8948
|
|
4
|
+
t1_t2_protocol-2.5.2.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
5
|
+
t1_t2_protocol-2.5.2.dist-info/entry_points.txt,sha256=hynBOg-pWrlA-Ma_Ug2iZrB2JuVIF1-nB9aIz_Vi9W0,57
|
|
6
|
+
t1_t2_protocol-2.5.2.dist-info/top_level.txt,sha256=bgA9urvRwwMeX7sHj3sO9mskLbdWTH32KVIsjzTNm4M,17
|
|
7
|
+
t1_t2_protocol-2.5.2.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Fauxetine
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
t1_t2_mcp_server
|