source-kb 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +50 -0
- cli/__main__.py +5 -0
- cli/commands/__init__.py +1 -0
- cli/commands/anchor_fix.py +47 -0
- cli/commands/diff_doc.py +52 -0
- cli/commands/dispatch.py +77 -0
- cli/commands/extract.py +72 -0
- cli/commands/file_list.py +74 -0
- cli/commands/index.py +84 -0
- cli/commands/lock.py +89 -0
- cli/commands/merge.py +60 -0
- cli/commands/merge_delta.py +19 -0
- cli/commands/metadata.py +24 -0
- cli/commands/pipeline.py +45 -0
- cli/commands/post_merge.py +43 -0
- cli/commands/query.py +52 -0
- cli/commands/render.py +101 -0
- cli/commands/scan_repos.py +46 -0
- cli/commands/setup.py +94 -0
- cli/commands/split.py +196 -0
- cli/commands/stale_files.py +98 -0
- cli/commands/validate.py +191 -0
- core/__init__.py +32 -0
- core/config.py +261 -0
- core/docs/__init__.py +7 -0
- core/docs/section_updater.py +286 -0
- core/docs/shared.py +149 -0
- core/git.py +294 -0
- core/interfaces.py +249 -0
- core/monitor/__init__.py +5 -0
- core/monitor/progress.py +83 -0
- core/monitor/prompt_store.py +49 -0
- core/paths.py +141 -0
- core/preset.py +237 -0
- core/preset_accessors.py +202 -0
- core/preset_classify.py +132 -0
- core/preset_hooks.py +129 -0
- core/preset_profile.py +89 -0
- core/prompt/__init__.py +7 -0
- core/prompt/__main__.py +147 -0
- core/prompt/content.py +320 -0
- core/prompt/context_manager.py +164 -0
- core/prompt/renderer.py +236 -0
- core/prompt/response_parser.py +274 -0
- core/prompt/templates.py +357 -0
- core/prompt/validate_parity.py +162 -0
- core/prompt/variables.py +339 -0
- core/rag/__init__.py +22 -0
- core/rag/__main__.py +136 -0
- core/rag/bm25_index.py +268 -0
- core/rag/chunker.py +273 -0
- core/rag/embedder.py +151 -0
- core/rag/indexer.py +292 -0
- core/rag/loader.py +89 -0
- core/rag/retriever.py +82 -0
- core/skeleton/__init__.py +11 -0
- core/skeleton/__main__.py +934 -0
- core/skeleton/anchor_fix.py +250 -0
- core/skeleton/classify.py +331 -0
- core/skeleton/cmd_anchor_fix.py +43 -0
- core/skeleton/cmd_diff_doc.py +44 -0
- core/skeleton/cmd_lock.py +87 -0
- core/skeleton/cmd_merge_delta.py +41 -0
- core/skeleton/community.py +233 -0
- core/skeleton/dependency_graph.py +306 -0
- core/skeleton/diff_doc.py +248 -0
- core/skeleton/dispatch.py +273 -0
- core/skeleton/dispatch_render.py +319 -0
- core/skeleton/dispatch_source.py +111 -0
- core/skeleton/extract.py +218 -0
- core/skeleton/extract_methods.py +298 -0
- core/skeleton/file_list.py +239 -0
- core/skeleton/impact.py +278 -0
- core/skeleton/jar_download.py +177 -0
- core/skeleton/jar_resolver.py +186 -0
- core/skeleton/loader.py +162 -0
- core/skeleton/merge.py +278 -0
- core/skeleton/merge_delta.py +229 -0
- core/skeleton/metadata.py +96 -0
- core/skeleton/metadata_builders.py +264 -0
- core/skeleton/module_dag.py +330 -0
- core/skeleton/parsers/__init__.py +71 -0
- core/skeleton/parsers/jqassistant.py +300 -0
- core/skeleton/parsers/jqassistant_cypher.py +225 -0
- core/skeleton/parsers/regex.py +171 -0
- core/skeleton/parsers/treesitter.py +324 -0
- core/skeleton/parsers/treesitter_java.py +284 -0
- core/skeleton/parsers/treesitter_multi.py +289 -0
- core/skeleton/pom_parser.py +299 -0
- core/skeleton/post_merge.py +295 -0
- core/skeleton/post_merge_llm.py +82 -0
- core/skeleton/query.py +195 -0
- core/skeleton/shard_context.py +177 -0
- core/skeleton/split.py +180 -0
- core/skeleton/split_cache.py +107 -0
- core/skeleton/split_feedback.py +174 -0
- core/skeleton/split_plan.py +219 -0
- core/skeleton/split_plan_helpers.py +305 -0
- core/skeleton/split_plan_llm.py +274 -0
- core/utils.py +135 -0
- core/validators/__init__.py +65 -0
- core/validators/__main__.py +215 -0
- core/validators/consistency.py +203 -0
- core/validators/coverage.py +171 -0
- core/validators/duplicates.py +76 -0
- core/validators/engine.py +224 -0
- core/validators/links.py +76 -0
- core/validators/sampling.py +169 -0
- core/validators/structure.py +144 -0
- engine/__init__.py +7 -0
- engine/assembler.py +231 -0
- engine/confirm.py +65 -0
- engine/dedup.py +106 -0
- engine/main.py +211 -0
- engine/pipeline/__init__.py +163 -0
- engine/pipeline/recovery.py +250 -0
- engine/pipeline/steps/__init__.py +23 -0
- engine/pipeline/steps/audit.py +220 -0
- engine/pipeline/steps/audit_apply.py +195 -0
- engine/pipeline/steps/audit_helpers.py +155 -0
- engine/pipeline/steps/classify_llm.py +236 -0
- engine/pipeline/steps/classify_prompt.py +223 -0
- engine/pipeline/steps/finalize.py +160 -0
- engine/pipeline/steps/generate.py +169 -0
- engine/pipeline/steps/generate_batch.py +197 -0
- engine/pipeline/steps/generate_recovery.py +170 -0
- engine/pipeline/steps/llm_plan_split.py +253 -0
- engine/pipeline/steps/lock.py +64 -0
- engine/pipeline/steps/preflight.py +237 -0
- engine/pipeline/steps/preflight_adjust.py +147 -0
- engine/pipeline/steps/pregenerate.py +130 -0
- engine/pipeline/steps/quality.py +81 -0
- engine/pipeline/steps/skeleton.py +149 -0
- engine/pipeline/steps/source.py +163 -0
- engine/pipeline/steps/sync.py +117 -0
- engine/pipeline/steps/sync_finalize.py +237 -0
- engine/pipeline/steps/sync_update.py +341 -0
- engine/pipelines.py +91 -0
- engine/runner.py +335 -0
- engine/strategies/__init__.py +86 -0
- engine/strategies/api.py +128 -0
- engine/strategies/delegated.py +50 -0
- engine/strategies/dryrun.py +25 -0
- engine/two_phase.py +143 -0
- mcp_server/__init__.py +73 -0
- mcp_server/__main__.py +5 -0
- mcp_server/tools/__init__.py +1 -0
- mcp_server/tools/config.py +63 -0
- mcp_server/tools/discovery.py +276 -0
- mcp_server/tools/generation.py +184 -0
- mcp_server/tools/planning.py +144 -0
- mcp_server/tools/source.py +175 -0
- mcp_server/tools/validation.py +140 -0
- mcp_server/tools/workflow.py +166 -0
- mcp_server/workflow_loader.py +204 -0
- presets/generic/audit_dimensions.md +132 -0
- presets/generic/doc_types.yaml +152 -0
- presets/generic/preset.yaml +115 -0
- presets/java-spring/audit_dimensions.md +228 -0
- presets/java-spring/audit_dimensions.yaml +203 -0
- presets/java-spring/doc_types.yaml +269 -0
- presets/java-spring/hooks.py +122 -0
- presets/java-spring/preset.yaml +341 -0
- presets/java-spring/templates/README.md +34 -0
- presets/java-spring/templates/audit-system.md +15 -0
- presets/java-spring/templates/subagent-aop.md +105 -0
- presets/java-spring/templates/subagent-api.md +63 -0
- presets/java-spring/templates/subagent-architecture.md +111 -0
- presets/java-spring/templates/subagent-async-events.md +107 -0
- presets/java-spring/templates/subagent-audit-api-contracts.md +40 -0
- presets/java-spring/templates/subagent-audit-architecture.md +38 -0
- presets/java-spring/templates/subagent-audit-business.md +40 -0
- presets/java-spring/templates/subagent-audit-data-models.md +40 -0
- presets/java-spring/templates/subagent-business.md +129 -0
- presets/java-spring/templates/subagent-caching.md +75 -0
- presets/java-spring/templates/subagent-database-access.md +114 -0
- presets/java-spring/templates/subagent-enum.md +75 -0
- presets/java-spring/templates/subagent-error-handling.md +91 -0
- presets/java-spring/templates/subagent-external-integrations.md +80 -0
- presets/java-spring/templates/subagent-index.md +122 -0
- presets/java-spring/templates/subagent-messaging.md +97 -0
- presets/java-spring/templates/subagent-model.md +88 -0
- presets/java-spring/templates/subagent-observability.md +91 -0
- presets/java-spring/templates/subagent-scheduled.md +81 -0
- presets/java-spring/templates/subagent-security.md +102 -0
- presets/java-spring/templates/subagent-structure.md +101 -0
- presets/java-spring/templates/subagent-sync-section.md +34 -0
- presets/java-spring/templates/subagent-utils.md +73 -0
- presets/java-spring/templates/sync-system.md +8 -0
- presets/java-spring/workflow-extensions.md +112 -0
- skills/__init__.py +1 -0
- skills/_shared/README.md +30 -0
- skills/_shared/doc-coverage-shared.md +134 -0
- skills/_shared/doc-quality-standard.md +1058 -0
- skills/_shared/doc-subagent-rules.md +762 -0
- skills/_shared/windows-compat.md +89 -0
- skills/kb-audit/SKILL.md +52 -0
- skills/kb-audit/rules.md +88 -0
- skills/kb-audit/steps/step-01-prepare.md +75 -0
- skills/kb-audit/steps/step-02-audit.md +96 -0
- skills/kb-audit/steps/step-03-verify.md +65 -0
- skills/kb-audit/steps/step-04-report.md +64 -0
- skills/kb-init/SKILL.md +146 -0
- skills/kb-init/rules.md +187 -0
- skills/kb-init/steps/step-01-scope.md +62 -0
- skills/kb-init/steps/step-02-source.md +410 -0
- skills/kb-init/steps/step-03-generate.md +307 -0
- skills/kb-init/steps/step-04-quality.md +92 -0
- skills/kb-init/steps/step-05-finalize.md +132 -0
- skills/kb-init/templates/core/execution-modes.md +29 -0
- skills/kb-init/templates/core/output-only.md +4 -0
- skills/kb-init/templates/core/readwrite.md +33 -0
- skills/kb-search/SKILL.md +138 -0
- skills/kb-search/rules.md +64 -0
- skills/kb-sync/SKILL.md +43 -0
- skills/kb-sync/rules.md +70 -0
- skills/kb-sync/scripts/rebuild_module.py +91 -0
- skills/kb-sync/scripts/scan_repos.py +687 -0
- skills/kb-sync/steps/step-01-detect.md +72 -0
- skills/kb-sync/steps/step-02-update.md +71 -0
- skills/kb-sync/steps/step-03-verify.md +47 -0
- skills/kb-sync/steps/step-04-finalize.md +52 -0
- source_kb-0.2.2.dist-info/METADATA +194 -0
- source_kb-0.2.2.dist-info/RECORD +228 -0
- source_kb-0.2.2.dist-info/WHEEL +5 -0
- source_kb-0.2.2.dist-info/entry_points.txt +3 -0
- source_kb-0.2.2.dist-info/licenses/LICENSE +21 -0
- source_kb-0.2.2.dist-info/top_level.txt +6 -0
core/prompt/renderer.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""Prompt renderer — template loading, variable computation, rendering.
|
|
2
|
+
|
|
3
|
+
Delegates content assembly (source, skeleton, file list) to an injected
|
|
4
|
+
PromptAssembler strategy. Core renderer is mode-agnostic.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from core.prompt.renderer import render_prompt
|
|
8
|
+
|
|
9
|
+
prompt = render_prompt(
|
|
10
|
+
template_path="skill/kb-init/templates/subagent-business.md",
|
|
11
|
+
config=config, kb_name="my-kb", module_name="my-service",
|
|
12
|
+
doc_type="business-logic", assembler=my_assembler,
|
|
13
|
+
)
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import logging
|
|
19
|
+
import re
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
from core.interfaces import PromptAssembler
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def render_prompt(
    template_path: str | Path,
    config: dict[str, Any],
    kb_name: str,
    module_name: str,
    doc_type: str,
    assembler: PromptAssembler,
    extras: dict[str, str] | None = None,
    execution_snippet: str = "",
    preset: dict[str, Any] | None = None,
) -> str:
    """Render a sub-agent prompt from a template plus computed variables.

    The template is read from disk, ``{name}`` placeholders (lowercase
    letters, digits and underscores) are replaced with computed values, and
    source / skeleton / global-context sections that the template did not
    reference are appended at the end.

    Args:
        template_path: Path to the .md template file.
        config: Full kb-project.yaml config dict. ``_config_dir`` (default
            ``"."``) is the base for relative ``knowledge_dir`` / ``cache_dir``.
        kb_name: Knowledge base name (key under ``config["knowledge_bases"]``).
        module_name: Module name.
        doc_type: Document type key.
        assembler: PromptAssembler strategy that supplies the file list,
            source content and skeleton content.
        extras: Additional variables to inject. Keys starting with ``__`` are
            ignored. ``file_list_override`` is also read as a path to a file
            listing source paths, one per line.
        execution_snippet: Mode-specific execution guidance text. Replaces
            ``{execution_guidance}`` when present, otherwise is inserted
            before the ``## Rules you must follow`` heading if that exists.
        preset: Preset config dict. When omitted, the preset named by the KB
            config (default ``"generic"``) is loaded.

    Returns:
        Fully rendered prompt string.

    Raises:
        FileNotFoundError: If ``template_path`` does not exist.
        KeyError: If ``kb_name`` or its ``knowledge_dir`` is missing in config.
    """
    template_path = Path(template_path)
    if not template_path.exists():
        raise FileNotFoundError(f"Template not found: {template_path}")

    template = template_path.read_text(encoding="utf-8")
    extras = extras or {}

    # Resolve the directories every template variable is derived from.
    kb_config = config["knowledge_bases"][kb_name]
    base_dir = Path(config.get("_config_dir", ".")).resolve()

    knowledge_dir = Path(kb_config["knowledge_dir"])
    if not knowledge_dir.is_absolute():
        knowledge_dir = (base_dir / knowledge_dir).resolve()

    source = kb_config.get("source", {})
    cache_dir = Path(source.get("cache_dir", "./.source-cache"))
    if not cache_dir.is_absolute():
        cache_dir = (base_dir / cache_dir).resolve()

    # A module whose configured path is "." writes straight into the
    # knowledge dir; every other module gets its own sub-directory.
    repo_info = _find_repo(config, kb_name, module_name)
    if repo_info.get("path") == ".":
        module_dir = knowledge_dir
    else:
        module_dir = knowledge_dir / module_name

    if source.get("structure") == "monorepo":
        repo_name = source.get("repo_name", "repo")
        module_path = repo_info.get("path", module_name)
        source_cache = cache_dir / repo_name / module_path
    else:
        source_cache = cache_dir / module_name

    # Delegate content assembly to the injected strategy.
    file_list_override = extras.get("file_list_override")
    file_list = assembler.resolve_file_list(
        module_dir, doc_type, file_list_override=file_list_override
    )
    if file_list_override and Path(file_list_override).exists():
        override_content = Path(file_list_override).read_text(encoding="utf-8").strip()
        source_content = assembler.resolve_source_content_from_paths(
            module_dir, doc_type, source_cache, override_content.splitlines()
        )
    else:
        source_content = assembler.resolve_source_content(module_dir, doc_type, source_cache)
    skeleton_content = assembler.resolve_skeleton_content(module_dir)

    # Imported here rather than at module level, as in the original layout.
    from core.skeleton.metadata import load_pregenerated
    from core.prompt.content import compute_high_methods, scan_generated_docs, compute_sibling_modules
    from core.paths import resolve_skeleton, resolve_skeleton_summary

    # R2 rule: the skeleton reading strategy depends on the FULL skeleton size.
    #   < 50KB    -> read skeleton.json directly
    #   50-200KB  -> read summary.json only
    #   > 200KB   -> read summary only, batch offset/limit
    skel_path = None
    skel_size_kb = 0
    skel_read_instruction = "Read directly"
    skel_summary = resolve_skeleton_summary(module_dir)
    skel_full = resolve_skeleton(module_dir)
    has_full = bool(skel_full and skel_full.exists())
    has_summary = bool(skel_summary and skel_summary.exists())
    full_size_kb = round(skel_full.stat().st_size / 1024, 1) if has_full else 0

    if full_size_kb > 200 and has_summary:
        skel_path = skel_summary
        skel_size_kb = round(skel_summary.stat().st_size / 1024, 1)
        skel_read_instruction = "Skeleton too large, reading summary file only"
    elif full_size_kb > 50 and has_summary:
        skel_path = skel_summary
        skel_size_kb = round(skel_summary.stat().st_size / 1024, 1)
        skel_read_instruction = "Please read this summary file"
    elif has_full:
        # Small skeleton, or a large one that has no summary to fall back on.
        skel_path = skel_full
        skel_size_kb = full_size_kb
        skel_read_instruction = "Can read the full skeleton directly"
    elif has_summary:
        # No full skeleton but a summary exists.
        skel_path = skel_summary
        skel_size_kb = round(skel_summary.stat().st_size / 1024, 1)
        skel_read_instruction = "Please read this summary file"

    module_dir_display = _display_path(module_dir, base_dir)
    # Both template aliases show the same scan; run it once.
    generated_docs = scan_generated_docs(module_dir)

    variables: dict[str, str] = {
        "module_name": module_name,
        "module_description": repo_info.get("description", f"{module_name} module"),
        "module_dir": module_dir_display,
        "doc_type": doc_type,
        "source_cache_path": _display_path(source_cache, base_dir),
        "output_path": module_dir_display,
        "file_list": file_list,
        "source_content": source_content,
        "skeleton_content": skeleton_content,
        "skeleton_path": _display_path(Path(skel_path), base_dir) if skel_path else "",
        "skeleton_size": str(skel_size_kb),
        "skeleton_read_instruction": skel_read_instruction,
        "high_methods": compute_high_methods(module_dir),
        "generated_docs": generated_docs,
        "generated_docs_files": generated_docs,
        "sibling_modules": compute_sibling_modules(config, kb_name, module_name),
        "global_metadata": load_pregenerated(module_dir),
        "prior_docs_context": "",
        "branch": repo_info.get("branch", "main"),
        "module_type": repo_info.get("type", "service"),
    }

    # Inject prior docs context for dependent doc types.
    from core.prompt.content import build_prior_docs_context
    from core.preset import load_preset

    # Honor a caller-supplied preset; previously it was always overwritten by
    # the preset named in the KB config, making the parameter a no-op.
    if preset is None:
        preset = load_preset(kb_config.get("preset", "generic"))
    prior = build_prior_docs_context(module_dir, doc_type, preset=preset)
    if prior:
        variables["prior_docs_context"] = prior

    # Merge extras (user-provided overrides); "__"-prefixed keys are skipped.
    variables.update({k: v for k, v in extras.items() if not k.startswith("__")})

    # Inject execution guidance before substitution, so placeholders inside
    # the snippet are expanded too.
    if execution_snippet:
        if "{execution_guidance}" in template:
            template = template.replace("{execution_guidance}", execution_snippet)
        else:
            marker = "## Rules you must follow"
            if marker in template:
                pos = template.index(marker)
                template = template[:pos] + execution_snippet + "\n\n" + template[pos:]

    # Variable substitution; remember which variables the template consumed.
    used: set[str] = set()

    def replacer(m: re.Match) -> str:
        key = m.group(1)
        if key in variables:
            used.add(key)
            return variables[key]
        # Unknown placeholders are left untouched.
        return m.group(0)

    rendered = re.sub(r"\{([a-z0-9_]+)\}", replacer, template)

    # Append source content if the assembler says so and the template did not
    # already embed it.
    if assembler.should_append_source():
        appendix = []
        src = variables.get("source_content", "")
        if src and "source_content" not in used:
            appendix.append(f"\n\n## Source file content\n\n{src}")
        skel = variables.get("skeleton_content", "")
        # The skeleton is only appended when some source was truncated.
        if skel and "skeleton_content" not in used and "[truncated" in src:
            appendix.append(f"\n\n## Source skeleton (method signatures of truncated files only)\n\n{skel}")
        if appendix:
            rendered += "".join(appendix)

    # Append global context if not already used in the template.
    context_parts = []
    for key in ("global_metadata", "prior_docs_context"):
        val = variables.get(key, "")
        if val and key not in used:
            context_parts.append(f"\n\n{val}")
    if context_parts:
        rendered += "".join(context_parts)

    return rendered


def _display_path(path: Path, base_dir: Path) -> str:
    """Return *path* relative to *base_dir* when possible, using forward slashes."""
    shown = path.relative_to(base_dir) if path.is_relative_to(base_dir) else path
    return str(shown).replace("\\", "/")
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _find_repo(config: dict, kb_name: str, module_name: str) -> dict:
|
|
225
|
+
"""Find repo/module config entry by name."""
|
|
226
|
+
kb = config["knowledge_bases"][kb_name]
|
|
227
|
+
source = kb["source"]
|
|
228
|
+
if source.get("structure") == "monorepo":
|
|
229
|
+
for mod in source.get("modules", []):
|
|
230
|
+
if mod["name"] == module_name:
|
|
231
|
+
return mod
|
|
232
|
+
else:
|
|
233
|
+
for repo in source.get("repos", []):
|
|
234
|
+
if repo["name"] == module_name:
|
|
235
|
+
return repo
|
|
236
|
+
return {"name": module_name}
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
"""LLM response parsing and validation.
|
|
2
|
+
|
|
3
|
+
Handles malformed JSON, markdown code blocks, and content validation
|
|
4
|
+
for both audit and sync responses.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from core.prompt.response_parser import parse_audit_response, validate_sync_response
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import logging
|
|
14
|
+
import re
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from typing import Literal
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# ---------------------------------------------------------------------------
|
|
22
|
+
# Data classes
|
|
23
|
+
# ---------------------------------------------------------------------------
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class Finding:
    """Single audit finding from LLM response."""

    # Name of the audit dimension (check) this finding refers to.
    dimension: str
    # Outcome of the check; the type permits only "pass" or "fail".
    status: Literal["pass", "fail"]
    # Free-text explanation; empty when the response supplied none.
    detail: str = ""
    # Suggested fix; empty when the response supplied none.
    fix: str = ""
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class ParseResult:
    """Result of response parsing attempt."""

    # True when one of the parse strategies produced JSON.
    success: bool
    # Findings extracted from the parsed JSON; empty on failure.
    findings: list[Finding] = field(default_factory=list)
    # Character count of the raw response (0 for an empty response).
    raw_length: int = 0
    # Which strategy produced the result.
    parse_method: str = ""  # "direct", "code_block", "json_fix", "fallback"
    # Human-readable failure reason; empty on success.
    error: str = ""
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# ---------------------------------------------------------------------------
|
|
48
|
+
# Public API
|
|
49
|
+
# ---------------------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def parse_audit_response(raw: str) -> ParseResult:
    """Turn a raw LLM audit response into structured findings.

    Strategies are tried in order, and the first one that yields JSON wins:
        1. Parse the stripped response directly ("direct").
        2. Parse the body of a markdown code block ("code_block").
        3. Repair common JSON mistakes in the stripped response, then in the
           code-block body, and parse the result ("json_fix").
        4. Give up, log a warning and report failure ("fallback").

    Args:
        raw: Raw LLM response string.

    Returns:
        ParseResult carrying the findings and how they were obtained.
    """
    if not raw or not raw.strip():
        return ParseResult(success=False, raw_length=0, error="Empty response")

    total_chars = len(raw)
    stripped = raw.strip()

    def _succeed(parsed: dict | list, how: str) -> ParseResult:
        # Shared success path for every strategy.
        return ParseResult(
            success=True,
            findings=_extract_findings(parsed),
            raw_length=total_chars,
            parse_method=how,
        )

    # Strategy 1: the response is already valid JSON.
    parsed = _try_parse_json(stripped)
    if parsed is not None:
        return _succeed(parsed, "direct")

    # Strategy 2: JSON wrapped in a markdown fence.
    block = _extract_json_from_code_block(raw)
    if block:
        parsed = _try_parse_json(block)
        if parsed is not None:
            return _succeed(parsed, "code_block")

    # Strategy 3: repair the whole response first, then the fenced body.
    repair_inputs = [stripped]
    if block:
        repair_inputs.append(block)
    for candidate in repair_inputs:
        parsed = _try_parse_json(_fix_common_json_errors(candidate))
        if parsed is not None:
            return _succeed(parsed, "json_fix")

    # Strategy 4: nothing worked.
    logger.warning("[response_parser] Failed to parse audit response (%d chars)", total_chars)
    return ParseResult(
        success=False,
        raw_length=total_chars,
        parse_method="fallback",
        error="All parse attempts failed",
    )
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def validate_sync_response(
    raw: str,
    original_content: str,
    *,
    max_expansion_ratio: float = 2.0,
) -> tuple[bool, str]:
    """Check that an LLM sync response is usable as section content.

    The response is rejected when it:
        1. is empty or whitespace only;
        2. repeats a known prompt phrase within its first 500 characters;
        3. has no non-blank line once an outer code fence is removed;
        4. is longer than ``max_expansion_ratio`` times the original, for
           originals longer than 100 characters.

    Args:
        raw: LLM response (proposed section content).
        original_content: Original section content for comparison.
        max_expansion_ratio: Maximum allowed length ratio vs the original.

    Returns:
        ``(True, cleaned_content)`` on success, ``(False, reason)`` otherwise.
    """
    if not raw or not raw.strip():
        return False, "Empty response"

    # Drop a markdown fence that wraps the entire response.
    content = _strip_outer_fence(raw.strip())

    # Prompt echo is a common LLM failure mode; only the head is inspected.
    head = content[:500]
    echo_markers = (
        "You are a knowledge base maintainer",
        "Rewrite this section",
        "Document type:",
        "Current section content:",
        "Changed source files:",
    )
    echoed = next((phrase for phrase in echo_markers if phrase in head), None)
    if echoed is not None:
        return False, f"Response echoes prompt instructions: '{echoed}'"

    # At least one non-blank line is required.
    if not any(row.strip() for row in content.splitlines()):
        return False, "Response has no meaningful content"

    # The ratio check only applies to substantial originals (>100 chars);
    # short sections may legitimately grow by a large factor.
    baseline = original_content.strip()
    if baseline:
        original_len = len(baseline)
        new_len = len(content)
        too_long = new_len > original_len * max_expansion_ratio
        if original_len > 100 and too_long:
            return False, (
                f"Response too long: {new_len} chars vs original {original_len} "
                f"(ratio {new_len/original_len:.1f}x > {max_expansion_ratio}x)"
            )

    return True, content
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
# ---------------------------------------------------------------------------
|
|
184
|
+
# Internal helpers
|
|
185
|
+
# ---------------------------------------------------------------------------
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _try_parse_json(text: str) -> dict | list | None:
|
|
189
|
+
"""Attempt JSON parse, return None on failure."""
|
|
190
|
+
try:
|
|
191
|
+
return json.loads(text)
|
|
192
|
+
except (json.JSONDecodeError, ValueError):
|
|
193
|
+
return None
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _extract_json_from_code_block(text: str) -> str | None:
|
|
197
|
+
"""Extract JSON content from markdown code block."""
|
|
198
|
+
# Match ```json ... ``` or ``` ... ```
|
|
199
|
+
pattern = r"```(?:json)?\s*\n(.*?)\n\s*```"
|
|
200
|
+
match = re.search(pattern, text, re.DOTALL)
|
|
201
|
+
if match:
|
|
202
|
+
return match.group(1).strip()
|
|
203
|
+
|
|
204
|
+
# Try without newline requirement (single-line blocks)
|
|
205
|
+
pattern2 = r"```(?:json)?\s*(.*?)\s*```"
|
|
206
|
+
match2 = re.search(pattern2, text, re.DOTALL)
|
|
207
|
+
if match2:
|
|
208
|
+
return match2.group(1).strip()
|
|
209
|
+
|
|
210
|
+
return None
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def _fix_common_json_errors(text: str) -> str:
|
|
214
|
+
"""Attempt to fix common LLM JSON formatting errors."""
|
|
215
|
+
# Remove trailing commas before } or ]
|
|
216
|
+
text = re.sub(r",\s*([}\]])", r"\1", text)
|
|
217
|
+
|
|
218
|
+
# Replace single quotes with double quotes (careful with apostrophes)
|
|
219
|
+
# Only do this if the text looks like it uses single quotes for strings
|
|
220
|
+
if text.count("'") > text.count('"') and "{" in text:
|
|
221
|
+
text = re.sub(r"'([^']*)'", r'"\1"', text)
|
|
222
|
+
|
|
223
|
+
# Fix unquoted keys: { key: "value" } → { "key": "value" }
|
|
224
|
+
text = re.sub(r'{\s*(\w+)\s*:', r'{"\1":', text)
|
|
225
|
+
text = re.sub(r',\s*(\w+)\s*:', r',"\1":', text)
|
|
226
|
+
|
|
227
|
+
return text
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _extract_findings(data: dict | list) -> list[Finding]:
|
|
231
|
+
"""Extract Finding objects from parsed JSON data."""
|
|
232
|
+
items: list[dict] = []
|
|
233
|
+
|
|
234
|
+
if isinstance(data, list):
|
|
235
|
+
items = data
|
|
236
|
+
elif isinstance(data, dict):
|
|
237
|
+
# Try common wrapper keys
|
|
238
|
+
for key in ("findings", "results", "audit", "items", "data"):
|
|
239
|
+
if key in data and isinstance(data[key], list):
|
|
240
|
+
items = data[key]
|
|
241
|
+
break
|
|
242
|
+
if not items:
|
|
243
|
+
# Single finding as dict
|
|
244
|
+
if "dimension" in data or "status" in data:
|
|
245
|
+
items = [data]
|
|
246
|
+
|
|
247
|
+
findings: list[Finding] = []
|
|
248
|
+
for item in items:
|
|
249
|
+
if not isinstance(item, dict):
|
|
250
|
+
continue
|
|
251
|
+
dimension = item.get("dimension", item.get("name", item.get("check", "")))
|
|
252
|
+
status = item.get("status", "pass")
|
|
253
|
+
if status not in ("pass", "fail"):
|
|
254
|
+
status = "fail" if status in ("failed", "error", "no", "false") else "pass"
|
|
255
|
+
detail = item.get("detail", item.get("message", item.get("description", "")))
|
|
256
|
+
fix = item.get("fix", item.get("suggestion", item.get("fix_content", "")))
|
|
257
|
+
|
|
258
|
+
findings.append(Finding(
|
|
259
|
+
dimension=str(dimension),
|
|
260
|
+
status=status,
|
|
261
|
+
detail=str(detail),
|
|
262
|
+
fix=str(fix) if fix else "",
|
|
263
|
+
))
|
|
264
|
+
|
|
265
|
+
return findings
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def _strip_outer_fence(content: str) -> str:
|
|
269
|
+
"""Strip markdown code fence if the entire content is wrapped in one."""
|
|
270
|
+
lines = content.splitlines()
|
|
271
|
+
if len(lines) >= 2:
|
|
272
|
+
if lines[0].startswith("```") and lines[-1].strip() == "```":
|
|
273
|
+
return "\n".join(lines[1:-1])
|
|
274
|
+
return content
|