source-kb 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. cli/__init__.py +50 -0
  2. cli/__main__.py +5 -0
  3. cli/commands/__init__.py +1 -0
  4. cli/commands/anchor_fix.py +47 -0
  5. cli/commands/diff_doc.py +52 -0
  6. cli/commands/dispatch.py +77 -0
  7. cli/commands/extract.py +72 -0
  8. cli/commands/file_list.py +74 -0
  9. cli/commands/index.py +84 -0
  10. cli/commands/lock.py +89 -0
  11. cli/commands/merge.py +60 -0
  12. cli/commands/merge_delta.py +19 -0
  13. cli/commands/metadata.py +24 -0
  14. cli/commands/pipeline.py +45 -0
  15. cli/commands/post_merge.py +43 -0
  16. cli/commands/query.py +52 -0
  17. cli/commands/render.py +101 -0
  18. cli/commands/scan_repos.py +46 -0
  19. cli/commands/setup.py +94 -0
  20. cli/commands/split.py +196 -0
  21. cli/commands/stale_files.py +98 -0
  22. cli/commands/validate.py +191 -0
  23. core/__init__.py +32 -0
  24. core/config.py +261 -0
  25. core/docs/__init__.py +7 -0
  26. core/docs/section_updater.py +286 -0
  27. core/docs/shared.py +149 -0
  28. core/git.py +294 -0
  29. core/interfaces.py +249 -0
  30. core/monitor/__init__.py +5 -0
  31. core/monitor/progress.py +83 -0
  32. core/monitor/prompt_store.py +49 -0
  33. core/paths.py +141 -0
  34. core/preset.py +237 -0
  35. core/preset_accessors.py +202 -0
  36. core/preset_classify.py +132 -0
  37. core/preset_hooks.py +129 -0
  38. core/preset_profile.py +89 -0
  39. core/prompt/__init__.py +7 -0
  40. core/prompt/__main__.py +147 -0
  41. core/prompt/content.py +320 -0
  42. core/prompt/context_manager.py +164 -0
  43. core/prompt/renderer.py +236 -0
  44. core/prompt/response_parser.py +274 -0
  45. core/prompt/templates.py +357 -0
  46. core/prompt/validate_parity.py +162 -0
  47. core/prompt/variables.py +339 -0
  48. core/rag/__init__.py +22 -0
  49. core/rag/__main__.py +136 -0
  50. core/rag/bm25_index.py +268 -0
  51. core/rag/chunker.py +273 -0
  52. core/rag/embedder.py +151 -0
  53. core/rag/indexer.py +292 -0
  54. core/rag/loader.py +89 -0
  55. core/rag/retriever.py +82 -0
  56. core/skeleton/__init__.py +11 -0
  57. core/skeleton/__main__.py +934 -0
  58. core/skeleton/anchor_fix.py +250 -0
  59. core/skeleton/classify.py +331 -0
  60. core/skeleton/cmd_anchor_fix.py +43 -0
  61. core/skeleton/cmd_diff_doc.py +44 -0
  62. core/skeleton/cmd_lock.py +87 -0
  63. core/skeleton/cmd_merge_delta.py +41 -0
  64. core/skeleton/community.py +233 -0
  65. core/skeleton/dependency_graph.py +306 -0
  66. core/skeleton/diff_doc.py +248 -0
  67. core/skeleton/dispatch.py +273 -0
  68. core/skeleton/dispatch_render.py +319 -0
  69. core/skeleton/dispatch_source.py +111 -0
  70. core/skeleton/extract.py +218 -0
  71. core/skeleton/extract_methods.py +298 -0
  72. core/skeleton/file_list.py +239 -0
  73. core/skeleton/impact.py +278 -0
  74. core/skeleton/jar_download.py +177 -0
  75. core/skeleton/jar_resolver.py +186 -0
  76. core/skeleton/loader.py +162 -0
  77. core/skeleton/merge.py +278 -0
  78. core/skeleton/merge_delta.py +229 -0
  79. core/skeleton/metadata.py +96 -0
  80. core/skeleton/metadata_builders.py +264 -0
  81. core/skeleton/module_dag.py +330 -0
  82. core/skeleton/parsers/__init__.py +71 -0
  83. core/skeleton/parsers/jqassistant.py +300 -0
  84. core/skeleton/parsers/jqassistant_cypher.py +225 -0
  85. core/skeleton/parsers/regex.py +171 -0
  86. core/skeleton/parsers/treesitter.py +324 -0
  87. core/skeleton/parsers/treesitter_java.py +284 -0
  88. core/skeleton/parsers/treesitter_multi.py +289 -0
  89. core/skeleton/pom_parser.py +299 -0
  90. core/skeleton/post_merge.py +295 -0
  91. core/skeleton/post_merge_llm.py +82 -0
  92. core/skeleton/query.py +195 -0
  93. core/skeleton/shard_context.py +177 -0
  94. core/skeleton/split.py +180 -0
  95. core/skeleton/split_cache.py +107 -0
  96. core/skeleton/split_feedback.py +174 -0
  97. core/skeleton/split_plan.py +219 -0
  98. core/skeleton/split_plan_helpers.py +305 -0
  99. core/skeleton/split_plan_llm.py +274 -0
  100. core/utils.py +135 -0
  101. core/validators/__init__.py +65 -0
  102. core/validators/__main__.py +215 -0
  103. core/validators/consistency.py +203 -0
  104. core/validators/coverage.py +171 -0
  105. core/validators/duplicates.py +76 -0
  106. core/validators/engine.py +224 -0
  107. core/validators/links.py +76 -0
  108. core/validators/sampling.py +169 -0
  109. core/validators/structure.py +144 -0
  110. engine/__init__.py +7 -0
  111. engine/assembler.py +231 -0
  112. engine/confirm.py +65 -0
  113. engine/dedup.py +106 -0
  114. engine/main.py +211 -0
  115. engine/pipeline/__init__.py +163 -0
  116. engine/pipeline/recovery.py +250 -0
  117. engine/pipeline/steps/__init__.py +23 -0
  118. engine/pipeline/steps/audit.py +220 -0
  119. engine/pipeline/steps/audit_apply.py +195 -0
  120. engine/pipeline/steps/audit_helpers.py +155 -0
  121. engine/pipeline/steps/classify_llm.py +236 -0
  122. engine/pipeline/steps/classify_prompt.py +223 -0
  123. engine/pipeline/steps/finalize.py +160 -0
  124. engine/pipeline/steps/generate.py +169 -0
  125. engine/pipeline/steps/generate_batch.py +197 -0
  126. engine/pipeline/steps/generate_recovery.py +170 -0
  127. engine/pipeline/steps/llm_plan_split.py +253 -0
  128. engine/pipeline/steps/lock.py +64 -0
  129. engine/pipeline/steps/preflight.py +237 -0
  130. engine/pipeline/steps/preflight_adjust.py +147 -0
  131. engine/pipeline/steps/pregenerate.py +130 -0
  132. engine/pipeline/steps/quality.py +81 -0
  133. engine/pipeline/steps/skeleton.py +149 -0
  134. engine/pipeline/steps/source.py +163 -0
  135. engine/pipeline/steps/sync.py +117 -0
  136. engine/pipeline/steps/sync_finalize.py +237 -0
  137. engine/pipeline/steps/sync_update.py +341 -0
  138. engine/pipelines.py +91 -0
  139. engine/runner.py +335 -0
  140. engine/strategies/__init__.py +86 -0
  141. engine/strategies/api.py +128 -0
  142. engine/strategies/delegated.py +50 -0
  143. engine/strategies/dryrun.py +25 -0
  144. engine/two_phase.py +143 -0
  145. mcp_server/__init__.py +73 -0
  146. mcp_server/__main__.py +5 -0
  147. mcp_server/tools/__init__.py +1 -0
  148. mcp_server/tools/config.py +63 -0
  149. mcp_server/tools/discovery.py +276 -0
  150. mcp_server/tools/generation.py +184 -0
  151. mcp_server/tools/planning.py +144 -0
  152. mcp_server/tools/source.py +175 -0
  153. mcp_server/tools/validation.py +140 -0
  154. mcp_server/tools/workflow.py +166 -0
  155. mcp_server/workflow_loader.py +204 -0
  156. presets/generic/audit_dimensions.md +132 -0
  157. presets/generic/doc_types.yaml +152 -0
  158. presets/generic/preset.yaml +115 -0
  159. presets/java-spring/audit_dimensions.md +228 -0
  160. presets/java-spring/audit_dimensions.yaml +203 -0
  161. presets/java-spring/doc_types.yaml +269 -0
  162. presets/java-spring/hooks.py +122 -0
  163. presets/java-spring/preset.yaml +341 -0
  164. presets/java-spring/templates/README.md +34 -0
  165. presets/java-spring/templates/audit-system.md +15 -0
  166. presets/java-spring/templates/subagent-aop.md +105 -0
  167. presets/java-spring/templates/subagent-api.md +63 -0
  168. presets/java-spring/templates/subagent-architecture.md +111 -0
  169. presets/java-spring/templates/subagent-async-events.md +107 -0
  170. presets/java-spring/templates/subagent-audit-api-contracts.md +40 -0
  171. presets/java-spring/templates/subagent-audit-architecture.md +38 -0
  172. presets/java-spring/templates/subagent-audit-business.md +40 -0
  173. presets/java-spring/templates/subagent-audit-data-models.md +40 -0
  174. presets/java-spring/templates/subagent-business.md +129 -0
  175. presets/java-spring/templates/subagent-caching.md +75 -0
  176. presets/java-spring/templates/subagent-database-access.md +114 -0
  177. presets/java-spring/templates/subagent-enum.md +75 -0
  178. presets/java-spring/templates/subagent-error-handling.md +91 -0
  179. presets/java-spring/templates/subagent-external-integrations.md +80 -0
  180. presets/java-spring/templates/subagent-index.md +122 -0
  181. presets/java-spring/templates/subagent-messaging.md +97 -0
  182. presets/java-spring/templates/subagent-model.md +88 -0
  183. presets/java-spring/templates/subagent-observability.md +91 -0
  184. presets/java-spring/templates/subagent-scheduled.md +81 -0
  185. presets/java-spring/templates/subagent-security.md +102 -0
  186. presets/java-spring/templates/subagent-structure.md +101 -0
  187. presets/java-spring/templates/subagent-sync-section.md +34 -0
  188. presets/java-spring/templates/subagent-utils.md +73 -0
  189. presets/java-spring/templates/sync-system.md +8 -0
  190. presets/java-spring/workflow-extensions.md +112 -0
  191. skills/__init__.py +1 -0
  192. skills/_shared/README.md +30 -0
  193. skills/_shared/doc-coverage-shared.md +134 -0
  194. skills/_shared/doc-quality-standard.md +1058 -0
  195. skills/_shared/doc-subagent-rules.md +762 -0
  196. skills/_shared/windows-compat.md +89 -0
  197. skills/kb-audit/SKILL.md +52 -0
  198. skills/kb-audit/rules.md +88 -0
  199. skills/kb-audit/steps/step-01-prepare.md +75 -0
  200. skills/kb-audit/steps/step-02-audit.md +96 -0
  201. skills/kb-audit/steps/step-03-verify.md +65 -0
  202. skills/kb-audit/steps/step-04-report.md +64 -0
  203. skills/kb-init/SKILL.md +146 -0
  204. skills/kb-init/rules.md +187 -0
  205. skills/kb-init/steps/step-01-scope.md +62 -0
  206. skills/kb-init/steps/step-02-source.md +410 -0
  207. skills/kb-init/steps/step-03-generate.md +307 -0
  208. skills/kb-init/steps/step-04-quality.md +92 -0
  209. skills/kb-init/steps/step-05-finalize.md +132 -0
  210. skills/kb-init/templates/core/execution-modes.md +29 -0
  211. skills/kb-init/templates/core/output-only.md +4 -0
  212. skills/kb-init/templates/core/readwrite.md +33 -0
  213. skills/kb-search/SKILL.md +138 -0
  214. skills/kb-search/rules.md +64 -0
  215. skills/kb-sync/SKILL.md +43 -0
  216. skills/kb-sync/rules.md +70 -0
  217. skills/kb-sync/scripts/rebuild_module.py +91 -0
  218. skills/kb-sync/scripts/scan_repos.py +687 -0
  219. skills/kb-sync/steps/step-01-detect.md +72 -0
  220. skills/kb-sync/steps/step-02-update.md +71 -0
  221. skills/kb-sync/steps/step-03-verify.md +47 -0
  222. skills/kb-sync/steps/step-04-finalize.md +52 -0
  223. source_kb-0.2.2.dist-info/METADATA +194 -0
  224. source_kb-0.2.2.dist-info/RECORD +228 -0
  225. source_kb-0.2.2.dist-info/WHEEL +5 -0
  226. source_kb-0.2.2.dist-info/entry_points.txt +3 -0
  227. source_kb-0.2.2.dist-info/licenses/LICENSE +21 -0
  228. source_kb-0.2.2.dist-info/top_level.txt +6 -0
@@ -0,0 +1,236 @@
1
+ """Prompt renderer — template loading, variable computation, rendering.
2
+
3
+ Delegates content assembly (source, skeleton, file list) to an injected
4
+ PromptAssembler strategy. Core renderer is mode-agnostic.
5
+
6
+ Usage:
7
+ from core.prompt.renderer import render_prompt
8
+
9
+ prompt = render_prompt(
10
+ template_path="skill/kb-init/templates/subagent-business.md",
11
+ config=config, kb_name="my-kb", module_name="my-service",
12
+ doc_type="business-logic", assembler=my_assembler,
13
+ )
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import logging
19
+ import re
20
+ from pathlib import Path
21
+ from typing import Any
22
+
23
+ from core.interfaces import PromptAssembler
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
def render_prompt(
    template_path: str | Path,
    config: dict[str, Any],
    kb_name: str,
    module_name: str,
    doc_type: str,
    assembler: PromptAssembler,
    extras: dict[str, str] | None = None,
    execution_snippet: str = "",
    preset: dict[str, Any] | None = None,
) -> str:
    """Render a sub-agent prompt from a template plus computed variables.

    The template is read from disk, a variable table is built from the
    project config, the assembler and the module's on-disk artefacts, and
    every ``{name}`` placeholder whose name is in that table is substituted.
    Unknown placeholders are left untouched.

    Args:
        template_path: Path to the .md template file.
        config: Full kb-project.yaml config dict. ``_config_dir`` (default
            ``"."``) is the base for every relative path.
        kb_name: Key under ``config["knowledge_bases"]``.
        module_name: Module (or repo) name to render the prompt for.
        doc_type: Document type key.
        assembler: PromptAssembler strategy that supplies the file list,
            source content and skeleton content.
        extras: Additional variables to inject. Keys starting with ``__``
            are skipped. ``file_list_override`` is also read as a path to a
            file listing one source path per line.
        execution_snippet: Mode-specific execution guidance text. Replaces
            ``{execution_guidance}`` if present, otherwise it is inserted
            before the ``## Rules you must follow`` heading if present.
        preset: Preset config passed to the prior-docs context builder.
            When omitted, the preset named by the KB config (default
            ``"generic"``) is loaded.

    Returns:
        Fully rendered prompt string.

    Raises:
        FileNotFoundError: If ``template_path`` does not exist.
        KeyError: If ``kb_name`` or its ``knowledge_dir`` is missing from
            ``config``.
    """
    template_path = Path(template_path)
    if not template_path.exists():
        raise FileNotFoundError(f"Template not found: {template_path}")

    template = template_path.read_text(encoding="utf-8")
    extras = extras or {}

    # Resolve the directories every other variable hangs off.
    kb_config = config["knowledge_bases"][kb_name]
    base_dir = Path(config.get("_config_dir", ".")).resolve()

    knowledge_dir = Path(kb_config["knowledge_dir"])
    if not knowledge_dir.is_absolute():
        knowledge_dir = (base_dir / knowledge_dir).resolve()

    source = kb_config.get("source", {})
    cache_dir = Path(source.get("cache_dir", "./.source-cache"))
    if not cache_dir.is_absolute():
        cache_dir = (base_dir / cache_dir).resolve()

    # A module whose configured path is "." documents straight into the
    # knowledge dir; every other module gets its own sub-directory.
    repo_info = _find_repo(config, kb_name, module_name)
    if repo_info.get("path") == ".":
        module_dir = knowledge_dir
    else:
        module_dir = knowledge_dir / module_name

    if source.get("structure") == "monorepo":
        repo_name = source.get("repo_name", "repo")
        module_path = repo_info.get("path", module_name)
        source_cache = cache_dir / repo_name / module_path
    else:
        source_cache = cache_dir / module_name

    # Delegate content assembly to the injected strategy.
    file_list_override = extras.get("file_list_override")
    file_list = assembler.resolve_file_list(
        module_dir, doc_type, file_list_override=file_list_override
    )
    if file_list_override and Path(file_list_override).exists():
        override_content = Path(file_list_override).read_text(encoding="utf-8").strip()
        source_content = assembler.resolve_source_content_from_paths(
            module_dir, doc_type, source_cache, override_content.splitlines()
        )
    else:
        source_content = assembler.resolve_source_content(module_dir, doc_type, source_cache)
    skeleton_content = assembler.resolve_skeleton_content(module_dir)

    # Function-scope imports are kept as in the original layout.
    from core.skeleton.metadata import load_pregenerated
    from core.prompt.content import compute_high_methods, scan_generated_docs, compute_sibling_modules
    from core.paths import resolve_skeleton, resolve_skeleton_summary

    skel_summary = resolve_skeleton_summary(module_dir)
    skel_full = resolve_skeleton(module_dir)
    skel_path, skel_size_kb, skel_read_instruction = _select_skeleton(skel_full, skel_summary)

    module_dir_display = _rel_posix(module_dir, base_dir)
    # Scanned once and shared by both variable names that expose it.
    generated_docs = scan_generated_docs(module_dir)

    variables: dict[str, str] = {
        "module_name": module_name,
        "module_description": repo_info.get("description", f"{module_name} module"),
        "module_dir": module_dir_display,
        "doc_type": doc_type,
        "source_cache_path": _rel_posix(source_cache, base_dir),
        "output_path": module_dir_display,
        "file_list": file_list,
        "source_content": source_content,
        "skeleton_content": skeleton_content,
        "skeleton_path": _rel_posix(skel_path, base_dir) if skel_path is not None else "",
        "skeleton_size": str(skel_size_kb),
        "skeleton_read_instruction": skel_read_instruction,
        "high_methods": compute_high_methods(module_dir),
        "generated_docs": generated_docs,
        "generated_docs_files": generated_docs,
        "sibling_modules": compute_sibling_modules(config, kb_name, module_name),
        "global_metadata": load_pregenerated(module_dir),
        "prior_docs_context": "",
        "branch": repo_info.get("branch", "main"),
        "module_type": repo_info.get("type", "service"),
    }

    # Inject prior docs context for dependent doc types.
    from core.prompt.content import build_prior_docs_context
    from core.preset import load_preset

    # Honour a caller-supplied preset; only fall back to the KB's configured
    # preset when none was passed in.
    if preset is None:
        preset = load_preset(kb_config.get("preset", "generic"))
    prior = build_prior_docs_context(module_dir, doc_type, preset=preset)
    if prior:
        variables["prior_docs_context"] = prior

    # Merge extras (user-provided overrides); "__"-prefixed keys are skipped.
    variables.update({k: v for k, v in extras.items() if not k.startswith("__")})

    # Inject execution guidance before substitution, so placeholders inside
    # the snippet are substituted too.
    if execution_snippet:
        marker = "## Rules you must follow"
        if "{execution_guidance}" in template:
            template = template.replace("{execution_guidance}", execution_snippet)
        elif marker in template:
            pos = template.index(marker)
            template = template[:pos] + execution_snippet + "\n\n" + template[pos:]
        else:
            logger.debug(
                "[renderer] No insertion point for execution guidance in %s",
                template_path,
            )

    # Variable substitution; `used` records which variables the template
    # actually consumed so the appendices below avoid duplicating them.
    used: set[str] = set()

    def replacer(m: re.Match) -> str:
        key = m.group(1)
        if key in variables:
            used.add(key)
            # str() guards against non-string values arriving via extras,
            # which would otherwise make re.sub raise TypeError.
            return str(variables[key])
        return m.group(0)

    rendered = re.sub(r"\{([a-z0-9_]+)\}", replacer, template)

    # Append source content if the assembler asks for it and the template
    # did not already consume it.
    if assembler.should_append_source():
        appendix = []
        src = variables.get("source_content", "")
        if src and "source_content" not in used:
            appendix.append(f"\n\n## Source file content\n\n{src}")
        skel = variables.get("skeleton_content", "")
        if skel and "skeleton_content" not in used and "[truncated" in src:
            appendix.append(f"\n\n## Source skeleton (method signatures of truncated files only)\n\n{skel}")
        if appendix:
            rendered += "".join(appendix)

    # Append global context if not already used in the template.
    context_parts = []
    for key in ("global_metadata", "prior_docs_context"):
        val = variables.get(key, "")
        if val and key not in used:
            context_parts.append(f"\n\n{val}")
    if context_parts:
        rendered += "".join(context_parts)

    return rendered


def _rel_posix(path: Path, base_dir: Path) -> str:
    """Return *path* relative to *base_dir* when it lies inside it, using forward slashes."""
    shown = path.relative_to(base_dir) if path.is_relative_to(base_dir) else path
    return str(shown).replace("\\", "/")


def _skeleton_size_kb(path: Path) -> float:
    """Return the size of *path* in KB, rounded to one decimal place."""
    return round(path.stat().st_size / 1024, 1)


def _select_skeleton(
    skel_full: Path | None,
    skel_summary: Path | None,
) -> tuple[Path | None, float, str]:
    """Pick the skeleton file a prompt should point at (R2 size rule).

    The full skeleton's size is the reference for the thresholds:
    above 200 KB or above 50 KB the summary is used when it exists,
    otherwise the full skeleton is used, otherwise the summary alone.

    Returns:
        ``(path, size_kb, read_instruction)``; ``path`` is ``None`` and
        ``size_kb`` is ``0`` when neither file exists.
    """
    has_full = bool(skel_full and skel_full.exists())
    has_summary = bool(skel_summary and skel_summary.exists())
    full_size_kb = _skeleton_size_kb(skel_full) if has_full else 0

    if has_summary and full_size_kb > 200:
        return skel_summary, _skeleton_size_kb(skel_summary), "Skeleton too large, reading summary file only"
    if has_summary and full_size_kb > 50:
        return skel_summary, _skeleton_size_kb(skel_summary), "Please read this summary file"
    if has_full:
        return skel_full, full_size_kb, "Can read the full skeleton directly"
    if has_summary:
        # No full skeleton on disk, but a summary exists.
        return skel_summary, _skeleton_size_kb(skel_summary), "Please read this summary file"
    return None, 0, "Read directly"
222
+
223
+
224
+ def _find_repo(config: dict, kb_name: str, module_name: str) -> dict:
225
+ """Find repo/module config entry by name."""
226
+ kb = config["knowledge_bases"][kb_name]
227
+ source = kb["source"]
228
+ if source.get("structure") == "monorepo":
229
+ for mod in source.get("modules", []):
230
+ if mod["name"] == module_name:
231
+ return mod
232
+ else:
233
+ for repo in source.get("repos", []):
234
+ if repo["name"] == module_name:
235
+ return repo
236
+ return {"name": module_name}
@@ -0,0 +1,274 @@
1
+ """LLM response parsing and validation.
2
+
3
+ Handles malformed JSON, markdown code blocks, and content validation
4
+ for both audit and sync responses.
5
+
6
+ Usage:
7
+ from core.prompt.response_parser import parse_audit_response, validate_sync_response
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import json
13
+ import logging
14
+ import re
15
+ from dataclasses import dataclass, field
16
+ from typing import Literal
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ # ---------------------------------------------------------------------------
22
+ # Data classes
23
+ # ---------------------------------------------------------------------------
24
+
25
+
26
@dataclass
class Finding:
    """One audit finding taken from an LLM response.

    Attributes:
        dimension: Name of the audit dimension the finding belongs to.
        status: Outcome for that dimension, ``"pass"`` or ``"fail"``.
        detail: Free-text explanation; empty when none was given.
        fix: Suggested fix text; empty when none was given.
    """

    # Required fields.
    dimension: str
    status: Literal["pass", "fail"]

    # Optional free-text fields.
    detail: str = ""
    fix: str = ""
34
+
35
+
36
@dataclass
class ParseResult:
    """Outcome of one attempt to parse an LLM response.

    Attributes:
        success: Whether any parse strategy produced JSON.
        findings: Findings extracted from the parsed data.
        raw_length: Character count of the raw response.
        parse_method: Strategy that succeeded: "direct", "code_block",
            "json_fix", or "fallback" when every strategy failed.
        error: Human-readable failure reason; empty on success.
    """

    success: bool
    # default_factory gives every result its own list.
    findings: list[Finding] = field(default_factory=list)
    raw_length: int = 0
    parse_method: str = ""
    error: str = ""
45
+
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # Public API
49
+ # ---------------------------------------------------------------------------
50
+
51
+
52
def parse_audit_response(raw: str) -> ParseResult:
    """Turn a raw LLM audit response into structured findings.

    Strategies are tried in order and the first one that yields JSON wins:
        1. Parse the stripped response directly ("direct").
        2. Parse the content of the first markdown code block ("code_block").
        3. Repair common JSON mistakes in the stripped response ("json_fix").
        4. Repair common JSON mistakes in the code block content ("json_fix").

    Args:
        raw: Raw LLM response string.

    Returns:
        ParseResult carrying the findings and the strategy that worked. An
        empty or whitespace-only response, or one that no strategy can
        parse, gives ``success=False`` with ``error`` set.
    """
    if not raw or not raw.strip():
        return ParseResult(success=False, raw_length=0, error="Empty response")

    raw_length = len(raw)
    stripped = raw.strip()

    def candidates():
        # Lazily yield (method, text) pairs so later strategies only run
        # when the earlier ones have failed.
        yield "direct", stripped
        block = _extract_json_from_code_block(raw)
        if block:
            yield "code_block", block
        yield "json_fix", _fix_common_json_errors(stripped)
        if block:
            yield "json_fix", _fix_common_json_errors(block)

    for method, candidate in candidates():
        parsed = _try_parse_json(candidate)
        if parsed is None:
            continue
        return ParseResult(
            success=True,
            findings=_extract_findings(parsed),
            raw_length=raw_length,
            parse_method=method,
        )

    # Every strategy failed.
    logger.warning("[response_parser] Failed to parse audit response (%d chars)", raw_length)
    return ParseResult(
        success=False,
        raw_length=raw_length,
        parse_method="fallback",
        error="All parse attempts failed",
    )
119
+
120
+
121
def validate_sync_response(
    raw: str,
    original_content: str,
    *,
    max_expansion_ratio: float = 2.0,
) -> tuple[bool, str]:
    """Validate an LLM sync response holding proposed section content.

    The response is rejected when it:
        1. is empty or whitespace-only;
        2. contains a known prompt marker in its first 500 characters;
        3. has no non-blank line once an outer code fence is removed;
        4. is longer than ``max_expansion_ratio`` times the original, which
           is only checked when the original exceeds 100 characters.

    Args:
        raw: LLM response (proposed section content).
        original_content: Original section content for comparison.
        max_expansion_ratio: Maximum allowed length ratio vs the original.

    Returns:
        ``(True, cleaned_content)`` when valid, else ``(False, reason)``.
    """
    if not raw or not raw.strip():
        return False, "Empty response"

    # Remove an outer markdown fence when the whole response is wrapped.
    content = _strip_outer_fence(raw.strip())

    # Prompt echo is a common LLM failure mode; only the head is checked.
    head = content[:500]
    for marker in (
        "You are a knowledge base maintainer",
        "Rewrite this section",
        "Document type:",
        "Current section content:",
        "Changed source files:",
    ):
        if marker in head:
            return False, f"Response echoes prompt instructions: '{marker}'"

    # At least one non-blank line must remain.
    if not any(row.strip() for row in content.splitlines()):
        return False, "Response has no meaningful content"

    # Short originals may legitimately grow a lot, so the ratio applies
    # only when the original is longer than 100 characters.
    baseline = original_content.strip()
    if baseline:
        original_len = len(baseline)
        new_len = len(content)
        too_long = original_len > 100 and new_len > original_len * max_expansion_ratio
        if too_long:
            return False, (
                f"Response too long: {new_len} chars vs original {original_len} "
                f"(ratio {new_len/original_len:.1f}x > {max_expansion_ratio}x)"
            )

    return True, content
181
+
182
+
183
+ # ---------------------------------------------------------------------------
184
+ # Internal helpers
185
+ # ---------------------------------------------------------------------------
186
+
187
+
188
+ def _try_parse_json(text: str) -> dict | list | None:
189
+ """Attempt JSON parse, return None on failure."""
190
+ try:
191
+ return json.loads(text)
192
+ except (json.JSONDecodeError, ValueError):
193
+ return None
194
+
195
+
196
+ def _extract_json_from_code_block(text: str) -> str | None:
197
+ """Extract JSON content from markdown code block."""
198
+ # Match ```json ... ``` or ``` ... ```
199
+ pattern = r"```(?:json)?\s*\n(.*?)\n\s*```"
200
+ match = re.search(pattern, text, re.DOTALL)
201
+ if match:
202
+ return match.group(1).strip()
203
+
204
+ # Try without newline requirement (single-line blocks)
205
+ pattern2 = r"```(?:json)?\s*(.*?)\s*```"
206
+ match2 = re.search(pattern2, text, re.DOTALL)
207
+ if match2:
208
+ return match2.group(1).strip()
209
+
210
+ return None
211
+
212
+
213
+ def _fix_common_json_errors(text: str) -> str:
214
+ """Attempt to fix common LLM JSON formatting errors."""
215
+ # Remove trailing commas before } or ]
216
+ text = re.sub(r",\s*([}\]])", r"\1", text)
217
+
218
+ # Replace single quotes with double quotes (careful with apostrophes)
219
+ # Only do this if the text looks like it uses single quotes for strings
220
+ if text.count("'") > text.count('"') and "{" in text:
221
+ text = re.sub(r"'([^']*)'", r'"\1"', text)
222
+
223
+ # Fix unquoted keys: { key: "value" } → { "key": "value" }
224
+ text = re.sub(r'{\s*(\w+)\s*:', r'{"\1":', text)
225
+ text = re.sub(r',\s*(\w+)\s*:', r',"\1":', text)
226
+
227
+ return text
228
+
229
+
230
def _extract_findings(data: dict | list) -> list[Finding]:
    """Build Finding objects from parsed JSON data.

    Accepted shapes:
        * a list of finding dicts;
        * a dict wrapping such a list under ``findings``, ``results``,
          ``audit``, ``items`` or ``data`` (first list-valued key wins);
        * a single finding dict, recognised by a ``dimension`` or
          ``status`` key.

    Items that are not dicts are skipped. Field aliases are honoured:
    ``dimension``/``name``/``check``, ``detail``/``message``/``description``
    and ``fix``/``suggestion``/``fix_content``.

    Status is normalised case-insensitively: ``pass`` and ``fail`` are
    kept; ``failed``, ``failure``, ``error``, ``no`` and ``false``
    (including boolean ``False``) map to ``fail``; anything else, including
    a missing status, maps to ``pass``.

    Returns:
        The extracted findings, possibly empty.
    """
    items: list[dict] = []

    if isinstance(data, list):
        items = data
    elif isinstance(data, dict):
        # Try common wrapper keys.
        for key in ("findings", "results", "audit", "items", "data"):
            if isinstance(data.get(key), list):
                items = data[key]
                break
        # Otherwise the dict itself may be a single finding.
        if not items and ("dimension" in data or "status" in data):
            items = [data]

    fail_aliases = ("failed", "failure", "error", "no", "false")

    def as_text(value: object) -> str:
        # A JSON null must not surface as the literal string "None".
        return "" if value is None else str(value)

    findings: list[Finding] = []
    for item in items:
        if not isinstance(item, dict):
            continue

        dimension = item.get("dimension", item.get("name", item.get("check", "")))
        detail = item.get("detail", item.get("message", item.get("description", "")))
        fix = item.get("fix", item.get("suggestion", item.get("fix_content", "")))

        # Normalise before comparing so "FAIL", " Failed " and boolean False
        # are not silently reported as a pass.
        status = str(item.get("status", "pass")).strip().lower()
        if status not in ("pass", "fail"):
            status = "fail" if status in fail_aliases else "pass"

        findings.append(Finding(
            dimension=as_text(dimension),
            status=status,
            detail=as_text(detail),
            fix=str(fix) if fix else "",
        ))

    return findings
266
+
267
+
268
+ def _strip_outer_fence(content: str) -> str:
269
+ """Strip markdown code fence if the entire content is wrapped in one."""
270
+ lines = content.splitlines()
271
+ if len(lines) >= 2:
272
+ if lines[0].startswith("```") and lines[-1].strip() == "```":
273
+ return "\n".join(lines[1:-1])
274
+ return content