source-kb 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. cli/__init__.py +50 -0
  2. cli/__main__.py +5 -0
  3. cli/commands/__init__.py +1 -0
  4. cli/commands/anchor_fix.py +47 -0
  5. cli/commands/diff_doc.py +52 -0
  6. cli/commands/dispatch.py +77 -0
  7. cli/commands/extract.py +72 -0
  8. cli/commands/file_list.py +74 -0
  9. cli/commands/index.py +84 -0
  10. cli/commands/lock.py +89 -0
  11. cli/commands/merge.py +60 -0
  12. cli/commands/merge_delta.py +19 -0
  13. cli/commands/metadata.py +24 -0
  14. cli/commands/pipeline.py +45 -0
  15. cli/commands/post_merge.py +43 -0
  16. cli/commands/query.py +52 -0
  17. cli/commands/render.py +101 -0
  18. cli/commands/scan_repos.py +46 -0
  19. cli/commands/setup.py +94 -0
  20. cli/commands/split.py +196 -0
  21. cli/commands/stale_files.py +98 -0
  22. cli/commands/validate.py +191 -0
  23. core/__init__.py +32 -0
  24. core/config.py +261 -0
  25. core/docs/__init__.py +7 -0
  26. core/docs/section_updater.py +286 -0
  27. core/docs/shared.py +149 -0
  28. core/git.py +294 -0
  29. core/interfaces.py +249 -0
  30. core/monitor/__init__.py +5 -0
  31. core/monitor/progress.py +83 -0
  32. core/monitor/prompt_store.py +49 -0
  33. core/paths.py +141 -0
  34. core/preset.py +237 -0
  35. core/preset_accessors.py +202 -0
  36. core/preset_classify.py +132 -0
  37. core/preset_hooks.py +129 -0
  38. core/preset_profile.py +89 -0
  39. core/prompt/__init__.py +7 -0
  40. core/prompt/__main__.py +147 -0
  41. core/prompt/content.py +320 -0
  42. core/prompt/context_manager.py +164 -0
  43. core/prompt/renderer.py +236 -0
  44. core/prompt/response_parser.py +274 -0
  45. core/prompt/templates.py +357 -0
  46. core/prompt/validate_parity.py +162 -0
  47. core/prompt/variables.py +339 -0
  48. core/rag/__init__.py +22 -0
  49. core/rag/__main__.py +136 -0
  50. core/rag/bm25_index.py +268 -0
  51. core/rag/chunker.py +273 -0
  52. core/rag/embedder.py +151 -0
  53. core/rag/indexer.py +292 -0
  54. core/rag/loader.py +89 -0
  55. core/rag/retriever.py +82 -0
  56. core/skeleton/__init__.py +11 -0
  57. core/skeleton/__main__.py +934 -0
  58. core/skeleton/anchor_fix.py +250 -0
  59. core/skeleton/classify.py +331 -0
  60. core/skeleton/cmd_anchor_fix.py +43 -0
  61. core/skeleton/cmd_diff_doc.py +44 -0
  62. core/skeleton/cmd_lock.py +87 -0
  63. core/skeleton/cmd_merge_delta.py +41 -0
  64. core/skeleton/community.py +233 -0
  65. core/skeleton/dependency_graph.py +306 -0
  66. core/skeleton/diff_doc.py +248 -0
  67. core/skeleton/dispatch.py +273 -0
  68. core/skeleton/dispatch_render.py +319 -0
  69. core/skeleton/dispatch_source.py +111 -0
  70. core/skeleton/extract.py +218 -0
  71. core/skeleton/extract_methods.py +298 -0
  72. core/skeleton/file_list.py +239 -0
  73. core/skeleton/impact.py +278 -0
  74. core/skeleton/jar_download.py +177 -0
  75. core/skeleton/jar_resolver.py +186 -0
  76. core/skeleton/loader.py +162 -0
  77. core/skeleton/merge.py +278 -0
  78. core/skeleton/merge_delta.py +229 -0
  79. core/skeleton/metadata.py +96 -0
  80. core/skeleton/metadata_builders.py +264 -0
  81. core/skeleton/module_dag.py +330 -0
  82. core/skeleton/parsers/__init__.py +71 -0
  83. core/skeleton/parsers/jqassistant.py +300 -0
  84. core/skeleton/parsers/jqassistant_cypher.py +225 -0
  85. core/skeleton/parsers/regex.py +171 -0
  86. core/skeleton/parsers/treesitter.py +324 -0
  87. core/skeleton/parsers/treesitter_java.py +284 -0
  88. core/skeleton/parsers/treesitter_multi.py +289 -0
  89. core/skeleton/pom_parser.py +299 -0
  90. core/skeleton/post_merge.py +295 -0
  91. core/skeleton/post_merge_llm.py +82 -0
  92. core/skeleton/query.py +195 -0
  93. core/skeleton/shard_context.py +177 -0
  94. core/skeleton/split.py +180 -0
  95. core/skeleton/split_cache.py +107 -0
  96. core/skeleton/split_feedback.py +174 -0
  97. core/skeleton/split_plan.py +219 -0
  98. core/skeleton/split_plan_helpers.py +305 -0
  99. core/skeleton/split_plan_llm.py +274 -0
  100. core/utils.py +135 -0
  101. core/validators/__init__.py +65 -0
  102. core/validators/__main__.py +215 -0
  103. core/validators/consistency.py +203 -0
  104. core/validators/coverage.py +171 -0
  105. core/validators/duplicates.py +76 -0
  106. core/validators/engine.py +224 -0
  107. core/validators/links.py +76 -0
  108. core/validators/sampling.py +169 -0
  109. core/validators/structure.py +144 -0
  110. engine/__init__.py +7 -0
  111. engine/assembler.py +231 -0
  112. engine/confirm.py +65 -0
  113. engine/dedup.py +106 -0
  114. engine/main.py +211 -0
  115. engine/pipeline/__init__.py +163 -0
  116. engine/pipeline/recovery.py +250 -0
  117. engine/pipeline/steps/__init__.py +23 -0
  118. engine/pipeline/steps/audit.py +220 -0
  119. engine/pipeline/steps/audit_apply.py +195 -0
  120. engine/pipeline/steps/audit_helpers.py +155 -0
  121. engine/pipeline/steps/classify_llm.py +236 -0
  122. engine/pipeline/steps/classify_prompt.py +223 -0
  123. engine/pipeline/steps/finalize.py +160 -0
  124. engine/pipeline/steps/generate.py +169 -0
  125. engine/pipeline/steps/generate_batch.py +197 -0
  126. engine/pipeline/steps/generate_recovery.py +170 -0
  127. engine/pipeline/steps/llm_plan_split.py +253 -0
  128. engine/pipeline/steps/lock.py +64 -0
  129. engine/pipeline/steps/preflight.py +237 -0
  130. engine/pipeline/steps/preflight_adjust.py +147 -0
  131. engine/pipeline/steps/pregenerate.py +130 -0
  132. engine/pipeline/steps/quality.py +81 -0
  133. engine/pipeline/steps/skeleton.py +149 -0
  134. engine/pipeline/steps/source.py +163 -0
  135. engine/pipeline/steps/sync.py +117 -0
  136. engine/pipeline/steps/sync_finalize.py +237 -0
  137. engine/pipeline/steps/sync_update.py +341 -0
  138. engine/pipelines.py +91 -0
  139. engine/runner.py +335 -0
  140. engine/strategies/__init__.py +86 -0
  141. engine/strategies/api.py +128 -0
  142. engine/strategies/delegated.py +50 -0
  143. engine/strategies/dryrun.py +25 -0
  144. engine/two_phase.py +143 -0
  145. mcp_server/__init__.py +73 -0
  146. mcp_server/__main__.py +5 -0
  147. mcp_server/tools/__init__.py +1 -0
  148. mcp_server/tools/config.py +63 -0
  149. mcp_server/tools/discovery.py +276 -0
  150. mcp_server/tools/generation.py +184 -0
  151. mcp_server/tools/planning.py +144 -0
  152. mcp_server/tools/source.py +175 -0
  153. mcp_server/tools/validation.py +140 -0
  154. mcp_server/tools/workflow.py +166 -0
  155. mcp_server/workflow_loader.py +204 -0
  156. presets/generic/audit_dimensions.md +132 -0
  157. presets/generic/doc_types.yaml +152 -0
  158. presets/generic/preset.yaml +115 -0
  159. presets/java-spring/audit_dimensions.md +228 -0
  160. presets/java-spring/audit_dimensions.yaml +203 -0
  161. presets/java-spring/doc_types.yaml +269 -0
  162. presets/java-spring/hooks.py +122 -0
  163. presets/java-spring/preset.yaml +341 -0
  164. presets/java-spring/templates/README.md +34 -0
  165. presets/java-spring/templates/audit-system.md +15 -0
  166. presets/java-spring/templates/subagent-aop.md +105 -0
  167. presets/java-spring/templates/subagent-api.md +63 -0
  168. presets/java-spring/templates/subagent-architecture.md +111 -0
  169. presets/java-spring/templates/subagent-async-events.md +107 -0
  170. presets/java-spring/templates/subagent-audit-api-contracts.md +40 -0
  171. presets/java-spring/templates/subagent-audit-architecture.md +38 -0
  172. presets/java-spring/templates/subagent-audit-business.md +40 -0
  173. presets/java-spring/templates/subagent-audit-data-models.md +40 -0
  174. presets/java-spring/templates/subagent-business.md +129 -0
  175. presets/java-spring/templates/subagent-caching.md +75 -0
  176. presets/java-spring/templates/subagent-database-access.md +114 -0
  177. presets/java-spring/templates/subagent-enum.md +75 -0
  178. presets/java-spring/templates/subagent-error-handling.md +91 -0
  179. presets/java-spring/templates/subagent-external-integrations.md +80 -0
  180. presets/java-spring/templates/subagent-index.md +122 -0
  181. presets/java-spring/templates/subagent-messaging.md +97 -0
  182. presets/java-spring/templates/subagent-model.md +88 -0
  183. presets/java-spring/templates/subagent-observability.md +91 -0
  184. presets/java-spring/templates/subagent-scheduled.md +81 -0
  185. presets/java-spring/templates/subagent-security.md +102 -0
  186. presets/java-spring/templates/subagent-structure.md +101 -0
  187. presets/java-spring/templates/subagent-sync-section.md +34 -0
  188. presets/java-spring/templates/subagent-utils.md +73 -0
  189. presets/java-spring/templates/sync-system.md +8 -0
  190. presets/java-spring/workflow-extensions.md +112 -0
  191. skills/__init__.py +1 -0
  192. skills/_shared/README.md +30 -0
  193. skills/_shared/doc-coverage-shared.md +134 -0
  194. skills/_shared/doc-quality-standard.md +1058 -0
  195. skills/_shared/doc-subagent-rules.md +762 -0
  196. skills/_shared/windows-compat.md +89 -0
  197. skills/kb-audit/SKILL.md +52 -0
  198. skills/kb-audit/rules.md +88 -0
  199. skills/kb-audit/steps/step-01-prepare.md +75 -0
  200. skills/kb-audit/steps/step-02-audit.md +96 -0
  201. skills/kb-audit/steps/step-03-verify.md +65 -0
  202. skills/kb-audit/steps/step-04-report.md +64 -0
  203. skills/kb-init/SKILL.md +146 -0
  204. skills/kb-init/rules.md +187 -0
  205. skills/kb-init/steps/step-01-scope.md +62 -0
  206. skills/kb-init/steps/step-02-source.md +410 -0
  207. skills/kb-init/steps/step-03-generate.md +307 -0
  208. skills/kb-init/steps/step-04-quality.md +92 -0
  209. skills/kb-init/steps/step-05-finalize.md +132 -0
  210. skills/kb-init/templates/core/execution-modes.md +29 -0
  211. skills/kb-init/templates/core/output-only.md +4 -0
  212. skills/kb-init/templates/core/readwrite.md +33 -0
  213. skills/kb-search/SKILL.md +138 -0
  214. skills/kb-search/rules.md +64 -0
  215. skills/kb-sync/SKILL.md +43 -0
  216. skills/kb-sync/rules.md +70 -0
  217. skills/kb-sync/scripts/rebuild_module.py +91 -0
  218. skills/kb-sync/scripts/scan_repos.py +687 -0
  219. skills/kb-sync/steps/step-01-detect.md +72 -0
  220. skills/kb-sync/steps/step-02-update.md +71 -0
  221. skills/kb-sync/steps/step-03-verify.md +47 -0
  222. skills/kb-sync/steps/step-04-finalize.md +52 -0
  223. source_kb-0.2.2.dist-info/METADATA +194 -0
  224. source_kb-0.2.2.dist-info/RECORD +228 -0
  225. source_kb-0.2.2.dist-info/WHEEL +5 -0
  226. source_kb-0.2.2.dist-info/entry_points.txt +3 -0
  227. source_kb-0.2.2.dist-info/licenses/LICENSE +21 -0
  228. source_kb-0.2.2.dist-info/top_level.txt +6 -0
engine/dedup.py ADDED
@@ -0,0 +1,106 @@
1
+ """LLM-based document deduplication — CLI-only post-merge optimization.
2
+
3
+ Uses LLM to intelligently remove cross-document redundancy:
4
+ - Internal dedup: repeated tables/paragraphs within a document
5
+ - Cross-doc dedup: content that belongs in another document → replace with reference
6
+
7
+ Only processes documents exceeding a configurable size threshold.
8
+ Agent mode uses rule-based refinement (core/skeleton/merge.py) instead.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import logging
14
+ import re
15
+ import time
16
+ from pathlib import Path
17
+
18
+ from core.interfaces import LlmRequest, LlmStrategy
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ # Size thresholds in KB, hard-coded here (should come from config in production)
23
+ MIN_SIZE_KB = 15  # dedup_document() returns early when len(content) < MIN_SIZE_KB * 1024
24
+ CHUNK_THRESHOLD_KB = 60  # NOTE(review): not referenced in the visible code of this module — presumably a chunking cutoff; confirm
25
+ # System prompt sent with every dedup request; it states the editing rules the LLM must follow.
26
+ SYSTEM_PROMPT = (
27
+ "You are a technical documentation editing expert. Your task is to deduplicate and streamline a knowledge base document.\n"
28
+ "Rules:\n"
29
+ "1. Internal dedup: if the same content appears multiple times, keep only the most complete version\n"
30
+ "2. Cross-document dedup: replace content that belongs in another document with a cross-reference `> See [xxx.md](./xxx.md)`\n"
31
+ "3. Preserve structure: keep heading hierarchy, do not add new content\n"
32
+ "4. Merge conflicting data: keep the more specific version\n"
33
+ "Output the optimized full markdown document content directly."
34
+ )
35
+
36
+
37
def dedup_document(
    doc_path: Path,
    sibling_docs: list[str],
    strategy: LlmStrategy,
) -> tuple[bool, float]:
    """Ask the LLM to strip redundancy from one document and rewrite it in place.

    The file is left untouched when it is shorter than ``MIN_SIZE_KB * 1024``
    characters, when the LLM call does not finish with usable content, or when
    the rewritten text fails the size sanity check.

    Args:
        doc_path: Document to deduplicate; overwritten on success.
        sibling_docs: Names of the other documents in the same module, offered
            to the LLM as cross-reference targets.
        strategy: LLM execution strategy used to issue the request.

    Returns:
        ``(changed, saved_kb)`` — whether the file was rewritten and by how
        much it shrank (character count / 1024). ``(False, 0.0)`` whenever
        nothing was written.
    """
    original = doc_path.read_text(encoding="utf-8")
    # Sizes are character counts, not encoded byte counts.
    size_before = len(original)
    if size_before < MIN_SIZE_KB * 1024:
        return False, 0.0

    request = LlmRequest(
        system=SYSTEM_PROMPT,
        user=_build_prompt(doc_path.name, original, sibling_docs),
        max_tokens=16384,
        temperature=0.1,
    )
    response = strategy.call(request)
    if response.status != "done" or not response.content:
        return False, 0.0

    deduped = _strip_fence(response.content)
    size_after = len(deduped)

    # Safety net: accept only a genuine reduction that still keeps at least
    # 30% of the original text. Less means content was lost; equal or more
    # means content was added.
    if not (size_before * 0.3 <= size_after < size_before):
        return False, 0.0

    # size_after is measured before the trailing newline is restored.
    text_to_write = deduped if deduped.endswith("\n") else deduped + "\n"
    doc_path.write_text(text_to_write, encoding="utf-8")

    saved_kb = (size_before - size_after) / 1024
    logger.info("[dedup] %s: %.1fKB → %.1fKB (saved %.1fKB)",
                doc_path.name, size_before / 1024, size_after / 1024, saved_kb)
    return True, saved_kb
85
+
86
+
87
+ def _build_prompt(doc_name: str, content: str, siblings: list[str]) -> str:
88
+ siblings_str = ", ".join(f"`{s}`" for s in siblings if s != doc_name)
89
+ return (
90
+ f"Filename: `{doc_name}`\n"
91
+ f"Sibling documents in same directory: {siblings_str}\n\n"
92
+ f"```markdown\n{content}\n```\n\n"
93
+ f"Output the deduplicated and optimized full document content."
94
+ )
95
+
96
+
97
+ def _strip_fence(text: str) -> str:
98
+ """Remove markdown code fence wrapper from LLM output."""
99
+ content = text.strip()
100
+ for prefix in ("```markdown", "```md", "```"):
101
+ if content.startswith(prefix):
102
+ content = content[len(prefix):].strip()
103
+ break
104
+ if content.endswith("```"):
105
+ content = content[:-3].strip()
106
+ return content
engine/main.py ADDED
@@ -0,0 +1,211 @@
1
+ """CLI entry point — simplified commands with sensible defaults.
2
+
3
+ Usage:
4
+ python -m engine.main init --kb my-kb
5
+ python -m engine.main sync --kb my-kb
6
+ python -m engine.main audit --kb my-kb
7
+ python -m engine.main search --kb my-kb "query"
8
+ python -m engine.main index --kb my-kb
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import argparse
14
+ import logging
15
+ import sys
16
+
17
+ from core import __version__, setup_logging
18
+
19
# Force UTF-8 output: this module prints non-ASCII characters ("—", "→").
# Guarded because sys.stdout can be None or a replacement stream that has no
# reconfigure() method, which would otherwise raise at import time.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8")
20
+
21
+
22
def cmd_init(args: argparse.Namespace) -> None:
    """Run the kb-init pipeline, optionally resuming from a checkpoint.

    With ``--dry-run`` only the pipeline plan is printed. With ``--resume``
    a checkpoint is loaded from the knowledge directory and passed to the
    pipeline. Exits with status 1 when the pipeline does not pass.
    """
    _config, ctx = _setup(args)
    from engine.pipelines import build_init_pipeline
    from engine.pipeline.recovery import load_checkpoint, should_skip_step

    pipeline = build_init_pipeline()
    if args.dry_run:
        _print_plan(pipeline)
        return

    # Resume support: a missing checkpoint falls back to a full run.
    checkpoint = load_checkpoint(ctx.knowledge_dir) if args.resume else None
    if args.resume:
        if checkpoint is not None:
            print(f"Resuming from: {checkpoint.next_step}")
        else:
            print("No checkpoint found — starting from scratch")

    outcome = pipeline.execute(ctx, checkpoint=checkpoint)
    _print_result(outcome)
    if outcome.passed:
        return
    sys.exit(1)
46
+
47
+
48
def cmd_sync(args: argparse.Namespace) -> None:
    """Run the kb-sync pipeline.

    ``--dry-run`` is forwarded to the steps through ``ctx.state["dry_run"]``.
    Exits with status 1 when the pipeline does not pass.
    """
    _config, ctx = _setup(args)
    from engine.pipelines import build_sync_pipeline

    dry_run_requested = getattr(args, "dry_run", False)
    if dry_run_requested:
        ctx.state["dry_run"] = True

    outcome = build_sync_pipeline().execute(ctx)
    _print_result(outcome)
    if outcome.passed:
        return
    sys.exit(1)
61
+
62
+
63
def cmd_audit(args: argparse.Namespace) -> None:
    """Run the kb-audit pipeline.

    CLI options are forwarded to the steps through ``ctx.state``:
    ``--dry-run`` as ``dry_run``, ``--scope`` as ``audit_scope`` and
    ``--force`` as ``audit_force``. Exits with status 1 when the pipeline
    does not pass.
    """
    _config, ctx = _setup(args)
    from engine.pipelines import build_audit_pipeline

    if getattr(args, "dry_run", False):
        ctx.state["dry_run"] = True
    scope = getattr(args, "scope", None)
    if scope:
        ctx.state["audit_scope"] = scope
    if getattr(args, "force", False):
        ctx.state["audit_force"] = True

    outcome = build_audit_pipeline().execute(ctx)
    _print_result(outcome)
    if outcome.passed:
        return
    sys.exit(1)
80
+
81
+
82
def cmd_search(args: argparse.Namespace) -> None:
    """Search the knowledge base and print the ranked hits.

    Each hit is printed as a header line (rank, source, optional section and
    score), followed by the first 200 characters of its text and a blank line.
    """
    config, _ = _setup(args, need_ctx=False)
    from core.rag.retriever import retrieve

    hits = retrieve(args.query, config, kb_name=args.kb)
    if not hits:
        print(f"No results for '{args.query}'")
        return

    for rank, hit in enumerate(hits, 1):
        score = hit["score"]
        metadata = hit["metadata"]
        source = metadata.get("source", "?")
        section = metadata.get("section", "")
        header = f"[{rank}] {source}"
        if section:
            header = f"{header} > {section}"
        print(f"{header} (score: {score:.3f})")
        print(f" {hit['text'][:200]}...")
        print()
101
+
102
+
103
def cmd_index(args: argparse.Namespace) -> None:
    """Build or rebuild the vector index for one knowledge base.

    Loads the documents from the knowledge base's ``knowledge_dir``, chunks
    them, and builds the index into its configured ``collection``. Prints a
    notice and returns when no documents are found.
    """
    config, _ = _setup(args, need_ctx=False)
    from core.rag.loader import load_documents
    from core.rag.chunker import chunk_documents
    from core.rag.indexer import build_index

    kb_settings = config.get_kb(args.kb)
    knowledge_dir = kb_settings["knowledge_dir"]
    collection_name = kb_settings["collection"]

    documents = load_documents(knowledge_dir)
    if not documents:
        print(f"No documents found in {knowledge_dir}")
        return

    chunks = chunk_documents(documents)
    print(f"Loaded {len(documents)} docs, {len(chunks)} chunks. Building index...")
    build_index(chunks, collection_name, config, kb_name=args.kb)
    print(f"Index built: {len(chunks)} chunks → collection '{collection_name}'")
123
+
124
+
125
+ # ---------------------------------------------------------------------------
126
+ # Helpers
127
+ # ---------------------------------------------------------------------------
128
+
129
def _setup(args: argparse.Namespace, need_ctx: bool = True):
    """Load the project config and, when requested, build a pipeline context.

    Args:
        args: Parsed CLI arguments; reads ``config``, ``kb`` and, if present,
            ``module``.
        need_ctx: When False, skip context creation.

    Returns:
        ``(config, ctx)``; ``ctx`` is ``None`` when ``need_ctx`` is False.
    """
    from core.config import load_config, find_config
    from core.interfaces import PipelineContext
    from pathlib import Path

    # An explicit --config wins; otherwise the config file is auto-detected.
    config = load_config(Path(args.config) if args.config else find_config())

    ctx = None
    if need_ctx:
        ctx = PipelineContext.from_config(
            config.raw, args.kb, module=getattr(args, "module", None)
        )
    return config, ctx
143
+
144
+
145
+ def _print_plan(pipeline) -> None:
146
+ """Print pipeline steps without executing."""
147
+ print(f"[pipeline] {pipeline.name} — {len(pipeline.steps)} steps:")
148
+ for info in pipeline.describe():
149
+ cp = f" (checkpoint={info['checkpoint']})" if info["checkpoint"] else ""
150
+ print(f" {info['name']}{cp}")
151
+
152
+
153
+ def _print_result(result) -> None:
154
+ """Print pipeline execution result."""
155
+ markers = {"ok": "+", "skipped": "-", "failed": "!", "delegated": "~"}
156
+ print(f"\n{result.summary()}")
157
+ for step_name, sr in result.step_results:
158
+ marker = markers.get(sr.status, "?")
159
+ print(f" [{marker}] {step_name}: {sr.message}")
160
+
161
+
162
def main():
    """Parse the command line and dispatch to the matching ``cmd_*`` handler.

    Prints the help text and exits with status 1 when no sub-command is given.
    """
    parser = argparse.ArgumentParser(description="source-kb CLI")
    parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
    parser.add_argument("--config", help="kb-project.yaml path (auto-detected)")
    subcommands = parser.add_subparsers(dest="command")

    # init
    init_parser = subcommands.add_parser("init", help="Initialize knowledge base")
    init_parser.add_argument("--kb", required=True)
    init_parser.add_argument("--module", default=None)
    init_parser.add_argument("--dry-run", action="store_true")
    init_parser.add_argument("--resume", action="store_true", help="Resume from last checkpoint")

    # sync
    sync_parser = subcommands.add_parser("sync", help="Incremental sync")
    sync_parser.add_argument("--kb", required=True)
    sync_parser.add_argument("--module", default=None)
    sync_parser.add_argument("--dry-run", action="store_true", help="Preview changes without writing")

    # audit
    audit_parser = subcommands.add_parser("audit", help="Document quality audit")
    audit_parser.add_argument("--kb", required=True)
    audit_parser.add_argument("--module", default=None)
    audit_parser.add_argument("--scope", default=None, help="Audit only this doc type (e.g. business-logic)")
    audit_parser.add_argument("--dry-run", action="store_true", help="Preview without applying fixes")
    audit_parser.add_argument("--force", action="store_true", help="Ignore progress, re-audit all")

    # search
    search_parser = subcommands.add_parser("search", help="Search knowledge base")
    search_parser.add_argument("--kb", required=True)
    search_parser.add_argument("query")

    # index
    index_parser = subcommands.add_parser("index", help="Build/rebuild index")
    index_parser.add_argument("--kb", required=True)

    args = parser.parse_args()
    if not args.command:
        parser.print_help()
        sys.exit(1)

    setup_logging()

    handlers = {
        "init": cmd_init,
        "sync": cmd_sync,
        "audit": cmd_audit,
        "search": cmd_search,
        "index": cmd_index,
    }
    handler = handlers[args.command]
    handler(args)
208
+
209
+
210
+ if __name__ == "__main__":
211
+ main()
engine/pipeline/__init__.py ADDED
@@ -0,0 +1,163 @@
1
+ """CLI pipeline framework — Pipeline class, PipelineBuilder, step registry.
2
+
3
+ Orchestrates Step sequences with checkpoint validation, recovery, and rollback.
4
+
5
+ Usage:
6
+ from engine.pipeline import Pipeline, PipelineBuilder, register_step
7
+
8
+ pipeline = PipelineBuilder("kb-init").add("fetch-source").add("extract-skeleton").build()
9
+ result = pipeline.execute(ctx)
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import logging
15
+ import time
16
+ from dataclasses import dataclass, field
17
+ from typing import Any
18
+
19
+ from core.interfaces import Step, StepResult, PipelineContext
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ # ---------------------------------------------------------------------------
24
+ # Step registry
25
+ # ---------------------------------------------------------------------------
26
+
27
+ _STEP_REGISTRY: dict[str, type[Step]] = {}
28
+
29
+
30
def register_step(cls: type[Step]) -> type[Step]:
    """Class decorator that records a step class in the module-level registry.

    The registry key is the class's ``default_name`` attribute when it has
    one, otherwise the class name. The class is returned unchanged.
    """
    if hasattr(cls, "default_name"):
        registry_key = cls.default_name
    else:
        registry_key = cls.__name__
    _STEP_REGISTRY[registry_key] = cls
    return cls
36
+
37
+
38
def get_step(name: str) -> Step:
    """Create a fresh instance of the step registered under ``name``.

    Raises:
        KeyError: If no step is registered under ``name``; the message lists
            the registered names in sorted order.
    """
    if name in _STEP_REGISTRY:
        step_cls = _STEP_REGISTRY[name]
        return step_cls()
    available = sorted(_STEP_REGISTRY.keys())
    raise KeyError(f"Step '{name}' not registered. Available: {available}")
44
+
45
+
46
+ # ---------------------------------------------------------------------------
47
+ # PipelineResult
48
+ # ---------------------------------------------------------------------------
49
+
50
@dataclass
class PipelineResult:
    """Outcome of a full pipeline execution.

    Collects one ``(step name, StepResult)`` pair per recorded step and
    derives pass/fail information from them.
    """
    # Name of the pipeline that produced this result.
    pipeline_name: str
    # (step name, result) pairs in the order the steps were recorded.
    step_results: list[tuple[str, StepResult]] = field(default_factory=list)
    # Total execution time in seconds.
    elapsed: float = 0.0

    @property
    def passed(self) -> bool:
        """True when every recorded step is ok, skipped or delegated.

        An empty result counts as passed.
        """
        return all(r.ok or r.skipped or r.delegated for _, r in self.step_results)

    @property
    def has_delegated(self) -> bool:
        """True when at least one step was delegated."""
        return any(r.delegated for _, r in self.step_results)

    @property
    def failed_steps(self) -> list[tuple[str, StepResult]]:
        """The ``(name, result)`` pairs whose status is ``"failed"``."""
        return [(n, r) for n, r in self.step_results if r.status == "failed"]

    def summary(self) -> str:
        """Return a one-line PASS/FAIL summary with per-status counts.

        The delegated count is appended only when it is non-zero, so the
        output is unchanged for pipelines that delegate nothing.
        """
        ok = sum(1 for _, r in self.step_results if r.ok)
        skip = sum(1 for _, r in self.step_results if r.skipped)
        fail = sum(1 for _, r in self.step_results if r.status == "failed")
        delegated = sum(1 for _, r in self.step_results if r.delegated)
        status = "PASS" if self.passed else "FAIL"
        counts = f"{ok} ok, {skip} skipped, {fail} failed"
        if delegated:
            # Without this, delegated steps vanish from the totals.
            counts += f", {delegated} delegated"
        return f"{status}: {counts} ({self.elapsed:.1f}s)"
75
+
76
+
77
+ # ---------------------------------------------------------------------------
78
+ # Pipeline
79
+ # ---------------------------------------------------------------------------
80
+
81
class Pipeline:
    """Executes a sequence of Steps with error handling and rollback."""

    def __init__(self, name: str, steps: list[Step]):
        self.name = name
        self.steps = steps

    def execute(self, ctx: PipelineContext, checkpoint=None) -> PipelineResult:
        """Run the steps in order and collect their results.

        Args:
            ctx: Pipeline context handed to every step.
            checkpoint: Optional checkpoint to resume from. Steps for which
                ``should_skip_step(checkpoint, step.name)`` is true are
                recorded as skipped without running.

        Returns:
            A PipelineResult with one entry per recorded step. After a
            failure the remaining steps are not run and not recorded.
        """
        result = PipelineResult(pipeline_name=self.name)
        # Monotonic clock: elapsed time is unaffected by system clock changes.
        started = time.monotonic()
        executed: list[Step] = []
        short_circuit = False

        # Determine steps to skip if resuming from checkpoint
        skip_steps: set[str] = set()
        if checkpoint is not None:
            from engine.pipeline.recovery import should_skip_step
            skip_steps = {s.name for s in self.steps if should_skip_step(checkpoint, s.name)}

        for step in self.steps:
            if short_circuit:
                result.step_results.append((step.name, StepResult(status="skipped", message="short-circuited")))
                continue

            if step.name in skip_steps:
                result.step_results.append((step.name, StepResult(status="skipped", message="resumed (checkpoint)")))
                continue

            logger.info("[%s] running...", step.name)
            try:
                sr = step.run(ctx)
            except Exception as e:
                # Boundary handler: turn the exception into a failed result,
                # but keep the traceback in the log so it can be diagnosed.
                logger.exception("[%s] raised an exception", step.name)
                # Some exceptions stringify to ""; fall back to the type name.
                sr = StepResult(status="failed", message=str(e) or type(e).__name__)

            result.step_results.append((step.name, sr))
            logger.info("[%s] %s", step.name, sr)

            if sr.ok or sr.delegated:
                executed.append(step)
            elif sr.skipped:
                # A skipped step may ask for all later steps to be skipped too.
                if sr.details.get("short_circuit"):
                    short_circuit = True
            else:
                # Failed — roll back, including the failing step itself.
                executed.append(step)
                self._rollback(executed, ctx)
                break

        result.elapsed = time.monotonic() - started
        return result

    @staticmethod
    def _rollback(executed: list[Step], ctx: PipelineContext) -> None:
        """Roll back the given steps in reverse order.

        Best effort: an error in one rollback is logged and does not stop
        the remaining rollbacks.
        """
        for step in reversed(executed):
            try:
                step.rollback(ctx)
            except Exception as e:
                logger.error("[%s] rollback error: %s", step.name, e)

    def describe(self) -> list[dict[str, str]]:
        """Return each step's name and checkpoint ("" when it has none)."""
        return [{"name": s.name, "checkpoint": s.checkpoint or ""} for s in self.steps]
142
+
143
+
144
+ # ---------------------------------------------------------------------------
145
+ # PipelineBuilder
146
+ # ---------------------------------------------------------------------------
147
+
148
class PipelineBuilder:
    """Fluent builder that assembles a Pipeline from step instances or registered names."""

    def __init__(self, name: str):
        self._name = name
        self._steps: list[Step] = []

    def add(self, step: Step | str) -> PipelineBuilder:
        """Append a step and return the builder for chaining.

        A string is resolved to a new instance through the step registry;
        anything else is appended as given.
        """
        resolved = get_step(step) if isinstance(step, str) else step
        self._steps.append(resolved)
        return self

    def build(self) -> Pipeline:
        """Create the Pipeline from the steps added so far."""
        return Pipeline(self._name, self._steps)