source-kb 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. cli/__init__.py +50 -0
  2. cli/__main__.py +5 -0
  3. cli/commands/__init__.py +1 -0
  4. cli/commands/anchor_fix.py +47 -0
  5. cli/commands/diff_doc.py +52 -0
  6. cli/commands/dispatch.py +77 -0
  7. cli/commands/extract.py +72 -0
  8. cli/commands/file_list.py +74 -0
  9. cli/commands/index.py +84 -0
  10. cli/commands/lock.py +89 -0
  11. cli/commands/merge.py +60 -0
  12. cli/commands/merge_delta.py +19 -0
  13. cli/commands/metadata.py +24 -0
  14. cli/commands/pipeline.py +45 -0
  15. cli/commands/post_merge.py +43 -0
  16. cli/commands/query.py +52 -0
  17. cli/commands/render.py +101 -0
  18. cli/commands/scan_repos.py +46 -0
  19. cli/commands/setup.py +94 -0
  20. cli/commands/split.py +196 -0
  21. cli/commands/stale_files.py +98 -0
  22. cli/commands/validate.py +191 -0
  23. core/__init__.py +32 -0
  24. core/config.py +261 -0
  25. core/docs/__init__.py +7 -0
  26. core/docs/section_updater.py +286 -0
  27. core/docs/shared.py +149 -0
  28. core/git.py +294 -0
  29. core/interfaces.py +249 -0
  30. core/monitor/__init__.py +5 -0
  31. core/monitor/progress.py +83 -0
  32. core/monitor/prompt_store.py +49 -0
  33. core/paths.py +141 -0
  34. core/preset.py +237 -0
  35. core/preset_accessors.py +202 -0
  36. core/preset_classify.py +132 -0
  37. core/preset_hooks.py +129 -0
  38. core/preset_profile.py +89 -0
  39. core/prompt/__init__.py +7 -0
  40. core/prompt/__main__.py +147 -0
  41. core/prompt/content.py +320 -0
  42. core/prompt/context_manager.py +164 -0
  43. core/prompt/renderer.py +236 -0
  44. core/prompt/response_parser.py +274 -0
  45. core/prompt/templates.py +357 -0
  46. core/prompt/validate_parity.py +162 -0
  47. core/prompt/variables.py +339 -0
  48. core/rag/__init__.py +22 -0
  49. core/rag/__main__.py +136 -0
  50. core/rag/bm25_index.py +268 -0
  51. core/rag/chunker.py +273 -0
  52. core/rag/embedder.py +151 -0
  53. core/rag/indexer.py +292 -0
  54. core/rag/loader.py +89 -0
  55. core/rag/retriever.py +82 -0
  56. core/skeleton/__init__.py +11 -0
  57. core/skeleton/__main__.py +934 -0
  58. core/skeleton/anchor_fix.py +250 -0
  59. core/skeleton/classify.py +331 -0
  60. core/skeleton/cmd_anchor_fix.py +43 -0
  61. core/skeleton/cmd_diff_doc.py +44 -0
  62. core/skeleton/cmd_lock.py +87 -0
  63. core/skeleton/cmd_merge_delta.py +41 -0
  64. core/skeleton/community.py +233 -0
  65. core/skeleton/dependency_graph.py +306 -0
  66. core/skeleton/diff_doc.py +248 -0
  67. core/skeleton/dispatch.py +273 -0
  68. core/skeleton/dispatch_render.py +319 -0
  69. core/skeleton/dispatch_source.py +111 -0
  70. core/skeleton/extract.py +218 -0
  71. core/skeleton/extract_methods.py +298 -0
  72. core/skeleton/file_list.py +239 -0
  73. core/skeleton/impact.py +278 -0
  74. core/skeleton/jar_download.py +177 -0
  75. core/skeleton/jar_resolver.py +186 -0
  76. core/skeleton/loader.py +162 -0
  77. core/skeleton/merge.py +278 -0
  78. core/skeleton/merge_delta.py +229 -0
  79. core/skeleton/metadata.py +96 -0
  80. core/skeleton/metadata_builders.py +264 -0
  81. core/skeleton/module_dag.py +330 -0
  82. core/skeleton/parsers/__init__.py +71 -0
  83. core/skeleton/parsers/jqassistant.py +300 -0
  84. core/skeleton/parsers/jqassistant_cypher.py +225 -0
  85. core/skeleton/parsers/regex.py +171 -0
  86. core/skeleton/parsers/treesitter.py +324 -0
  87. core/skeleton/parsers/treesitter_java.py +284 -0
  88. core/skeleton/parsers/treesitter_multi.py +289 -0
  89. core/skeleton/pom_parser.py +299 -0
  90. core/skeleton/post_merge.py +295 -0
  91. core/skeleton/post_merge_llm.py +82 -0
  92. core/skeleton/query.py +195 -0
  93. core/skeleton/shard_context.py +177 -0
  94. core/skeleton/split.py +180 -0
  95. core/skeleton/split_cache.py +107 -0
  96. core/skeleton/split_feedback.py +174 -0
  97. core/skeleton/split_plan.py +219 -0
  98. core/skeleton/split_plan_helpers.py +305 -0
  99. core/skeleton/split_plan_llm.py +274 -0
  100. core/utils.py +135 -0
  101. core/validators/__init__.py +65 -0
  102. core/validators/__main__.py +215 -0
  103. core/validators/consistency.py +203 -0
  104. core/validators/coverage.py +171 -0
  105. core/validators/duplicates.py +76 -0
  106. core/validators/engine.py +224 -0
  107. core/validators/links.py +76 -0
  108. core/validators/sampling.py +169 -0
  109. core/validators/structure.py +144 -0
  110. engine/__init__.py +7 -0
  111. engine/assembler.py +231 -0
  112. engine/confirm.py +65 -0
  113. engine/dedup.py +106 -0
  114. engine/main.py +211 -0
  115. engine/pipeline/__init__.py +163 -0
  116. engine/pipeline/recovery.py +250 -0
  117. engine/pipeline/steps/__init__.py +23 -0
  118. engine/pipeline/steps/audit.py +220 -0
  119. engine/pipeline/steps/audit_apply.py +195 -0
  120. engine/pipeline/steps/audit_helpers.py +155 -0
  121. engine/pipeline/steps/classify_llm.py +236 -0
  122. engine/pipeline/steps/classify_prompt.py +223 -0
  123. engine/pipeline/steps/finalize.py +160 -0
  124. engine/pipeline/steps/generate.py +169 -0
  125. engine/pipeline/steps/generate_batch.py +197 -0
  126. engine/pipeline/steps/generate_recovery.py +170 -0
  127. engine/pipeline/steps/llm_plan_split.py +253 -0
  128. engine/pipeline/steps/lock.py +64 -0
  129. engine/pipeline/steps/preflight.py +237 -0
  130. engine/pipeline/steps/preflight_adjust.py +147 -0
  131. engine/pipeline/steps/pregenerate.py +130 -0
  132. engine/pipeline/steps/quality.py +81 -0
  133. engine/pipeline/steps/skeleton.py +149 -0
  134. engine/pipeline/steps/source.py +163 -0
  135. engine/pipeline/steps/sync.py +117 -0
  136. engine/pipeline/steps/sync_finalize.py +237 -0
  137. engine/pipeline/steps/sync_update.py +341 -0
  138. engine/pipelines.py +91 -0
  139. engine/runner.py +335 -0
  140. engine/strategies/__init__.py +86 -0
  141. engine/strategies/api.py +128 -0
  142. engine/strategies/delegated.py +50 -0
  143. engine/strategies/dryrun.py +25 -0
  144. engine/two_phase.py +143 -0
  145. mcp_server/__init__.py +73 -0
  146. mcp_server/__main__.py +5 -0
  147. mcp_server/tools/__init__.py +1 -0
  148. mcp_server/tools/config.py +63 -0
  149. mcp_server/tools/discovery.py +276 -0
  150. mcp_server/tools/generation.py +184 -0
  151. mcp_server/tools/planning.py +144 -0
  152. mcp_server/tools/source.py +175 -0
  153. mcp_server/tools/validation.py +140 -0
  154. mcp_server/tools/workflow.py +166 -0
  155. mcp_server/workflow_loader.py +204 -0
  156. presets/generic/audit_dimensions.md +132 -0
  157. presets/generic/doc_types.yaml +152 -0
  158. presets/generic/preset.yaml +115 -0
  159. presets/java-spring/audit_dimensions.md +228 -0
  160. presets/java-spring/audit_dimensions.yaml +203 -0
  161. presets/java-spring/doc_types.yaml +269 -0
  162. presets/java-spring/hooks.py +122 -0
  163. presets/java-spring/preset.yaml +341 -0
  164. presets/java-spring/templates/README.md +34 -0
  165. presets/java-spring/templates/audit-system.md +15 -0
  166. presets/java-spring/templates/subagent-aop.md +105 -0
  167. presets/java-spring/templates/subagent-api.md +63 -0
  168. presets/java-spring/templates/subagent-architecture.md +111 -0
  169. presets/java-spring/templates/subagent-async-events.md +107 -0
  170. presets/java-spring/templates/subagent-audit-api-contracts.md +40 -0
  171. presets/java-spring/templates/subagent-audit-architecture.md +38 -0
  172. presets/java-spring/templates/subagent-audit-business.md +40 -0
  173. presets/java-spring/templates/subagent-audit-data-models.md +40 -0
  174. presets/java-spring/templates/subagent-business.md +129 -0
  175. presets/java-spring/templates/subagent-caching.md +75 -0
  176. presets/java-spring/templates/subagent-database-access.md +114 -0
  177. presets/java-spring/templates/subagent-enum.md +75 -0
  178. presets/java-spring/templates/subagent-error-handling.md +91 -0
  179. presets/java-spring/templates/subagent-external-integrations.md +80 -0
  180. presets/java-spring/templates/subagent-index.md +122 -0
  181. presets/java-spring/templates/subagent-messaging.md +97 -0
  182. presets/java-spring/templates/subagent-model.md +88 -0
  183. presets/java-spring/templates/subagent-observability.md +91 -0
  184. presets/java-spring/templates/subagent-scheduled.md +81 -0
  185. presets/java-spring/templates/subagent-security.md +102 -0
  186. presets/java-spring/templates/subagent-structure.md +101 -0
  187. presets/java-spring/templates/subagent-sync-section.md +34 -0
  188. presets/java-spring/templates/subagent-utils.md +73 -0
  189. presets/java-spring/templates/sync-system.md +8 -0
  190. presets/java-spring/workflow-extensions.md +112 -0
  191. skills/__init__.py +1 -0
  192. skills/_shared/README.md +30 -0
  193. skills/_shared/doc-coverage-shared.md +134 -0
  194. skills/_shared/doc-quality-standard.md +1058 -0
  195. skills/_shared/doc-subagent-rules.md +762 -0
  196. skills/_shared/windows-compat.md +89 -0
  197. skills/kb-audit/SKILL.md +52 -0
  198. skills/kb-audit/rules.md +88 -0
  199. skills/kb-audit/steps/step-01-prepare.md +75 -0
  200. skills/kb-audit/steps/step-02-audit.md +96 -0
  201. skills/kb-audit/steps/step-03-verify.md +65 -0
  202. skills/kb-audit/steps/step-04-report.md +64 -0
  203. skills/kb-init/SKILL.md +146 -0
  204. skills/kb-init/rules.md +187 -0
  205. skills/kb-init/steps/step-01-scope.md +62 -0
  206. skills/kb-init/steps/step-02-source.md +410 -0
  207. skills/kb-init/steps/step-03-generate.md +307 -0
  208. skills/kb-init/steps/step-04-quality.md +92 -0
  209. skills/kb-init/steps/step-05-finalize.md +132 -0
  210. skills/kb-init/templates/core/execution-modes.md +29 -0
  211. skills/kb-init/templates/core/output-only.md +4 -0
  212. skills/kb-init/templates/core/readwrite.md +33 -0
  213. skills/kb-search/SKILL.md +138 -0
  214. skills/kb-search/rules.md +64 -0
  215. skills/kb-sync/SKILL.md +43 -0
  216. skills/kb-sync/rules.md +70 -0
  217. skills/kb-sync/scripts/rebuild_module.py +91 -0
  218. skills/kb-sync/scripts/scan_repos.py +687 -0
  219. skills/kb-sync/steps/step-01-detect.md +72 -0
  220. skills/kb-sync/steps/step-02-update.md +71 -0
  221. skills/kb-sync/steps/step-03-verify.md +47 -0
  222. skills/kb-sync/steps/step-04-finalize.md +52 -0
  223. source_kb-0.2.2.dist-info/METADATA +194 -0
  224. source_kb-0.2.2.dist-info/RECORD +228 -0
  225. source_kb-0.2.2.dist-info/WHEEL +5 -0
  226. source_kb-0.2.2.dist-info/entry_points.txt +3 -0
  227. source_kb-0.2.2.dist-info/licenses/LICENSE +21 -0
  228. source_kb-0.2.2.dist-info/top_level.txt +6 -0
@@ -0,0 +1,45 @@
1
+ """source-kb pipeline — Run engine pipelines (init/sync/audit) with LLM integration."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import sys
7
+
8
+
9
def register(subparsers: argparse._SubParsersAction) -> None:
    """Register the ``pipeline`` command and its ``init``/``sync``/``audit`` actions.

    The selected action is stored on the parsed namespace as ``action`` and
    every invocation of the command is routed to :func:`run`.
    """
    pipeline_cmd = subparsers.add_parser("pipeline", help="Run engine pipelines (requires LLM config)")
    actions = pipeline_cmd.add_subparsers(dest="action")

    # Full initialisation pipeline.
    init_cmd = actions.add_parser("init", help="Run full kb-init pipeline")
    init_cmd.add_argument("--kb", required=True)
    init_cmd.add_argument("--module", default=None)
    init_cmd.add_argument("--dry-run", action="store_true")
    init_cmd.add_argument("--resume", action="store_true", help="Resume from last checkpoint")
    init_cmd.add_argument("--config", help="kb-project.yaml path")

    # Incremental sync pipeline.
    sync_cmd = actions.add_parser("sync", help="Run incremental sync pipeline")
    sync_cmd.add_argument("--kb", required=True)
    sync_cmd.add_argument("--module", default=None)
    sync_cmd.add_argument("--dry-run", action="store_true")
    sync_cmd.add_argument("--config", help="kb-project.yaml path")

    # Document audit pipeline.
    audit_cmd = actions.add_parser("audit", help="Run document audit pipeline")
    audit_cmd.add_argument("--kb", required=True)
    audit_cmd.add_argument("--module", default=None)
    audit_cmd.add_argument("--scope", default=None, help="Audit only this doc type")
    audit_cmd.add_argument("--dry-run", action="store_true")
    audit_cmd.add_argument("--force", action="store_true")
    audit_cmd.add_argument("--config", help="kb-project.yaml path")

    pipeline_cmd.set_defaults(func=run)
35
+
36
+
37
def run(args: argparse.Namespace) -> None:
    """Dispatch ``source-kb pipeline <action>`` to the matching engine entry point.

    Prints a usage hint to stderr and exits with status 1 when no action
    was given on the command line.
    """
    action = getattr(args, "action", None)
    if not action:
        print("Usage: source-kb pipeline {init|sync|audit} --kb <name>", file=sys.stderr)
        sys.exit(1)

    # Imported lazily so the engine is only loaded when a pipeline actually runs.
    from engine.main import cmd_init, cmd_sync, cmd_audit

    handlers = {"init": cmd_init, "sync": cmd_sync, "audit": cmd_audit}
    handler = handlers[action]
    handler(args)
@@ -0,0 +1,43 @@
1
+ """source-kb post-merge — Post-merge refinement (dedup, terms, anchors)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
def register(subparsers: argparse._SubParsersAction) -> None:
    """Register the ``post-merge`` command; invocations are routed to :func:`run`."""
    post_merge_cmd = subparsers.add_parser(
        "post-merge",
        help="Run post-merge refinement on module docs",
    )
    post_merge_cmd.add_argument("--module-dir", required=True, help="Module directory")
    post_merge_cmd.set_defaults(func=run)
15
+
16
+
17
def run(args: argparse.Namespace) -> None:
    """Refine every top-level Markdown document in ``--module-dir``.

    Dot-files and ``README.md`` (any case) are skipped. A failure on one
    document is reported on stderr and counted, and processing continues
    with the next file. A JSON summary is written to stderr at the end.
    Exits with status 1 when the directory does not exist.
    """
    from core.skeleton.post_merge import refine_merged_doc

    target_dir = Path(args.module_dir)
    if not target_dir.is_dir():
        print(f"Error: directory not found: {target_dir}", file=sys.stderr)
        sys.exit(1)

    refined_count = 0
    unchanged_count = 0
    failures: list[str] = []

    for doc_path in sorted(target_dir.glob("*.md")):
        is_hidden = doc_path.name.startswith(".")
        if is_hidden or doc_path.name.lower() == "readme.md":
            continue
        try:
            outcome = refine_merged_doc(doc_path)
            if not outcome.changed:
                unchanged_count += 1
                continue
            outcome.apply()
            refined_count += 1
            print(f" {doc_path.name}: {outcome.summary()}")
        except Exception as exc:
            # Best effort: record the failure and keep going with the other docs.
            failures.append(f"{doc_path.name}: {exc}")
            print(f" {doc_path.name}: error - {exc}", file=sys.stderr)

    summary = {
        "status": "ok",
        "refined": refined_count,
        "unchanged": unchanged_count,
        "errors": len(failures),
    }
    print(json.dumps(summary, ensure_ascii=False), file=sys.stderr)
cli/commands/query.py ADDED
@@ -0,0 +1,52 @@
1
+ """source-kb query — Query skeleton data."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
def register(subparsers: argparse._SubParsersAction) -> None:
    """Register the ``query`` command; invocations are routed to :func:`run`."""
    query_cmd = subparsers.add_parser("query", help="Query skeleton data (stats, search, file-list)")
    query_cmd.add_argument(
        "subcmd",
        choices=["stats", "high-methods", "file-list", "method-count", "search"],
    )
    query_cmd.add_argument("path", help="Skeleton JSON path or directory")
    query_cmd.add_argument("query", nargs="?", default="", help="Search query (for search subcmd)")
    query_cmd.set_defaults(func=run)
17
+
18
+
19
def run(args: argparse.Namespace) -> None:
    """Load the skeleton at ``args.path`` and run the requested query sub-command.

    Listing sub-commands print at most 20 (30 for ``file-list``) entries on
    stdout followed by an "and N more" line, then a JSON status on stderr.
    ``stats`` and ``method-count`` print their JSON result on stdout.
    """
    from core.skeleton.query import load_skeleton, stats, high_methods, file_list, method_count, search

    entries = load_skeleton(Path(args.path))
    subcmd = args.subcmd

    if subcmd == "stats":
        print(json.dumps(stats(entries), ensure_ascii=False, indent=2))
        return

    if subcmd == "high-methods":
        found = high_methods(entries)
        for item in found[:20]:
            print(f"{item['file']} {item['method']} ({item['line_count']} lines)")
        hidden = len(found) - 20
        if hidden > 0:
            print(f" ... and {hidden} more")
        print(json.dumps({"status": "ok", "count": len(found)}, ensure_ascii=False), file=sys.stderr)
        return

    if subcmd == "file-list":
        paths = file_list(entries)
        print(f"Total files: {len(paths)}")
        for path_item in paths[:30]:
            print(f" {path_item}")
        hidden = len(paths) - 30
        if hidden > 0:
            print(f" ... and {hidden} more")
        print(json.dumps({"status": "ok", "files": len(paths)}, ensure_ascii=False), file=sys.stderr)
        return

    if subcmd == "method-count":
        total = method_count(entries)
        print(json.dumps({"status": "ok", "methods": total}, ensure_ascii=False))
        return

    if subcmd == "search":
        hits = search(entries, args.query)
        for hit in hits[:20]:
            print(f"[{hit['type']}] {hit['file']}:{hit['line']} {hit['name']}")
        hidden = len(hits) - 20
        if hidden > 0:
            print(f" ... and {hidden} more")
        print(json.dumps({"status": "ok", "matches": len(hits)}, ensure_ascii=False), file=sys.stderr)
cli/commands/render.py ADDED
@@ -0,0 +1,101 @@
1
+ """source-kb render — Render a sub-agent prompt from template."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
def register(subparsers: argparse._SubParsersAction) -> None:
    """Register the ``render`` command; invocations are routed to :func:`run`."""
    render_cmd = subparsers.add_parser("render", help="Render a sub-agent prompt from template")
    render_cmd.add_argument(
        "--template",
        help="Template filename (auto-resolved from doc_types.yaml if omitted)",
    )
    render_cmd.add_argument("--module", required=True, help="Module name")
    render_cmd.add_argument("--config", help="kb-project.yaml path")
    render_cmd.add_argument("--kb", required=True, help="Knowledge base name")
    render_cmd.add_argument("--doc-type", required=True, help="Document type")
    render_cmd.add_argument("--mode", default="readwrite", choices=["readwrite", "output-only"])
    render_cmd.add_argument("--output", help="Output file path")
    render_cmd.add_argument("--extra", nargs="*", help="Extra variables (key=value)")
    render_cmd.set_defaults(func=run)
22
+
23
+
24
def _read_execution_snippet(mode: str) -> str:
    """Return the execution-mode snippet for *mode*, or ``""`` when none is found.

    The snippet is looked up relative to the current directory first (the
    historical behaviour) and then relative to the installed package root,
    so the command also works when it is not started from a source checkout.
    """
    snippet_files = {
        "readwrite": Path("skills/kb-init/templates/core/readwrite.md"),
        "output-only": Path("skills/kb-init/templates/core/output-only.md"),
    }
    relative = snippet_files.get(mode)
    if relative is None:
        return ""
    # cli/commands/<this file> -> the directory that also contains ``skills/``.
    package_root = Path(__file__).resolve().parent.parent.parent
    for candidate in (relative, package_root / relative):
        if candidate.exists():
            return candidate.read_text(encoding="utf-8")
    return ""


def run(args: argparse.Namespace) -> None:
    """Render a sub-agent prompt and write it to disk.

    The template is taken from ``--template`` or, when omitted, from the
    preset's ``doc_types`` entry for ``--doc-type``. The result is written to
    ``--output`` or to ``knowledge/<module>/.meta/prompts/<doc-type>.md``.
    A JSON status line is printed to stderr on success.

    Exits with status 1 when no template mapping exists or the template
    cannot be found.
    """
    from core.config import load_config
    from core.preset import load_preset
    from core.prompt.renderer import render_prompt
    from core.prompt.variables import ReferencePromptAssembler

    config = load_config(Path(args.config) if args.config else None)
    kb_config = config.get_kb(args.kb)
    preset_name = kb_config.get("preset", "generic")
    preset = load_preset(preset_name)

    assembler = ReferencePromptAssembler(project_root=Path(".").resolve(), preset=preset)

    template_name = args.template
    if not template_name:
        doc_types = preset.get("doc_types", {})
        template_name = doc_types.get(args.doc_type, {}).get("template")
        if not template_name:
            print(f"Error: no template mapping for doc-type '{args.doc_type}' in {preset_name}/doc_types.yaml. "
                  f"Specify --template explicitly.", file=sys.stderr)
            sys.exit(1)

    template_path = _find_template(template_name, preset_name)
    if not template_path:
        print(f"Error: template not found: {template_name}", file=sys.stderr)
        sys.exit(1)

    extras = {}
    for item in args.extra or []:
        key, separator, value = item.partition("=")
        if not separator:
            # Previously dropped silently; tell the user why the variable is missing.
            print(f"Warning: ignoring --extra '{item}' (expected key=value)", file=sys.stderr)
            continue
        extras[key] = value

    execution_snippet = _read_execution_snippet(args.mode)

    rendered = render_prompt(
        template_path=template_path,
        config=config.raw,
        kb_name=args.kb,
        module_name=args.module,
        doc_type=args.doc_type,
        assembler=assembler,
        extras=extras,
        execution_snippet=execution_snippet,
        preset=preset,
    )

    if args.output:
        out_path = Path(args.output)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(rendered, encoding="utf-8")
        print(f"Rendered to: {args.output} ({len(rendered)} chars)")
    else:
        meta_prompts = Path(f"knowledge/{args.module}/.meta/prompts")
        meta_prompts.mkdir(parents=True, exist_ok=True)
        out_path = meta_prompts / f"{args.doc_type}.md"
        out_path.write_text(rendered, encoding="utf-8")
        print(f"Rendered to: {out_path} ({len(rendered)} chars)")

    print(json.dumps({"status": "ok", "chars": len(rendered)}, ensure_ascii=False), file=sys.stderr)
93
+
94
+
95
def _find_template(template_name: str, preset_name: str) -> Path | None:
    """Resolve *template_name* to a path.

    A name that already exists as a filesystem path is returned as-is;
    anything else is delegated to the preset's template lookup.
    """
    from core.preset import find_preset_template

    direct = Path(template_name)
    return direct if direct.exists() else find_preset_template(preset_name, template_name)
@@ -0,0 +1,46 @@
1
+ """source-kb scan-repos — Scan repositories for changes since last baseline."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import sys
7
+ from pathlib import Path
8
+
9
+
10
def register(subparsers: argparse._SubParsersAction) -> None:
    """Register the ``scan-repos`` command; invocations are routed to :func:`run`."""
    scan_cmd = subparsers.add_parser(
        "scan-repos",
        help="Scan repositories for changes since last baseline",
    )
    scan_cmd.add_argument("--config", help="Path to kb-project.yaml")
    scan_cmd.add_argument("--init", action="store_true", help="Initialize state file")
    scan_cmd.add_argument("--kb", help="Only scan specified knowledge base")
    scan_cmd.add_argument("--module", help="Only scan specified module")
    scan_cmd.add_argument("--force", action="store_true", help="Force scan (ignore state file)")
    scan_cmd.add_argument("--update-state", action="store_true", help="Update baseline commit in state")
    scan_cmd.add_argument("--batch-update", action="store_true", help="Batch update all baselines to latest")
    scan_cmd.add_argument("--commit", help="Commit hash (for --update-state)")
    scan_cmd.set_defaults(func=run)
21
+
22
+
23
def _load_scan_main(script_path: Path):
    """Load ``scan_repos.py`` from *script_path* and return its ``main`` callable.

    The script lives under ``skills/kb-sync/``; the hyphen makes that
    directory impossible to import as a package, so it is loaded by path.
    """
    import importlib.util

    module_name = "source_kb_scan_repos"
    spec = importlib.util.spec_from_file_location(module_name, script_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot load scan script: {script_path}")
    module = importlib.util.module_from_spec(spec)
    # Register before executing so code that looks itself up in sys.modules works.
    sys.modules[module_name] = module
    spec.loader.exec_module(module)
    return module.main


def run(args: argparse.Namespace) -> None:
    """Forward the parsed ``scan-repos`` options to the kb-sync scan script.

    The script's ``main()`` reads its options from ``sys.argv``, so an
    equivalent argument vector is installed for the duration of the call
    and the previous ``sys.argv`` is restored afterwards.
    """
    package_root = Path(__file__).resolve().parent.parent.parent
    # Kept so the script can import sibling top-level packages; avoid duplicates.
    if str(package_root) not in sys.path:
        sys.path.insert(0, str(package_root))

    script_path = package_root / "skills" / "kb-sync" / "scripts" / "scan_repos.py"
    if script_path.is_file():
        scan_main = _load_scan_main(script_path)
    else:
        # Legacy layout: fall back to the package-style import.
        from skills.kb_sync.scripts.scan_repos import main as scan_main

    argv = []
    if args.config:
        argv += ["--config", args.config]
    if args.init:
        argv.append("--init")
    if args.kb:
        argv += ["--kb", args.kb]
    if args.module:
        argv += ["--module", args.module]
    if args.force:
        argv.append("--force")
    if args.update_state:
        argv.append("--update-state")
    if args.batch_update:
        argv.append("--batch-update")
    if args.commit:
        argv += ["--commit", args.commit]

    saved_argv = sys.argv
    sys.argv = ["source-kb scan-repos"] + argv
    try:
        scan_main()
    finally:
        sys.argv = saved_argv
cli/commands/setup.py ADDED
@@ -0,0 +1,94 @@
1
+ """source-kb setup — Project setup and agent configuration."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
def register(subparsers: argparse._SubParsersAction) -> None:
    """Register the ``setup`` command; invocations are routed to :func:`run`."""
    setup_cmd = subparsers.add_parser("setup", help="Set up source-kb for a project")
    setup_cmd.add_argument(
        "--agent",
        choices=["claude", "kiro", "generic"],
        default="claude",
        help="Target agent platform",
    )
    setup_cmd.add_argument("--preset", default="java-spring", help="Preset to use")
    setup_cmd.add_argument("--project-dir", default=".", help="Project root directory")
    setup_cmd.set_defaults(func=run)
18
+
19
+
20
def run(args: argparse.Namespace) -> None:
    """Create a starter ``kb-project.yaml`` and agent-specific files in the project.

    Exits with status 0 without touching anything when ``kb-project.yaml``
    already exists, and with status 1 when ``--project-dir`` is not an
    existing directory. On success a JSON status line is printed to stderr.
    """
    project_dir = Path(args.project_dir).resolve()
    if not project_dir.is_dir():
        # Fail with a clear message instead of a traceback from write_text below.
        print(f"Error: project directory not found: {project_dir}", file=sys.stderr)
        sys.exit(1)

    config_path = project_dir / "kb-project.yaml"
    if config_path.exists():
        print(f"kb-project.yaml already exists at {config_path}")
        print("Use 'source-kb init' to regenerate, or edit manually.")
        sys.exit(0)

    config_template = {
        "version": "1.0",
        "knowledge_bases": {
            "my-kb": {
                "preset": args.preset,
                "knowledge_dir": "./knowledge",
                "collection": "my-kb-index",
                "source": {
                    "cache_dir": "./.source-cache",
                    "repos": [
                        {
                            "name": "my-service",
                            "url": "https://github.com/org/my-service.git",
                            "branch": "main",
                            "type": "service",
                        }
                    ],
                },
            }
        },
    }

    # Imported here so the "already exists" and error paths work without PyYAML.
    import yaml
    config_path.write_text(
        yaml.dump(config_template, default_flow_style=False, allow_unicode=True, sort_keys=False),
        encoding="utf-8",
    )
    print(f"Created: {config_path}")

    if args.agent == "claude":
        _setup_claude_skills(project_dir)
    elif args.agent == "kiro":
        _setup_kiro_skills(project_dir)

    print("\nSetup complete. Edit kb-project.yaml to configure your repositories.")
    print(json.dumps({"status": "ok", "agent": args.agent, "preset": args.preset},
                     ensure_ascii=False), file=sys.stderr)
66
+
67
+
68
+ def _setup_claude_skills(project_dir: Path) -> None:
69
+ claude_md = project_dir / "CLAUDE.md"
70
+ if claude_md.exists():
71
+ print(f" CLAUDE.md already exists, skipping")
72
+ return
73
+ content = (
74
+ "# source-kb\n\n"
75
+ "This project uses source-kb for knowledge base generation.\n\n"
76
+ "## Available commands\n\n"
77
+ "```\n"
78
+ "source-kb extract --repo <path> --preset <preset>\n"
79
+ "source-kb dispatch --kb <name> --module <module>\n"
80
+ "source-kb render --kb <name> --module <module> --doc-type <type>\n"
81
+ "source-kb merge --dir <module-dir>\n"
82
+ "source-kb validate --module-dir <dir> --preset <preset>\n"
83
+ "source-kb index --kb <name>\n"
84
+ "source-kb search --kb <name> <query>\n"
85
+ "```\n"
86
+ )
87
+ claude_md.write_text(content, encoding="utf-8")
88
+ print(f" Created: CLAUDE.md")
89
+
90
+
91
+ def _setup_kiro_skills(project_dir: Path) -> None:
92
+ kiro_dir = project_dir / ".kiro"
93
+ kiro_dir.mkdir(exist_ok=True)
94
+ print(f" Created: .kiro/ (add steering docs as needed)")
cli/commands/split.py ADDED
@@ -0,0 +1,196 @@
1
+ """source-kb split-files / split-apply — Shard splitting for large doc types."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
def register(subparsers: argparse._SubParsersAction) -> None:
    """Register the ``split-files`` and ``split-apply`` commands."""
    # split-files: compute shard file lists for one doc type.
    files_cmd = subparsers.add_parser("split-files", help="Generate shard file lists for a doc type")
    files_cmd.add_argument("--config", help="kb-project.yaml path")
    files_cmd.add_argument("--kb", required=True, help="Knowledge base name")
    files_cmd.add_argument("--module", required=True, help="Module name")
    files_cmd.add_argument("--doc-type", required=True, help="Document type to split")
    files_cmd.add_argument("--mode", default="readwrite", choices=["readwrite", "output-only"])
    files_cmd.set_defaults(func=run_split_files)

    # split-apply: validate and persist a grouping produced by an agent.
    apply_cmd = subparsers.add_parser("split-apply", help="Validate and apply Agent-provided grouping")
    apply_cmd.add_argument("--module-dir", required=True, help="Module knowledge directory")
    apply_cmd.add_argument("--doc-type", required=True, help="Document type")
    apply_cmd.add_argument("--groups", required=True, help="Path to groups JSON file (Agent output)")
    apply_cmd.set_defaults(func=run_split_apply)
25
+
26
+
27
def run_split_files(args: argparse.Namespace) -> None:
    """Plan shards for one doc type and write one file list per shard.

    Existing ``<doc-type>-*.txt`` lists in the module's file-list directory
    are removed before the new ones are written. When the plan recommends a
    single agent nothing is written. A JSON status line goes to stderr.

    Exits with status 1 when no files are found for the doc type.
    """
    from core.config import load_config
    from core.preset import load_preset, get_doc_type_mapping
    from core.skeleton.split import SplitConfig
    from core.skeleton.split_plan import plan_splits
    from core.skeleton.dispatch import get_file_list_with_stats
    from core.skeleton.file_list import load_skeleton as load_skeleton_entries
    from core.paths import file_list_dir

    config = load_config(Path(args.config) if args.config else None)
    kb_config = config.get_kb(args.kb)
    preset_name = kb_config.get("preset", "generic")
    preset = load_preset(preset_name)

    knowledge_dir = Path(kb_config["knowledge_dir"])
    source = kb_config.get("source", {})
    cache_dir = Path(source.get("cache_dir", "./.source-cache"))

    module_name = args.module
    module_dir = knowledge_dir / module_name

    if source.get("structure") == "monorepo":
        repo_name = source.get("repo_name", "repo")
        # .get(): a module entry without a name must not abort the lookup.
        module_cfg = next((m for m in source.get("modules", []) if m.get("name") == module_name), {})
        module_path = module_cfg.get("path", module_name)
        source_cache = cache_dir / repo_name / module_path
    else:
        source_cache = cache_dir / module_name

    doc_type = args.doc_type
    dt_mapping = get_doc_type_mapping(preset)
    split_config = SplitConfig.from_preset(preset, mode=args.mode)

    files = get_file_list_with_stats(module_dir, doc_type, source_cache, dt_mapping)
    if not files:
        print(f"No files found for doc-type '{doc_type}'", file=sys.stderr)
        sys.exit(1)

    entries = load_skeleton_entries(module_dir)
    plan = plan_splits(
        entries=entries, file_list=files, split_config=split_config,
        doc_type=doc_type, module_dir=module_dir,
    )

    if plan.recommended_agents <= 1:
        print(f"No split needed for {doc_type} (strategy: {plan.strategy})")
        print(json.dumps({"status": "ok", "splits": 1, "strategy": plan.strategy},
                         ensure_ascii=False), file=sys.stderr)
        return

    fl_dir = file_list_dir(module_dir)
    fl_dir.mkdir(parents=True, exist_ok=True)

    for old_file in fl_dir.glob(f"{doc_type}-*.txt"):
        old_file.unlink()

    shard_info = []
    name_to_rel = {f.get("name", ""): f.get("rel_path", f.get("name", "")) for f in files}
    used_names: set[str] = set()

    for i, split in enumerate(plan.splits, 1):
        fallback = f"shard-{i}"
        shard_name = split.get("name") or fallback
        if len(shard_name) > 30 or "/" in shard_name:
            shard_name = fallback
        safe_name = shard_name.replace("/", "_").replace("\\", "_").replace(" ", "-").replace(":", "")

        # Two shards mapping to the same file name would overwrite each other's
        # list, so fall back to the positional name and, if needed, a suffix.
        if not safe_name or safe_name in used_names:
            safe_name = fallback
        attempt = 1
        while safe_name in used_names:
            attempt += 1
            safe_name = f"{fallback}-{attempt}"
        used_names.add(safe_name)

        shard_file = fl_dir / f"{doc_type}-{safe_name}.txt"
        shard_files = split.get("files", [])
        lines_out = [name_to_rel.get(fname, fname) for fname in shard_files]
        shard_file.write_text("\n".join(lines_out) + "\n", encoding="utf-8")
        shard_info.append({"shard_name": safe_name, "file_count": len(lines_out), "lines": split.get("lines", 0)})

    print(f"Split {doc_type} into {len(plan.splits)} shards (strategy: {plan.strategy})")
    for i, info in enumerate(shard_info, 1):
        print(f" {i}. {info['shard_name']} ({info['file_count']} files, {info['lines']} lines)")

    print(json.dumps({"status": "ok", "doc_type": doc_type, "splits": len(plan.splits),
                      "strategy": plan.strategy}, ensure_ascii=False), file=sys.stderr)
106
+
107
+
108
def _split_apply_fail(message: str) -> None:
    """Print a JSON error object to stdout and exit with status 1."""
    print(json.dumps({"status": "error", "message": message}))
    sys.exit(1)


def _load_json_or_fail(path: Path, label: str):
    """Parse *path* as JSON, reporting unreadable or malformed input as a JSON error."""
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        _split_apply_fail(f"Cannot read {label} {path}: {exc}")


def run_split_apply(args: argparse.Namespace) -> None:
    """Validate an agent-provided grouping and write one shard list per group.

    The grouping (``--groups``, a JSON array of ``{"name", "files"}``
    objects) is checked against the pending grouping request stored under
    ``<module-dir>/.meta/split-requests/``: every file must be known and
    assigned once, and groups must respect the request's size and balance
    constraints. On success the shard lists are written to
    ``<module-dir>/.meta/shards/``, the request file is deleted and a JSON
    summary is printed to stdout.

    Any validation problem is printed as a JSON error object on stdout and
    the process exits with status 1 without writing shard files.
    """
    module_dir = Path(args.module_dir)
    doc_type = args.doc_type
    groups_path = Path(args.groups)

    if not groups_path.exists():
        _split_apply_fail(f"Groups file not found: {groups_path}")

    request_path = module_dir / ".meta" / "split-requests" / f"{doc_type}-grouping-request.json"
    if not request_path.exists():
        _split_apply_fail(f"No grouping request found: {request_path}")

    request = _load_json_or_fail(request_path, "grouping request")
    if not isinstance(request, dict):
        _split_apply_fail(f"Grouping request must be a JSON object: {request_path}")
    constraints = request.get("constraints") or {}
    file_lookup = {f["name"]: f for f in request.get("files") or []}
    all_files = set(file_lookup)

    groups = _load_json_or_fail(groups_path, "groups file")
    if not isinstance(groups, list):
        _split_apply_fail("Groups must be a JSON array")

    errors: list[str] = []
    assigned: set[str] = set()
    group_stats: list[dict] = []

    for index, group in enumerate(groups, 1):
        if not isinstance(group, dict):
            errors.append(f"Group #{index}: expected an object with 'name' and 'files'")
            continue
        name = group.get("name", f"group-{index}")
        resolved: list[str] = []
        for fname in group.get("files", []):
            if not isinstance(fname, str) or not fname:
                errors.append(f"Group '{name}': unknown file '{fname}'")
                continue
            if fname in all_files:
                resolved.append(fname)
                continue
            # Suffix match for abbreviated names. Sorted so the outcome does not
            # depend on set iteration order; more than one hit is ambiguous.
            candidates = sorted(known for known in all_files if known.endswith(fname))
            if len(candidates) == 1:
                resolved.append(candidates[0])
            elif candidates:
                errors.append(f"Group '{name}': ambiguous file '{fname}' matches {len(candidates)} files")
            else:
                errors.append(f"Group '{name}': unknown file '{fname}'")
        for f in resolved:
            if f in assigned:
                errors.append(f"Group '{name}': duplicate file '{f}'")
            assigned.add(f)
        lines = sum(file_lookup.get(f, {}).get("lines", 0) for f in resolved)
        group_stats.append({"name": name, "files": resolved, "file_count": len(resolved), "lines": lines})

    missing = all_files - assigned
    if missing and constraints.get("all_files_must_be_assigned", True):
        errors.append(f"{len(missing)} files not assigned: {sorted(missing)[:5]}...")

    max_files = constraints.get("max_files_per_group", 80)
    max_lines = constraints.get("max_lines_per_group", 10000)
    max_ratio = constraints.get("max_imbalance_ratio", 3.0)

    for gs in group_stats:
        if gs["file_count"] > max_files:
            errors.append(f"Group '{gs['name']}': {gs['file_count']} files > max {max_files}")
        if gs["lines"] > max_lines:
            errors.append(f"Group '{gs['name']}': {gs['lines']} lines > max {max_lines}")

    # Empty groups are left out of the balance check.
    line_counts = [gs["lines"] for gs in group_stats if gs["lines"] > 0]
    if line_counts:
        ratio = max(line_counts) / max(min(line_counts), 1)
        if ratio > max_ratio:
            errors.append(f"Imbalance ratio {ratio:.1f}x > {max_ratio}x")

    if errors:
        print(json.dumps({"status": "error", "errors": errors}, ensure_ascii=False))
        sys.exit(1)

    shards_dir = module_dir / ".meta" / "shards"
    shards_dir.mkdir(parents=True, exist_ok=True)

    # Drop numbered shard lists from an earlier run so a smaller grouping
    # does not leave stale higher-numbered shards behind.
    prefix = f"{doc_type}-shard-"
    for existing in list(shards_dir.iterdir()):
        file_name = existing.name
        if (file_name.startswith(prefix) and file_name.endswith(".txt")
                and file_name[len(prefix):-len(".txt")].isdigit()):
            existing.unlink()

    for i, gs in enumerate(group_stats, 1):
        shard_path = shards_dir / f"{doc_type}-shard-{i:02d}.txt"
        rel_paths = [file_lookup.get(fname, {}).get("rel_path", fname) for fname in gs["files"]]
        shard_path.write_text("\n".join(rel_paths) + "\n", encoding="utf-8")

    request_path.unlink(missing_ok=True)

    print(json.dumps({
        "status": "ok", "doc_type": doc_type, "shards": len(group_stats),
        "groups": [{"name": gs["name"], "file_count": gs["file_count"], "lines": gs["lines"]}
                   for gs in group_stats],
    }, ensure_ascii=False))
@@ -0,0 +1,98 @@
1
+ """source-kb stale-files — Detect stale/orphaned documentation files."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ import time
9
+ from pathlib import Path
10
+
11
+
12
def register(subparsers: argparse._SubParsersAction) -> None:
    """Attach the ``stale-files`` subcommand to the CLI.

    Args:
        subparsers: Sub-parser collection the new command is added to.

    The command accepts an optional ``--config`` path and the required
    ``--kb`` and ``--module`` names; ``run`` is stored as its ``func`` default.
    """
    stale_parser = subparsers.add_parser(
        "stale-files",
        help="Detect stale docs not in dispatch plan",
    )
    stale_parser.add_argument("--config", help="kb-project.yaml path")

    # Both remaining flags are mandatory; keep the original declaration order
    # so the generated --help output is unchanged.
    required_flags = (
        ("--kb", "Knowledge base name"),
        ("--module", "Module name"),
    )
    for flag, description in required_flags:
        stale_parser.add_argument(flag, required=True, help=description)

    stale_parser.set_defaults(func=run)
18
+
19
+
20
def run(args: argparse.Namespace) -> None:
    """Report Markdown docs in a module directory that the dispatch plan does not expect.

    Loads the project config and the knowledge base's preset, computes the
    dispatch plan for ``args.module``, and compares the plan's
    ``doc_filename`` values (plus every preset doc type whose ``conditional``
    flag is false) against the ``*.md`` files directly inside the module's
    knowledge directory. ``README.md`` (any letter case) and dot-files are
    never reported.

    A human-readable Markdown table (or a "no stale files" line) is printed
    to stdout, and a one-line JSON summary is printed to stderr.

    Args:
        args: Parsed CLI arguments; reads ``config`` (optional path),
            ``kb`` and ``module``.
    """
    # Project imports stay local to the command, as in the original.
    from core.config import load_config
    from core.preset import load_preset, get_doc_type_mapping
    from core.skeleton.dispatch import compute_dispatch_plan

    config = load_config(Path(args.config) if args.config else None)
    kb_config = config.get_kb(args.kb)
    preset = load_preset(kb_config.get("preset", "generic"))

    knowledge_dir = Path(kb_config["knowledge_dir"])
    source = kb_config.get("source", {})
    cache_dir = Path(source.get("cache_dir", "./.source-cache"))

    module_name = args.module
    module_dir = knowledge_dir / module_name

    # Locate the cached source tree for this module.
    if source.get("structure") == "monorepo":
        repo_name = source.get("repo_name", "repo")
        # .get("name") so an entry without a name is skipped instead of
        # raising KeyError (matches the repos lookup below).
        module_cfg = next(
            (m for m in source.get("modules", []) if m.get("name") == module_name),
            {},
        )
        source_cache = cache_dir / repo_name / module_cfg.get("path", module_name)
    else:
        source_cache = cache_dir / module_name

    # Module type comes from the matching repo entry, defaulting to "service".
    # NOTE(review): for monorepo layouts the type is still only looked up in
    # source["repos"], not in the matched modules entry - confirm intended.
    module_type = "service"
    for repo in source.get("repos", []):
        if repo.get("name") == module_name:
            module_type = repo.get("type", "service")
            break

    plan = compute_dispatch_plan(
        preset=preset, module_dir=module_dir, source_cache=source_cache,
        mode="readwrite", module_name=module_name, module_type=module_type,
    )

    # Expected = everything in the plan + every non-conditional doc type.
    expected_files = {e.doc_filename for e in plan.entries}
    for dt_key, dt_config in preset.get("doc_types", {}).items():
        if isinstance(dt_config, dict) and not dt_config.get("conditional", True):
            expected_files.add(dt_config.get("filename", f"{dt_key}.md"))

    # glob() order depends on the file system; sort so the report is stable.
    existing_files: list[Path] = []
    if module_dir.is_dir():
        existing_files = sorted(
            f for f in module_dir.glob("*.md")
            if not f.name.startswith(".") and f.name.lower() != "readme.md"
        )

    # presumably maps doc-type keys to filenames; only the values are used here
    doc_type_filenames = set(get_doc_type_mapping(preset).values())
    type_config = preset.get("module_types", {}).get(module_type, {})
    skip_docs = set(type_config.get("skip", []))

    stale: list[dict] = []
    for f in existing_files:
        if f.name in expected_files:
            continue

        # One stat() call supplies both size and mtime.
        info = f.stat()
        mtime_str = time.strftime("%Y-%m-%d %H:%M", time.localtime(info.st_mtime))
        size_kb = round(info.st_size / 1024, 1)

        if f.name in skip_docs:
            reason = f"skipped for {module_type} module type"
        elif f.name in doc_type_filenames:
            reason = "file classification did not trigger this doc"
        else:
            reason = "not in current dispatch plan"

        stale.append({"file": f.name, "size_kb": size_kb, "last_modified": mtime_str, "reason": reason})

    if stale:
        print(f"Found {len(stale)} stale/orphaned files:\n")
        print("| File | Size | Last Modified | Reason |")
        print("|------|------|---------------|--------|")
        for s in stale:
            print(f"| {s['file']} | {s['size_kb']}KB | {s['last_modified']} | {s['reason']} |")
    else:
        print("No stale files found. All docs are in the dispatch plan.")

    # The JSON summary goes to stderr, separate from the table on stdout.
    print(json.dumps({"status": "ok", "stale_count": len(stale),
                      "stale_files": [s["file"] for s in stale]}, ensure_ascii=False), file=sys.stderr)