source-kb 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. cli/__init__.py +50 -0
  2. cli/__main__.py +5 -0
  3. cli/commands/__init__.py +1 -0
  4. cli/commands/anchor_fix.py +47 -0
  5. cli/commands/diff_doc.py +52 -0
  6. cli/commands/dispatch.py +77 -0
  7. cli/commands/extract.py +72 -0
  8. cli/commands/file_list.py +74 -0
  9. cli/commands/index.py +84 -0
  10. cli/commands/lock.py +89 -0
  11. cli/commands/merge.py +60 -0
  12. cli/commands/merge_delta.py +19 -0
  13. cli/commands/metadata.py +24 -0
  14. cli/commands/pipeline.py +45 -0
  15. cli/commands/post_merge.py +43 -0
  16. cli/commands/query.py +52 -0
  17. cli/commands/render.py +101 -0
  18. cli/commands/scan_repos.py +46 -0
  19. cli/commands/setup.py +94 -0
  20. cli/commands/split.py +196 -0
  21. cli/commands/stale_files.py +98 -0
  22. cli/commands/validate.py +191 -0
  23. core/__init__.py +32 -0
  24. core/config.py +261 -0
  25. core/docs/__init__.py +7 -0
  26. core/docs/section_updater.py +286 -0
  27. core/docs/shared.py +149 -0
  28. core/git.py +294 -0
  29. core/interfaces.py +249 -0
  30. core/monitor/__init__.py +5 -0
  31. core/monitor/progress.py +83 -0
  32. core/monitor/prompt_store.py +49 -0
  33. core/paths.py +141 -0
  34. core/preset.py +237 -0
  35. core/preset_accessors.py +202 -0
  36. core/preset_classify.py +132 -0
  37. core/preset_hooks.py +129 -0
  38. core/preset_profile.py +89 -0
  39. core/prompt/__init__.py +7 -0
  40. core/prompt/__main__.py +147 -0
  41. core/prompt/content.py +320 -0
  42. core/prompt/context_manager.py +164 -0
  43. core/prompt/renderer.py +236 -0
  44. core/prompt/response_parser.py +274 -0
  45. core/prompt/templates.py +357 -0
  46. core/prompt/validate_parity.py +162 -0
  47. core/prompt/variables.py +339 -0
  48. core/rag/__init__.py +22 -0
  49. core/rag/__main__.py +136 -0
  50. core/rag/bm25_index.py +268 -0
  51. core/rag/chunker.py +273 -0
  52. core/rag/embedder.py +151 -0
  53. core/rag/indexer.py +292 -0
  54. core/rag/loader.py +89 -0
  55. core/rag/retriever.py +82 -0
  56. core/skeleton/__init__.py +11 -0
  57. core/skeleton/__main__.py +934 -0
  58. core/skeleton/anchor_fix.py +250 -0
  59. core/skeleton/classify.py +331 -0
  60. core/skeleton/cmd_anchor_fix.py +43 -0
  61. core/skeleton/cmd_diff_doc.py +44 -0
  62. core/skeleton/cmd_lock.py +87 -0
  63. core/skeleton/cmd_merge_delta.py +41 -0
  64. core/skeleton/community.py +233 -0
  65. core/skeleton/dependency_graph.py +306 -0
  66. core/skeleton/diff_doc.py +248 -0
  67. core/skeleton/dispatch.py +273 -0
  68. core/skeleton/dispatch_render.py +319 -0
  69. core/skeleton/dispatch_source.py +111 -0
  70. core/skeleton/extract.py +218 -0
  71. core/skeleton/extract_methods.py +298 -0
  72. core/skeleton/file_list.py +239 -0
  73. core/skeleton/impact.py +278 -0
  74. core/skeleton/jar_download.py +177 -0
  75. core/skeleton/jar_resolver.py +186 -0
  76. core/skeleton/loader.py +162 -0
  77. core/skeleton/merge.py +278 -0
  78. core/skeleton/merge_delta.py +229 -0
  79. core/skeleton/metadata.py +96 -0
  80. core/skeleton/metadata_builders.py +264 -0
  81. core/skeleton/module_dag.py +330 -0
  82. core/skeleton/parsers/__init__.py +71 -0
  83. core/skeleton/parsers/jqassistant.py +300 -0
  84. core/skeleton/parsers/jqassistant_cypher.py +225 -0
  85. core/skeleton/parsers/regex.py +171 -0
  86. core/skeleton/parsers/treesitter.py +324 -0
  87. core/skeleton/parsers/treesitter_java.py +284 -0
  88. core/skeleton/parsers/treesitter_multi.py +289 -0
  89. core/skeleton/pom_parser.py +299 -0
  90. core/skeleton/post_merge.py +295 -0
  91. core/skeleton/post_merge_llm.py +82 -0
  92. core/skeleton/query.py +195 -0
  93. core/skeleton/shard_context.py +177 -0
  94. core/skeleton/split.py +180 -0
  95. core/skeleton/split_cache.py +107 -0
  96. core/skeleton/split_feedback.py +174 -0
  97. core/skeleton/split_plan.py +219 -0
  98. core/skeleton/split_plan_helpers.py +305 -0
  99. core/skeleton/split_plan_llm.py +274 -0
  100. core/utils.py +135 -0
  101. core/validators/__init__.py +65 -0
  102. core/validators/__main__.py +215 -0
  103. core/validators/consistency.py +203 -0
  104. core/validators/coverage.py +171 -0
  105. core/validators/duplicates.py +76 -0
  106. core/validators/engine.py +224 -0
  107. core/validators/links.py +76 -0
  108. core/validators/sampling.py +169 -0
  109. core/validators/structure.py +144 -0
  110. engine/__init__.py +7 -0
  111. engine/assembler.py +231 -0
  112. engine/confirm.py +65 -0
  113. engine/dedup.py +106 -0
  114. engine/main.py +211 -0
  115. engine/pipeline/__init__.py +163 -0
  116. engine/pipeline/recovery.py +250 -0
  117. engine/pipeline/steps/__init__.py +23 -0
  118. engine/pipeline/steps/audit.py +220 -0
  119. engine/pipeline/steps/audit_apply.py +195 -0
  120. engine/pipeline/steps/audit_helpers.py +155 -0
  121. engine/pipeline/steps/classify_llm.py +236 -0
  122. engine/pipeline/steps/classify_prompt.py +223 -0
  123. engine/pipeline/steps/finalize.py +160 -0
  124. engine/pipeline/steps/generate.py +169 -0
  125. engine/pipeline/steps/generate_batch.py +197 -0
  126. engine/pipeline/steps/generate_recovery.py +170 -0
  127. engine/pipeline/steps/llm_plan_split.py +253 -0
  128. engine/pipeline/steps/lock.py +64 -0
  129. engine/pipeline/steps/preflight.py +237 -0
  130. engine/pipeline/steps/preflight_adjust.py +147 -0
  131. engine/pipeline/steps/pregenerate.py +130 -0
  132. engine/pipeline/steps/quality.py +81 -0
  133. engine/pipeline/steps/skeleton.py +149 -0
  134. engine/pipeline/steps/source.py +163 -0
  135. engine/pipeline/steps/sync.py +117 -0
  136. engine/pipeline/steps/sync_finalize.py +237 -0
  137. engine/pipeline/steps/sync_update.py +341 -0
  138. engine/pipelines.py +91 -0
  139. engine/runner.py +335 -0
  140. engine/strategies/__init__.py +86 -0
  141. engine/strategies/api.py +128 -0
  142. engine/strategies/delegated.py +50 -0
  143. engine/strategies/dryrun.py +25 -0
  144. engine/two_phase.py +143 -0
  145. mcp_server/__init__.py +73 -0
  146. mcp_server/__main__.py +5 -0
  147. mcp_server/tools/__init__.py +1 -0
  148. mcp_server/tools/config.py +63 -0
  149. mcp_server/tools/discovery.py +276 -0
  150. mcp_server/tools/generation.py +184 -0
  151. mcp_server/tools/planning.py +144 -0
  152. mcp_server/tools/source.py +175 -0
  153. mcp_server/tools/validation.py +140 -0
  154. mcp_server/tools/workflow.py +166 -0
  155. mcp_server/workflow_loader.py +204 -0
  156. presets/generic/audit_dimensions.md +132 -0
  157. presets/generic/doc_types.yaml +152 -0
  158. presets/generic/preset.yaml +115 -0
  159. presets/java-spring/audit_dimensions.md +228 -0
  160. presets/java-spring/audit_dimensions.yaml +203 -0
  161. presets/java-spring/doc_types.yaml +269 -0
  162. presets/java-spring/hooks.py +122 -0
  163. presets/java-spring/preset.yaml +341 -0
  164. presets/java-spring/templates/README.md +34 -0
  165. presets/java-spring/templates/audit-system.md +15 -0
  166. presets/java-spring/templates/subagent-aop.md +105 -0
  167. presets/java-spring/templates/subagent-api.md +63 -0
  168. presets/java-spring/templates/subagent-architecture.md +111 -0
  169. presets/java-spring/templates/subagent-async-events.md +107 -0
  170. presets/java-spring/templates/subagent-audit-api-contracts.md +40 -0
  171. presets/java-spring/templates/subagent-audit-architecture.md +38 -0
  172. presets/java-spring/templates/subagent-audit-business.md +40 -0
  173. presets/java-spring/templates/subagent-audit-data-models.md +40 -0
  174. presets/java-spring/templates/subagent-business.md +129 -0
  175. presets/java-spring/templates/subagent-caching.md +75 -0
  176. presets/java-spring/templates/subagent-database-access.md +114 -0
  177. presets/java-spring/templates/subagent-enum.md +75 -0
  178. presets/java-spring/templates/subagent-error-handling.md +91 -0
  179. presets/java-spring/templates/subagent-external-integrations.md +80 -0
  180. presets/java-spring/templates/subagent-index.md +122 -0
  181. presets/java-spring/templates/subagent-messaging.md +97 -0
  182. presets/java-spring/templates/subagent-model.md +88 -0
  183. presets/java-spring/templates/subagent-observability.md +91 -0
  184. presets/java-spring/templates/subagent-scheduled.md +81 -0
  185. presets/java-spring/templates/subagent-security.md +102 -0
  186. presets/java-spring/templates/subagent-structure.md +101 -0
  187. presets/java-spring/templates/subagent-sync-section.md +34 -0
  188. presets/java-spring/templates/subagent-utils.md +73 -0
  189. presets/java-spring/templates/sync-system.md +8 -0
  190. presets/java-spring/workflow-extensions.md +112 -0
  191. skills/__init__.py +1 -0
  192. skills/_shared/README.md +30 -0
  193. skills/_shared/doc-coverage-shared.md +134 -0
  194. skills/_shared/doc-quality-standard.md +1058 -0
  195. skills/_shared/doc-subagent-rules.md +762 -0
  196. skills/_shared/windows-compat.md +89 -0
  197. skills/kb-audit/SKILL.md +52 -0
  198. skills/kb-audit/rules.md +88 -0
  199. skills/kb-audit/steps/step-01-prepare.md +75 -0
  200. skills/kb-audit/steps/step-02-audit.md +96 -0
  201. skills/kb-audit/steps/step-03-verify.md +65 -0
  202. skills/kb-audit/steps/step-04-report.md +64 -0
  203. skills/kb-init/SKILL.md +146 -0
  204. skills/kb-init/rules.md +187 -0
  205. skills/kb-init/steps/step-01-scope.md +62 -0
  206. skills/kb-init/steps/step-02-source.md +410 -0
  207. skills/kb-init/steps/step-03-generate.md +307 -0
  208. skills/kb-init/steps/step-04-quality.md +92 -0
  209. skills/kb-init/steps/step-05-finalize.md +132 -0
  210. skills/kb-init/templates/core/execution-modes.md +29 -0
  211. skills/kb-init/templates/core/output-only.md +4 -0
  212. skills/kb-init/templates/core/readwrite.md +33 -0
  213. skills/kb-search/SKILL.md +138 -0
  214. skills/kb-search/rules.md +64 -0
  215. skills/kb-sync/SKILL.md +43 -0
  216. skills/kb-sync/rules.md +70 -0
  217. skills/kb-sync/scripts/rebuild_module.py +91 -0
  218. skills/kb-sync/scripts/scan_repos.py +687 -0
  219. skills/kb-sync/steps/step-01-detect.md +72 -0
  220. skills/kb-sync/steps/step-02-update.md +71 -0
  221. skills/kb-sync/steps/step-03-verify.md +47 -0
  222. skills/kb-sync/steps/step-04-finalize.md +52 -0
  223. source_kb-0.2.2.dist-info/METADATA +194 -0
  224. source_kb-0.2.2.dist-info/RECORD +228 -0
  225. source_kb-0.2.2.dist-info/WHEEL +5 -0
  226. source_kb-0.2.2.dist-info/entry_points.txt +3 -0
  227. source_kb-0.2.2.dist-info/licenses/LICENSE +21 -0
  228. source_kb-0.2.2.dist-info/top_level.txt +6 -0
@@ -0,0 +1,191 @@
1
+ """source-kb validate — Document validation (coverage, consistency, links, structure, sampling)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+
11
def register(subparsers: argparse._SubParsersAction) -> None:
    """Attach the ``validate`` command and its validator sub-commands.

    Every validator sub-command stores its handler in ``func`` via
    ``set_defaults``. The ``validate`` parser itself defaults ``func`` to
    ``_no_validator``.
    """
    validate_parser = subparsers.add_parser(
        "validate", help="Run validation checks on knowledge base docs"
    )
    validators = validate_parser.add_subparsers(dest="validator")

    # coverage
    coverage = validators.add_parser("coverage", help="Coverage check")
    coverage.add_argument("action", nargs="?", default="check", choices=["check"])
    coverage.add_argument("--skeleton", help="Skeleton JSON path")
    coverage.add_argument("--skeleton-dir", help="Skeleton shards directory")
    coverage.add_argument("--module-dir", help="Module directory")
    coverage.add_argument("--docs-dir", help="Documents directory")
    coverage.add_argument("--type", default="service", help="Module type")
    coverage.set_defaults(func=run_coverage)

    # consistency
    consistency = validators.add_parser("consistency", help="Consistency and progress check")
    consistency.add_argument("--module-dir", required=True, help="Module directory")
    consistency.add_argument("--preset", help="Preset name")
    consistency.add_argument("--source-cache", help="Source cache path")
    consistency.add_argument("--cleanup", action="store_true", help="Clean up progress files")
    consistency.set_defaults(func=run_consistency)

    # links
    links = validators.add_parser("links", help="Link and reference health check")
    links.add_argument("--module-dir", required=True, help="Module directory")
    links.add_argument("--config", help="kb-project.yaml path")
    links.set_defaults(func=run_links)

    # structure
    structure = validators.add_parser("structure", help="Document structure validation")
    structure.add_argument("--module-dir", required=True, help="Module directory")
    structure.add_argument("--skeleton-dir", help="Skeleton directory for comparison")
    structure.set_defaults(func=run_structure)

    # sampling
    sampling = validators.add_parser("sampling", help="Numeric sampling validation")
    sampling.add_argument("--module-dir", required=True, help="Module directory")
    sampling.set_defaults(func=run_sampling)

    # Fallback handler stored on the parent "validate" parser.
    validate_parser.set_defaults(func=_no_validator)
51
+
52
+
53
+ def _no_validator(args: argparse.Namespace) -> None:
54
+ print("Error: specify a validator (coverage, consistency, links, structure, sampling)",
55
+ file=sys.stderr)
56
+ sys.exit(1)
57
+
58
+
59
def run_coverage(args: argparse.Namespace) -> None:
    """Run the coverage validator and report its findings.

    The documents directory is ``--docs-dir`` when given, otherwise
    ``--module-dir``; the process exits with status 1 when neither is set.
    Errors and warnings are printed to stdout, followed by a one-line JSON
    summary on stderr.
    """
    from core.validators.coverage import CoverageValidator

    def _as_path(value):
        # Unset or empty CLI options stay None instead of becoming Path("").
        return Path(value) if value else None

    module_dir = _as_path(args.module_dir)
    skeleton_path = _as_path(args.skeleton)
    skeleton_dir = _as_path(args.skeleton_dir)
    docs_dir = _as_path(args.docs_dir)
    if docs_dir is None:
        docs_dir = module_dir

    if docs_dir is None:
        print("Error: --docs-dir or --module-dir required", file=sys.stderr)
        sys.exit(1)

    outcome = CoverageValidator().validate(
        docs_dir,
        skeleton_path=None if skeleton_path is None else str(skeleton_path),
        skeleton_dir=None if skeleton_dir is None else str(skeleton_dir),
        module_type=args.type or "service",
    )

    for heading, entries in (("ERRORS:", outcome.errors), ("WARNINGS:", outcome.warnings)):
        if entries:
            print(heading)
            for entry in entries:
                print(f" {entry}")

    summary = {
        "status": "ok" if outcome.passed else "fail",
        "errors": len(outcome.errors),
        "warnings": len(outcome.warnings),
    }
    print(json.dumps(summary, ensure_ascii=False), file=sys.stderr)
91
+
92
+
93
def run_consistency(args: argparse.Namespace) -> None:
    """Run the consistency validator, or clean up progress files with ``--cleanup``.

    With ``--cleanup`` only ``cleanup_progress`` is called and the function
    returns after reporting the count. Otherwise the progress status is
    printed (when any), the consistency validator runs, and its errors and
    warnings are printed to stdout. A one-line JSON summary goes to stderr
    in both modes.
    """
    from core.validators.consistency import ConsistencyValidator
    from core.monitor.progress import check_progress, cleanup_progress

    module_dir = Path(args.module_dir)

    if args.cleanup:
        cleaned = cleanup_progress(module_dir)
        print(f"Cleaned {cleaned} progress files")
        print(json.dumps({"status": "ok", "cleaned": cleaned}, ensure_ascii=False), file=sys.stderr)
        return

    progress = check_progress(module_dir)
    if progress:
        print("Progress status:")
        for doc_name, doc_status in progress.items():
            print(f" {doc_name}: {doc_status}")

    validator = ConsistencyValidator()
    source_cache = Path(args.source_cache) if args.source_cache else None
    result = validator.validate(module_dir, source_cache=source_cache)

    # Errors are counted in the summary below, so show them as the other
    # validators do instead of leaving a "fail" status unexplained.
    if result.errors:
        print("\nConsistency errors:")
        for e in result.errors:
            print(f" {e}")

    if result.warnings:
        print("\nConsistency warnings:")
        for w in result.warnings:
            print(f" {w}")

    status = "ok" if result.passed else "fail"
    print(json.dumps({"status": status, "errors": len(result.errors),
                      "warnings": len(result.warnings),
                      "progress": progress or {}}, ensure_ascii=False), file=sys.stderr)
124
+
125
+
126
def run_links(args: argparse.Namespace) -> None:
    """Run the links validator on ``--module-dir`` and report its findings.

    Errors are printed under a "Broken links:" heading and warnings without
    a heading, both to stdout; a one-line JSON summary goes to stderr.
    """
    from core.validators.links import LinksValidator

    outcome = LinksValidator().validate(Path(args.module_dir))

    broken = outcome.errors
    if broken:
        print("Broken links:")
        for item in broken:
            print(f" {item}")
    for note in outcome.warnings or ():
        print(f" {note}")

    summary = {
        "status": "ok" if outcome.passed else "fail",
        "errors": len(outcome.errors),
        "warnings": len(outcome.warnings),
    }
    print(json.dumps(summary, ensure_ascii=False), file=sys.stderr)
144
+
145
+
146
def run_structure(args: argparse.Namespace) -> None:
    """Run the structure validator on ``--module-dir`` and report its findings.

    ``--skeleton-dir`` is passed on as a ``Path`` when given, else ``None``.
    Errors are printed under a "Structure errors:" heading and warnings
    without a heading, both to stdout; a one-line JSON summary goes to stderr.
    """
    from core.validators.structure import StructureValidator

    target_dir = Path(args.module_dir)
    compare_dir = None
    if args.skeleton_dir:
        compare_dir = Path(args.skeleton_dir)

    outcome = StructureValidator().validate(target_dir, skeleton_dir=compare_dir)

    problems = outcome.errors
    if problems:
        print("Structure errors:")
        for problem in problems:
            print(f" {problem}")
    for note in outcome.warnings or ():
        print(f" {note}")

    summary = {
        "status": "ok" if outcome.passed else "fail",
        "errors": len(outcome.errors),
        "warnings": len(outcome.warnings),
    }
    print(json.dumps(summary, ensure_ascii=False), file=sys.stderr)
166
+
167
+
168
def run_sampling(args: argparse.Namespace) -> None:
    """Run the sampling validator on ``--module-dir`` and report a hit rate.

    Each warning is counted as one miss against a fixed sample size of 10.
    Warnings and the hit-rate line are printed to stdout; a one-line JSON
    summary (``status`` is ``ok`` or ``warn``) goes to stderr.
    """
    from core.validators.sampling import SamplingValidator

    module_dir = Path(args.module_dir)
    validator = SamplingValidator()
    result = validator.validate(module_dir)

    if result.warnings:
        print("Sampling warnings:")
        for w in result.warnings:
            print(f" {w}")

    # NOTE(review): the sample size is hard-coded; presumably it mirrors the
    # number of samples SamplingValidator draws — confirm against the validator.
    total = 10
    misses = len(result.warnings)
    # Clamp so that more warnings than samples cannot yield a negative rate.
    hit_count = max(total - misses, 0)
    hit_rate = round(hit_count / total * 100, 1)

    if not result.warnings:
        print(f"Sampling passed (hit rate {hit_rate}%)")
    else:
        print(f"Sampling hit rate {hit_rate}% ({misses} misses)")

    status = "ok" if result.passed else "warn"
    print(json.dumps({"status": status, "warnings": misses,
                      "hit_rate_pct": hit_rate}, ensure_ascii=False), file=sys.stderr)
core/__init__.py ADDED
@@ -0,0 +1,32 @@
1
+ """core — shared algorithm library for source-kb.
2
+
3
+ This package provides pure functions and tools for knowledge base generation.
4
+ It has no opinion on pipeline orchestration, no global state, and no dependency
5
+ on cli/ or skill/ packages.
6
+
7
+ Layers:
8
+ core.config — Configuration loading and validation
9
+ core.git — Git operations (clone, fetch, diff)
10
+ core.paths — Canonical path resolution
11
+ core.preset — Preset loading and file classification
12
+ core.skeleton — Skeleton extraction, file lists, split planning, merge
13
+ core.prompt — Prompt rendering and content assembly
14
+ core.validators — Document quality validators
15
+ core.rag — Vector index (chunking, embedding, retrieval)
16
+ core.monitor — Progress monitoring and heartbeat detection
17
+ core.docs — Shared document generation
18
+ """
19
+
20
+ import logging
21
+
22
+ __version__ = "0.2.2"
23
+
24
+
25
def setup_logging(level: int = logging.INFO) -> None:
    """Set the level of the ``core`` logger and give it a stream handler.

    The level is applied on every call. A handler formatted as
    ``[<logger name>] <message>`` is attached only when the logger has no
    handlers yet, so repeated calls do not add more handlers.
    """
    core_logger = logging.getLogger("core")
    core_logger.setLevel(level)
    if core_logger.handlers:
        return
    stream = logging.StreamHandler()
    stream.setFormatter(logging.Formatter("[%(name)s] %(message)s"))
    core_logger.addHandler(stream)
core/config.py ADDED
@@ -0,0 +1,261 @@
1
+ """Configuration loading, validation, and typed access.
2
+
3
+ Merges the responsibilities of engine/config_loader.py and engine/runtime_config.py
4
+ into a single module with no global state. The caller loads config once and passes
5
+ the resulting Config object explicitly.
6
+
7
+ Usage:
8
+ from core.config import load_config, find_config
9
+ config = load_config(find_config())
10
+ kb = config.get_kb("my-kb")
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import copy
16
+ import os
17
+ from dataclasses import dataclass, field
18
+ from pathlib import Path
19
+ from typing import Any
20
+
21
+ import yaml
22
+
23
+
24
+ # ---------------------------------------------------------------------------
25
+ # Config dataclass
26
+ # ---------------------------------------------------------------------------
27
+
28
+
29
@dataclass
class Config:
    """Typed, immutable-ish configuration loaded from kb-project.yaml.

    ``raw`` is the parsed configuration mapping and ``config_path`` the file
    it came from. Accessors read from ``raw`` on every call and fall back to
    built-in defaults when a section or key is missing. A section that is
    present but null (``embedding:`` with nothing under it) is treated like
    a missing section.
    """

    raw: dict[str, Any]
    config_path: Path

    # --- Internal helpers ---

    def _section(self, name: str) -> dict[str, Any]:
        """Return top-level section *name*, or an empty dict when absent or null."""
        return self.raw.get(name) or {}

    @staticmethod
    def _env_int(name: str) -> int | None:
        """Return environment variable *name* as an int, or None when unset or not an integer."""
        text = os.getenv(name, "")
        if not text:
            return None
        try:
            return int(text)
        except ValueError:
            # Deliberate best-effort: a malformed override falls back to the config value.
            return None

    # --- Knowledge bases ---

    @property
    def kb_names(self) -> list[str]:
        """Names of all configured knowledge bases, in configuration order."""
        return list(self._section("knowledge_bases"))

    def get_kb(self, name: str) -> dict[str, Any]:
        """Return the settings of knowledge base *name*.

        Raises:
            KeyError: if *name* is not configured; the message lists the available names.
        """
        kbs = self._section("knowledge_bases")
        if name not in kbs:
            available = ", ".join(kbs.keys()) if kbs else "none"
            raise KeyError(f"Knowledge base '{name}' not found. Available: {available}")
        return kbs[name]

    # --- Embedding ---

    @property
    def embed_backend(self) -> str:
        """Embedding backend name (default ``ollama``)."""
        return self._section("embedding").get("backend", "ollama")

    @property
    def embed_model(self) -> str:
        """Embedding model name (default ``bge-m3``)."""
        return self._section("embedding").get("model", "bge-m3")

    @property
    def embed_base_url(self) -> str:
        """Embedding service base URL (default ``http://localhost:11434``)."""
        return self._section("embedding").get("base_url", "http://localhost:11434")

    @property
    def embed_api_key(self) -> str:
        """Embedding API key; ``EMBEDDING_API_KEY`` takes precedence over the config file."""
        return os.getenv("EMBEDDING_API_KEY", "") or self._section("embedding").get("api_key", "")

    # --- Chunking ---

    @property
    def chunk_size(self) -> int:
        """Configured chunk size (default 512)."""
        return self._section("chunking").get("size", 512)

    @property
    def chunk_overlap(self) -> int:
        """Configured chunk overlap (default 100)."""
        return self._section("chunking").get("overlap", 100)

    @property
    def chunk_min_length(self) -> int:
        """Configured minimum chunk length (default 50)."""
        return self._section("chunking").get("min_length", 50)

    # --- Retrieval ---

    @property
    def top_k(self) -> int:
        """Configured retrieval ``top_k`` (default 10)."""
        return self._section("retrieval").get("top_k", 10)

    @property
    def similarity_threshold(self) -> float:
        """Configured retrieval similarity threshold (default 0.6)."""
        return self._section("retrieval").get("similarity_threshold", 0.6)

    # --- Agent / LLM ---

    @property
    def agent_model(self) -> str:
        """Agent model; ``LLM_MODEL`` takes precedence over the config file."""
        return os.getenv("LLM_MODEL", "") or self._section("agent").get("model", "")

    @property
    def agent_api_key(self) -> str:
        """Agent API key; ``LLM_API_KEY`` takes precedence over the config file."""
        return os.getenv("LLM_API_KEY", "") or self._section("agent").get("api_key", "")

    @property
    def agent_base_url(self) -> str:
        """Agent base URL; ``LLM_BASE_URL`` takes precedence over the config file."""
        return os.getenv("LLM_BASE_URL", "") or self._section("agent").get("base_url", "")

    @property
    def agent_timeout(self) -> int:
        """Sub-agent timeout: ``KB_AGENT_TIMEOUT`` if a valid int, else config (default 900)."""
        override = self._env_int("KB_AGENT_TIMEOUT")
        if override is not None:
            return override
        return self._section("agent").get("subagent_timeout", 900)

    @property
    def agent_max_concurrent(self) -> int:
        """Max concurrent sub-agents: ``KB_AGENT_MAX_CONCURRENT`` if a valid int, else config (default 5)."""
        override = self._env_int("KB_AGENT_MAX_CONCURRENT")
        if override is not None:
            return override
        return self._section("agent").get("max_concurrent_subagents", 5)

    @property
    def agent_backend(self) -> str:
        """Determine execution backend: 'delegated' / 'dry-run' / 'api'."""
        model = self.agent_model
        if model in ("delegated", "dry-run"):
            return model
        return "api"

    # --- Maven ---

    def maven_auth(self, kb_name: str | None = None) -> str:
        """Return Maven repository credentials, or an empty string when none are set.

        Lookup order: the ``KB_MAVEN_AUTH`` environment variable, then
        ``maven_repo.auth`` of *kb_name* (when given), then the first
        knowledge base that defines a non-empty ``maven_repo.auth``.

        Raises:
            KeyError: if *kb_name* is given but not configured.
        """
        env = os.getenv("KB_MAVEN_AUTH", "")
        if env:
            return env
        if kb_name:
            # ``or {}`` tolerates ``maven_repo:`` being present but null.
            auth = (self.get_kb(kb_name).get("maven_repo") or {}).get("auth", "")
            if auth:
                return auth
        for kb in self._section("knowledge_bases").values():
            auth = (kb.get("maven_repo") or {}).get("auth", "")
            if auth:
                return auth
        return ""

    # --- ChromaDB ---

    def chroma_dir(self, kb_name: str | None = None) -> Path:
        """Return the ``chroma_db`` directory next to a knowledge base's ``knowledge_dir``.

        Uses *kb_name* when given, otherwise the first configured knowledge
        base; returns ``./chroma_db`` when none are configured.

        Raises:
            KeyError: if *kb_name* is not configured, or the chosen knowledge
                base has no ``knowledge_dir``.
        """
        if kb_name:
            kb = self.get_kb(kb_name)
            return Path(kb["knowledge_dir"]).parent / "chroma_db"
        for kb in self._section("knowledge_bases").values():
            # Only the first configured knowledge base is consulted.
            return Path(kb["knowledge_dir"]).parent / "chroma_db"
        return Path("./chroma_db")
161
+
162
+
163
+ # ---------------------------------------------------------------------------
164
+ # Loading functions
165
+ # ---------------------------------------------------------------------------
166
+
167
+
168
def find_config(start_dir: Path | None = None) -> Path:
    """Locate kb-project.yaml in *start_dir* or the nearest ancestor directory.

    The search starts at the resolved *start_dir* (the current working
    directory when omitted) and walks up to the filesystem root.

    Raises:
        FileNotFoundError: if no directory on the way up contains the file.
    """
    origin = (start_dir or Path.cwd()).resolve()
    for directory in (origin, *origin.parents):
        candidate = directory / "kb-project.yaml"
        if candidate.exists():
            return candidate
    raise FileNotFoundError(f"kb-project.yaml not found (searched from {start_dir or Path.cwd()})")
179
+
180
+
181
def load_config(config_path: Path | None = None) -> Config:
    """Load, validate, and resolve paths in kb-project.yaml. Returns Config instance.

    Args:
        config_path: File to load; located with ``find_config()`` when omitted.

    Raises:
        FileNotFoundError: if the file does not exist (or cannot be found).
        ValueError: if the document is not a mapping or fails ``validate_config``.
    """
    if config_path is None:
        config_path = find_config()
    config_path = Path(config_path)
    if not config_path.exists():
        raise FileNotFoundError(f"Config file does not exist: {config_path}")

    with open(config_path, encoding="utf-8") as f:
        raw = yaml.safe_load(f)

    # safe_load yields None for an empty document and a list/scalar for
    # non-mapping YAML; reject those here rather than failing with an
    # AttributeError inside validate_config.
    if not isinstance(raw, dict):
        raise ValueError(
            "Config validation failed:\n - top-level document must be a mapping "
            f"(got {type(raw).__name__})"
        )

    errors = validate_config(raw)
    if errors:
        msg = "\n".join(f" - {e}" for e in errors)
        raise ValueError(f"Config validation failed:\n{msg}")

    resolved = _resolve_paths(raw, config_path.parent)
    return Config(raw=resolved, config_path=config_path)
199
+
200
+
201
def validate_config(config: dict) -> list[str]:
    """Validate configuration. Returns list of error strings (empty = valid).

    Checks that ``version`` is 1, that ``knowledge_bases`` is a non-empty
    mapping, and that every knowledge base has a unique ``collection``, a
    ``source.structure`` of ``multi-repo`` or ``monorepo``, and a ``preset``.
    Malformed entries (null or non-mapping values) are reported as errors
    rather than raising.
    """
    errors: list[str] = []
    if config.get("version") != 1:
        errors.append("version must be 1")

    kbs = config.get("knowledge_bases") or {}
    if not kbs:
        errors.append("knowledge_bases must contain at least one entry")
        return errors
    if not isinstance(kbs, dict):
        errors.append("knowledge_bases must be a mapping")
        return errors

    # collection name -> first knowledge base that claimed it
    collections: dict[str, str] = {}
    for kb_name, kb_cfg in kbs.items():
        if not isinstance(kb_cfg, dict):
            # e.g. "my-kb:" with nothing under it parses to None
            errors.append(f"knowledge_bases.{kb_name} must be a mapping")
            continue

        coll = kb_cfg.get("collection")
        if not coll:
            errors.append(f"knowledge_bases.{kb_name}.collection is required")
        elif coll in collections:
            errors.append(f"collection '{coll}' duplicated: {collections[coll]} and {kb_name}")
        else:
            collections[coll] = kb_name

        source = kb_cfg.get("source")
        structure = source.get("structure") if isinstance(source, dict) else None
        if structure not in ("multi-repo", "monorepo"):
            errors.append(f"knowledge_bases.{kb_name}.source.structure must be multi-repo or monorepo")

        if not kb_cfg.get("preset"):
            errors.append(f"knowledge_bases.{kb_name}.preset is required")

    return errors
231
+
232
+
233
+ # ---------------------------------------------------------------------------
234
+ # Internal helpers
235
+ # ---------------------------------------------------------------------------
236
+
237
+
238
+ def _resolve_paths(config: dict, base_dir: Path) -> dict:
239
+ """Resolve relative paths to absolute. Returns a deep copy."""
240
+ config = copy.deepcopy(config)
241
+ base_dir = base_dir.resolve()
242
+
243
+ for _kb_name, kb_cfg in config.get("knowledge_bases", {}).items():
244
+ if "knowledge_dir" in kb_cfg:
245
+ kb_cfg["knowledge_dir"] = str(_resolve(kb_cfg["knowledge_dir"], base_dir))
246
+ source = kb_cfg.get("source", {})
247
+ if "cache_dir" in source:
248
+ source["cache_dir"] = str(_resolve(source["cache_dir"], base_dir))
249
+ if source.get("structure") == "multi-repo":
250
+ for repo in source.get("repos", []):
251
+ if "local" in repo:
252
+ repo["local"] = str(_resolve(repo["local"], base_dir))
253
+ return config
254
+
255
+
256
+ def _resolve(path_str: str, base_dir: Path) -> Path:
257
+ """Resolve a single path relative to base_dir."""
258
+ p = Path(path_str).expanduser()
259
+ if not p.is_absolute():
260
+ p = (base_dir / p).resolve()
261
+ return p
core/docs/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ """core.docs — shared document generation.
2
+
3
+ Generates cross-module summary documents in _shared/ directory:
4
+ - project-overview.md
5
+ - cross-module-calls.md
6
+ - error-codes.md (if applicable)
7
+ """