source-kb 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +50 -0
- cli/__main__.py +5 -0
- cli/commands/__init__.py +1 -0
- cli/commands/anchor_fix.py +47 -0
- cli/commands/diff_doc.py +52 -0
- cli/commands/dispatch.py +77 -0
- cli/commands/extract.py +72 -0
- cli/commands/file_list.py +74 -0
- cli/commands/index.py +84 -0
- cli/commands/lock.py +89 -0
- cli/commands/merge.py +60 -0
- cli/commands/merge_delta.py +19 -0
- cli/commands/metadata.py +24 -0
- cli/commands/pipeline.py +45 -0
- cli/commands/post_merge.py +43 -0
- cli/commands/query.py +52 -0
- cli/commands/render.py +101 -0
- cli/commands/scan_repos.py +46 -0
- cli/commands/setup.py +94 -0
- cli/commands/split.py +196 -0
- cli/commands/stale_files.py +98 -0
- cli/commands/validate.py +191 -0
- core/__init__.py +32 -0
- core/config.py +261 -0
- core/docs/__init__.py +7 -0
- core/docs/section_updater.py +286 -0
- core/docs/shared.py +149 -0
- core/git.py +294 -0
- core/interfaces.py +249 -0
- core/monitor/__init__.py +5 -0
- core/monitor/progress.py +83 -0
- core/monitor/prompt_store.py +49 -0
- core/paths.py +141 -0
- core/preset.py +237 -0
- core/preset_accessors.py +202 -0
- core/preset_classify.py +132 -0
- core/preset_hooks.py +129 -0
- core/preset_profile.py +89 -0
- core/prompt/__init__.py +7 -0
- core/prompt/__main__.py +147 -0
- core/prompt/content.py +320 -0
- core/prompt/context_manager.py +164 -0
- core/prompt/renderer.py +236 -0
- core/prompt/response_parser.py +274 -0
- core/prompt/templates.py +357 -0
- core/prompt/validate_parity.py +162 -0
- core/prompt/variables.py +339 -0
- core/rag/__init__.py +22 -0
- core/rag/__main__.py +136 -0
- core/rag/bm25_index.py +268 -0
- core/rag/chunker.py +273 -0
- core/rag/embedder.py +151 -0
- core/rag/indexer.py +292 -0
- core/rag/loader.py +89 -0
- core/rag/retriever.py +82 -0
- core/skeleton/__init__.py +11 -0
- core/skeleton/__main__.py +934 -0
- core/skeleton/anchor_fix.py +250 -0
- core/skeleton/classify.py +331 -0
- core/skeleton/cmd_anchor_fix.py +43 -0
- core/skeleton/cmd_diff_doc.py +44 -0
- core/skeleton/cmd_lock.py +87 -0
- core/skeleton/cmd_merge_delta.py +41 -0
- core/skeleton/community.py +233 -0
- core/skeleton/dependency_graph.py +306 -0
- core/skeleton/diff_doc.py +248 -0
- core/skeleton/dispatch.py +273 -0
- core/skeleton/dispatch_render.py +319 -0
- core/skeleton/dispatch_source.py +111 -0
- core/skeleton/extract.py +218 -0
- core/skeleton/extract_methods.py +298 -0
- core/skeleton/file_list.py +239 -0
- core/skeleton/impact.py +278 -0
- core/skeleton/jar_download.py +177 -0
- core/skeleton/jar_resolver.py +186 -0
- core/skeleton/loader.py +162 -0
- core/skeleton/merge.py +278 -0
- core/skeleton/merge_delta.py +229 -0
- core/skeleton/metadata.py +96 -0
- core/skeleton/metadata_builders.py +264 -0
- core/skeleton/module_dag.py +330 -0
- core/skeleton/parsers/__init__.py +71 -0
- core/skeleton/parsers/jqassistant.py +300 -0
- core/skeleton/parsers/jqassistant_cypher.py +225 -0
- core/skeleton/parsers/regex.py +171 -0
- core/skeleton/parsers/treesitter.py +324 -0
- core/skeleton/parsers/treesitter_java.py +284 -0
- core/skeleton/parsers/treesitter_multi.py +289 -0
- core/skeleton/pom_parser.py +299 -0
- core/skeleton/post_merge.py +295 -0
- core/skeleton/post_merge_llm.py +82 -0
- core/skeleton/query.py +195 -0
- core/skeleton/shard_context.py +177 -0
- core/skeleton/split.py +180 -0
- core/skeleton/split_cache.py +107 -0
- core/skeleton/split_feedback.py +174 -0
- core/skeleton/split_plan.py +219 -0
- core/skeleton/split_plan_helpers.py +305 -0
- core/skeleton/split_plan_llm.py +274 -0
- core/utils.py +135 -0
- core/validators/__init__.py +65 -0
- core/validators/__main__.py +215 -0
- core/validators/consistency.py +203 -0
- core/validators/coverage.py +171 -0
- core/validators/duplicates.py +76 -0
- core/validators/engine.py +224 -0
- core/validators/links.py +76 -0
- core/validators/sampling.py +169 -0
- core/validators/structure.py +144 -0
- engine/__init__.py +7 -0
- engine/assembler.py +231 -0
- engine/confirm.py +65 -0
- engine/dedup.py +106 -0
- engine/main.py +211 -0
- engine/pipeline/__init__.py +163 -0
- engine/pipeline/recovery.py +250 -0
- engine/pipeline/steps/__init__.py +23 -0
- engine/pipeline/steps/audit.py +220 -0
- engine/pipeline/steps/audit_apply.py +195 -0
- engine/pipeline/steps/audit_helpers.py +155 -0
- engine/pipeline/steps/classify_llm.py +236 -0
- engine/pipeline/steps/classify_prompt.py +223 -0
- engine/pipeline/steps/finalize.py +160 -0
- engine/pipeline/steps/generate.py +169 -0
- engine/pipeline/steps/generate_batch.py +197 -0
- engine/pipeline/steps/generate_recovery.py +170 -0
- engine/pipeline/steps/llm_plan_split.py +253 -0
- engine/pipeline/steps/lock.py +64 -0
- engine/pipeline/steps/preflight.py +237 -0
- engine/pipeline/steps/preflight_adjust.py +147 -0
- engine/pipeline/steps/pregenerate.py +130 -0
- engine/pipeline/steps/quality.py +81 -0
- engine/pipeline/steps/skeleton.py +149 -0
- engine/pipeline/steps/source.py +163 -0
- engine/pipeline/steps/sync.py +117 -0
- engine/pipeline/steps/sync_finalize.py +237 -0
- engine/pipeline/steps/sync_update.py +341 -0
- engine/pipelines.py +91 -0
- engine/runner.py +335 -0
- engine/strategies/__init__.py +86 -0
- engine/strategies/api.py +128 -0
- engine/strategies/delegated.py +50 -0
- engine/strategies/dryrun.py +25 -0
- engine/two_phase.py +143 -0
- mcp_server/__init__.py +73 -0
- mcp_server/__main__.py +5 -0
- mcp_server/tools/__init__.py +1 -0
- mcp_server/tools/config.py +63 -0
- mcp_server/tools/discovery.py +276 -0
- mcp_server/tools/generation.py +184 -0
- mcp_server/tools/planning.py +144 -0
- mcp_server/tools/source.py +175 -0
- mcp_server/tools/validation.py +140 -0
- mcp_server/tools/workflow.py +166 -0
- mcp_server/workflow_loader.py +204 -0
- presets/generic/audit_dimensions.md +132 -0
- presets/generic/doc_types.yaml +152 -0
- presets/generic/preset.yaml +115 -0
- presets/java-spring/audit_dimensions.md +228 -0
- presets/java-spring/audit_dimensions.yaml +203 -0
- presets/java-spring/doc_types.yaml +269 -0
- presets/java-spring/hooks.py +122 -0
- presets/java-spring/preset.yaml +341 -0
- presets/java-spring/templates/README.md +34 -0
- presets/java-spring/templates/audit-system.md +15 -0
- presets/java-spring/templates/subagent-aop.md +105 -0
- presets/java-spring/templates/subagent-api.md +63 -0
- presets/java-spring/templates/subagent-architecture.md +111 -0
- presets/java-spring/templates/subagent-async-events.md +107 -0
- presets/java-spring/templates/subagent-audit-api-contracts.md +40 -0
- presets/java-spring/templates/subagent-audit-architecture.md +38 -0
- presets/java-spring/templates/subagent-audit-business.md +40 -0
- presets/java-spring/templates/subagent-audit-data-models.md +40 -0
- presets/java-spring/templates/subagent-business.md +129 -0
- presets/java-spring/templates/subagent-caching.md +75 -0
- presets/java-spring/templates/subagent-database-access.md +114 -0
- presets/java-spring/templates/subagent-enum.md +75 -0
- presets/java-spring/templates/subagent-error-handling.md +91 -0
- presets/java-spring/templates/subagent-external-integrations.md +80 -0
- presets/java-spring/templates/subagent-index.md +122 -0
- presets/java-spring/templates/subagent-messaging.md +97 -0
- presets/java-spring/templates/subagent-model.md +88 -0
- presets/java-spring/templates/subagent-observability.md +91 -0
- presets/java-spring/templates/subagent-scheduled.md +81 -0
- presets/java-spring/templates/subagent-security.md +102 -0
- presets/java-spring/templates/subagent-structure.md +101 -0
- presets/java-spring/templates/subagent-sync-section.md +34 -0
- presets/java-spring/templates/subagent-utils.md +73 -0
- presets/java-spring/templates/sync-system.md +8 -0
- presets/java-spring/workflow-extensions.md +112 -0
- skills/__init__.py +1 -0
- skills/_shared/README.md +30 -0
- skills/_shared/doc-coverage-shared.md +134 -0
- skills/_shared/doc-quality-standard.md +1058 -0
- skills/_shared/doc-subagent-rules.md +762 -0
- skills/_shared/windows-compat.md +89 -0
- skills/kb-audit/SKILL.md +52 -0
- skills/kb-audit/rules.md +88 -0
- skills/kb-audit/steps/step-01-prepare.md +75 -0
- skills/kb-audit/steps/step-02-audit.md +96 -0
- skills/kb-audit/steps/step-03-verify.md +65 -0
- skills/kb-audit/steps/step-04-report.md +64 -0
- skills/kb-init/SKILL.md +146 -0
- skills/kb-init/rules.md +187 -0
- skills/kb-init/steps/step-01-scope.md +62 -0
- skills/kb-init/steps/step-02-source.md +410 -0
- skills/kb-init/steps/step-03-generate.md +307 -0
- skills/kb-init/steps/step-04-quality.md +92 -0
- skills/kb-init/steps/step-05-finalize.md +132 -0
- skills/kb-init/templates/core/execution-modes.md +29 -0
- skills/kb-init/templates/core/output-only.md +4 -0
- skills/kb-init/templates/core/readwrite.md +33 -0
- skills/kb-search/SKILL.md +138 -0
- skills/kb-search/rules.md +64 -0
- skills/kb-sync/SKILL.md +43 -0
- skills/kb-sync/rules.md +70 -0
- skills/kb-sync/scripts/rebuild_module.py +91 -0
- skills/kb-sync/scripts/scan_repos.py +687 -0
- skills/kb-sync/steps/step-01-detect.md +72 -0
- skills/kb-sync/steps/step-02-update.md +71 -0
- skills/kb-sync/steps/step-03-verify.md +47 -0
- skills/kb-sync/steps/step-04-finalize.md +52 -0
- source_kb-0.2.2.dist-info/METADATA +194 -0
- source_kb-0.2.2.dist-info/RECORD +228 -0
- source_kb-0.2.2.dist-info/WHEEL +5 -0
- source_kb-0.2.2.dist-info/entry_points.txt +3 -0
- source_kb-0.2.2.dist-info/licenses/LICENSE +21 -0
- source_kb-0.2.2.dist-info/top_level.txt +6 -0
cli/commands/validate.py
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"""source-kb validate — Document validation (coverage, consistency, links, structure, sampling)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import json
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def register(subparsers: argparse._SubParsersAction) -> None:
    """Attach the ``validate`` command and its validator sub-commands.

    Each validator (coverage, consistency, links, structure, sampling) gets
    its own sub-parser whose ``func`` default points at the matching
    ``run_*`` handler in this module. The ``validate`` parser itself defaults
    ``func`` to ``_no_validator`` so that invoking it without a validator
    name reports an error.

    Args:
        subparsers: The sub-parser action of the parent CLI parser.
    """
    validate_cmd = subparsers.add_parser(
        "validate", help="Run validation checks on knowledge base docs"
    )
    validators = validate_cmd.add_subparsers(dest="validator")

    # coverage
    coverage = validators.add_parser("coverage", help="Coverage check")
    coverage.add_argument("action", nargs="?", default="check", choices=["check"])
    coverage.add_argument("--skeleton", help="Skeleton JSON path")
    coverage.add_argument("--skeleton-dir", help="Skeleton shards directory")
    coverage.add_argument("--module-dir", help="Module directory")
    coverage.add_argument("--docs-dir", help="Documents directory")
    coverage.add_argument("--type", default="service", help="Module type")
    coverage.set_defaults(func=run_coverage)

    # consistency
    consistency = validators.add_parser("consistency", help="Consistency and progress check")
    consistency.add_argument("--module-dir", required=True, help="Module directory")
    consistency.add_argument("--preset", help="Preset name")
    consistency.add_argument("--source-cache", help="Source cache path")
    consistency.add_argument("--cleanup", action="store_true", help="Clean up progress files")
    consistency.set_defaults(func=run_consistency)

    # links
    links = validators.add_parser("links", help="Link and reference health check")
    links.add_argument("--module-dir", required=True, help="Module directory")
    links.add_argument("--config", help="kb-project.yaml path")
    links.set_defaults(func=run_links)

    # structure
    structure = validators.add_parser("structure", help="Document structure validation")
    structure.add_argument("--module-dir", required=True, help="Module directory")
    structure.add_argument("--skeleton-dir", help="Skeleton directory for comparison")
    structure.set_defaults(func=run_structure)

    # sampling
    sampling = validators.add_parser("sampling", help="Numeric sampling validation")
    sampling.add_argument("--module-dir", required=True, help="Module directory")
    sampling.set_defaults(func=run_sampling)

    # Fallback handler used when no validator sub-command was given.
    validate_cmd.set_defaults(func=_no_validator)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _no_validator(args: argparse.Namespace) -> None:
|
|
54
|
+
print("Error: specify a validator (coverage, consistency, links, structure, sampling)",
|
|
55
|
+
file=sys.stderr)
|
|
56
|
+
sys.exit(1)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def run_coverage(args: argparse.Namespace) -> None:
    """Run the coverage validator and print its findings.

    The documents directory is ``--docs-dir`` when given, otherwise
    ``--module-dir``; if neither is set an error is printed to stderr and the
    process exits with status 1. Errors and warnings go to stdout, followed
    by a one-line JSON summary (status plus counts) on stderr.

    Args:
        args: Parsed CLI arguments for ``validate coverage``.

    Raises:
        SystemExit: With code 1 when no documents directory can be determined.
    """
    from core.validators.coverage import CoverageValidator

    def _as_path(value):
        # Empty / missing CLI values stay None instead of becoming Path("").
        return Path(value) if value else None

    module_dir = _as_path(args.module_dir)
    skeleton_path = _as_path(args.skeleton)
    skeleton_dir = _as_path(args.skeleton_dir)
    docs_dir = _as_path(args.docs_dir) or module_dir

    if not docs_dir:
        print("Error: --docs-dir or --module-dir required", file=sys.stderr)
        sys.exit(1)

    # The validator receives the skeleton locations as strings (or None).
    skeleton_path_arg = None if skeleton_path is None else str(skeleton_path)
    skeleton_dir_arg = None if skeleton_dir is None else str(skeleton_dir)

    result = CoverageValidator().validate(
        docs_dir,
        skeleton_path=skeleton_path_arg,
        skeleton_dir=skeleton_dir_arg,
        module_type=args.type or "service",
    )

    for heading, entries in (("ERRORS:", result.errors), ("WARNINGS:", result.warnings)):
        if entries:
            print(heading)
            for entry in entries:
                print(f" {entry}")

    summary = {
        "status": "ok" if result.passed else "fail",
        "errors": len(result.errors),
        "warnings": len(result.warnings),
    }
    print(json.dumps(summary, ensure_ascii=False), file=sys.stderr)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def run_consistency(args: argparse.Namespace) -> None:
    """Run the consistency validator, or clean up progress files.

    With ``--cleanup`` the progress files under the module directory are
    removed via ``cleanup_progress`` and the function returns without
    validating. Otherwise the current progress (if any) is printed, the
    consistency validator runs, and its errors and warnings are printed to
    stdout. A one-line JSON summary is always written to stderr.

    ``args.preset`` is accepted by the parser but is not read here.

    Args:
        args: Parsed CLI arguments for ``validate consistency``.
    """
    from core.validators.consistency import ConsistencyValidator
    from core.monitor.progress import check_progress, cleanup_progress

    module_dir = Path(args.module_dir)

    if args.cleanup:
        cleaned = cleanup_progress(module_dir)
        print(f"Cleaned {cleaned} progress files")
        print(json.dumps({"status": "ok", "cleaned": cleaned}, ensure_ascii=False), file=sys.stderr)
        return

    progress = check_progress(module_dir)
    if progress:
        print("Progress status:")
        for doc_name, doc_status in progress.items():
            print(f" {doc_name}: {doc_status}")

    validator = ConsistencyValidator()
    source_cache = Path(args.source_cache) if args.source_cache else None
    result = validator.validate(module_dir, source_cache=source_cache)

    # Print error details as the other validate commands do; previously only
    # the error count reached the JSON summary, so a "fail" had no explanation.
    if result.errors:
        print("\nConsistency errors:")
        for e in result.errors:
            print(f" {e}")

    if result.warnings:
        print("\nConsistency warnings:")
        for w in result.warnings:
            print(f" {w}")

    status = "ok" if result.passed else "fail"
    print(json.dumps({"status": status, "errors": len(result.errors),
                      "warnings": len(result.warnings),
                      "progress": progress or {}}, ensure_ascii=False), file=sys.stderr)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def run_links(args: argparse.Namespace) -> None:
    """Run the link validator on a module directory and print its findings.

    Errors are listed under a "Broken links:" heading and warnings are listed
    without a heading, both on stdout. A one-line JSON summary (status plus
    counts) is written to stderr.

    ``args.config`` is accepted by the parser but is not read here.

    Args:
        args: Parsed CLI arguments for ``validate links``.
    """
    from core.validators.links import LinksValidator

    result = LinksValidator().validate(Path(args.module_dir))

    broken = result.errors
    if broken:
        print("Broken links:")
        for item in broken:
            print(f" {item}")

    notes = result.warnings
    if notes:
        for item in notes:
            print(f" {item}")

    summary = {
        "status": "ok" if result.passed else "fail",
        "errors": len(result.errors),
        "warnings": len(result.warnings),
    }
    print(json.dumps(summary, ensure_ascii=False), file=sys.stderr)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def run_structure(args: argparse.Namespace) -> None:
    """Run the structure validator on a module directory and print its findings.

    The optional ``--skeleton-dir`` is passed to the validator as a ``Path``
    (or ``None`` when absent). Errors are listed under a "Structure errors:"
    heading and warnings without a heading, both on stdout; a one-line JSON
    summary is written to stderr.

    Args:
        args: Parsed CLI arguments for ``validate structure``.
    """
    from core.validators.structure import StructureValidator

    target = Path(args.module_dir)
    reference = None
    if args.skeleton_dir:
        reference = Path(args.skeleton_dir)

    result = StructureValidator().validate(target, skeleton_dir=reference)

    problems = result.errors
    if problems:
        print("Structure errors:")
        for item in problems:
            print(f" {item}")

    notes = result.warnings
    if notes:
        for item in notes:
            print(f" {item}")

    summary = {
        "status": "ok" if result.passed else "fail",
        "errors": len(result.errors),
        "warnings": len(result.warnings),
    }
    print(json.dumps(summary, ensure_ascii=False), file=sys.stderr)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def run_sampling(args: argparse.Namespace) -> None:
    """Run the sampling validator and report a hit rate.

    Every warning from the validator is treated as one miss out of a fixed
    sample of 10. Warnings and the hit-rate line go to stdout; a one-line
    JSON summary (status, warning count, hit rate) goes to stderr. The status
    is "ok" when the result passed and "warn" otherwise.

    Args:
        args: Parsed CLI arguments for ``validate sampling``.
    """
    from core.validators.sampling import SamplingValidator

    module_dir = Path(args.module_dir)
    validator = SamplingValidator()
    result = validator.validate(module_dir)

    if result.warnings:
        print("Sampling warnings:")
        for w in result.warnings:
            print(f" {w}")

    # NOTE(review): the sample size is hard-coded here; presumably it matches
    # the number of samples SamplingValidator draws — confirm.
    total = 10
    # Clamp at zero so more than `total` warnings cannot yield a negative rate.
    hit_count = max(0, total - len(result.warnings))
    hit_rate = round(hit_count / total * 100, 1)

    if not result.warnings:
        print(f"Sampling passed (hit rate {hit_rate}%)")
    else:
        print(f"Sampling hit rate {hit_rate}% ({len(result.warnings)} misses)")

    status = "ok" if result.passed else "warn"
    print(json.dumps({"status": status, "warnings": len(result.warnings),
                      "hit_rate_pct": hit_rate}, ensure_ascii=False), file=sys.stderr)
|
core/__init__.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""core — shared algorithm library for source-kb.
|
|
2
|
+
|
|
3
|
+
This package provides pure functions and tools for knowledge base generation.
|
|
4
|
+
It has no opinion on pipeline orchestration, no global state, and no dependency
|
|
5
|
+
on cli/ or skills/ packages.
|
|
6
|
+
|
|
7
|
+
Layers:
|
|
8
|
+
core.config — Configuration loading and validation
|
|
9
|
+
core.git — Git operations (clone, fetch, diff)
|
|
10
|
+
core.paths — Canonical path resolution
|
|
11
|
+
core.preset — Preset loading and file classification
|
|
12
|
+
core.skeleton — Skeleton extraction, file lists, split planning, merge
|
|
13
|
+
core.prompt — Prompt rendering and content assembly
|
|
14
|
+
core.validators — Document quality validators
|
|
15
|
+
core.rag — Vector index (chunking, embedding, retrieval)
|
|
16
|
+
core.monitor — Progress monitoring and heartbeat detection
|
|
17
|
+
core.docs — Shared document generation
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import logging
|
|
21
|
+
|
|
22
|
+
# Version string of the core package.
# NOTE(review): presumably kept in step with the distribution version — confirm.
__version__ = "0.2.2"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def setup_logging(level: int = logging.INFO) -> None:
    """Set the level of the ``core`` logger and give it a stream handler.

    The level is applied on every call. A ``StreamHandler`` formatted as
    ``[<logger name>] <message>`` is attached only when the logger has no
    handlers yet, so repeated calls do not stack duplicate handlers.

    Args:
        level: Logging level to apply to the ``core`` logger.
    """
    core_logger = logging.getLogger("core")
    core_logger.setLevel(level)

    if core_logger.handlers:
        # Already configured: keep the existing handler(s).
        return

    stream = logging.StreamHandler()
    stream.setFormatter(logging.Formatter("[%(name)s] %(message)s"))
    core_logger.addHandler(stream)
|
core/config.py
ADDED
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
"""Configuration loading, validation, and typed access.
|
|
2
|
+
|
|
3
|
+
Merges the responsibilities of engine/config_loader.py and engine/runtime_config.py
|
|
4
|
+
into a single module with no global state. The caller loads config once and passes
|
|
5
|
+
the resulting Config object explicitly.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
from core.config import load_config, find_config
|
|
9
|
+
config = load_config(find_config())
|
|
10
|
+
kb = config.get_kb("my-kb")
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import copy
|
|
16
|
+
import os
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
import yaml
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
# Config dataclass
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class Config:
    """Typed, immutable-ish configuration loaded from kb-project.yaml.

    ``raw`` holds the parsed configuration mapping and ``config_path`` the
    file it was read from. Accessors read from ``raw`` on demand and fall
    back to a built-in default when a key is absent. Some settings can be
    overridden through environment variables, which take precedence over the
    file.

    A top-level section that is present but empty (``embedding:`` with no
    body parses to ``None``) is treated the same as a missing section.
    """

    raw: dict[str, Any]
    config_path: Path

    # --- Internal helpers ---

    def _section(self, name: str) -> dict[str, Any]:
        """Return the top-level mapping *name*, or ``{}`` if missing or null."""
        return self.raw.get(name) or {}

    @staticmethod
    def _env_int(var: str) -> int | None:
        """Return environment variable *var* as an int, or ``None``.

        ``None`` is returned when the variable is unset, empty, or not a valid
        integer; an unparsable value deliberately falls through to the
        file/default value rather than raising.
        """
        text = os.getenv(var, "")
        if not text:
            return None
        try:
            return int(text)
        except ValueError:
            return None

    @staticmethod
    def _kb_maven_auth(kb: dict[str, Any] | None) -> str:
        """Return ``maven_repo.auth`` of one knowledge-base entry, or ``""``."""
        repo = (kb or {}).get("maven_repo") or {}
        return repo.get("auth", "")

    # --- Knowledge bases ---

    @property
    def kb_names(self) -> list[str]:
        """Names of all configured knowledge bases, in file order."""
        return list(self._section("knowledge_bases"))

    def get_kb(self, name: str) -> dict[str, Any]:
        """Return the settings of knowledge base *name*.

        Raises:
            KeyError: If *name* is not configured; the message lists the
                available names.
        """
        kbs = self._section("knowledge_bases")
        if name not in kbs:
            available = ", ".join(kbs.keys()) if kbs else "none"
            raise KeyError(f"Knowledge base '{name}' not found. Available: {available}")
        return kbs[name]

    # --- Embedding ---

    @property
    def embed_backend(self) -> str:
        """Embedding backend name (default ``"ollama"``)."""
        return self._section("embedding").get("backend", "ollama")

    @property
    def embed_model(self) -> str:
        """Embedding model name (default ``"bge-m3"``)."""
        return self._section("embedding").get("model", "bge-m3")

    @property
    def embed_base_url(self) -> str:
        """Embedding service base URL (default ``http://localhost:11434``)."""
        return self._section("embedding").get("base_url", "http://localhost:11434")

    @property
    def embed_api_key(self) -> str:
        """Embedding API key; ``EMBEDDING_API_KEY`` overrides the file value."""
        return os.getenv("EMBEDDING_API_KEY", "") or self._section("embedding").get("api_key", "")

    # --- Chunking ---

    @property
    def chunk_size(self) -> int:
        """Configured ``chunking.size`` (default 512)."""
        return self._section("chunking").get("size", 512)

    @property
    def chunk_overlap(self) -> int:
        """Configured ``chunking.overlap`` (default 100)."""
        return self._section("chunking").get("overlap", 100)

    @property
    def chunk_min_length(self) -> int:
        """Configured ``chunking.min_length`` (default 50)."""
        return self._section("chunking").get("min_length", 50)

    # --- Retrieval ---

    @property
    def top_k(self) -> int:
        """Configured ``retrieval.top_k`` (default 10)."""
        return self._section("retrieval").get("top_k", 10)

    @property
    def similarity_threshold(self) -> float:
        """Configured ``retrieval.similarity_threshold`` (default 0.6)."""
        return self._section("retrieval").get("similarity_threshold", 0.6)

    # --- Agent / LLM ---

    @property
    def agent_model(self) -> str:
        """Agent model name; ``LLM_MODEL`` overrides the file value."""
        return os.getenv("LLM_MODEL", "") or self._section("agent").get("model", "")

    @property
    def agent_api_key(self) -> str:
        """Agent API key; ``LLM_API_KEY`` overrides the file value."""
        return os.getenv("LLM_API_KEY", "") or self._section("agent").get("api_key", "")

    @property
    def agent_base_url(self) -> str:
        """Agent base URL; ``LLM_BASE_URL`` overrides the file value."""
        return os.getenv("LLM_BASE_URL", "") or self._section("agent").get("base_url", "")

    @property
    def agent_timeout(self) -> int:
        """Sub-agent timeout: ``KB_AGENT_TIMEOUT`` if it is a valid integer,
        else ``agent.subagent_timeout`` (default 900)."""
        override = self._env_int("KB_AGENT_TIMEOUT")
        if override is not None:
            return override
        return self._section("agent").get("subagent_timeout", 900)

    @property
    def agent_max_concurrent(self) -> int:
        """Max concurrent sub-agents: ``KB_AGENT_MAX_CONCURRENT`` if it is a
        valid integer, else ``agent.max_concurrent_subagents`` (default 5)."""
        override = self._env_int("KB_AGENT_MAX_CONCURRENT")
        if override is not None:
            return override
        return self._section("agent").get("max_concurrent_subagents", 5)

    @property
    def agent_backend(self) -> str:
        """Determine execution backend: 'delegated' / 'dry-run' / 'api'."""
        model = self.agent_model
        # The model name doubles as a backend selector for the two special
        # values; anything else means a real API model.
        if model in ("delegated", "dry-run"):
            return model
        return "api"

    # --- Maven ---

    def maven_auth(self, kb_name: str | None = None) -> str:
        """Return the Maven auth string.

        Lookup order: the ``KB_MAVEN_AUTH`` environment variable, then
        ``maven_repo.auth`` of *kb_name* (when given), then the first
        knowledge base that defines a non-empty ``maven_repo.auth``.
        Returns ``""`` when none is found.

        Raises:
            KeyError: If *kb_name* is given but not configured.
        """
        env = os.getenv("KB_MAVEN_AUTH", "")
        if env:
            return env
        if kb_name:
            auth = self._kb_maven_auth(self.get_kb(kb_name))
            if auth:
                return auth
        for kb in self._section("knowledge_bases").values():
            auth = self._kb_maven_auth(kb)
            if auth:
                return auth
        return ""

    # --- ChromaDB ---

    def chroma_dir(self, kb_name: str | None = None) -> Path:
        """Return the ``chroma_db`` directory next to a KB's ``knowledge_dir``.

        Uses *kb_name* when given, otherwise the first configured knowledge
        base. Falls back to ``./chroma_db`` when no knowledge base exists.

        Raises:
            KeyError: If *kb_name* is not configured, or the chosen knowledge
                base has no ``knowledge_dir``.
        """
        if kb_name:
            kb = self.get_kb(kb_name)
        else:
            kbs = self._section("knowledge_bases")
            if not kbs:
                return Path("./chroma_db")
            kb = next(iter(kbs.values()))
        return Path(kb["knowledge_dir"]).parent / "chroma_db"
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# ---------------------------------------------------------------------------
|
|
164
|
+
# Loading functions
|
|
165
|
+
# ---------------------------------------------------------------------------
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def find_config(start_dir: Path | None = None) -> Path:
    """Locate ``kb-project.yaml`` in *start_dir* or the nearest ancestor.

    Args:
        start_dir: Directory to start from; the current working directory
            when omitted.

    Returns:
        Path of the first ``kb-project.yaml`` found while walking from the
        resolved start directory up to the filesystem root.

    Raises:
        FileNotFoundError: If no directory on that walk contains the file.
    """
    origin = (start_dir or Path.cwd()).resolve()
    # The start directory first, then each ancestor up to the root.
    for directory in (origin, *origin.parents):
        candidate = directory / "kb-project.yaml"
        if candidate.exists():
            return candidate
    raise FileNotFoundError(f"kb-project.yaml not found (searched from {start_dir or Path.cwd()})")
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def load_config(config_path: Path | None = None) -> Config:
    """Load, validate, and resolve paths in kb-project.yaml. Returns Config instance.

    Args:
        config_path: Path of the configuration file. When omitted it is
            located with ``find_config()``.

    Returns:
        A ``Config`` whose ``raw`` mapping is the path-resolved copy produced
        by ``_resolve_paths``.

    Raises:
        FileNotFoundError: If the file does not exist (or cannot be found).
        ValueError: If the file is empty, is not a YAML mapping, or fails
            ``validate_config``.
    """
    if config_path is None:
        config_path = find_config()
    config_path = Path(config_path)
    if not config_path.exists():
        raise FileNotFoundError(f"Config file does not exist: {config_path}")

    with open(config_path, encoding="utf-8") as f:
        raw = yaml.safe_load(f)

    # safe_load returns None for an empty document and may return a list or
    # scalar; report that as a validation failure rather than letting the
    # validator crash on a non-mapping.
    if not isinstance(raw, dict):
        raise ValueError(
            "Config validation failed:\n"
            f" - top-level content must be a mapping, got {type(raw).__name__}"
        )

    errors = validate_config(raw)
    if errors:
        msg = "\n".join(f" - {e}" for e in errors)
        raise ValueError(f"Config validation failed:\n{msg}")

    resolved = _resolve_paths(raw, config_path.parent)
    return Config(raw=resolved, config_path=config_path)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def validate_config(config: dict) -> list[str]:
    """Validate configuration. Returns list of error strings (empty = valid).

    Checks performed:
      * the top level is a mapping with ``version`` equal to 1;
      * ``knowledge_bases`` is a non-empty mapping;
      * every knowledge base is a mapping with a ``collection`` (unique
        across knowledge bases), a ``source.structure`` of ``multi-repo`` or
        ``monorepo``, and a ``preset``.

    Malformed input (a null config, a null knowledge-base entry, a null
    ``source``) is reported as an error string instead of raising.

    Args:
        config: Parsed configuration, normally the result of loading the
            YAML file.

    Returns:
        Human-readable error messages; an empty list means the config is valid.
    """
    if not isinstance(config, dict):
        return ["config must be a mapping"]

    errors: list[str] = []
    if config.get("version") != 1:
        errors.append("version must be 1")

    kbs = config.get("knowledge_bases", {})
    if not kbs:
        # Nothing further can be checked without knowledge bases.
        errors.append("knowledge_bases must contain at least one entry")
        return errors
    if not isinstance(kbs, dict):
        errors.append("knowledge_bases must be a mapping")
        return errors

    # collection name -> first knowledge base that claimed it
    collections: dict[str, str] = {}
    for kb_name, kb_cfg in kbs.items():
        if not isinstance(kb_cfg, dict):
            errors.append(f"knowledge_bases.{kb_name} must be a mapping")
            continue

        coll = kb_cfg.get("collection")
        if not coll:
            errors.append(f"knowledge_bases.{kb_name}.collection is required")
        elif coll in collections:
            errors.append(f"collection '{coll}' duplicated: {collections[coll]} and {kb_name}")
        else:
            collections[coll] = kb_name

        source = kb_cfg.get("source")
        # A missing, null, or non-mapping source has no valid structure.
        structure = source.get("structure") if isinstance(source, dict) else None
        if structure not in ("multi-repo", "monorepo"):
            errors.append(f"knowledge_bases.{kb_name}.source.structure must be multi-repo or monorepo")

        if not kb_cfg.get("preset"):
            errors.append(f"knowledge_bases.{kb_name}.preset is required")

    return errors
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
# ---------------------------------------------------------------------------
|
|
234
|
+
# Internal helpers
|
|
235
|
+
# ---------------------------------------------------------------------------
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _resolve_paths(config: dict, base_dir: Path) -> dict:
    """Resolve relative paths to absolute. Returns a deep copy.

    For every knowledge base the following values are rewritten (as strings)
    relative to *base_dir* when present: ``knowledge_dir``,
    ``source.cache_dir`` and, for ``multi-repo`` sources only, each
    ``source.repos[*].local``. The input mapping is not modified.

    Null or non-mapping entries (a knowledge base, ``source``, ``repos``, or
    a single repo item) are skipped instead of raising.

    Args:
        config: Parsed configuration mapping.
        base_dir: Directory that relative paths are resolved against.

    Returns:
        A deep copy of *config* with the paths above resolved.
    """
    config = copy.deepcopy(config)
    base_dir = base_dir.resolve()

    for kb_cfg in (config.get("knowledge_bases") or {}).values():
        if not isinstance(kb_cfg, dict):
            continue
        if "knowledge_dir" in kb_cfg:
            kb_cfg["knowledge_dir"] = str(_resolve(kb_cfg["knowledge_dir"], base_dir))

        # `source:` left empty in YAML parses to None; treat it as no source.
        source = kb_cfg.get("source") or {}
        if "cache_dir" in source:
            source["cache_dir"] = str(_resolve(source["cache_dir"], base_dir))

        if source.get("structure") == "multi-repo":
            # `repos:` with no items parses to None rather than [].
            for repo in source.get("repos") or []:
                if isinstance(repo, dict) and "local" in repo:
                    repo["local"] = str(_resolve(repo["local"], base_dir))
    return config
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def _resolve(path_str: str, base_dir: Path) -> Path:
|
|
257
|
+
"""Resolve a single path relative to base_dir."""
|
|
258
|
+
p = Path(path_str).expanduser()
|
|
259
|
+
if not p.is_absolute():
|
|
260
|
+
p = (base_dir / p).resolve()
|
|
261
|
+
return p
|