source-kb 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +50 -0
- cli/__main__.py +5 -0
- cli/commands/__init__.py +1 -0
- cli/commands/anchor_fix.py +47 -0
- cli/commands/diff_doc.py +52 -0
- cli/commands/dispatch.py +77 -0
- cli/commands/extract.py +72 -0
- cli/commands/file_list.py +74 -0
- cli/commands/index.py +84 -0
- cli/commands/lock.py +89 -0
- cli/commands/merge.py +60 -0
- cli/commands/merge_delta.py +19 -0
- cli/commands/metadata.py +24 -0
- cli/commands/pipeline.py +45 -0
- cli/commands/post_merge.py +43 -0
- cli/commands/query.py +52 -0
- cli/commands/render.py +101 -0
- cli/commands/scan_repos.py +46 -0
- cli/commands/setup.py +94 -0
- cli/commands/split.py +196 -0
- cli/commands/stale_files.py +98 -0
- cli/commands/validate.py +191 -0
- core/__init__.py +32 -0
- core/config.py +261 -0
- core/docs/__init__.py +7 -0
- core/docs/section_updater.py +286 -0
- core/docs/shared.py +149 -0
- core/git.py +294 -0
- core/interfaces.py +249 -0
- core/monitor/__init__.py +5 -0
- core/monitor/progress.py +83 -0
- core/monitor/prompt_store.py +49 -0
- core/paths.py +141 -0
- core/preset.py +237 -0
- core/preset_accessors.py +202 -0
- core/preset_classify.py +132 -0
- core/preset_hooks.py +129 -0
- core/preset_profile.py +89 -0
- core/prompt/__init__.py +7 -0
- core/prompt/__main__.py +147 -0
- core/prompt/content.py +320 -0
- core/prompt/context_manager.py +164 -0
- core/prompt/renderer.py +236 -0
- core/prompt/response_parser.py +274 -0
- core/prompt/templates.py +357 -0
- core/prompt/validate_parity.py +162 -0
- core/prompt/variables.py +339 -0
- core/rag/__init__.py +22 -0
- core/rag/__main__.py +136 -0
- core/rag/bm25_index.py +268 -0
- core/rag/chunker.py +273 -0
- core/rag/embedder.py +151 -0
- core/rag/indexer.py +292 -0
- core/rag/loader.py +89 -0
- core/rag/retriever.py +82 -0
- core/skeleton/__init__.py +11 -0
- core/skeleton/__main__.py +934 -0
- core/skeleton/anchor_fix.py +250 -0
- core/skeleton/classify.py +331 -0
- core/skeleton/cmd_anchor_fix.py +43 -0
- core/skeleton/cmd_diff_doc.py +44 -0
- core/skeleton/cmd_lock.py +87 -0
- core/skeleton/cmd_merge_delta.py +41 -0
- core/skeleton/community.py +233 -0
- core/skeleton/dependency_graph.py +306 -0
- core/skeleton/diff_doc.py +248 -0
- core/skeleton/dispatch.py +273 -0
- core/skeleton/dispatch_render.py +319 -0
- core/skeleton/dispatch_source.py +111 -0
- core/skeleton/extract.py +218 -0
- core/skeleton/extract_methods.py +298 -0
- core/skeleton/file_list.py +239 -0
- core/skeleton/impact.py +278 -0
- core/skeleton/jar_download.py +177 -0
- core/skeleton/jar_resolver.py +186 -0
- core/skeleton/loader.py +162 -0
- core/skeleton/merge.py +278 -0
- core/skeleton/merge_delta.py +229 -0
- core/skeleton/metadata.py +96 -0
- core/skeleton/metadata_builders.py +264 -0
- core/skeleton/module_dag.py +330 -0
- core/skeleton/parsers/__init__.py +71 -0
- core/skeleton/parsers/jqassistant.py +300 -0
- core/skeleton/parsers/jqassistant_cypher.py +225 -0
- core/skeleton/parsers/regex.py +171 -0
- core/skeleton/parsers/treesitter.py +324 -0
- core/skeleton/parsers/treesitter_java.py +284 -0
- core/skeleton/parsers/treesitter_multi.py +289 -0
- core/skeleton/pom_parser.py +299 -0
- core/skeleton/post_merge.py +295 -0
- core/skeleton/post_merge_llm.py +82 -0
- core/skeleton/query.py +195 -0
- core/skeleton/shard_context.py +177 -0
- core/skeleton/split.py +180 -0
- core/skeleton/split_cache.py +107 -0
- core/skeleton/split_feedback.py +174 -0
- core/skeleton/split_plan.py +219 -0
- core/skeleton/split_plan_helpers.py +305 -0
- core/skeleton/split_plan_llm.py +274 -0
- core/utils.py +135 -0
- core/validators/__init__.py +65 -0
- core/validators/__main__.py +215 -0
- core/validators/consistency.py +203 -0
- core/validators/coverage.py +171 -0
- core/validators/duplicates.py +76 -0
- core/validators/engine.py +224 -0
- core/validators/links.py +76 -0
- core/validators/sampling.py +169 -0
- core/validators/structure.py +144 -0
- engine/__init__.py +7 -0
- engine/assembler.py +231 -0
- engine/confirm.py +65 -0
- engine/dedup.py +106 -0
- engine/main.py +211 -0
- engine/pipeline/__init__.py +163 -0
- engine/pipeline/recovery.py +250 -0
- engine/pipeline/steps/__init__.py +23 -0
- engine/pipeline/steps/audit.py +220 -0
- engine/pipeline/steps/audit_apply.py +195 -0
- engine/pipeline/steps/audit_helpers.py +155 -0
- engine/pipeline/steps/classify_llm.py +236 -0
- engine/pipeline/steps/classify_prompt.py +223 -0
- engine/pipeline/steps/finalize.py +160 -0
- engine/pipeline/steps/generate.py +169 -0
- engine/pipeline/steps/generate_batch.py +197 -0
- engine/pipeline/steps/generate_recovery.py +170 -0
- engine/pipeline/steps/llm_plan_split.py +253 -0
- engine/pipeline/steps/lock.py +64 -0
- engine/pipeline/steps/preflight.py +237 -0
- engine/pipeline/steps/preflight_adjust.py +147 -0
- engine/pipeline/steps/pregenerate.py +130 -0
- engine/pipeline/steps/quality.py +81 -0
- engine/pipeline/steps/skeleton.py +149 -0
- engine/pipeline/steps/source.py +163 -0
- engine/pipeline/steps/sync.py +117 -0
- engine/pipeline/steps/sync_finalize.py +237 -0
- engine/pipeline/steps/sync_update.py +341 -0
- engine/pipelines.py +91 -0
- engine/runner.py +335 -0
- engine/strategies/__init__.py +86 -0
- engine/strategies/api.py +128 -0
- engine/strategies/delegated.py +50 -0
- engine/strategies/dryrun.py +25 -0
- engine/two_phase.py +143 -0
- mcp_server/__init__.py +73 -0
- mcp_server/__main__.py +5 -0
- mcp_server/tools/__init__.py +1 -0
- mcp_server/tools/config.py +63 -0
- mcp_server/tools/discovery.py +276 -0
- mcp_server/tools/generation.py +184 -0
- mcp_server/tools/planning.py +144 -0
- mcp_server/tools/source.py +175 -0
- mcp_server/tools/validation.py +140 -0
- mcp_server/tools/workflow.py +166 -0
- mcp_server/workflow_loader.py +204 -0
- presets/generic/audit_dimensions.md +132 -0
- presets/generic/doc_types.yaml +152 -0
- presets/generic/preset.yaml +115 -0
- presets/java-spring/audit_dimensions.md +228 -0
- presets/java-spring/audit_dimensions.yaml +203 -0
- presets/java-spring/doc_types.yaml +269 -0
- presets/java-spring/hooks.py +122 -0
- presets/java-spring/preset.yaml +341 -0
- presets/java-spring/templates/README.md +34 -0
- presets/java-spring/templates/audit-system.md +15 -0
- presets/java-spring/templates/subagent-aop.md +105 -0
- presets/java-spring/templates/subagent-api.md +63 -0
- presets/java-spring/templates/subagent-architecture.md +111 -0
- presets/java-spring/templates/subagent-async-events.md +107 -0
- presets/java-spring/templates/subagent-audit-api-contracts.md +40 -0
- presets/java-spring/templates/subagent-audit-architecture.md +38 -0
- presets/java-spring/templates/subagent-audit-business.md +40 -0
- presets/java-spring/templates/subagent-audit-data-models.md +40 -0
- presets/java-spring/templates/subagent-business.md +129 -0
- presets/java-spring/templates/subagent-caching.md +75 -0
- presets/java-spring/templates/subagent-database-access.md +114 -0
- presets/java-spring/templates/subagent-enum.md +75 -0
- presets/java-spring/templates/subagent-error-handling.md +91 -0
- presets/java-spring/templates/subagent-external-integrations.md +80 -0
- presets/java-spring/templates/subagent-index.md +122 -0
- presets/java-spring/templates/subagent-messaging.md +97 -0
- presets/java-spring/templates/subagent-model.md +88 -0
- presets/java-spring/templates/subagent-observability.md +91 -0
- presets/java-spring/templates/subagent-scheduled.md +81 -0
- presets/java-spring/templates/subagent-security.md +102 -0
- presets/java-spring/templates/subagent-structure.md +101 -0
- presets/java-spring/templates/subagent-sync-section.md +34 -0
- presets/java-spring/templates/subagent-utils.md +73 -0
- presets/java-spring/templates/sync-system.md +8 -0
- presets/java-spring/workflow-extensions.md +112 -0
- skills/__init__.py +1 -0
- skills/_shared/README.md +30 -0
- skills/_shared/doc-coverage-shared.md +134 -0
- skills/_shared/doc-quality-standard.md +1058 -0
- skills/_shared/doc-subagent-rules.md +762 -0
- skills/_shared/windows-compat.md +89 -0
- skills/kb-audit/SKILL.md +52 -0
- skills/kb-audit/rules.md +88 -0
- skills/kb-audit/steps/step-01-prepare.md +75 -0
- skills/kb-audit/steps/step-02-audit.md +96 -0
- skills/kb-audit/steps/step-03-verify.md +65 -0
- skills/kb-audit/steps/step-04-report.md +64 -0
- skills/kb-init/SKILL.md +146 -0
- skills/kb-init/rules.md +187 -0
- skills/kb-init/steps/step-01-scope.md +62 -0
- skills/kb-init/steps/step-02-source.md +410 -0
- skills/kb-init/steps/step-03-generate.md +307 -0
- skills/kb-init/steps/step-04-quality.md +92 -0
- skills/kb-init/steps/step-05-finalize.md +132 -0
- skills/kb-init/templates/core/execution-modes.md +29 -0
- skills/kb-init/templates/core/output-only.md +4 -0
- skills/kb-init/templates/core/readwrite.md +33 -0
- skills/kb-search/SKILL.md +138 -0
- skills/kb-search/rules.md +64 -0
- skills/kb-sync/SKILL.md +43 -0
- skills/kb-sync/rules.md +70 -0
- skills/kb-sync/scripts/rebuild_module.py +91 -0
- skills/kb-sync/scripts/scan_repos.py +687 -0
- skills/kb-sync/steps/step-01-detect.md +72 -0
- skills/kb-sync/steps/step-02-update.md +71 -0
- skills/kb-sync/steps/step-03-verify.md +47 -0
- skills/kb-sync/steps/step-04-finalize.md +52 -0
- source_kb-0.2.2.dist-info/METADATA +194 -0
- source_kb-0.2.2.dist-info/RECORD +228 -0
- source_kb-0.2.2.dist-info/WHEEL +5 -0
- source_kb-0.2.2.dist-info/entry_points.txt +3 -0
- source_kb-0.2.2.dist-info/licenses/LICENSE +21 -0
- source_kb-0.2.2.dist-info/top_level.txt +6 -0
core/git.py
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
"""Git operations wrapper — clone, fetch, diff, file reading.
|
|
2
|
+
|
|
3
|
+
Provides retry, timeout, and input validation for all git commands.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
import subprocess
|
|
10
|
+
import time
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# ─── Errors ─────────────────────────────────────────────────
|
|
15
|
+
|
|
16
|
+
class GitError(Exception):
    """Base class for every error raised by this git wrapper."""


class GitAuthError(GitError):
    """Git reported an authentication or permission failure."""


class GitNetworkError(GitError):
    """A git command timed out or kept failing after all retries."""


class GitRepoNotFoundError(GitError):
    """The remote repository could not be found while cloning."""


class GitBranchNotFoundError(GitError):
    """The requested branch could not be found while cloning."""


class ConfigError(GitError):
    """A name, URL, ref or repository setting supplied by the caller is invalid."""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# ─── Validation ─────────────────────────────────────────────
|
|
36
|
+
|
|
37
|
+
# NOTE: every pattern is anchored with \Z instead of $. In Python, $ also
# matches just before a trailing newline, so "main\n" would slip through.

# Identifier: starts alphanumeric, then alphanumerics, ".", "_" or "-".
_SAFE_NAME = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]*\Z")
# Remote URL: http(s)://, git://, ssh:// or scp-style git@..., with no whitespace.
_SAFE_URL = re.compile(r"^(?:https?://|git://|ssh://|git@)[^\s]+\Z")
# Git ref/revision: starts alphanumeric; allows path separators and the
# revision operators ~ ^ { } in addition to the identifier characters.
_SAFE_REF = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._/~^{}\-]*\Z")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _validate_name(name: str, label: str = "name"):
    """Ensure *name* is non-empty, matches ``_SAFE_NAME`` and has no "..".

    Args:
        name: The identifier to check.
        label: Field name used in the error message.

    Raises:
        ConfigError: If *name* fails any of the checks.
    """
    acceptable = (
        bool(name)
        and _SAFE_NAME.match(name) is not None
        and ".." not in name
    )
    if not acceptable:
        raise ConfigError(f"Invalid {label}: {name!r}")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _validate_url(url: str):
    """Check that a non-empty *url* is a supported git remote URL.

    An empty/falsy URL is accepted without error. A URL starting with "-"
    is rejected, as is one that does not match ``_SAFE_URL``.

    Raises:
        ConfigError: If the URL is rejected.
    """
    if not url:
        return
    looks_like_option = url.startswith("-")
    if looks_like_option or not _SAFE_URL.match(url):
        raise ConfigError(f"Invalid URL: {url!r}")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _validate_ref(ref: str):
    """Check that *ref* is safe to pass to git as a revision argument.

    A ref is accepted only when it is non-empty, does not start with "-",
    contains neither "/.." nor "../", and matches ``_SAFE_REF``.

    Raises:
        ConfigError: If any check fails.
    """
    if ref and not ref.startswith("-"):
        has_traversal = "/.." in ref or "../" in ref
        if not has_traversal and _SAFE_REF.match(ref):
            return
    raise ConfigError(f"Invalid ref: {ref!r}")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# ─── Core ───────────────────────────────────────────────────
|
|
66
|
+
|
|
67
|
+
def git(repo_path: str | Path, *args, timeout: int = 60) -> tuple[int, str, str]:
    """Run ``git -C <repo_path> <args...>`` and capture its output.

    Output is decoded as UTF-8 with undecodable bytes replaced. A non-zero
    exit status is NOT an error here; callers inspect the return code.

    Args:
        repo_path: Directory passed to ``git -C``.
        *args: Remaining git command-line arguments.
        timeout: Seconds to wait before the command is abandoned.

    Returns:
        ``(returncode, stdout, stderr)``.

    Raises:
        GitNetworkError: If the command exceeds *timeout*.
        GitError: If the ``git`` executable cannot be found.
    """
    cmd = ["git", "-C", str(repo_path), *args]
    try:
        completed = subprocess.run(
            cmd, capture_output=True, encoding="utf-8", errors="replace", timeout=timeout
        )
    except subprocess.TimeoutExpired as exc:
        # Chain the cause so the original timeout details stay in the traceback.
        raise GitNetworkError(f"Git command timed out: {' '.join(cmd)}") from exc
    except FileNotFoundError as exc:
        raise GitError("git not found — please install git") from exc
    return completed.returncode, completed.stdout, completed.stderr
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# ─── Repository management ──────────────────────────────────
|
|
80
|
+
|
|
81
|
+
def fetch_with_retry(repo_path: Path, branch: str = "main", max_retries: int = 3) -> bool:
    """Run ``git fetch origin <branch>``, retrying with exponential backoff.

    Waits 2, 4, 8, ... seconds between attempts.

    Returns:
        True once a fetch succeeds; False only when *max_retries* allows no
        attempt at all.

    Raises:
        ConfigError: If *branch* is not a valid ref.
        GitAuthError: If git exits 128 with an authentication/permission message.
        GitNetworkError: If the last attempt also fails.
    """
    _validate_ref(branch)
    final_attempt = max_retries - 1
    attempt = 0
    while attempt < max_retries:
        code, _, stderr = git(repo_path, "fetch", "origin", branch)
        if code == 0:
            return True
        lowered = stderr.lower()
        if code == 128 and ("authentication" in lowered or "permission denied" in lowered):
            # Retrying cannot fix bad credentials; fail immediately.
            raise GitAuthError(f"Authentication failed: {stderr.strip()}")
        if attempt >= final_attempt:
            raise GitNetworkError(f"Fetch failed after {max_retries} retries: {stderr.strip()}")
        time.sleep(2 ** (attempt + 1))
        attempt += 1
    return False
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _redact_credentials(text: str) -> str:
    """Mask any ``user[:secret]@`` part that follows ``://`` in *text*."""
    return re.sub(r"(://)[^/@\s]+@", r"\1***@", text)


def clone_with_retry(url: str, target: Path, branch: str = "main", max_retries: int = 3) -> bool:
    """Shallow-clone *branch* of *url* into *target*, retrying with backoff.

    The clone uses ``--depth 1 --single-branch``. If ``target/.git`` already
    exists nothing is done. Waits 2, 4, 8, ... seconds between attempts.

    Args:
        url: Remote URL (validated with ``_validate_url``).
        target: Destination directory; its parent is created if needed.
        branch: Branch to clone (validated with ``_validate_ref``).
        max_retries: Maximum number of clone attempts.

    Returns:
        True when the repo is available; False only when *max_retries*
        allows no attempt at all.

    Raises:
        ConfigError: If *url* or *branch* is invalid.
        GitAuthError: On authentication/permission failures.
        GitRepoNotFoundError: If the remote repository does not exist.
        GitBranchNotFoundError: If the remote has no such branch.
        GitNetworkError: If the last attempt times out or fails.
        GitError: If the ``git`` executable cannot be found.
    """
    import shutil

    _validate_url(url)
    # Same branch validation that fetch_with_retry applies.
    _validate_ref(branch)
    if (target / ".git").exists():
        return True
    target.parent.mkdir(parents=True, exist_ok=True)

    cmd = ["git", "clone", "--depth", "1", "--single-branch", "-b", branch, "--", url, str(target)]
    for attempt in range(max_retries):
        is_last = attempt >= max_retries - 1
        try:
            r = subprocess.run(cmd, capture_output=True, encoding="utf-8", errors="replace", timeout=300)
        except subprocess.TimeoutExpired as exc:
            # A killed clone cannot tidy up after itself; the leftover
            # directory would make every later attempt fail immediately.
            shutil.rmtree(target, ignore_errors=True)
            if is_last:
                raise GitNetworkError(f"Clone timed out after {max_retries} retries") from exc
            time.sleep(2 ** (attempt + 1))
            continue
        except FileNotFoundError as exc:
            raise GitError("git not found — please install git") from exc

        if r.returncode == 0:
            return True

        err = r.stderr.lower()
        # The URL may carry embedded credentials; never echo them in messages.
        safe_stderr = _redact_credentials(r.stderr.strip())
        if r.returncode == 128:
            if "authentication" in err or "permission denied" in err:
                raise GitAuthError(f"Authentication failed: {safe_stderr}")
            # Must precede the generic "not found" test: git's missing-branch
            # message ("Remote branch X not found ...") contains both phrases.
            if "remote branch" in err:
                raise GitBranchNotFoundError(f"Branch not found: {branch}")
            if "not found" in err or "does not exist" in err:
                raise GitRepoNotFoundError(f"Repository not found: {_redact_credentials(url)}")
            if "branch" in err:
                raise GitBranchNotFoundError(f"Branch not found: {branch}")

        # Clean partial clone before the next attempt.
        if target.exists():
            shutil.rmtree(target, ignore_errors=True)
        if is_last:
            raise GitNetworkError(f"Clone failed after {max_retries} retries: {safe_stderr}")
        time.sleep(2 ** (attempt + 1))
    return False
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def ensure_repo(url: str | None, local: str | None, cache_dir: Path, module_name: str, branch: str = "main") -> Path:
    """Return a local checkout for *module_name*, creating it if necessary.

    Resolution order:
      1. ``cache_dir/module_name`` already holds a repo -> fetch *branch*.
      2. *url* is set -> clone it into ``cache_dir/module_name``.
      3. *local* is set -> use that path (``~`` expanded) if it is a git repo.

    Raises:
        ConfigError: If *module_name* is invalid, *local* is not a git repo,
            or neither *url* nor *local* is configured.
    """
    _validate_name(module_name, "module_name")
    target = cache_dir / module_name

    if (target / ".git").exists():
        fetch_with_retry(target, branch)
    elif url:
        _validate_url(url)
        clone_with_retry(url, target, branch)
    elif local:
        local_path = Path(local).expanduser()
        if (local_path / ".git").exists():
            return local_path
        raise ConfigError(f"Not a git repo: {local_path}")
    else:
        raise ConfigError(f"Module {module_name} has neither url nor local configured")
    return target
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
# ─── File reading ───────────────────────────────────────────
|
|
156
|
+
|
|
157
|
+
def read_file(repo_path: Path, ref: str, filepath: str) -> str | None:
    """Return the content of *filepath* as stored at *ref* (``git show``).

    Returns:
        The file text, or None when git's error output says the path
        "does not exist" or was "not found".

    Raises:
        ConfigError: If *ref* is invalid.
        GitError: For any other ``git show`` failure.
    """
    _validate_ref(ref)
    code, stdout, stderr = git(repo_path, "show", f"{ref}:{filepath}")
    if code != 0:
        missing = any(marker in stderr for marker in ("does not exist", "not found"))
        if missing:
            return None
        raise GitError(f"Failed to read file: {stderr.strip()}")
    return stdout
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def ls_tree(repo_path: Path, ref: str = "origin/main") -> list[str]:
    """Return every file path recorded at *ref* (``git ls-tree -r --name-only``).

    Each output line is whitespace-stripped and blank lines are dropped.

    Raises:
        ConfigError: If *ref* is invalid.
        GitError: If ``git ls-tree`` exits non-zero.
    """
    _validate_ref(ref)
    code, stdout, stderr = git(repo_path, "ls-tree", "-r", "--name-only", ref)
    if code != 0:
        raise GitError(f"ls-tree failed: {stderr.strip()}")
    stripped = (entry.strip() for entry in stdout.splitlines())
    return [name for name in stripped if name]
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# ─── Diff operations ────────────────────────────────────────
|
|
178
|
+
|
|
179
|
+
def get_head_commit(repo_path: Path, ref: str = "origin/main") -> str | None:
    """Resolve *ref* with ``git rev-parse``.

    Returns:
        The stripped command output, or None if git exits non-zero.

    Raises:
        ConfigError: If *ref* is invalid.
    """
    _validate_ref(ref)
    code, stdout, _ = git(repo_path, "rev-parse", ref)
    if code != 0:
        return None
    return stdout.strip()
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def diff_files(repo_path: Path, old_commit: str, new_commit: str) -> list[dict]:
    """List files changed between two commits (``git diff --name-status``).

    Returns:
        One dict per changed file with ``status`` (the first letter of git's
        status field) and ``file``. Rename ("R") and copy ("C") entries also
        carry ``old_path`` and ``new_path``, and their ``file`` is the new path.

    Raises:
        ConfigError: If either commit is not a valid ref.
        GitError: If ``git diff`` exits non-zero.
    """
    _validate_ref(old_commit)
    _validate_ref(new_commit)
    code, stdout, stderr = git(repo_path, "diff", "--name-status", f"{old_commit}..{new_commit}")
    if code != 0:
        raise GitError(f"diff failed: {stderr.strip()}")

    changes: list[dict] = []
    for line in stdout.splitlines():
        if not line.strip():
            continue
        parts = line.split("\t")
        # The status field may carry a similarity score (e.g. "R100", "C75");
        # slicing instead of indexing tolerates an empty field.
        status = parts[0][:1]
        if not status or len(parts) < 2:
            continue
        if status in ("R", "C") and len(parts) >= 3:
            # Renames and copies list "<old>\t<new>"; the affected file is the new one.
            changes.append({"status": status, "old_path": parts[1], "new_path": parts[2], "file": parts[2]})
        else:
            changes.append({"status": status, "file": parts[1]})
    return changes
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def diff_stat(repo_path: Path, old_commit: str, new_commit: str) -> str:
    """Return the stripped ``git diff --shortstat`` output for two commits.

    Raises:
        ConfigError: If either commit is not a valid ref.
        GitError: If ``git diff`` exits non-zero.
    """
    for commit in (old_commit, new_commit):
        _validate_ref(commit)
    revision_range = f"{old_commit}..{new_commit}"
    code, stdout, stderr = git(repo_path, "diff", "--shortstat", revision_range)
    if code == 0:
        return stdout.strip()
    raise GitError(f"diff stat failed: {stderr.strip()}")
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def detect_changes(repo_path: Path, from_ref: str, to_ref: str) -> list[dict]:
    """Describe the changes between two refs with readable status labels.

    Wraps ``diff_files`` and maps git's one-letter status to a word.

    Returns:
        A list of ``{"path": str, "status": str}`` where status is one of
        "added", "modified", "deleted", "renamed" or "copied". Type changes
        ("T") and unrecognised letters are reported as "modified".
    """
    labels = {"A": "added", "M": "modified", "D": "deleted", "R": "renamed", "C": "copied", "T": "modified"}
    return [
        {"path": change["file"], "status": labels.get(change["status"], "modified")}
        for change in diff_files(repo_path, from_ref, to_ref)
    ]
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def get_commit_log(
    repo_path: Path, from_ref: str, to_ref: str, max_count: int = 50
) -> list[dict]:
    """Return up to *max_count* commits in ``from_ref..to_ref``.

    Returns:
        A list of ``{"hash", "author", "date", "message"}`` dicts, in the
        order git prints them. ``date`` uses git's ``%aI`` format and
        ``message`` is the subject line.

    Raises:
        ConfigError: If either ref is invalid.
        GitError: If ``git log`` exits non-zero.
    """
    _validate_ref(from_ref)
    _validate_ref(to_ref)
    # Five lines per commit: hash, author, date, subject, then a "---" separator.
    fmt = "%H%n%an%n%aI%n%s%n---"
    code, stdout, stderr = git(
        repo_path, "log", f"--max-count={max_count}", f"--format={fmt}", f"{from_ref}..{to_ref}"
    )
    if code != 0:
        raise GitError(f"git log failed: {stderr.strip()}")

    lines = stdout.strip().split("\n")
    record_size = 5
    commits = []
    # A record is usable once its first four lines exist; the separator is skipped.
    for start in range(0, len(lines) - 3, record_size):
        commit_hash, author, date, subject = (text.strip() for text in lines[start:start + 4])
        commits.append({
            "hash": commit_hash,
            "author": author,
            "date": date,
            "message": subject,
        })
    return commits
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def clone_repo(
    url: str, dest: Path, branch: str = "main", auth: dict | None = None
) -> Path:
    """Clone a repository with optional authentication.

    Args:
        url: Repository URL.
        dest: Destination path.
        branch: Branch to clone.
        auth: Optional dict with "type" ("ssh_key"|"token") and credentials.
            - {"type": "ssh_key", "key_path": "/path/to/key"}
            - {"type": "token", "token": "ghp_xxx"}
            A token is only applied to ``https://`` URLs.

    Returns:
        Path to the cloned repository.

    Raises:
        ConfigError: If *url* is invalid.
        GitError: Any error raised by ``clone_with_retry``.
    """
    import os
    import shlex

    _validate_url(url)
    auth = auth or {}
    auth_type = auth.get("type", "")

    if auth_type == "token" and auth.get("token") and url.startswith("https://"):
        # Inject token into HTTPS URL.
        # NOTE(review): the token then lives in the URL handed to git —
        # consider a credential helper if that is a concern.
        url = url.replace("https://", f"https://x-access-token:{auth['token']}@", 1)

    key_path = auth.get("key_path") if auth_type == "ssh_key" else None
    if not key_path:
        clone_with_retry(url, dest, branch)
        return dest

    # GIT_SSH_COMMAND is interpreted by a shell, so the key path must be quoted.
    # NOTE(review): StrictHostKeyChecking=no disables host-key verification.
    ssh_command = f"ssh -i {shlex.quote(str(key_path))} -o StrictHostKeyChecking=no"
    previous = os.environ.get("GIT_SSH_COMMAND")
    os.environ["GIT_SSH_COMMAND"] = ssh_command
    try:
        clone_with_retry(url, dest, branch)
    finally:
        # Restore the caller's environment even when the clone raises, and
        # never discard a value that was set before this call.
        if previous is None:
            os.environ.pop("GIT_SSH_COMMAND", None)
        else:
            os.environ["GIT_SSH_COMMAND"] = previous
    return dest
|
core/interfaces.py
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
"""Abstract interfaces and shared data classes for the core package.
|
|
2
|
+
|
|
3
|
+
All ABCs and dataclasses that define contracts between layers live here.
|
|
4
|
+
No imports from cli/ or skill/ are permitted.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
from abc import ABC, abstractmethod
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any, Literal
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# ---------------------------------------------------------------------------
|
|
19
|
+
# LLM data transfer objects
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class LlmRequest:
    """A single LLM call: prompts plus sampling settings.

    Attributes:
        system: System prompt text.
        user: User prompt text.
        model: Model identifier; None means "use the default from config".
        max_tokens: Token limit for the request.
        temperature: Sampling temperature.
    """

    system: str
    user: str
    # None = use default from config
    model: str | None = None
    max_tokens: int = 8192
    temperature: float = 0.3
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class LlmResponse:
    """The outcome of one LLM call.

    Attributes:
        content: Response text.
        status: One of "done", "delegated", "dry-run" or "failed".
        usage: Integer counters keyed by name; empty by default.
        elapsed: Elapsed time for the call.
        error: Error description; empty by default.
        prompt_file: Prompt file reference, used in delegated mode.
    """

    content: str
    status: Literal["done", "delegated", "dry-run", "failed"]
    usage: dict[str, int] = field(default_factory=dict)
    elapsed: float = 0.0
    error: str = ""
    # For delegated mode
    prompt_file: str = ""
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# ---------------------------------------------------------------------------
|
|
47
|
+
# LlmStrategy ABC
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class LlmStrategy(ABC):
    """Interface for a backend that executes LLM requests.

    Implementations: ApiLlmStrategy, DelegatedLlmStrategy, DryRunLlmStrategy.
    """

    @abstractmethod
    def call(self, request: LlmRequest) -> LlmResponse:
        """Execute one request and return its response."""
        ...

    def call_batch(
        self, requests: list[LlmRequest], max_concurrent: int = 5
    ) -> list[LlmResponse]:
        """Execute several requests and return the responses in input order.

        This default runs them one after another and ignores
        *max_concurrent*; subclasses may override it to run concurrently.
        """
        return list(map(self.call, requests))
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
# PromptAssembler ABC
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class PromptAssembler(ABC):
    """Interface for deciding how prompt content is put together.

    CLI injects InlineAssembler (inlines source); Agent uses ReferenceAssembler
    (emits file path references).
    """

    @abstractmethod
    def resolve_file_list(
        self, module_dir: Path, doc_type: str, file_list_override: str | None = None
    ) -> str:
        """Return the file-list text for *doc_type* in *module_dir*."""
        ...

    @abstractmethod
    def resolve_source_content(
        self, module_dir: Path, doc_type: str, source_cache: Path
    ) -> str:
        """Return the source-content text for *doc_type* in *module_dir*."""
        ...

    def resolve_source_content_from_paths(
        self, module_dir: Path, doc_type: str, source_cache: Path, file_paths: list[str]
    ) -> str:
        """Resolve source content for an explicit list of files (shard override).

        This default ignores *file_paths* and delegates to
        ``resolve_source_content``; subclasses may override it for
        shard-aware handling.
        """
        fallback = self.resolve_source_content(module_dir, doc_type, source_cache)
        return fallback

    @abstractmethod
    def resolve_skeleton_content(self, module_dir: Path) -> str:
        """Return the skeleton text for *module_dir*."""
        ...

    @abstractmethod
    def should_append_source(self) -> bool:
        """Return whether source should be appended."""
        ...
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# ---------------------------------------------------------------------------
|
|
106
|
+
# SkeletonParser ABC
|
|
107
|
+
# ---------------------------------------------------------------------------
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class SkeletonParser(ABC):
    """Interface for a language-specific skeleton parser.

    Parsers are registered by ``name`` in the parser registry.
    """

    name: str
    # Higher = preferred (jqassistant=100, treesitter=80, regex=50)
    priority: int

    @abstractmethod
    def can_parse(self, repo_path: Path, preset: dict[str, Any]) -> bool:
        """Return whether this parser can handle *repo_path* under *preset*."""
        ...

    @abstractmethod
    def parse(
        self, repo_path: Path, preset: dict[str, Any], **kwargs: Any
    ) -> list[dict[str, Any]]:
        """Parse *repo_path* and return a list of dict entries."""
        ...
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
# ---------------------------------------------------------------------------
|
|
126
|
+
# Step ABC and StepResult
|
|
127
|
+
# ---------------------------------------------------------------------------
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@dataclass
class StepResult:
    """What a single pipeline step produced.

    Attributes:
        status: One of "ok", "failed", "skipped" or "delegated".
        message: Free-form description; empty by default.
        details: Extra data keyed by name; empty by default.
    """

    status: Literal["ok", "failed", "skipped", "delegated"]
    message: str = ""
    details: dict[str, Any] = field(default_factory=dict)

    @property
    def ok(self) -> bool:
        """True when the status is "ok"."""
        return self.status == "ok"

    @property
    def skipped(self) -> bool:
        """True when the status is "skipped"."""
        return self.status == "skipped"

    @property
    def delegated(self) -> bool:
        """True when the status is "delegated"."""
        return self.status == "delegated"

    def __str__(self) -> str:
        """Render as ``[STATUS] message`` with the status upper-cased."""
        return f"[{self.status.upper()}] {self.message}"
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class Step(ABC):
    """Base class for a pipeline step, with optional checkpoint and rollback.

    Attributes:
        name: Step name, shown in ``repr``.
        checkpoint: Optional checkpoint label; None when the step has none.
    """

    name: str
    checkpoint: str | None = None

    def __init__(self, name: str, checkpoint: str | None = None):
        self.name = name
        self.checkpoint = checkpoint

    @abstractmethod
    def run(self, ctx: PipelineContext) -> StepResult:
        """Execute the step against *ctx* and report the outcome."""
        ...

    def rollback(self, ctx: PipelineContext) -> None:
        """Clean up after a failure. The default does nothing."""

    def __repr__(self) -> str:
        if self.checkpoint:
            cp = f" (cp={self.checkpoint})"
        else:
            cp = ""
        return f"<{self.__class__.__name__} '{self.name}'{cp}>"
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
# ---------------------------------------------------------------------------
|
|
176
|
+
# PipelineContext
|
|
177
|
+
# ---------------------------------------------------------------------------
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
@dataclass
class PipelineContext:
    """Shared state carried across pipeline steps.

    Attributes:
        config: The full loaded configuration.
        kb_name: Name of the selected knowledge base.
        kb_config: The ``config["knowledge_bases"][kb_name]`` entry.
        knowledge_dir: Path taken from ``kb_config["knowledge_dir"]``.
        project_root: Project root directory.
        module: Optional module name.
        cache_dir: Source cache directory; defaults to ``.source-cache``.
        state: Mutable scratch mapping; empty by default.
    """

    config: dict[str, Any]
    kb_name: str
    kb_config: dict[str, Any]
    knowledge_dir: Path
    project_root: Path
    module: str | None = None
    cache_dir: Path = field(default_factory=lambda: Path(".source-cache"))
    state: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_config(
        cls,
        config: dict[str, Any],
        kb_name: str,
        *,
        module: str | None = None,
    ) -> PipelineContext:
        """Construct context from a loaded configuration dict.

        ``project_root`` is the current working directory. ``cache_dir``
        comes from ``kb_config["source"]["cache_dir"]`` and falls back to
        ``./.source-cache`` when that value is absent or None.

        Raises:
            KeyError: If ``knowledge_bases``, *kb_name* or ``knowledge_dir``
                is missing from *config*.
        """
        kb_config = config["knowledge_bases"][kb_name]
        knowledge_dir = Path(kb_config["knowledge_dir"])
        # A key that is present but None must not reach .get() or Path().
        source = kb_config.get("source") or {}
        configured_cache = source.get("cache_dir")
        if configured_cache is None:
            configured_cache = "./.source-cache"

        return cls(
            config=config,
            kb_name=kb_name,
            kb_config=kb_config,
            knowledge_dir=knowledge_dir,
            project_root=Path.cwd(),
            module=module,
            cache_dir=Path(configured_cache),
        )
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
# ---------------------------------------------------------------------------
|
|
220
|
+
# Validator ABC and ValidationResult
|
|
221
|
+
# ---------------------------------------------------------------------------
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
@dataclass
class ValidationResult:
    """Errors and warnings collected by a validator run."""

    errors: list[str] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)

    @property
    def passed(self) -> bool:
        """True when no errors were recorded; warnings do not count."""
        return not self.errors

    def merge(self, other: ValidationResult) -> ValidationResult:
        """Return a new result holding this result's entries followed by *other*'s.

        Neither input is modified.
        """
        combined_errors = self.errors + other.errors
        combined_warnings = self.warnings + other.warnings
        return ValidationResult(errors=combined_errors, warnings=combined_warnings)
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
class Validator(ABC):
    """Interface for a document quality validator, registered by ``name``."""

    name: str

    @abstractmethod
    def validate(self, module_dir: Path, **kwargs: Any) -> ValidationResult:
        """Check *module_dir* and return the collected errors and warnings."""
        ...
|
core/monitor/__init__.py
ADDED
core/monitor/progress.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Progress monitoring — read/write progress files, detect heartbeat timeouts.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
from core.monitor.progress import write_progress, read_progress, check_heartbeat
|
|
5
|
+
|
|
6
|
+
write_progress(module_dir, "business-logic", "RUNNING")
|
|
7
|
+
status = read_progress(module_dir, "business-logic")
|
|
8
|
+
stale = check_heartbeat(module_dir, timeout_seconds=60)
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import time
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
from core.paths import progress_path, progress_dir, ensure_dir
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def write_progress(module_dir: Path, doc_name: str, message: str) -> None:
    """Write *message* followed by a UTC timestamp to a document's progress file.

    The file location is resolved by ``progress_path(module_dir, doc_name)``
    and its parent directory is passed to ``ensure_dir`` before writing.
    Any existing content of the file is overwritten.
    """
    target = progress_path(module_dir, doc_name)
    ensure_dir(target.parent)
    stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    payload = f"{message} ({stamp})"
    target.write_text(payload, encoding="utf-8")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def read_progress(module_dir: Path, doc_name: str) -> str | None:
    """Return the stripped content of a document's progress file.

    Returns None when the path given by ``progress_path`` does not exist.
    """
    target = progress_path(module_dir, doc_name)
    if not target.exists():
        return None
    raw = target.read_text(encoding="utf-8")
    return raw.strip()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def check_heartbeat(module_dir: Path, timeout_seconds: int = 60) -> list[str]:
    """Find progress files with stale heartbeats (no update within timeout).

    A file is reported when its modification time is more than
    *timeout_seconds* in the past and its content still contains
    ``RUNNING`` or ``HEARTBEAT``. Non-file entries and ``.hb-pid`` files are
    ignored. Returns an empty list when the progress directory is missing.

    Returns the file names of the entries that appear stale/stuck.
    """
    # NOTE(review): this returns ``Path.name`` while check_progress keys on
    # ``Path.stem`` -- confirm which form callers expect as the doc name.
    stuck: list[str] = []
    directory = progress_dir(module_dir)
    if not directory.is_dir():
        return stuck

    reference = time.time()
    for entry in directory.iterdir():
        if not entry.is_file() or entry.name.endswith(".hb-pid"):
            continue
        age = reference - entry.stat().st_mtime
        if not age > timeout_seconds:
            continue
        text = entry.read_text(encoding="utf-8").strip()
        # Only flag entries whose last recorded state is still active.
        still_active = "RUNNING" in text or "HEARTBEAT" in text
        if still_active:
            stuck.append(entry.name)

    return stuck
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def cleanup_progress(module_dir: Path) -> int:
    """Remove all progress files (after successful completion).

    Every non-directory entry in the progress directory is unlinked,
    including ``.hb-pid`` files. Real subdirectories are skipped:
    ``Path.unlink`` cannot remove a directory and would raise, aborting the
    cleanup part-way through.

    Returns:
        The number of entries unlinked. An entry that disappeared between
        listing and removal is still counted, because removal uses
        ``missing_ok=True``. Returns 0 when the progress directory does not
        exist.
    """
    prog_dir = progress_dir(module_dir)
    if not prog_dir.is_dir():
        return 0
    count = 0
    for f in prog_dir.iterdir():
        # Symlinks (even ones pointing at directories) are removable with
        # unlink(); only genuine directories must be left alone.
        if f.is_dir() and not f.is_symlink():
            continue
        f.unlink(missing_ok=True)
        count += 1
    return count
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def check_progress(module_dir: Path) -> dict[str, str]:
    """Collect the content of every progress file, keyed by file stem.

    Non-file entries and files whose name ends with ``.hb-pid`` are skipped.
    Each value is the file's text with surrounding whitespace stripped.

    Returns an empty dict if the progress directory does not exist or holds
    no progress files.
    """
    directory = progress_dir(module_dir)
    if not directory.is_dir():
        return {}
    return {
        entry.stem: entry.read_text(encoding="utf-8").strip()
        for entry in directory.iterdir()
        if entry.is_file() and not entry.name.endswith(".hb-pid")
    }
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Prompt persistence — save rendered prompts to disk for debugging.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
from core.monitor.prompt_store import save_prompt
|
|
5
|
+
|
|
6
|
+
save_prompt(module_dir, task_id, system_prompt, user_prompt)
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import time
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def save_prompt(
    module_dir: Path,
    task_id: str,
    system: str,
    user: str,
    *,
    model: str = "",
) -> Path:
    """Write a rendered prompt to ``.meta/prompts/{task_id}.md`` under *module_dir*.

    Forward and backward slashes in *task_id* are replaced with underscores
    for the file name; the header comment keeps the original *task_id*.
    The file starts with an HTML comment carrying the task id, model and a
    UTC timestamp, followed by a "System Prompt" section (only when
    *system* is non-empty) and a "User Prompt" section. An existing file is
    overwritten.

    Returns the path to the saved file.
    """
    out_dir = module_dir / ".meta" / "prompts"
    out_dir.mkdir(parents=True, exist_ok=True)

    # Keep the file directly inside out_dir even if task_id looks like a path.
    file_stem = task_id.replace("/", "_").replace("\\", "_")
    destination = out_dir / f"{file_stem}.md"

    stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    sections = [f"<!-- task_id: {task_id} | model: {model} | saved: {stamp} -->\n\n"]
    if system:
        sections.append(f"## System Prompt\n\n{system}\n\n")
    sections.append(f"## User Prompt\n\n{user}\n")

    destination.write_text("".join(sections), encoding="utf-8")
    return destination
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def should_save_prompts(config: dict) -> bool:
    """Report whether the config enables saving rendered prompts.

    Reads ``config["debug"]["save_prompts"]`` and returns its truthiness.
    Returns False when the ``debug`` entry is missing or is not a dict.
    """
    debug_section = config.get("debug", {})
    if not isinstance(debug_section, dict):
        return False
    flag = debug_section.get("save_prompts", False)
    return bool(flag)
|