source-kb 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. cli/__init__.py +50 -0
  2. cli/__main__.py +5 -0
  3. cli/commands/__init__.py +1 -0
  4. cli/commands/anchor_fix.py +47 -0
  5. cli/commands/diff_doc.py +52 -0
  6. cli/commands/dispatch.py +77 -0
  7. cli/commands/extract.py +72 -0
  8. cli/commands/file_list.py +74 -0
  9. cli/commands/index.py +84 -0
  10. cli/commands/lock.py +89 -0
  11. cli/commands/merge.py +60 -0
  12. cli/commands/merge_delta.py +19 -0
  13. cli/commands/metadata.py +24 -0
  14. cli/commands/pipeline.py +45 -0
  15. cli/commands/post_merge.py +43 -0
  16. cli/commands/query.py +52 -0
  17. cli/commands/render.py +101 -0
  18. cli/commands/scan_repos.py +46 -0
  19. cli/commands/setup.py +94 -0
  20. cli/commands/split.py +196 -0
  21. cli/commands/stale_files.py +98 -0
  22. cli/commands/validate.py +191 -0
  23. core/__init__.py +32 -0
  24. core/config.py +261 -0
  25. core/docs/__init__.py +7 -0
  26. core/docs/section_updater.py +286 -0
  27. core/docs/shared.py +149 -0
  28. core/git.py +294 -0
  29. core/interfaces.py +249 -0
  30. core/monitor/__init__.py +5 -0
  31. core/monitor/progress.py +83 -0
  32. core/monitor/prompt_store.py +49 -0
  33. core/paths.py +141 -0
  34. core/preset.py +237 -0
  35. core/preset_accessors.py +202 -0
  36. core/preset_classify.py +132 -0
  37. core/preset_hooks.py +129 -0
  38. core/preset_profile.py +89 -0
  39. core/prompt/__init__.py +7 -0
  40. core/prompt/__main__.py +147 -0
  41. core/prompt/content.py +320 -0
  42. core/prompt/context_manager.py +164 -0
  43. core/prompt/renderer.py +236 -0
  44. core/prompt/response_parser.py +274 -0
  45. core/prompt/templates.py +357 -0
  46. core/prompt/validate_parity.py +162 -0
  47. core/prompt/variables.py +339 -0
  48. core/rag/__init__.py +22 -0
  49. core/rag/__main__.py +136 -0
  50. core/rag/bm25_index.py +268 -0
  51. core/rag/chunker.py +273 -0
  52. core/rag/embedder.py +151 -0
  53. core/rag/indexer.py +292 -0
  54. core/rag/loader.py +89 -0
  55. core/rag/retriever.py +82 -0
  56. core/skeleton/__init__.py +11 -0
  57. core/skeleton/__main__.py +934 -0
  58. core/skeleton/anchor_fix.py +250 -0
  59. core/skeleton/classify.py +331 -0
  60. core/skeleton/cmd_anchor_fix.py +43 -0
  61. core/skeleton/cmd_diff_doc.py +44 -0
  62. core/skeleton/cmd_lock.py +87 -0
  63. core/skeleton/cmd_merge_delta.py +41 -0
  64. core/skeleton/community.py +233 -0
  65. core/skeleton/dependency_graph.py +306 -0
  66. core/skeleton/diff_doc.py +248 -0
  67. core/skeleton/dispatch.py +273 -0
  68. core/skeleton/dispatch_render.py +319 -0
  69. core/skeleton/dispatch_source.py +111 -0
  70. core/skeleton/extract.py +218 -0
  71. core/skeleton/extract_methods.py +298 -0
  72. core/skeleton/file_list.py +239 -0
  73. core/skeleton/impact.py +278 -0
  74. core/skeleton/jar_download.py +177 -0
  75. core/skeleton/jar_resolver.py +186 -0
  76. core/skeleton/loader.py +162 -0
  77. core/skeleton/merge.py +278 -0
  78. core/skeleton/merge_delta.py +229 -0
  79. core/skeleton/metadata.py +96 -0
  80. core/skeleton/metadata_builders.py +264 -0
  81. core/skeleton/module_dag.py +330 -0
  82. core/skeleton/parsers/__init__.py +71 -0
  83. core/skeleton/parsers/jqassistant.py +300 -0
  84. core/skeleton/parsers/jqassistant_cypher.py +225 -0
  85. core/skeleton/parsers/regex.py +171 -0
  86. core/skeleton/parsers/treesitter.py +324 -0
  87. core/skeleton/parsers/treesitter_java.py +284 -0
  88. core/skeleton/parsers/treesitter_multi.py +289 -0
  89. core/skeleton/pom_parser.py +299 -0
  90. core/skeleton/post_merge.py +295 -0
  91. core/skeleton/post_merge_llm.py +82 -0
  92. core/skeleton/query.py +195 -0
  93. core/skeleton/shard_context.py +177 -0
  94. core/skeleton/split.py +180 -0
  95. core/skeleton/split_cache.py +107 -0
  96. core/skeleton/split_feedback.py +174 -0
  97. core/skeleton/split_plan.py +219 -0
  98. core/skeleton/split_plan_helpers.py +305 -0
  99. core/skeleton/split_plan_llm.py +274 -0
  100. core/utils.py +135 -0
  101. core/validators/__init__.py +65 -0
  102. core/validators/__main__.py +215 -0
  103. core/validators/consistency.py +203 -0
  104. core/validators/coverage.py +171 -0
  105. core/validators/duplicates.py +76 -0
  106. core/validators/engine.py +224 -0
  107. core/validators/links.py +76 -0
  108. core/validators/sampling.py +169 -0
  109. core/validators/structure.py +144 -0
  110. engine/__init__.py +7 -0
  111. engine/assembler.py +231 -0
  112. engine/confirm.py +65 -0
  113. engine/dedup.py +106 -0
  114. engine/main.py +211 -0
  115. engine/pipeline/__init__.py +163 -0
  116. engine/pipeline/recovery.py +250 -0
  117. engine/pipeline/steps/__init__.py +23 -0
  118. engine/pipeline/steps/audit.py +220 -0
  119. engine/pipeline/steps/audit_apply.py +195 -0
  120. engine/pipeline/steps/audit_helpers.py +155 -0
  121. engine/pipeline/steps/classify_llm.py +236 -0
  122. engine/pipeline/steps/classify_prompt.py +223 -0
  123. engine/pipeline/steps/finalize.py +160 -0
  124. engine/pipeline/steps/generate.py +169 -0
  125. engine/pipeline/steps/generate_batch.py +197 -0
  126. engine/pipeline/steps/generate_recovery.py +170 -0
  127. engine/pipeline/steps/llm_plan_split.py +253 -0
  128. engine/pipeline/steps/lock.py +64 -0
  129. engine/pipeline/steps/preflight.py +237 -0
  130. engine/pipeline/steps/preflight_adjust.py +147 -0
  131. engine/pipeline/steps/pregenerate.py +130 -0
  132. engine/pipeline/steps/quality.py +81 -0
  133. engine/pipeline/steps/skeleton.py +149 -0
  134. engine/pipeline/steps/source.py +163 -0
  135. engine/pipeline/steps/sync.py +117 -0
  136. engine/pipeline/steps/sync_finalize.py +237 -0
  137. engine/pipeline/steps/sync_update.py +341 -0
  138. engine/pipelines.py +91 -0
  139. engine/runner.py +335 -0
  140. engine/strategies/__init__.py +86 -0
  141. engine/strategies/api.py +128 -0
  142. engine/strategies/delegated.py +50 -0
  143. engine/strategies/dryrun.py +25 -0
  144. engine/two_phase.py +143 -0
  145. mcp_server/__init__.py +73 -0
  146. mcp_server/__main__.py +5 -0
  147. mcp_server/tools/__init__.py +1 -0
  148. mcp_server/tools/config.py +63 -0
  149. mcp_server/tools/discovery.py +276 -0
  150. mcp_server/tools/generation.py +184 -0
  151. mcp_server/tools/planning.py +144 -0
  152. mcp_server/tools/source.py +175 -0
  153. mcp_server/tools/validation.py +140 -0
  154. mcp_server/tools/workflow.py +166 -0
  155. mcp_server/workflow_loader.py +204 -0
  156. presets/generic/audit_dimensions.md +132 -0
  157. presets/generic/doc_types.yaml +152 -0
  158. presets/generic/preset.yaml +115 -0
  159. presets/java-spring/audit_dimensions.md +228 -0
  160. presets/java-spring/audit_dimensions.yaml +203 -0
  161. presets/java-spring/doc_types.yaml +269 -0
  162. presets/java-spring/hooks.py +122 -0
  163. presets/java-spring/preset.yaml +341 -0
  164. presets/java-spring/templates/README.md +34 -0
  165. presets/java-spring/templates/audit-system.md +15 -0
  166. presets/java-spring/templates/subagent-aop.md +105 -0
  167. presets/java-spring/templates/subagent-api.md +63 -0
  168. presets/java-spring/templates/subagent-architecture.md +111 -0
  169. presets/java-spring/templates/subagent-async-events.md +107 -0
  170. presets/java-spring/templates/subagent-audit-api-contracts.md +40 -0
  171. presets/java-spring/templates/subagent-audit-architecture.md +38 -0
  172. presets/java-spring/templates/subagent-audit-business.md +40 -0
  173. presets/java-spring/templates/subagent-audit-data-models.md +40 -0
  174. presets/java-spring/templates/subagent-business.md +129 -0
  175. presets/java-spring/templates/subagent-caching.md +75 -0
  176. presets/java-spring/templates/subagent-database-access.md +114 -0
  177. presets/java-spring/templates/subagent-enum.md +75 -0
  178. presets/java-spring/templates/subagent-error-handling.md +91 -0
  179. presets/java-spring/templates/subagent-external-integrations.md +80 -0
  180. presets/java-spring/templates/subagent-index.md +122 -0
  181. presets/java-spring/templates/subagent-messaging.md +97 -0
  182. presets/java-spring/templates/subagent-model.md +88 -0
  183. presets/java-spring/templates/subagent-observability.md +91 -0
  184. presets/java-spring/templates/subagent-scheduled.md +81 -0
  185. presets/java-spring/templates/subagent-security.md +102 -0
  186. presets/java-spring/templates/subagent-structure.md +101 -0
  187. presets/java-spring/templates/subagent-sync-section.md +34 -0
  188. presets/java-spring/templates/subagent-utils.md +73 -0
  189. presets/java-spring/templates/sync-system.md +8 -0
  190. presets/java-spring/workflow-extensions.md +112 -0
  191. skills/__init__.py +1 -0
  192. skills/_shared/README.md +30 -0
  193. skills/_shared/doc-coverage-shared.md +134 -0
  194. skills/_shared/doc-quality-standard.md +1058 -0
  195. skills/_shared/doc-subagent-rules.md +762 -0
  196. skills/_shared/windows-compat.md +89 -0
  197. skills/kb-audit/SKILL.md +52 -0
  198. skills/kb-audit/rules.md +88 -0
  199. skills/kb-audit/steps/step-01-prepare.md +75 -0
  200. skills/kb-audit/steps/step-02-audit.md +96 -0
  201. skills/kb-audit/steps/step-03-verify.md +65 -0
  202. skills/kb-audit/steps/step-04-report.md +64 -0
  203. skills/kb-init/SKILL.md +146 -0
  204. skills/kb-init/rules.md +187 -0
  205. skills/kb-init/steps/step-01-scope.md +62 -0
  206. skills/kb-init/steps/step-02-source.md +410 -0
  207. skills/kb-init/steps/step-03-generate.md +307 -0
  208. skills/kb-init/steps/step-04-quality.md +92 -0
  209. skills/kb-init/steps/step-05-finalize.md +132 -0
  210. skills/kb-init/templates/core/execution-modes.md +29 -0
  211. skills/kb-init/templates/core/output-only.md +4 -0
  212. skills/kb-init/templates/core/readwrite.md +33 -0
  213. skills/kb-search/SKILL.md +138 -0
  214. skills/kb-search/rules.md +64 -0
  215. skills/kb-sync/SKILL.md +43 -0
  216. skills/kb-sync/rules.md +70 -0
  217. skills/kb-sync/scripts/rebuild_module.py +91 -0
  218. skills/kb-sync/scripts/scan_repos.py +687 -0
  219. skills/kb-sync/steps/step-01-detect.md +72 -0
  220. skills/kb-sync/steps/step-02-update.md +71 -0
  221. skills/kb-sync/steps/step-03-verify.md +47 -0
  222. skills/kb-sync/steps/step-04-finalize.md +52 -0
  223. source_kb-0.2.2.dist-info/METADATA +194 -0
  224. source_kb-0.2.2.dist-info/RECORD +228 -0
  225. source_kb-0.2.2.dist-info/WHEEL +5 -0
  226. source_kb-0.2.2.dist-info/entry_points.txt +3 -0
  227. source_kb-0.2.2.dist-info/licenses/LICENSE +21 -0
  228. source_kb-0.2.2.dist-info/top_level.txt +6 -0
@@ -0,0 +1,96 @@
1
+ """Global metadata pre-generation for prompt injection.
2
+
3
+ Generates glossary (class names + JavaDoc), dependency summary, and cross-references
4
+ from skeleton entries. Pre-generated once per module, shared across all sub-agent prompts.
5
+
6
+ Usage:
7
+ from core.skeleton.metadata import pregenerate, load_pregenerated, generate_global_metadata
8
+
9
+ # Pre-generate to .meta/global-metadata.md
10
+ pregenerate(module_dir, module_name="my-service")
11
+
12
+ # Load in prompt rendering
13
+ text = load_pregenerated(module_dir)
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import logging
19
+ from pathlib import Path
20
+
21
+ from core.skeleton.metadata_builders import (
22
+ build_glossary, build_dependency_summary, build_cross_references,
23
+ )
24
+
25
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# File name of the pre-generated metadata document. pregenerate() writes it and
# load_pregenerated() reads it, both under "<module_dir>/.meta/".
PREGENERATE_FILE = "global-metadata.md"
28
+
29
+
30
def generate_global_metadata(
    skeleton_entries: list[dict],
    doc_type: str = "",
    module_name: str = "",
    hooks=None,
) -> dict[str, str]:
    """Build the three global metadata sections for a set of skeleton entries.

    Args:
        skeleton_entries: Skeleton entries handed to every section builder.
        doc_type: Forwarded to ``build_cross_references``.
        module_name: Accepted for interface compatibility; not read here.
        hooks: Forwarded to ``build_dependency_summary``.

    Returns:
        Mapping with the keys ``glossary``, ``dependency_summary`` and
        ``cross_references``, each holding the text produced by its builder.
    """
    glossary_text = build_glossary(skeleton_entries)
    dependency_text = build_dependency_summary(skeleton_entries, hooks=hooks)
    cross_ref_text = build_cross_references(skeleton_entries, doc_type)
    return {
        "glossary": glossary_text,
        "dependency_summary": dependency_text,
        "cross_references": cross_ref_text,
    }
45
+
46
+
47
def format_metadata_for_prompt(metadata: dict[str, str], max_chars: int = 3000) -> str:
    """Render the metadata sections as one markdown block for prompt injection.

    Sections are emitted in a fixed order (glossary, dependency summary,
    cross-references). A section is skipped when its text is empty or when fewer
    than 200 characters of budget are left. A section longer than the remaining
    budget is cut and suffixed with a ``[truncated]`` marker.

    Args:
        metadata: Mapping as returned by ``generate_global_metadata``.
        max_chars: Character budget shared by the section bodies. The top-level
            heading and the blank lines between sections are not counted.

    Returns:
        The formatted block, or ``""`` when no section was emitted.
    """
    layout = (
        ("glossary", "Module Core Concepts"),
        ("dependency_summary", "Business Dependencies"),
        ("cross_references", "Cross-Document References"),
    )

    budget = max_chars
    blocks: list[str] = []
    for key, heading in layout:
        body = metadata.get(key, "")
        if body and budget >= 200:
            block = f"### {heading}\n\n{body}"
            if len(block) > budget:
                # Leave headroom for the truncation marker.
                block = block[: budget - 30] + "\n\n[truncated]"
            blocks.append(block)
            budget -= len(block)

    if blocks:
        return "## Global Metadata (terminology consistency reference)\n\n" + "\n\n".join(blocks)
    return ""
71
+
72
+
73
def pregenerate(module_dir: Path, module_name: str = "") -> Path:
    """Write the global metadata block to ``<module_dir>/.meta/global-metadata.md``.

    Args:
        module_dir: Module directory whose skeleton is loaded and under which
            the ``.meta`` directory is created.
        module_name: Forwarded to ``generate_global_metadata``.

    Returns:
        Path of the file that was written.

    Raises:
        RuntimeError: If ``load_skeleton`` yields no entries for ``module_dir``.
    """
    from core.skeleton.file_list import load_skeleton
    from core.paths import ensure_dir

    skeleton = load_skeleton(module_dir)
    if not skeleton:
        raise RuntimeError(f"No skeleton entries found in {module_dir}")

    rendered = format_metadata_for_prompt(
        generate_global_metadata(skeleton, module_name=module_name),
        max_chars=4000,
    )

    meta_dir = module_dir / ".meta"
    ensure_dir(meta_dir)
    target = meta_dir / PREGENERATE_FILE
    target.write_text(rendered, encoding="utf-8")
    return target
89
+
90
+
91
def load_pregenerated(module_dir: Path) -> str:
    """Return the stripped text of the pre-generated metadata file.

    Args:
        module_dir: Module directory containing the ``.meta`` folder.

    Returns:
        The file content with surrounding whitespace removed, or ``""`` when
        the file does not exist.
    """
    metadata_file = module_dir / ".meta" / PREGENERATE_FILE
    if not metadata_file.exists():
        return ""
    raw = metadata_file.read_text(encoding="utf-8")
    return raw.strip()
@@ -0,0 +1,264 @@
1
+ """Metadata builders — glossary, dependency summary, cross-references.
2
+
3
+ Internal helpers for core/skeleton/metadata.py. Not part of public API.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ import re
10
+ from collections import defaultdict
11
+ from typing import Any
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
def build_glossary(entries: list[dict]) -> str:
    """Build a markdown bullet glossary of the most relevant class names.

    Every distinct class name of at least three characters is scored:
    +3 for a non-placeholder doc string, +2 for a business-style suffix and
    +1 when at least two fields reference the type. Undocumented classes that
    match the exclusion list are dropped, as is anything scoring below 2.
    The 20 best entries are returned, ordered by score (descending) and then
    by line text.

    Args:
        entries: Skeleton entries; each may carry a ``classes`` list of dicts
            with ``name`` and optional ``doc`` keys.

    Returns:
        Newline-joined bullet lines, or ``""`` when nothing qualifies.
    """
    injections = _count_injection_refs(entries)
    visited: set[str] = set()
    ranked: list[tuple[int, str]] = []

    every_class = (cls for entry in entries for cls in entry.get("classes", []))
    for cls in every_class:
        name = cls.get("name", "")
        if not name or name in visited or len(name) < 3:
            continue
        visited.add(name)

        doc = cls.get("doc", "")
        if doc and _is_placeholder_doc(doc):
            # Placeholder text counts as "no documentation".
            doc = ""
        if not doc and _is_excluded_class(name):
            continue

        ref_count = injections.get(name, 0)
        widely_injected = ref_count >= 2

        score = 3 if doc else 0
        if _is_business_suffix(name):
            score += 2
        if widely_injected:
            score += 1
        if score < 2:
            continue

        if doc:
            # Keep only the first line, without <br> residue or a trailing period.
            summary = doc.split("\n")[0].strip()
            summary = summary.replace("<br>", "").replace("&lt;br&gt;", "").strip()
            summary = summary.rstrip(".").strip()[:80]
            bullet = f"- **{name}**: {summary}" if summary else f"- **{name}**"
        elif widely_injected:
            bullet = f"- **{name}** (injected by {ref_count} classes)"
        else:
            bullet = f"- **{name}**"

        ranked.append((score, bullet))

    if not ranked:
        return ""

    ranked.sort(key=lambda pair: (-pair[0], pair[1]))
    return "\n".join(bullet for _, bullet in ranked[:20])
66
+
67
+
68
def build_dependency_summary(entries: list[dict], hooks=None) -> str:
    """Summarise which business types each core business class has injected.

    For every entry whose first class is a core business class, its fields
    (entry-level ``fields``, falling back to the first class's ``fields``) are
    scanned. A field counts as injected when it has no annotations at all or
    when one of its annotations contains an injection marker. Framework types,
    persistence types (Mapper/Dao/DaoImpl/Repository), non-injectable types
    and self-references are ignored.

    Args:
        entries: Skeleton entries with ``classes`` and optional ``fields``.
        hooks: Optional object providing ``get_inject_annotations()`` and
            ``get_framework_types()``; built-in defaults are used when falsy.

    Returns:
        Up to ten ``- Owner -> DepA, DepB`` lines ordered by the number of
        distinct dependencies (descending), or ``""`` when none were found.
        Owners with at least two distinct dependencies are preferred; if there
        are none, all owners are listed.
    """
    if hooks:
        inject_annotations = hooks.get_inject_annotations()
        framework_types = hooks.get_framework_types()
    else:
        inject_annotations = {"@Autowired", "@Resource", "@Inject", "Autowired", "Resource", "Inject"}
        framework_types = set()

    persistence_suffixes = ("Mapper", "Dao", "DaoImpl", "Repository")
    collected: dict[str, set[str]] = defaultdict(set)

    for entry in entries:
        classes = entry.get("classes", [])
        if not classes:
            continue
        primary = classes[0]
        owner = primary.get("name", "")
        if not (owner and _is_core_business_class(owner)):
            continue

        for member in entry.get("fields", []) or primary.get("fields", []):
            markers = member.get("annotations", [])
            declared = member.get("type", "")
            if not declared or not declared[0].isupper():
                continue

            # A field without any annotation is treated as injected.
            if markers:
                marker_texts = [
                    m.get("name", "") if isinstance(m, dict) else str(m)
                    for m in markers
                ]
                if not any(inj in text for text in marker_texts for inj in inject_annotations):
                    continue

            # Drop generic parameters: "List<Foo>" -> "List".
            bare_type = re.sub(r'<.*>', '', declared).strip()

            if bare_type in framework_types:
                continue
            if bare_type.endswith(persistence_suffixes):
                continue
            if not _is_injectable_type(bare_type):
                continue
            if bare_type and bare_type != owner:
                collected[owner].add(bare_type)

    if not collected:
        return ""

    multi = {owner: found for owner, found in collected.items() if len(found) >= 2}
    chosen = multi or collected

    ranked = sorted(chosen.items(), key=lambda item: -len(item[1]))[:10]
    rendered: list[str] = []
    for owner, found in ranked:
        names = sorted(found)
        if len(names) > 8:
            joined = ", ".join(names[:8]) + f" ... ({len(names)} total)"
        else:
            joined = ", ".join(names)
        rendered.append(f"- {owner} -> {joined}")

    return "\n".join(rendered)
134
+
135
+
136
def build_cross_references(entries: list[dict], doc_type: str, preset: dict | None = None) -> str:
    """Build hints pointing at classes that are documented in other doc types.

    Classes are split into infrastructure classes and data-model classes (by
    name suffix). Whether a hint is produced depends on the doc type's
    dependencies: with a preset, its ``depends_on`` list is consulted; without
    one, only ``business-logic`` gets hints.

    Args:
        entries: Skeleton entries with a ``classes`` list.
        doc_type: Doc type the prompt is generated for.
        preset: Optional preset used to resolve dependencies and file names.

    Returns:
        Newline-joined ``- ...`` hint lines (at most five class names each),
        or ``""`` when no hint applies.
    """
    model_suffixes = ("VO", "DTO", "DO", "Entity", "Model")
    infra_names: list[str] = []
    model_names: list[str] = []

    for entry in entries:
        for cls in entry.get("classes", []):
            class_name = cls.get("name", "")
            if _is_infra_class(class_name):
                infra_names.append(class_name)
            elif class_name.endswith(model_suffixes):
                model_names.append(class_name)

    if preset:
        from core.preset import get_doc_type_config
        depends_on = get_doc_type_config(preset, doc_type).get("depends_on", [])
        wants_models = any(d in ("data-models", "enums-and-constants") for d in depends_on)
        wants_arch = any(d in ("architecture",) for d in depends_on)
    else:
        wants_models = wants_arch = doc_type == "business-logic"

    def _resolve_filename(doc_key: str, fallback: str, failure_msg: str) -> str:
        # Look up the preset's file name; keep the fallback on any failure.
        if not preset:
            return fallback
        try:
            from core.preset import get_doc_filename
            return get_doc_filename(preset, doc_key, strict=False) or fallback
        except Exception as e:
            logger.debug(failure_msg, e)
            return fallback

    found: list[str] = []
    if wants_models and model_names:
        model_doc = _resolve_filename(
            "data-models", "data-models.md", "get_doc_filename failed for data-models: %s"
        )
        found.append(f"Data model classes (see {model_doc}): {', '.join(model_names[:5])}")
    if wants_arch and infra_names:
        arch_doc = _resolve_filename(
            "architecture", "architecture.md", "get_doc_filename failed for architecture: %s"
        )
        found.append(f"Infrastructure classes (see {arch_doc}): {', '.join(infra_names[:5])}")

    if not found:
        return ""
    return "\n".join(f"- {hint}" for hint in found)
182
+
183
+
184
+ # ---------------------------------------------------------------------------
185
+ # Classification helpers
186
+ # ---------------------------------------------------------------------------
187
+
188
+
189
+ def _is_placeholder_doc(doc: str) -> bool:
190
+ placeholders = ["TODO", "FIXME", "&lt;br&gt;"]
191
+ return any(p in doc for p in placeholders)
192
+
193
+
194
+ def _is_excluded_class(name: str) -> bool:
195
+ excluded_suffixes = (
196
+ "Config", "Configuration", "Properties", "Aspect", "Interceptor", "Filter",
197
+ "Mapper", "Dao", "DaoImpl", "Repository",
198
+ "DTO", "VO", "BO", "DO", "PO", "Param", "Request", "Response", "Result",
199
+ "Entity", "Model",
200
+ "Util", "Utils", "Helper", "Tool", "Tools",
201
+ "Constant", "Constants", "Enum",
202
+ "Converter", "Adapter", "Wrapper",
203
+ "Test", "Tests", "Mock",
204
+ )
205
+ java_types = {"String", "Integer", "Long", "Boolean", "Double", "Float",
206
+ "List", "Map", "Set", "Date", "BigDecimal", "Object",
207
+ "Collection", "Optional", "Class", "Void"}
208
+ return name.endswith(excluded_suffixes) or name in java_types
209
+
210
+
211
+ def _is_business_suffix(name: str) -> bool:
212
+ suffixes = ("ServiceImpl", "Service", "Handler", "Processor", "Manager",
213
+ "Facade", "Strategy", "Validator", "Factory",
214
+ "Listener", "Consumer", "Producer", "Client", "Feign", "Biz", "BizImpl")
215
+ return any(name.endswith(s) for s in suffixes)
216
+
217
+
218
+ def _count_injection_refs(entries: list[dict]) -> dict[str, int]:
219
+ counts: defaultdict[str, int] = defaultdict(int)
220
+ for entry in entries:
221
+ for field in entry.get("fields", []):
222
+ ftype = field.get("type", "")
223
+ if ftype and ftype[0].isupper() and len(ftype) > 2:
224
+ base = ftype.split("<")[0].split("[")[0].strip()
225
+ if not _is_excluded_class(base):
226
+ counts[base] += 1
227
+ return dict(counts)
228
+
229
+
230
+ def _is_core_business_class(name: str) -> bool:
231
+ if not name or not name[0].isupper() or len(name) <= 2:
232
+ return False
233
+ infra_suffixes = ("Config", "Configuration", "Properties", "Interceptor",
234
+ "Filter", "Aspect", "Advisor", "Converter", "Mapper")
235
+ if name.endswith(infra_suffixes):
236
+ return False
237
+ data_suffixes = ("DTO", "VO", "DO", "Entity", "Enum", "Constant", "Constants")
238
+ if name.endswith(data_suffixes):
239
+ return False
240
+ java_types = {"String", "Integer", "Long", "Boolean", "Double", "Float",
241
+ "List", "Map", "Set", "Date", "BigDecimal", "Object",
242
+ "Collection", "Optional", "Class", "Void"}
243
+ if name in java_types:
244
+ return False
245
+ business_suffixes = ("ServiceImpl", "Service", "Handler", "Processor",
246
+ "Manager", "Listener", "Factory", "Client", "Feign",
247
+ "Strategy", "Facade", "Biz", "BizImpl")
248
+ return any(name.endswith(s) for s in business_suffixes)
249
+
250
+
251
+ def _is_injectable_type(name: str) -> bool:
252
+ if not name or not name[0].isupper() or len(name) <= 2:
253
+ return False
254
+ exclude = {"String", "Integer", "Long", "Boolean", "Double", "Float",
255
+ "List", "Map", "Set", "Date", "BigDecimal", "Object",
256
+ "Collection", "Optional", "Class", "Void", "byte", "int",
257
+ "DateTimeFormatter", "Logger", "ObjectMapper"}
258
+ if name in exclude:
259
+ return False
260
+ return True
261
+
262
+
263
+ def _is_infra_class(name: str) -> bool:
264
+ return name.endswith(("Config", "Configuration", "Properties", "Interceptor", "Filter", "Aspect", "Advisor"))
@@ -0,0 +1,330 @@
1
+ """Cross-module DAG topological sort — build module dependency order from pom.xml.
2
+
3
+ Parses multi-module Maven projects to determine generation order: modules that are
4
+ depended upon should be generated first so downstream modules can reference their docs.
5
+
6
+ Usage:
7
+ from core.skeleton.module_dag import build_module_dag, topo_sort_modules
8
+
9
+ dag = build_module_dag(source_cache_dir, module_names)
10
+ ordered = topo_sort_modules(dag)
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import logging
16
+ from collections import deque
17
+ from dataclasses import dataclass, field
18
+ from pathlib import Path
19
+ from typing import Any
20
+
21
+ from core.skeleton.pom_parser import parse_pom
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # Data classes
28
+ # ---------------------------------------------------------------------------
29
+
30
+
31
@dataclass
class ModuleNode:
    """A single module in the dependency DAG.

    ``build_module_dag`` creates name-only nodes (every other field left at its
    default) for modules whose pom.xml is missing or fails to parse.
    """

    # Module name; ModuleDAG.add_node uses it as the key in its mappings.
    name: str
    # "artifact_id" value from the parsed pom data ("" when not available).
    artifact_id: str = ""
    # "group_id" value from the parsed pom data ("" when not available).
    group_id: str = ""
    # Path of the pom.xml the coordinates were read from; None when no pom was used.
    pom_path: Path | None = None
39
+
40
+
41
@dataclass
class ModuleDAG:
    """Dependency graph between modules.

    An edge runs from the dependent module to the module it depends on
    (A depends on B is stored as A -> B). A topological sort therefore yields
    dependencies first (B before A).
    """

    # module name -> node
    nodes: dict[str, ModuleNode] = field(default_factory=dict)
    # module name -> names of the modules it depends on
    edges: dict[str, set[str]] = field(default_factory=dict)
    # module name -> names of the modules that depend on it
    reverse_edges: dict[str, set[str]] = field(default_factory=dict)

    def add_node(self, node: ModuleNode) -> None:
        """Register ``node`` and make sure both adjacency maps have a slot for it."""
        key = node.name
        self.nodes[key] = node
        for adjacency in (self.edges, self.reverse_edges):
            adjacency.setdefault(key, set())

    def add_edge(self, dependent: str, dependency: str) -> None:
        """Add edge: `dependent` depends on `dependency`."""
        outgoing = self.edges.setdefault(dependent, set())
        outgoing.add(dependency)
        incoming = self.reverse_edges.setdefault(dependency, set())
        incoming.add(dependent)

    @property
    def module_names(self) -> list[str]:
        """Names of all registered nodes, in insertion order."""
        return list(self.nodes)
68
+
69
+
70
+ # ---------------------------------------------------------------------------
71
+ # Public API
72
+ # ---------------------------------------------------------------------------
73
+
74
+
75
def build_module_dag(
    source_cache_dir: Path,
    module_names: list[str],
    module_paths: dict[str, Path] | None = None,
) -> ModuleDAG:
    """Build a module dependency DAG by parsing pom.xml files.

    Args:
        source_cache_dir: Root directory containing module source caches.
        module_names: Names of the modules to include in the DAG.
        module_paths: Optional explicit mapping of module name to source
            directory. Modules not listed fall back to
            ``source_cache_dir / module_name``.

    Returns:
        ModuleDAG with one node per requested module and an edge for every
        declared dependency that resolves to another requested module.
        Modules without a readable pom.xml get a bare node and no edges.
    """
    graph = ModuleDAG()

    # Phase 1: parse every pom.xml and index Maven coordinates.
    # Keys are "groupId:artifactId" and, for fuzzy matching, bare "artifactId".
    coordinate_index: dict[str, str] = {}
    parsed_poms: dict[str, dict[str, Any]] = {}

    for name in module_names:
        if module_paths and name in module_paths:
            root = module_paths[name]
        else:
            root = source_cache_dir / name

        pom_file = _find_pom(root)
        if pom_file is None:
            logger.debug("No pom.xml found for module %s in %s", name, root)
            graph.add_node(ModuleNode(name=name))
            continue

        try:
            pom = parse_pom(pom_file)
        except Exception as e:
            logger.warning("Failed to parse pom.xml for %s: %s", name, e)
            graph.add_node(ModuleNode(name=name))
            continue

        group = pom.get("group_id", "")
        artifact = pom.get("artifact_id", "")
        graph.add_node(
            ModuleNode(name=name, artifact_id=artifact, group_id=group, pom_path=pom_file)
        )

        if artifact:
            if group:
                coordinate_index[f"{group}:{artifact}"] = name
            # First module to claim a bare artifactId keeps it.
            coordinate_index.setdefault(artifact, name)

        parsed_poms[name] = pom

    # Phase 2: turn dependency declarations into edges between known modules.
    ignored_scopes = ("test", "provided", "system")
    for name, pom in parsed_poms.items():
        declared = pom.get("dependencies", []) + pom.get("dependency_management", [])

        for dep in declared:
            dep_group = dep.get("groupId", "")
            dep_artifact = dep.get("artifactId", "")
            if not dep_artifact:
                continue

            # These scopes do not influence generation order.
            if dep.get("scope", "compile") in ignored_scopes:
                continue

            qualified = f"{dep_group}:{dep_artifact}" if dep_group else ""
            if qualified and qualified in coordinate_index:
                target = coordinate_index[qualified]
            else:
                target = coordinate_index.get(dep_artifact)

            if target and target != name:
                graph.add_edge(name, target)
                logger.debug(
                    "Module %s depends on %s (via %s:%s)",
                    name, target, dep_group, dep_artifact,
                )

    return graph
169
+
170
+
171
def topo_sort_modules(dag: ModuleDAG) -> list[str]:
    """Topological sort: dependencies come first (Kahn's algorithm).

    Modules with no dependencies are emitted first, in alphabetical order.
    Modules that become ready after the same predecessor are also emitted in
    alphabetical order, so the result is reproducible between runs. Edges that
    point at names which are not nodes of the DAG are ignored, so they cannot
    block a module. If cycles exist, the modules caught in them are appended
    in alphabetical order at the end.

    Args:
        dag: Graph whose ``edges`` map a module to the modules it depends on
            and whose ``reverse_edges`` map a module to its dependents.

    Returns:
        Ordered list of module names (generate in this order).
    """
    if not dag.nodes:
        return []

    # If A depends on B, B must come before A, so a module's in-degree is the
    # number of *known* modules it still waits for.
    pending: dict[str, int] = {
        name: sum(1 for dep in dag.edges.get(name, ()) if dep in dag.nodes)
        for name in dag.nodes
    }

    queue = deque(sorted(name for name, count in pending.items() if count == 0))
    ordered: list[str] = []

    while queue:
        current = queue.popleft()
        ordered.append(current)
        # Sets have no stable iteration order; sort to keep the output deterministic.
        for dependent in sorted(dag.reverse_edges.get(current, ())):
            if dependent not in pending:
                continue
            pending[dependent] -= 1
            if pending[dependent] == 0:
                queue.append(dependent)

    # Anything not emitted is part of (or blocked by) a cycle.
    emitted = set(ordered)
    leftover = sorted(name for name in dag.nodes if name not in emitted)
    if leftover:
        logger.warning(
            "Cycle detected in module dependencies. Appending %d modules: %s",
            len(leftover), leftover,
        )
        ordered.extend(leftover)

    return ordered
232
+
233
+
234
def detect_module_cycles(dag: ModuleDAG) -> list[list[str]]:
    """Detect cycles in the module dependency graph.

    Runs a depth-first search from every node in alphabetical order and also
    visits each node's dependencies in alphabetical order, so the reported
    cycles and their order are reproducible between runs. Dependencies that
    are not nodes of the DAG are ignored.

    Returns:
        List of cycles. Each cycle lists the module names along the loop and
        repeats the first module at the end (e.g. ``["a", "b", "a"]``).
    """
    visited: set[str] = set()
    on_stack: set[str] = set()
    trail: list[str] = []
    cycles: list[list[str]] = []

    def dfs(node: str) -> None:
        visited.add(node)
        on_stack.add(node)
        trail.append(node)

        # Sets have no stable iteration order; sort for deterministic results.
        for dep in sorted(dag.edges.get(node, ())):
            if dep not in dag.nodes:
                continue
            if dep not in visited:
                dfs(dep)
            elif dep in on_stack:
                # Back edge: the loop is the trail from `dep` to here, closed by `dep`.
                start = trail.index(dep)
                cycles.append(trail[start:] + [dep])

        trail.pop()
        on_stack.discard(node)

    for node in sorted(dag.nodes):
        if node not in visited:
            dfs(node)

    return cycles
267
+
268
+
269
def get_generation_layers(dag: ModuleDAG) -> list[list[str]]:
    """Group modules into parallel generation layers.

    Each layer contains modules whose known dependencies are all in previous
    layers, so modules within one layer can be generated in parallel. Edges
    pointing at names that are not nodes of the DAG are ignored; they neither
    delay a module nor trigger cycle handling. When only cyclic modules are
    left, the alphabetically first one is emitted as its own layer to break
    the cycle.

    Returns:
        List of layers; each layer is an alphabetically sorted list of module names.
    """
    if not dag.nodes:
        return []

    # Number of known modules each module still waits for.
    pending: dict[str, int] = {
        name: sum(1 for dep in dag.edges.get(name, ()) if dep in dag.nodes)
        for name in dag.nodes
    }

    remaining = set(dag.nodes)
    layers: list[list[str]] = []

    while remaining:
        layer = sorted(name for name in remaining if pending[name] == 0)
        if not layer:
            # Cycle: break it at the alphabetically first remaining module.
            layer = [min(remaining)]
            logger.warning("Breaking cycle at module: %s", layer[0])

        layers.append(layer)
        remaining.difference_update(layer)

        # Modules of this layer are done; release their dependents.
        for node in layer:
            for dependent in dag.reverse_edges.get(node, ()):
                if dependent in remaining:
                    pending[dependent] -= 1

    return layers
306
+
307
+
308
+ # ---------------------------------------------------------------------------
309
+ # Internal helpers
310
+ # ---------------------------------------------------------------------------
311
+
312
+
313
+ def _find_pom(module_dir: Path) -> Path | None:
314
+ """Find pom.xml in a module directory, checking common locations."""
315
+ if not module_dir.is_dir():
316
+ return None
317
+
318
+ # Direct pom.xml
319
+ direct = module_dir / "pom.xml"
320
+ if direct.exists():
321
+ return direct
322
+
323
+ # Check one level deeper (e.g., module-name/module-provider/pom.xml)
324
+ for child in module_dir.iterdir():
325
+ if child.is_dir() and not child.name.startswith("."):
326
+ candidate = child / "pom.xml"
327
+ if candidate.exists():
328
+ return candidate
329
+
330
+ return None