wikigen 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wikigen/__init__.py +7 -0
- wikigen/cli.py +690 -0
- wikigen/config.py +526 -0
- wikigen/defaults.py +78 -0
- wikigen/flows/__init__.py +1 -0
- wikigen/flows/flow.py +38 -0
- wikigen/formatter/help_formatter.py +194 -0
- wikigen/formatter/init_formatter.py +56 -0
- wikigen/formatter/output_formatter.py +290 -0
- wikigen/mcp/__init__.py +12 -0
- wikigen/mcp/chunking.py +127 -0
- wikigen/mcp/embeddings.py +69 -0
- wikigen/mcp/output_resources.py +65 -0
- wikigen/mcp/search_index.py +826 -0
- wikigen/mcp/server.py +232 -0
- wikigen/mcp/vector_index.py +297 -0
- wikigen/metadata/__init__.py +35 -0
- wikigen/metadata/logo.py +28 -0
- wikigen/metadata/project.py +28 -0
- wikigen/metadata/version.py +17 -0
- wikigen/nodes/__init__.py +1 -0
- wikigen/nodes/nodes.py +1080 -0
- wikigen/utils/__init__.py +0 -0
- wikigen/utils/adjust_headings.py +72 -0
- wikigen/utils/call_llm.py +271 -0
- wikigen/utils/crawl_github_files.py +450 -0
- wikigen/utils/crawl_local_files.py +151 -0
- wikigen/utils/llm_providers.py +101 -0
- wikigen/utils/version_check.py +84 -0
- wikigen-1.0.0.dist-info/METADATA +352 -0
- wikigen-1.0.0.dist-info/RECORD +35 -0
- wikigen-1.0.0.dist-info/WHEEL +5 -0
- wikigen-1.0.0.dist-info/entry_points.txt +2 -0
- wikigen-1.0.0.dist-info/licenses/LICENSE +21 -0
- wikigen-1.0.0.dist-info/top_level.txt +1 -0
wikigen/nodes/nodes.py
ADDED
|
@@ -0,0 +1,1080 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import time
|
|
3
|
+
import yaml
|
|
4
|
+
from pocketflow import Node, BatchNode
|
|
5
|
+
from wikigen.utils.crawl_github_files import crawl_github_files
|
|
6
|
+
from wikigen.utils.call_llm import call_llm
|
|
7
|
+
from wikigen.utils.crawl_local_files import crawl_local_files
|
|
8
|
+
from wikigen.formatter.output_formatter import (
|
|
9
|
+
Icons,
|
|
10
|
+
print_phase_start,
|
|
11
|
+
print_operation,
|
|
12
|
+
print_success,
|
|
13
|
+
print_phase_end,
|
|
14
|
+
format_size,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Helper to get content for specific file indices
|
|
19
|
+
def get_content_for_indices(files_data, indices):
    """Collect the contents of the files selected by ``indices``.

    Args:
        files_data: Sequence of ``(path, content)`` pairs.
        indices: Iterable of positions into ``files_data``.

    Returns:
        dict: Maps ``"<index> # <path>"`` to the file content, in the order
        the indices were given. Positions outside ``files_data`` are ignored.
    """
    file_total = len(files_data)
    selected = {}
    for position in indices:
        # Out-of-range positions (including negatives) are silently skipped.
        if not (0 <= position < file_total):
            continue
        file_path, file_text = files_data[position]
        # Index + path together form the key so the context stays traceable.
        selected[f"{position} # {file_path}"] = file_text
    return selected
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class FetchRepo(Node):
    """Crawl a GitHub repository or a local directory and collect its files.

    Reads the crawl settings from the shared store and writes the result to
    ``shared["files"]`` as a list of ``(path, content)`` tuples.
    """

    def prep(self, shared):
        """Gather crawl parameters, deriving ``project_name`` when it is missing.

        Args:
            shared: Shared store. ``include_patterns``, ``exclude_patterns``
                and ``max_file_size`` are required; ``repo_url``,
                ``local_dir``, ``project_name`` and ``github_token`` are
                optional.

        Returns:
            dict: The values ``exec`` passes on to the crawler.

        Raises:
            KeyError: If one of the required keys is absent from ``shared``.
        """
        repo_url = shared.get("repo_url")
        local_dir = shared.get("local_dir")
        project_name = shared.get("project_name")

        if not project_name:
            # Basic name derivation from URL or directory
            if repo_url:
                # Ignore trailing slashes so ".../repo/" still yields "repo".
                project_name = repo_url.rstrip("/").split("/")[-1]
                # Strip only a trailing ".git"; a blanket replace would also
                # mangle names such as "user.github.io".
                if project_name.endswith(".git"):
                    project_name = project_name[: -len(".git")]
            else:
                project_name = os.path.basename(os.path.abspath(local_dir))
            shared["project_name"] = project_name

        return {
            "repo_url": repo_url,
            "local_dir": local_dir,
            "token": shared.get("github_token"),
            # File patterns and size limit come straight from the shared store.
            "include_patterns": shared["include_patterns"],
            "exclude_patterns": shared["exclude_patterns"],
            "max_file_size": shared["max_file_size"],
            "use_relative_paths": True,
        }

    def exec(self, prep_res):
        """Run the crawler selected by ``prep_res`` and return the files.

        A truthy ``repo_url`` selects ``crawl_github_files``; otherwise
        ``crawl_local_files`` is used on ``local_dir``.

        Returns:
            list: ``(path, content)`` tuples taken from the crawler's
            ``"files"`` mapping.

        Raises:
            ValueError: If the crawl produced no files.
        """
        start_time = time.time()

        if prep_res["repo_url"]:
            print_phase_start("Repository Crawling", Icons.CRAWLING)
            result = crawl_github_files(
                repo_url=prep_res["repo_url"],
                token=prep_res["token"],
                include_patterns=prep_res["include_patterns"],
                exclude_patterns=prep_res["exclude_patterns"],
                max_file_size=prep_res["max_file_size"],
                use_relative_paths=prep_res["use_relative_paths"],
            )
        else:
            print_phase_start("Directory Crawling", Icons.CRAWLING)
            result = crawl_local_files(
                directory=prep_res["local_dir"],
                include_patterns=prep_res["include_patterns"],
                exclude_patterns=prep_res["exclude_patterns"],
                max_file_size=prep_res["max_file_size"],
                use_relative_paths=prep_res["use_relative_paths"],
            )

        # Convert dict to list of tuples: [(path, content), ...]
        files_list = list(result.get("files", {}).items())
        if not files_list:
            raise ValueError("Failed to fetch files")

        # Total size is the summed length of every content value.
        total_size = sum(len(content) for _, content in files_list)
        elapsed = time.time() - start_time

        print_success(
            f"Complete ({len(files_list)} files, {format_size(total_size)})",
            elapsed,
            indent=1,
        )
        print_phase_end()

        return files_list

    def post(self, shared, prep_res, exec_res):
        """Store the crawled ``(path, content)`` tuples in ``shared["files"]``."""
        shared["files"] = exec_res
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class IdentifyAbstractions(Node):
    """Ask the LLM for the core abstractions of the crawled codebase.

    Reads ``shared["files"]`` and ``shared["project_name"]`` (plus the optional
    ``language``, ``use_cache`` and ``max_abstraction_num``) and stores the
    validated result in ``shared["abstractions"]``.
    """

    def prep(self, shared):
        """Build the prompt inputs from the shared store.

        Returns:
            tuple: ``(context, file_listing_for_prompt, file_count,
            project_name, language, use_cache, max_abstraction_num)``.
        """
        files_data = shared["files"]
        project_name = shared["project_name"]
        language = shared.get("language", "english")
        use_cache = shared.get("use_cache", True)
        max_abstraction_num = shared.get("max_abstraction_num", 10)

        # Every file is included in full; no size limit is applied here.
        context = "".join(
            f"--- File Index {i}: {path} ---\n{content}\n\n"
            for i, (path, content) in enumerate(files_data)
        )
        # The "# path" suffix is just a hint for the LLM.
        file_listing_for_prompt = "\n".join(
            f"- {i} # {path}" for i, (path, _) in enumerate(files_data)
        )
        return (
            context,
            file_listing_for_prompt,
            len(files_data),
            project_name,
            language,
            use_cache,
            max_abstraction_num,
        )

    def exec(self, prep_res):
        """Query the LLM and validate the abstractions it returns.

        Returns:
            list: Dicts with ``name`` (str), ``description`` (str) and
            ``files`` (sorted, de-duplicated list of int file indices).

        Raises:
            ValueError: If the YAML is not a list, an item lacks a required
                key or has a wrongly typed field, a file index cannot be
                parsed, or a file index is outside ``0..file_count - 1``.
            IndexError: If the response contains no ```` ```yaml ```` fence.
        """
        start_time = time.time()
        (
            context,
            file_listing_for_prompt,
            file_count,
            project_name,
            language,
            use_cache,
            max_abstraction_num,
        ) = prep_res

        print_phase_start("LLM Analysis", Icons.PROCESSING)
        print_operation("Identifying abstractions...", Icons.PROCESSING, indent=1)

        # Add language instruction and hints only if not English
        language_instruction = ""
        name_lang_hint = ""
        desc_lang_hint = ""
        if language.lower() != "english":
            language_instruction = f"IMPORTANT: Generate the `name` and `description` for each abstraction in **{language.capitalize()}** language. Do NOT use English for these fields.\n\n"
            # Keep specific hints here as name/description are primary targets
            name_lang_hint = f" (value in {language.capitalize()})"
            desc_lang_hint = f" (value in {language.capitalize()})"

        prompt = f"""
For the project `{project_name}`:

Codebase Context:
{context}

{language_instruction}Analyze the codebase context.
Identify the top 5 to {max_abstraction_num} core most important abstractions for technical documentation that helps existing and new engineers understand the codebase.

For each abstraction, provide:
1. A concise `name`{name_lang_hint}.
2. A technical `description` explaining what it does, its responsibilities, and role in the system, in around 100 words{desc_lang_hint}.
3. A list of relevant `file_indices` (integers) using the format `idx # path/comment`.

List of file indices and paths present in the context:
{file_listing_for_prompt}

Format the output as a YAML list of dictionaries:

```yaml
- name: |
    Query Processing{name_lang_hint}
  description: |
    Handles incoming queries and routes them to appropriate handlers.
    Responsible for parsing, validation, and initial processing of user requests.{desc_lang_hint}
  file_indices:
    - 0 # path/to/file1.py
    - 3 # path/to/related.py
- name: |
    Query Optimization{name_lang_hint}
  description: |
    Optimizes query execution by analyzing patterns and caching results.
    Manages performance improvements and resource allocation for query processing.{desc_lang_hint}
  file_indices:
    - 5 # path/to/another.js
# ... up to {max_abstraction_num} abstractions
```"""
        # Use cache only if enabled and not retrying
        response = call_llm(prompt, use_cache=(use_cache and self.cur_retry == 0))

        # --- Validation ---
        yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip()
        abstractions = yaml.safe_load(yaml_str)

        if not isinstance(abstractions, list):
            raise ValueError("LLM Output is not a list")

        validated_abstractions = []
        for item in abstractions:
            if not isinstance(item, dict) or not all(
                k in item for k in ["name", "description", "file_indices"]
            ):
                raise ValueError(f"Missing keys in abstraction item: {item}")
            if not isinstance(item["name"], str):
                raise ValueError(f"Name is not a string in item: {item}")
            if not isinstance(item["description"], str):
                raise ValueError(f"Description is not a string in item: {item}")
            if not isinstance(item["file_indices"], list):
                raise ValueError(f"file_indices is not a list in item: {item}")

            validated_indices = []
            for idx_entry in item["file_indices"]:
                # Only the parsing step is guarded, so a parse failure and an
                # out-of-range index are reported with distinct messages.
                try:
                    if isinstance(idx_entry, int):
                        idx = idx_entry
                    elif isinstance(idx_entry, str) and "#" in idx_entry:
                        idx = int(idx_entry.split("#")[0].strip())
                    else:
                        idx = int(str(idx_entry).strip())
                except (ValueError, TypeError) as err:
                    raise ValueError(
                        f"Could not parse index from entry: {idx_entry} in item {item['name']}"
                    ) from err

                if not (0 <= idx < file_count):
                    raise ValueError(
                        f"Invalid file index {idx} found in item {item['name']}. Max index is {file_count - 1}."
                    )
                validated_indices.append(idx)

            # Store only the required fields; name/description may be translated.
            validated_abstractions.append(
                {
                    "name": item["name"],
                    "description": item["description"],
                    "files": sorted(set(validated_indices)),
                }
            )

        elapsed = time.time() - start_time
        print_success(
            f"Found {len(validated_abstractions)} abstractions", elapsed, indent=2
        )

        return validated_abstractions

    def post(self, shared, prep_res, exec_res):
        """Store the validated abstractions in ``shared["abstractions"]``."""
        # List of {"name": str, "description": str, "files": [int]}
        shared["abstractions"] = exec_res
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
class AnalyzeRelationships(Node):
    """Ask the LLM for a project summary and the links between abstractions.

    Reads ``shared["abstractions"]``, ``shared["files"]`` and
    ``shared["project_name"]`` (plus the optional ``language`` and
    ``use_cache``) and stores the result in ``shared["relationships"]``.
    """

    def prep(self, shared):
        """Assemble the abstraction listing and the code context for the prompt.

        Returns:
            tuple: ``(context, abstraction_listing, num_abstractions,
            project_name, language, use_cache)``.
        """
        # Each abstraction carries a 'files' list of indices; its name and
        # description may already be translated.
        abstractions = shared["abstractions"]
        files_data = shared["files"]
        project_name = shared["project_name"]
        language = shared.get("language", "english")
        use_cache = shared.get("use_cache", True)

        num_abstractions = len(abstractions)

        # Real newlines are used throughout: an escaped "\\n" would put a
        # literal backslash-n into the prompt instead of a line break.
        context_lines = ["Identified Abstractions:"]
        all_relevant_indices = set()
        abstraction_info_for_prompt = []
        for i, abstr in enumerate(abstractions):
            file_indices_str = ", ".join(map(str, abstr["files"]))
            context_lines.append(
                f"- Index {i}: {abstr['name']} (Relevant file indices: [{file_indices_str}])\n Description: {abstr['description']}"
            )
            abstraction_info_for_prompt.append(f"{i} # {abstr['name']}")
            all_relevant_indices.update(abstr["files"])

        context = "\n".join(context_lines) + "\n"
        context += "\nRelevant File Snippets (Referenced by Index and Path):\n"

        # Only files referenced by at least one abstraction are included.
        relevant_files_content_map = get_content_for_indices(
            files_data, sorted(all_relevant_indices)
        )
        context += "\n\n".join(
            f"--- File: {idx_path} ---\n{content}"
            for idx_path, content in relevant_files_content_map.items()
        )

        return (
            context,
            "\n".join(abstraction_info_for_prompt),
            num_abstractions,
            project_name,
            language,
            use_cache,
        )

    def exec(self, prep_res):
        """Query the LLM and validate the summary and relationships it returns.

        Returns:
            dict: ``{"summary": str, "details": [{"from": int, "to": int,
            "label": str}, ...]}``.

        Raises:
            ValueError: If the YAML is not a dict with ``summary`` and
                ``relationships``, a field has the wrong type, a relationship
                lacks a key, an index cannot be parsed, or an index is outside
                ``0..num_abstractions - 1``.
            IndexError: If the response contains no ```` ```yaml ```` fence.
        """
        start_time = time.time()
        (
            context,
            abstraction_listing,
            num_abstractions,
            project_name,
            language,
            use_cache,
        ) = prep_res

        print_operation("Analyzing relationships...", Icons.ANALYZING, indent=1)

        # Add language instruction and hints only if not English
        language_instruction = ""
        lang_hint = ""
        list_lang_note = ""
        if language.lower() != "english":
            language_instruction = f"IMPORTANT: Generate the `summary` and relationship `label` fields in **{language.capitalize()}** language. Do NOT use English for these fields.\n\n"
            lang_hint = f" (in {language.capitalize()})"
            # Note for the input list
            list_lang_note = f" (Names might be in {language.capitalize()})"

        prompt = f"""
Based on the following abstractions and relevant code snippets from the project `{project_name}`:

List of Abstraction Indices and Names{list_lang_note}:
{abstraction_listing}

Context (Abstractions, Descriptions, Code):
{context}

{language_instruction}Please provide:
1. A high-level technical `summary` of the project's purpose, architecture, functionalities and their responsibilities{lang_hint}. Use markdown formatting with **bold** and *italic* text to highlight important concepts.
2. A list (`relationships`) describing the key interactions between these abstractions. For each relationship, specify:
    - `from_abstraction`: Index of the source abstraction (e.g., `0 # AbstractionName1`)
    - `to_abstraction`: Index of the target abstraction (e.g., `1 # AbstractionName2`)
    - `label`: A brief label for the interaction **in just a few words**{lang_hint} (e.g., "Manages", "Inherits", "Uses").
    Ideally the relationship should be backed by one abstraction calling or passing parameters to another.
    Simplify the relationship and exclude those non-important ones.

IMPORTANT: Make sure EVERY abstraction is involved in at least ONE relationship (either as source or target). Each abstraction index must appear at least once across all relationships.

Format the output as YAML:

```yaml
summary: |
  A technical overview of the project architecture{lang_hint}.
  Can span multiple lines with **bold** and *italic* for emphasis.
relationships:
  - from_abstraction: 0 # AbstractionName1
    to_abstraction: 1 # AbstractionName2
    label: "Manages"{lang_hint}
  - from_abstraction: 2 # AbstractionName3
    to_abstraction: 0 # AbstractionName1
    label: "Provides config"{lang_hint}
  # ... other relationships
```

Now, provide the YAML output:
"""
        # Use cache only if enabled and not retrying
        response = call_llm(prompt, use_cache=(use_cache and self.cur_retry == 0))

        # --- Validation ---
        yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip()
        relationships_data = yaml.safe_load(yaml_str)

        if not isinstance(relationships_data, dict) or not all(
            k in relationships_data for k in ["summary", "relationships"]
        ):
            raise ValueError(
                "LLM output is not a dict or missing keys ('summary', 'relationships')"
            )
        if not isinstance(relationships_data["summary"], str):
            raise ValueError("summary is not a string")
        if not isinstance(relationships_data["relationships"], list):
            raise ValueError("relationships is not a list")

        validated_relationships = []
        for rel in relationships_data["relationships"]:
            if not isinstance(rel, dict) or not all(
                k in rel for k in ["from_abstraction", "to_abstraction", "label"]
            ):
                raise ValueError(
                    f"Missing keys (expected from_abstraction, to_abstraction, label) in relationship item: {rel}"
                )
            if not isinstance(rel["label"], str):
                raise ValueError(f"Relationship label is not a string: {rel}")

            # Only the parsing step is guarded, so a parse failure and an
            # out-of-range index are reported with distinct messages.
            try:
                from_idx = int(str(rel["from_abstraction"]).split("#")[0].strip())
                to_idx = int(str(rel["to_abstraction"]).split("#")[0].strip())
            except (ValueError, TypeError) as err:
                raise ValueError(
                    f"Could not parse indices from relationship: {rel}"
                ) from err

            if not (0 <= from_idx < num_abstractions and 0 <= to_idx < num_abstractions):
                raise ValueError(
                    f"Invalid index in relationship: from={from_idx}, to={to_idx}. Max index is {num_abstractions-1}."
                )
            validated_relationships.append(
                {
                    "from": from_idx,
                    "to": to_idx,
                    "label": rel["label"],  # Potentially translated label
                }
            )

        elapsed = time.time() - start_time
        print_success("Generated project summary", elapsed, indent=2)

        return {
            "summary": relationships_data["summary"],  # Potentially translated
            "details": validated_relationships,  # Index-based relationships
        }

    def post(self, shared, prep_res, exec_res):
        """Store the summary and relationships in ``shared["relationships"]``."""
        # Structure: {"summary": str, "details": [{"from": int, "to": int, "label": str}]}
        shared["relationships"] = exec_res
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
class OrderComponents(Node):
    """Ask the LLM in which order the abstractions should be documented.

    Reads ``shared["abstractions"]``, ``shared["relationships"]`` and
    ``shared["project_name"]`` (plus the optional ``language`` and
    ``use_cache``) and stores the ordering in ``shared["component_order"]``.
    """

    def prep(self, shared):
        """Build the abstraction listing and relationship context for the prompt.

        Returns:
            tuple: ``(abstraction_listing, context, num_abstractions,
            project_name, list_lang_note, use_cache)``.
        """
        abstractions = shared["abstractions"]  # Name/description might be translated
        relationships = shared["relationships"]  # Summary/label might be translated
        project_name = shared["project_name"]
        language = shared.get("language", "english")
        use_cache = shared.get("use_cache", True)

        abstraction_listing = "\n".join(
            f"- {i} # {a['name']}" for i, a in enumerate(abstractions)
        )

        # The notes tell the LLM that the inputs may not be in English.
        summary_note = ""
        list_lang_note = ""
        if language.lower() != "english":
            summary_note = (
                f" (Note: Project Summary might be in {language.capitalize()})"
            )
            list_lang_note = f" (Names might be in {language.capitalize()})"

        context_parts = [
            f"Project Summary{summary_note}:\n{relationships['summary']}\n\n",
            "Relationships (Indices refer to abstractions above):\n",
        ]
        for rel in relationships["details"]:
            from_name = abstractions[rel["from"]]["name"]
            to_name = abstractions[rel["to"]]["name"]
            context_parts.append(
                f"- From {rel['from']} ({from_name}) to {rel['to']} ({to_name}): {rel['label']}\n"
            )
        context = "".join(context_parts)

        return (
            abstraction_listing,
            context,
            len(abstractions),
            project_name,
            list_lang_note,
            use_cache,
        )

    def exec(self, prep_res):
        """Query the LLM and validate the ordering it returns.

        Returns:
            list: Abstraction indices in documentation order; every index in
            ``0..num_abstractions - 1`` appears exactly once.

        Raises:
            ValueError: If the YAML is not a list, an entry cannot be parsed,
                an index is out of range or duplicated, or the list does not
                cover every abstraction.
            IndexError: If the response contains no ```` ```yaml ```` fence.
        """
        start_time = time.time()
        (
            abstraction_listing,
            context,
            num_abstractions,
            project_name,
            list_lang_note,
            use_cache,
        ) = prep_res

        print_operation("Determining component order...", Icons.ORDERING, indent=1)
        # No language variation needed here in prompt instructions, just ordering based on structure
        # The input names might be translated, hence the note.
        prompt = f"""
Given the following project abstractions and their relationships for the project ```` {project_name} ````:

Abstractions (Index # Name){list_lang_note}:
{abstraction_listing}

Context about relationships and project summary:
{context}

If you are going to create technical documentation for ```` {project_name} ````, what is the best order to document these components, from first to last?
Ideally, first document those that are the most important or foundational, perhaps user-facing concepts or entry points. Then move to more detailed, lower-level implementation details or supporting concepts.

Output the ordered list of abstraction indices, including the name in a comment for clarity. Use the format `idx # AbstractionName`.

```yaml
- 2 # FoundationalConcept
- 0 # CoreClassA
- 1 # CoreClassB (uses CoreClassA)
- ...
```

Now, provide the YAML output:
"""
        # Use cache only if enabled and not retrying
        response = call_llm(prompt, use_cache=(use_cache and self.cur_retry == 0))

        # --- Validation ---
        yaml_str = response.strip().split("```yaml")[1].split("```")[0].strip()
        ordered_indices_raw = yaml.safe_load(yaml_str)

        if not isinstance(ordered_indices_raw, list):
            raise ValueError("LLM output is not a list")

        ordered_indices = []
        seen_indices = set()
        for entry in ordered_indices_raw:
            # Only the parsing step is guarded, so parse failures, range
            # errors and duplicates are reported with distinct messages.
            try:
                if isinstance(entry, int):
                    idx = entry
                elif isinstance(entry, str) and "#" in entry:
                    idx = int(entry.split("#")[0].strip())
                else:
                    idx = int(str(entry).strip())
            except (ValueError, TypeError) as err:
                raise ValueError(
                    f"Could not parse index from ordered list entry: {entry}"
                ) from err

            if not (0 <= idx < num_abstractions):
                raise ValueError(
                    f"Invalid index {idx} in ordered list. Max index is {num_abstractions-1}."
                )
            if idx in seen_indices:
                raise ValueError(f"Duplicate index {idx} found in ordered list.")
            ordered_indices.append(idx)
            seen_indices.add(idx)

        # Check if all abstractions are included
        if len(ordered_indices) != num_abstractions:
            raise ValueError(
                f"Ordered list length ({len(ordered_indices)}) does not match number of abstractions ({num_abstractions}). Missing indices: {set(range(num_abstractions)) - seen_indices}"
            )

        elapsed = time.time() - start_time
        print_success(f"Order determined: {ordered_indices}", elapsed, indent=2)
        print_phase_end()

        return ordered_indices

    def post(self, shared, prep_res, exec_res):
        """Store the ordered abstraction indices in ``shared["component_order"]``."""
        shared["component_order"] = exec_res
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
class WriteComponents(BatchNode):
|
|
583
|
+
def prep(self, shared):
    """Prepare one work item per component, in documentation order.

    Reads ``component_order``, ``abstractions``, ``files`` and
    ``project_name`` from ``shared`` (plus the optional ``language``,
    ``use_cache`` and ``documentation_mode``) and resets
    ``self.components_written_so_far`` to an empty list.

    Returns:
        list: One dict per valid entry of ``component_order`` holding the
        abstraction details, related file contents, the full component
        listing, the filename mapping and the previous/next component
        records (``None`` when there is no usable neighbour).
    """
    component_order = shared["component_order"]  # List of indices
    # List of {"name": str, "description": str, "files": [int]}
    abstractions = shared["abstractions"]
    files_data = shared["files"]  # List of (path, content) tuples
    language = shared.get("language", "english")
    use_cache = shared.get("use_cache", True)
    documentation_mode = shared.get("documentation_mode", "minimal")

    # Temporary storage across exec calls; not kept in the shared store.
    self.components_written_so_far = []

    # First pass: number every valid component and record its filename so
    # the second pass can link to components that come later in the order.
    all_components = []
    component_filenames = {}
    for i, abstraction_index in enumerate(component_order):
        if not (0 <= abstraction_index < len(abstractions)):
            continue
        component_num = i + 1
        component_name = abstractions[abstraction_index]["name"]
        # Safe filename: every non-alphanumeric character becomes "_".
        safe_name = "".join(
            c if c.isalnum() else "_" for c in component_name
        ).lower()
        filename = f"{component_num:02d}_{safe_name}.md"
        # Strip newlines from component name to prevent broken markdown links
        clean_component_name = component_name.replace("\n", " ").strip()
        all_components.append(
            f"{component_num}. [{clean_component_name}]({filename})"
        )
        component_filenames[abstraction_index] = {
            "num": component_num,
            "name": component_name,
            "filename": filename,
        }

    full_component_listing = "\n".join(all_components)

    # Second pass: build the per-component work items.
    items_to_process = []
    last_position = len(component_order) - 1
    for i, abstraction_index in enumerate(component_order):
        if not (0 <= abstraction_index < len(abstractions)):
            print(
                f"Warning: Invalid abstraction index {abstraction_index} in component_order. Skipping."
            )
            continue

        abstraction_details = abstractions[abstraction_index]
        related_files_content_map = get_content_for_indices(
            files_data, abstraction_details.get("files", [])
        )

        # .get() keeps a skipped (invalid) neighbour from raising KeyError;
        # such a neighbour is treated as absent.
        prev_component = None
        if i > 0:
            prev_component = component_filenames.get(component_order[i - 1])

        next_component = None
        if i < last_position:
            next_component = component_filenames.get(component_order[i + 1])

        items_to_process.append(
            {
                "component_num": i + 1,
                "abstraction_index": abstraction_index,
                "abstraction_details": abstraction_details,
                "related_files_content_map": related_files_content_map,
                "project_name": shared["project_name"],
                "full_component_listing": full_component_listing,
                "component_filenames": component_filenames,
                "prev_component": prev_component,
                "next_component": next_component,
                "language": language,
                "use_cache": use_cache,
                "documentation_mode": documentation_mode,
            }
        )

    print_phase_start("Content Generation", Icons.WRITING)
    return items_to_process  # Iterable for BatchNode
|
|
677
|
+
|
|
678
|
+
def exec(self, item):
    """Generate the Markdown documentation for a single component via the LLM.

    Runs once for each item prepared by ``prep``. Builds a prompt (a short one
    for ``documentation_mode == "minimal"``, a longer one otherwise), calls
    ``call_llm``, records the elapsed time, and normalises the first heading.

    Args:
        item: Dict describing one component. Keys read here:
            ``abstraction_details`` (with ``name`` and ``description``),
            ``component_num``, ``project_name``, ``language``, ``use_cache``,
            ``documentation_mode``, ``related_files_content_map`` and
            ``full_component_listing``.

    Returns:
        The component's Markdown text. If the LLM output did not begin with
        the expected ``# Component <num>`` heading, the heading is replaced
        or prepended.

    Side effects:
        Appends the elapsed seconds to ``self.component_times`` (created on
        first use) and appends the final text to
        ``self.components_written_so_far`` so later components can use it as
        context. The latter attribute must already exist when this runs
        (presumably created in ``prep`` -- not visible from here).
    """
    start_time = time.time()
    # This runs for each item prepared above
    abstraction_name = item["abstraction_details"]["name"]  # Potentially translated name
    abstraction_description = item["abstraction_details"][
        "description"
    ]  # Potentially translated description
    component_num = item["component_num"]
    project_name = item.get("project_name")
    language = item.get("language", "english")
    use_cache = item.get("use_cache", True)  # Read use_cache from item
    documentation_mode = item.get(
        "documentation_mode", "minimal"
    )  # Read documentation_mode from item

    # Prepare file context string from the map. Keys that contain "# " are
    # reduced to the part after it, so only that part is shown to the LLM.
    file_context_str = "\n\n".join(
        f"--- File: {idx_path.split('# ')[1] if '# ' in idx_path else idx_path} ---\n{content}"
        for idx_path, content in item["related_files_content_map"].items()
    )

    # Summary of components written *before* this one, taken from the
    # temporary instance list that this method appends to at the end.
    previous_components_summary = "\n---\n".join(self.components_written_so_far)

    # Add language instruction and context notes only if not English
    language_instruction = ""
    concept_details_note = ""
    structure_note = ""
    prev_summary_note = ""
    instruction_lang_note = ""
    mermaid_lang_note = ""
    code_comment_note = ""
    link_lang_note = ""
    tone_note = ""
    if language.lower() != "english":
        lang_cap = language.capitalize()
        language_instruction = f"IMPORTANT: Write this ENTIRE documentation component in **{lang_cap}**. Some input context (like concept name, description, component list, previous summary) might already be in {lang_cap}, but you MUST translate ALL other generated content including explanations, examples, technical terms, and potentially code comments into {lang_cap}. DO NOT use English anywhere except in code syntax, required proper nouns, or when specified. The entire output MUST be in {lang_cap}.\n\n"
        concept_details_note = f" (Note: Provided in {lang_cap})"
        structure_note = f" (Note: Component names might be in {lang_cap})"
        prev_summary_note = f" (Note: This summary might be in {lang_cap})"
        instruction_lang_note = f" (in {lang_cap})"
        mermaid_lang_note = f" (Use {lang_cap} for labels/text if appropriate)"
        code_comment_note = f" (Translate to {lang_cap} if possible, otherwise keep minimal English for clarity)"
        link_lang_note = (
            f" (Use the {lang_cap} component title from the structure above)"
        )
        tone_note = f" (appropriate for {lang_cap} readers)"

    # Build prompt based on mode
    if documentation_mode == "minimal":
        # Minimal mode: shorter, more direct instructions
        prompt = f"""
{language_instruction}Write short and concise intent-focused documentation. Be brief but keep all critical info: architecture, design, components, integrations. Focus on key facts and intent. Avoid verbosity. Keep structure but be direct.

Write technical documentation (in Markdown format) for engineers working with the component "{abstraction_name}" in the project `{project_name}`. This is Component {component_num}.

Component/Concept Details{concept_details_note}:
- Name: {abstraction_name}
- Description:
{abstraction_description}

Complete Documentation Structure{structure_note}:
{item["full_component_listing"]}

Context from previous components{prev_summary_note}:
{previous_components_summary if previous_components_summary else "This is the first component."}

Relevant Code Snippets (Code itself remains unchanged):
{file_context_str if file_context_str else "No specific code snippets provided for this abstraction."}

Instructions for the documentation (Generate content in {language.capitalize()} unless specified otherwise):
- Start with clear heading: `# Component {component_num}: {abstraction_name}`. Use the provided component name.

- If not first component, reference previous component{instruction_lang_note} with Markdown link{link_lang_note}.

- Why it exists{instruction_lang_note}: core responsibilities, purpose in architecture.

- What it does{instruction_lang_note}: key responsibilities, how it works, integration points.

- Avoid code blocks if not critical. If code blocks are needed, keep them BELOW 5 lines. Simplify aggressively. Use comments{code_comment_note} to skip non-essential details. Explain after each block{instruction_lang_note}. No imports/packages.

- Internal implementation{instruction_lang_note}: step-by-step walkthrough (code-light). Use simple sequenceDiagram (max 5 participants). If participant name has space: `participant QP as Query Processing`. {mermaid_lang_note}.

- IMPORTANT: Link to other components: [Component Title](filename.md). Use Complete Documentation Structure for filename/title{link_lang_note}.

- Use mermaid diagrams for complex concepts (```mermaid``` format). {mermaid_lang_note}.

- Key takeaways{instruction_lang_note}: what it handles, common patterns, integration points. Link to next component if exists{link_lang_note}.

- Tone: technical and precise{tone_note}.

- Output *only* Markdown content (DONT NEED ```markdown``` tags).
"""
    else:
        # Comprehensive mode: ORIGINAL prompt unchanged
        prompt = f"""
{language_instruction}Write technical documentation (in Markdown format) for engineers working with the component "{abstraction_name}" in the project `{project_name}`. This is Component {component_num}.

Component/Concept Details{concept_details_note}:
- Name: {abstraction_name}
- Description:
{abstraction_description}

Complete Documentation Structure{structure_note}:
{item["full_component_listing"]}

Context from previous components{prev_summary_note}:
{previous_components_summary if previous_components_summary else "This is the first component."}

Relevant Code Snippets (Code itself remains unchanged):
{file_context_str if file_context_str else "No specific code snippets provided for this abstraction."}

Instructions for the documentation (Generate content in {language.capitalize()} unless specified otherwise):
- Start with a clear heading (e.g., `# Component {component_num}: {abstraction_name}`). Use the provided component name.

- If this is not the first component, begin with a brief reference to the previous component{instruction_lang_note}, linking to it with a proper Markdown link using its name{link_lang_note}.

- Begin with why this component exists{instruction_lang_note} - what problem it solves and its core responsibilities. Focus on the component's purpose in the system architecture.

- Document what this component does{instruction_lang_note} - its key responsibilities, how it works, and how it integrates with other components.

- If the component is complex, break it down into key concepts. Explain each concept with technical precision{instruction_lang_note}.

- Each code block should be BELOW 10 lines! If longer code blocks are needed, break them down into smaller pieces and walk through them one-by-one. Aggresively simplify the code to make it minimal. Use comments{code_comment_note} to skip non-important implementation details. Each code block should have a solid explanation right after it{instruction_lang_note}. Make sure you dont include Imports or packages in the code blocks, keep it focused on the key logic always.

- Describe the internal implementation to help understand what's under the hood{instruction_lang_note}. First provide a non-code or code-light walkthrough on what happens step-by-step when the abstraction is called{instruction_lang_note}. It's recommended to use a simple sequenceDiagram - keep it minimal with at most 5 participants to ensure clarity. If participant name has space, use: `participant QP as Query Processing`. {mermaid_lang_note}.

- Then dive deeper into code for the internal implementation with references to files. Provide example code blocks, but make them similarly simple and beginner-friendly. Dont include imports or packages in the code blocks. Explain{instruction_lang_note}.

- IMPORTANT: When you need to refer to other core components covered in other sections, ALWAYS use proper Markdown links like this: [Component Title](filename.md). Use the Complete Documentation Structure above to find the correct filename and the component title{link_lang_note}. Translate the surrounding text.

- Use mermaid diagrams to illustrate complex concepts (```mermaid``` format). {mermaid_lang_note}.

- Provide concrete code examples from the codebase showing actual usage and implementation patterns{instruction_lang_note}.

- End the component documentation with key takeaways{instruction_lang_note}: what this component handles, common usage patterns, and integration points. If there is a next component, use a proper Markdown link: [Next Component Title](next_component_filename){link_lang_note}.

- Ensure the tone is technical and precise{tone_note}.

- Output *only* the Markdown content for this component.

Now, directly provide technical Markdown documentation (DON'T need ```markdown``` tags):
"""
    component_content = call_llm(
        prompt, use_cache=(use_cache and self.cur_retry == 0)
    )  # Use cache only if enabled and not retrying

    elapsed = time.time() - start_time

    # Store timing for later summary
    if not hasattr(self, "component_times"):
        self.component_times = []
    self.component_times.append(elapsed)

    # Show the operation with timing
    print_operation(
        f"Component {component_num}: {abstraction_name}",
        Icons.WRITING,
        indent=1,
        elapsed_time=elapsed,
    )

    # Basic validation/cleanup
    actual_heading = f"# Component {component_num}: {abstraction_name}"  # Use potentially translated name
    heading_prefix = f"# Component {component_num}"
    stripped_content = component_content.strip()
    # A bare prefix test would accept "# Component 12: ..." for component 1,
    # so also require that the number is not followed by another digit.
    digit_after_prefix = stripped_content[
        len(heading_prefix) : len(heading_prefix) + 1
    ].isdigit()
    has_expected_heading = (
        stripped_content.startswith(heading_prefix) and not digit_after_prefix
    )
    if not has_expected_heading:
        # Add heading if missing or incorrect, trying to preserve content
        lines = stripped_content.split("\n")
        if lines and lines[0].strip().startswith("#"):
            # If there's some heading, replace it
            lines[0] = actual_heading
            component_content = "\n".join(lines)
        else:
            # Otherwise, prepend it
            component_content = f"{actual_heading}\n\n{component_content}"

    # Add the generated content to our temporary list for the next iteration's context
    self.components_written_so_far.append(component_content)

    return component_content  # Return the Markdown string (potentially translated)
|
|
859
|
+
|
|
860
|
+
def post(self, shared, prep_res, exec_res_list):
    """Publish the generated components and discard per-run scratch state.

    Args:
        shared: Flow-wide dict; receives the results under ``"components"``.
        prep_res: Result of ``prep`` (not used here).
        exec_res_list: One Markdown string per processed item, in order.
    """
    shared["components"] = exec_res_list

    # Sum of the per-component timings; zero when none were recorded.
    seconds_spent = sum(getattr(self, "component_times", ()))
    print_success(f"{len(exec_res_list)} components written", seconds_spent, indent=1)
    print_phase_end()

    # Remove the temporary attributes so nothing carries over on this instance.
    for scratch_attr in ("component_times", "components_written_so_far"):
        if hasattr(self, scratch_attr):
            delattr(self, scratch_attr)
|
|
876
|
+
|
|
877
|
+
|
|
878
|
+
class GenerateDocContent(Node):
    """Assemble the index page and the single combined Markdown document.

    Reads the relationship summary, component order, abstractions and the
    generated component texts from ``shared`` and stores the assembled
    result under ``shared["doc_content"]``. Nothing is written to disk here.
    """

    def prep(self, shared):
        """Collect the inputs needed by ``exec`` from the shared store.

        ``output_dir`` defaults to ``"output"`` and ``repo_url`` may be
        ``None`` (both are read with ``.get``); every other key is required.
        """
        return {
            "project_name": shared["project_name"],
            "output_path": shared.get("output_dir", "output"),  # Default output dir
            "repo_url": shared.get("repo_url"),
            # {"summary": str, "details": [{"from": int, "to": int, "label": str}]}
            "relationships_data": shared["relationships"],
            "component_order": shared["component_order"],  # indices
            "abstractions": shared["abstractions"],  # list of dicts
            "components_content": shared["components"],  # list of strings
        }

    def _generate_combined_content(
        self, project_name, index_content, components_content
    ):
        """Generate the combined documentation file content.

        Layout: an H1 with the project name, the index content with its
        attribution footer stripped, then every component with its headings
        shifted down one level, all separated by horizontal rules and
        followed by a single attribution line.
        """
        from wikigen.utils.adjust_headings import (
            adjust_heading_levels,
            strip_attribution_footer,
        )

        separator = "\n\n---\n\n"
        index_without_attribution = strip_attribution_footer(index_content)
        shifted_components = [
            adjust_heading_levels(component_content, shift=1)
            for component_content in components_content
        ]
        # join() puts a separator between components but not after the last
        # one; the trailing separator below introduces the attribution line.
        return "".join(
            [
                f"# {project_name}\n\n",
                index_without_attribution,
                separator,
                separator.join(shifted_components),
                separator,
                "Wiki created by [WIKIGEN](https://github.com/usesalt/wikigen)\n",
            ]
        )

    def _build_mermaid_diagram(self, abstractions, relationship_details):
        """Return Mermaid flowchart source for the abstraction relationships."""
        mermaid_lines = ["flowchart TD"]
        # One node per abstraction; ids are positional ("A0", "A1", ...).
        for i, abstr in enumerate(abstractions):
            # Remove quotes and line breaks to avoid Mermaid syntax issues
            node_label = abstr["name"].replace('"', "").replace("\n", " ").strip()
            mermaid_lines.append(f' A{i}["{node_label}"]')
        # One labelled edge per relationship; long labels are truncated.
        max_label_len = 30
        for rel in relationship_details:
            edge_label = rel["label"].replace('"', "").replace("\n", " ")
            if len(edge_label) > max_label_len:
                edge_label = edge_label[: max_label_len - 3] + "..."
            mermaid_lines.append(f' A{rel["from"]} -- "{edge_label}" --> A{rel["to"]}')
        return "\n".join(mermaid_lines)

    def exec(self, prep_res):
        """Build the index content, per-component file entries and combined text.

        Args:
            prep_res: Dict produced by ``prep``.

        Returns:
            Dict with ``project_name``, ``output_path``, ``index_content``,
            ``component_files`` (list of ``{"filename", "content"}``) and
            ``combined_content``.
        """
        start_time = time.time()
        project_name = prep_res["project_name"]
        output_path = prep_res["output_path"]
        repo_url = prep_res["repo_url"]
        relationships_data = prep_res["relationships_data"]
        component_order = prep_res["component_order"]
        abstractions = prep_res["abstractions"]
        components_content = prep_res["components_content"]

        print_phase_start("Documentation Assembly", Icons.GENERATING)

        mermaid_diagram = self._build_mermaid_diagram(
            abstractions, relationships_data["details"]
        )

        # --- Prepare index.md content ---
        # The summary is used as-is; fixed strings stay in English.
        index_parts = [f"{relationships_data['summary']}\n\n"]
        if repo_url:
            # repo_url may be None; skip the line rather than emit a
            # "[None](None)" link.
            index_parts.append(f"**Source Repository:** [{repo_url}]({repo_url})\n\n")
        index_parts.append("```mermaid\n" + mermaid_diagram + "\n```\n\n")
        index_parts.append("## Components\n\n")

        component_files = []
        # Generated content only exists for entries with a valid abstraction
        # index (invalid ones are skipped when the items are prepared), so
        # content is consumed with its own cursor instead of the position in
        # component_order. With all indices valid the two are identical.
        content_pos = 0
        for i, abstraction_index in enumerate(component_order):
            index_is_valid = 0 <= abstraction_index < len(abstractions)
            if not index_is_valid or content_pos >= len(components_content):
                print(
                    f"Warning: Mismatch between component order, abstractions, or content at index {i} (abstraction index {abstraction_index}). Skipping file generation for this entry."
                )
                continue

            component_content = components_content[content_pos]
            content_pos += 1

            abstraction_name = abstractions[abstraction_index]["name"]
            # Sanitize the name for use in a filename
            safe_name = "".join(
                c if c.isalnum() else "_" for c in abstraction_name
            ).lower()
            filename = f"{i+1:02d}_{safe_name}.md"
            # Strip newlines from component name to prevent broken markdown links
            clean_abstraction_name = abstraction_name.replace("\n", " ").strip()
            index_parts.append(f"{i+1}. [{clean_abstraction_name}]({filename})\n")

            # Store filename and corresponding content
            component_files.append({"filename": filename, "content": component_content})

        # Add attribution to index content (using English fixed string)
        index_parts.append("\n\n---\n\nGenerated by [WIKIGEN](https://usesalt.co)")
        index_content = "".join(index_parts)

        # Generate combined content
        combined_content = self._generate_combined_content(
            project_name, index_content, components_content
        )

        elapsed = time.time() - start_time
        print_success("Generated index and combined files", elapsed, indent=1)
        print_phase_end()

        return {
            "project_name": project_name,
            "output_path": output_path,
            "index_content": index_content,
            "component_files": component_files,
            "combined_content": combined_content,
        }

    def post(self, shared, prep_res, exec_res):
        """Store the assembled content dict under ``shared["doc_content"]``."""
        shared["doc_content"] = exec_res  # Store the content dict
|
|
1052
|
+
|
|
1053
|
+
|
|
1054
|
+
class WriteDocFiles(Node):
    """Write the combined documentation file to the output directory."""

    def prep(self, shared):
        """Hand the assembled content dict to ``exec``."""
        return shared["doc_content"]

    def exec(self, doc_content):
        """Create the output directory and write ``<project_name>.md`` into it.

        Args:
            doc_content: Dict with ``project_name``, ``output_path`` and
                ``combined_content``.

        Returns:
            The output directory path.
        """
        started_at = time.time()
        project_name = doc_content["project_name"]
        target_dir = doc_content["output_path"]
        markdown_text = doc_content["combined_content"]

        print_phase_start("Writing Output Files", Icons.CREATING)

        # No local error handling: failures propagate to the Node's own
        # retry/fallback machinery.
        os.makedirs(target_dir, exist_ok=True)

        # Write combined file
        markdown_name = f"{project_name}.md"
        with open(
            os.path.join(target_dir, markdown_name), "w", encoding="utf-8"
        ) as handle:
            handle.write(markdown_text)
        print_operation(f"{Icons.SUCCESS} {markdown_name}", indent=1)

        print_success(
            "Documentation file written", time.time() - started_at, indent=1
        )

        return target_dir  # Return the final path

    def post(self, shared, prep_res, exec_res):
        """Record the output directory under ``shared["final_output_dir"]``."""
        shared["final_output_dir"] = exec_res  # Store the output path
|