wikigen-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wikigen/__init__.py +7 -0
- wikigen/cli.py +690 -0
- wikigen/config.py +526 -0
- wikigen/defaults.py +78 -0
- wikigen/flows/__init__.py +1 -0
- wikigen/flows/flow.py +38 -0
- wikigen/formatter/help_formatter.py +194 -0
- wikigen/formatter/init_formatter.py +56 -0
- wikigen/formatter/output_formatter.py +290 -0
- wikigen/mcp/__init__.py +12 -0
- wikigen/mcp/chunking.py +127 -0
- wikigen/mcp/embeddings.py +69 -0
- wikigen/mcp/output_resources.py +65 -0
- wikigen/mcp/search_index.py +826 -0
- wikigen/mcp/server.py +232 -0
- wikigen/mcp/vector_index.py +297 -0
- wikigen/metadata/__init__.py +35 -0
- wikigen/metadata/logo.py +28 -0
- wikigen/metadata/project.py +28 -0
- wikigen/metadata/version.py +17 -0
- wikigen/nodes/__init__.py +1 -0
- wikigen/nodes/nodes.py +1080 -0
- wikigen/utils/__init__.py +0 -0
- wikigen/utils/adjust_headings.py +72 -0
- wikigen/utils/call_llm.py +271 -0
- wikigen/utils/crawl_github_files.py +450 -0
- wikigen/utils/crawl_local_files.py +151 -0
- wikigen/utils/llm_providers.py +101 -0
- wikigen/utils/version_check.py +84 -0
- wikigen-1.0.0.dist-info/METADATA +352 -0
- wikigen-1.0.0.dist-info/RECORD +35 -0
- wikigen-1.0.0.dist-info/WHEEL +5 -0
- wikigen-1.0.0.dist-info/entry_points.txt +2 -0
- wikigen-1.0.0.dist-info/licenses/LICENSE +21 -0
- wikigen-1.0.0.dist-info/top_level.txt +1 -0
wikigen/utils/__init__.py (file without changes)

wikigen/utils/adjust_headings.py
@@ -0,0 +1,72 @@
import re


def adjust_heading_levels(content: str, shift: int) -> str:
    """
    Shift markdown heading levels by 'shift' amount.
    Preserves code blocks, inline code, and other content.

    Args:
        content: Markdown text
        shift: Number of levels to shift (positive integer)

    Returns:
        Markdown text with adjusted headings
    """
    if shift <= 0:
        return content

    lines = content.split("\n")
    result_lines = []
    in_code_block = False
    # Track only in/out of code blocks; no need to store the marker

    for line in lines:
        # Track code block state
        if line.strip().startswith("```"):
            if not in_code_block:
                in_code_block = True
            else:
                in_code_block = False
            result_lines.append(line)
            continue

        # If we're in a code block, don't modify anything
        if in_code_block:
            result_lines.append(line)
            continue

        # Check if line is a heading (starts with # at beginning of line)
        heading_match = re.match(r"^(#+)\s+(.+)$", line)
        if heading_match:
            current_level = len(heading_match.group(1))
            new_level = min(current_level + shift, 6)  # Cap at H6
            new_heading = "#" * new_level + " " + heading_match.group(2)
            result_lines.append(new_heading)
        else:
            result_lines.append(line)

    return "\n".join(result_lines)


def strip_attribution_footer(content: str) -> str:
    """
    Remove the attribution footer from index content.
    Looks for the pattern: \n---\n\nGenerated by...

    Args:
        content: Markdown text that may contain attribution footer

    Returns:
        Markdown text without attribution footer
    """
    # Split on the attribution separator
    parts = content.split("\n---\n")
    if len(parts) > 1:
        # Check if the last part contains "Generated by"
        last_part = parts[-1].strip()
        if "Generated by" in last_part:
            # Return everything before the attribution
            return "\n---\n".join(parts[:-1])

    return content
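For illustration, a minimal usage sketch of the two helpers above, assuming the wheel is installed; the sample markdown and footer strings are made up for this example and are not part of the package.

from wikigen.utils.adjust_headings import adjust_heading_levels, strip_attribution_footer

sample = "# Title\n\nSome text.\n\n```\n# not a heading inside a code fence\n```\n\n## Section"

# Headings shift down by two levels (capped at H6); fenced lines pass through unchanged.
shifted = adjust_heading_levels(sample, 2)
assert shifted.startswith("### Title")
assert "#### Section" in shifted
assert "# not a heading inside a code fence" in shifted

# The attribution footer is removed only when the text after the final
# "\n---\n" separator contains "Generated by".
page = "Body text\n\n---\n\nGenerated by some tool"
assert strip_attribution_footer(page) == "Body text\n"
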
wikigen/utils/call_llm.py
@@ -0,0 +1,271 @@
import os
import logging
import json
import tempfile
from pathlib import Path
from datetime import datetime

# Configure logging
log_directory = os.getenv("LOG_DIR", "logs")
os.makedirs(log_directory, exist_ok=True)
log_file = os.path.join(
    log_directory, f"llm_calls_{datetime.now().strftime('%Y%m%d')}.log"
)

# Set up logger
logger = logging.getLogger("llm_logger")
logger.setLevel(logging.INFO)
logger.propagate = False  # Prevent propagation to root logger
file_handler = logging.FileHandler(log_file, encoding="utf-8")
file_handler.setFormatter(
    logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
)
logger.addHandler(file_handler)


def get_cache_file_path() -> Path:
    """Get the cache file path in the WikiGen directory."""
    try:
        from ..config import DEFAULT_OUTPUT_DIR

        DEFAULT_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        return DEFAULT_OUTPUT_DIR / "llm_cache.json"
    except ImportError:
        # Fallback to current directory if config module not available
        return Path("llm_cache.json")


def _call_gemini(prompt: str, model: str, api_key: str) -> str:
    """Call Google Gemini API."""
    from google import genai

    client = genai.Client(api_key=api_key)
    response = client.models.generate_content(model=model, contents=[prompt])
    return response.text


def _call_openai(prompt: str, model: str, api_key: str) -> str:
    """Call OpenAI API."""
    from openai import OpenAI

    client = OpenAI(api_key=api_key)

    # Check if model is o1 family (requires special format)
    if model.startswith("o1"):
        r = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            response_format={"type": "text"},
            reasoning_effort="medium",
            store=False,
        )
        return r.choices[0].message.content
    else:
        r = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
        )
        return r.choices[0].message.content


def _call_anthropic(prompt: str, model: str, api_key: str) -> str:
    """Call Anthropic Claude API."""
    from anthropic import Anthropic

    client = Anthropic(api_key=api_key)

    # Check if model supports extended thinking
    thinking_enabled = "sonnet" in model.lower() and "7" in model

    if thinking_enabled:
        response = client.messages.create(
            model=model,
            max_tokens=21000,
            thinking={"type": "enabled", "budget_tokens": 20000},
            messages=[{"role": "user", "content": prompt}],
        )
        # Extended thinking returns content[1].text (the final answer)
        if len(response.content) > 1:
            return response.content[1].text
        return response.content[0].text
    else:
        response = client.messages.create(
            model=model,
            max_tokens=8192,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.content[0].text


def _call_openrouter(prompt: str, model: str, api_key: str) -> str:
    """Call OpenRouter API."""
    import requests

    headers = {"Authorization": f"Bearer {api_key}"}

    data = {"model": model, "messages": [{"role": "user", "content": prompt}]}

    response = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers=headers,
        json=data,
    )

    if response.status_code != 200:
        error_msg = f"OpenRouter API call failed with status {response.status_code}: {response.text}"
        logger.error(error_msg)
        raise Exception(error_msg)

    try:
        response_text = response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        error_msg = (
            f"Failed to parse OpenRouter response: {e}; Response: {response.text}"
        )
        logger.error(error_msg)
        raise Exception(error_msg)

    return response_text


def _call_ollama(prompt: str, model: str, api_key: str = None) -> str:
    """Call Ollama API (local LLM)."""
    import requests
    from ..config import load_config

    config = load_config()
    base_url = config.get("ollama_base_url") or os.getenv(
        "OLLAMA_BASE_URL", "http://localhost:11434"
    )

    url = f"{base_url}/api/generate"

    data = {"model": model, "prompt": prompt, "stream": False}

    try:
        response = requests.post(url, json=data, timeout=300)
        response.raise_for_status()
        return response.json().get("response", "")
    except requests.exceptions.RequestException as e:
        error_msg = f"Ollama API call failed: {e}"
        logger.error(error_msg)
        raise Exception(error_msg)


def _save_cache(cache: dict, cache_file: Path) -> None:
    """Save cache to file using atomic write."""
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(
            mode="w",
            encoding="utf-8",
            dir=cache_file.parent,
            delete=False,
            suffix=".tmp",
        ) as tmp_file:
            json.dump(cache, tmp_file, indent=2)
            tmp_path = Path(tmp_file.name)

        tmp_path.replace(cache_file)
    except (IOError, OSError, PermissionError) as e:
        logger.error(f"Failed to save cache: {e}")
        if tmp_path is not None:
            try:
                if tmp_path.exists():
                    tmp_path.unlink()
            except Exception:
                pass  # Ignore cleanup errors


def call_llm(prompt: str, use_cache: bool = True, api_key: str = None) -> str:
    """Call LLM API based on configured provider."""
    # Log the prompt
    logger.info(f"PROMPT: {prompt}")

    # Load cache once if enabled
    cache = {}
    cache_file = None
    if use_cache:
        cache_file = get_cache_file_path()
        if cache_file.exists():
            try:
                with open(cache_file, "r", encoding="utf-8") as f:
                    cache = json.load(f)
            except (json.JSONDecodeError, IOError, OSError) as e:
                logger.warning(f"Failed to load cache, starting with empty cache: {e}")

    # Return from cache if exists
    if prompt in cache:
        logger.info(f"RESPONSE: {cache[prompt]}")
        return cache[prompt]

    # Get provider and model from config
    try:
        from ..config import get_llm_provider, get_llm_model, get_api_key
        from .llm_providers import requires_api_key, get_provider_info

        provider = get_llm_provider()
        model = get_llm_model()

        # Get API key if required
        if not api_key:
            if requires_api_key(provider):
                api_key = get_api_key()
                if not api_key:
                    from ..metadata import CLI_ENTRY_POINT

                    provider_info = get_provider_info(provider)
                    api_key_env = provider_info.get("api_key_env", "")
                    raise ValueError(
                        f"{api_key_env} not found. Please run '{CLI_ENTRY_POINT} init' to configure your API key, "
                        f"or set the {api_key_env} environment variable."
                    )
            else:
                api_key = None  # Ollama doesn't need API key
    except ImportError:
        # Fallback to Gemini if config module not available
        provider = "gemini"
        model = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
        if not api_key:
            api_key = os.getenv("GEMINI_API_KEY", "")
            if not api_key:
                raise ValueError(
                    "GEMINI_API_KEY not found. Please configure your API key or set the GEMINI_API_KEY environment variable."
                )

    # Route to provider-specific implementation
    try:
        if provider == "gemini":
            response_text = _call_gemini(prompt, model, api_key)
        elif provider == "openai":
            response_text = _call_openai(prompt, model, api_key)
        elif provider == "anthropic":
            response_text = _call_anthropic(prompt, model, api_key)
        elif provider == "openrouter":
            response_text = _call_openrouter(prompt, model, api_key)
        elif provider == "ollama":
            response_text = _call_ollama(prompt, model, api_key)
        else:
            raise ValueError(f"Unknown provider: {provider}")
    except Exception as e:
        logger.error(f"Provider {provider} call failed: {e}")
        raise

    # Log the response
    logger.info(f"RESPONSE: {response_text}")

    # Update cache if enabled
    if use_cache and cache_file:
        cache[prompt] = response_text
        _save_cache(cache, cache_file)

    return response_text


if __name__ == "__main__":
    test_prompt = "Hello, how are you?"

    # First call - should hit the API
    print("Making call...")
    response1 = call_llm(test_prompt, use_cache=False)
    print(f"Response: {response1}")