wikigen 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wikigen/__init__.py +7 -0
- wikigen/cli.py +690 -0
- wikigen/config.py +526 -0
- wikigen/defaults.py +78 -0
- wikigen/flows/__init__.py +1 -0
- wikigen/flows/flow.py +38 -0
- wikigen/formatter/help_formatter.py +194 -0
- wikigen/formatter/init_formatter.py +56 -0
- wikigen/formatter/output_formatter.py +290 -0
- wikigen/mcp/__init__.py +12 -0
- wikigen/mcp/chunking.py +127 -0
- wikigen/mcp/embeddings.py +69 -0
- wikigen/mcp/output_resources.py +65 -0
- wikigen/mcp/search_index.py +826 -0
- wikigen/mcp/server.py +232 -0
- wikigen/mcp/vector_index.py +297 -0
- wikigen/metadata/__init__.py +35 -0
- wikigen/metadata/logo.py +28 -0
- wikigen/metadata/project.py +28 -0
- wikigen/metadata/version.py +17 -0
- wikigen/nodes/__init__.py +1 -0
- wikigen/nodes/nodes.py +1080 -0
- wikigen/utils/__init__.py +0 -0
- wikigen/utils/adjust_headings.py +72 -0
- wikigen/utils/call_llm.py +271 -0
- wikigen/utils/crawl_github_files.py +450 -0
- wikigen/utils/crawl_local_files.py +151 -0
- wikigen/utils/llm_providers.py +101 -0
- wikigen/utils/version_check.py +84 -0
- wikigen-1.0.0.dist-info/METADATA +352 -0
- wikigen-1.0.0.dist-info/RECORD +35 -0
- wikigen-1.0.0.dist-info/WHEEL +5 -0
- wikigen-1.0.0.dist-info/entry_points.txt +2 -0
- wikigen-1.0.0.dist-info/licenses/LICENSE +21 -0
- wikigen-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Local embedding generation for semantic search.
|
|
2
|
+
|
|
3
|
+
This module provides local embedding generation using sentence-transformers
|
|
4
|
+
for privacy-preserving semantic search without API calls.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from typing import List, Optional
|
|
8
|
+
import numpy as np
|
|
9
|
+
from sentence_transformers import SentenceTransformer
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Global model cache to avoid reloading
|
|
13
|
+
_embedding_model: Optional[SentenceTransformer] = None
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def load_embedding_model(model_name: str = "all-MiniLM-L6-v2") -> SentenceTransformer:
    """
    Load a sentence-transformers model, caching one instance per model name.

    Previously a single cached instance was returned for every call, so a
    request for a different ``model_name`` after the first load silently
    received the first model. Models are now cached per name.

    Args:
        model_name: Name of the sentence-transformers model to use

    Returns:
        Loaded SentenceTransformer model for ``model_name``
    """
    global _embedding_model

    # The per-name cache lives on the function object so this block does not
    # depend on any additional module-level state.
    models_by_name = load_embedding_model.__dict__.setdefault("_models_by_name", {})

    model = models_by_name.get(model_name)
    if model is None:
        model = SentenceTransformer(model_name)
        models_by_name[model_name] = model

    # Keep the module-level handle pointing at the most recently requested
    # model for any code that still reads it directly.
    _embedding_model = model
    return model
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_embedding(text: str, model_name: str = "all-MiniLM-L6-v2") -> np.ndarray:
    """
    Embed one piece of text.

    Obtains the model through ``load_embedding_model`` and encodes ``text``
    with ``convert_to_numpy=True``.

    Args:
        text: Text to embed
        model_name: Name of the sentence-transformers model to use

    Returns:
        NumPy array holding the embedding vector for ``text``
    """
    return load_embedding_model(model_name).encode(text, convert_to_numpy=True)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def get_embeddings_batch(
    texts: List[str], model_name: str = "all-MiniLM-L6-v2", batch_size: int = 32
) -> np.ndarray:
    """
    Embed several texts in one call.

    Args:
        texts: List of texts to embed
        model_name: Name of the sentence-transformers model to use
        batch_size: Batch size passed through to the model's ``encode`` call

    Returns:
        NumPy array of embeddings, one row per input text. When ``texts`` is
        empty the model is not loaded and an empty 1-D array
        (``np.array([])``) is returned instead.
    """
    if texts:
        encoder = load_embedding_model(model_name)
        return encoder.encode(
            texts,
            convert_to_numpy=True,
            batch_size=batch_size,
            show_progress_bar=False,
        )

    # Nothing to encode: skip loading the model entirely.
    return np.array([])
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Output directory resource mapping for MCP server.
|
|
2
|
+
|
|
3
|
+
This module provides resource name mapping for files in the configured output directory.
|
|
4
|
+
Resource names are derived from file paths relative to the output directory.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Dict
|
|
9
|
+
|
|
10
|
+
from ..config import get_output_dir
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def discover_projects(output_dir: Path) -> Dict[str, Path]:
    """
    Discover all markdown documentation files in the output directory.

    Searches recursively for ``*.md`` files and maps each one to a resource
    name derived from its path relative to ``output_dir``:
    - Direct files: output/file.md -> key: "file"
    - Nested files: output/folder/file.md -> key: "folder/file"

    Resource names always use forward slashes, regardless of platform.
    Entries located under a hidden directory (any parent component starting
    with ".") are skipped, as are directories whose name happens to end in
    ".md". Files are visited in sorted path order so the resulting mapping
    has a deterministic order.

    Args:
        output_dir: Base directory where documentation is stored

    Returns:
        Dictionary mapping resource names to their documentation file paths;
        empty if ``output_dir`` is missing or is not a directory
    """
    projects: Dict[str, Path] = {}

    if not output_dir.is_dir():
        return projects

    for md_file in sorted(output_dir.rglob("*.md")):
        # rglob also matches directories named like "something.md"; only
        # regular files are documentation.
        if not md_file.is_file():
            continue

        try:
            relative_path = md_file.relative_to(output_dir)
        except ValueError:
            # File is not relative to output_dir (shouldn't happen, but safety check)
            continue

        # Skip anything inside a hidden directory (e.g., .git, .cursor).
        # Only parent components are checked, not the file name itself.
        if any(part.startswith(".") for part in relative_path.parts[:-1]):
            continue

        # Drop the .md extension; as_posix() keeps "/" as the separator so
        # resource names are identical on every platform.
        resource_name = relative_path.with_suffix("").as_posix()
        projects[resource_name] = md_file

    return projects
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def discover_all_projects() -> Dict[str, Path]:
    """
    Discover markdown files under the configured output directory.

    Looks up the directory via ``get_output_dir`` and delegates the search to
    ``discover_projects``.

    Returns:
        Dictionary mapping resource names to their documentation file paths
    """
    return discover_projects(get_output_dir())
|