wikigen 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,69 @@
1
+ """Local embedding generation for semantic search.
2
+
3
+ This module provides local embedding generation using sentence-transformers
4
+ for privacy-preserving semantic search without API calls.
5
+ """
6
+
7
+ from typing import List, Optional
8
+ import numpy as np
9
+ from sentence_transformers import SentenceTransformer
10
+
11
+
12
# Global model cache to avoid reloading. Models are keyed by name so that a
# request for one model can never be answered with a different, previously
# loaded model.
_embedding_models = {}  # model name -> loaded SentenceTransformer

# Most recently loaded model. Kept so that any code reading this module-level
# name keeps working; lookups go through ``_embedding_models``.
_embedding_model: Optional[SentenceTransformer] = None


def load_embedding_model(model_name: str = "all-MiniLM-L6-v2") -> SentenceTransformer:
    """
    Load the embedding model, reusing a cached instance when available.

    Each distinct ``model_name`` is instantiated at most once per process and
    then served from the module-level cache on later calls.

    Args:
        model_name: Name of the sentence-transformers model to use

    Returns:
        Loaded SentenceTransformer model for ``model_name``
    """
    global _embedding_model

    model = _embedding_models.get(model_name)
    if model is None:
        # First request for this name: construct the model and remember it.
        model = SentenceTransformer(model_name)
        _embedding_models[model_name] = model

    _embedding_model = model
    return model
30
+
31
+
32
def get_embedding(text: str, model_name: str = "all-MiniLM-L6-v2") -> np.ndarray:
    """
    Encode one text into its embedding vector.

    Args:
        text: Text to embed
        model_name: Name of the sentence-transformers model to use

    Returns:
        The embedding produced by the model's ``encode`` call, requested as a
        NumPy array
    """
    # The model comes from the shared loader, so it is not rebuilt per call.
    return load_embedding_model(model_name).encode(text, convert_to_numpy=True)
46
+
47
+
48
def get_embeddings_batch(
    texts: List[str], model_name: str = "all-MiniLM-L6-v2", batch_size: int = 32
) -> np.ndarray:
    """
    Generate embeddings for a batch of texts (more efficient than one by one).

    Args:
        texts: List of texts to embed
        model_name: Name of the sentence-transformers model to use
        batch_size: Batch size for processing

    Returns:
        NumPy array of shape (len(texts), embedding_dim) containing embeddings.
        For empty input an empty 2-D array of shape (0, 0) is returned; the
        embedding dimension is not known at that point because the model is
        deliberately not loaded.
    """
    if not texts:
        # Keep the result two-dimensional so callers can rely on ``ndim == 2``
        # and ``shape[0] == len(texts)`` without special-casing empty input.
        return np.empty((0, 0))

    model = load_embedding_model(model_name)
    return model.encode(
        texts,
        convert_to_numpy=True,
        batch_size=batch_size,
        show_progress_bar=False,
    )
@@ -0,0 +1,65 @@
1
+ """Output directory resource mapping for MCP server.
2
+
3
+ This module provides resource name mapping for files in the configured output directory.
4
+ Resource names are derived from file paths relative to the output directory.
5
+ """
6
+
7
+ from pathlib import Path
8
+ from typing import Dict
9
+
10
+ from ..config import get_output_dir
11
+
12
+
13
def discover_projects(output_dir: Path) -> Dict[str, Path]:
    """
    Discover all markdown documentation files in the output directory.

    Searches recursively for ``.md`` files and derives each resource name from
    the file's path relative to ``output_dir`` with the extension removed:
    - Direct files: output/file.md -> key: "file"
    - Nested files: output/folder/file.md -> key: "folder/file"

    Resource names always use "/" as the separator, regardless of platform.
    Files located under a hidden directory (any parent path component starting
    with ".") are skipped; a hidden file inside a visible directory is kept.

    Args:
        output_dir: Base directory where documentation is stored

    Returns:
        Dictionary mapping resource names to their documentation file paths.
        Empty when ``output_dir`` does not exist or is not a directory.
    """
    projects: Dict[str, Path] = {}

    if not output_dir.is_dir():
        return projects

    for md_file in output_dir.rglob("*.md"):
        # rglob matches on the name only, so a directory called "notes.md"
        # would otherwise be reported as a document.
        if not md_file.is_file():
            continue

        try:
            relative_path = md_file.relative_to(output_dir)
        except ValueError:
            # File is not relative to output_dir (shouldn't happen, but safety check)
            continue

        # Skip anything under a hidden directory (e.g., .git, .cursor).
        # Only parent components are checked, not the file name itself.
        if any(part.startswith(".") for part in relative_path.parts[:-1]):
            continue

        # as_posix() keeps keys in the documented "folder/file" form on
        # Windows too, where str() would produce backslashes.
        resource_name = relative_path.with_suffix("").as_posix()
        projects[resource_name] = md_file

    return projects
55
+
56
+
57
def discover_all_projects() -> Dict[str, Path]:
    """
    Discover markdown files under the configured output directory.

    Looks up the output directory via ``get_output_dir()`` and passes it to
    ``discover_projects``.

    Returns:
        Dictionary mapping resource names to their documentation file paths
    """
    return discover_projects(get_output_dir())