PyPI - wikigen - Versions diffs - 1.0.0__py3-none-any.whl - Mend

wikigen 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

wikigen/__init__.py +7 -0
wikigen/cli.py +690 -0
wikigen/config.py +526 -0
wikigen/defaults.py +78 -0
wikigen/flows/__init__.py +1 -0
wikigen/flows/flow.py +38 -0
wikigen/formatter/help_formatter.py +194 -0
wikigen/formatter/init_formatter.py +56 -0
wikigen/formatter/output_formatter.py +290 -0
wikigen/mcp/__init__.py +12 -0
wikigen/mcp/chunking.py +127 -0
wikigen/mcp/embeddings.py +69 -0
wikigen/mcp/output_resources.py +65 -0
wikigen/mcp/search_index.py +826 -0
wikigen/mcp/server.py +232 -0
wikigen/mcp/vector_index.py +297 -0
wikigen/metadata/__init__.py +35 -0
wikigen/metadata/logo.py +28 -0
wikigen/metadata/project.py +28 -0
wikigen/metadata/version.py +17 -0
wikigen/nodes/__init__.py +1 -0
wikigen/nodes/nodes.py +1080 -0
wikigen/utils/__init__.py +0 -0
wikigen/utils/adjust_headings.py +72 -0
wikigen/utils/call_llm.py +271 -0
wikigen/utils/crawl_github_files.py +450 -0
wikigen/utils/crawl_local_files.py +151 -0
wikigen/utils/llm_providers.py +101 -0
wikigen/utils/version_check.py +84 -0
wikigen-1.0.0.dist-info/METADATA +352 -0
wikigen-1.0.0.dist-info/RECORD +35 -0
wikigen-1.0.0.dist-info/WHEEL +5 -0
wikigen-1.0.0.dist-info/entry_points.txt +2 -0
wikigen-1.0.0.dist-info/licenses/LICENSE +21 -0
wikigen-1.0.0.dist-info/top_level.txt +1 -0

wikigen/formatter/help_formatter.py ADDED Viewed

@@ -0,0 +1,194 @@
+"""
+Enhanced help formatting for WikiGen CLI.
+Provides colored, structured help output with icons and tree structure.
+"""
+from ..metadata.project import HOMEPAGE_URL, CLI_ENTRY_POINT
+from ..metadata.logo import print_logo
class HelpColors:
    """Foreground escape sequences (ANSI 256-color palette) used by the help screen."""

    # Brightest shade: section headings and emphasized text.
    WHITE = "\x1b[38;5;255m"
    # Shade used for the tree connector glyphs.
    LIGHT_GRAY = "\x1b[38;5;250m"
    # Shade used for command names, labels and descriptions.
    MEDIUM_GRAY = "\x1b[38;5;245m"
    # Dimmest shade: secondary details.
    DARK_GRAY = "\x1b[38;5;240m"
    # Restores the terminal's default attributes.
    RESET = "\x1b[0m"
class HelpIcons:
    """Glyphs placed in front of help headings and entries.

    Several sections intentionally share one glyph, so the shared ones are
    bound together.
    """

    # Diamond: info bullets plus the EXAMPLES and MORE INFO headings.
    INFO = EXAMPLES = MORE_INFO = "◆"
    # Bullseye: USAGE and SOURCE headings.
    USAGE = SOURCE = "◎"
    # Gear: OPTIONS and SUBCOMMANDS headings.
    OPTIONS = SUBCOMMANDS = "⚙"
    ARROW = "→"
    CHECK = "✓"
class HelpTree:
    """Box-drawing fragments used to render help sections as a tree."""

    # Opens a section (top corner).
    START = "┌─"
    # Connector for every entry except the last one.
    MIDDLE = "├─"
    # Connector for the final entry (bottom corner).
    END = "└─"
    # Continuation rail drawn next to multi-line entries.
    VERTICAL = "│"
    # Three blanks, the same width as a connector plus its trailing space.
    SPACE = "   "
def print_enhanced_help():
    """Render the full help screen: the logo first, then each section in order."""
    print_logo()

    # The order below is the order in which sections appear on screen.
    section_printers = (
        _print_usage_section,
        _print_source_section,
        _print_options_section,
        _print_subcommands_section,
        _print_examples_section,
        _print_more_info_section,
    )
    for render_section in section_printers:
        render_section()
def _print_usage_section():
    """Emit the USAGE block: a heading, the command synopsis, then a blank line."""
    tree = HelpColors.LIGHT_GRAY
    reset = HelpColors.RESET

    heading = f"{tree}┌─ {HelpColors.WHITE}{HelpIcons.USAGE} USAGE{reset}"
    synopsis = (
        f"{tree}└─ {HelpColors.MEDIUM_GRAY}{CLI_ENTRY_POINT} "
        f"[-h] run [url|path] [OPTIONS...]{reset}"
    )

    print(heading)
    print(synopsis)
    print()
def _print_source_section():
    """Emit the SOURCE block describing what `run` accepts as its argument."""
    tree = HelpColors.LIGHT_GRAY
    dim = HelpColors.DARK_GRAY
    reset = HelpColors.RESET
    bullet = HelpIcons.INFO

    # One entry per printed line; the wide blank runs align the continuation
    # bullets under the first description.
    rows = (
        f"{tree}┌─ {HelpColors.WHITE}{HelpIcons.SOURCE} SOURCE{reset}",
        f"{tree}├─ {HelpColors.MEDIUM_GRAY}{CLI_ENTRY_POINT} run [url|path]{dim}    {bullet} Generate documentation (auto-detects URL or path){reset}",
        f"{tree}│  {dim}                            {bullet} url: GitHub repository URL (e.g., https://github.com/user/repo){reset}",
        f"{tree}│  {dim}                            {bullet} path: Local directory path (e.g., /path/to/project){reset}",
        f"{tree}└─ {dim}                            {bullet} (no argument): Current directory{reset}",
    )
    for row in rows:
        print(row)
    print()
def _print_options_section():
    """Emit the OPTIONS block: one tree entry per command-line flag."""
    print(
        f"{HelpColors.LIGHT_GRAY}┌─ {HelpColors.WHITE}{HelpIcons.OPTIONS} OPTIONS{HelpColors.RESET}"
    )

    # (flag spelling, one-line description) in display order.
    flag_table = (
        ("-h, --help", "Show this help message and exit"),
        (
            "-n, --name NAME",
            "Project name (optional, derived from repo/directory if omitted)",
        ),
        (
            "-t, --token TOKEN",
            "GitHub personal access token (optional, reads from GITHUB_TOKEN env var)",
        ),
        ("-o, --output OUTPUT", "Base directory for output (default: from config)"),
        ("-i, --include PATTERN", "Include file patterns (e.g. '*.py' '*.js')"),
        ("-e, --exclude PATTERN", "Exclude file patterns (e.g. 'tests/*' 'docs/*')"),
        ("-s, --max-size SIZE", "Maximum file size in bytes (default: from config)"),
        ("--language LANG", "Language for the generated wiki (default: from config)"),
        ("--no-cache", "Disable LLM response caching (default: caching enabled)"),
        (
            "--max-abstractions N",
            "Maximum number of abstractions to identify (default: from config)",
        ),
        (
            "--mode MODE",
            "Documentation mode: minimal or comprehensive (default: from config)",
        ),
        ("--ci", "Enable CI mode (non-interactive, uses defaults)"),
        ("--update", "Update existing documentation instead of overwriting"),
        ("--output-path PATH", "Custom output path for documentation"),
        ("--check-changes", "Exit with code 1 if docs changed (for CI checks)"),
    )

    final_row = len(flag_table) - 1
    for row, (flag, summary) in enumerate(flag_table):
        # Only the last entry closes the tree with the bottom-corner glyph.
        branch = HelpTree.END if row == final_row else HelpTree.MIDDLE
        print(
            f"{HelpColors.LIGHT_GRAY}{branch}{HelpColors.RESET} "
            f"{HelpColors.MEDIUM_GRAY}{flag:<25}{HelpColors.DARK_GRAY} "
            f"{HelpIcons.INFO} {summary}{HelpColors.RESET}"
        )
    print()
def _print_subcommands_section():
    """Emit the SUBCOMMANDS block listing `run`, `init` and `config`."""
    tree = HelpColors.LIGHT_GRAY
    name_color = HelpColors.MEDIUM_GRAY
    dim = HelpColors.DARK_GRAY
    reset = HelpColors.RESET
    bullet = HelpIcons.INFO

    # The blank runs after each name pad the descriptions into one column.
    rows = (
        f"{tree}┌─ {HelpColors.WHITE}{HelpIcons.SUBCOMMANDS} SUBCOMMANDS{reset}",
        f"{tree}├─ {name_color}run [url|path]{dim}        {bullet} Generate documentation (auto-detects URL or path){reset}",
        f"{tree}├─ {name_color}init{dim}                  {bullet} Set up configuration{reset}",
        f"{tree}└─ {name_color}config <command>{dim}      {bullet} Manage configuration{reset}",
    )
    for row in rows:
        print(row)
    print()
def _print_examples_section():
    """Emit the EXAMPLES block: sample invocations rendered as a tree."""
    print(
        f"{HelpColors.LIGHT_GRAY}┌─ {HelpColors.WHITE}{HelpIcons.EXAMPLES} EXAMPLES{HelpColors.RESET}"
    )

    cmd = CLI_ENTRY_POINT
    bullet = HelpIcons.INFO
    # The first three samples carry an inline explanation after the bullet.
    samples = (
        f"{cmd} run                                    {bullet}  Current directory",
        f"{cmd} run https://github.com/user/repo       {bullet}  GitHub repo",
        f"{cmd} run /path/to/project                   {bullet}  Local directory",
        f"{cmd} init",
        f"{cmd} config show",
        f'{cmd} config update-gemini-key "your-key"',
    )

    final_row = len(samples) - 1
    for row, sample in enumerate(samples):
        branch = HelpTree.END if row == final_row else HelpTree.MIDDLE
        print(
            f"{HelpColors.LIGHT_GRAY}{branch}{HelpColors.RESET} "
            f"{HelpColors.MEDIUM_GRAY}{sample}{HelpColors.RESET}"
        )
    print()
def _print_more_info_section():
    """Emit the closing MORE INFO block that points at the project homepage."""
    tree = HelpColors.LIGHT_GRAY
    bright = HelpColors.WHITE
    reset = HelpColors.RESET

    heading = f"{tree}┌─ {bright}{HelpIcons.MORE_INFO} MORE INFO{reset}"
    link = f"{tree}└─ {HelpColors.MEDIUM_GRAY}Visit: {bright}{HOMEPAGE_URL}{reset}"

    print(heading)
    print(link)

wikigen/formatter/init_formatter.py ADDED Viewed

@@ -0,0 +1,56 @@
+"""
+Init mode formatter for WikiGen CLI.
+Provides structured visual output for the configuration setup process.
+"""
+from .output_formatter import Colors, Icons, Tree
+from ..metadata.logo import print_logo
def print_init_header():
    """Show the logo, a spacer line, and the opening line of the setup tree."""
    print_logo()
    print()  # spacer between the logo and the tree

    title = f"{Colors.WHITE}{Icons.CONFIG} Configuration Setup{Colors.RESET}"
    print(f"{Colors.LIGHT_GRAY}{Tree.START} {title}")
def print_section_start(name, icon):
    """Print a first-level tree entry that introduces a configuration section.

    Args:
        name: Section title.
        icon: Glyph shown before the title.
    """
    connector = f"{Colors.LIGHT_GRAY}{Tree.MIDDLE}"
    title = f"{Colors.WHITE}{icon} {name}{Colors.RESET}"
    print(f"{connector} {title}")
def print_input_prompt(label, icon, is_required=True, default_value=None):
    """Print a nested prompt line, then an arrow line that leaves the cursor in place.

    Args:
        label: Name of the value being asked for.
        icon: Glyph shown before the label.
        is_required: Selects the "(required)" or "(optional, ...)" suffix.
        default_value: Shown in square brackets when truthy; omitted otherwise.
    """
    if is_required:
        requirement = " (required)"
    else:
        requirement = " (optional, press Enter to skip)"
    default_hint = f" [{default_value}]" if default_value else ""

    # Shared left edge: the parent's vertical rail plus the nested indent.
    rail = f"{Colors.LIGHT_GRAY}{Tree.VERTICAL}  {Colors.LIGHT_GRAY}"

    print(
        f"{rail}{Tree.MIDDLE} "
        f"{Colors.MEDIUM_GRAY}{icon} {label}{requirement}{default_hint}{Colors.RESET}"
    )
    # No trailing newline, so whatever is written next continues after the arrow.
    print(f"{rail}{Tree.VERTICAL}  {Colors.MEDIUM_GRAY}→ {Colors.RESET}", end="")
def print_init_complete(config_path, output_dir, keyring_available):
    """Close the setup tree and summarize where things were stored.

    Args:
        config_path: Location shown in the "Saved to" line.
        output_dir: Directory shown on the final line.
        keyring_available: Truthy selects the "Enabled" keyring status text.
    """
    if keyring_available:
        keyring_status = "Enabled (secure storage)"
    else:
        keyring_status = "Not available (saved to config file)"

    done = f"{Colors.WHITE}{Icons.SUCCESS}"
    summary = (
        f"{Colors.LIGHT_GRAY}{Tree.END} {done} Configuration Complete{Colors.RESET}",
        "",
        f"{done} Saved to {config_path}{Colors.RESET}",
        f"{Colors.MEDIUM_GRAY}{Icons.INFO} Keyring: {keyring_status}{Colors.RESET}",
        f"{Colors.MEDIUM_GRAY}📂 {Colors.WHITE}{output_dir}{Colors.RESET}",
    )
    for line in summary:
        print(line)

wikigen/formatter/output_formatter.py ADDED Viewed

@@ -0,0 +1,290 @@
+"""
+Output formatting utilities for WikiGen CLI.
+Provides tree-structured output with icons, colors, and timing.
+"""
class Colors:
    """ANSI 256-color foreground codes, chosen to read on light and dark backgrounds."""

    # Phase headings and success lines.
    WHITE = "\x1b[38;5;255m"
    # Tree connector glyphs.
    LIGHT_GRAY = "\x1b[38;5;250m"
    # Operation text.
    MEDIUM_GRAY = "\x1b[38;5;245m"
    # Timings and file sizes.
    DARK_GRAY = "\x1b[38;5;240m"
    # Restores the terminal's default attributes.
    RESET = "\x1b[0m"
class Icons:
    """Unicode glyphs that label each kind of operation in CLI output."""

    # --- configuration ---
    CONFIG = "⚙"
    INFO = "◆"

    # --- repository access ---
    CRAWLING = "◎"
    DOWNLOAD = "↓"
    SKIP = "○"

    # --- LLM work ---
    PROCESSING = "⟳"
    ANALYZING = "◉"
    ORDERING = "◈"

    # --- content generation ---
    WRITING = "✎"
    GENERATING = "◊"

    # --- file output ---
    CREATING = "▸"

    # --- outcome markers ---
    SUCCESS = "✓"
    ERROR = "✗"
    WARNING = "⚠"
class Tree:
    """Box-drawing fragments from which the output tree is assembled."""

    # Opens a phase (top corner).
    START = "┌─"
    # Connector for any entry that is followed by a sibling.
    MIDDLE = "├─"
    # Connector for the last entry at a level (bottom corner).
    END = "└─"
    # Rail that carries a parent level past nested entries.
    VERTICAL = "│"
    # Three blanks, the same width as a connector plus its trailing space.
    SPACE = "   "
class PhaseTracker:
    """Bookkeeping for the phase currently being printed.

    Attributes (all plain instance attributes):
        depth: Reset to 0 whenever a phase starts or ends.
        in_phase: True between start_phase() and end_phase().
        phase_items: Number of add_item() calls since the last start_phase().
    """

    def __init__(self):
        self.depth, self.in_phase, self.phase_items = 0, False, 0

    def start_phase(self):
        """Enter a phase and clear the per-phase counters."""
        self.depth = 0
        self.phase_items = 0
        self.in_phase = True

    def end_phase(self):
        """Leave the phase; the item count is kept until the next start."""
        self.depth = 0
        self.in_phase = False

    def add_item(self):
        """Count one more printed item."""
        self.phase_items = self.phase_items + 1


# Single module-wide tracker shared by the print helpers in this module.
_tracker = PhaseTracker()
def format_time(seconds):
    """Render a duration in seconds as a bracketed tag with one decimal, e.g. "[2.3s]"."""
    return "[{:.1f}s]".format(seconds)
def format_size(bytes_size):
    """Render a byte count as "N bytes", "X.X KB" or "X.X MB" (1 KB = 1024 bytes)."""
    kib = 1024
    mib = kib * kib

    # Below one KB the raw count is shown unchanged.
    if bytes_size < kib:
        return f"{bytes_size} bytes"
    if bytes_size < mib:
        return "{:.1f} KB".format(bytes_size / kib)
    return "{:.1f} MB".format(bytes_size / mib)
def print_header(version=None):
    """Print the "WikiGen v<version>" banner line.

    Args:
        version: Version text to show. When None, the package's own
            ``__version__`` is looked up at call time.
    """
    if version is None:
        # Resolved lazily, only when the caller did not supply a version.
        from ..metadata import __version__ as version

    print(f"{Colors.WHITE}WikiGen {Colors.LIGHT_GRAY}v{version}{Colors.RESET}")
def print_info(label, value):
    """Print one "<icon> label: value" line, with the value highlighted."""
    key_part = f"{Colors.MEDIUM_GRAY}{Icons.INFO} {label}: "
    value_part = f"{Colors.WHITE}{value}{Colors.RESET}"
    print(key_part + value_part)
def print_phase_start(name, icon):
    """Open a new top-level phase in the output tree.

    Resets the shared tracker, prints a blank separator line, then prints
    the phase heading, e.g. "┌─ ◎ Repository Crawling".

    Args:
        name: Phase title.
        icon: Glyph shown before the title.
    """
    _tracker.start_phase()

    heading = (
        f"{Colors.LIGHT_GRAY}{Tree.START} "
        f"{Colors.WHITE}{icon} {name}{Colors.RESET}"
    )
    print()  # separator before the phase
    print(heading)
def print_operation(text, icon=None, indent=1, is_last=False, elapsed_time=None):
    """
    Print an operation within a phase with proper tree structure.

    Example: "├─ ↓ Downloading files [2.3s]"

    Args:
        text: Operation description
        icon: Icon to display (optional; omitted when falsy)
        indent: Indentation level (1 for direct child, 2 for nested).
            A value of 0 or less prints the text with no tree prefix.
        is_last: Whether this is the last item at this level
        elapsed_time: Optional elapsed time in seconds to display inline
    """
    _tracker.add_item()

    # Every level above the item's own draws a vertical rail; the item's own
    # level draws the connector (corner for the last sibling, tee otherwise).
    prefix = ""
    if indent > 0:
        rail = Colors.LIGHT_GRAY + Tree.VERTICAL + "  "
        connector = Tree.END if is_last else Tree.MIDDLE
        prefix = rail * (indent - 1) + Colors.LIGHT_GRAY + connector + " "

    label = f"{icon} {text}" if icon else text
    formatted_text = f"{Colors.MEDIUM_GRAY}{label}{Colors.RESET}"

    if elapsed_time is not None:
        # format_time() already returns the bracketed form "[X.Xs]"; adding
        # another pair of brackets here would print "[[X.Xs]]".
        formatted_text += (
            f" {Colors.DARK_GRAY}{format_time(elapsed_time)}{Colors.RESET}"
        )

    print(f"{prefix}{formatted_text}")
def print_success(text, elapsed_time=None, indent=1):
    """
    Print a success line that closes the current tree level.

    Example: "└─ ✓ Complete (43 files, 85.2 KB) [2.3s]"

    Args:
        text: Message shown after the success icon.
        elapsed_time: Optional duration in seconds appended as a timing tag.
        indent: Tree depth; levels above the last one draw a vertical rail.
            A value of 0 or less prints no tree prefix.
    """
    # The success line always uses the bottom-corner connector.
    if indent > 0:
        rail = Colors.LIGHT_GRAY + Tree.VERTICAL + "  "
        prefix = rail * (indent - 1) + Colors.LIGHT_GRAY + Tree.END + " "
    else:
        prefix = ""

    if elapsed_time is None:
        timing = ""
    else:
        timing = f" {Colors.DARK_GRAY}{format_time(elapsed_time)}{Colors.RESET}"

    print(f"{prefix}{Colors.WHITE}{Icons.SUCCESS} {text}{timing}{Colors.RESET}")
def print_phase_end():
    """Print a lone vertical rail line, then mark the phase as finished."""
    rail_line = f"{Colors.LIGHT_GRAY}{Tree.VERTICAL}{Colors.RESET}"
    print(rail_line)
    _tracker.end_phase()
def print_final_success(message, total_time, output_path):
    """
    Print the closing summary: a success line with the total time, then the
    output location.

    Example:
    ✓ Success! Documents generated [66.2s] total
    📂 /Users/.../output/

    Args:
        message: Text shown after the success icon.
        total_time: Overall duration in seconds.
        output_path: Location printed on the second line.
    """
    headline = (
        f"{Colors.WHITE}{Icons.SUCCESS} {message} "
        f"{Colors.DARK_GRAY}{format_time(total_time)} total{Colors.RESET}"
    )
    location = f"{Colors.MEDIUM_GRAY}📂 {Colors.WHITE}{output_path}{Colors.RESET}"

    print()  # separator before the summary
    print(headline)
    print(location)
def print_error_missing_api_key(provider_display: str = "API"):
    """Explain that no API key was found and how to configure one.

    Args:
        provider_display: Provider name placed in front of "API key not found".
    """
    from ..metadata import CLI_ENTRY_POINT

    note = Colors.MEDIUM_GRAY
    bright = Colors.WHITE
    reset = Colors.RESET

    lines = (
        "",
        f"{bright}{Icons.ERROR} Error: {provider_display} API key not found{reset}",
        f"{note}  To configure your API key, run:{reset}",
        f"{bright}    {CLI_ENTRY_POINT} config update-api-key <provider>{reset}",
        f"{note}  Or set the appropriate API key environment variable{reset}",
    )
    for line in lines:
        print(line)
def print_error_invalid_api_key():
    """Explain that the API key was rejected and how to replace it."""
    from ..metadata import CLI_ENTRY_POINT

    note = Colors.MEDIUM_GRAY
    bright = Colors.WHITE
    reset = Colors.RESET

    lines = (
        "",
        f"{bright}{Icons.ERROR} Error: Invalid or unauthorized API key{reset}",
        f"{note}  Your API key may be invalid or expired.{reset}",
        f"{note}  To update your API key, run:{reset}",
        f"{bright}    {CLI_ENTRY_POINT} config update-api-key <provider>{reset}",
    )
    for line in lines:
        print(line)
def print_error_rate_limit():
    """Print error message for rate limit errors.

    The hint steers the user away from --no-cache: that flag disables LLM
    response caching, so using it causes more API calls, not fewer.
    """
    print()
    print(f"{Colors.WHITE}{Icons.ERROR} Error: Rate limit exceeded{Colors.RESET}")
    print(
        f"{Colors.MEDIUM_GRAY}  You've hit the API rate limit. Please wait and try again.{Colors.RESET}"
    )
    print(
        f"{Colors.MEDIUM_GRAY}  Avoid the --no-cache flag so cached responses are reused and fewer API calls are made.{Colors.RESET}"
    )
def print_error_network():
    """Report that the API could not be reached."""
    headline = f"{Colors.WHITE}{Icons.ERROR} Error: Network connection issue{Colors.RESET}"
    advice = (
        f"{Colors.MEDIUM_GRAY}  Unable to connect to the API. "
        f"Please check your internet connection.{Colors.RESET}"
    )

    print()
    print(headline)
    print(advice)
def print_error_general(error):
    """Report an unexpected failure together with its string form.

    Args:
        error: The exception (or any object); its ``str()`` is printed.
    """
    detail = str(error)
    note = Colors.MEDIUM_GRAY
    reset = Colors.RESET

    lines = (
        "",
        f"{Colors.WHITE}{Icons.ERROR} Error: An unexpected error occurred{reset}",
        f"{note}  {detail}{reset}",
        f"{note}  Please check your configuration and try again.{reset}",
    )
    for line in lines:
        print(line)
def print_update_notification(current_version: str, latest_version: str):
    """
    Tell the user that a newer release exists and how to install it.

    Args:
        current_version: Version shown on the left of the arrow.
        latest_version: Version shown on the right of the arrow.
    """
    bright = Colors.WHITE
    note = Colors.MEDIUM_GRAY
    reset = Colors.RESET

    versions = (
        f"{bright}{Icons.INFO} Update available: "
        f"{note}v{current_version}"
        f"{bright} → {bright}v{latest_version}{reset}"
    )
    how_to = f"{note}  To upgrade, run: {bright}pip install --upgrade wikigen{reset}"

    print()
    print(versions)
    print(how_to)

wikigen/mcp/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+"""MCP (Model Context Protocol) server for wikigen."""
def run_mcp_server():
    """Start the MCP server by delegating to ``.server.run_mcp_server``."""
    # Imported inside the function so that importing this package does not
    # itself require the server module's dependencies.
    from .server import run_mcp_server as _serve

    _serve()  # no result is used; the call runs the server


__all__ = ["run_mcp_server"]

wikigen/mcp/chunking.py ADDED Viewed

@@ -0,0 +1,127 @@
+"""Markdown chunking utilities for semantic search.
+This module provides intelligent chunking of markdown documents that respects
+markdown structure (headers, code blocks) while maintaining configurable
+chunk sizes and overlaps.
+"""
+import re
+from typing import List, Dict, Any
# Fenced code blocks; the lazy match pairs each opening fence with the next one.
_CODE_FENCE_RE = re.compile(r"```[\s\S]*?```")
# A markdown header at the start of a line (preferred break point).
_HEADER_BREAK_RE = re.compile(r"\n#{1,6}\s+")
# Fallback break points, tried in order:
# (pattern, chars to look back from the target, chars to look ahead).
_FALLBACK_BREAKS = (
    (re.compile(r"\n\n+"), 200, 100),  # paragraph boundary
    (re.compile(r"[.!?]\s+"), 100, 50),  # sentence boundary
    (re.compile(r"\s+"), 50, 50),  # word boundary
)


def _find_break(content: str, earliest: int, target: int) -> int:
    """Return the best position at or after ``earliest`` to end a chunk near ``target``."""
    # Headers win over everything else; the chunk ends just before the header.
    header = _HEADER_BREAK_RE.search(content, earliest, target + 100)
    if header:
        return header.start()
    for pattern, look_back, look_ahead in _FALLBACK_BREAKS:
        # Clamping to `earliest` keeps the window positive and after the chunk start.
        window_start = max(earliest, target - look_back)
        match = pattern.search(content, window_start, target + look_ahead)
        if match:
            return match.end()
    return target


def _leave_code_block(code_spans, pos: int) -> int:
    """Return the end of the code block that strictly contains ``pos``, else ``pos``."""
    for start, end in code_spans:
        if start < pos < end:
            return end
    return pos


def chunk_markdown(
    content: str,
    chunk_size: int = 500,
    overlap: int = 50,
    min_chunk_chars: int = 100,
) -> List[Dict[str, Any]]:
    """
    Chunk markdown content intelligently, respecting structure.

    This function chunks markdown text while:
    - Preserving code blocks (a chunk neither ends nor starts inside one)
    - Respecting headers (prefer chunking at headers)
    - Maintaining configurable chunk size and overlap
    - Preserving context across chunks

    Break points are only looked for in the second half of a chunk, so every
    chunk except the last spans at least half the target size and no text
    between two chunks is skipped.

    Args:
        content: The markdown content to chunk
        chunk_size: Target chunk size in tokens (approximated as 4 characters
            per token). Values below 1 are treated as 1.
        overlap: Number of tokens to overlap between chunks. Negative values
            are treated as 0.
        min_chunk_chars: Chunks whose stripped text is shorter than this are
            dropped to avoid tiny fragments. With the default of 100, content
            shorter than 100 characters yields no chunks; pass 0 to keep
            every non-empty chunk.

    Returns:
        List of dictionaries with chunk information:
        - 'content': The chunk text (surrounding whitespace stripped)
        - 'start_pos': Starting position in original content
        - 'end_pos': Ending position in original content (exclusive)
        - 'chunk_index': Index of this chunk (0-based)
    """
    if not content:
        return []

    # Approximate tokens: roughly 4 characters per token.
    char_size = max(chunk_size, 1) * 4
    char_overlap = max(overlap, 0) * 4
    # Each step advances at least half a chunk, which also guarantees termination.
    min_progress = max(char_size // 2, 1)

    total = len(content)
    code_spans = [match.span() for match in _CODE_FENCE_RE.finditer(content)]

    chunks: List[Dict[str, Any]] = []
    current_pos = 0
    while current_pos < total:
        end_pos = min(current_pos + char_size, total)
        if end_pos < total:
            end_pos = _find_break(content, current_pos + min_progress, end_pos)
            # Never cut a fenced code block in half: extend to its closing fence.
            end_pos = _leave_code_block(code_spans, end_pos)

        chunk_text = content[current_pos:end_pos].strip()
        if chunk_text and len(chunk_text) >= min_chunk_chars:
            chunks.append(
                {
                    "content": chunk_text,
                    "start_pos": current_pos,
                    "end_pos": end_pos,
                    "chunk_index": len(chunks),
                }
            )

        if end_pos >= total:
            break

        # Step back by the overlap, but always advance by at least min_progress.
        next_start = max(end_pos - char_overlap, current_pos + min_progress)
        # Do not begin the next chunk in the middle of a code block.
        next_start = _leave_code_block(code_spans, next_start)
        # Never start past the end of this chunk, so no text is skipped.
        current_pos = min(next_start, end_pos)

    return chunks