wcgw-5.5.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,289 @@
+ import os
+ from collections import deque
+ from pathlib import Path  # Still needed for other parts
+ from typing import Optional
+
+ from pygit2 import GitError, Repository
+ from pygit2.enums import SortMode
+
+ from .display_tree import DirectoryTree
+ from .file_stats import load_workspace_stats
+ from .path_prob import FastPathAnalyzer
+
+ curr_folder = Path(__file__).parent
+ vocab_file = curr_folder / "paths_model.vocab"
+ model_file = curr_folder / "paths_tokens.model"
+ PATH_SCORER = FastPathAnalyzer(str(model_file), str(vocab_file))
+
+
+ def find_ancestor_with_git(path: Path) -> Optional[Repository]:
+     if path.is_file():
+         path = path.parent
+
+     try:
+         return Repository(str(path))
+     except GitError:
+         return None
+
+
+ MAX_ENTRIES_CHECK = 100_000
+
+
+ def get_all_files_max_depth(
+     abs_folder: str,
+     max_depth: int,
+     repo: Optional[Repository],
+ ) -> list[str]:
+     """BFS implementation using a deque that maintains relative paths during traversal.
+     Returns the list of relative file paths found; traversal stops after
+     MAX_ENTRIES_CHECK directory entries have been examined."""
+     all_files = []
+     # Queue stores: (folder_path, depth, rel_path_prefix)
+     queue = deque([(abs_folder, 0, "")])
+     entries_check = 0
+     while queue and entries_check < MAX_ENTRIES_CHECK:
+         current_folder, depth, prefix = queue.popleft()
+
+         if depth > max_depth:
+             continue
+
+         try:
+             entries = list(os.scandir(current_folder))
+         except OSError:  # includes PermissionError
+             continue
+         # Split into files and folders with a single scan
+         files = []
+         folders = []
+         for entry in entries:
+             entries_check += 1
+             try:
+                 is_file = entry.is_file(follow_symlinks=False)
+             except OSError:
+                 continue
+             name = entry.name
+             rel_path = f"{prefix}{name}" if prefix else name
+
+             if repo and repo.path_is_ignored(rel_path):
+                 continue
+
+             if is_file:
+                 files.append(rel_path)
+             else:
+                 folders.append((entry.path, rel_path))
+
+         # Process files first (maintain priority)
+         chunk = files[: min(10_000, max(0, MAX_ENTRIES_CHECK - entries_check))]
+         all_files.extend(chunk)
+
+         # Add folders to the queue for BFS traversal
+         for folder_path, folder_rel_path in folders:
+             next_prefix = f"{folder_rel_path}/"
+             queue.append((folder_path, depth + 1, next_prefix))
+
+     return all_files
+
+
+ def get_recent_git_files(repo: Repository, count: int = 10) -> list[str]:
+     """
+     Get the most recently modified files from git history.
+
+     Args:
+         repo: The git repository
+         count: Number of recent files to return
+
+     Returns:
+         List of relative paths to recently modified files
+     """
+     # Track seen files to avoid duplicates
+     seen_files: set[str] = set()
+     recent_files: list[str] = []
+     # The repo's working directory (repo.path points at the .git directory)
+     repo_path_parent = Path(repo.path).parent
+
+     try:
+         # Get the HEAD reference and walk through recent commits
+         head = repo.head
+         for commit in repo.walk(head.target, SortMode.TOPOLOGICAL | SortMode.TIME):
+             # Skip merge commits, which have multiple parents
+             if len(commit.parents) > 1:
+                 continue
+
+             # If we have a parent, get the diff between the commit and its parent
+             if commit.parents:
+                 parent = commit.parents[0]
+                 diff = repo.diff(parent, commit)  # type: ignore[attr-defined]
+             else:
+                 # For the first commit, diff the tree against an empty tree
+                 diff = commit.tree.diff_to_tree(context_lines=0)
+
+             # Process each changed file in the diff
+             for patch in diff:
+                 file_path = patch.delta.new_file.path
+
+                 # Skip if we've already seen this file or if the file was deleted
+                 if (
+                     file_path in seen_files
+                     or not (repo_path_parent / file_path).exists()
+                 ):
+                     continue
+
+                 seen_files.add(file_path)
+                 recent_files.append(file_path)
+
+                 # If we have enough files, stop
+                 if len(recent_files) >= count:
+                     return recent_files
+
+     except Exception:
+         # Tolerate git errors gracefully (e.g., unborn HEAD in an empty repo)
+         pass
+
+     return recent_files
+
+
+ def calculate_dynamic_file_limit(total_files: int) -> int:
+     # Scale linearly between the bounds; the limit reaches max_files at 30,000 files
+     min_files = 50
+     max_files = 400
+
+     if total_files <= min_files:
+         return min_files
+
+     scale_factor = (max_files - min_files) / (30_000 - min_files)
+
+     dynamic_limit = min_files + int((total_files - min_files) * scale_factor)
+
+     return min(max_files, dynamic_limit)
+
+
+ def get_repo_context(file_or_repo_path: str) -> tuple[str, Path]:
+     file_or_repo_path_ = Path(file_or_repo_path).absolute()
+
+     repo = find_ancestor_with_git(file_or_repo_path_)
+     recent_git_files: list[str] = []
+
+     # Determine the context directory
+     if repo is not None:
+         context_dir = Path(repo.path).parent
+     elif file_or_repo_path_.is_file():
+         context_dir = file_or_repo_path_.parent
+     else:
+         context_dir = file_or_repo_path_
+
+     # Load workspace stats from the context directory
+     workspace_stats = load_workspace_stats(str(context_dir))
+
+     # Get all files and calculate the dynamic max-files limit once
+     all_files = get_all_files_max_depth(str(context_dir), 10, repo)
+
+     # For git repositories, also collect recently modified files
+     if repo is not None:
+         dynamic_max_files = calculate_dynamic_file_limit(len(all_files))
+         # Get at least 10 recent git files, or 20% of dynamic_max_files, whichever is larger
+         recent_files_count = max(10, int(dynamic_max_files * 0.2))
+         recent_git_files = get_recent_git_files(repo, recent_files_count)
+     else:
+         # We don't want a dynamic limit for non-git folders like /tmp or ~
+         dynamic_max_files = 50
+
+     # Calculate path probabilities in batch
+     path_scores = PATH_SCORER.calculate_path_probabilities_batch(all_files)
+
+     # Create a list of (path, score) tuples and sort by score
+     path_with_scores = list(zip(all_files, (score[0] for score in path_scores)))
+     sorted_files = [
+         path for path, _ in sorted(path_with_scores, key=lambda x: x[1], reverse=True)
+     ]
+
+     # Start with recent git files, then add other important files
+     top_files: list[str] = []
+
+     # If we have workspace stats, prioritize the most active files first
+     active_files: list[str] = []
+     if workspace_stats is not None:
+         # Score files by activity (weighted count of operations)
+         scored_files = []
+         for file_path, file_stats in workspace_stats.files.items():
+             try:
+                 # Convert to a relative path if possible
+                 if str(context_dir) in file_path:
+                     rel_path = os.path.relpath(file_path, str(context_dir))
+                 else:
+                     rel_path = file_path
+
+                 # Calculate the activity score; reads are weighted higher here
+                 activity_score = (
+                     file_stats.read_count * 2
+                     + file_stats.edit_count
+                     + file_stats.write_count
+                 )
+
+                 # Only include files that still exist
+                 if rel_path in all_files or os.path.exists(file_path):
+                     scored_files.append((rel_path, activity_score))
+             except (ValueError, OSError):
+                 # Skip files that cause path resolution errors
+                 continue
+
+         # Sort by activity score (highest first) and take the top 5
+         active_files = [
+             f for f, _ in sorted(scored_files, key=lambda x: x[1], reverse=True)[:5]
+         ]
+
+     # Add active files first
+     for file in active_files:
+         if file not in top_files and file in all_files:
+             top_files.append(file)
+
+     # Add recent git files next - these should be prioritized
+     for file in recent_git_files:
+         if file not in top_files and file in all_files:
+             top_files.append(file)
+
+     # Fill the rest with statistically important files, respecting the
+     # dynamic_max_files limit and avoiding duplicates
+     if len(top_files) < dynamic_max_files:
+         for file in sorted_files:
+             if file not in top_files and len(top_files) < dynamic_max_files:
+                 top_files.append(file)
+
+     directory_printer = DirectoryTree(context_dir, max_files=dynamic_max_files)
+     for file in top_files[:dynamic_max_files]:
+         directory_printer.expand(file)
+
+     return directory_printer.display(), context_dir
+
+
+ if __name__ == "__main__":
+     import cProfile
+     import pstats
+     import sys
+
+     from line_profiler import LineProfiler
+
+     folder = sys.argv[1]
+
+     # Profile using cProfile for overall function statistics
+     profiler = cProfile.Profile()
+     profiler.enable()
+     result = get_repo_context(folder)[0]
+     profiler.disable()
+
+     # Print cProfile stats
+     stats = pstats.Stats(profiler)
+     stats.sort_stats("cumulative")
+     print("\n=== Function-level profiling ===")
+     stats.print_stats(20)  # Print top 20 functions
+
+     # Profile using line_profiler for line-by-line statistics
+     lp = LineProfiler()
+     lp_wrapper = lp(get_repo_context)
+     lp_wrapper(folder)
+
+     print("\n=== Line-by-line profiling ===")
+     lp.print_stats()
+
+     print("\n=== Result ===")
+     print(result)
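
For orientation, a minimal usage sketch of this module's entry point; the import path is assumed from the package layout and may differ in the published wheel:

import sys

# Hypothetical import path for illustration; adjust to the actual module location.
from wcgw.client.repo_ops.repo_context import get_repo_context

# Print the prioritized directory tree for a folder (defaults to the current one).
tree_text, context_root = get_repo_context(sys.argv[1] if len(sys.argv) > 1 else ".")
print(f"Context root: {context_root}")
print(tree_text)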
@@ -0,0 +1,63 @@
+ """
+ Custom JSON schema generator that removes title fields from Pydantic models.
+
+ This module provides utilities to remove auto-generated title fields from JSON
+ schemas, making them more suitable for tool schemas where titles are not needed.
+ """
+
+ import copy
+ from typing import Any, Dict
+
+
+ def recursive_purge_dict_key(d: Dict[str, Any], k: str) -> None:
+     """
+     Remove a key from a dictionary recursively, but only from JSON schema metadata.
+
+     This function removes the specified key from dictionaries that appear to be
+     JSON schema objects (those carrying "type", "$ref", or another schema keyword).
+     This prevents removing legitimate data fields that happen to have the same name.
+
+     Args:
+         d: The dictionary to clean
+         k: The key to remove (typically "title")
+     """
+     if isinstance(d, dict):
+         # Only remove the key if this looks like a JSON schema object,
+         # i.e. it has "type", "$ref", or another schema keyword
+         is_schema_object = (
+             "type" in d
+             or "$ref" in d
+             or any(
+                 schema_key in d
+                 for schema_key in [
+                     "properties",
+                     "items",
+                     "additionalProperties",
+                     "enum",
+                     "const",
+                     "anyOf",
+                     "allOf",
+                     "oneOf",
+                 ]
+             )
+         )
+
+         if is_schema_object and k in d:
+             del d[k]
+
+         # Recursively process all values, regardless of key names,
+         # to catch all nested structures
+         for value in d.values():
+             if isinstance(value, dict):
+                 recursive_purge_dict_key(value, k)
+             elif isinstance(value, list):
+                 for item in value:
+                     if isinstance(item, dict):
+                         recursive_purge_dict_key(item, k)
+
+
+ def remove_titles_from_schema(schema: Dict[str, Any]) -> Dict[str, Any]:
48
+ """
49
+ Remove all 'title' keys from a JSON schema dictionary.
50
+
51
+ This function creates a copy of the schema and removes all title keys
52
+ recursively, making it suitable for use with APIs that don't need titles.
53
+
54
+ Args:
55
+ schema: The JSON schema dictionary to clean
56
+
57
+ Returns:
58
+ A new dictionary with all title keys removed
59
+ """
60
+
61
+ schema_copy = copy.deepcopy(schema)
62
+ recursive_purge_dict_key(schema_copy, "title")
63
+ return schema_copy
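
To illustrate the intended effect, a small sketch assuming Pydantic v2; the `Point` model and the import path are illustrative only:

from pydantic import BaseModel

# Hypothetical import path for illustration.
from wcgw.client.mcp_server.schema_generator import remove_titles_from_schema

class Point(BaseModel):
    x: int
    y: int

schema = Point.model_json_schema()
# Pydantic emits a "title" at the top level and on each property;
# the cleaner strips them all while leaving the structure intact.
clean = remove_titles_from_schema(schema)
assert "title" not in clean
assert all("title" not in prop for prop in clean["properties"].values())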
@@ -0,0 +1,98 @@
+ import os
+
+ from mcp.types import Tool, ToolAnnotations
+
+ from ..types_ import (
+     BashCommand,
+     ContextSave,
+     FileWriteOrEdit,
+     Initialize,
+     ReadFiles,
+     ReadImage,
+ )
+ from .schema_generator import remove_titles_from_schema
+
+ with open(os.path.join(os.path.dirname(__file__), "diff-instructions.txt")) as f:
+     diffinstructions = f.read()
+
+
+ TOOL_PROMPTS = [
+     Tool(
+         inputSchema=remove_titles_from_schema(Initialize.model_json_schema()),
+         name="Initialize",
+         description="""
+ - Always call this at the start of the conversation before using any of the shell tools from wcgw.
+ - Use `any_workspace_path` to initialize the shell in the appropriate project directory.
+ - If the user has mentioned a workspace, project root, or any other file or folder, use it to set `any_workspace_path`.
+ - If the user has mentioned any files, use `initial_files_to_read` to read them; use absolute paths only (~ allowed).
+ - By default use mode "wcgw".
+ - In "code-writer" mode, set the commands and globs which the user asked to set, otherwise use 'all'.
+ - Use type="first_call" if it's the first call to this tool.
+ - Use type="user_asked_mode_change" if in a conversation the user has asked to change mode.
+ - Use type="reset_shell" if in a conversation the shell is not working after multiple tries.
+ - Use type="user_asked_change_workspace" if in a conversation the user asked to change workspace.
+ """,
+         annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=False),
+     ),
+     Tool(
+         inputSchema=remove_titles_from_schema(BashCommand.model_json_schema()),
+         name="BashCommand",
+         description="""
+ - Execute a bash command. This is stateful (beware with subsequent calls).
+ - The status of the command and the current working directory will always be returned at the end.
+ - The first or the last line might be `(...truncated)` if the output is too long.
+ - Always run `pwd` if you get any file or directory not found error to make sure you're not lost.
+ - Do not run bg commands using "&"; instead use this tool.
+ - You must not use echo/cat to read/write files; use ReadFiles/FileWriteOrEdit.
+ - To check the status of a previous command, use `status_check` with an empty command argument.
+ - Only one command is allowed to run at a time. You need to wait for any previous command to finish before running a new one.
+ - Programs don't hang easily, so the most likely explanation for no output is that the program is still running, and you need to check status again.
+ - Do not send Ctrl-C before checking the status for up to 10 minutes, or whatever is appropriate for the program to finish.
+ - Only run long-running commands in the background. Each background command is run in a new non-reusable shell.
+ - On running a bg command you'll get a bg command id that you should use to get status or interact.
+ """,
+         annotations=ToolAnnotations(destructiveHint=True, openWorldHint=True),
+     ),
+     Tool(
+         inputSchema=remove_titles_from_schema(ReadFiles.model_json_schema()),
+         name="ReadFiles",
+         description="""
+ - Read the full file content of one or more files.
+ - Provide absolute paths only (~ allowed).
+ - Only if the task requires line-number understanding:
+     - You may extract a range of lines. E.g., `/path/to/file:1-10` for lines 1-10. You can drop the start or end, like `/path/to/file:1-` or `/path/to/file:-10`.
+ """,
+         annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=False),
+     ),
+     Tool(
+         inputSchema=remove_titles_from_schema(ReadImage.model_json_schema()),
+         name="ReadImage",
+         description="Read an image from the shell.",
+         annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=False),
+     ),
+     Tool(
+         inputSchema=remove_titles_from_schema(FileWriteOrEdit.model_json_schema()),
+         name="FileWriteOrEdit",
+         description="""
+ - Writes or edits a file based on the percentage of changes.
+ - Use an absolute path only (~ allowed).
+ - First write down the percentage of lines that need to be replaced in the file (between 0-100) in percentage_to_change.
+ - percentage_to_change should be low if mostly new code is to be added. It should be high if a lot of things are to be replaced.
+ - If percentage_to_change > 50, provide the full file content in text_or_search_replace_blocks.
+ - If percentage_to_change <= 50, text_or_search_replace_blocks should be search/replace blocks.
+ """
+         + diffinstructions,
+         annotations=ToolAnnotations(
+             destructiveHint=True, idempotentHint=True, openWorldHint=False
+         ),
+     ),
+     Tool(
+         inputSchema=remove_titles_from_schema(ContextSave.model_json_schema()),
+         name="ContextSave",
+         description="""
+ Saves the provided description and the file contents of all the relevant file paths or globs in a single text file.
+ - Provide a random 3-word unique id, or whatever the user provided.
+ - Leave the project path as an empty string if there is no project path.""",
+         annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=False),
+     ),
+ ]
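
For context, a minimal sketch of how a tool list like TOOL_PROMPTS is typically exposed through the low-level `mcp` Python SDK; the server name and handler wiring here are illustrative assumptions, not necessarily wcgw's actual server code:

from mcp.server import Server
from mcp.types import Tool

server = Server("wcgw")  # hypothetical server name

@server.list_tools()
async def list_tools() -> list[Tool]:
    # Serve the pre-built tool definitions (schemas already stripped of titles).
    return TOOL_PROMPTS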