PyPI - wcgw - Versions diffs - 3.0.7__py3-none-any.whl → 4.1.0__py3-none-any.whl - Mend

wcgw 3.0.7__py3-none-any.whl → 4.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of wcgw might be problematic. Click here for more details.

Files changed (21)

wcgw/client/bash_state/bash_state.py +184 -13
wcgw/client/diff-instructions.txt +29 -15
wcgw/client/file_ops/diff_edit.py +44 -47
wcgw/client/file_ops/search_replace.py +107 -72
wcgw/client/mcp_server/server.py +7 -3
wcgw/client/memory.py +5 -2
wcgw/client/modes.py +19 -10
wcgw/client/repo_ops/display_tree.py +3 -3
wcgw/client/repo_ops/file_stats.py +152 -0
wcgw/client/repo_ops/repo_context.py +147 -6
wcgw/client/tool_prompts.py +14 -16
wcgw/client/tools.py +496 -81
wcgw/relay/serve.py +8 -53
wcgw/types_.py +103 -16
{wcgw-3.0.7.dist-info → wcgw-4.1.0.dist-info}/METADATA +39 -20
{wcgw-3.0.7.dist-info → wcgw-4.1.0.dist-info}/RECORD +21 -20
wcgw_cli/anthropic_client.py +1 -1
wcgw_cli/openai_client.py +1 -1
{wcgw-3.0.7.dist-info → wcgw-4.1.0.dist-info}/WHEEL +0 -0
{wcgw-3.0.7.dist-info → wcgw-4.1.0.dist-info}/entry_points.txt +0 -0
{wcgw-3.0.7.dist-info → wcgw-4.1.0.dist-info}/licenses/LICENSE +0 -0

wcgw/client/bash_state/bash_state.py CHANGED Viewed

@@ -6,6 +6,7 @@ import threading
 import time
 import traceback
 from dataclasses import dataclass
+from hashlib import sha256
 from typing import (
     Any,
     Literal,
@@ -124,7 +125,6 @@ def cleanup_all_screens_with_name(name: str, console: Console) -> None:
         session_info = line.split()[0].strip()  # e.g., "1234.my_screen"
         if session_info.endswith(f".{name}"):
             sessions_to_kill.append(session_info)
     # Now, for every session we found, tell screen to quit it.
     for session in sessions_to_kill:
         try:
@@ -232,11 +232,13 @@ class BashState:
         write_if_empty_mode: Optional[WriteIfEmptyMode],
         mode: Optional[Modes],
         use_screen: bool,
-        whitelist_for_overwrite: Optional[set[str]] = None,
+        whitelist_for_overwrite: Optional[dict[str, "FileWhitelistData"]] = None,
     ) -> None:
         self._last_command: str = ""
         self.console = console
         self._cwd = working_dir or os.getcwd()
+        # Store the workspace root separately from the current working directory
+        self._workspace_root = working_dir or os.getcwd()
         self._bash_command_mode: BashCommandMode = bash_command_mode or BashCommandMode(
             "normal_mode", "all"
         )
@@ -245,7 +247,9 @@ class BashState:
             write_if_empty_mode or WriteIfEmptyMode("all")
         )
         self._mode = mode or "wcgw"
-        self._whitelist_for_overwrite: set[str] = whitelist_for_overwrite or set()
+        self._whitelist_for_overwrite: dict[str, FileWhitelistData] = (
+            whitelist_for_overwrite or {}
+        )
         self._bg_expect_thread: Optional[threading.Thread] = None
         self._bg_expect_thread_stop_event = threading.Event()
         self._use_screen = use_screen
@@ -253,7 +257,12 @@ class BashState:
     def expect(self, pattern: Any, timeout: Optional[float] = -1) -> int:
         self.close_bg_expect_thread()
-        output = self._shell.expect(pattern, timeout)
+        try:
+            output = self._shell.expect(pattern, timeout)
+        except pexpect.TIMEOUT:
+            # Edge case: gets raised when the child fd is not ready in some timeout
+            # pexpect/utils.py:143
+            return 1
         return output
     def send(self, s: str | bytes, set_as_command: Optional[str]) -> int:
@@ -314,9 +323,9 @@ class BashState:
             self._bg_expect_thread_stop_event = threading.Event()
     def cleanup(self) -> None:
+        cleanup_all_screens_with_name(self._shell_id, self.console)
         self.close_bg_expect_thread()
         self._shell.close(True)
-        cleanup_all_screens_with_name(self._shell_id, self.console)
     def __enter__(self) -> "BashState":
         return self
@@ -427,6 +436,15 @@ class BashState:
     def cwd(self) -> str:
         return self._cwd
+    @property
+    def workspace_root(self) -> str:
+        """Return the workspace root directory."""
+        return self._workspace_root
+    def set_workspace_root(self, workspace_root: str) -> None:
+        """Set the workspace root directory."""
+        self._workspace_root = workspace_root
     @property
     def prompt(self) -> str:
         return PROMPT_CONST
@@ -454,20 +472,56 @@ class BashState:
             "bash_command_mode": self._bash_command_mode.serialize(),
             "file_edit_mode": self._file_edit_mode.serialize(),
             "write_if_empty_mode": self._write_if_empty_mode.serialize(),
-            "whitelist_for_overwrite": list(self._whitelist_for_overwrite),
+            "whitelist_for_overwrite": {
+                k: v.serialize() for k, v in self._whitelist_for_overwrite.items()
+            },
             "mode": self._mode,
+            "workspace_root": self._workspace_root,
         }
     @staticmethod
     def parse_state(
         state: dict[str, Any],
-    ) -> tuple[BashCommandMode, FileEditMode, WriteIfEmptyMode, Modes, list[str]]:
+    ) -> tuple[
+        BashCommandMode,
+        FileEditMode,
+        WriteIfEmptyMode,
+        Modes,
+        dict[str, "FileWhitelistData"],
+        str,
+    ]:
+        whitelist_state = state["whitelist_for_overwrite"]
+        # Convert serialized whitelist data back to FileWhitelistData objects
+        whitelist_dict = {}
+        if isinstance(whitelist_state, dict):
+            for file_path, data in whitelist_state.items():
+                if isinstance(data, dict) and "file_hash" in data:
+                    # New format
+                    whitelist_dict[file_path] = FileWhitelistData.deserialize(data)
+                else:
+                    # Legacy format (just a hash string)
+                    # Try to get line count from file if it exists, otherwise use a large default
+                    whitelist_dict[file_path] = FileWhitelistData(
+                        file_hash=data if isinstance(data, str) else "",
+                        line_ranges_read=[(1, 1000000)],  # Assume entire file was read
+                        total_lines=1000000,
+                    )
+        else:
+            # Handle really old format if needed
+            whitelist_dict = {
+                k: FileWhitelistData(
+                    file_hash="", line_ranges_read=[(1, 1000000)], total_lines=1000000
+                )
+                for k in whitelist_state
+            }
         return (
             BashCommandMode.deserialize(state["bash_command_mode"]),
             FileEditMode.deserialize(state["file_edit_mode"]),
             WriteIfEmptyMode.deserialize(state["write_if_empty_mode"]),
             state["mode"],
-            state["whitelist_for_overwrite"],
+            whitelist_dict,
+            state.get("workspace_root", ""),
         )
     def load_state(
@@ -476,15 +530,17 @@ class BashState:
         file_edit_mode: FileEditMode,
         write_if_empty_mode: WriteIfEmptyMode,
         mode: Modes,
-        whitelist_for_overwrite: list[str],
+        whitelist_for_overwrite: dict[str, "FileWhitelistData"],
         cwd: str,
+        workspace_root: str,
     ) -> None:
         """Create a new BashState instance from a serialized state dictionary"""
         self._bash_command_mode = bash_command_mode
         self._cwd = cwd or self._cwd
+        self._workspace_root = workspace_root or cwd or self._workspace_root
         self._file_edit_mode = file_edit_mode
         self._write_if_empty_mode = write_if_empty_mode
-        self._whitelist_for_overwrite = set(whitelist_for_overwrite)
+        self._whitelist_for_overwrite = dict(whitelist_for_overwrite)
         self._mode = mode
         self.reset_shell()
@@ -505,17 +561,132 @@ class BashState:
         return "Not pending"
     @property
-    def whitelist_for_overwrite(self) -> set[str]:
+    def whitelist_for_overwrite(self) -> dict[str, "FileWhitelistData"]:
         return self._whitelist_for_overwrite
-    def add_to_whitelist_for_overwrite(self, file_path: str) -> None:
-        self._whitelist_for_overwrite.add(file_path)
+    def add_to_whitelist_for_overwrite(
+        self, file_paths_with_ranges: dict[str, list[tuple[int, int]]]
+    ) -> None:
+        """
+        Add files to the whitelist for overwrite.
+        Args:
+            file_paths_with_ranges: Dictionary mapping file paths to sequences of
+                               (start_line, end_line) tuples representing
+                               the ranges that have been read.
+        """
+        for file_path, ranges in file_paths_with_ranges.items():
+            # Read the file to get its hash and count lines
+            with open(file_path, "rb") as f:
+                file_content = f.read()
+                file_hash = sha256(file_content).hexdigest()
+                total_lines = file_content.count(b"\n") + 1
+            # Update or create whitelist entry
+            if file_path in self._whitelist_for_overwrite:
+                # Update existing entry
+                whitelist_data = self._whitelist_for_overwrite[file_path]
+                whitelist_data.file_hash = file_hash
+                whitelist_data.total_lines = total_lines
+                for range_start, range_end in ranges:
+                    whitelist_data.add_range(range_start, range_end)
+            else:
+                # Create new entry
+                self._whitelist_for_overwrite[file_path] = FileWhitelistData(
+                    file_hash=file_hash,
+                    line_ranges_read=list(ranges),
+                    total_lines=total_lines,
+                )
     @property
     def pending_output(self) -> str:
         return self._pending_output
+@dataclass
+class FileWhitelistData:
+    """Data about a file that has been read and can be modified."""
+    file_hash: str
+    # List of line ranges that have been read (inclusive start, inclusive end)
+    # E.g., [(1, 10), (20, 30)] means lines 1-10 and 20-30 have been read
+    line_ranges_read: list[tuple[int, int]]
+    # Total number of lines in the file
+    total_lines: int
+    def get_percentage_read(self) -> float:
+        """Calculate percentage of file read based on line ranges."""
+        if self.total_lines == 0:
+            return 100.0
+        # Count unique lines read
+        lines_read: set[int] = set()
+        for start, end in self.line_ranges_read:
+            lines_read.update(range(start, end + 1))
+        return (len(lines_read) / self.total_lines) * 100.0
+    def is_read_enough(self) -> bool:
+        """Check if enough of the file has been read (>=99%)"""
+        return self.get_percentage_read() >= 99
+    def get_unread_ranges(self) -> list[tuple[int, int]]:
+        """Return a list of line ranges (start, end) that haven't been read yet.
+        Returns line ranges as tuples of (start_line, end_line) in 1-indexed format.
+        If the whole file has been read, returns an empty list.
+        """
+        if self.total_lines == 0:
+            return []
+        # First collect all lines that have been read
+        lines_read: set[int] = set()
+        for start, end in self.line_ranges_read:
+            lines_read.update(range(start, end + 1))
+        # Generate unread ranges from the gaps
+        unread_ranges: list[tuple[int, int]] = []
+        start_range = None
+        for i in range(1, self.total_lines + 1):
+            if i not in lines_read:
+                if start_range is None:
+                    start_range = i
+            elif start_range is not None:
+                # End of an unread range
+                unread_ranges.append((start_range, i - 1))
+                start_range = None
+        # Don't forget the last range if it extends to the end of the file
+        if start_range is not None:
+            unread_ranges.append((start_range, self.total_lines))
+        return unread_ranges
+    def add_range(self, start: int, end: int) -> None:
+        """Add a new range of lines that have been read."""
+        # Merge with existing ranges if possible
+        self.line_ranges_read.append((start, end))
+        # Could add range merging logic here for optimization
+    def serialize(self) -> dict[str, Any]:
+        """Convert to a serializable dictionary."""
+        return {
+            "file_hash": self.file_hash,
+            "line_ranges_read": self.line_ranges_read,
+            "total_lines": self.total_lines,
+        }
+    @classmethod
+    def deserialize(cls, data: dict[str, Any]) -> "FileWhitelistData":
+        """Create from a serialized dictionary."""
+        return cls(
+            file_hash=data.get("file_hash", ""),
+            line_ranges_read=data.get("line_ranges_read", []),
+            total_lines=data.get("total_lines", 0),
+        )
 WAITING_INPUT_MESSAGE = """A command is already running. NOTE: You can't run multiple shell sessions, likely a previous program hasn't exited.
 1. Get its output using status check.
 2. Use `send_ascii` or `send_specials` to give inputs to the running program OR

wcgw/client/diff-instructions.txt CHANGED Viewed

@@ -1,17 +1,40 @@
 Instructions for editing files.
+# Example
+## Input file
+```
+import numpy as np
+from impls import impl1, impl2
+def hello():
+    "print a greeting"
+    print("hello")
-Only edit the files using the following SEARCH/REPLACE blocks.
+def call_hello():
+    "call hello"
+    hello()
+    print("Called")
+    impl1()
+    hello()
+    impl2()
+```
+## Edit format on the input file
 ```
-file_edit_using_search_replace_blocks="""
+<<<<<<< SEARCH
+from impls import impl1, impl2
+=======
+from impls import impl1, impl2
+from hello import hello as hello_renamed
+>>>>>>> REPLACE
 <<<<<<< SEARCH
 def hello():
     "print a greeting"
     print("hello")
 =======
-from hello import hello as hello_renamed
 >>>>>>> REPLACE
 <<<<<<< SEARCH
 def call_hello():
@@ -33,26 +56,17 @@ def call_hello_renamed():
     hello_renamed()
     impl2()
 >>>>>>> REPLACE
-"""
 ```
 # *SEARCH/REPLACE block* Rules:
-Every *SEARCH/REPLACE block* must use this format:
-1. The start of match block: <<<<<<< SEARCH
-2. A contiguous chunk of lines to do exact match for in the existing source code
-3. The dividing line: =======
-4. The lines to replace into the source code
-5. The end of the replace block: >>>>>>> REPLACE
 Every "<<<<<<< SEARCH" section must *EXACTLY MATCH* the existing file content, character for character, including all comments, docstrings, whitespaces, etc.
 Including multiple unique *SEARCH/REPLACE* blocks if needed.
-Include enough lines in each SEARCH section to uniquely match each set of lines that need to change.
+Include enough and only enough lines in each SEARCH section to uniquely match each set of lines that need to change.
 Keep *SEARCH/REPLACE* blocks concise.
 Break large *SEARCH/REPLACE* blocks into a series of smaller blocks that each change a small portion of the file.
-Include just the changing lines, and a few surrounding lines if needed for uniqueness.
-Do not include long runs of unchanging lines in *SEARCH/REPLACE* blocks.
+Include just the changing lines, and a few surrounding lines (2-3 lines) if needed for uniqueness.
+Other than for uniqueness, avoid including those lines which do not change in search (and replace) blocks. Target 2-3 non trivial extra lines per block.
 Preserve leading spaces and indentations in both SEARCH and REPLACE blocks.

wcgw/client/file_ops/diff_edit.py CHANGED Viewed

@@ -7,7 +7,13 @@ TOLERANCE_TYPES = Literal["SILENT", "WARNING", "ERROR"]
 class SearchReplaceMatchError(Exception):
-    pass
+    def __init__(self, message: str):
+        message = f"""
+{message}
+---
+Retry immediately with same "percentage_to_change" using search replace blocks fixing above error.
+"""
+        super().__init__(message)
 @dataclass
@@ -27,7 +33,9 @@ class TolerancesHit(Tolerance):
 class FileEditOutput:
     original_content: list[str]
     orig_search_blocks: list[list[str]]
-    edited_with_tolerances: list[tuple[slice, list[TolerancesHit], list[str]]]
+    edited_with_tolerances: list[
+        tuple[slice, list[TolerancesHit], list[str]]
+    ]  # Need not be equal to orig_search_blocks when early exit
     def replace_or_throw(
         self,
@@ -45,11 +53,12 @@ class FileEditOutput:
                     if tol.severity_cat == "WARNING":
                         warnings.add(tol.error_name)
                     elif tol.severity_cat == "ERROR":
+                        search__ = "\n".join(search_)
                         errors.append(f"""
 Got error while processing the following search block:
 ---
 ```
-{"\n".join(search_)}
+{search__}
 ```
 ---
 Error:
@@ -93,8 +102,7 @@ Error:
                     best_score = hit_score
                 elif abs(hit_score - best_score) < 1e-3:
                     best_hits.append(output)
-        return best_hits, best_score < 0
+        return best_hits, best_score > 1000
 def line_process_max_space_tolerance(line: str) -> str:
@@ -204,7 +212,7 @@ class FileEditInput:
                         TolerancesHit(
                             line_process=lambda x: x,
                             severity_cat="ERROR",
-                            score_multiplier=float("-inf"),
+                            score_multiplier=float("inf"),
                             error_name="The blocks couldn't be matched, maybe the sequence of search blocks was incorrect?",
                             count=max(1, len(search_lines)),
                         )
@@ -240,6 +248,7 @@ class FileEditInput:
         # search for first block
         first_block = self.search_replace_blocks[self.search_replace_offset]
+        replace_by = first_block[1]
         # Try exact match
         matches = match_exact(self.file_lines, self.file_line_offset, first_block[0])
@@ -251,7 +260,6 @@ class FileEditInput:
             matches_with_tolerances = match_with_tolerance(
                 self.file_lines, self.file_line_offset, first_block[0], self.tolerances
             )
-            replace_by = first_block[1]
             if not matches_with_tolerances:
                 # Try with no empty lines
                 matches_with_tolerances = match_with_tolerance_empty_line(
@@ -277,8 +285,8 @@ class FileEditInput:
                                     TolerancesHit(
                                         lambda x: x,
                                         "ERROR",
-                                        -1,
-                                        "Couldn't find match. Do you mean to match the lines in the following context?\n```"
+                                        float("inf"),
+                                        "Couldn't find match. Here's the latest snippet from the file which might be relevant for you to consider:\n```"
                                         + sim_context
                                         + "\n```",
                                         int(len(first_block[0]) // sim_sim),
@@ -287,51 +295,40 @@ class FileEditInput:
                             )
                         ]
-            for match, tolerances in matches_with_tolerances:
-                if any(
-                    tolerance.error_name == REMOVE_INDENTATION
-                    for tolerance in tolerances
-                ):
-                    replace_by = fix_indentation(
-                        self.file_lines[match.start : match.stop],
-                        first_block[0],
-                        replace_by,
-                    )
-                file_edit_input = FileEditInput(
-                    self.file_lines,
-                    match.stop,
-                    self.search_replace_blocks,
-                    self.search_replace_offset + 1,
-                    self.tolerances,
+        else:
+            matches_with_tolerances = [(match, []) for match in matches]
+        for match, tolerances in matches_with_tolerances:
+            if any(
+                tolerance.error_name == REMOVE_INDENTATION for tolerance in tolerances
+            ):
+                replace_by = fix_indentation(
+                    self.file_lines[match.start : match.stop],
+                    first_block[0],
+                    replace_by,
                 )
-                remaining_output = file_edit_input.edit_file()
-                for rem_output in remaining_output:
-                    all_outputs.append(
-                        [
-                            (match, tolerances, replace_by),
-                            *rem_output.edited_with_tolerances,
-                        ]
-                    )
-        else:
-            for match in matches:
-                file_edit_input = FileEditInput(
-                    self.file_lines,
-                    match.stop,
-                    self.search_replace_blocks,
-                    self.search_replace_offset + 1,
-                    self.tolerances,
+            file_edit_input = FileEditInput(
+                self.file_lines,
+                match.stop,
+                self.search_replace_blocks,
+                self.search_replace_offset + 1,
+                self.tolerances,
+            )
+            if any(tolerance.severity_cat == "ERROR" for tolerance in tolerances):
+                # Exit early
+                all_outputs.append(
+                    [
+                        (match, tolerances, replace_by),
+                    ]
                 )
+            else:
                 remaining_output = file_edit_input.edit_file()
                 for rem_output in remaining_output:
                     all_outputs.append(
                         [
-                            (
-                                match,
-                                [],
-                                first_block[1],
-                            ),
+                            (match, tolerances, replace_by),
                             *rem_output.edited_with_tolerances,
                         ]
                     )

wcgw 3.0.7__py3-none-any.whl → 4.1.0__py3-none-any.whl

Potentially problematic release.

wcgw 3.0.7__py3-none-any.whl → 4.1.0__py3-none-any.whl