PyPI - wcgw - Versions diffs - 2.4.3__py3-none-any.whl → 2.6.1__py3-none-any.whl - Mend

wcgw 2.4.3__py3-none-any.whl → 2.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of wcgw might be problematic. Click here for more details.

Files changed (18)

wcgw/client/anthropic_client.py +7 -17
wcgw/client/common.py +3 -1
wcgw/client/mcp_server/server.py +41 -36
wcgw/client/openai_client.py +21 -36
wcgw/client/openai_utils.py +5 -5
wcgw/client/repo_ops/display_tree.py +127 -0
wcgw/client/repo_ops/path_prob.py +58 -0
wcgw/client/repo_ops/paths_model.vocab +20000 -0
wcgw/client/repo_ops/paths_tokens.model +80042 -0
wcgw/client/repo_ops/repo_context.py +148 -0
wcgw/client/tools.py +220 -115
wcgw/relay/serve.py +3 -3
wcgw/types_.py +6 -4
{wcgw-2.4.3.dist-info → wcgw-2.6.1.dist-info}/METADATA +19 -56
{wcgw-2.4.3.dist-info → wcgw-2.6.1.dist-info}/RECORD +18 -12
wcgw-2.6.1.dist-info/licenses/LICENSE +213 -0
{wcgw-2.4.3.dist-info → wcgw-2.6.1.dist-info}/WHEEL +0 -0
{wcgw-2.4.3.dist-info → wcgw-2.6.1.dist-info}/entry_points.txt +0 -0

wcgw/client/tools.py CHANGED Viewed

@@ -1,61 +1,57 @@
 import base64
 import datetime
+import importlib.metadata
 import json
 import mimetypes
-from pathlib import Path
+import os
 import re
 import shlex
-import importlib.metadata
 import time
 import traceback
+import uuid
+from difflib import SequenceMatcher
+from pathlib import Path
 from tempfile import NamedTemporaryFile, TemporaryDirectory
 from typing import (
     Callable,
-    DefaultDict,
     Literal,
     Optional,
     ParamSpec,
     Type,
     TypeVar,
 )
-import uuid
-import humanize
-from pydantic import BaseModel, TypeAdapter
-import typer
-from .computer_use import run_computer_tool
-from websockets.sync.client import connect as syncconnect
-import os
-import tiktoken
 import pexpect
-from typer import Typer
-import websockets
-import rich
 import pyte
-from syntax_checker import check_syntax
+import rich
+import tokenizers  # type: ignore
+import typer
+import websockets
 from openai.types.chat import (
     ChatCompletionMessageParam,
 )
-from difflib import SequenceMatcher
+from pydantic import BaseModel, TypeAdapter
+from syntax_checker import check_syntax
+from typer import Typer
+from websockets.sync.client import connect as syncconnect
 from ..types_ import (
     BashCommand,
     BashInteraction,
-    WriteIfEmpty,
-    FileEditFindReplace,
     FileEdit,
+    FileEditFindReplace,
+    GetScreenInfo,
     Initialize,
-    ReadFile,
+    Keyboard,
+    Mouse,
+    ReadFiles,
     ReadImage,
     ResetShell,
-    Mouse,
-    Keyboard,
     ScreenShot,
-    GetScreenInfo,
+    WriteIfEmpty,
 )
+from .computer_use import run_computer_tool
+from .repo_ops.repo_context import get_repo_context
 from .sys_utils import command_run
@@ -178,19 +174,23 @@ def _ensure_env_and_bg_jobs(shell: pexpect.spawn) -> Optional[int]:  # type: ign
     shell.expect(PROMPT, timeout=0.2)
     shell.sendline("jobs | wc -l")
     before = ""
     while not _is_int(before):  # Consume all previous output
         try:
             shell.expect(PROMPT, timeout=0.2)
         except pexpect.TIMEOUT:
             console.print(f"Couldn't get exit code, before: {before}")
             raise
-        assert isinstance(shell.before, str)
-        # Render because there could be some anscii escape sequences still set like in google colab env
-        before_lines = render_terminal_output(shell.before)
+        before_val = shell.before
+        if not isinstance(before_val, str):
+            before_val = str(before_val)
+        assert isinstance(before_val, str)
+        before_lines = render_terminal_output(before_val)
         before = "\n".join(before_lines).strip()
     try:
-        return int((before))
+        return int(before)
     except ValueError:
         raise ValueError(f"Malformed output: {before}")
@@ -244,10 +244,12 @@ class BashState:
         return self._cwd
     def update_cwd(self) -> str:
-        BASH_STATE.shell.sendline("pwd")
-        BASH_STATE.shell.expect(PROMPT, timeout=0.2)
-        assert isinstance(BASH_STATE.shell.before, str)
-        before_lines = render_terminal_output(BASH_STATE.shell.before)
+        self.shell.sendline("pwd")
+        self.shell.expect(PROMPT, timeout=0.2)
+        before_val = self.shell.before
+        if not isinstance(before_val, str):
+            before_val = str(before_val)
+        before_lines = render_terminal_output(before_val)
         current_dir = "\n".join(before_lines).strip()
         self._cwd = current_dir
         return current_dir
@@ -259,9 +261,17 @@ class BashState:
     def get_pending_for(self) -> str:
         if isinstance(self._state, datetime.datetime):
             timedelta = datetime.datetime.now() - self._state
-            return humanize.naturaldelta(
-                timedelta + datetime.timedelta(seconds=TIMEOUT)
+            return (
+                str(
+                    int(
+                        (
+                            timedelta + datetime.timedelta(seconds=TIMEOUT)
+                        ).total_seconds()
+                    )
+                )
+                + " seconds"
             )
         return "Not pending"
     @property
@@ -279,16 +289,46 @@ class BashState:
 BASH_STATE = BashState()
-def initial_info() -> str:
+def initialize(
+    any_workspace_path: str, read_files_: list[str], max_tokens: Optional[int]
+) -> str:
+    reset_shell()
+    repo_context = ""
+    if any_workspace_path:
+        if os.path.exists(any_workspace_path):
+            repo_context, folder_to_start = get_repo_context(any_workspace_path, 200)
+            BASH_STATE.shell.sendline(f"cd {shlex.quote(str(folder_to_start))}")
+            BASH_STATE.shell.expect(PROMPT, timeout=0.2)
+            BASH_STATE.update_cwd()
+            repo_context = f"---\n# Workspace structure\n{repo_context}\n---\n"
+        else:
+            return f"\nInfo: Workspace path {any_workspace_path} does not exist\n"
+    initial_files_context = ""
+    if read_files_:
+        initial_files = read_files(read_files_, max_tokens)
+        initial_files_context = f"---\n# Requested files\n{initial_files}\n---\n"
     uname_sysname = os.uname().sysname
     uname_machine = os.uname().machine
-    return f"""
+    output = f"""
+# Environment
 System: {uname_sysname}
 Machine: {uname_machine}
 Current working directory: {BASH_STATE.cwd}
-wcgw version: {importlib.metadata.version("wcgw")}
+{repo_context}
+{initial_files_context}
 """
+    return output
 def reset_shell() -> str:
     BASH_STATE.reset()
@@ -345,29 +385,11 @@ def get_status() -> str:
 T = TypeVar("T")
-def save_out_of_context(
-    tokens: list[T],
-    max_tokens: int,
-    suffix: str,
-    tokens_converted: Callable[[list[T]], str],
-) -> tuple[str, list[Path]]:
-    file_contents = list[str]()
-    for i in range(0, len(tokens), max_tokens):
-        file_contents.append(tokens_converted(tokens[i : i + max_tokens]))
-    if len(file_contents) == 1:
-        return file_contents[0], []
-    rest_paths = list[Path]()
-    for i, content in enumerate(file_contents):
-        if i == 0:
-            continue
-        file_path = NamedTemporaryFile(delete=False, suffix=suffix).name
-        with open(file_path, "w") as f:
-            f.write(content)
-        rest_paths.append(Path(file_path))
-    return file_contents[0], rest_paths
+def save_out_of_context(content: str, suffix: str) -> str:
+    file_path = NamedTemporaryFile(delete=False, suffix=suffix).name
+    with open(file_path, "w") as f:
+        f.write(content)
+    return file_path
 def rstrip(lines: list[str]) -> str:
@@ -404,7 +426,7 @@ def is_status_check(arg: BashInteraction | BashCommand) -> bool:
 def execute_bash(
-    enc: tiktoken.Encoding,
+    enc: tokenizers.Tokenizer,
     bash_arg: BashCommand | BashInteraction,
     max_tokens: Optional[int],
     timeout_s: Optional[float],
@@ -549,7 +571,7 @@ def execute_bash(
             if max_tokens and len(tokens) >= max_tokens:
                 incremental_text = "(...truncated)\n" + enc.decode(
-                    tokens[-(max_tokens - 1) :]
+                    tokens.ids[-(max_tokens - 1) :]
                 )
             if is_interrupt:
@@ -569,21 +591,20 @@ def execute_bash(
             return incremental_text, 0
-    assert isinstance(BASH_STATE.shell.before, str)
+    if not isinstance(BASH_STATE.shell.before, str):
+        BASH_STATE.shell.before = str(BASH_STATE.shell.before)
     output = _incremental_text(BASH_STATE.shell.before, BASH_STATE.pending_output)
     BASH_STATE.set_repl()
-    if is_interrupt:
-        return "Interrupt successful", 0.0
     tokens = enc.encode(output)
     if max_tokens and len(tokens) >= max_tokens:
-        output = "(...truncated)\n" + enc.decode(tokens[-(max_tokens - 1) :])
+        output = "(...truncated)\n" + enc.decode(tokens.ids[-(max_tokens - 1) :])
     try:
         exit_status = get_status()
         output += exit_status
-    except ValueError as e:
+    except ValueError:
         console.print(output)
         console.print(traceback.format_exc())
         console.print("Malformed output, restarting shell", style="red")
@@ -638,6 +659,19 @@ def ensure_no_previous_output(func: Callable[Param, T]) -> Callable[Param, T]:
     return wrapper
+def truncate_if_over(content: str, max_tokens: Optional[int]) -> str:
+    if max_tokens and max_tokens > 0:
+        tokens = default_enc.encode(content)
+        n_tokens = len(tokens)
+        if n_tokens > max_tokens:
+            content = (
+                default_enc.decode(tokens.ids[: max(0, max_tokens - 100)])
+                + "\n(...truncated)"
+            )
+    return content
 def read_image_from_shell(file_path: str) -> ImageData:
     if not os.path.isabs(file_path):
         file_path = os.path.join(BASH_STATE.cwd, file_path)
@@ -666,7 +700,25 @@ def read_image_from_shell(file_path: str) -> ImageData:
             return ImageData(media_type=image_type, data=image_b64)  # type: ignore
-def write_file(writefile: WriteIfEmpty, error_on_exist: bool) -> str:
+def get_context_for_errors(
+    errors: list[tuple[int, int]], file_content: str, max_tokens: Optional[int]
+) -> str:
+    file_lines = file_content.split("\n")
+    min_line_num = max(0, min([error[0] for error in errors]) - 10)
+    max_line_num = min(len(file_lines), max([error[0] for error in errors]) + 10)
+    context_lines = file_lines[min_line_num:max_line_num]
+    context = "\n".join(context_lines)
+    if max_tokens is not None and max_tokens > 0:
+        ntokens = len(default_enc.encode(context))
+        if ntokens > max_tokens:
+            return "Please re-read the file to understand the context"
+    return f"Here's relevant snippet from the file where the syntax errors occured:\n```\n{context}\n```"
+def write_file(
+    writefile: WriteIfEmpty, error_on_exist: bool, max_tokens: Optional[int]
+) -> str:
     if not os.path.isabs(writefile.file_path):
         return f"Failure: file_path should be absolute path, current working directory is {BASH_STATE.cwd}"
     else:
@@ -678,9 +730,14 @@ def write_file(writefile: WriteIfEmpty, error_on_exist: bool) -> str:
         if (error_on_exist or error_on_exist_) and os.path.exists(path_):
             content = Path(path_).read_text().strip()
             if content:
+                content = truncate_if_over(content, max_tokens)
                 if error_on_exist_:
-                    return f"Error: can't write to existing file {path_}, use other functions to edit the file"
-                elif error_on_exist:
+                    return (
+                        f"Error: can't write to existing file {path_}, use other functions to edit the file"
+                        + f"\nHere's the existing content:\n```\n{content}\n```"
+                    )
+                else:
                     add_overwrite_warning = content
         # Since we've already errored once, add this to whitelist
@@ -701,8 +758,13 @@ def write_file(writefile: WriteIfEmpty, error_on_exist: bool) -> str:
                 timeout=TIMEOUT,
             )
             if return_code != 0 and content.strip():
+                content = truncate_if_over(content, max_tokens)
                 if error_on_exist_:
-                    return f"Error: can't write to existing file {path_}, use other functions to edit the file"
+                    return (
+                        f"Error: can't write to existing file {path_}, use other functions to edit the file"
+                        + f"\nHere's the existing content:\n```\n{content}\n```"
+                    )
                 else:
                     add_overwrite_warning = content
@@ -735,13 +797,19 @@ def write_file(writefile: WriteIfEmpty, error_on_exist: bool) -> str:
     try:
         check = check_syntax(extension, writefile.file_content)
         syntax_errors = check.description
         if syntax_errors:
+            context_for_errors = get_context_for_errors(
+                check.errors, writefile.file_content, max_tokens
+            )
             console.print(f"W: Syntax errors encountered: {syntax_errors}")
             warnings.append(f"""
 ---
-Warning: tree-sitter reported syntax errors, please re-read the file and fix if any errors.
-Errors:
+Warning: tree-sitter reported syntax errors
+Syntax errors:
 {syntax_errors}
+{context_for_errors}
 ---
             """)
@@ -751,8 +819,10 @@ Errors:
     if add_overwrite_warning:
         warnings.append(
             "\n---\nWarning: a file already existed and it's now overwritten. Was it a mistake? If yes please revert your action."
-            "Here's the previous content:\n```\n" + add_overwrite_warning + "\n```"
             "\n---\n"
+            + "Here's the previous content:\n```\n"
+            + add_overwrite_warning
+            + "\n```"
         )
     return "Success" + "".join(warnings)
@@ -878,9 +948,9 @@ def edit_content(content: str, find_lines: str, replace_with_lines: str) -> str:
     )
-def do_diff_edit(fedit: FileEdit) -> str:
+def do_diff_edit(fedit: FileEdit, max_tokens: Optional[int]) -> str:
     try:
-        return _do_diff_edit(fedit)
+        return _do_diff_edit(fedit, max_tokens)
     except Exception as e:
         # Try replacing \"
         try:
@@ -890,13 +960,13 @@ def do_diff_edit(fedit: FileEdit) -> str:
                     '\\"', '"'
                 ),
             )
-            return _do_diff_edit(fedit)
+            return _do_diff_edit(fedit, max_tokens)
         except Exception:
             pass
         raise e
-def _do_diff_edit(fedit: FileEdit) -> str:
+def _do_diff_edit(fedit: FileEdit, max_tokens: Optional[int]) -> str:
     console.log(f"Editing file: {fedit.file_path}")
     if not os.path.isabs(fedit.file_path):
@@ -995,13 +1065,19 @@ def _do_diff_edit(fedit: FileEdit) -> str:
         check = check_syntax(extension, apply_diff_to)
         syntax_errors = check.description
         if syntax_errors:
+            context_for_errors = get_context_for_errors(
+                check.errors, apply_diff_to, max_tokens
+            )
             console.print(f"W: Syntax errors encountered: {syntax_errors}")
             return f"""Wrote file succesfully.
 ---
 However, tree-sitter reported syntax errors, please re-read the file and fix if there are any errors.
-Errors:
+Syntax errors:
 {syntax_errors}
-            """
+{context_for_errors}
+"""
     except Exception:
         pass
@@ -1041,7 +1117,7 @@ TOOLS = (
     | AIAssistant
     | DoneFlag
     | ReadImage
-    | ReadFile
+    | ReadFiles
     | Initialize
     | Mouse
     | Keyboard
@@ -1076,8 +1152,8 @@ def which_tool_name(name: str) -> Type[TOOLS]:
         return DoneFlag
     elif name == "ReadImage":
         return ReadImage
-    elif name == "ReadFile":
-        return ReadFile
+    elif name == "ReadFiles":
+        return ReadFiles
     elif name == "Initialize":
         return Initialize
     elif name == "Mouse":
@@ -1097,7 +1173,7 @@ TOOL_CALLS: list[TOOLS] = []
 def get_tool_output(
     args: dict[object, object] | TOOLS,
-    enc: tiktoken.Encoding,
+    enc: tokenizers.Tokenizer,
     limit: float,
     loop_call: Callable[[str, float], tuple[str, float]],
     max_tokens: Optional[int],
@@ -1118,10 +1194,10 @@ def get_tool_output(
         output = execute_bash(enc, arg, max_tokens, arg.wait_for_seconds)
     elif isinstance(arg, WriteIfEmpty):
         console.print("Calling write file tool")
-        output = write_file(arg, True), 0
+        output = write_file(arg, True, max_tokens), 0
     elif isinstance(arg, FileEdit):
         console.print("Calling full file edit tool")
-        output = do_diff_edit(arg), 0.0
+        output = do_diff_edit(arg, max_tokens), 0.0
     elif isinstance(arg, DoneFlag):
         console.print("Calling mark finish tool")
         output = mark_finish(arg), 0.0
@@ -1131,17 +1207,18 @@ def get_tool_output(
     elif isinstance(arg, ReadImage):
         console.print("Calling read image tool")
         output = read_image_from_shell(arg.file_path), 0.0
-    elif isinstance(arg, ReadFile):
+    elif isinstance(arg, ReadFiles):
         console.print("Calling read file tool")
-        output = read_file(arg, max_tokens), 0.0
+        output = read_files(arg.file_paths, max_tokens), 0.0
     elif isinstance(arg, ResetShell):
         console.print("Calling reset shell tool")
         output = reset_shell(), 0.0
     elif isinstance(arg, Initialize):
         console.print("Calling initial info tool")
-        # First force reset
-        reset_shell()
-        output = initial_info(), 0.0
+        output = (
+            initialize(arg.any_workspace_path, arg.initial_files_to_read, max_tokens),
+            0.0,
+        )
     elif isinstance(arg, (Mouse, Keyboard, ScreenShot, GetScreenInfo)):
         console.print(f"Calling {type(arg).__name__} tool")
         outputs_cost = run_computer_tool(arg), 0.0
@@ -1190,7 +1267,9 @@ def get_tool_output(
 History = list[ChatCompletionMessageParam]
-default_enc = tiktoken.encoding_for_model("gpt-4o")
+default_enc: tokenizers.Tokenizer = tokenizers.Tokenizer.from_pretrained(
+    "Xenova/claude-tokenizer"
+)
 curr_cost = 0.0
@@ -1203,7 +1282,7 @@ class Mdata(BaseModel):
         | FileEditFindReplace
         | FileEdit
         | str
-        | ReadFile
+        | ReadFiles
         | Initialize
     )
@@ -1276,43 +1355,69 @@ def app(
     register_client(server_url, client_uuid or "")
-def read_file(readfile: ReadFile, max_tokens: Optional[int]) -> str:
-    console.print(f"Reading file: {readfile.file_path}")
+def read_files(file_paths: list[str], max_tokens: Optional[int]) -> str:
+    message = ""
+    for i, file in enumerate(file_paths):
+        try:
+            content, truncated, tokens = read_file(file, max_tokens)
+        except Exception as e:
+            message += f"\n{file}: {str(e)}\n"
+            continue
-    if not os.path.isabs(readfile.file_path):
-        return f"Failure: file_path should be absolute path, current working directory is {BASH_STATE.cwd}"
+        if max_tokens:
+            max_tokens = max_tokens - tokens
-    BASH_STATE.add_to_whitelist_for_overwrite(readfile.file_path)
+        message += f"\n``` {file}\n{content}\n"
+        if truncated or (max_tokens and max_tokens <= 0):
+            not_reading = file_paths[i + 1 :]
+            if not_reading:
+                message += f'\nNot reading the rest of the files: {", ".join(not_reading)} due to token limit, please call again'
+            break
+        else:
+            message += "```"
+    return message
+def read_file(file_path: str, max_tokens: Optional[int]) -> tuple[str, bool, int]:
+    console.print(f"Reading file: {file_path}")
+    if not os.path.isabs(file_path):
+        raise ValueError(
+            f"Failure: file_path should be absolute path, current working directory is {BASH_STATE.cwd}"
+        )
+    BASH_STATE.add_to_whitelist_for_overwrite(file_path)
     if not BASH_STATE.is_in_docker:
-        path = Path(readfile.file_path)
+        path = Path(file_path)
         if not path.exists():
-            return f"Error: file {readfile.file_path} does not exist"
+            raise ValueError(f"Error: file {file_path} does not exist")
         with path.open("r") as f:
-            content = f.read()
+            content = f.read(10_000_000)
     else:
         return_code, content, stderr = command_run(
-            f"docker exec {BASH_STATE.is_in_docker} cat {shlex.quote(readfile.file_path)}",
+            f"docker exec {BASH_STATE.is_in_docker} cat {shlex.quote(file_path)}",
             timeout=TIMEOUT,
         )
         if return_code != 0:
             raise Exception(
-                f"Error: cat {readfile.file_path} failed with code {return_code}\nstdout: {content}\nstderr: {stderr}"
+                f"Error: cat {file_path} failed with code {return_code}\nstdout: {content}\nstderr: {stderr}"
             )
+    truncated = False
+    tokens_counts = 0
     if max_tokens is not None:
         tokens = default_enc.encode(content)
+        tokens_counts = len(tokens)
         if len(tokens) > max_tokens:
-            content, rest = save_out_of_context(
-                tokens,
-                max_tokens - 100,
-                Path(readfile.file_path).suffix,
-                default_enc.decode,
+            content = default_enc.decode(tokens.ids[:max_tokens])
+            rest = save_out_of_context(
+                default_enc.decode(tokens.ids[max_tokens:]), Path(file_path).suffix
             )
-            if rest:
-                rest_ = "\n".join(map(str, rest))
-                content += f"\n(...truncated)\n---\nI've split the rest of the file into multiple files. Here are the remaining splits, please read them:\n{rest_}"
-    return content
+            content += f"\n(...truncated)\n---\nI've saved the continuation in a new file. Please read: `{rest}`"
+            truncated = True
+    return content, truncated, tokens_counts

wcgw/relay/serve.py CHANGED Viewed

@@ -21,7 +21,7 @@ from ..types_ import (
     FileEditFindReplace,
     FileEdit,
     Initialize,
-    ReadFile,
+    ReadFiles,
     ResetShell,
     Specials,
 )
@@ -35,7 +35,7 @@ class Mdata(BaseModel):
         | ResetShell
         | FileEditFindReplace
         | FileEdit
-        | ReadFile
+        | ReadFiles
         | Initialize
         | str
     )
@@ -259,7 +259,7 @@ async def bash_interaction(bash_interaction: BashInteractionWithUUID) -> str:
     raise fastapi.HTTPException(status_code=500, detail="Timeout error")
-class ReadFileWithUUID(ReadFile):
+class ReadFileWithUUID(ReadFiles):
     user_id: UUID

wcgw/types_.py CHANGED Viewed

@@ -1,5 +1,5 @@
-import re
 from typing import Literal, Optional, Sequence
 from pydantic import BaseModel
@@ -31,9 +31,9 @@ class WriteIfEmpty(BaseModel):
     file_content: str
-class ReadFile(BaseModel):
-    file_path: str  # The path to the file to read
-    type: Literal["ReadFile"]
+class ReadFiles(BaseModel):
+    file_paths: list[str]
+    type: Literal["ReadFiles"]
 class FileEditFindReplace(BaseModel):
@@ -53,6 +53,8 @@ class FileEdit(BaseModel):
 class Initialize(BaseModel):
     type: Literal["Initialize"]
+    any_workspace_path: str
+    initial_files_to_read: list[str]
 class GetScreenInfo(BaseModel):

wcgw 2.4.3__py3-none-any.whl → 2.6.1__py3-none-any.whl

Potentially problematic release.

wcgw 2.4.3__py3-none-any.whl → 2.6.1__py3-none-any.whl