PyPI - wcgw - Versions diffs - 2.4.3__py3-none-any.whl → 2.6.1__py3-none-any.whl - Mend

wcgw 2.4.3__py3-none-any.whl → 2.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of wcgw might be problematic. Click here for more details.

Files changed (18)

wcgw/client/anthropic_client.py +7 -17
wcgw/client/common.py +3 -1
wcgw/client/mcp_server/server.py +41 -36
wcgw/client/openai_client.py +21 -36
wcgw/client/openai_utils.py +5 -5
wcgw/client/repo_ops/display_tree.py +127 -0
wcgw/client/repo_ops/path_prob.py +58 -0
wcgw/client/repo_ops/paths_model.vocab +20000 -0
wcgw/client/repo_ops/paths_tokens.model +80042 -0
wcgw/client/repo_ops/repo_context.py +148 -0
wcgw/client/tools.py +220 -115
wcgw/relay/serve.py +3 -3
wcgw/types_.py +6 -4
{wcgw-2.4.3.dist-info → wcgw-2.6.1.dist-info}/METADATA +19 -56
{wcgw-2.4.3.dist-info → wcgw-2.6.1.dist-info}/RECORD +18 -12
wcgw-2.6.1.dist-info/licenses/LICENSE +213 -0
{wcgw-2.4.3.dist-info → wcgw-2.6.1.dist-info}/WHEEL +0 -0
{wcgw-2.4.3.dist-info → wcgw-2.6.1.dist-info}/entry_points.txt +0 -0

wcgw/client/anthropic_client.py CHANGED Viewed

@@ -29,7 +29,7 @@ from ..types_ import (
     FileEdit,
     Keyboard,
     Mouse,
-    ReadFile,
+    ReadFiles,
     ReadImage,
     ResetShell,
     ScreenShot,
@@ -41,12 +41,7 @@ from .common import CostData
 from .tools import ImageData
 from .computer_use import Computer
-from .tools import (
-    DoneFlag,
-    get_tool_output,
-    which_tool_name,
-)
-import tiktoken
+from .tools import DoneFlag, get_tool_output, which_tool_name, default_enc
 from urllib import parse
 import subprocess
@@ -156,10 +151,6 @@ def loop(
     limit = 1
-    enc = tiktoken.encoding_for_model(
-        "gpt-4o-2024-08-06",
-    )
     tools = [
         ToolParam(
             input_schema=BashCommand.model_json_schema(),
@@ -192,12 +183,11 @@ def loop(
 """,
         ),
         ToolParam(
-            input_schema=ReadFile.model_json_schema(),
-            name="ReadFile",
+            input_schema=ReadFiles.model_json_schema(),
+            name="ReadFiles",
             description="""
-- Read full file content
-- Provide absolute file path only
-- Use this instead of 'cat' from BashCommand
+- Read full file content of one or more files.
+- Provide absolute file paths only
 """,
         ),
         ToolParam(
@@ -451,7 +441,7 @@ System information:
                             try:
                                 output_or_dones, _ = get_tool_output(
                                     tool_parsed,
-                                    enc,
+                                    default_enc,
                                     limit - cost,
                                     loop,
                                     max_tokens=8000,

wcgw/client/common.py CHANGED Viewed

@@ -38,7 +38,9 @@ def discard_input() -> None:
             while True:
                 # Check if there is input to be read
                 if sys.stdin in select.select([sys.stdin], [], [], 0)[0]:
-                    sys.stdin.read(1)  # Read one character at a time to flush the input buffer
+                    sys.stdin.read(
+                        1
+                    )  # Read one character at a time to flush the input buffer
                 else:
                     break
         finally:

wcgw/client/mcp_server/server.py CHANGED Viewed

@@ -1,34 +1,33 @@
-import asyncio
 import importlib
 import json
 import os
-import sys
-import traceback
 from typing import Any
-from mcp_wcgw.server.models import InitializationOptions
+from pydantic import AnyUrl, ValidationError
+import mcp_wcgw.server.stdio
 import mcp_wcgw.types as types
-from mcp_wcgw.types import Tool as ToolParam
 from mcp_wcgw.server import NotificationOptions, Server
-from pydantic import AnyUrl, BaseModel, ValidationError
-import mcp_wcgw.server.stdio
-from .. import tools
-from ..tools import DoneFlag, get_tool_output, which_tool_name, default_enc
+from mcp_wcgw.server.models import InitializationOptions
+from mcp_wcgw.types import Tool as ToolParam
 from ...types_ import (
     BashCommand,
     BashInteraction,
-    WriteIfEmpty,
     FileEdit,
+    GetScreenInfo,
+    Initialize,
     Keyboard,
     Mouse,
-    ReadFile,
+    ReadFiles,
     ReadImage,
     ResetShell,
-    Initialize,
     ScreenShot,
-    GetScreenInfo,
+    WriteIfEmpty,
 )
+from .. import tools
 from ..computer_use import SLEEP_TIME_MAX_S
+from ..tools import DoneFlag, default_enc, get_tool_output, which_tool_name
 COMPUTER_USE_ON_DOCKER_ENABLED = False
@@ -76,7 +75,13 @@ async def handle_list_tools() -> list[types.Tool]:
             inputSchema=Initialize.model_json_schema(),
             name="Initialize",
             description="""
-- Always call this at the start of the conversation before anything else.
+- Always call this at the start of the conversation before using any of the shell tools from wcgw.
+- This will reset the shell.
+- Use `any_workspace_path` to initialize the shell in the appropriate project directory.
+- If the user has mentioned a workspace or project root, use it to set `any_workspace_path`.
+- If the user has mentioned a folder or file with unclear project root, use the file or folder as `any_workspace_path`.
+- If user has mentioned any files use `initial_files_to_read` to read, use absolute paths only.
+- If `any_workspace_path` is provided, a tree structure of the workspace will be shown.
 """,
         ),
         ToolParam(
@@ -92,6 +97,7 @@ async def handle_list_tools() -> list[types.Tool]:
 - The control will return to you in {SLEEP_TIME_MAX_S} seconds regardless of the status. For heavy commands, keep checking status using BashInteraction till they are finished.
 - Run long running commands in background using screen instead of "&".
 - Use longer wait_for_seconds if the command is expected to run for a long time.
+- Do not use 'cat' to read files, use ReadFiles tool instead.
 """,
         ),
         ToolParam(
@@ -110,12 +116,11 @@ async def handle_list_tools() -> list[types.Tool]:
 """,
         ),
         ToolParam(
-            inputSchema=ReadFile.model_json_schema(),
-            name="ReadFile",
+            inputSchema=ReadFiles.model_json_schema(),
+            name="ReadFiles",
             description="""
-- Read full file content
-- Provide absolute file path only
-- Use this instead of 'cat' from BashCommand
+- Read full file content of one or more files.
+- Provide absolute file paths only
 """,
         ),
         ToolParam(
@@ -236,24 +241,24 @@ async def handle_call_tool(
         if isinstance(output_or_done, str):
             if issubclass(tool_type, Initialize):
                 output_or_done += """
-    You're an expert software engineer with shell and code knowledge.
-    Instructions:
+---
+You're an expert software engineer with shell and code knowledge.
+Instructions:
+    - You should use the provided bash execution, reading and writing file tools to complete objective.
+    - First understand about the project by getting the folder structure (ignoring .git, node_modules, venv, etc.)
+    - Always read relevant files before editing.
+    - Do not provide code snippets unless asked by the user, instead directly add/edit the code.
+    - Do not install new tools/packages before ensuring no such tools/package or an alternative already exists.
+    - Do not use artifacts if you have access to the repository and not asked by the user to provide artifacts/snippets. Directly create/update using shell tools.
+    - Do not use Ctrl-c or Ctrl-z or interrupt commands without asking the user, because often the program don't show any update but they still are running.
+    - Do not use echo to write multi-line files, always use FileEdit tool to update a code.
-        - You should use the provided bash execution, reading and writing file tools to complete objective.
-        - First understand about the project by getting the folder structure (ignoring .git, node_modules, venv, etc.)
-        - Always read relevant files before editing.
-        - Do not provide code snippets unless asked by the user, instead directly add/edit the code.
-        - Do not install new tools/packages before ensuring no such tools/package or an alternative already exists.
-        - Do not use artifacts if you have access to the repository and not asked by the user to provide artifacts/snippets. Directly create/update using shell tools.
-        - Do not use Ctrl-c or Ctrl-z or interrupt commands without asking the user, because often the program don't show any update but they still are running.
-        - Do not use echo to write multi-line files, always use FileEdit tool to update a code.
-    Additional instructions:
-        Always run `pwd` if you get any file or directory not found error to make sure you're not lost, or to get absolute cwd.
-        Always write production ready, syntactically correct code.
+Additional instructions:
+    Always run `pwd` if you get any file or directory not found error to make sure you're not lost, or to get absolute cwd.
+    Always write production ready, syntactically correct code.
     """
             content.append(types.TextContent(type="text", text=output_or_done))

wcgw/client/openai_client.py CHANGED Viewed

@@ -1,57 +1,45 @@
 import base64
 import json
 import mimetypes
-from pathlib import Path
-import sys
+import os
+import subprocess
+import tempfile
 import traceback
-from typing import Callable, DefaultDict, Optional, cast
+import uuid
+from pathlib import Path
+from typing import DefaultDict, Optional, cast
 import openai
+import petname  # type: ignore[import-untyped]
+import rich
+import tokenizers  # type: ignore[import-untyped]
+from dotenv import load_dotenv
 from openai import OpenAI
 from openai.types.chat import (
+    ChatCompletionContentPartParam,
     ChatCompletionMessageParam,
-    ChatCompletionAssistantMessageParam,
     ChatCompletionUserMessageParam,
-    ChatCompletionContentPartParam,
-    ChatCompletionMessage,
-    ParsedChatCompletionMessage,
 )
-import rich
-import petname  # type: ignore[import-untyped]
+from pydantic import BaseModel
 from typer import Typer
-import uuid
 from ..types_ import (
     BashCommand,
     BashInteraction,
-    WriteIfEmpty,
     FileEdit,
+    ReadFiles,
     ReadImage,
-    ReadFile,
     ResetShell,
+    WriteIfEmpty,
 )
-from .common import Models, discard_input
-from .common import CostData, History
+from .common import CostData, History, Models, discard_input
 from .openai_utils import get_input_cost, get_output_cost
-from .tools import ImageData
 from .tools import (
     DoneFlag,
+    ImageData,
     get_tool_output,
     which_tool,
 )
-import tiktoken
-from urllib import parse
-import subprocess
-import os
-import tempfile
-import toml
-from pydantic import BaseModel
-from dotenv import load_dotenv
 class Config(BaseModel):
@@ -160,9 +148,7 @@ def loop(
         config.cost_limit = limit
     limit = config.cost_limit
-    enc = tiktoken.encoding_for_model(
-        config.model if not config.model.startswith("o1") else "gpt-4o"
-    )
+    enc = tokenizers.Tokenizer.from_pretrained("Xenova/gpt-4o")
     tools = [
         openai.pydantic_function_tool(
@@ -188,11 +174,10 @@ def loop(
 - Only one of send_text, send_specials, send_ascii should be provided.""",
         ),
         openai.pydantic_function_tool(
-            ReadFile,
+            ReadFiles,
             description="""
-- Read full file content
-- Provide absolute file path only
-- Use this instead of 'cat' from BashCommand
+- Read full file content of one or more files.
+- Provide absolute file paths only
 """,
         ),
         openai.pydantic_function_tool(

wcgw/client/openai_utils.py CHANGED Viewed

@@ -15,7 +15,7 @@ from openai.types.chat import (
     ParsedChatCompletionMessage,
 )
 import rich
-import tiktoken
+from tokenizers import Tokenizer  # type: ignore[import-untyped]
 from typer import Typer
 import uuid
@@ -23,7 +23,7 @@ from .common import CostData, History
 def get_input_cost(
-    cost_map: CostData, enc: tiktoken.Encoding, history: History
+    cost_map: CostData, enc: Tokenizer, history: History
 ) -> tuple[float, int]:
     input_tokens = 0
     for msg in history:
@@ -31,8 +31,8 @@ def get_input_cost(
         refusal = msg.get("refusal")
         if isinstance(content, list):
             for part in content:
-                if 'text' in part:
-                    input_tokens += len(enc.encode(part['text']))
+                if "text" in part:
+                    input_tokens += len(enc.encode(part["text"]))
         elif content is None:
             if refusal is None:
                 raise ValueError("Expected content or refusal to be present")
@@ -47,7 +47,7 @@ def get_input_cost(
 def get_output_cost(
     cost_map: CostData,
-    enc: tiktoken.Encoding,
+    enc: Tokenizer,
     item: ChatCompletionMessage | ChatCompletionMessageParam,
 ) -> tuple[float, int]:
     if isinstance(item, ChatCompletionMessage):

wcgw/client/repo_ops/display_tree.py ADDED Viewed

@@ -0,0 +1,127 @@
+import io
+from pathlib import Path
+from typing import List, Set
+class DirectoryTree:
+    def __init__(self, root: Path, max_files: int = 10):
+        """
+        Initialize the DirectoryTree with a root path and maximum number of files to display
+        Args:
+            root_path: The root directory path to start from
+            max_files: Maximum number of files to display in unexpanded directories
+        """
+        self.root = root
+        self.max_files = max_files
+        self.expanded_files: Set[Path] = set()
+        self.expanded_dirs = set[Path]()
+        if not self.root.exists():
+            raise ValueError(f"Root path {root} does not exist")
+        if not self.root.is_dir():
+            raise ValueError(f"Root path {root} is not a directory")
+    def expand(self, rel_path: str) -> None:
+        """
+        Expand a specific file in the tree
+        Args:
+            rel_path: Relative path from root to the file to expand
+        """
+        abs_path = self.root / rel_path
+        if not abs_path.exists():
+            raise ValueError(f"Path {rel_path} does not exist")
+        if not abs_path.is_file():
+            raise ValueError(f"Path {rel_path} is not a file")
+        if not str(abs_path).startswith(str(self.root)):
+            raise ValueError(f"Path {rel_path} is outside root directory")
+        self.expanded_files.add(abs_path)
+        # Add all parent directories to expanded dirs
+        current = abs_path.parent
+        while str(current) >= str(self.root):
+            if current not in self.expanded_dirs:
+                self.expanded_dirs.add(current)
+            if current == current.parent:
+                break
+            current = current.parent
+    def _list_directory(self, dir_path: Path) -> List[Path]:
+        """List contents of a directory, sorted with directories first"""
+        contents = list(dir_path.iterdir())
+        return sorted(contents, key=lambda x: (not x.is_dir(), x.name.lower()))
+    def _count_hidden_items(
+        self, dir_path: Path, shown_items: List[Path]
+    ) -> tuple[int, int]:
+        """Count hidden files and directories in a directory"""
+        all_items = set(self._list_directory(dir_path))
+        shown_items_set = set(shown_items)
+        hidden_items = all_items - shown_items_set
+        hidden_files = sum(1 for p in hidden_items if p.is_file())
+        hidden_dirs = sum(1 for p in hidden_items if p.is_dir())
+        return hidden_files, hidden_dirs
+    def display(self) -> str:
+        """Display the directory tree with expanded state"""
+        writer = io.StringIO()
+        def _display_recursive(
+            current_path: Path, indent: int = 0, depth: int = 0
+        ) -> None:
+            # Print current directory name
+            if current_path == self.root:
+                writer.write(f"{current_path}\n")
+            else:
+                writer.write(f"{' ' * indent}{current_path.name}\n")
+            # Don't recurse beyond depth 1 unless path contains expanded files
+            if depth > 0 and current_path not in self.expanded_dirs:
+                return
+            # Get directory contents
+            contents = self._list_directory(current_path)
+            shown_items = []
+            for item in contents:
+                # Show items only if:
+                # 1. They are expanded files
+                # 2. They are parents of expanded items
+                should_show = item in self.expanded_files or item in self.expanded_dirs
+                if should_show:
+                    shown_items.append(item)
+                    if item.is_dir():
+                        _display_recursive(item, indent + 2, depth + 1)
+                    else:
+                        writer.write(f"{' ' * (indent + 2)}{item.name}\n")
+            # Show hidden items count if any items were hidden
+            hidden_files, hidden_dirs = self._count_hidden_items(
+                current_path, shown_items
+            )
+            if hidden_files > 0 or hidden_dirs > 0:
+                hidden_msg = []
+                if hidden_dirs > 0:
+                    hidden_msg.append(
+                        f"{hidden_dirs} director{'ies' if hidden_dirs != 1 else 'y'}"
+                    )
+                if hidden_files > 0:
+                    hidden_msg.append(
+                        f"{hidden_files} file{'s' if hidden_files != 1 else ''}"
+                    )
+                writer.write(
+                    f"{' ' * (indent + 2)}... {' and '.join(hidden_msg)} hidden\n"
+                )
+        _display_recursive(self.root, depth=0)
+        return writer.getvalue()

wcgw/client/repo_ops/path_prob.py ADDED Viewed

@@ -0,0 +1,58 @@
+from typing import Dict, List, Tuple
+import tokenizers  # type: ignore[import-untyped]
+class FastPathAnalyzer:
+    def __init__(self, model_path: str, vocab_path: str) -> None:
+        """Initialize with vocabulary."""
+        # Load vocabulary and probabilities
+        self.vocab_probs: Dict[str, float] = {}
+        with open(vocab_path, "r") as f:
+            for line in f:
+                parts = line.strip().split()
+                if len(parts) == 2:
+                    token, prob = parts
+                    try:
+                        self.vocab_probs[token] = float(prob)
+                    except ValueError:
+                        continue
+        self.encoder = tokenizers.Tokenizer.from_file(model_path)
+    def tokenize_batch(self, texts: List[str]) -> List[List[str]]:
+        """Tokenize multiple texts at once."""
+        encodings = self.encoder.encode_batch(texts)
+        return [encoding.tokens for encoding in encodings]
+    def detokenize(self, tokens: List[str]) -> str:
+        """Convert tokens back to text, handling special tokens."""
+        return self.encoder.decode(tokens)  # type: ignore[no-any-return]
+    def calculate_path_probabilities_batch(
+        self, paths: List[str]
+    ) -> List[Tuple[float, List[str], List[str]]]:
+        """Calculate log probability for multiple paths at once."""
+        # Batch tokenize all paths
+        all_tokens = self.tokenize_batch(paths)
+        results = []
+        for tokens in all_tokens:
+            # Calculate sum of log probabilities for each path
+            log_prob_sum = 0.0
+            unknown_tokens = []
+            for token in tokens:
+                if token in self.vocab_probs:
+                    log_prob_sum += self.vocab_probs[token]
+                else:
+                    unknown_tokens.append(token)
+            results.append((log_prob_sum, tokens, unknown_tokens))
+        return results
+    def calculate_path_probability(
+        self, path: str
+    ) -> Tuple[float, List[str], List[str]]:
+        """Calculate log probability for a single path."""
+        return self.calculate_path_probabilities_batch([path])[0]

wcgw 2.4.3__py3-none-any.whl → 2.6.1__py3-none-any.whl

Potentially problematic release.

wcgw 2.4.3__py3-none-any.whl → 2.6.1__py3-none-any.whl