PyPI - wcgw - Versions diffs - 1.5.2__tar.gz → 1.5.4__tar.gz - Mend

wcgw 1.5.2.tar.gz → 1.5.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of wcgw might be problematic. Click here for more details.

Files changed (39) hide show

{wcgw-1.5.2 → wcgw-1.5.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: wcgw
-Version: 1.5.2
+Version: 1.5.4
 Summary: What could go wrong giving full shell access to chatgpt?
 Project-URL: Homepage, https://github.com/rusiaaman/wcgw
 Author-email: Aman Rusia <gapypi@arcfu.com>

{wcgw-1.5.2 → wcgw-1.5.4}/pyproject.toml RENAMED Viewed

@@ -1,7 +1,7 @@
 [project]
 authors = [{ name = "Aman Rusia", email = "gapypi@arcfu.com" }]
 name = "wcgw"
-version = "1.5.2"
+version = "1.5.4"
 description = "What could go wrong giving full shell access to chatgpt?"
 readme = "README.md"
 requires-python = ">=3.11, <3.13"

{wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/anthropic_client.py RENAMED Viewed

@@ -131,6 +131,7 @@ def loop(
     first_message: Optional[str] = None,
     limit: Optional[float] = None,
     resume: Optional[str] = None,
+    computer_use: bool = False,
 ) -> tuple[str, float]:
     load_dotenv()
@@ -182,6 +183,9 @@ def loop(
 - Send text input to the running program.
 - Send send_specials=["Enter"] to recheck status of a running program.
 - Only one of send_text, send_specials, send_ascii should be provided.
+- This returns within 5 seconds, for heavy programs keep checking status for upto 10 turns before asking user to continue checking again.
+    - Programs don't hang easily, so most likely explanation for no output is usually that the program is still running, and you need to check status again usign ["Enter"].
 """,
         ),
         ToolParam(
@@ -219,10 +223,14 @@ def loop(
 - Use SEARCH/REPLACE blocks to edit the file.
 """,
         ),
-        ToolParam(
-            input_schema=GetScreenInfo.model_json_schema(),
-            name="GetScreenInfo",
-            description="""
+    ]
+    if computer_use:
+        tools += [
+            ToolParam(
+                input_schema=GetScreenInfo.model_json_schema(),
+                name="GetScreenInfo",
+                description="""
 - Important: call this first in the conversation before ScreenShot, Mouse, and Keyboard tools.
 - Get display information of a linux os running on docker using image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
 - If user hasn't provided docker image id, check using `docker ps` and provide the id.
@@ -230,33 +238,40 @@ def loop(
 - Connects shell to the docker environment.
 - Note: once this is called, the shell enters the docker environment. All bash commands will run over there.
 """,
-        ),
-        ToolParam(
-            input_schema=ScreenShot.model_json_schema(),
-            name="ScreenShot",
-            description="""
+            ),
+            ToolParam(
+                input_schema=ScreenShot.model_json_schema(),
+                name="ScreenShot",
+                description="""
 - Capture screenshot of the linux os on docker.
+- All actions on UI using mouse and keyboard return within 0.5 seconds.
+    * So if you're doing something that takes longer for UI to update like heavy page loading, keep checking UI for update usign ScreenShot upto 10 turns.
+    * Notice for smallest of the loading icons to check if your action worked.
+    * After 10 turns of no change, ask user for permission to keep checking.
+    * If you don't notice even slightest of the change, it's likely you clicked on the wrong place.
 """,
-        ),
-        ToolParam(
-            input_schema=Mouse.model_json_schema(),
-            name="Mouse",
-            description="""
+            ),
+            ToolParam(
+                input_schema=Mouse.model_json_schema(),
+                name="Mouse",
+                description="""
 - Interact with the linux os on docker using mouse.
 - Uses xdotool
+- About left_click_drag: the current mouse position will be used as the starting point, click and drag to the given x, y coordinates. Useful in things like sliders, moving things around, etc.
 """,
-        ),
-        ToolParam(
-            input_schema=Keyboard.model_json_schema(),
-            name="Keyboard",
-            description="""
+            ),
+            ToolParam(
+                input_schema=Keyboard.model_json_schema(),
+                name="Keyboard",
+                description="""
 - Interact with the linux os on docker using keyboard.
 - Emulate keyboard input to the screen
 - Uses xdootool to send keyboard input, keys like Return, BackSpace, Escape, Page_Up, etc. can be used.
 - Do not use it to interact with Bash tool.
 """,
-        ),
-    ]
+            ),
+        ]
     uname_sysname = os.uname().sysname
     uname_machine = os.uname().machine

{wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/cli.py RENAMED Viewed

@@ -16,6 +16,7 @@ def loop(
     first_message: Optional[str] = None,
     limit: Optional[float] = None,
     resume: Optional[str] = None,
+    computer_use: bool = False,
     version: bool = typer.Option(False, "--version", "-v"),
 ) -> tuple[str, float]:
     if version:
@@ -27,6 +28,7 @@ def loop(
             first_message=first_message,
             limit=limit,
             resume=resume,
+            computer_use=computer_use,
         )
     else:
         return openai_loop(

{wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/computer_use.py RENAMED Viewed

@@ -26,6 +26,7 @@ from ..types_ import (
 OUTPUT_DIR = "/tmp/outputs"
 TYPING_DELAY_MS = 12
 TYPING_GROUP_SIZE = 50
+SLEEP_TIME_MAX_S = 3
 Action = Literal[
     "key",
@@ -187,12 +188,15 @@ class ComputerTool:
         text: str | None = None,
         coordinate: tuple[int, int] | None = None,
         do_left_click_on_move: bool | None = None,
+        take_after_delay_seconds: int | None = None,
         **kwargs: Any,
     ) -> ToolResult:
         if action == "get_screen_info":
             assert docker_image_id is not None
             self.docker_image_id = docker_image_id
             self.get_screen_info()
+            if take_after_delay_seconds is not None:
+                time.sleep(min(take_after_delay_seconds, SLEEP_TIME_MAX_S))
             screenshot_res = self.screenshot()
             return ToolResult(
                 output=f"width: {self.width}, height: {self.height}, display_num: {self.display_num}",
@@ -396,6 +400,7 @@ def run_computer_tool(
     elif isinstance(action, ScreenShot):
         result = Computer(
             action="screenshot",
+            screenshot_delay=action.take_after_delay_seconds,
         )
     elif isinstance(action, Keyboard):
         result = Computer(

{wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/mcp_server/Readme.md RENAMED Viewed

@@ -31,15 +31,39 @@ Then restart claude app.
 ### [Optional] Computer use support using desktop on docker
-Computer use is enabled by default. Claude will be able to connect to any docker container with linux environment. Native system control isn't supported outside docker.
+Computer use is disabled by default. Add `--computer-use` to enable it. This will add necessary tools to Claude including ScreenShot, Mouse and Keyboard control.
-First run a sample docker image with desktop and optionally VNC connection:
+```json
+{
+  "mcpServers": {
+    "wcgw": {
+      "command": "uv",
+      "args": [
+        "tool",
+        "run",
+        "--from",
+        "wcgw@latest",
+        "--python",
+        "3.12",
+        "wcgw_mcp",
+        "--computer-use"
+      ]
+    }
+  }
+}
+```
+Claude will be able to connect to any docker container with linux environment. Native system control isn't supported outside docker.
+You'll need to run a docker image with desktop and optional VNC connection. Here's a demo image:
 ```sh
 docker run -p 6080:6080 ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest
 ```
-Connect to `http://localhost:6080/vnc.html` for desktop view (VNC) of the system running in the docker. Then ask claude desktop app to control the docker os.
+Then ask claude desktop app to control the docker os. It'll connect to the docker container and control it.
+Connect to `http://localhost:6080/vnc.html` for desktop view (VNC) of the system running in the docker.
 ## Usage

{wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/mcp_server/__init__.py RENAMED Viewed

@@ -1,10 +1,14 @@
 from wcgw.client.mcp_server import server
 import asyncio
+from typer import Typer
+main = Typer()
-def main():
+@main.command()
+def app(computer_use: bool = False) -> None:
     """Main entry point for the package."""
-    asyncio.run(server.main())
+    asyncio.run(server.main(computer_use))
 # Optionally expose other important items at package level

{wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/mcp_server/server.py RENAMED Viewed

@@ -28,9 +28,11 @@ from ...types_ import (
     ScreenShot,
     GetScreenInfo,
 )
-from ..computer_use import Computer
+from ..computer_use import SLEEP_TIME_MAX_S
-tools.TIMEOUT = 3
+tools.TIMEOUT = SLEEP_TIME_MAX_S
+COMPUTER_USE_ON_DOCKER_ENABLED = False
 server = Server("wcgw")
@@ -71,7 +73,7 @@ async def handle_list_tools() -> list[types.Tool]:
     ) as f:
         diffinstructions = f.read()
-    return [
+    tools = [
         ToolParam(
             inputSchema=Initialize.model_json_schema(),
             name="Initialize",
@@ -102,6 +104,8 @@ async def handle_list_tools() -> list[types.Tool]:
 - Send text input to the running program.
 - Send send_specials=["Enter"] to recheck status of a running program.
 - Only one of send_text, send_specials, send_ascii should be provided.
+- This returns within 3 seconds, for heavy programs keep checking status for upto 10 turns before asking user to continue checking again.
+    - Programs don't hang easily, so most likely explanation for no output is usually that the program is still running, and you need to check status again usign ["Enter"].
 """,
         ),
         ToolParam(
@@ -140,17 +144,13 @@ async def handle_list_tools() -> list[types.Tool]:
 """
             + diffinstructions,
         ),
-        ToolParam(
-            inputSchema=ReadImage.model_json_schema(),
-            name="ReadImage",
-            description="""
-- Read an image from the shell.
-""",
-        ),
-        ToolParam(
-            inputSchema=GetScreenInfo.model_json_schema(),
-            name="GetScreenInfo",
-            description="""
+    ]
+    if COMPUTER_USE_ON_DOCKER_ENABLED:
+        tools += [
+            ToolParam(
+                inputSchema=GetScreenInfo.model_json_schema(),
+                name="GetScreenInfo",
+                description="""
 - Important: call this first in the conversation before ScreenShot, Mouse, and Keyboard tools.
 - Get display information of a linux os running on docker using image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
 - If user hasn't provided docker image id, check using `docker ps` and provide the id.
@@ -158,34 +158,42 @@ async def handle_list_tools() -> list[types.Tool]:
 - Connects shell to the docker environment.
 - Note: once this is called, the shell enters the docker environment. All bash commands will run over there.
 """,
-        ),
-        ToolParam(
-            inputSchema=ScreenShot.model_json_schema(),
-            name="ScreenShot",
-            description="""
+            ),
+            ToolParam(
+                inputSchema=ScreenShot.model_json_schema(),
+                name="ScreenShot",
+                description="""
 - Capture screenshot of the linux os on docker.
+- All actions on UI using mouse and keyboard return within 0.5 seconds.
+    * So if you're doing something that takes longer for UI to update like heavy page loading, keep checking UI for update usign ScreenShot upto 10 turns.
+    * Notice for smallest of the loading icons to check if your action worked.
+    * After 10 turns of no change, ask user for permission to keep checking.
+    * If you don't notice even slightest of the change, it's likely you clicked on the wrong place.
 """,
-        ),
-        ToolParam(
-            inputSchema=Mouse.model_json_schema(),
-            name="Mouse",
-            description="""
+            ),
+            ToolParam(
+                inputSchema=Mouse.model_json_schema(),
+                name="Mouse",
+                description="""
 - Interact with the linux os on docker using mouse.
 - Uses xdotool
+- About left_click_drag: the current mouse position will be used as the starting point, click and drag to the given x, y coordinates. Useful in things like sliders, moving things around, etc.
 """,
-        ),
-        ToolParam(
-            inputSchema=Keyboard.model_json_schema(),
-            name="Keyboard",
-            description="""
+            ),
+            ToolParam(
+                inputSchema=Keyboard.model_json_schema(),
+                name="Keyboard",
+                description="""
 - Interact with the linux os on docker using keyboard.
 - Emulate keyboard input to the screen
 - Uses xdootool to send keyboard input, keys like Return, BackSpace, Escape, Page_Up, etc. can be used.
 - Do not use it to interact with Bash tool.
 - Make sure you've selected a text area or an editable element before sending text.
 """,
-        ),
-    ]
+            ),
+        ]
+    return tools
 @server.call_tool()  # type: ignore
@@ -255,29 +263,24 @@ async def handle_call_tool(
     return content
-async def main() -> None:
+async def main(computer_use: bool) -> None:
+    global COMPUTER_USE_ON_DOCKER_ENABLED
+    if computer_use:
+        COMPUTER_USE_ON_DOCKER_ENABLED = True
     version = importlib.metadata.version("wcgw")
-    while True:
-        try:
-            # Run the server using stdin/stdout streams
-            async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
-                await server.run(
-                    read_stream,
-                    write_stream,
-                    InitializationOptions(
-                        server_name="wcgw",
-                        server_version=version,
-                        capabilities=server.get_capabilities(
-                            notification_options=NotificationOptions(),
-                            experimental_capabilities={},
-                        ),
-                    ),
-                    raise_exceptions=False,
-                )
-        except BaseException as e:
-            print(f"Server encountered an error: {e}", file=sys.stderr)
-            print("Stack trace:", file=sys.stderr)
-            traceback.print_exc(file=sys.stderr)
-            print("Restarting server in 5 seconds...", file=sys.stderr)
-            await asyncio.sleep(5)
-            continue
+    # Run the server using stdin/stdout streams
+    async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
+        await server.run(
+            read_stream,
+            write_stream,
+            InitializationOptions(
+                server_name="wcgw",
+                server_version=version,
+                capabilities=server.get_capabilities(
+                    notification_options=NotificationOptions(),
+                    experimental_capabilities={},
+                ),
+            ),
+            raise_exceptions=False,
+        )

{wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/openai_client.py RENAMED Viewed

@@ -123,6 +123,7 @@ def loop(
     first_message: Optional[str] = None,
     limit: Optional[float] = None,
     resume: Optional[str] = None,
+    computer_use: bool = False,
 ) -> tuple[str, float]:
     load_dotenv()

{wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/types_.py RENAMED Viewed

@@ -60,6 +60,7 @@ class GetScreenInfo(BaseModel):
 class ScreenShot(BaseModel):
     type: Literal["ScreenShot"]
+    take_after_delay_seconds: int
 class MouseMove(BaseModel):

{wcgw-1.5.2 → wcgw-1.5.4}/uv.lock RENAMED Viewed

@@ -860,7 +860,7 @@ wheels = [
 [[package]]
 name = "wcgw"
-version = "1.5.1"
+version = "1.5.4"
 source = { editable = "." }
 dependencies = [
     { name = "anthropic" },

wcgw-1.5.2/claude_desktop_config.json DELETED Viewed

@@ -1,15 +0,0 @@
-{
-  "mcpServers": {
-    "wcgw": {
-      "command": "uvx",
-      "args": [
-        "--from",
-        "wcgw",
-        "wcgw_mcp"
-      ],
-      "protocol": "mcp",
-      "defaultModel": "claude-3-sonnet",
-      "maxTokens": 4096
-    }
-  }
-}