wcgw 1.5.1__py3-none-any.whl → 1.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wcgw might be problematic. Click here for more details.

@@ -182,6 +182,9 @@ def loop(
182
182
  - Send text input to the running program.
183
183
  - Send send_specials=["Enter"] to recheck status of a running program.
184
184
  - Only one of send_text, send_specials, send_ascii should be provided.
185
+ - This returns within 5 seconds. For heavy programs, keep checking status for up to 10 turns before asking the user whether to keep checking.
186
+ - Programs don't hang easily, so the most likely explanation for no output is that the program is still running and you need to check status again using ["Enter"].
187
+
185
188
  """,
186
189
  ),
187
190
  ToolParam(
@@ -223,9 +226,10 @@ def loop(
223
226
  input_schema=GetScreenInfo.model_json_schema(),
224
227
  name="GetScreenInfo",
225
228
  description="""
226
- - Get display information of an OS running on docker using image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
227
- - If user hasn't provided docker image id, check using `docker ps` and provide the id.
228
229
  - Important: call this first in the conversation before ScreenShot, Mouse, and Keyboard tools.
230
+ - Get display information of a linux os running on docker using image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
231
+ - If user hasn't provided docker image id, check using `docker ps` and provide the id.
232
+ - If the docker is not running, run using `docker run -d -p 6080:6080 ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest`
229
233
  - Connects shell to the docker environment.
230
234
  - Note: once this is called, the shell enters the docker environment. All bash commands will run over there.
231
235
  """,
@@ -234,26 +238,29 @@ def loop(
234
238
  input_schema=ScreenShot.model_json_schema(),
235
239
  name="ScreenShot",
236
240
  description="""
237
- - Capture screenshot of an OS running on docker using image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
238
- - If user hasn't provided docker image id, check using `docker ps` and provide the id.
239
- - Capture ScreenShot of the current screen for automation.
241
+ - Capture screenshot of the linux os on docker.
242
+ - All actions on UI using mouse and keyboard return within 0.5 seconds.
243
+ * So if you're doing something where the UI takes longer to update, like heavy page loading, keep checking the UI for updates using ScreenShot for up to 10 turns.
244
+ * Look for even the smallest loading icon to check whether your action worked.
245
+ * After 10 turns of no change, ask user for permission to keep checking.
246
+ * If you don't notice even the slightest change, it's likely you clicked in the wrong place.
247
+
240
248
  """,
241
249
  ),
242
250
  ToolParam(
243
251
  input_schema=Mouse.model_json_schema(),
244
252
  name="Mouse",
245
253
  description="""
246
- - Interact with docker container running image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
247
- - If user hasn't provided docker image id, check using `docker ps` and provide the id.
248
- - Interact with the screen using mouse
254
+ - Interact with the linux os on docker using mouse.
255
+ - Uses xdotool
256
+ - About left_click_drag: the current mouse position will be used as the starting point, click and drag to the given x, y coordinates. Useful in things like sliders, moving things around, etc.
249
257
  """,
250
258
  ),
251
259
  ToolParam(
252
260
  input_schema=Keyboard.model_json_schema(),
253
261
  name="Keyboard",
254
262
  description="""
255
- - Interact with docker container running image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
256
- - If user hasn't provided docker image id, check using `docker ps` and provide the id.
263
+ - Interact with the linux os on docker using keyboard.
257
264
  - Emulate keyboard input to the screen
258
265
  - Uses xdotool to send keyboard input; keys like Return, BackSpace, Escape, Page_Up, etc. can be used.
259
266
  - Do not use it to interact with Bash tool.
@@ -26,6 +26,7 @@ from ..types_ import (
26
26
  OUTPUT_DIR = "/tmp/outputs"
27
27
  TYPING_DELAY_MS = 12
28
28
  TYPING_GROUP_SIZE = 50
29
+ SLEEP_TIME_MAX_S = 3
29
30
 
30
31
  Action = Literal[
31
32
  "key",
@@ -186,12 +187,16 @@ class ComputerTool:
186
187
  docker_image_id: Optional[str] = None,
187
188
  text: str | None = None,
188
189
  coordinate: tuple[int, int] | None = None,
190
+ do_left_click_on_move: bool | None = None,
191
+ take_after_delay_seconds: int | None = None,
189
192
  **kwargs: Any,
190
193
  ) -> ToolResult:
191
194
  if action == "get_screen_info":
192
195
  assert docker_image_id is not None
193
196
  self.docker_image_id = docker_image_id
194
197
  self.get_screen_info()
198
+ if take_after_delay_seconds is not None:
199
+ time.sleep(min(take_after_delay_seconds, SLEEP_TIME_MAX_S))
195
200
  screenshot_res = self.screenshot()
196
201
  return ToolResult(
197
202
  output=f"width: {self.width}, height: {self.height}, display_num: {self.display_num}",
@@ -217,7 +222,12 @@ class ComputerTool:
217
222
  )
218
223
 
219
224
  if action == "mouse_move":
220
- return self.shell(f"{self.xdotool} mousemove {x} {y}")
225
+ if not do_left_click_on_move:
226
+ return self.shell(f"{self.xdotool} mousemove {x} {y}")
227
+ else:
228
+ return self.shell(
229
+ f"{self.xdotool} mousemove {x} {y} click 1",
230
+ )
221
231
  elif action == "left_click_drag":
222
232
  return self.shell(
223
233
  f"{self.xdotool} mousedown 1 mousemove {x} {y} mouseup 1",
@@ -390,6 +400,7 @@ def run_computer_tool(
390
400
  elif isinstance(action, ScreenShot):
391
401
  result = Computer(
392
402
  action="screenshot",
403
+ screenshot_delay=action.take_after_delay_seconds,
393
404
  )
394
405
  elif isinstance(action, Keyboard):
395
406
  result = Computer(
@@ -401,6 +412,7 @@ def run_computer_tool(
401
412
  result = Computer(
402
413
  action="mouse_move",
403
414
  coordinate=(action.action.x, action.action.y),
415
+ do_left_click_on_move=action.action.do_left_click_on_move,
404
416
  )
405
417
  elif isinstance(action.action, LeftClickDrag):
406
418
  result = Computer(
@@ -1,5 +1,9 @@
1
1
  # Claude desktop support
2
2
 
3
+ `wcgw` enables Claude desktop app on Mac to access shell and file system in order to automate tasks, run code, etc.
4
+
5
+ It also has a computer use feature to connect to linux running on docker. Claude can fully control it including mouse and keyboard.
6
+
3
7
  ## Setup
4
8
 
5
9
  Update `claude_desktop_config.json` (~/Library/Application Support/Claude/claude_desktop_config.json)
@@ -32,21 +36,10 @@ Computer use is enabled by default. Claude will be able to connect to any docker
32
36
  First run a sample docker image with desktop and optionally VNC connection:
33
37
 
34
38
  ```sh
35
- docker run \
36
- --entrypoint "" \
37
- -p 6080:6080 \
38
- -e WIDTH=1024 \
39
- -e HEIGHT=768 \
40
- -d \
41
- ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest \
42
- bash -c "\
43
- ./start_all.sh && \
44
- ./novnc_startup.sh && \
45
- python http_server.py > /tmp/server_logs.txt 2>&1 & \
46
- tail -f /dev/null"
39
+ docker run -p 6080:6080 ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest
47
40
  ```
48
41
 
49
- Connect to `http://localhost:6080/vnc.html` for desktop view (VNC) of the system running in the docker. Then ask claude to control the docker os.
42
+ Connect to `http://localhost:6080/vnc.html` for desktop view (VNC) of the system running in the docker. Then ask claude desktop app to control the docker os.
50
43
 
51
44
  ## Usage
52
45
 
@@ -28,9 +28,9 @@ from ...types_ import (
28
28
  ScreenShot,
29
29
  GetScreenInfo,
30
30
  )
31
- from ..computer_use import Computer
31
+ from ..computer_use import SLEEP_TIME_MAX_S
32
32
 
33
- tools.TIMEOUT = 3
33
+ tools.TIMEOUT = SLEEP_TIME_MAX_S
34
34
 
35
35
  server = Server("wcgw")
36
36
 
@@ -102,6 +102,8 @@ async def handle_list_tools() -> list[types.Tool]:
102
102
  - Send text input to the running program.
103
103
  - Send send_specials=["Enter"] to recheck status of a running program.
104
104
  - Only one of send_text, send_specials, send_ascii should be provided.
105
+ - This returns within 3 seconds. For heavy programs, keep checking status for up to 10 turns before asking the user whether to keep checking.
106
+ - Programs don't hang easily, so the most likely explanation for no output is that the program is still running and you need to check status again using ["Enter"].
105
107
  """,
106
108
  ),
107
109
  ToolParam(
@@ -151,9 +153,10 @@ async def handle_list_tools() -> list[types.Tool]:
151
153
  inputSchema=GetScreenInfo.model_json_schema(),
152
154
  name="GetScreenInfo",
153
155
  description="""
154
- - Get display information of an OS running on docker using image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
155
- - If user hasn't provided docker image id, check using `docker ps` and provide the id.
156
156
  - Important: call this first in the conversation before ScreenShot, Mouse, and Keyboard tools.
157
+ - Get display information of a linux os running on docker using image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
158
+ - If user hasn't provided docker image id, check using `docker ps` and provide the id.
159
+ - If the docker is not running, run using `docker run -d -p 6080:6080 ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest`
157
160
  - Connects shell to the docker environment.
158
161
  - Note: once this is called, the shell enters the docker environment. All bash commands will run over there.
159
162
  """,
@@ -162,29 +165,32 @@ async def handle_list_tools() -> list[types.Tool]:
162
165
  inputSchema=ScreenShot.model_json_schema(),
163
166
  name="ScreenShot",
164
167
  description="""
165
- - Capture screenshot of an OS running on docker using image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
166
- - If user hasn't provided docker image id, check using `docker ps` and provide the id.
167
- - Capture ScreenShot of the current screen for automation.
168
+ - Capture screenshot of the linux os on docker.
169
+ - All actions on UI using mouse and keyboard return within 0.5 seconds.
170
+ * So if you're doing something where the UI takes longer to update, like heavy page loading, keep checking the UI for updates using ScreenShot for up to 10 turns.
171
+ * Look for even the smallest loading icon to check whether your action worked.
172
+ * After 10 turns of no change, ask user for permission to keep checking.
173
+ * If you don't notice even the slightest change, it's likely you clicked in the wrong place.
168
174
  """,
169
175
  ),
170
176
  ToolParam(
171
177
  inputSchema=Mouse.model_json_schema(),
172
178
  name="Mouse",
173
179
  description="""
174
- - Interact with docker container running image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
175
- - If user hasn't provided docker image id, check using `docker ps` and provide the id.
176
- - Interact with the screen using mouse
180
+ - Interact with the linux os on docker using mouse.
181
+ - Uses xdotool
182
+ - About left_click_drag: the current mouse position will be used as the starting point, click and drag to the given x, y coordinates. Useful in things like sliders, moving things around, etc.
177
183
  """,
178
184
  ),
179
185
  ToolParam(
180
186
  inputSchema=Keyboard.model_json_schema(),
181
187
  name="Keyboard",
182
188
  description="""
183
- - Interact with docker container running image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
184
- - If user hasn't provided docker image id, check using `docker ps` and provide the id.
189
+ - Interact with the linux os on docker using keyboard.
185
190
  - Emulate keyboard input to the screen
186
191
  - Uses xdotool to send keyboard input; keys like Return, BackSpace, Escape, Page_Up, etc. can be used.
187
192
  - Do not use it to interact with Bash tool.
193
+ - Make sure you've selected a text area or an editable element before sending text.
188
194
  """,
189
195
  ),
190
196
  ]
@@ -259,27 +265,18 @@ async def handle_call_tool(
259
265
 
260
266
  async def main() -> None:
261
267
  version = importlib.metadata.version("wcgw")
262
- while True:
263
- try:
264
- # Run the server using stdin/stdout streams
265
- async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
266
- await server.run(
267
- read_stream,
268
- write_stream,
269
- InitializationOptions(
270
- server_name="wcgw",
271
- server_version=version,
272
- capabilities=server.get_capabilities(
273
- notification_options=NotificationOptions(),
274
- experimental_capabilities={},
275
- ),
276
- ),
277
- raise_exceptions=False,
278
- )
279
- except BaseException as e:
280
- print(f"Server encountered an error: {e}", file=sys.stderr)
281
- print("Stack trace:", file=sys.stderr)
282
- traceback.print_exc(file=sys.stderr)
283
- print("Restarting server in 5 seconds...", file=sys.stderr)
284
- await asyncio.sleep(5)
285
- continue
268
+ # Run the server using stdin/stdout streams
269
+ async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
270
+ await server.run(
271
+ read_stream,
272
+ write_stream,
273
+ InitializationOptions(
274
+ server_name="wcgw",
275
+ server_version=version,
276
+ capabilities=server.get_capabilities(
277
+ notification_options=NotificationOptions(),
278
+ experimental_capabilities={},
279
+ ),
280
+ ),
281
+ raise_exceptions=False,
282
+ )
wcgw/client/tools.py CHANGED
@@ -170,6 +170,7 @@ def initial_info() -> str:
170
170
  System: {uname_sysname}
171
171
  Machine: {uname_machine}
172
172
  Current working directory: {CWD}
173
+ wcgw version: {importlib.metadata.version("wcgw")}
173
174
  """
174
175
 
175
176
 
wcgw/types_.py CHANGED
@@ -60,12 +60,13 @@ class GetScreenInfo(BaseModel):
60
60
 
61
61
  class ScreenShot(BaseModel):
62
62
  type: Literal["ScreenShot"]
63
- docker_image_id: str
63
+ take_after_delay_seconds: int
64
64
 
65
65
 
66
66
  class MouseMove(BaseModel):
67
67
  x: int
68
68
  y: int
69
+ do_left_click_on_move: bool
69
70
  type: Literal["MouseMove"]
70
71
 
71
72
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: wcgw
3
- Version: 1.5.1
3
+ Version: 1.5.3
4
4
  Summary: What could go wrong giving full shell access to chatgpt?
5
5
  Project-URL: Homepage, https://github.com/rusiaaman/wcgw
6
6
  Author-email: Aman Rusia <gapypi@arcfu.com>
@@ -29,7 +29,9 @@ Description-Content-Type: text/markdown
29
29
 
30
30
  # Shell and Coding agent on Chatgpt and Claude desktop apps
31
31
 
32
- A custom gpt on chatgpt web/desktop apps to interact with your local shell, edit files, run code, etc.
32
+ - An MCP server on claude desktop for autonomous shell, coding and desktop control agent.
33
+ - A custom gpt on chatgpt web/desktop apps to interact with your local shell, edit files, run code, etc.
34
+
33
35
 
34
36
  [![Tests](https://github.com/rusiaaman/wcgw/actions/workflows/python-tests.yml/badge.svg?branch=main)](https://github.com/rusiaaman/wcgw/actions/workflows/python-tests.yml)
35
37
  [![Build](https://github.com/rusiaaman/wcgw/actions/workflows/python-publish.yml/badge.svg)](https://github.com/rusiaaman/wcgw/actions/workflows/python-publish.yml)
@@ -40,6 +42,7 @@ A custom gpt on chatgpt web/desktop apps to interact with your local shell, edit
40
42
  ### 🚀 Highlights
41
43
 
42
44
  - ⚡ **Full Shell Access**: No restrictions, complete control.
45
+ - ⚡ **Desktop control on Claude**: Screen capture, mouse control, keyboard control on claude desktop (on mac with docker linux)
43
46
  - ⚡ **Create, Execute, Iterate**: Ask the gpt to keep running compiler checks till all errors are fixed, or ask it to keep checking for the status of a long running command till it's done.
44
47
  - ⚡ **Interactive Command Handling**: Supports interactive commands using arrow keys, interrupt, and ansi escape sequences.
45
48
  - ⚡ **REPL support**: [beta] Supports python/node and other REPL execution.
@@ -1,22 +1,22 @@
1
1
  wcgw/__init__.py,sha256=9K2QW7QuSLhMTVbKbBYd9UUp-ZyrfBrxcjuD_xk458k,118
2
- wcgw/types_.py,sha256=STKGeVdQNK_k8mcmAXpKZJY9YUpU7-mbJcFpXfzBuys,1732
2
+ wcgw/types_.py,sha256=rDz4olJS2zvYC13jzeOppA2tci-tVDyWAqeA5BesAaU,1773
3
3
  wcgw/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  wcgw/client/__main__.py,sha256=wcCrL4PjG51r5wVKqJhcoJPTLfHW0wNbD31DrUN0MWI,28
5
- wcgw/client/anthropic_client.py,sha256=owR-nIxQVGgw_ned8JOQ-QmmCBQvZSqgD08kYpU_Rbg,17730
5
+ wcgw/client/anthropic_client.py,sha256=c6nNNojsWLFWjMJcEsEp7g31Ps9SIgCqMk4dhrOn9V4,18314
6
6
  wcgw/client/cli.py,sha256=Oja42CHkVO8puqOXflko9NeephYCMa85aBmQTEjBZtI,932
7
7
  wcgw/client/common.py,sha256=grH-yV_4tnTQZ29xExn4YicGLxEq98z-HkEZwH0ReSg,1410
8
- wcgw/client/computer_use.py,sha256=hadmsHpwVRqTZh4Q7Ssu3xdnNLnBW4y-pd5P6D-qqKE,14276
8
+ wcgw/client/computer_use.py,sha256=eGiINKfgY8WWT-NDUa6vUKd1MTWE7dTjSlvjZHPCWzc,14870
9
9
  wcgw/client/diff-instructions.txt,sha256=s5AJKG23JsjwRYhFZFQVvwDpF67vElawrmdXwvukR1A,1683
10
10
  wcgw/client/openai_client.py,sha256=L61ajFVQW2QPS3C0n1YsjgF4vQKfMIZHmp6iFBHutX8,17748
11
11
  wcgw/client/openai_utils.py,sha256=YNwCsA-Wqq7jWrxP0rfQmBTb1dI0s7dWXzQqyTzOZT4,2629
12
12
  wcgw/client/sys_utils.py,sha256=GajPntKhaTUMn6EOmopENWZNR2G_BJyuVbuot0x6veI,1376
13
- wcgw/client/tools.py,sha256=Ce_1eLXl6W1U2EcNk2JPiCAKmEnTHt3Jd78ZHlW-ET4,32629
14
- wcgw/client/mcp_server/Readme.md,sha256=mztipVTwqXLLenbVihnodq7gUF2Q0_YOKOMW3MiV3UM,2020
13
+ wcgw/client/tools.py,sha256=d7Fni7JU3aOh2vXBAw5k5rsxkdQVcxoxc5vipvEsA2g,32680
14
+ wcgw/client/mcp_server/Readme.md,sha256=1hNZtqltsORug7OzUjjoK5O8q5s9-Y3S0_rlzT-Wfg4,2033
15
15
  wcgw/client/mcp_server/__init__.py,sha256=cQ7PUrEmXUpio8x0SEoGWP5hCRPd7z2bAkNCbYbtTys,236
16
- wcgw/client/mcp_server/server.py,sha256=niS5elM7vdu181DjDSNXRbipFjt9Ke-H9HQMWMwNnXg,10211
16
+ wcgw/client/mcp_server/server.py,sha256=i1nn16LSsbySm3LCt_T_D2G33nPtGNDJakp5FzKV6vQ,10416
17
17
  wcgw/relay/serve.py,sha256=RUcUeyL4Xt0EEo12Ul6VQjb4tRle4uIdsa85v7XXxEw,8771
18
18
  wcgw/relay/static/privacy.txt,sha256=s9qBdbx2SexCpC_z33sg16TptmAwDEehMCLz4L50JLc,529
19
- wcgw-1.5.1.dist-info/METADATA,sha256=ls5ebZhUUraGJqdSIHCggftaq16jnGJS8VxHMj1Y0GY,6285
20
- wcgw-1.5.1.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
21
- wcgw-1.5.1.dist-info/entry_points.txt,sha256=eKo1omwbAggWlQ0l7GKoR7uV1-j16nk9tK0BhC2Oz_E,120
22
- wcgw-1.5.1.dist-info/RECORD,,
19
+ wcgw-1.5.3.dist-info/METADATA,sha256=9raP3OP6KWq5bzIEuwJ41_04g5fS5m1HVSNm-YI8Dkw,6508
20
+ wcgw-1.5.3.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
21
+ wcgw-1.5.3.dist-info/entry_points.txt,sha256=eKo1omwbAggWlQ0l7GKoR7uV1-j16nk9tK0BhC2Oz_E,120
22
+ wcgw-1.5.3.dist-info/RECORD,,
File without changes