wcgw 1.5.2__tar.gz → 1.5.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wcgw might be problematic. Click here for more details.

Files changed (39)
  1. {wcgw-1.5.2 → wcgw-1.5.4}/PKG-INFO +1 -1
  2. {wcgw-1.5.2 → wcgw-1.5.4}/pyproject.toml +1 -1
  3. {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/anthropic_client.py +36 -21
  4. {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/cli.py +2 -0
  5. {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/computer_use.py +5 -0
  6. {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/mcp_server/Readme.md +27 -3
  7. {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/mcp_server/__init__.py +6 -2
  8. {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/mcp_server/server.py +59 -56
  9. {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/openai_client.py +1 -0
  10. {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/types_.py +1 -0
  11. {wcgw-1.5.2 → wcgw-1.5.4}/uv.lock +1 -1
  12. wcgw-1.5.2/claude_desktop_config.json +0 -15
  13. {wcgw-1.5.2 → wcgw-1.5.4}/.github/workflows/python-publish.yml +0 -0
  14. {wcgw-1.5.2 → wcgw-1.5.4}/.github/workflows/python-tests.yml +0 -0
  15. {wcgw-1.5.2 → wcgw-1.5.4}/.gitignore +0 -0
  16. {wcgw-1.5.2 → wcgw-1.5.4}/.python-version +0 -0
  17. {wcgw-1.5.2 → wcgw-1.5.4}/.vscode/settings.json +0 -0
  18. {wcgw-1.5.2 → wcgw-1.5.4}/README.md +0 -0
  19. {wcgw-1.5.2 → wcgw-1.5.4}/add.py +0 -0
  20. {wcgw-1.5.2 → wcgw-1.5.4}/gpt_action_json_schema.json +0 -0
  21. {wcgw-1.5.2 → wcgw-1.5.4}/gpt_instructions.txt +0 -0
  22. {wcgw-1.5.2 → wcgw-1.5.4}/src/__init__.py +0 -0
  23. {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/__init__.py +0 -0
  24. {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/__init__.py +0 -0
  25. {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/__main__.py +0 -0
  26. {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/common.py +0 -0
  27. {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/diff-instructions.txt +0 -0
  28. {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/openai_utils.py +0 -0
  29. {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/sys_utils.py +0 -0
  30. {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/tools.py +0 -0
  31. {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/relay/serve.py +0 -0
  32. {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/relay/static/privacy.txt +0 -0
  33. {wcgw-1.5.2 → wcgw-1.5.4}/static/claude-ss.jpg +0 -0
  34. {wcgw-1.5.2 → wcgw-1.5.4}/static/computer-use.jpg +0 -0
  35. {wcgw-1.5.2 → wcgw-1.5.4}/static/example.jpg +0 -0
  36. {wcgw-1.5.2 → wcgw-1.5.4}/static/rocket-icon.png +0 -0
  37. {wcgw-1.5.2 → wcgw-1.5.4}/static/ss1.png +0 -0
  38. {wcgw-1.5.2 → wcgw-1.5.4}/tests/test_basic.py +0 -0
  39. {wcgw-1.5.2 → wcgw-1.5.4}/tests/test_tools.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: wcgw
3
- Version: 1.5.2
3
+ Version: 1.5.4
4
4
  Summary: What could go wrong giving full shell access to chatgpt?
5
5
  Project-URL: Homepage, https://github.com/rusiaaman/wcgw
6
6
  Author-email: Aman Rusia <gapypi@arcfu.com>
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  authors = [{ name = "Aman Rusia", email = "gapypi@arcfu.com" }]
3
3
  name = "wcgw"
4
- version = "1.5.2"
4
+ version = "1.5.4"
5
5
  description = "What could go wrong giving full shell access to chatgpt?"
6
6
  readme = "README.md"
7
7
  requires-python = ">=3.11, <3.13"
@@ -131,6 +131,7 @@ def loop(
131
131
  first_message: Optional[str] = None,
132
132
  limit: Optional[float] = None,
133
133
  resume: Optional[str] = None,
134
+ computer_use: bool = False,
134
135
  ) -> tuple[str, float]:
135
136
  load_dotenv()
136
137
 
@@ -182,6 +183,9 @@ def loop(
182
183
  - Send text input to the running program.
183
184
  - Send send_specials=["Enter"] to recheck status of a running program.
184
185
  - Only one of send_text, send_specials, send_ascii should be provided.
186
+ - This returns within 5 seconds, for heavy programs keep checking status for upto 10 turns before asking user to continue checking again.
187
+ - Programs don't hang easily, so most likely explanation for no output is usually that the program is still running, and you need to check status again usign ["Enter"].
188
+
185
189
  """,
186
190
  ),
187
191
  ToolParam(
@@ -219,10 +223,14 @@ def loop(
219
223
  - Use SEARCH/REPLACE blocks to edit the file.
220
224
  """,
221
225
  ),
222
- ToolParam(
223
- input_schema=GetScreenInfo.model_json_schema(),
224
- name="GetScreenInfo",
225
- description="""
226
+ ]
227
+
228
+ if computer_use:
229
+ tools += [
230
+ ToolParam(
231
+ input_schema=GetScreenInfo.model_json_schema(),
232
+ name="GetScreenInfo",
233
+ description="""
226
234
  - Important: call this first in the conversation before ScreenShot, Mouse, and Keyboard tools.
227
235
  - Get display information of a linux os running on docker using image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
228
236
  - If user hasn't provided docker image id, check using `docker ps` and provide the id.
@@ -230,33 +238,40 @@ def loop(
230
238
  - Connects shell to the docker environment.
231
239
  - Note: once this is called, the shell enters the docker environment. All bash commands will run over there.
232
240
  """,
233
- ),
234
- ToolParam(
235
- input_schema=ScreenShot.model_json_schema(),
236
- name="ScreenShot",
237
- description="""
241
+ ),
242
+ ToolParam(
243
+ input_schema=ScreenShot.model_json_schema(),
244
+ name="ScreenShot",
245
+ description="""
238
246
  - Capture screenshot of the linux os on docker.
247
+ - All actions on UI using mouse and keyboard return within 0.5 seconds.
248
+ * So if you're doing something that takes longer for UI to update like heavy page loading, keep checking UI for update usign ScreenShot upto 10 turns.
249
+ * Notice for smallest of the loading icons to check if your action worked.
250
+ * After 10 turns of no change, ask user for permission to keep checking.
251
+ * If you don't notice even slightest of the change, it's likely you clicked on the wrong place.
252
+
239
253
  """,
240
- ),
241
- ToolParam(
242
- input_schema=Mouse.model_json_schema(),
243
- name="Mouse",
244
- description="""
254
+ ),
255
+ ToolParam(
256
+ input_schema=Mouse.model_json_schema(),
257
+ name="Mouse",
258
+ description="""
245
259
  - Interact with the linux os on docker using mouse.
246
260
  - Uses xdotool
261
+ - About left_click_drag: the current mouse position will be used as the starting point, click and drag to the given x, y coordinates. Useful in things like sliders, moving things around, etc.
247
262
  """,
248
- ),
249
- ToolParam(
250
- input_schema=Keyboard.model_json_schema(),
251
- name="Keyboard",
252
- description="""
263
+ ),
264
+ ToolParam(
265
+ input_schema=Keyboard.model_json_schema(),
266
+ name="Keyboard",
267
+ description="""
253
268
  - Interact with the linux os on docker using keyboard.
254
269
  - Emulate keyboard input to the screen
255
270
  - Uses xdootool to send keyboard input, keys like Return, BackSpace, Escape, Page_Up, etc. can be used.
256
271
  - Do not use it to interact with Bash tool.
257
272
  """,
258
- ),
259
- ]
273
+ ),
274
+ ]
260
275
  uname_sysname = os.uname().sysname
261
276
  uname_machine = os.uname().machine
262
277
 
@@ -16,6 +16,7 @@ def loop(
16
16
  first_message: Optional[str] = None,
17
17
  limit: Optional[float] = None,
18
18
  resume: Optional[str] = None,
19
+ computer_use: bool = False,
19
20
  version: bool = typer.Option(False, "--version", "-v"),
20
21
  ) -> tuple[str, float]:
21
22
  if version:
@@ -27,6 +28,7 @@ def loop(
27
28
  first_message=first_message,
28
29
  limit=limit,
29
30
  resume=resume,
31
+ computer_use=computer_use,
30
32
  )
31
33
  else:
32
34
  return openai_loop(
@@ -26,6 +26,7 @@ from ..types_ import (
26
26
  OUTPUT_DIR = "/tmp/outputs"
27
27
  TYPING_DELAY_MS = 12
28
28
  TYPING_GROUP_SIZE = 50
29
+ SLEEP_TIME_MAX_S = 3
29
30
 
30
31
  Action = Literal[
31
32
  "key",
@@ -187,12 +188,15 @@ class ComputerTool:
187
188
  text: str | None = None,
188
189
  coordinate: tuple[int, int] | None = None,
189
190
  do_left_click_on_move: bool | None = None,
191
+ take_after_delay_seconds: int | None = None,
190
192
  **kwargs: Any,
191
193
  ) -> ToolResult:
192
194
  if action == "get_screen_info":
193
195
  assert docker_image_id is not None
194
196
  self.docker_image_id = docker_image_id
195
197
  self.get_screen_info()
198
+ if take_after_delay_seconds is not None:
199
+ time.sleep(min(take_after_delay_seconds, SLEEP_TIME_MAX_S))
196
200
  screenshot_res = self.screenshot()
197
201
  return ToolResult(
198
202
  output=f"width: {self.width}, height: {self.height}, display_num: {self.display_num}",
@@ -396,6 +400,7 @@ def run_computer_tool(
396
400
  elif isinstance(action, ScreenShot):
397
401
  result = Computer(
398
402
  action="screenshot",
403
+ screenshot_delay=action.take_after_delay_seconds,
399
404
  )
400
405
  elif isinstance(action, Keyboard):
401
406
  result = Computer(
@@ -31,15 +31,39 @@ Then restart claude app.
31
31
 
32
32
  ### [Optional] Computer use support using desktop on docker
33
33
 
34
- Computer use is enabled by default. Claude will be able to connect to any docker container with linux environment. Native system control isn't supported outside docker.
34
+ Computer use is disabled by default. Add `--computer-use` to enable it. This adds the necessary tools to Claude, including ScreenShot, Mouse, and Keyboard control.
35
35
 
36
- First run a sample docker image with desktop and optionally VNC connection:
36
+ ```json
37
+ {
38
+ "mcpServers": {
39
+ "wcgw": {
40
+ "command": "uv",
41
+ "args": [
42
+ "tool",
43
+ "run",
44
+ "--from",
45
+ "wcgw@latest",
46
+ "--python",
47
+ "3.12",
48
+ "wcgw_mcp",
49
+ "--computer-use"
50
+ ]
51
+ }
52
+ }
53
+ }
54
+ ```
55
+
56
+ Claude will be able to connect to any Docker container with a Linux environment. Native system control isn't supported outside Docker.
57
+
58
+ You'll need to run a Docker image with a desktop and, optionally, a VNC connection. Here's a demo image:
37
59
 
38
60
  ```sh
39
61
  docker run -p 6080:6080 ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest
40
62
  ```
41
63
 
42
- Connect to `http://localhost:6080/vnc.html` for desktop view (VNC) of the system running in the docker. Then ask claude desktop app to control the docker os.
64
+ Then ask the Claude desktop app to control the Docker OS. It will connect to the Docker container and control it.
65
+
66
+ Connect to `http://localhost:6080/vnc.html` for a desktop view (VNC) of the system running in the Docker container.
43
67
 
44
68
  ## Usage
45
69
 
@@ -1,10 +1,14 @@
1
1
  from wcgw.client.mcp_server import server
2
2
  import asyncio
3
+ from typer import Typer
3
4
 
5
+ main = Typer()
4
6
 
5
- def main():
7
+
8
+ @main.command()
9
+ def app(computer_use: bool = False) -> None:
6
10
  """Main entry point for the package."""
7
- asyncio.run(server.main())
11
+ asyncio.run(server.main(computer_use))
8
12
 
9
13
 
10
14
  # Optionally expose other important items at package level
@@ -28,9 +28,11 @@ from ...types_ import (
28
28
  ScreenShot,
29
29
  GetScreenInfo,
30
30
  )
31
- from ..computer_use import Computer
31
+ from ..computer_use import SLEEP_TIME_MAX_S
32
32
 
33
- tools.TIMEOUT = 3
33
+ tools.TIMEOUT = SLEEP_TIME_MAX_S
34
+
35
+ COMPUTER_USE_ON_DOCKER_ENABLED = False
34
36
 
35
37
  server = Server("wcgw")
36
38
 
@@ -71,7 +73,7 @@ async def handle_list_tools() -> list[types.Tool]:
71
73
  ) as f:
72
74
  diffinstructions = f.read()
73
75
 
74
- return [
76
+ tools = [
75
77
  ToolParam(
76
78
  inputSchema=Initialize.model_json_schema(),
77
79
  name="Initialize",
@@ -102,6 +104,8 @@ async def handle_list_tools() -> list[types.Tool]:
102
104
  - Send text input to the running program.
103
105
  - Send send_specials=["Enter"] to recheck status of a running program.
104
106
  - Only one of send_text, send_specials, send_ascii should be provided.
107
+ - This returns within 3 seconds, for heavy programs keep checking status for upto 10 turns before asking user to continue checking again.
108
+ - Programs don't hang easily, so most likely explanation for no output is usually that the program is still running, and you need to check status again usign ["Enter"].
105
109
  """,
106
110
  ),
107
111
  ToolParam(
@@ -140,17 +144,13 @@ async def handle_list_tools() -> list[types.Tool]:
140
144
  """
141
145
  + diffinstructions,
142
146
  ),
143
- ToolParam(
144
- inputSchema=ReadImage.model_json_schema(),
145
- name="ReadImage",
146
- description="""
147
- - Read an image from the shell.
148
- """,
149
- ),
150
- ToolParam(
151
- inputSchema=GetScreenInfo.model_json_schema(),
152
- name="GetScreenInfo",
153
- description="""
147
+ ]
148
+ if COMPUTER_USE_ON_DOCKER_ENABLED:
149
+ tools += [
150
+ ToolParam(
151
+ inputSchema=GetScreenInfo.model_json_schema(),
152
+ name="GetScreenInfo",
153
+ description="""
154
154
  - Important: call this first in the conversation before ScreenShot, Mouse, and Keyboard tools.
155
155
  - Get display information of a linux os running on docker using image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
156
156
  - If user hasn't provided docker image id, check using `docker ps` and provide the id.
@@ -158,34 +158,42 @@ async def handle_list_tools() -> list[types.Tool]:
158
158
  - Connects shell to the docker environment.
159
159
  - Note: once this is called, the shell enters the docker environment. All bash commands will run over there.
160
160
  """,
161
- ),
162
- ToolParam(
163
- inputSchema=ScreenShot.model_json_schema(),
164
- name="ScreenShot",
165
- description="""
161
+ ),
162
+ ToolParam(
163
+ inputSchema=ScreenShot.model_json_schema(),
164
+ name="ScreenShot",
165
+ description="""
166
166
  - Capture screenshot of the linux os on docker.
167
+ - All actions on UI using mouse and keyboard return within 0.5 seconds.
168
+ * So if you're doing something that takes longer for UI to update like heavy page loading, keep checking UI for update usign ScreenShot upto 10 turns.
169
+ * Notice for smallest of the loading icons to check if your action worked.
170
+ * After 10 turns of no change, ask user for permission to keep checking.
171
+ * If you don't notice even slightest of the change, it's likely you clicked on the wrong place.
167
172
  """,
168
- ),
169
- ToolParam(
170
- inputSchema=Mouse.model_json_schema(),
171
- name="Mouse",
172
- description="""
173
+ ),
174
+ ToolParam(
175
+ inputSchema=Mouse.model_json_schema(),
176
+ name="Mouse",
177
+ description="""
173
178
  - Interact with the linux os on docker using mouse.
174
179
  - Uses xdotool
180
+ - About left_click_drag: the current mouse position will be used as the starting point, click and drag to the given x, y coordinates. Useful in things like sliders, moving things around, etc.
175
181
  """,
176
- ),
177
- ToolParam(
178
- inputSchema=Keyboard.model_json_schema(),
179
- name="Keyboard",
180
- description="""
182
+ ),
183
+ ToolParam(
184
+ inputSchema=Keyboard.model_json_schema(),
185
+ name="Keyboard",
186
+ description="""
181
187
  - Interact with the linux os on docker using keyboard.
182
188
  - Emulate keyboard input to the screen
183
189
  - Uses xdootool to send keyboard input, keys like Return, BackSpace, Escape, Page_Up, etc. can be used.
184
190
  - Do not use it to interact with Bash tool.
185
191
  - Make sure you've selected a text area or an editable element before sending text.
186
192
  """,
187
- ),
188
- ]
193
+ ),
194
+ ]
195
+
196
+ return tools
189
197
 
190
198
 
191
199
  @server.call_tool() # type: ignore
@@ -255,29 +263,24 @@ async def handle_call_tool(
255
263
  return content
256
264
 
257
265
 
258
- async def main() -> None:
266
+ async def main(computer_use: bool) -> None:
267
+ global COMPUTER_USE_ON_DOCKER_ENABLED
268
+ if computer_use:
269
+ COMPUTER_USE_ON_DOCKER_ENABLED = True
270
+
259
271
  version = importlib.metadata.version("wcgw")
260
- while True:
261
- try:
262
- # Run the server using stdin/stdout streams
263
- async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
264
- await server.run(
265
- read_stream,
266
- write_stream,
267
- InitializationOptions(
268
- server_name="wcgw",
269
- server_version=version,
270
- capabilities=server.get_capabilities(
271
- notification_options=NotificationOptions(),
272
- experimental_capabilities={},
273
- ),
274
- ),
275
- raise_exceptions=False,
276
- )
277
- except BaseException as e:
278
- print(f"Server encountered an error: {e}", file=sys.stderr)
279
- print("Stack trace:", file=sys.stderr)
280
- traceback.print_exc(file=sys.stderr)
281
- print("Restarting server in 5 seconds...", file=sys.stderr)
282
- await asyncio.sleep(5)
283
- continue
272
+ # Run the server using stdin/stdout streams
273
+ async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
274
+ await server.run(
275
+ read_stream,
276
+ write_stream,
277
+ InitializationOptions(
278
+ server_name="wcgw",
279
+ server_version=version,
280
+ capabilities=server.get_capabilities(
281
+ notification_options=NotificationOptions(),
282
+ experimental_capabilities={},
283
+ ),
284
+ ),
285
+ raise_exceptions=False,
286
+ )
@@ -123,6 +123,7 @@ def loop(
123
123
  first_message: Optional[str] = None,
124
124
  limit: Optional[float] = None,
125
125
  resume: Optional[str] = None,
126
+ computer_use: bool = False,
126
127
  ) -> tuple[str, float]:
127
128
  load_dotenv()
128
129
 
@@ -60,6 +60,7 @@ class GetScreenInfo(BaseModel):
60
60
 
61
61
  class ScreenShot(BaseModel):
62
62
  type: Literal["ScreenShot"]
63
+ take_after_delay_seconds: int
63
64
 
64
65
 
65
66
  class MouseMove(BaseModel):
@@ -860,7 +860,7 @@ wheels = [
860
860
 
861
861
  [[package]]
862
862
  name = "wcgw"
863
- version = "1.5.1"
863
+ version = "1.5.4"
864
864
  source = { editable = "." }
865
865
  dependencies = [
866
866
  { name = "anthropic" },
@@ -1,15 +0,0 @@
1
- {
2
- "mcpServers": {
3
- "wcgw": {
4
- "command": "uvx",
5
- "args": [
6
- "--from",
7
- "wcgw",
8
- "wcgw_mcp"
9
- ],
10
- "protocol": "mcp",
11
- "defaultModel": "claude-3-sonnet",
12
- "maxTokens": 4096
13
- }
14
- }
15
- }
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes