wcgw 1.5.2__tar.gz → 1.5.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wcgw might be problematic. Click here for more details.
- {wcgw-1.5.2 → wcgw-1.5.4}/PKG-INFO +1 -1
- {wcgw-1.5.2 → wcgw-1.5.4}/pyproject.toml +1 -1
- {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/anthropic_client.py +36 -21
- {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/cli.py +2 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/computer_use.py +5 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/mcp_server/Readme.md +27 -3
- {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/mcp_server/__init__.py +6 -2
- {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/mcp_server/server.py +59 -56
- {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/openai_client.py +1 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/types_.py +1 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/uv.lock +1 -1
- wcgw-1.5.2/claude_desktop_config.json +0 -15
- {wcgw-1.5.2 → wcgw-1.5.4}/.github/workflows/python-publish.yml +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/.github/workflows/python-tests.yml +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/.gitignore +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/.python-version +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/.vscode/settings.json +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/README.md +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/add.py +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/gpt_action_json_schema.json +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/gpt_instructions.txt +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/src/__init__.py +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/__init__.py +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/__init__.py +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/__main__.py +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/common.py +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/diff-instructions.txt +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/openai_utils.py +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/sys_utils.py +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/client/tools.py +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/relay/serve.py +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/src/wcgw/relay/static/privacy.txt +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/static/claude-ss.jpg +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/static/computer-use.jpg +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/static/example.jpg +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/static/rocket-icon.png +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/static/ss1.png +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/tests/test_basic.py +0 -0
- {wcgw-1.5.2 → wcgw-1.5.4}/tests/test_tools.py +0 -0
|
@@ -131,6 +131,7 @@ def loop(
|
|
|
131
131
|
first_message: Optional[str] = None,
|
|
132
132
|
limit: Optional[float] = None,
|
|
133
133
|
resume: Optional[str] = None,
|
|
134
|
+
computer_use: bool = False,
|
|
134
135
|
) -> tuple[str, float]:
|
|
135
136
|
load_dotenv()
|
|
136
137
|
|
|
@@ -182,6 +183,9 @@ def loop(
|
|
|
182
183
|
- Send text input to the running program.
|
|
183
184
|
- Send send_specials=["Enter"] to recheck status of a running program.
|
|
184
185
|
- Only one of send_text, send_specials, send_ascii should be provided.
|
|
186
|
+
- This returns within 5 seconds, for heavy programs keep checking status for upto 10 turns before asking user to continue checking again.
|
|
187
|
+
- Programs don't hang easily, so most likely explanation for no output is usually that the program is still running, and you need to check status again usign ["Enter"].
|
|
188
|
+
|
|
185
189
|
""",
|
|
186
190
|
),
|
|
187
191
|
ToolParam(
|
|
@@ -219,10 +223,14 @@ def loop(
|
|
|
219
223
|
- Use SEARCH/REPLACE blocks to edit the file.
|
|
220
224
|
""",
|
|
221
225
|
),
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
+
]
|
|
227
|
+
|
|
228
|
+
if computer_use:
|
|
229
|
+
tools += [
|
|
230
|
+
ToolParam(
|
|
231
|
+
input_schema=GetScreenInfo.model_json_schema(),
|
|
232
|
+
name="GetScreenInfo",
|
|
233
|
+
description="""
|
|
226
234
|
- Important: call this first in the conversation before ScreenShot, Mouse, and Keyboard tools.
|
|
227
235
|
- Get display information of a linux os running on docker using image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
|
|
228
236
|
- If user hasn't provided docker image id, check using `docker ps` and provide the id.
|
|
@@ -230,33 +238,40 @@ def loop(
|
|
|
230
238
|
- Connects shell to the docker environment.
|
|
231
239
|
- Note: once this is called, the shell enters the docker environment. All bash commands will run over there.
|
|
232
240
|
""",
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
241
|
+
),
|
|
242
|
+
ToolParam(
|
|
243
|
+
input_schema=ScreenShot.model_json_schema(),
|
|
244
|
+
name="ScreenShot",
|
|
245
|
+
description="""
|
|
238
246
|
- Capture screenshot of the linux os on docker.
|
|
247
|
+
- All actions on UI using mouse and keyboard return within 0.5 seconds.
|
|
248
|
+
* So if you're doing something that takes longer for UI to update like heavy page loading, keep checking UI for update usign ScreenShot upto 10 turns.
|
|
249
|
+
* Notice for smallest of the loading icons to check if your action worked.
|
|
250
|
+
* After 10 turns of no change, ask user for permission to keep checking.
|
|
251
|
+
* If you don't notice even slightest of the change, it's likely you clicked on the wrong place.
|
|
252
|
+
|
|
239
253
|
""",
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
254
|
+
),
|
|
255
|
+
ToolParam(
|
|
256
|
+
input_schema=Mouse.model_json_schema(),
|
|
257
|
+
name="Mouse",
|
|
258
|
+
description="""
|
|
245
259
|
- Interact with the linux os on docker using mouse.
|
|
246
260
|
- Uses xdotool
|
|
261
|
+
- About left_click_drag: the current mouse position will be used as the starting point, click and drag to the given x, y coordinates. Useful in things like sliders, moving things around, etc.
|
|
247
262
|
""",
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
263
|
+
),
|
|
264
|
+
ToolParam(
|
|
265
|
+
input_schema=Keyboard.model_json_schema(),
|
|
266
|
+
name="Keyboard",
|
|
267
|
+
description="""
|
|
253
268
|
- Interact with the linux os on docker using keyboard.
|
|
254
269
|
- Emulate keyboard input to the screen
|
|
255
270
|
- Uses xdootool to send keyboard input, keys like Return, BackSpace, Escape, Page_Up, etc. can be used.
|
|
256
271
|
- Do not use it to interact with Bash tool.
|
|
257
272
|
""",
|
|
258
|
-
|
|
259
|
-
|
|
273
|
+
),
|
|
274
|
+
]
|
|
260
275
|
uname_sysname = os.uname().sysname
|
|
261
276
|
uname_machine = os.uname().machine
|
|
262
277
|
|
|
@@ -16,6 +16,7 @@ def loop(
|
|
|
16
16
|
first_message: Optional[str] = None,
|
|
17
17
|
limit: Optional[float] = None,
|
|
18
18
|
resume: Optional[str] = None,
|
|
19
|
+
computer_use: bool = False,
|
|
19
20
|
version: bool = typer.Option(False, "--version", "-v"),
|
|
20
21
|
) -> tuple[str, float]:
|
|
21
22
|
if version:
|
|
@@ -27,6 +28,7 @@ def loop(
|
|
|
27
28
|
first_message=first_message,
|
|
28
29
|
limit=limit,
|
|
29
30
|
resume=resume,
|
|
31
|
+
computer_use=computer_use,
|
|
30
32
|
)
|
|
31
33
|
else:
|
|
32
34
|
return openai_loop(
|
|
@@ -26,6 +26,7 @@ from ..types_ import (
|
|
|
26
26
|
OUTPUT_DIR = "/tmp/outputs"
|
|
27
27
|
TYPING_DELAY_MS = 12
|
|
28
28
|
TYPING_GROUP_SIZE = 50
|
|
29
|
+
SLEEP_TIME_MAX_S = 3
|
|
29
30
|
|
|
30
31
|
Action = Literal[
|
|
31
32
|
"key",
|
|
@@ -187,12 +188,15 @@ class ComputerTool:
|
|
|
187
188
|
text: str | None = None,
|
|
188
189
|
coordinate: tuple[int, int] | None = None,
|
|
189
190
|
do_left_click_on_move: bool | None = None,
|
|
191
|
+
take_after_delay_seconds: int | None = None,
|
|
190
192
|
**kwargs: Any,
|
|
191
193
|
) -> ToolResult:
|
|
192
194
|
if action == "get_screen_info":
|
|
193
195
|
assert docker_image_id is not None
|
|
194
196
|
self.docker_image_id = docker_image_id
|
|
195
197
|
self.get_screen_info()
|
|
198
|
+
if take_after_delay_seconds is not None:
|
|
199
|
+
time.sleep(min(take_after_delay_seconds, SLEEP_TIME_MAX_S))
|
|
196
200
|
screenshot_res = self.screenshot()
|
|
197
201
|
return ToolResult(
|
|
198
202
|
output=f"width: {self.width}, height: {self.height}, display_num: {self.display_num}",
|
|
@@ -396,6 +400,7 @@ def run_computer_tool(
|
|
|
396
400
|
elif isinstance(action, ScreenShot):
|
|
397
401
|
result = Computer(
|
|
398
402
|
action="screenshot",
|
|
403
|
+
screenshot_delay=action.take_after_delay_seconds,
|
|
399
404
|
)
|
|
400
405
|
elif isinstance(action, Keyboard):
|
|
401
406
|
result = Computer(
|
|
@@ -31,15 +31,39 @@ Then restart claude app.
|
|
|
31
31
|
|
|
32
32
|
### [Optional] Computer use support using desktop on docker
|
|
33
33
|
|
|
34
|
-
Computer use is
|
|
34
|
+
Computer use is disabled by default. Add the `--computer-use` flag to enable it. This adds the necessary tools to Claude, including ScreenShot, Mouse, and Keyboard control.
|
|
35
35
|
|
|
36
|
-
|
|
36
|
+
```json
|
|
37
|
+
{
|
|
38
|
+
"mcpServers": {
|
|
39
|
+
"wcgw": {
|
|
40
|
+
"command": "uv",
|
|
41
|
+
"args": [
|
|
42
|
+
"tool",
|
|
43
|
+
"run",
|
|
44
|
+
"--from",
|
|
45
|
+
"wcgw@latest",
|
|
46
|
+
"--python",
|
|
47
|
+
"3.12",
|
|
48
|
+
"wcgw_mcp",
|
|
49
|
+
"--computer-use"
|
|
50
|
+
]
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Claude will be able to connect to any Docker container running a Linux environment. Native system control isn't supported outside Docker.
|
|
57
|
+
|
|
58
|
+
You'll need to run a Docker image with a desktop and, optionally, a VNC connection. Here's a demo image:
|
|
37
59
|
|
|
38
60
|
```sh
|
|
39
61
|
docker run -p 6080:6080 ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest
|
|
40
62
|
```
|
|
41
63
|
|
|
42
|
-
|
|
64
|
+
Then ask the Claude desktop app to control the Docker OS. It will connect to the Docker container and control it.
|
|
65
|
+
|
|
66
|
+
Open `http://localhost:6080/vnc.html` for a desktop view (VNC) of the system running in the Docker container.
|
|
43
67
|
|
|
44
68
|
## Usage
|
|
45
69
|
|
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
from wcgw.client.mcp_server import server
|
|
2
2
|
import asyncio
|
|
3
|
+
from typer import Typer
|
|
3
4
|
|
|
5
|
+
main = Typer()
|
|
4
6
|
|
|
5
|
-
|
|
7
|
+
|
|
8
|
+
@main.command()
|
|
9
|
+
def app(computer_use: bool = False) -> None:
|
|
6
10
|
"""Main entry point for the package."""
|
|
7
|
-
asyncio.run(server.main())
|
|
11
|
+
asyncio.run(server.main(computer_use))
|
|
8
12
|
|
|
9
13
|
|
|
10
14
|
# Optionally expose other important items at package level
|
|
@@ -28,9 +28,11 @@ from ...types_ import (
|
|
|
28
28
|
ScreenShot,
|
|
29
29
|
GetScreenInfo,
|
|
30
30
|
)
|
|
31
|
-
from ..computer_use import
|
|
31
|
+
from ..computer_use import SLEEP_TIME_MAX_S
|
|
32
32
|
|
|
33
|
-
tools.TIMEOUT =
|
|
33
|
+
tools.TIMEOUT = SLEEP_TIME_MAX_S
|
|
34
|
+
|
|
35
|
+
COMPUTER_USE_ON_DOCKER_ENABLED = False
|
|
34
36
|
|
|
35
37
|
server = Server("wcgw")
|
|
36
38
|
|
|
@@ -71,7 +73,7 @@ async def handle_list_tools() -> list[types.Tool]:
|
|
|
71
73
|
) as f:
|
|
72
74
|
diffinstructions = f.read()
|
|
73
75
|
|
|
74
|
-
|
|
76
|
+
tools = [
|
|
75
77
|
ToolParam(
|
|
76
78
|
inputSchema=Initialize.model_json_schema(),
|
|
77
79
|
name="Initialize",
|
|
@@ -102,6 +104,8 @@ async def handle_list_tools() -> list[types.Tool]:
|
|
|
102
104
|
- Send text input to the running program.
|
|
103
105
|
- Send send_specials=["Enter"] to recheck status of a running program.
|
|
104
106
|
- Only one of send_text, send_specials, send_ascii should be provided.
|
|
107
|
+
- This returns within 3 seconds, for heavy programs keep checking status for upto 10 turns before asking user to continue checking again.
|
|
108
|
+
- Programs don't hang easily, so most likely explanation for no output is usually that the program is still running, and you need to check status again usign ["Enter"].
|
|
105
109
|
""",
|
|
106
110
|
),
|
|
107
111
|
ToolParam(
|
|
@@ -140,17 +144,13 @@ async def handle_list_tools() -> list[types.Tool]:
|
|
|
140
144
|
"""
|
|
141
145
|
+ diffinstructions,
|
|
142
146
|
),
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
""
|
|
149
|
-
|
|
150
|
-
ToolParam(
|
|
151
|
-
inputSchema=GetScreenInfo.model_json_schema(),
|
|
152
|
-
name="GetScreenInfo",
|
|
153
|
-
description="""
|
|
147
|
+
]
|
|
148
|
+
if COMPUTER_USE_ON_DOCKER_ENABLED:
|
|
149
|
+
tools += [
|
|
150
|
+
ToolParam(
|
|
151
|
+
inputSchema=GetScreenInfo.model_json_schema(),
|
|
152
|
+
name="GetScreenInfo",
|
|
153
|
+
description="""
|
|
154
154
|
- Important: call this first in the conversation before ScreenShot, Mouse, and Keyboard tools.
|
|
155
155
|
- Get display information of a linux os running on docker using image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
|
|
156
156
|
- If user hasn't provided docker image id, check using `docker ps` and provide the id.
|
|
@@ -158,34 +158,42 @@ async def handle_list_tools() -> list[types.Tool]:
|
|
|
158
158
|
- Connects shell to the docker environment.
|
|
159
159
|
- Note: once this is called, the shell enters the docker environment. All bash commands will run over there.
|
|
160
160
|
""",
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
161
|
+
),
|
|
162
|
+
ToolParam(
|
|
163
|
+
inputSchema=ScreenShot.model_json_schema(),
|
|
164
|
+
name="ScreenShot",
|
|
165
|
+
description="""
|
|
166
166
|
- Capture screenshot of the linux os on docker.
|
|
167
|
+
- All actions on UI using mouse and keyboard return within 0.5 seconds.
|
|
168
|
+
* So if you're doing something that takes longer for UI to update like heavy page loading, keep checking UI for update usign ScreenShot upto 10 turns.
|
|
169
|
+
* Notice for smallest of the loading icons to check if your action worked.
|
|
170
|
+
* After 10 turns of no change, ask user for permission to keep checking.
|
|
171
|
+
* If you don't notice even slightest of the change, it's likely you clicked on the wrong place.
|
|
167
172
|
""",
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
+
),
|
|
174
|
+
ToolParam(
|
|
175
|
+
inputSchema=Mouse.model_json_schema(),
|
|
176
|
+
name="Mouse",
|
|
177
|
+
description="""
|
|
173
178
|
- Interact with the linux os on docker using mouse.
|
|
174
179
|
- Uses xdotool
|
|
180
|
+
- About left_click_drag: the current mouse position will be used as the starting point, click and drag to the given x, y coordinates. Useful in things like sliders, moving things around, etc.
|
|
175
181
|
""",
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
182
|
+
),
|
|
183
|
+
ToolParam(
|
|
184
|
+
inputSchema=Keyboard.model_json_schema(),
|
|
185
|
+
name="Keyboard",
|
|
186
|
+
description="""
|
|
181
187
|
- Interact with the linux os on docker using keyboard.
|
|
182
188
|
- Emulate keyboard input to the screen
|
|
183
189
|
- Uses xdootool to send keyboard input, keys like Return, BackSpace, Escape, Page_Up, etc. can be used.
|
|
184
190
|
- Do not use it to interact with Bash tool.
|
|
185
191
|
- Make sure you've selected a text area or an editable element before sending text.
|
|
186
192
|
""",
|
|
187
|
-
|
|
188
|
-
|
|
193
|
+
),
|
|
194
|
+
]
|
|
195
|
+
|
|
196
|
+
return tools
|
|
189
197
|
|
|
190
198
|
|
|
191
199
|
@server.call_tool() # type: ignore
|
|
@@ -255,29 +263,24 @@ async def handle_call_tool(
|
|
|
255
263
|
return content
|
|
256
264
|
|
|
257
265
|
|
|
258
|
-
async def main() -> None:
|
|
266
|
+
async def main(computer_use: bool) -> None:
|
|
267
|
+
global COMPUTER_USE_ON_DOCKER_ENABLED
|
|
268
|
+
if computer_use:
|
|
269
|
+
COMPUTER_USE_ON_DOCKER_ENABLED = True
|
|
270
|
+
|
|
259
271
|
version = importlib.metadata.version("wcgw")
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
raise_exceptions=False,
|
|
276
|
-
)
|
|
277
|
-
except BaseException as e:
|
|
278
|
-
print(f"Server encountered an error: {e}", file=sys.stderr)
|
|
279
|
-
print("Stack trace:", file=sys.stderr)
|
|
280
|
-
traceback.print_exc(file=sys.stderr)
|
|
281
|
-
print("Restarting server in 5 seconds...", file=sys.stderr)
|
|
282
|
-
await asyncio.sleep(5)
|
|
283
|
-
continue
|
|
272
|
+
# Run the server using stdin/stdout streams
|
|
273
|
+
async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
|
|
274
|
+
await server.run(
|
|
275
|
+
read_stream,
|
|
276
|
+
write_stream,
|
|
277
|
+
InitializationOptions(
|
|
278
|
+
server_name="wcgw",
|
|
279
|
+
server_version=version,
|
|
280
|
+
capabilities=server.get_capabilities(
|
|
281
|
+
notification_options=NotificationOptions(),
|
|
282
|
+
experimental_capabilities={},
|
|
283
|
+
),
|
|
284
|
+
),
|
|
285
|
+
raise_exceptions=False,
|
|
286
|
+
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{wcgw-1.5.2 → wcgw-1.5.4}/add.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|