wcgw 1.4.0__tar.gz → 1.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wcgw might be problematic. Click here for more details.

Files changed (43) hide show
  1. wcgw-1.5.0/.github/workflows/python-tests.yml +30 -0
  2. {wcgw-1.4.0 → wcgw-1.5.0}/PKG-INFO +72 -25
  3. {wcgw-1.4.0 → wcgw-1.5.0}/README.md +70 -23
  4. {wcgw-1.4.0 → wcgw-1.5.0}/gpt_instructions.txt +1 -1
  5. {wcgw-1.4.0 → wcgw-1.5.0}/pyproject.toml +2 -2
  6. wcgw-1.5.0/src/wcgw/client/__main__.py +3 -0
  7. {wcgw-1.4.0 → wcgw-1.5.0}/src/wcgw/client/anthropic_client.py +83 -37
  8. wcgw-1.5.0/src/wcgw/client/computer_use.py +416 -0
  9. wcgw-1.5.0/src/wcgw/client/mcp_server/Readme.md +73 -0
  10. wcgw-1.5.0/src/wcgw/client/mcp_server/server.py +283 -0
  11. {wcgw-1.4.0 → wcgw-1.5.0}/src/wcgw/client/openai_client.py +3 -2
  12. wcgw-1.5.0/src/wcgw/client/sys_utils.py +40 -0
  13. {wcgw-1.4.0 → wcgw-1.5.0}/src/wcgw/client/tools.py +178 -72
  14. {wcgw-1.4.0 → wcgw-1.5.0}/src/wcgw/types_.py +41 -0
  15. wcgw-1.5.0/static/claude-ss.jpg +0 -0
  16. wcgw-1.5.0/static/computer-use.jpg +0 -0
  17. wcgw-1.5.0/static/example.jpg +0 -0
  18. {wcgw-1.4.0 → wcgw-1.5.0}/uv.lock +93 -115
  19. wcgw-1.4.0/.github/workflows/python-tests.yml +0 -30
  20. wcgw-1.4.0/src/wcgw/client/__main__.py +0 -3
  21. wcgw-1.4.0/src/wcgw/client/mcp_server/Readme.md +0 -26
  22. wcgw-1.4.0/src/wcgw/client/mcp_server/server.py +0 -222
  23. {wcgw-1.4.0 → wcgw-1.5.0}/.github/workflows/python-publish.yml +0 -0
  24. {wcgw-1.4.0 → wcgw-1.5.0}/.gitignore +0 -0
  25. {wcgw-1.4.0 → wcgw-1.5.0}/.python-version +0 -0
  26. {wcgw-1.4.0 → wcgw-1.5.0}/.vscode/settings.json +0 -0
  27. {wcgw-1.4.0 → wcgw-1.5.0}/add.py +0 -0
  28. {wcgw-1.4.0 → wcgw-1.5.0}/claude_desktop_config.json +0 -0
  29. {wcgw-1.4.0 → wcgw-1.5.0}/gpt_action_json_schema.json +0 -0
  30. {wcgw-1.4.0 → wcgw-1.5.0}/src/__init__.py +0 -0
  31. {wcgw-1.4.0 → wcgw-1.5.0}/src/wcgw/__init__.py +0 -0
  32. {wcgw-1.4.0 → wcgw-1.5.0}/src/wcgw/client/__init__.py +0 -0
  33. {wcgw-1.4.0 → wcgw-1.5.0}/src/wcgw/client/cli.py +0 -0
  34. {wcgw-1.4.0 → wcgw-1.5.0}/src/wcgw/client/common.py +0 -0
  35. {wcgw-1.4.0 → wcgw-1.5.0}/src/wcgw/client/diff-instructions.txt +0 -0
  36. {wcgw-1.4.0 → wcgw-1.5.0}/src/wcgw/client/mcp_server/__init__.py +0 -0
  37. {wcgw-1.4.0 → wcgw-1.5.0}/src/wcgw/client/openai_utils.py +0 -0
  38. {wcgw-1.4.0 → wcgw-1.5.0}/src/wcgw/relay/serve.py +0 -0
  39. {wcgw-1.4.0 → wcgw-1.5.0}/src/wcgw/relay/static/privacy.txt +0 -0
  40. {wcgw-1.4.0 → wcgw-1.5.0}/static/rocket-icon.png +0 -0
  41. {wcgw-1.4.0 → wcgw-1.5.0}/static/ss1.png +0 -0
  42. {wcgw-1.4.0 → wcgw-1.5.0}/tests/test_basic.py +0 -0
  43. {wcgw-1.4.0 → wcgw-1.5.0}/tests/test_tools.py +0 -0
@@ -0,0 +1,30 @@
1
+ name: Python Test
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ branches:
9
+ - main
10
+
11
+ jobs:
12
+ test:
13
+ runs-on: ubuntu-latest
14
+ strategy:
15
+ matrix:
16
+ python-version: ["3.11", "3.12"]
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+ - name: Set up Python
20
+ uses: actions/setup-python@v3
21
+ with:
22
+ python-version: "${{ matrix.python-version }}"
23
+ - name: Install dependencies
24
+ run: |
25
+ python -m pip install --upgrade pip
26
+ pip install build
27
+ pip install .[dev] # Installs dependencies based on pyproject.toml
28
+ - name: Run tests
29
+ run: |
30
+ python -m unittest discover -s tests
@@ -1,10 +1,10 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: wcgw
3
- Version: 1.4.0
3
+ Version: 1.5.0
4
4
  Summary: What could go wrong giving full shell access to chatgpt?
5
5
  Project-URL: Homepage, https://github.com/rusiaaman/wcgw
6
6
  Author-email: Aman Rusia <gapypi@arcfu.com>
7
- Requires-Python: <3.13,>=3.10
7
+ Requires-Python: <3.13,>=3.11
8
8
  Requires-Dist: anthropic>=0.39.0
9
9
  Requires-Dist: fastapi>=0.115.0
10
10
  Requires-Dist: mcp>=1.0.0
@@ -27,82 +27,124 @@ Requires-Dist: uvicorn>=0.31.0
27
27
  Requires-Dist: websockets>=13.1
28
28
  Description-Content-Type: text/markdown
29
29
 
30
- # Enable shell access on chatgpt.com
31
- A custom gpt on chatgpt web app to interact with your local shell.
30
+ # Shell and Coding agent on Chatgpt and Claude desktop apps
31
+
32
+ A custom gpt on chatgpt web/desktop apps to interact with your local shell, edit files, run code, etc.
32
33
 
33
34
  [![Tests](https://github.com/rusiaaman/wcgw/actions/workflows/python-tests.yml/badge.svg?branch=main)](https://github.com/rusiaaman/wcgw/actions/workflows/python-tests.yml)
34
35
  [![Build](https://github.com/rusiaaman/wcgw/actions/workflows/python-publish.yml/badge.svg)](https://github.com/rusiaaman/wcgw/actions/workflows/python-publish.yml)
35
36
 
37
+ [New feature] [26-Nov-2024] Claude desktop support for shell, computer-control, coding agent.
38
+ [src/wcgw/client/mcp_server/Readme.md](src/wcgw/client/mcp_server/Readme.md)
39
+
36
40
  ### 🚀 Highlights
41
+
37
42
  - ⚡ **Full Shell Access**: No restrictions, complete control.
38
43
  - ⚡ **Create, Execute, Iterate**: Ask the gpt to keep running compiler checks till all errors are fixed, or ask it to keep checking for the status of a long running command till it's done.
39
- - ⚡ **Interactive Command Handling**: Supports interactive commands using arrow keys, interrupt, and ansi escape sequences.
44
+ - ⚡ **Interactive Command Handling**: Supports interactive commands using arrow keys, interrupt, and ansi escape sequences.
40
45
  - ⚡ **REPL support**: [beta] Supports python/node and other REPL execution.
41
46
 
42
- ### 🪜 Steps:
47
+ ## Claude
48
+ Full readme [src/wcgw/client/mcp_server/Readme.md](src/wcgw/client/mcp_server/Readme.md)
49
+ ### Setup
50
+
51
+ Update `claude_desktop_config.json`
52
+
53
+ ```json
54
+ {
55
+ "mcpServers": {
56
+ "wcgw": {
57
+ "command": "uvx",
58
+ "args": ["--from", "wcgw@latest", "wcgw_mcp"]
59
+ }
60
+ }
61
+ }
62
+ ```
63
+
64
+ Then restart claude app.
65
+ You can then ask claude to execute shell commands, read files, edit files, run your code, etc.
66
+
67
+ ## ChatGPT
68
+
69
+ ### 🪜 Steps:
70
+
43
71
  1. Run the [cli client](https://github.com/rusiaaman/wcgw?tab=readme-ov-file#client) in any directory of choice.
44
72
  2. Share the generated id with this GPT: `https://chatgpt.com/g/g-Us0AAXkRh-wcgw-giving-shell-access`
45
73
  3. The custom GPT can now run any command on your cli
46
74
 
75
+ ### Client
47
76
 
48
- ## Client
49
77
  You need to keep running this client for GPT to access your shell. Run it in a version controlled project's root.
50
78
 
51
- ### Option 1: using uv [Recommended]
79
+ #### Option 1: using uv [Recommended]
80
+
52
81
  ```sh
53
82
  $ curl -LsSf https://astral.sh/uv/install.sh | sh
54
83
  $ uvx wcgw@latest
55
84
  ```
56
85
 
57
- ### Option 2: using pip
86
+ #### Option 2: using pip
87
+
58
88
  Supports python >=3.11 and <3.13
89
+
59
90
  ```sh
60
91
  $ pip3 install wcgw
61
92
  $ wcgw
62
93
  ```
63
94
 
64
-
65
95
  This will print a UUID that you need to share with the gpt.
66
96
 
97
+ ### Chat
67
98
 
68
- ## Chat
69
99
  Open the following link or search the "wcgw" custom gpt using "Explore GPTs" on chatgpt.com
70
100
 
71
101
  https://chatgpt.com/g/g-Us0AAXkRh-wcgw-giving-shell-access
72
102
 
73
103
  Finally, let the chatgpt know your user id in any format. E.g., "user_id=<your uuid>" followed by rest of your instructions.
74
104
 
75
- NOTE: you can resume a broken connection
105
+ NOTE: you can resume a broken connection
76
106
  `wcgw --client-uuid $previous_uuid`
77
107
 
78
- # How it works
108
+ ### How it works on chatgpt app?
109
+
79
110
  Your commands are relayed through a server to the terminal client. [You could host the server on your own](https://github.com/rusiaaman/wcgw?tab=readme-ov-file#creating-your-own-custom-gpt-and-the-relay-server). For public convenience I've hosted one at https://wcgw.arcfu.com thanks to the gcloud free tier plan.
80
111
 
81
112
  Chatgpt sends a request to the relay server using the user id that you share with it. The relay server holds a websocket with the terminal client against the user id and acts as a proxy to pass the request.
82
113
 
83
- It's secure in both the directions. Either a malicious actor or a malicious Chatgpt has to correctly guess your UUID for any security breach.
114
+ It's secure in both the directions. Either a malicious actor or a malicious Chatgpt has to correctly guess your UUID for any security breach.
84
115
 
85
116
  # Showcase
86
117
 
87
- ## Unit tests and github actions
118
+ ## Claude desktop
119
+
120
+ ### Resize image and move it to a new dir
121
+
122
+ ![example](https://github.com/rusiaaman/wcgw/blob/main/static/example.jpg?raw=true)
123
+
124
+ ## Chatgpt app
125
+
126
+ ### Unit tests and github actions
127
+
88
128
  [The first version of unit tests and github workflow to test on multiple python versions were written by the custom chatgpt](https://chatgpt.com/share/6717f922-8998-8005-b825-45d4b348b4dd)
89
129
 
90
- ## Create a todo app using react + typescript + vite
91
- ![Screenshot](https://github.com/rusiaaman/wcgw/blob/main/static/ss1.png?raw=true)
130
+ ### Create a todo app using react + typescript + vite
92
131
 
132
+ ![Screenshot](https://github.com/rusiaaman/wcgw/blob/main/static/ss1.png?raw=true)
93
133
 
94
134
  # Privacy
135
+
95
136
  The relay server doesn't store any data. I can't access any information passing through it and only secure channels are used to communicate.
96
137
 
97
138
  You may host the server on your own and create a custom gpt using the following section.
98
139
 
99
140
  # Creating your own custom gpt and the relay server.
141
+
100
142
  I've used the following instructions and action json schema to create the custom GPT. (Replace wcgw.arcfu.com with the address to your server)
101
143
 
102
144
  https://github.com/rusiaaman/wcgw/blob/main/gpt_instructions.txt
103
145
  https://github.com/rusiaaman/wcgw/blob/main/gpt_action_json_schema.json
104
146
 
105
- Run the server
147
+ Run the server
106
148
  `gunicorn --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:443 src.wcgw.relay.serve:app --certfile fullchain.pem --keyfile privkey.pem`
107
149
 
108
150
  If you don't have public ip and domain name, you can use `ngrok` or similar services to get a https address to the api.
@@ -110,19 +152,24 @@ If you don't have public ip and domain name, you can use `ngrok` or similar serv
110
152
  Then specify the server url in the `wcgw` command like so
111
153
  `wcgw --server-url https://your-url/v1/register`
112
154
 
113
- # Claude Support
114
- WCGW now supports Claude Desktop through the MCP protocol, allowing you to use Claude's capabilities directly from your desktop environment. This integration enables seamless interaction between Claude and your local shell.
155
+ # [Optional] Local shell access with openai API key or anthropic API key
115
156
 
116
- # [Optional] Local shell access with openai API key
157
+ ## Openai
117
158
 
118
159
  Add `OPENAI_API_KEY` and `OPENAI_ORG_ID` env variables.
119
160
 
120
- Clone the repo and run to install `wcgw_local` command
161
+ Then run
162
+
163
+ `uvx --from wcgw@latest wcgw_local --limit 0.1` # Cost limit $0.1
164
+
165
+ You can now directly write messages or press enter key to open vim for multiline message and text pasting.
166
+
167
+ ## Anthropic
121
168
 
122
- `pip install .`
169
+ Add `ANTHROPIC_API_KEY` env variable.
123
170
 
124
- Then run
171
+ Then run
125
172
 
126
- `wcgw_local --limit 0.1` # Cost limit $0.1
173
+ `uvx --from wcgw@latest wcgw_local --claude`
127
174
 
128
175
  You can now directly write messages or press enter key to open vim for multiline message and text pasting.
@@ -1,79 +1,121 @@
1
- # Enable shell access on chatgpt.com
2
- A custom gpt on chatgpt web app to interact with your local shell.
1
+ # Shell and Coding agent on Chatgpt and Claude desktop apps
2
+
3
+ A custom gpt on chatgpt web/desktop apps to interact with your local shell, edit files, run code, etc.
3
4
 
4
5
  [![Tests](https://github.com/rusiaaman/wcgw/actions/workflows/python-tests.yml/badge.svg?branch=main)](https://github.com/rusiaaman/wcgw/actions/workflows/python-tests.yml)
5
6
  [![Build](https://github.com/rusiaaman/wcgw/actions/workflows/python-publish.yml/badge.svg)](https://github.com/rusiaaman/wcgw/actions/workflows/python-publish.yml)
6
7
 
8
+ [New feature] [26-Nov-2024] Claude desktop support for shell, computer-control, coding agent.
9
+ [src/wcgw/client/mcp_server/Readme.md](src/wcgw/client/mcp_server/Readme.md)
10
+
7
11
  ### 🚀 Highlights
12
+
8
13
  - ⚡ **Full Shell Access**: No restrictions, complete control.
9
14
  - ⚡ **Create, Execute, Iterate**: Ask the gpt to keep running compiler checks till all errors are fixed, or ask it to keep checking for the status of a long running command till it's done.
10
- - ⚡ **Interactive Command Handling**: Supports interactive commands using arrow keys, interrupt, and ansi escape sequences.
15
+ - ⚡ **Interactive Command Handling**: Supports interactive commands using arrow keys, interrupt, and ansi escape sequences.
11
16
  - ⚡ **REPL support**: [beta] Supports python/node and other REPL execution.
12
17
 
13
- ### 🪜 Steps:
18
+ ## Claude
19
+ Full readme [src/wcgw/client/mcp_server/Readme.md](src/wcgw/client/mcp_server/Readme.md)
20
+ ### Setup
21
+
22
+ Update `claude_desktop_config.json`
23
+
24
+ ```json
25
+ {
26
+ "mcpServers": {
27
+ "wcgw": {
28
+ "command": "uvx",
29
+ "args": ["--from", "wcgw@latest", "wcgw_mcp"]
30
+ }
31
+ }
32
+ }
33
+ ```
34
+
35
+ Then restart claude app.
36
+ You can then ask claude to execute shell commands, read files, edit files, run your code, etc.
37
+
38
+ ## ChatGPT
39
+
40
+ ### 🪜 Steps:
41
+
14
42
  1. Run the [cli client](https://github.com/rusiaaman/wcgw?tab=readme-ov-file#client) in any directory of choice.
15
43
  2. Share the generated id with this GPT: `https://chatgpt.com/g/g-Us0AAXkRh-wcgw-giving-shell-access`
16
44
  3. The custom GPT can now run any command on your cli
17
45
 
46
+ ### Client
18
47
 
19
- ## Client
20
48
  You need to keep running this client for GPT to access your shell. Run it in a version controlled project's root.
21
49
 
22
- ### Option 1: using uv [Recommended]
50
+ #### Option 1: using uv [Recommended]
51
+
23
52
  ```sh
24
53
  $ curl -LsSf https://astral.sh/uv/install.sh | sh
25
54
  $ uvx wcgw@latest
26
55
  ```
27
56
 
28
- ### Option 2: using pip
57
+ #### Option 2: using pip
58
+
29
59
  Supports python >=3.11 and <3.13
60
+
30
61
  ```sh
31
62
  $ pip3 install wcgw
32
63
  $ wcgw
33
64
  ```
34
65
 
35
-
36
66
  This will print a UUID that you need to share with the gpt.
37
67
 
68
+ ### Chat
38
69
 
39
- ## Chat
40
70
  Open the following link or search the "wcgw" custom gpt using "Explore GPTs" on chatgpt.com
41
71
 
42
72
  https://chatgpt.com/g/g-Us0AAXkRh-wcgw-giving-shell-access
43
73
 
44
74
  Finally, let the chatgpt know your user id in any format. E.g., "user_id=<your uuid>" followed by rest of your instructions.
45
75
 
46
- NOTE: you can resume a broken connection
76
+ NOTE: you can resume a broken connection
47
77
  `wcgw --client-uuid $previous_uuid`
48
78
 
49
- # How it works
79
+ ### How it works on chatgpt app?
80
+
50
81
  Your commands are relayed through a server to the terminal client. [You could host the server on your own](https://github.com/rusiaaman/wcgw?tab=readme-ov-file#creating-your-own-custom-gpt-and-the-relay-server). For public convenience I've hosted one at https://wcgw.arcfu.com thanks to the gcloud free tier plan.
51
82
 
52
83
  Chatgpt sends a request to the relay server using the user id that you share with it. The relay server holds a websocket with the terminal client against the user id and acts as a proxy to pass the request.
53
84
 
54
- It's secure in both the directions. Either a malicious actor or a malicious Chatgpt has to correctly guess your UUID for any security breach.
85
+ It's secure in both the directions. Either a malicious actor or a malicious Chatgpt has to correctly guess your UUID for any security breach.
55
86
 
56
87
  # Showcase
57
88
 
58
- ## Unit tests and github actions
89
+ ## Claude desktop
90
+
91
+ ### Resize image and move it to a new dir
92
+
93
+ ![example](https://github.com/rusiaaman/wcgw/blob/main/static/example.jpg?raw=true)
94
+
95
+ ## Chatgpt app
96
+
97
+ ### Unit tests and github actions
98
+
59
99
  [The first version of unit tests and github workflow to test on multiple python versions were written by the custom chatgpt](https://chatgpt.com/share/6717f922-8998-8005-b825-45d4b348b4dd)
60
100
 
61
- ## Create a todo app using react + typescript + vite
62
- ![Screenshot](https://github.com/rusiaaman/wcgw/blob/main/static/ss1.png?raw=true)
101
+ ### Create a todo app using react + typescript + vite
63
102
 
103
+ ![Screenshot](https://github.com/rusiaaman/wcgw/blob/main/static/ss1.png?raw=true)
64
104
 
65
105
  # Privacy
106
+
66
107
  The relay server doesn't store any data. I can't access any information passing through it and only secure channels are used to communicate.
67
108
 
68
109
  You may host the server on your own and create a custom gpt using the following section.
69
110
 
70
111
  # Creating your own custom gpt and the relay server.
112
+
71
113
  I've used the following instructions and action json schema to create the custom GPT. (Replace wcgw.arcfu.com with the address to your server)
72
114
 
73
115
  https://github.com/rusiaaman/wcgw/blob/main/gpt_instructions.txt
74
116
  https://github.com/rusiaaman/wcgw/blob/main/gpt_action_json_schema.json
75
117
 
76
- Run the server
118
+ Run the server
77
119
  `gunicorn --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:443 src.wcgw.relay.serve:app --certfile fullchain.pem --keyfile privkey.pem`
78
120
 
79
121
  If you don't have public ip and domain name, you can use `ngrok` or similar services to get a https address to the api.
@@ -81,19 +123,24 @@ If you don't have public ip and domain name, you can use `ngrok` or similar serv
81
123
  Then specify the server url in the `wcgw` command like so
82
124
  `wcgw --server-url https://your-url/v1/register`
83
125
 
84
- # Claude Support
85
- WCGW now supports Claude Desktop through the MCP protocol, allowing you to use Claude's capabilities directly from your desktop environment. This integration enables seamless interaction between Claude and your local shell.
126
+ # [Optional] Local shell access with openai API key or anthropic API key
86
127
 
87
- # [Optional] Local shell access with openai API key
128
+ ## Openai
88
129
 
89
130
  Add `OPENAI_API_KEY` and `OPENAI_ORG_ID` env variables.
90
131
 
91
- Clone the repo and run to install `wcgw_local` command
132
+ Then run
133
+
134
+ `uvx --from wcgw@latest wcgw_local --limit 0.1` # Cost limit $0.1
135
+
136
+ You can now directly write messages or press enter key to open vim for multiline message and text pasting.
137
+
138
+ ## Anthropic
92
139
 
93
- `pip install .`
140
+ Add `ANTHROPIC_API_KEY` env variable.
94
141
 
95
- Then run
142
+ Then run
96
143
 
97
- `wcgw_local --limit 0.1` # Cost limit $0.1
144
+ `uvx --from wcgw@latest wcgw_local --claude`
98
145
 
99
146
  You can now directly write messages or press enter key to open vim for multiline message and text pasting.
@@ -17,6 +17,7 @@ Instructions for `BashCommand`:
17
17
  - Optionally `exit shell has restarted` is the output, in which case environment resets, you can run fresh commands.
18
18
  - The first line might be `(...truncated)` if the output is too long.
19
19
  - The control will return to you in 5 seconds regardless of the status. For heavy commands, keep checking status using BashInteraction till they are finished.
20
+ - Run long running commands in background using screen instead of "&".
20
21
 
21
22
  Instructions for `Read File`
22
23
  - Read full content of a file.
@@ -24,7 +25,6 @@ Instructions for `Read File`
24
25
 
25
26
  Instructions for `Create File New`
26
27
  - Write content to a new file. Provide file path and content. Use this instead of BashCommand for writing new files.
27
- - This doesn't create any directories, please create directories using `mkdir -p` BashCommand.
28
28
  - Provide absolute file path only.
29
29
  - For editing existing files, use FileEdit.
30
30
 
@@ -1,10 +1,10 @@
1
1
  [project]
2
2
  authors = [{ name = "Aman Rusia", email = "gapypi@arcfu.com" }]
3
3
  name = "wcgw"
4
- version = "1.4.0"
4
+ version = "1.5.0"
5
5
  description = "What could go wrong giving full shell access to chatgpt?"
6
6
  readme = "README.md"
7
- requires-python = ">=3.10, <3.13"
7
+ requires-python = ">=3.11, <3.13"
8
8
  dependencies = [
9
9
  "openai>=1.46.0",
10
10
  "mypy>=1.11.2",
@@ -0,0 +1,3 @@
1
+ from .cli import app
2
+
3
+ app()
@@ -27,14 +27,19 @@ from ..types_ import (
27
27
  CreateFileNew,
28
28
  FileEditFindReplace,
29
29
  FileEdit,
30
+ Keyboard,
31
+ Mouse,
30
32
  ReadFile,
31
33
  ReadImage,
32
34
  ResetShell,
35
+ ScreenShot,
36
+ GetScreenInfo,
33
37
  )
34
38
 
35
39
  from .common import Models, discard_input
36
40
  from .common import CostData
37
41
  from .tools import ImageData
42
+ from .computer_use import Computer
38
43
 
39
44
  from .tools import (
40
45
  DoneFlag,
@@ -165,6 +170,7 @@ def loop(
165
170
  - The first line might be `(...truncated)` if the output is too long.
166
171
  - Always run `pwd` if you get any file or directory not found error to make sure you're not lost.
167
172
  - The control will return to you in 5 seconds regardless of the status. For heavy commands, keep checking status using BashInteraction till they are finished.
173
+ - Run long running commands in background using screen instead of "&".
168
174
  """,
169
175
  ),
170
176
  ToolParam(
@@ -191,7 +197,6 @@ def loop(
191
197
  name="CreateFileNew",
192
198
  description="""
193
199
  - Write content to a new file. Provide file path and content. Use this instead of BashCommand for writing new files.
194
- - This doesn't create any directories, please create directories using `mkdir -p` BashCommand.
195
200
  - Provide absolute file path only.
196
201
  - For editing existing files, use FileEdit instead of this tool.
197
202
  """,
@@ -204,7 +209,7 @@ def loop(
204
209
  ToolParam(
205
210
  input_schema=ResetShell.model_json_schema(),
206
211
  name="ResetShell",
207
- description="Resets the shell. Use only if all interrupts and prompt reset attempts have failed repeatedly.",
212
+ description="Resets the shell. Use only if all interrupts and prompt reset attempts have failed repeatedly.\nAlso exits the docker environment.\nYou need to call GetScreenInfo again",
208
213
  ),
209
214
  ToolParam(
210
215
  input_schema=FileEdit.model_json_schema(),
@@ -212,6 +217,46 @@ def loop(
212
217
  description="""
213
218
  - Use absolute file path only.
214
219
  - Use SEARCH/REPLACE blocks to edit the file.
220
+ """,
221
+ ),
222
+ ToolParam(
223
+ input_schema=GetScreenInfo.model_json_schema(),
224
+ name="GetScreenInfo",
225
+ description="""
226
+ - Get display information of an OS running on docker using image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
227
+ - If user hasn't provided docker image id, check using `docker ps` and provide the id.
228
+ - Important: call this first in the conversation before ScreenShot, Mouse, and Keyboard tools.
229
+ - Connects shell to the docker environment.
230
+ - Note: once this is called, the shell enters the docker environment. All bash commands will run over there.
231
+ """,
232
+ ),
233
+ ToolParam(
234
+ input_schema=ScreenShot.model_json_schema(),
235
+ name="ScreenShot",
236
+ description="""
237
+ - Capture screenshot of an OS running on docker using image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
238
+ - If user hasn't provided docker image id, check using `docker ps` and provide the id.
239
+ - Capture ScreenShot of the current screen for automation.
240
+ """,
241
+ ),
242
+ ToolParam(
243
+ input_schema=Mouse.model_json_schema(),
244
+ name="Mouse",
245
+ description="""
246
+ - Interact with docker container running image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
247
+ - If user hasn't provided docker image id, check using `docker ps` and provide the id.
248
+ - Interact with the screen using mouse
249
+ """,
250
+ ),
251
+ ToolParam(
252
+ input_schema=Keyboard.model_json_schema(),
253
+ name="Keyboard",
254
+ description="""
255
+ - Interact with docker container running image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
256
+ - If user hasn't provided docker image id, check using `docker ps` and provide the id.
257
+ - Emulate keyboard input to the screen
258
+ - Uses xdootool to send keyboard input, keys like Return, BackSpace, Escape, Page_Up, etc. can be used.
259
+ - Do not use it to interact with Bash tool.
215
260
  """,
216
261
  ),
217
262
  ]
@@ -357,7 +402,7 @@ System information:
357
402
  }
358
403
  )
359
404
  try:
360
- output_or_done, _ = get_tool_output(
405
+ output_or_dones, _ = get_tool_output(
361
406
  tool_parsed,
362
407
  enc,
363
408
  limit - cost,
@@ -365,45 +410,46 @@ System information:
365
410
  max_tokens=8000,
366
411
  )
367
412
  except Exception as e:
368
- output_or_done = (
369
- f"GOT EXCEPTION while calling tool. Error: {e}"
370
- )
413
+ output_or_dones = [
414
+ (f"GOT EXCEPTION while calling tool. Error: {e}")
415
+ ]
371
416
  tb = traceback.format_exc()
372
- error_console.print(output_or_done + "\n" + tb)
373
-
374
- if isinstance(output_or_done, DoneFlag):
375
- system_console.print(
376
- f"\n# Task marked done, with output {output_or_done.task_output}",
377
- )
378
- return output_or_done.task_output, cost
379
-
380
- output = output_or_done
381
- if isinstance(output, ImageData):
382
- tool_results.append(
383
- ToolResultBlockParam(
384
- type="tool_result",
385
- tool_use_id=tc["id"],
386
- content=[
387
- {
388
- "type": "image",
389
- "source": {
390
- "type": "base64",
391
- "media_type": output.media_type,
392
- "data": output.data,
393
- },
394
- }
395
- ],
417
+ error_console.print(str(output_or_dones) + "\n" + tb)
418
+
419
+ if any(isinstance(x, DoneFlag) for x in output_or_dones):
420
+ return "", cost
421
+
422
+ tool_results_content: list[
423
+ TextBlockParam | ImageBlockParam
424
+ ] = []
425
+ for output in output_or_dones:
426
+ assert not isinstance(output, DoneFlag)
427
+ if isinstance(output, ImageData):
428
+ tool_results_content.append(
429
+ {
430
+ "type": "image",
431
+ "source": {
432
+ "type": "base64",
433
+ "media_type": output.media_type,
434
+ "data": output.data,
435
+ },
436
+ }
396
437
  )
397
- )
398
438
 
399
- else:
400
- tool_results.append(
401
- ToolResultBlockParam(
402
- type="tool_result",
403
- tool_use_id=tc["id"],
404
- content=output,
439
+ else:
440
+ tool_results_content.append(
441
+ {
442
+ "type": "text",
443
+ "text": output,
444
+ },
405
445
  )
446
+ tool_results.append(
447
+ ToolResultBlockParam(
448
+ type="tool_result",
449
+ tool_use_id=tc["id"],
450
+ content=tool_results_content,
406
451
  )
452
+ )
407
453
  else:
408
454
  _histories.append(
409
455
  {"role": "assistant", "content": full_response}