wcgw 2.8.6__py3-none-any.whl → 2.8.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wcgw might be problematic. Click here for more details.

@@ -318,7 +318,8 @@ async def main(computer_use: bool) -> None:
318
318
  if computer_use:
319
319
  COMPUTER_USE_ON_DOCKER_ENABLED = True
320
320
 
321
- version = importlib.metadata.version("wcgw")
321
+ version = str(importlib.metadata.version("wcgw"))
322
+ tools.console.log("wcgw version: " + version)
322
323
  # Run the server using stdin/stdout streams
323
324
  async with mcp_wcgw.server.stdio.stdio_server() as (read_stream, write_stream):
324
325
  await server.run(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wcgw
3
- Version: 2.8.6
3
+ Version: 2.8.7
4
4
  Summary: Shell and coding agent on claude and chatgpt
5
5
  Project-URL: Homepage, https://github.com/rusiaaman/wcgw
6
6
  Author-email: Aman Rusia <gapypi@arcfu.com>
@@ -28,12 +28,12 @@ Requires-Dist: websockets>=13.1
28
28
  Description-Content-Type: text/markdown
29
29
 
30
30
  # Shell and Coding agent for Claude and Chatgpt
31
+
31
32
  Empowering chat applications to code, build and run on your local machine.
32
33
 
33
34
  - Claude - An MCP server on claude desktop for autonomous shell and coding agent. (mac only)
34
35
  - Chatgpt - Allows custom gpt to talk to your shell via a relay server. (linux or mac)
35
36
 
36
-
37
37
  ⚠️ Warning: do not allow BashCommand tool without reviewing the command, it may result in data loss.
38
38
 
39
39
  [![Tests](https://github.com/rusiaaman/wcgw/actions/workflows/python-tests.yml/badge.svg?branch=main)](https://github.com/rusiaaman/wcgw/actions/workflows/python-tests.yml)
@@ -43,13 +43,14 @@ Empowering chat applications to code, build and run on your local machine.
43
43
  [![smithery badge](https://smithery.ai/badge/wcgw)](https://smithery.ai/server/wcgw)
44
44
 
45
45
  ## Updates
46
- - [15 Jan 2025] Modes introduced: architect, code-writer, and all powerful wcgw mode.
46
+
47
+ - [15 Jan 2025] Modes introduced: architect, code-writer, and all powerful wcgw mode.
47
48
 
48
49
  - [8 Jan 2025] Context saving tool for saving relevant file paths along with a description in a single file. Can be used as a task checkpoint or for knowledge transfer.
49
50
 
50
51
  - [29 Dec 2024] Syntax checking on file writing and edits is now stable. Made `initialize` tool call useful; sending smart repo structure to claude if any repo is referenced. Large file handling is also now improved.
51
52
 
52
- - [9 Dec 2024] [Vscode extension to paste context on Claude app](https://marketplace.visualstudio.com/items?itemName=AmanRusia.wcgw)
53
+ - [9 Dec 2024] [Vscode extension to paste context on Claude app](https://marketplace.visualstudio.com/items?itemName=AmanRusia.wcgw)
53
54
 
54
55
  - [01 Dec 2024] Removed author hosted relay server for chatgpt.
55
56
 
@@ -61,19 +62,19 @@ Empowering chat applications to code, build and run on your local machine.
61
62
  - ⚡ **Large file edit**: Supports large file incremental edits to avoid token limit issues. Faster than full file write.
62
63
  - ⚡ **Syntax checking on edits**: Reports feedback to the LLM if its edits have any syntax errors, so that it can redo it.
63
64
  - ⚡ **Interactive Command Handling**: Supports interactive commands using arrow keys, interrupt, and ansi escape sequences.
64
- - ⚡ **File protections**:
65
- - The AI needs to read a file at least once before it's allowed to edit or rewrite it. This avoids accidental overwrites.
66
- - Avoids context filling up while reading very large files. Files get chunked based on token length.
65
+ - ⚡ **File protections**:
66
+ - The AI needs to read a file at least once before it's allowed to edit or rewrite it. This avoids accidental overwrites.
67
+ - Avoids context filling up while reading very large files. Files get chunked based on token length.
67
68
  - On initialisation the provided workspace's directory structure is returned after selecting important files (based on .gitignore as well as a statistical approach)
68
69
  - File edit based on search-replace tries to find correct search block if it has multiple matches based on previous search blocks. Fails otherwise (for correctness).
69
70
  - File edit has spacing tolerant matching, with warning on issues like indentation mismatch. If there's no match, the closest match is returned to the AI to fix its mistakes.
70
71
  - Using Aider-like search and replace, which has better performance than tool call based search and replace.
71
- - ⚡ **Shell optimizations**:
72
+ - ⚡ **Shell optimizations**:
72
73
  - Only one command is allowed to be run at a time, simplifying management and avoiding rogue processes. There's only single shell instance at any point of time.
73
- - Current working directory is always returned after any shell command to prevent AI from getting lost.
74
+ - Current working directory is always returned after any shell command to prevent AI from getting lost.
74
75
  - Command polling exits after a quick timeout to avoid slow feedback. However, status checking has wait tolerance based on fresh output streaming from a command. Both of these approach combined provides a good shell interaction experience.
75
76
  - ⚡ **Saving repo context in a single file**: Task checkpointing using "ContextSave" tool saves detailed context in a single file. Tasks can later be resumed in a new chat asking "Resume `task id`". The saved file can be used to do other kinds of knowledge transfer, such as taking help from another AI.
76
- - ⚡ **Easily switch between various modes**:
77
+ - ⚡ **Easily switch between various modes**:
77
78
  - Ask it to run in 'architect' mode for planning. Inspired by adier's architect mode, work with Claude to come up with a plan first. Leads to better accuracy and prevents premature file editing.
78
79
  - Ask it to run in 'code-writer' mode for code editing and project building. You can provide specific paths with wild card support to prevent other files getting edited.
79
80
  - By default it runs in 'wcgw' mode that has no restrictions and full authorisation.
@@ -151,25 +152,29 @@ over here
151
152
  Then ask claude to execute shell commands, read files, edit files, run your code, etc.
152
153
 
153
154
  #### Task checkpoint or knowledge transfer
155
+
154
156
  - You can do a task checkpoint or a knowledge transfer by attaching "KnowledgeTransfer" prompt using "Attach from MCP" button.
155
157
  - On running "KnowledgeTransfer" prompt, the "ContextSave" tool will be called saving the task description and all file content together in a single file. An id for the task will be generated.
156
158
  - You can in a new chat say "Resume '<task id>'", the AI should then call "Initialize" with the task id and load the context from there.
157
159
  - Or you can directly open the file generated and share it with another AI for help.
158
160
 
159
161
  #### Modes
162
+
160
163
  There are three built-in modes. You may ask Claude to run in one of the modes, like "Use 'architect' mode"
161
- | **Mode** | **Description** | **Allows** | **Denies** | **Invoke prompt** |
164
+ | **Mode** | **Description** | **Allows** | **Denies** | **Invoke prompt** |
162
165
  |-----------------|-----------------------------------------------------------------------------|---------------------------------------------------------|----------------------------------------------|----------------------------------------------------------------------------------------------------|
163
- | **Architect** | Designed for you to work with Claude to investigate and understand your repo. | Read-only commands | FileEdit and Write tool | Run in mode='architect' |
164
- | **Code-writer** | For code writing and development | Specified path globs for editing or writing, specified commands | FileEdit for paths not matching specified glob, Write for paths not matching specified glob | Run in code writer mode, only 'tests/**' allowed, only uv command allowed |
165
- | **wcgw** | Default mode with everything allowed | Everything | Nothing | No prompt, or "Run in wcgw mode" |
166
+ | **Architect** | Designed for you to work with Claude to investigate and understand your repo. | Read-only commands | FileEdit and Write tool | Run in mode='architect' |
167
+ | **Code-writer** | For code writing and development | Specified path globs for editing or writing, specified commands | FileEdit for paths not matching specified glob, Write for paths not matching specified glob | Run in code writer mode, only 'tests/**' allowed, only uv command allowed |
168
+ | **wcgw\*\* | Default mode with everything allowed | Everything | Nothing | No prompt, or "Run in wcgw mode" |
166
169
 
167
170
  Note: in code-writer mode either all commands are allowed or none are allowed for now. If you give a list of allowed commands, Claude is instructed to run only those commands, but no actual check happens. (WIP)
168
171
 
169
- ### [Optional] Vs code extension
172
+ ### [Optional] Vs code extension
173
+
170
174
  https://marketplace.visualstudio.com/items?itemName=AmanRusia.wcgw
171
175
 
172
- Commands:
176
+ Commands:
177
+
173
178
  - Select a text and press `cmd+'` and then enter instructions. This will switch the app to Claude and paste a text containing your instructions, file path, workspace dir, and the selected text.
174
179
 
175
180
  ## Chatgpt Setup
@@ -201,3 +206,36 @@ Then run
201
206
  `uvx --from wcgw@latest wcgw_local --claude`
202
207
 
203
208
  You can now directly write messages or press enter key to open vim for multiline message and text pasting.
209
+
210
+ ## Tools
211
+
212
+ The server provides the following MCP tools:
213
+
214
+ **Shell Operations:**
215
+
216
+ - `Initialize`: Reset shell and set up workspace environment
217
+ - Parameters: `any_workspace_path` (string), `initial_files_to_read` (string[]), `mode_name` ("wcgw"|"architect"|"code_writer"), `task_id_to_resume` (string)
218
+ - `BashCommand`: Execute shell commands with timeout control
219
+ - Parameters: `command` (string), `wait_for_seconds` (int, optional)
220
+ - `BashInteraction`: Send keyboard input to running programs
221
+ - Parameters: `send_text` (string) or `send_specials` (["Enter"|"Key-up"|...]) or `send_ascii` (int[]), `wait_for_seconds` (int, optional)
222
+
223
+ **File Operations:**
224
+
225
+ - `ReadFiles`: Read content from one or more files
226
+ - Parameters: `file_paths` (string[])
227
+ - `WriteIfEmpty`: Create new files or write to empty files
228
+ - Parameters: `file_path` (string), `file_content` (string)
229
+ - `FileEdit`: Edit existing files using search/replace blocks
230
+ - Parameters: `file_path` (string), `file_edit_using_search_replace_blocks` (string)
231
+ - `ReadImage`: Read image files for display/processing
232
+ - Parameters: `file_path` (string)
233
+
234
+ **Project Management:**
235
+
236
+ - `ContextSave`: Save project context and files for Knowledge Transfer or saving task checkpoints to be resumed later
237
+ - Parameters: `id` (string), `project_root_path` (string), `description` (string), `relevant_file_globs` (string[])
238
+ - `ResetShell`: Emergency reset for shell environment
239
+ - Parameters: `should_reset` (boolean)
240
+
241
+ All tools support absolute paths and include built-in protections against common errors. See the [MCP specification](https://modelcontextprotocol.io/) for detailed protocol information.
@@ -12,7 +12,7 @@ wcgw/client/file_ops/diff_edit.py,sha256=OlJCpPSE_3T41q9H0yDORm6trjm3w6zh1EkuPTx
12
12
  wcgw/client/file_ops/search_replace.py,sha256=Napa7IWaYPGMNdttunKyRDkb90elZE7r23B_o_htRxo,5585
13
13
  wcgw/client/mcp_server/Readme.md,sha256=I8N4dHkTUVGNQ63BQkBMBhCCBTgqGOSF_pUR6iOEiUk,2495
14
14
  wcgw/client/mcp_server/__init__.py,sha256=hyPPwO9cabAJsOMWhKyat9yl7OlSmIobaoAZKHu3DMc,381
15
- wcgw/client/mcp_server/server.py,sha256=CBOS_DSDcBf5VlHQMJJbDL-wnn_DAPDvxX3dO8yodRo,13144
15
+ wcgw/client/mcp_server/server.py,sha256=zMVa2nR2hapQZAtR34POch3VLVEEnJ6SQ8iWYLDxJrU,13199
16
16
  wcgw/client/repo_ops/display_tree.py,sha256=5FD4hfMkM2cIZnXlu7WfJswJLthj0SkuHlkGH6dpWQU,4632
17
17
  wcgw/client/repo_ops/path_prob.py,sha256=SWf0CDn37rtlsYRQ51ufSxay-heaQoVIhr1alB9tZ4M,2144
18
18
  wcgw/client/repo_ops/paths_model.vocab,sha256=M1pXycYDQehMXtpp-qAgU7rtzeBbCOiJo4qcYFY0kqk,315087
@@ -20,6 +20,12 @@ wcgw/client/repo_ops/paths_tokens.model,sha256=jiwwE4ae8ADKuTZISutXuM5Wfyc_FBmN5
20
20
  wcgw/client/repo_ops/repo_context.py,sha256=5NqRxBY0K-SBFXJ0Ybt7llzYOBD8pRkTpruMMJHWxv4,4336
21
21
  wcgw/relay/serve.py,sha256=Z5EwtaCAtKFBSnUw4mPYw0sze3Coc4Fa8gObRRG_bT0,9525
22
22
  wcgw/relay/static/privacy.txt,sha256=s9qBdbx2SexCpC_z33sg16TptmAwDEehMCLz4L50JLc,529
23
+ wcgw_cli/__init__.py,sha256=TNxXsTPgb52OhakIda9wTRh91cqoBqgQRx5TxjzQQFU,21
24
+ wcgw_cli/__main__.py,sha256=wcCrL4PjG51r5wVKqJhcoJPTLfHW0wNbD31DrUN0MWI,28
25
+ wcgw_cli/anthropic_client.py,sha256=lZWEoX_qDOJIjzbG-EKxTjJyvTSw1Y5odtv7YUUIL7k,21054
26
+ wcgw_cli/cli.py,sha256=GEje9ZBIaD5_-HK3zxZCGYaeDF8bfFxImloOR3O66Fw,1019
27
+ wcgw_cli/openai_client.py,sha256=wp4XDf3t3W6XG5LHgr6bFckePyty24BGtsOEjOrIrk0,17955
28
+ wcgw_cli/openai_utils.py,sha256=xGOb3W5ALrIozV7oszfGYztpj0FnXdD7jAxm5lEIVKY,2439
23
29
  mcp_wcgw/__init__.py,sha256=fKCgOdN7cn7gR3YGFaGyV5Goe8A2sEyllLcsRkN0i-g,2601
24
30
  mcp_wcgw/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
31
  mcp_wcgw/types.py,sha256=Enq5vqOPaQdObK9OQuafdLuMxb5RsLQ3k_k613fK41k,30556
@@ -42,8 +48,8 @@ mcp_wcgw/shared/memory.py,sha256=dBsOghxHz8-tycdSVo9kSujbsC8xb_tYsGmuJobuZnw,281
42
48
  mcp_wcgw/shared/progress.py,sha256=ymxOsb8XO5Mhlop7fRfdbmvPodANj7oq6O4dD0iUcnw,1048
43
49
  mcp_wcgw/shared/session.py,sha256=e44a0LQOW8gwdLs9_DE9oDsxqW2U8mXG3d5KT95bn5o,10393
44
50
  mcp_wcgw/shared/version.py,sha256=d2LZii-mgsPIxpshjkXnOTUmk98i0DT4ff8VpA_kAvE,111
45
- wcgw-2.8.6.dist-info/METADATA,sha256=PnCOOeODRmn7ZBHmoQN7qjS63YXOc0Xd78i7OVSo07o,12019
46
- wcgw-2.8.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
47
- wcgw-2.8.6.dist-info/entry_points.txt,sha256=vd3tj1_Kzfp55LscJ8-6WFMM5hm9cWTfNGFCrWBnH3Q,124
48
- wcgw-2.8.6.dist-info/licenses/LICENSE,sha256=BvY8xqjOfc3X2qZpGpX3MZEmF-4Dp0LqgKBbT6L_8oI,11142
49
- wcgw-2.8.6.dist-info/RECORD,,
51
+ wcgw-2.8.7.dist-info/METADATA,sha256=xXsXNlWtzj7OgpCgknOCaiiX1qG0KkOc8IbxsLW2k1A,13053
52
+ wcgw-2.8.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
53
+ wcgw-2.8.7.dist-info/entry_points.txt,sha256=vd3tj1_Kzfp55LscJ8-6WFMM5hm9cWTfNGFCrWBnH3Q,124
54
+ wcgw-2.8.7.dist-info/licenses/LICENSE,sha256=BvY8xqjOfc3X2qZpGpX3MZEmF-4Dp0LqgKBbT6L_8oI,11142
55
+ wcgw-2.8.7.dist-info/RECORD,,
wcgw_cli/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .cli import app
wcgw_cli/__main__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .cli import app
2
+
3
+ app()
@@ -0,0 +1,518 @@
1
+ import base64
2
+ import json
3
+ import mimetypes
4
+ import os
5
+ import subprocess
6
+ import tempfile
7
+ import traceback
8
+ import uuid
9
+ from pathlib import Path
10
+ from typing import Literal, Optional, cast
11
+
12
+ import rich
13
+ from anthropic import Anthropic
14
+ from anthropic.types import (
15
+ ImageBlockParam,
16
+ MessageParam,
17
+ TextBlockParam,
18
+ ToolParam,
19
+ ToolResultBlockParam,
20
+ ToolUseBlockParam,
21
+ )
22
+ from dotenv import load_dotenv
23
+ from typer import Typer
24
+
25
+ from wcgw.client.common import discard_input
26
+ from wcgw.client.memory import load_memory
27
+ from wcgw.client.tools import (
28
+ DoneFlag,
29
+ ImageData,
30
+ default_enc,
31
+ get_tool_output,
32
+ initialize,
33
+ which_tool_name,
34
+ )
35
+ from wcgw.types_ import (
36
+ BashCommand,
37
+ BashInteraction,
38
+ ContextSave,
39
+ FileEdit,
40
+ GetScreenInfo,
41
+ Keyboard,
42
+ Mouse,
43
+ ReadFiles,
44
+ ReadImage,
45
+ ResetShell,
46
+ ScreenShot,
47
+ WriteIfEmpty,
48
+ )
49
+
50
+ History = list[MessageParam]
51
+
52
+
53
+ def text_from_editor(console: rich.console.Console) -> str:
54
+ # First consume all the input till now
55
+ discard_input()
56
+ console.print("\n---------------------------------------\n# User message")
57
+ data = input()
58
+ if data:
59
+ return data
60
+ editor = os.environ.get("EDITOR", "vim")
61
+ with tempfile.NamedTemporaryFile(suffix=".tmp") as tf:
62
+ subprocess.run([editor, tf.name], check=True)
63
+ with open(tf.name, "r") as f:
64
+ data = f.read()
65
+ console.print(data)
66
+ return data
67
+
68
+
69
+ def save_history(history: History, session_id: str) -> None:
70
+ myid = str(history[1]["content"]).replace("/", "_").replace(" ", "_").lower()[:60]
71
+ myid += "_" + session_id
72
+ myid = myid + ".json"
73
+
74
+ mypath = Path(".wcgw") / myid
75
+ mypath.parent.mkdir(parents=True, exist_ok=True)
76
+ with open(mypath, "w") as f:
77
+ json.dump(history, f, indent=3)
78
+
79
+
80
+ def parse_user_message_special(msg: str) -> MessageParam:
81
+ # Search for lines starting with `%` and treat them as special commands
82
+ parts: list[ImageBlockParam | TextBlockParam] = []
83
+ for line in msg.split("\n"):
84
+ if line.startswith("%"):
85
+ args = line[1:].strip().split(" ")
86
+ command = args[0]
87
+ assert command == "image"
88
+ image_path = " ".join(args[1:])
89
+ with open(image_path, "rb") as f:
90
+ image_bytes = f.read()
91
+ image_b64 = base64.b64encode(image_bytes).decode("utf-8")
92
+ image_type = mimetypes.guess_type(image_path)[0]
93
+ parts.append(
94
+ {
95
+ "type": "image",
96
+ "source": {
97
+ "type": "base64",
98
+ "media_type": cast(
99
+ 'Literal["image/jpeg", "image/png", "image/gif", "image/webp"]',
100
+ image_type or "image/png",
101
+ ),
102
+ "data": image_b64,
103
+ },
104
+ }
105
+ )
106
+ else:
107
+ if len(parts) > 0 and parts[-1]["type"] == "text":
108
+ parts[-1]["text"] += "\n" + line
109
+ else:
110
+ parts.append({"type": "text", "text": line})
111
+ return {"role": "user", "content": parts}
112
+
113
+
114
+ app = Typer(pretty_exceptions_show_locals=False)
115
+
116
+
117
+ @app.command()
118
+ def loop(
119
+ first_message: Optional[str] = None,
120
+ limit: Optional[float] = None,
121
+ resume: Optional[str] = None,
122
+ computer_use: bool = False,
123
+ ) -> tuple[str, float]:
124
+ load_dotenv()
125
+
126
+ session_id = str(uuid.uuid4())[:6]
127
+
128
+ history: History = []
129
+ waiting_for_assistant = False
130
+ memory = None
131
+ if resume:
132
+ try:
133
+ _, memory, _ = load_memory(
134
+ resume,
135
+ 8000,
136
+ lambda x: default_enc.encode(x).ids,
137
+ lambda x: default_enc.decode(x),
138
+ )
139
+ except OSError:
140
+ if resume == "latest":
141
+ resume_path = sorted(Path(".wcgw").iterdir(), key=os.path.getmtime)[-1]
142
+ else:
143
+ resume_path = Path(resume)
144
+ if not resume_path.exists():
145
+ raise FileNotFoundError(f"File {resume} not found")
146
+ with resume_path.open() as f:
147
+ history = json.load(f)
148
+ if len(history) <= 2:
149
+ raise ValueError("Invalid history file")
150
+ first_message = ""
151
+ waiting_for_assistant = history[-1]["role"] != "assistant"
152
+
153
+ limit = 1
154
+
155
+ tools = [
156
+ ToolParam(
157
+ input_schema=BashCommand.model_json_schema(),
158
+ name="BashCommand",
159
+ description="""
160
+ - Execute a bash command. This is stateful (beware with subsequent calls).
161
+ - Do not use interactive commands like nano. Prefer writing simpler commands.
162
+ - Status of the command and the current working directory will always be returned at the end.
163
+ - Optionally `exit shell has restarted` is the output, in which case environment resets, you can run fresh commands.
164
+ - The first or the last line might be `(...truncated)` if the output is too long.
165
+ - Always run `pwd` if you get any file or directory not found error to make sure you're not lost.
166
+ - The control will return to you in 5 seconds regardless of the status. For heavy commands, keep checking status using BashInteraction till they are finished.
167
+ - Run long running commands in background using screen instead of "&".
168
+ - Use longer wait_for_seconds if the command is expected to run for a long time.
169
+ - Do not use 'cat' to read files, use ReadFiles tool instead.
170
+ """,
171
+ ),
172
+ ToolParam(
173
+ input_schema=BashInteraction.model_json_schema(),
174
+ name="BashInteraction",
175
+ description="""
176
+ - Interact with running program using this tool
177
+ - Special keys like arrows, interrupts, enter, etc.
178
+ - Send text input to the running program.
179
+ - Send send_specials=["Enter"] to recheck status of a running program.
180
+ - Only one of send_text, send_specials, send_ascii should be provided.
181
+ - This returns within 5 seconds, for heavy programs keep checking status for upto 10 turns before asking user to continue checking again.
182
+ - Programs don't hang easily, so most likely explanation for no output is usually that the program is still running, and you need to check status again using ["Enter"].
183
+ - Do not send Ctrl-c before checking for status till 10 minutes or whatever is appropriate for the program to finish.
184
+ - Set longer wait_for_seconds when program is expected to run for a long time.
185
+ """,
186
+ ),
187
+ ToolParam(
188
+ input_schema=ReadFiles.model_json_schema(),
189
+ name="ReadFiles",
190
+ description="""
191
+ - Read full file content of one or more files.
192
+ - Provide absolute file paths only
193
+ """,
194
+ ),
195
+ ToolParam(
196
+ input_schema=WriteIfEmpty.model_json_schema(),
197
+ name="WriteIfEmpty",
198
+ description="""
199
+ - Write content to an empty or non-existent file. Provide file path and content. Use this instead of BashCommand for writing new files.
200
+ - Provide absolute file path only.
201
+ - For editing existing files, use FileEdit instead of this tool.
202
+ """,
203
+ ),
204
+ ToolParam(
205
+ input_schema=ReadImage.model_json_schema(),
206
+ name="ReadImage",
207
+ description="Read an image from the shell.",
208
+ ),
209
+ ToolParam(
210
+ input_schema=ResetShell.model_json_schema(),
211
+ name="ResetShell",
212
+ description="Resets the shell. Use only if all interrupts and prompt reset attempts have failed repeatedly.\nAlso exits the docker environment.\nYou need to call GetScreenInfo again",
213
+ ),
214
+ ToolParam(
215
+ input_schema=FileEdit.model_json_schema(),
216
+ name="FileEdit",
217
+ description="""
218
+ - Use absolute file path only.
219
+ - Use SEARCH/REPLACE blocks to edit the file.
220
+ - If the edit fails due to block not matching, please retry with correct block till it matches. Re-read the file to ensure you've all the lines correct.
221
+ """,
222
+ ),
223
+ ToolParam(
224
+ input_schema=ContextSave.model_json_schema(),
225
+ name="ContextSave",
226
+ description="""
227
+ Saves provided description and file contents of all the relevant file paths or globs in a single text file.
228
+ - Provide random unqiue id or whatever user provided.
229
+ - Leave project path as empty string if no project path
230
+ """,
231
+ ),
232
+ ]
233
+
234
+ if computer_use:
235
+ tools += [
236
+ ToolParam(
237
+ input_schema=GetScreenInfo.model_json_schema(),
238
+ name="GetScreenInfo",
239
+ description="""
240
+ - Important: call this first in the conversation before ScreenShot, Mouse, and Keyboard tools.
241
+ - Get display information of a linux os running on docker using image "ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest"
242
+ - If user hasn't provided docker image id, check using `docker ps` and provide the id.
243
+ - If the docker is not running, run using `docker run -d -p 6080:6080 ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest`
244
+ - Connects shell to the docker environment.
245
+ - Note: once this is called, the shell enters the docker environment. All bash commands will run over there.
246
+ """,
247
+ ),
248
+ ToolParam(
249
+ input_schema=ScreenShot.model_json_schema(),
250
+ name="ScreenShot",
251
+ description="""
252
+ - Capture screenshot of the linux os on docker.
253
+ - All actions on UI using mouse and keyboard return within 0.5 seconds.
254
+ * So if you're doing something that takes longer for UI to update like heavy page loading, keep checking UI for update using ScreenShot upto 10 turns.
255
+ * Notice for smallest of the loading icons to check if your action worked.
256
+ * After 10 turns of no change, ask user for permission to keep checking.
257
+ * If you don't notice even slightest of the change, it's likely you clicked on the wrong place.
258
+
259
+ """,
260
+ ),
261
+ ToolParam(
262
+ input_schema=Mouse.model_json_schema(),
263
+ name="Mouse",
264
+ description="""
265
+ - Interact with the linux os on docker using mouse.
266
+ - Uses xdotool
267
+ - About left_click_drag: the current mouse position will be used as the starting point, click and drag to the given x, y coordinates. Useful in things like sliders, moving things around, etc.
268
+ - The output of this command has the screenshot after doing this action. Use this to verify if the action was successful.
269
+ """,
270
+ ),
271
+ ToolParam(
272
+ input_schema=Keyboard.model_json_schema(),
273
+ name="Keyboard",
274
+ description="""
275
+ - Interact with the linux os on docker using keyboard.
276
+ - Emulate keyboard input to the screen
277
+ - Uses xdootool to send keyboard input, keys like Return, BackSpace, Escape, Page_Up, etc. can be used.
278
+ - Do not use it to interact with Bash tool.
279
+ - Make sure you've selected a text area or an editable element before sending text.
280
+ - The output of this command has the screenshot after doing this action. Use this to verify if the action was successful.
281
+ """,
282
+ ),
283
+ ]
284
+
285
+ system = initialize(
286
+ os.getcwd(),
287
+ [],
288
+ resume if (memory and resume) else "",
289
+ max_tokens=8000,
290
+ mode="wcgw",
291
+ )
292
+
293
+ with open(
294
+ os.path.join(
295
+ os.path.dirname(__file__), "..", "wcgw", "client", "diff-instructions.txt"
296
+ )
297
+ ) as f:
298
+ system += f.read()
299
+
300
+ if history:
301
+ if (
302
+ (last_msg := history[-1])["role"] == "user"
303
+ and isinstance((content := last_msg["content"]), dict)
304
+ and content["type"] == "tool_result"
305
+ ):
306
+ waiting_for_assistant = True
307
+
308
+ client = Anthropic()
309
+
310
+ cost: float = 0
311
+ input_toks = 0
312
+ output_toks = 0
313
+ system_console = rich.console.Console(style="blue", highlight=False, markup=False)
314
+ error_console = rich.console.Console(style="red", highlight=False, markup=False)
315
+ user_console = rich.console.Console(
316
+ style="bright_black", highlight=False, markup=False
317
+ )
318
+ assistant_console = rich.console.Console(
319
+ style="white bold", highlight=False, markup=False
320
+ )
321
+ while True:
322
+ if cost > limit:
323
+ system_console.print(
324
+ f"\nCost limit exceeded. Current cost: {cost}, input tokens: {input_toks}, output tokens: {output_toks}"
325
+ )
326
+ break
327
+
328
+ if not waiting_for_assistant:
329
+ if first_message:
330
+ msg = first_message
331
+ first_message = ""
332
+ else:
333
+ msg = text_from_editor(user_console)
334
+
335
+ history.append(parse_user_message_special(msg))
336
+ else:
337
+ waiting_for_assistant = False
338
+
339
+ cost_, input_toks_ = 0, 0
340
+ cost += cost_
341
+ input_toks += input_toks_
342
+
343
+ stream = client.messages.stream(
344
+ model="claude-3-5-sonnet-20241022",
345
+ messages=history,
346
+ tools=tools,
347
+ max_tokens=8096,
348
+ system=system,
349
+ )
350
+
351
+ system_console.print(
352
+ "\n---------------------------------------\n# Assistant response",
353
+ style="bold",
354
+ )
355
+ _histories: History = []
356
+ full_response: str = ""
357
+
358
+ tool_calls = []
359
+ tool_results: list[ToolResultBlockParam] = []
360
+ try:
361
+ with stream as stream_:
362
+ for chunk in stream_:
363
+ type_ = chunk.type
364
+ if type_ in {"message_start", "message_stop"}:
365
+ continue
366
+ elif type_ == "content_block_start" and hasattr(
367
+ chunk, "content_block"
368
+ ):
369
+ content_block = chunk.content_block
370
+ if (
371
+ hasattr(content_block, "type")
372
+ and content_block.type == "text"
373
+ and hasattr(content_block, "text")
374
+ ):
375
+ chunk_str = content_block.text
376
+ assistant_console.print(chunk_str, end="")
377
+ full_response += chunk_str
378
+ elif content_block.type == "tool_use":
379
+ if (
380
+ hasattr(content_block, "input")
381
+ and hasattr(content_block, "name")
382
+ and hasattr(content_block, "id")
383
+ ):
384
+ assert content_block.input == {}
385
+ tool_calls.append(
386
+ {
387
+ "name": str(content_block.name),
388
+ "input": str(""),
389
+ "done": False,
390
+ "id": str(content_block.id),
391
+ }
392
+ )
393
+ else:
394
+ error_console.log(
395
+ f"Ignoring unknown content block type {content_block.type}"
396
+ )
397
+ elif type_ == "content_block_delta" and hasattr(chunk, "delta"):
398
+ delta = chunk.delta
399
+ if hasattr(delta, "type"):
400
+ delta_type = str(delta.type)
401
+ if delta_type == "text_delta" and hasattr(delta, "text"):
402
+ chunk_str = delta.text
403
+ assistant_console.print(chunk_str, end="")
404
+ full_response += chunk_str
405
+ elif delta_type == "input_json_delta" and hasattr(
406
+ delta, "partial_json"
407
+ ):
408
+ partial_json = delta.partial_json
409
+ if isinstance(tool_calls[-1]["input"], str):
410
+ tool_calls[-1]["input"] += partial_json
411
+ else:
412
+ error_console.log(
413
+ f"Ignoring unknown content block delta type {delta_type}"
414
+ )
415
+ else:
416
+ raise ValueError("Content block delta has no type")
417
+ elif type_ == "content_block_stop":
418
+ if tool_calls and not tool_calls[-1]["done"]:
419
+ tc = tool_calls[-1]
420
+ tool_name = str(tc["name"])
421
+ tool_input = str(tc["input"])
422
+ tool_id = str(tc["id"])
423
+
424
+ tool_parsed = which_tool_name(
425
+ tool_name
426
+ ).model_validate_json(tool_input)
427
+
428
+ system_console.print(
429
+ f"\n---------------------------------------\n# Assistant invoked tool: {tool_parsed}"
430
+ )
431
+
432
+ _histories.append(
433
+ {
434
+ "role": "assistant",
435
+ "content": [
436
+ ToolUseBlockParam(
437
+ id=tool_id,
438
+ name=tool_name,
439
+ input=tool_parsed.model_dump(),
440
+ type="tool_use",
441
+ )
442
+ ],
443
+ }
444
+ )
445
+ try:
446
+ output_or_dones, _ = get_tool_output(
447
+ tool_parsed,
448
+ default_enc,
449
+ limit - cost,
450
+ loop,
451
+ max_tokens=8000,
452
+ )
453
+ except Exception as e:
454
+ output_or_dones = [
455
+ (f"GOT EXCEPTION while calling tool. Error: {e}")
456
+ ]
457
+ tb = traceback.format_exc()
458
+ error_console.print(str(output_or_dones) + "\n" + tb)
459
+
460
+ if any(isinstance(x, DoneFlag) for x in output_or_dones):
461
+ return "", cost
462
+
463
+ tool_results_content: list[
464
+ TextBlockParam | ImageBlockParam
465
+ ] = []
466
+ for output in output_or_dones:
467
+ assert not isinstance(output, DoneFlag)
468
+ if isinstance(output, ImageData):
469
+ tool_results_content.append(
470
+ {
471
+ "type": "image",
472
+ "source": {
473
+ "type": "base64",
474
+ "media_type": output.media_type,
475
+ "data": output.data,
476
+ },
477
+ }
478
+ )
479
+
480
+ else:
481
+ tool_results_content.append(
482
+ {
483
+ "type": "text",
484
+ "text": output,
485
+ },
486
+ )
487
+ tool_results.append(
488
+ ToolResultBlockParam(
489
+ type="tool_result",
490
+ tool_use_id=str(tc["id"]),
491
+ content=tool_results_content,
492
+ )
493
+ )
494
+ else:
495
+ _histories.append(
496
+ {
497
+ "role": "assistant",
498
+ "content": full_response
499
+ if full_response.strip()
500
+ else "...",
501
+ } # Fixes anthropic issue of non empty response only
502
+ )
503
+
504
+ except KeyboardInterrupt:
505
+ waiting_for_assistant = False
506
+ input("Interrupted...enter to redo the current turn")
507
+ else:
508
+ history.extend(_histories)
509
+ if tool_results:
510
+ history.append({"role": "user", "content": tool_results})
511
+ waiting_for_assistant = True
512
+ save_history(history, session_id)
513
+
514
+ return "Couldn't finish the task", cost
515
+
516
+
517
+ if __name__ == "__main__":
518
+ app()
wcgw_cli/cli.py ADDED
@@ -0,0 +1,42 @@
1
+ import importlib
2
+ from typing import Optional
3
+
4
+ import typer
5
+ from typer import Typer
6
+
7
+ from wcgw_cli.anthropic_client import loop as claude_loop
8
+ from wcgw_cli.openai_client import loop as openai_loop
9
+
10
# Typer application object; the `loop` function defined in this module is
# registered on it as the CLI command.
app = Typer(pretty_exceptions_show_locals=False)
11
+
12
+
13
@app.command()
def loop(
    claude: bool = False,
    first_message: Optional[str] = None,
    limit: Optional[float] = None,
    resume: Optional[str] = None,
    computer_use: bool = False,
    version: bool = typer.Option(False, "--version", "-v"),
) -> tuple[str, float]:
    """Run the interactive wcgw agent loop.

    Args:
        claude: Use the Anthropic (claude) backend instead of OpenAI.
        first_message: Optional first user message; prompts interactively if omitted.
        limit: Optional cost limit forwarded to the backend loop.
        resume: Optional memory id / saved-session reference to resume from.
        computer_use: Enable computer-use tooling (claude backend only).
        version: Print the installed wcgw version and exit.

    Returns:
        (final task output, total cost) from the selected backend loop.
    """
    if version:
        # Bug fix: `import importlib` at the top of the file does NOT load the
        # `importlib.metadata` submodule, so `importlib.metadata.version` can
        # raise AttributeError. Import the submodule explicitly before use.
        import importlib.metadata

        version_ = importlib.metadata.version("wcgw")
        print(f"wcgw version: {version_}")
        # typer.Exit is the idiomatic way to stop a typer command (exit code 0).
        raise typer.Exit()
    if claude:
        return claude_loop(
            first_message=first_message,
            limit=limit,
            resume=resume,
            computer_use=computer_use,
        )
    # The OpenAI backend has no computer-use support; it is intentionally not passed.
    return openai_loop(
        first_message=first_message,
        limit=limit,
        resume=resume,
    )
39
+
40
+
41
# Allow running this CLI module directly (equivalent to the console entry point).
if __name__ == "__main__":
    app()
@@ -0,0 +1,467 @@
1
+ import base64
2
+ import json
3
+ import mimetypes
4
+ import os
5
+ import subprocess
6
+ import tempfile
7
+ import traceback
8
+ import uuid
9
+ from pathlib import Path
10
+ from typing import DefaultDict, Optional, cast
11
+
12
+ import openai
13
+ import petname # type: ignore[import-untyped]
14
+ import rich
15
+ import tokenizers # type: ignore[import-untyped]
16
+ from dotenv import load_dotenv
17
+ from openai import OpenAI
18
+ from openai.types.chat import (
19
+ ChatCompletionContentPartParam,
20
+ ChatCompletionMessageParam,
21
+ ChatCompletionUserMessageParam,
22
+ )
23
+ from pydantic import BaseModel
24
+ from typer import Typer
25
+
26
+ from wcgw.client.common import CostData, History, Models, discard_input
27
+ from wcgw.client.memory import load_memory
28
+ from wcgw.client.tools import (
29
+ DoneFlag,
30
+ ImageData,
31
+ default_enc,
32
+ get_tool_output,
33
+ initialize,
34
+ which_tool,
35
+ )
36
+ from wcgw.types_ import (
37
+ BashCommand,
38
+ BashInteraction,
39
+ ContextSave,
40
+ FileEdit,
41
+ ReadFiles,
42
+ ReadImage,
43
+ ResetShell,
44
+ WriteIfEmpty,
45
+ )
46
+
47
+ from .openai_utils import get_input_cost, get_output_cost
48
+
49
+
50
class Config(BaseModel):
    """Model and pricing configuration for the OpenAI client loop."""

    # Chat-completions model identifier; also the key into `cost_file`.
    model: Models
    # Stop the loop once the estimated cost exceeds this value (in `cost_unit`).
    cost_limit: float
    # Per-model pricing table used to estimate input/output token cost.
    cost_file: dict[Models, CostData]
    # Currency symbol used when printing running cost totals.
    cost_unit: str = "$"
+
56
+
57
def text_from_editor(console: rich.console.Console) -> str:
    """Prompt the user for a message on stdin, falling back to $EDITOR.

    A non-empty line typed at the prompt is returned directly; an empty line
    opens the user's editor on a temporary file and returns its contents.
    """
    # Drop any input already buffered on the terminal so the prompt is clean.
    discard_input()
    console.print("\n---------------------------------------\n# User message")
    typed = input()
    if typed:
        return typed
    chosen_editor = os.environ.get("EDITOR", "vim")
    with tempfile.NamedTemporaryFile(suffix=".tmp") as handle:
        # Let the editor write into the temp file, then read it back before
        # the context manager deletes it.
        subprocess.run([chosen_editor, handle.name], check=True)
        with open(handle.name, "r") as reader:
            contents = reader.read()
    console.print(contents)
    return contents
71
+
72
+
73
def save_history(history: History, session_id: str) -> None:
    """Persist the chat history as JSON under `.wcgw/` in the working directory.

    The file name is derived from the first user message (slashes and spaces
    replaced, lower-cased, truncated) suffixed with the session id.

    Args:
        history: Full message history; element 1 is expected to be the first
            user message when present.
        session_id: Short unique id appended to the file name.
    """
    # Robustness fix: the original indexed history[1] unconditionally, which
    # raises IndexError for histories with fewer than two messages. Fall back
    # to the session id alone in that case.
    if len(history) > 1:
        slug = (
            str(history[1]["content"]).replace("/", "_").replace(" ", "_").lower()[:60]
        )
        myid = slug + "_" + session_id
    else:
        myid = session_id
    myid = myid + ".json"

    mypath = Path(".wcgw") / myid
    mypath.parent.mkdir(parents=True, exist_ok=True)
    with open(mypath, "w") as f:
        json.dump(history, f, indent=3)
82
+
83
+
84
def parse_user_message_special(msg: str) -> ChatCompletionUserMessageParam:
    """Build a user message, expanding `%image <path>` lines into image parts.

    Lines starting with `%` are treated as special commands (only `image` is
    supported); all other lines are accumulated into text parts, with
    consecutive text lines merged into a single part.
    """
    content_parts: list[ChatCompletionContentPartParam] = []
    for raw_line in msg.split("\n"):
        if not raw_line.startswith("%"):
            # Plain text: extend the previous text part if there is one,
            # otherwise start a new one.
            if content_parts and content_parts[-1]["type"] == "text":
                content_parts[-1]["text"] += "\n" + raw_line
            else:
                content_parts.append({"type": "text", "text": raw_line})
            continue
        tokens = raw_line[1:].strip().split(" ")
        assert tokens[0] == "image"
        # The path may contain spaces; everything after the command is the path.
        image_path = " ".join(tokens[1:])
        with open(image_path, "rb") as image_file:
            encoded = base64.b64encode(image_file.read()).decode("utf-8")
        media_type = mimetypes.guess_type(image_path)[0]
        data_url = f"data:{media_type};base64,{encoded}"
        content_parts.append(
            {"type": "image_url", "image_url": {"url": data_url, "detail": "auto"}}
        )
    return {"role": "user", "content": content_parts}
107
+
108
+
109
# Typer application object; the `loop` function defined in this module is
# registered on it as the CLI command.
app = Typer(pretty_exceptions_show_locals=False)
110
+
111
+
112
@app.command()
def loop(
    first_message: Optional[str] = None,
    limit: Optional[float] = None,
    resume: Optional[str] = None,
    computer_use: bool = False,
) -> tuple[str, float]:
    """Interactive OpenAI-backed shell/coding agent loop.

    Streams assistant responses, dispatches tool calls through wcgw's tool
    layer, tracks an estimated running token/dollar cost, and saves the
    conversation after every completed turn.

    Args:
        first_message: Optional first user message; otherwise the user is
            prompted (stdin / $EDITOR).
        limit: Optional cost ceiling overriding the default in ``Config``.
        resume: Memory id or saved-history file path ("latest" picks the most
            recently modified file under `.wcgw/`).
        computer_use: Accepted for CLI parity; not referenced by this backend.

    Returns:
        (task output, total cost) — the DoneFlag output when the model marks
        the task done, or a fallback message when the loop exits otherwise.
    """
    load_dotenv()

    # Short random id used to suffix the saved-history filename.
    session_id = str(uuid.uuid4())[:6]

    history: History = []
    waiting_for_assistant = False

    memory = None
    if resume:
        try:
            # First interpret `resume` as a saved wcgw memory (context save).
            _, memory, _ = load_memory(
                resume,
                8000,
                lambda x: default_enc.encode(x).ids,
                lambda x: default_enc.decode(x),
            )
        except OSError:
            # Not a memory id — treat `resume` as a saved chat-history file.
            if resume == "latest":
                resume_path = sorted(Path(".wcgw").iterdir(), key=os.path.getmtime)[-1]
            else:
                resume_path = Path(resume)
            if not resume_path.exists():
                raise FileNotFoundError(f"File {resume} not found")
            with resume_path.open() as f:
                history = json.load(f)
            if len(history) <= 2:
                raise ValueError("Invalid history file")
            first_message = ""
            # If the last saved message isn't from the assistant, the model
            # still owes a reply for the pending turn.
            waiting_for_assistant = history[-1]["role"] != "assistant"

    # NOTE(review): my_dir is assigned but never used below.
    my_dir = os.path.dirname(__file__)

    config = Config(
        model=cast(Models, os.getenv("OPENAI_MODEL", "gpt-4o-2024-08-06").lower()),
        cost_limit=0.1,
        cost_unit="$",
        cost_file={
            # Prices are per 1M tokens.
            "gpt-4o-2024-08-06": CostData(
                cost_per_1m_input_tokens=5, cost_per_1m_output_tokens=15
            ),
        },
    )

    # CLI-provided limit overrides the default; afterwards `limit` is a float.
    if limit is not None:
        config.cost_limit = limit
    limit = config.cost_limit

    enc = tokenizers.Tokenizer.from_pretrained("Xenova/gpt-4o")

    # Tool schemas advertised to the model, generated from wcgw's pydantic types.
    tools = [
        openai.pydantic_function_tool(
            BashCommand,
            description="""
- Execute a bash command. This is stateful (beware with subsequent calls).
- Do not use interactive commands like nano. Prefer writing simpler commands.
- Status of the command and the current working directory will always be returned at the end.
- Optionally `exit shell has restarted` is the output, in which case environment resets, you can run fresh commands.
- The first or the last line might be `(...truncated)` if the output is too long.
- Always run `pwd` if you get any file or directory not found error to make sure you're not lost.
- The control will return to you in 5 seconds regardless of the status. For heavy commands, keep checking status using BashInteraction till they are finished.
- Run long running commands in background using screen instead of "&".
- Do not use 'cat' to read files, use ReadFiles tool instead.
""",
        ),
        openai.pydantic_function_tool(
            BashInteraction,
            description="""
- Interact with running program using this tool
- Special keys like arrows, interrupts, enter, etc.
- Send text input to the running program.
- Send send_specials=["Enter"] to recheck status of a running program.
- Only one of send_text, send_specials, send_ascii should be provided.""",
        ),
        openai.pydantic_function_tool(
            ReadFiles,
            description="""
- Read full file content of one or more files.
- Provide absolute file paths only
""",
        ),
        openai.pydantic_function_tool(
            WriteIfEmpty,
            description="""
- Write content to an empty or non-existent file. Provide file path and content. Use this instead of BashCommand for writing new files.
- Provide absolute file path only.
- For editing existing files, use FileEdit instead of this tool.""",
        ),
        openai.pydantic_function_tool(
            FileEdit,
            description="""
- Use absolute file path only.
- Use ONLY SEARCH/REPLACE blocks to edit the file.
- file_edit_using_search_replace_blocks should start with <<<<<<< SEARCH
""",
        ),
        openai.pydantic_function_tool(
            ReadImage, description="Read an image from the shell."
        ),
        openai.pydantic_function_tool(
            ResetShell,
            description="Resets the shell. Use only if all interrupts and prompt reset attempts have failed repeatedly.",
        ),
        openai.pydantic_function_tool(
            ContextSave,
            description="""

Saves provided description and file contents of all the relevant file paths or globs in a single text file.
- Provide random unqiue id or whatever user provided.
- Leave project path as empty string if no project path""",
        ),
    ]

    # Build the system prompt, including any resumed memory context.
    system = initialize(
        os.getcwd(),
        [],
        resume if (memory and resume) else "",
        max_tokens=8000,
        mode="wcgw",
    )

    # Append the shared SEARCH/REPLACE diff instructions to the system prompt.
    with open(
        os.path.join(
            os.path.dirname(__file__), "..", "wcgw", "client", "diff-instructions.txt"
        )
    ) as f:
        system += f.read()

    if not history:
        history = [{"role": "system", "content": system}]
    else:
        # Resumed history ending in a tool result: the assistant must reply next.
        if history[-1]["role"] == "tool":
            waiting_for_assistant = True

    client = OpenAI()

    cost: float = 0
    input_toks = 0
    output_toks = 0
    # Separate consoles so each speaker/stream gets a distinct color.
    system_console = rich.console.Console(style="blue", highlight=False, markup=False)
    error_console = rich.console.Console(style="red", highlight=False, markup=False)
    user_console = rich.console.Console(
        style="bright_black", highlight=False, markup=False
    )
    assistant_console = rich.console.Console(
        style="white bold", highlight=False, markup=False
    )
    while True:
        if cost > limit:
            system_console.print(
                f"\nCost limit exceeded. Current cost: {cost}, input tokens: {input_toks}, output tokens: {output_toks}"
            )
            break

        if not waiting_for_assistant:
            # Take the CLI-provided first message once, then prompt the user.
            if first_message:
                msg = first_message
                first_message = ""
            else:
                msg = text_from_editor(user_console)

            history.append(parse_user_message_special(msg))
        else:
            waiting_for_assistant = False

        # Estimate prompt-side cost before sending the request.
        cost_, input_toks_ = get_input_cost(
            config.cost_file[config.model], enc, history
        )
        cost += cost_
        input_toks += input_toks_

        stream = client.chat.completions.create(
            messages=history,
            model=config.model,
            stream=True,
            tools=tools,
        )

        system_console.print(
            "\n---------------------------------------\n# Assistant response",
            style="bold",
        )
        # Accumulates streamed tool-call argument fragments,
        # keyed by tool-call id and then by the call's stream index.
        tool_call_args_by_id = DefaultDict[str, DefaultDict[int, str]](
            lambda: DefaultDict(str)
        )
        _histories: History = []
        item: ChatCompletionMessageParam
        full_response: str = ""
        image_histories: History = []
        try:
            for chunk in stream:
                if chunk.choices[0].finish_reason == "tool_calls":
                    assert tool_call_args_by_id
                    # Reassemble the assistant turn including all tool calls.
                    item = {
                        "role": "assistant",
                        "content": full_response,
                        "tool_calls": [
                            {
                                # Id and index are concatenated to make each
                                # reassembled call id unique.
                                "id": tool_call_id + str(toolindex),
                                "type": "function",
                                "function": {
                                    "arguments": tool_args,
                                    "name": type(which_tool(tool_args)).__name__,
                                },
                            }
                            for tool_call_id, toolcallargs in tool_call_args_by_id.items()
                            for toolindex, tool_args in toolcallargs.items()
                        ],
                    }
                    cost_, output_toks_ = get_output_cost(
                        config.cost_file[config.model], enc, item
                    )
                    cost += cost_
                    system_console.print(
                        f"\n---------------------------------------\n# Assistant invoked tools: {[which_tool(tool['function']['arguments']) for tool in item['tool_calls']]}"
                    )
                    system_console.print(f"\nTotal cost: {config.cost_unit}{cost:.3f}")
                    output_toks += output_toks_

                    _histories.append(item)
                    # Execute each requested tool call in order.
                    for tool_call_id, toolcallargs in tool_call_args_by_id.items():
                        for toolindex, tool_args in toolcallargs.items():
                            try:
                                # `loop` is passed so tools can re-enter this
                                # function (e.g. sub-agent flows).
                                output_or_dones, cost_ = get_tool_output(
                                    json.loads(tool_args),
                                    enc,
                                    limit - cost,
                                    loop,
                                    max_tokens=8000,
                                )
                                output_or_done = output_or_dones[0]
                            except Exception as e:
                                # Tool failures are reported back to the model
                                # instead of crashing the session.
                                output_or_done = (
                                    f"GOT EXCEPTION while calling tool. Error: {e}"
                                )
                                tb = traceback.format_exc()
                                error_console.print(output_or_done + "\n" + tb)
                                cost_ = 0
                            cost += cost_
                            system_console.print(
                                f"\nTotal cost: {config.cost_unit}{cost:.3f}"
                            )

                            if isinstance(output_or_done, DoneFlag):
                                system_console.print(
                                    f"\n# Task marked done, with output {output_or_done.task_output}",
                                )
                                system_console.print(
                                    f"\nTotal cost: {config.cost_unit}{cost:.3f}"
                                )
                                return output_or_done.task_output, cost

                            output = output_or_done

                            if isinstance(output, ImageData):
                                # Images can't go in a tool message; stage them
                                # as a separate assistant/user exchange keyed by
                                # a generated id the model can refer to.
                                randomId = petname.Generate(2, "-")
                                if not image_histories:
                                    image_histories.extend(
                                        [
                                            {
                                                "role": "assistant",
                                                "content": f"Share images with ids: {randomId}",
                                            },
                                            {
                                                "role": "user",
                                                "content": [
                                                    {
                                                        "type": "image_url",
                                                        "image_url": {
                                                            "url": output.dataurl,
                                                            "detail": "auto",
                                                        },
                                                    }
                                                ],
                                            },
                                        ]
                                    )
                                else:
                                    image_histories[0]["content"] += ", " + randomId
                                    second_content = image_histories[1]["content"]
                                    assert isinstance(second_content, list)
                                    second_content.append(
                                        {
                                            "type": "image_url",
                                            "image_url": {
                                                "url": output.dataurl,
                                                "detail": "auto",
                                            },
                                        }
                                    )

                                item = {
                                    "role": "tool",
                                    "content": f"Ask user for image id: {randomId}",
                                    "tool_call_id": tool_call_id + str(toolindex),
                                }
                            else:
                                item = {
                                    "role": "tool",
                                    "content": str(output),
                                    "tool_call_id": tool_call_id + str(toolindex),
                                }
                            cost_, output_toks_ = get_output_cost(
                                config.cost_file[config.model], enc, item
                            )
                            cost += cost_
                            output_toks += output_toks_

                            _histories.append(item)
                    # All tool results staged: the assistant speaks next turn.
                    waiting_for_assistant = True
                    break
                elif chunk.choices[0].finish_reason:
                    # Plain text completion finished (no tool calls).
                    assistant_console.print("")
                    item = {
                        "role": "assistant",
                        "content": full_response,
                    }
                    cost_, output_toks_ = get_output_cost(
                        config.cost_file[config.model], enc, item
                    )
                    cost += cost_
                    output_toks += output_toks_

                    system_console.print(f"\nTotal cost: {config.cost_unit}{cost:.3f}")
                    _histories.append(item)
                    break

                # Accumulate streamed tool-call argument fragments.
                if chunk.choices[0].delta.tool_calls:
                    tool_call = chunk.choices[0].delta.tool_calls[0]
                    if tool_call.function and tool_call.function.arguments:
                        tool_call_args_by_id[tool_call.id or ""][tool_call.index] += (
                            tool_call.function.arguments
                        )

                # Echo streamed text as it arrives.
                chunk_str = chunk.choices[0].delta.content or ""
                assistant_console.print(chunk_str, end="")
                full_response += chunk_str
        except KeyboardInterrupt:
            # Ctrl-C discards the partial turn; nothing below is committed.
            waiting_for_assistant = False
            input("Interrupted...enter to redo the current turn")
        else:
            # Only commit the turn to history if it was not interrupted.
            history.extend(_histories)
            history.extend(image_histories)
            save_history(history, session_id)

    return "Couldn't finish the task", cost
464
+
465
+
466
# Allow running this client module directly.
if __name__ == "__main__":
    app()
@@ -0,0 +1,67 @@
1
+ from typing import cast
2
+
3
+ from openai.types.chat import (
4
+ ChatCompletionAssistantMessageParam,
5
+ ChatCompletionMessage,
6
+ ChatCompletionMessageParam,
7
+ ParsedChatCompletionMessage,
8
+ )
9
+ from tokenizers import Tokenizer # type: ignore[import-untyped]
10
+
11
+ from wcgw.client.common import CostData, History
12
+
13
+
14
+ def get_input_cost(
15
+ cost_map: CostData, enc: Tokenizer, history: History
16
+ ) -> tuple[float, int]:
17
+ input_tokens = 0
18
+ for msg in history:
19
+ content = msg["content"]
20
+ refusal = msg.get("refusal")
21
+ if isinstance(content, list):
22
+ for part in content:
23
+ if "text" in part:
24
+ input_tokens += len(enc.encode(part["text"]))
25
+ elif content is None:
26
+ if refusal is None:
27
+ raise ValueError("Expected content or refusal to be present")
28
+ input_tokens += len(enc.encode(str(refusal)))
29
+ elif not isinstance(content, str):
30
+ raise ValueError(f"Expected content to be string, got {type(content)}")
31
+ else:
32
+ input_tokens += len(enc.encode(content))
33
+ cost = input_tokens * cost_map.cost_per_1m_input_tokens / 1_000_000
34
+ return cost, input_tokens
35
+
36
+
37
+ def get_output_cost(
38
+ cost_map: CostData,
39
+ enc: Tokenizer,
40
+ item: ChatCompletionMessage | ChatCompletionMessageParam,
41
+ ) -> tuple[float, int]:
42
+ if isinstance(item, ChatCompletionMessage):
43
+ content = item.content
44
+ if not isinstance(content, str):
45
+ raise ValueError(f"Expected content to be string, got {type(content)}")
46
+ else:
47
+ if not isinstance(item["content"], str):
48
+ raise ValueError(
49
+ f"Expected content to be string, got {type(item['content'])}"
50
+ )
51
+ content = item["content"]
52
+ if item["role"] == "tool":
53
+ return 0, 0
54
+ output_tokens = len(enc.encode(content))
55
+
56
+ if "tool_calls" in item:
57
+ item = cast(ChatCompletionAssistantMessageParam, item)
58
+ toolcalls = item["tool_calls"]
59
+ for tool_call in toolcalls or []:
60
+ output_tokens += len(enc.encode(tool_call["function"]["arguments"]))
61
+ elif isinstance(item, ParsedChatCompletionMessage):
62
+ if item.tool_calls:
63
+ for tool_callf in item.tool_calls:
64
+ output_tokens += len(enc.encode(tool_callf.function.arguments))
65
+
66
+ cost = output_tokens * cost_map.cost_per_1m_output_tokens / 1_000_000
67
+ return cost, output_tokens
File without changes