wcgw-0.1.0-py3-none-any.whl → wcgw-0.1.2-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wcgw might be problematic. Click here for more details.

wcgw/basic.py CHANGED
@@ -1,4 +1,6 @@
1
+ import base64
1
2
  import json
3
+ import mimetypes
2
4
  from pathlib import Path
3
5
  import sys
4
6
  import traceback
@@ -8,24 +10,26 @@ from openai import OpenAI
8
10
  from openai.types.chat import (
9
11
  ChatCompletionMessageParam,
10
12
  ChatCompletionAssistantMessageParam,
13
+ ChatCompletionUserMessageParam,
14
+ ChatCompletionContentPartParam,
11
15
  ChatCompletionMessage,
12
16
  ParsedChatCompletionMessage,
13
17
  )
14
18
  import rich
19
+ import petname # type: ignore[import-untyped]
15
20
  from typer import Typer
16
21
  import uuid
17
22
 
18
23
  from .common import Models, discard_input
19
24
  from .common import CostData, History
20
25
  from .openai_utils import get_input_cost, get_output_cost
21
- from .tools import ExecuteBash, GetShellOutputLastCommand
26
+ from .tools import ExecuteBash, ReadImage, ImageData
22
27
 
23
28
  from .tools import (
24
29
  BASH_CLF_OUTPUT,
25
30
  Confirmation,
26
31
  DoneFlag,
27
32
  Writefile,
28
- get_is_waiting_user_input,
29
33
  get_tool_output,
30
34
  SHELL,
31
35
  start_shell,
@@ -80,6 +84,31 @@ def save_history(history: History, session_id: str) -> None:
80
84
  json.dump(history, f, indent=3)
81
85
 
82
86
 
87
+ def parse_user_message_special(msg: str) -> ChatCompletionUserMessageParam:
88
+ # Search for lines starting with `%` and treat them as special commands
89
+ parts: list[ChatCompletionContentPartParam] = []
90
+ for line in msg.split("\n"):
91
+ if line.startswith("%"):
92
+ args = line[1:].strip().split(" ")
93
+ command = args[0]
94
+ assert command == "image"
95
+ image_path = args[1]
96
+ with open(image_path, "rb") as f:
97
+ image_bytes = f.read()
98
+ image_b64 = base64.b64encode(image_bytes).decode("utf-8")
99
+ image_type = mimetypes.guess_type(image_path)[0]
100
+ dataurl = f"data:{image_type};base64,{image_b64}"
101
+ parts.append(
102
+ {"type": "image_url", "image_url": {"url": dataurl, "detail": "auto"}}
103
+ )
104
+ else:
105
+ if len(parts) > 0 and parts[-1]["type"] == "text":
106
+ parts[-1]["text"] += "\n" + line
107
+ else:
108
+ parts.append({"type": "text", "text": line})
109
+ return {"role": "user", "content": parts}
110
+
111
+
83
112
  app = Typer(pretty_exceptions_show_locals=False)
84
113
 
85
114
 
@@ -94,6 +123,7 @@ def loop(
94
123
  session_id = str(uuid.uuid4())[:6]
95
124
 
96
125
  history: History = []
126
+ waiting_for_assistant = False
97
127
  if resume:
98
128
  if resume == "latest":
99
129
  resume_path = sorted(Path(".wcgw").iterdir(), key=os.path.getmtime)[-1]
@@ -108,6 +138,7 @@ def loop(
108
138
  if history[1]["role"] != "user":
109
139
  raise ValueError("Invalid history file, second message should be user")
110
140
  first_message = ""
141
+ waiting_for_assistant = history[-1]["role"] != "assistant"
111
142
 
112
143
  my_dir = os.path.dirname(__file__)
113
144
  config_file = os.path.join(my_dir, "..", "..", "config.toml")
@@ -122,31 +153,29 @@ def loop(
122
153
  enc = tiktoken.encoding_for_model(
123
154
  config.model if not config.model.startswith("o1") else "gpt-4o"
124
155
  )
125
- is_waiting_user_input = get_is_waiting_user_input(
126
- config.model, config.cost_file[config.model]
127
- )
128
156
 
129
157
  tools = [
130
158
  openai.pydantic_function_tool(
131
159
  ExecuteBash,
132
160
  description="""
133
- Execute a bash script. Stateful (beware with subsequent calls).
134
- Execute commands using `execute_command` attribute.
135
- Do not use interactive commands like nano. Prefer writing simpler commands.
136
- Last line will always be `(exit <int code>)` except if
137
- the last line is `(waiting for input)` which will be the case if you've run any interactive command (which you shouldn't run) by mistake. You can then send input using `send_ascii` attributes.
138
- Optionally the last line is `(won't exit)` in which case you need to kill the process if you want to run a new command.
139
- Optionally `exit shell has restarted` is the output, in which case environment resets, you can run fresh commands.
140
- The first line might be `(...truncated)` if the output is too long.""",
141
- ),
142
- openai.pydantic_function_tool(
143
- GetShellOutputLastCommand,
144
- description="Get output of the last command run in the shell. Use this in case you want to know status of a running program.",
161
+ - Execute a bash script. This is stateful (beware with subsequent calls).
162
+ - Execute commands using `execute_command` attribute.
163
+ - Do not use interactive commands like nano. Prefer writing simpler commands.
164
+ - Last line will always be `(exit <int code>)` except if
165
+ - The last line is `(pending)` if the program is still running or waiting for your input. You can then send input using `send_ascii` attributes. You get status by sending new line `send_ascii: ["Enter"]` or `send_ascii: [10]`.
166
+ - Optionally the last line is `(won't exit)` in which case you need to kill the process if you want to run a new command.
167
+ - Optionally `exit shell has restarted` is the output, in which case environment resets, you can run fresh commands.
168
+ - The first line might be `(...truncated)` if the output is too long.
169
+ - Always run `pwd` if you get any file or directory not found error to make sure you're not lost.
170
+ """,
145
171
  ),
146
172
  openai.pydantic_function_tool(
147
173
  Writefile,
148
174
  description="Write content to a file. Provide file path and content. Use this instead of ExecuteBash for writing files.",
149
175
  ),
176
+ openai.pydantic_function_tool(
177
+ ReadImage, description="Read an image from the shell."
178
+ ),
150
179
  ]
151
180
  uname_sysname = os.uname().sysname
152
181
  uname_machine = os.uname().machine
@@ -165,12 +194,11 @@ System information:
165
194
  - Machine: {uname_machine}
166
195
  """
167
196
 
168
- has_tool_output = False
169
197
  if not history:
170
198
  history = [{"role": "system", "content": system}]
171
199
  else:
172
200
  if history[-1]["role"] == "tool":
173
- has_tool_output = True
201
+ waiting_for_assistant = True
174
202
 
175
203
  client = OpenAI()
176
204
 
@@ -189,16 +217,16 @@ System information:
189
217
  )
190
218
  break
191
219
 
192
- if not has_tool_output:
220
+ if not waiting_for_assistant:
193
221
  if first_message:
194
222
  msg = first_message
195
223
  first_message = ""
196
224
  else:
197
225
  msg = text_from_editor(user_console)
198
226
 
199
- history.append({"role": "user", "content": msg})
227
+ history.append(parse_user_message_special(msg))
200
228
  else:
201
- has_tool_output = False
229
+ waiting_for_assistant = False
202
230
 
203
231
  cost_, input_toks_ = get_input_cost(
204
232
  config.cost_file[config.model], enc, history
@@ -223,6 +251,7 @@ System information:
223
251
  _histories: History = []
224
252
  item: ChatCompletionMessageParam
225
253
  full_response: str = ""
254
+ image_histories: History = []
226
255
  try:
227
256
  for chunk in stream:
228
257
  if chunk.choices[0].finish_reason == "tool_calls":
@@ -236,7 +265,7 @@ System information:
236
265
  "type": "function",
237
266
  "function": {
238
267
  "arguments": tool_args,
239
- "name": "execute_bash",
268
+ "name": type(which_tool(tool_args)).__name__,
240
269
  },
241
270
  }
242
271
  for tool_call_id, toolcallargs in tool_call_args_by_id.items()
@@ -262,7 +291,7 @@ System information:
262
291
  enc,
263
292
  limit - cost,
264
293
  loop,
265
- is_waiting_user_input,
294
+ max_tokens=2048,
266
295
  )
267
296
  except Exception as e:
268
297
  output_or_done = (
@@ -284,13 +313,57 @@ System information:
284
313
  f"\nTotal cost: {config.cost_unit}{cost:.3f}"
285
314
  )
286
315
  return output_or_done.task_output, cost
316
+
287
317
  output = output_or_done
288
318
 
289
- item = {
290
- "role": "tool",
291
- "content": str(output),
292
- "tool_call_id": tool_call_id + str(toolindex),
293
- }
319
+ if isinstance(output, ImageData):
320
+ randomId = petname.Generate(2, "-")
321
+ if not image_histories:
322
+ image_histories.extend(
323
+ [
324
+ {
325
+ "role": "assistant",
326
+ "content": f"Share images with ids: {randomId}",
327
+ },
328
+ {
329
+ "role": "user",
330
+ "content": [
331
+ {
332
+ "type": "image_url",
333
+ "image_url": {
334
+ "url": output.dataurl,
335
+ "detail": "auto",
336
+ },
337
+ }
338
+ ],
339
+ },
340
+ ]
341
+ )
342
+ else:
343
+ image_histories[0]["content"] += ", " + randomId
344
+ second_content = image_histories[1]["content"]
345
+ assert isinstance(second_content, list)
346
+ second_content.append(
347
+ {
348
+ "type": "image_url",
349
+ "image_url": {
350
+ "url": output.dataurl,
351
+ "detail": "auto",
352
+ },
353
+ }
354
+ )
355
+
356
+ item = {
357
+ "role": "tool",
358
+ "content": f"Ask user for image id: {randomId}",
359
+ "tool_call_id": tool_call_id + str(toolindex),
360
+ }
361
+ else:
362
+ item = {
363
+ "role": "tool",
364
+ "content": str(output),
365
+ "tool_call_id": tool_call_id + str(toolindex),
366
+ }
294
367
  cost_, output_toks_ = get_output_cost(
295
368
  config.cost_file[config.model], enc, item
296
369
  )
@@ -298,7 +371,7 @@ System information:
298
371
  output_toks += output_toks_
299
372
 
300
373
  _histories.append(item)
301
- has_tool_output = True
374
+ waiting_for_assistant = True
302
375
  break
303
376
  elif chunk.choices[0].finish_reason:
304
377
  assistant_console.print("")
@@ -327,11 +400,11 @@ System information:
327
400
  assistant_console.print(chunk_str, end="")
328
401
  full_response += chunk_str
329
402
  except KeyboardInterrupt:
330
- has_tool_output = False
403
+ waiting_for_assistant = False
331
404
  input("Interrupted...enter to redo the current turn")
332
405
  else:
333
406
  history.extend(_histories)
334
-
407
+ history.extend(image_histories)
335
408
  save_history(history, session_id)
336
409
 
337
410
  return "Couldn't finish the task", cost
wcgw/claude.py ADDED
@@ -0,0 +1,384 @@
1
+ import base64
2
+ import json
3
+ import mimetypes
4
+ from pathlib import Path
5
+ import sys
6
+ import traceback
7
+ from typing import Callable, DefaultDict, Optional, cast
8
+ import rich
9
+ import petname
10
+ from typer import Typer
11
+ import uuid
12
+
13
+ from .common import Models, discard_input
14
+ from .common import CostData, Config, text_from_editor
15
+ from .tools import ExecuteBash, ReadImage, ImageData
16
+
17
+ from .tools import (
18
+ BASH_CLF_OUTPUT,
19
+ Confirmation,
20
+ DoneFlag,
21
+ Writefile,
22
+ get_is_waiting_user_input,
23
+ get_tool_output,
24
+ SHELL,
25
+ start_shell,
26
+ which_tool,
27
+ )
28
+ import tiktoken
29
+
30
+ from urllib import parse
31
+ import subprocess
32
+ import os
33
+ import tempfile
34
+
35
+ import toml
36
+ from pydantic import BaseModel
37
+
38
+
39
+ from dotenv import load_dotenv
40
+
41
+ from anthropic.types import MessageParam
42
+
43
+ History = list[MessageParam]
44
+
45
+ def save_history(history: History, session_id: str) -> None:
46
+ myid = str(history[1]["content"]).replace("/", "_").replace(" ", "_").lower()[:60]
47
+ myid += "_" + session_id
48
+ myid = myid + ".json"
49
+
50
+ mypath = Path(".wcgw") / myid
51
+ mypath.parent.mkdir(parents=True, exist_ok=True)
52
+ with open(mypath, "w") as f:
53
+ json.dump(history, f, indent=3)
54
+
55
+
56
+ def parse_user_message_special(msg: str) -> ChatCompletionUserMessageParam:
57
+ # Search for lines starting with `%` and treat them as special commands
58
+ parts: list[ChatCompletionContentPartParam] = []
59
+ for line in msg.split("\n"):
60
+ if line.startswith("%"):
61
+ args = line[1:].strip().split(" ")
62
+ command = args[0]
63
+ assert command == 'image'
64
+ image_path = args[1]
65
+ with open(image_path, 'rb') as f:
66
+ image_bytes = f.read()
67
+ image_b64 = base64.b64encode(image_bytes).decode("utf-8")
68
+ image_type = mimetypes.guess_type(image_path)[0]
69
+ dataurl=f'data:{image_type};base64,{image_b64}'
70
+ parts.append({
71
+ 'type': 'image_url',
72
+ 'image_url': {
73
+ 'url': dataurl,
74
+ 'detail': 'auto'
75
+ }
76
+ })
77
+ else:
78
+ if len(parts) > 0 and parts[-1]['type'] == 'text':
79
+ parts[-1]['text'] += '\n' + line
80
+ else:
81
+ parts.append({'type': 'text', 'text': line})
82
+ return {
83
+ 'role': 'user',
84
+ 'content': parts
85
+ }
86
+
87
+
88
+ app = Typer(pretty_exceptions_show_locals=False)
89
+
90
+
91
+ @app.command()
92
+ def loop(
93
+ first_message: Optional[str] = None,
94
+ limit: Optional[float] = None,
95
+ resume: Optional[str] = None,
96
+ ) -> tuple[str, float]:
97
+ load_dotenv()
98
+
99
+ session_id = str(uuid.uuid4())[:6]
100
+
101
+ history: History = []
102
+ waiting_for_assistant = False
103
+ if resume:
104
+ if resume == "latest":
105
+ resume_path = sorted(Path(".wcgw").iterdir(), key=os.path.getmtime)[-1]
106
+ else:
107
+ resume_path = Path(resume)
108
+ if not resume_path.exists():
109
+ raise FileNotFoundError(f"File {resume} not found")
110
+ with resume_path.open() as f:
111
+ history = json.load(f)
112
+ if len(history) <= 2:
113
+ raise ValueError("Invalid history file")
114
+ if history[1]["role"] != "user":
115
+ raise ValueError("Invalid history file, second message should be user")
116
+ first_message = ""
117
+ waiting_for_assistant = history[-1]['role'] != 'assistant'
118
+
119
+ my_dir = os.path.dirname(__file__)
120
+ config_file = os.path.join(my_dir, "..", "..", "config.toml")
121
+ with open(config_file) as f:
122
+ config_json = toml.load(f)
123
+ config = Config.model_validate(config_json)
124
+
125
+ if limit is not None:
126
+ config.cost_limit = limit
127
+ limit = config.cost_limit
128
+
129
+ enc = tiktoken.encoding_for_model(
130
+ config.model if not config.model.startswith("o1") else "gpt-4o"
131
+ )
132
+ is_waiting_user_input = get_is_waiting_user_input(
133
+ config.model, config.cost_file[config.model]
134
+ )
135
+
136
+ tools = [
137
+ openai.pydantic_function_tool(
138
+ ExecuteBash,
139
+ description="""
140
+ Execute a bash script. Stateful (beware with subsequent calls).
141
+ Execute commands using `execute_command` attribute.
142
+ Do not use interactive commands like nano. Prefer writing simpler commands.
143
+ Last line will always be `(exit <int code>)` except if
144
+ the last line is `(pending)` if the program is still running or waiting for user inputs. You can then send input using `send_ascii` attributes. You get status by sending `send_ascii: [10]`.
145
+ Optionally the last line is `(won't exit)` in which case you need to kill the process if you want to run a new command.
146
+ Optionally `exit shell has restarted` is the output, in which case environment resets, you can run fresh commands.
147
+ The first line might be `(...truncated)` if the output is too long.""",
148
+ ),
149
+ openai.pydantic_function_tool(
150
+ Writefile,
151
+ description="Write content to a file. Provide file path and content. Use this instead of ExecuteBash for writing files.",
152
+ ),
153
+ openai.pydantic_function_tool(
154
+ ReadImage, description="Read an image from the shell."
155
+ ),
156
+ ]
157
+ uname_sysname = os.uname().sysname
158
+ uname_machine = os.uname().machine
159
+
160
+ system = f"""
161
+ You're a cli assistant.
162
+
163
+ Instructions:
164
+
165
+ - You should use the provided bash execution tool to run script to complete objective.
166
+ - Do not use sudo. Do not use interactive commands.
167
+ - Ask user for confirmation before running anything major
168
+
169
+ System information:
170
+ - System: {uname_sysname}
171
+ - Machine: {uname_machine}
172
+ """
173
+
174
+ if not history:
175
+ history = [{"role": "system", "content": system}]
176
+ else:
177
+ if history[-1]["role"] == "tool":
178
+ waiting_for_assistant = True
179
+
180
+ client = OpenAI()
181
+
182
+ cost: float = 0
183
+ input_toks = 0
184
+ output_toks = 0
185
+ system_console = rich.console.Console(style="blue", highlight=False)
186
+ error_console = rich.console.Console(style="red", highlight=False)
187
+ user_console = rich.console.Console(style="bright_black", highlight=False)
188
+ assistant_console = rich.console.Console(style="white bold", highlight=False)
189
+
190
+ while True:
191
+ if cost > limit:
192
+ system_console.print(
193
+ f"\nCost limit exceeded. Current cost: {cost}, input tokens: {input_toks}, output tokens: {output_toks}"
194
+ )
195
+ break
196
+
197
+ if not waiting_for_assistant:
198
+ if first_message:
199
+ msg = first_message
200
+ first_message = ""
201
+ else:
202
+ msg = text_from_editor(user_console)
203
+
204
+ history.append(parse_user_message_special(msg))
205
+ else:
206
+ waiting_for_assistant = False
207
+
208
+ cost_, input_toks_ = get_input_cost(
209
+ config.cost_file[config.model], enc, history
210
+ )
211
+ cost += cost_
212
+ input_toks += input_toks_
213
+
214
+ stream = client.chat.completions.create(
215
+ messages=history,
216
+ model=config.model,
217
+ stream=True,
218
+ tools=tools,
219
+ )
220
+
221
+ system_console.print(
222
+ "\n---------------------------------------\n# Assistant response",
223
+ style="bold",
224
+ )
225
+ tool_call_args_by_id = DefaultDict[str, DefaultDict[int, str]](
226
+ lambda: DefaultDict(str)
227
+ )
228
+ _histories: History = []
229
+ item: ChatCompletionMessageParam
230
+ full_response: str = ""
231
+ image_histories: History = []
232
+ try:
233
+ for chunk in stream:
234
+ if chunk.choices[0].finish_reason == "tool_calls":
235
+ assert tool_call_args_by_id
236
+ item = {
237
+ "role": "assistant",
238
+ "content": full_response,
239
+ "tool_calls": [
240
+ {
241
+ "id": tool_call_id + str(toolindex),
242
+ "type": "function",
243
+ "function": {
244
+ "arguments": tool_args,
245
+ "name": type(which_tool(tool_args)).__name__,
246
+ },
247
+ }
248
+ for tool_call_id, toolcallargs in tool_call_args_by_id.items()
249
+ for toolindex, tool_args in toolcallargs.items()
250
+ ],
251
+ }
252
+ cost_, output_toks_ = get_output_cost(
253
+ config.cost_file[config.model], enc, item
254
+ )
255
+ cost += cost_
256
+ system_console.print(
257
+ f"\n---------------------------------------\n# Assistant invoked tools: {[which_tool(tool['function']['arguments']) for tool in item['tool_calls']]}"
258
+ )
259
+ system_console.print(f"\nTotal cost: {config.cost_unit}{cost:.3f}")
260
+ output_toks += output_toks_
261
+
262
+ _histories.append(item)
263
+ for tool_call_id, toolcallargs in tool_call_args_by_id.items():
264
+ for toolindex, tool_args in toolcallargs.items():
265
+ try:
266
+ output_or_done, cost_ = get_tool_output(
267
+ json.loads(tool_args),
268
+ enc,
269
+ limit - cost,
270
+ loop,
271
+ is_waiting_user_input,
272
+ )
273
+ except Exception as e:
274
+ output_or_done = (
275
+ f"GOT EXCEPTION while calling tool. Error: {e}"
276
+ )
277
+ tb = traceback.format_exc()
278
+ error_console.print(output_or_done + "\n" + tb)
279
+ cost_ = 0
280
+ cost += cost_
281
+ system_console.print(
282
+ f"\nTotal cost: {config.cost_unit}{cost:.3f}"
283
+ )
284
+
285
+ if isinstance(output_or_done, DoneFlag):
286
+ system_console.print(
287
+ f"\n# Task marked done, with output {output_or_done.task_output}",
288
+ )
289
+ system_console.print(
290
+ f"\nTotal cost: {config.cost_unit}{cost:.3f}"
291
+ )
292
+ return output_or_done.task_output, cost
293
+
294
+ output = output_or_done
295
+
296
+ if isinstance(output, ImageData):
297
+ randomId = petname.Generate(2, "-")
298
+ if not image_histories:
299
+ image_histories.extend([
300
+ {
301
+ 'role': 'assistant',
302
+ 'content': f'Share images with ids: {randomId}'
303
+
304
+ },
305
+ {
306
+ 'role': 'user',
307
+ 'content': [{
308
+ 'type': 'image_url',
309
+ 'image_url': {
310
+ 'url': output.dataurl,
311
+ 'detail': 'auto'
312
+ }
313
+ }]
314
+ }]
315
+ )
316
+ else:
317
+ image_histories[0]['content'] += ', ' + randomId
318
+ image_histories[1]["content"].append({ # type: ignore
319
+ 'type': 'image_url',
320
+ 'image_url': {
321
+ 'url': output.dataurl,
322
+ 'detail': 'auto'
323
+ }
324
+ })
325
+
326
+ item = {
327
+ "role": "tool",
328
+ "content": f'Ask user for image id: {randomId}',
329
+ "tool_call_id": tool_call_id + str(toolindex),
330
+ }
331
+ else:
332
+ item = {
333
+ "role": "tool",
334
+ "content": str(output),
335
+ "tool_call_id": tool_call_id + str(toolindex),
336
+ }
337
+ cost_, output_toks_ = get_output_cost(
338
+ config.cost_file[config.model], enc, item
339
+ )
340
+ cost += cost_
341
+ output_toks += output_toks_
342
+
343
+ _histories.append(item)
344
+ waiting_for_assistant = True
345
+ break
346
+ elif chunk.choices[0].finish_reason:
347
+ assistant_console.print("")
348
+ item = {
349
+ "role": "assistant",
350
+ "content": full_response,
351
+ }
352
+ cost_, output_toks_ = get_output_cost(
353
+ config.cost_file[config.model], enc, item
354
+ )
355
+ cost += cost_
356
+ output_toks += output_toks_
357
+
358
+ system_console.print(f"\nTotal cost: {config.cost_unit}{cost:.3f}")
359
+ _histories.append(item)
360
+ break
361
+
362
+ if chunk.choices[0].delta.tool_calls:
363
+ tool_call = chunk.choices[0].delta.tool_calls[0]
364
+ if tool_call.function and tool_call.function.arguments:
365
+ tool_call_args_by_id[tool_call.id or ""][tool_call.index] += (
366
+ tool_call.function.arguments
367
+ )
368
+
369
+ chunk_str = chunk.choices[0].delta.content or ""
370
+ assistant_console.print(chunk_str, end="")
371
+ full_response += chunk_str
372
+ except KeyboardInterrupt:
373
+ waiting_for_assistant = False
374
+ input("Interrupted...enter to redo the current turn")
375
+ else:
376
+ history.extend(_histories)
377
+ history.extend(image_histories)
378
+ save_history(history, session_id)
379
+
380
+ return "Couldn't finish the task", cost
381
+
382
+
383
+ if __name__ == "__main__":
384
+ app()