wcgw 2.8.6__py3-none-any.whl → 2.8.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wcgw might be problematic. Click here for more details.

@@ -0,0 +1,467 @@
1
+ import base64
2
+ import json
3
+ import mimetypes
4
+ import os
5
+ import subprocess
6
+ import tempfile
7
+ import traceback
8
+ import uuid
9
+ from pathlib import Path
10
+ from typing import DefaultDict, Optional, cast
11
+
12
+ import openai
13
+ import petname # type: ignore[import-untyped]
14
+ import rich
15
+ import tokenizers # type: ignore[import-untyped]
16
+ from dotenv import load_dotenv
17
+ from openai import OpenAI
18
+ from openai.types.chat import (
19
+ ChatCompletionContentPartParam,
20
+ ChatCompletionMessageParam,
21
+ ChatCompletionUserMessageParam,
22
+ )
23
+ from pydantic import BaseModel
24
+ from typer import Typer
25
+
26
+ from wcgw.client.common import CostData, History, Models, discard_input
27
+ from wcgw.client.memory import load_memory
28
+ from wcgw.client.tools import (
29
+ DoneFlag,
30
+ ImageData,
31
+ default_enc,
32
+ get_tool_output,
33
+ initialize,
34
+ which_tool,
35
+ )
36
+ from wcgw.types_ import (
37
+ BashCommand,
38
+ BashInteraction,
39
+ ContextSave,
40
+ FileEdit,
41
+ ReadFiles,
42
+ ReadImage,
43
+ ResetShell,
44
+ WriteIfEmpty,
45
+ )
46
+
47
+ from .openai_utils import get_input_cost, get_output_cost
48
+
49
+
50
class Config(BaseModel):
    # Settings for one chat session of the OpenAI-backed loop.

    # Model name sent to the API; also the key used to look up pricing
    # in `cost_file`.
    model: Models
    # The session stops once the accumulated cost exceeds this amount.
    cost_limit: float
    # Per-model pricing table.
    cost_file: dict[Models, CostData]
    # Prefix printed in front of cost figures.
    cost_unit: str = "$"
55
+
56
+
57
def text_from_editor(console: rich.console.Console) -> str:
    """Read the next user message from stdin, falling back to ``$EDITOR``.

    Pending terminal input is discarded first, then a single line is read
    with ``input()``. If that line is empty, the editor named by the
    ``EDITOR`` environment variable (``vim`` when unset or blank) is opened
    on a temporary file and the saved file content becomes the message.

    Args:
        console: Console used to print the prompt header and to echo the
            text that was written in the editor.

    Returns:
        The typed line, or the full content of the edited temporary file.

    Raises:
        subprocess.CalledProcessError: If the editor exits with a non-zero
            status.
    """
    # Imported locally so the module-level import block stays untouched.
    import shlex
    import shutil

    # First consume all the input till now
    discard_input()
    console.print("\n---------------------------------------\n# User message")
    data = input()
    if data:
        return data

    editor = (os.environ.get("EDITOR") or "").strip() or "vim"
    # EDITOR commonly carries arguments (e.g. "code --wait"). If the whole
    # value resolves to an executable (including paths containing spaces) it
    # is used verbatim; otherwise it is split like a shell command line.
    editor_cmd = [editor] if shutil.which(editor) else shlex.split(editor)

    with tempfile.NamedTemporaryFile(suffix=".tmp") as tf:
        subprocess.run([*editor_cmd, tf.name], check=True)
        with open(tf.name, "r") as f:
            data = f.read()
    console.print(data)
    return data
71
+
72
+
73
def save_history(history: History, session_id: str) -> None:
    """Dump ``history`` as JSON into the ``.wcgw`` directory.

    The file name is built from the stringified content of ``history[1]``
    (slashes and spaces replaced by underscores, lower-cased, cut to 60
    characters), followed by ``_<session_id>.json``.
    """
    slug = str(history[1]["content"]).replace("/", "_").replace(" ", "_")
    slug = slug.lower()[:60]
    target = Path(".wcgw") / f"{slug}_{session_id}.json"

    # Create the output directory on first use.
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, "w") as out:
        json.dump(history, out, indent=3)
82
+
83
+
84
def parse_user_message_special(msg: str) -> ChatCompletionUserMessageParam:
    """Convert raw user text into a multi-part user chat message.

    Lines of the form ``%image <path>`` are replaced by an ``image_url``
    part holding the file as a base64 data URL. All other lines, including
    lines that start with ``%`` but are not an ``image`` command, are kept as
    text; consecutive text lines are merged into one part joined by newlines.

    Args:
        msg: The raw message, possibly spanning several lines.

    Returns:
        A ``{"role": "user", "content": [...]}`` message.

    Raises:
        OSError: If an image path cannot be opened.
        ValueError: If the MIME type of an image cannot be guessed from its
            file name.
    """
    parts: list[ChatCompletionContentPartParam] = []
    for line in msg.split("\n"):
        # Split into "<command> <rest>"; tolerates repeated whitespace
        # between the command and its argument.
        tokens = line[1:].split(None, 1) if line.startswith("%") else []
        if tokens and tokens[0] == "image":
            image_path = tokens[1].strip() if len(tokens) > 1 else ""
            with open(image_path, "rb") as f:
                image_bytes = f.read()
            image_type = mimetypes.guess_type(image_path)[0]
            if image_type is None:
                # A "data:None;..." URL would never be a valid image.
                raise ValueError(
                    f"Cannot determine the image type of {image_path!r}"
                )
            image_b64 = base64.b64encode(image_bytes).decode("utf-8")
            dataurl = f"data:{image_type};base64,{image_b64}"
            parts.append(
                {"type": "image_url", "image_url": {"url": dataurl, "detail": "auto"}}
            )
        elif parts and parts[-1]["type"] == "text":
            parts[-1]["text"] += "\n" + line
        else:
            parts.append({"type": "text", "text": line})
    return {"role": "user", "content": parts}
107
+
108
+
109
# Command-line application object; commands are registered on it with
# `@app.command()`. Local variables are kept out of pretty-printed tracebacks.
app = Typer(
    pretty_exceptions_show_locals=False,
)
110
+
111
+
112
def _build_openai_tools() -> list:
    """Return the OpenAI function-tool definitions offered to the model."""
    return [
        openai.pydantic_function_tool(
            BashCommand,
            description="""
- Execute a bash command. This is stateful (beware with subsequent calls).
- Do not use interactive commands like nano. Prefer writing simpler commands.
- Status of the command and the current working directory will always be returned at the end.
- Optionally `exit shell has restarted` is the output, in which case environment resets, you can run fresh commands.
- The first or the last line might be `(...truncated)` if the output is too long.
- Always run `pwd` if you get any file or directory not found error to make sure you're not lost.
- The control will return to you in 5 seconds regardless of the status. For heavy commands, keep checking status using BashInteraction till they are finished.
- Run long running commands in background using screen instead of "&".
- Do not use 'cat' to read files, use ReadFiles tool instead.
""",
        ),
        openai.pydantic_function_tool(
            BashInteraction,
            description="""
- Interact with running program using this tool
- Special keys like arrows, interrupts, enter, etc.
- Send text input to the running program.
- Send send_specials=["Enter"] to recheck status of a running program.
- Only one of send_text, send_specials, send_ascii should be provided.""",
        ),
        openai.pydantic_function_tool(
            ReadFiles,
            description="""
- Read full file content of one or more files.
- Provide absolute file paths only
""",
        ),
        openai.pydantic_function_tool(
            WriteIfEmpty,
            description="""
- Write content to an empty or non-existent file. Provide file path and content. Use this instead of BashCommand for writing new files.
- Provide absolute file path only.
- For editing existing files, use FileEdit instead of this tool.""",
        ),
        openai.pydantic_function_tool(
            FileEdit,
            description="""
- Use absolute file path only.
- Use ONLY SEARCH/REPLACE blocks to edit the file.
- file_edit_using_search_replace_blocks should start with <<<<<<< SEARCH
""",
        ),
        openai.pydantic_function_tool(
            ReadImage, description="Read an image from the shell."
        ),
        openai.pydantic_function_tool(
            ResetShell,
            description="Resets the shell. Use only if all interrupts and prompt reset attempts have failed repeatedly.",
        ),
        openai.pydantic_function_tool(
            ContextSave,
            description="""

Saves provided description and file contents of all the relevant file paths or globs in a single text file.
- Provide random unqiue id or whatever user provided.
- Leave project path as empty string if no project path""",
        ),
    ]


def _latest_history_file(directory: Path) -> Path:
    """Return the most recently modified entry inside ``directory``.

    Raises:
        FileNotFoundError: If the directory is missing or has no entries.
    """
    entries = list(directory.iterdir()) if directory.is_dir() else []
    if not entries:
        raise FileNotFoundError(f"No saved history found in {directory}")
    return sorted(entries, key=os.path.getmtime)[-1]


@app.command()
def loop(
    first_message: Optional[str] = None,
    limit: Optional[float] = None,
    resume: Optional[str] = None,
    computer_use: bool = False,
) -> tuple[str, float]:
    """Run an interactive chat session in which the model can call shell and file tools.

    Each turn sends the whole history to the OpenAI chat completions API,
    streams the answer to the terminal, executes any tool calls and feeds
    their output back, until a tool reports the task as done or the cost
    limit is exceeded. After every completed turn the history is saved as
    JSON under ``.wcgw``.

    Args:
        first_message: Used as the first user message instead of prompting.
        limit: Cost limit for the session; defaults to 0.1 when omitted.
        resume: Passed to ``load_memory`` first. If that raises ``OSError``
            the value is treated as ``"latest"`` (newest entry in ``.wcgw``)
            or as the path of a saved history JSON file.
        computer_use: Accepted for interface compatibility; not read here.

    Returns:
        ``(task_output, cost)`` when a tool returns a ``DoneFlag``, otherwise
        ``("Couldn't finish the task", cost)`` once the cost limit is exceeded.

    Raises:
        FileNotFoundError: If a history file to resume from cannot be found.
        ValueError: If the resumed history has two or fewer messages.
    """
    load_dotenv()

    session_id = str(uuid.uuid4())[:6]

    history: History = []
    waiting_for_assistant = False

    memory = None
    if resume:
        try:
            _, memory, _ = load_memory(
                resume,
                8000,
                lambda x: default_enc.encode(x).ids,
                lambda x: default_enc.decode(x),
            )
        except OSError:
            # No memory could be loaded for `resume`; interpret it as a saved
            # chat history instead.
            if resume == "latest":
                resume_path = _latest_history_file(Path(".wcgw"))
            else:
                resume_path = Path(resume)
            if not resume_path.exists():
                raise FileNotFoundError(f"File {resume} not found")
            with resume_path.open() as f:
                history = json.load(f)
            if len(history) <= 2:
                raise ValueError("Invalid history file")
            first_message = ""
            waiting_for_assistant = history[-1]["role"] != "assistant"

    config = Config(
        model=cast(Models, os.getenv("OPENAI_MODEL", "gpt-4o-2024-08-06").lower()),
        cost_limit=0.1,
        cost_unit="$",
        cost_file={
            "gpt-4o-2024-08-06": CostData(
                cost_per_1m_input_tokens=5, cost_per_1m_output_tokens=15
            ),
        },
    )

    if limit is not None:
        config.cost_limit = limit
    limit = config.cost_limit

    enc = tokenizers.Tokenizer.from_pretrained("Xenova/gpt-4o")

    tools = _build_openai_tools()

    system = initialize(
        os.getcwd(),
        [],
        resume if (memory and resume) else "",
        max_tokens=8000,
        mode="wcgw",
    )

    with open(
        os.path.join(
            os.path.dirname(__file__), "..", "wcgw", "client", "diff-instructions.txt"
        )
    ) as f:
        system += f.read()

    if not history:
        history = [{"role": "system", "content": system}]
    elif history[-1]["role"] == "tool":
        # A resumed history ending in tool output still needs the model's reply.
        waiting_for_assistant = True

    client = OpenAI()

    cost: float = 0
    input_toks = 0
    output_toks = 0
    system_console = rich.console.Console(style="blue", highlight=False, markup=False)
    error_console = rich.console.Console(style="red", highlight=False, markup=False)
    user_console = rich.console.Console(
        style="bright_black", highlight=False, markup=False
    )
    assistant_console = rich.console.Console(
        style="white bold", highlight=False, markup=False
    )
    while True:
        if cost > limit:
            system_console.print(
                f"\nCost limit exceeded. Current cost: {cost}, input tokens: {input_toks}, output tokens: {output_toks}"
            )
            break

        if not waiting_for_assistant:
            if first_message:
                msg = first_message
                first_message = ""
            else:
                msg = text_from_editor(user_console)

            history.append(parse_user_message_special(msg))
        else:
            waiting_for_assistant = False

        # The full history is sent on every request, so it is charged each turn.
        cost_, input_toks_ = get_input_cost(
            config.cost_file[config.model], enc, history
        )
        cost += cost_
        input_toks += input_toks_

        stream = client.chat.completions.create(
            messages=history,
            model=config.model,
            stream=True,
            tools=tools,
        )

        system_console.print(
            "\n---------------------------------------\n# Assistant response",
            style="bold",
        )
        # Streamed argument fragments, keyed by tool-call id and then by index.
        tool_call_args_by_id = DefaultDict[str, DefaultDict[int, str]](
            lambda: DefaultDict(str)
        )
        # Messages produced during this turn; committed only if the turn
        # finishes without being interrupted.
        _histories: History = []
        item: ChatCompletionMessageParam
        full_response: str = ""
        image_histories: History = []
        try:
            for chunk in stream:
                if not chunk.choices:
                    # Some chunks carry no choices; nothing to process.
                    continue
                choice = chunk.choices[0]

                if choice.finish_reason == "tool_calls":
                    assert tool_call_args_by_id
                    item = {
                        "role": "assistant",
                        "content": full_response,
                        "tool_calls": [
                            {
                                "id": tool_call_id + str(toolindex),
                                "type": "function",
                                "function": {
                                    "arguments": tool_args,
                                    "name": type(which_tool(tool_args)).__name__,
                                },
                            }
                            for tool_call_id, toolcallargs in tool_call_args_by_id.items()
                            for toolindex, tool_args in toolcallargs.items()
                        ],
                    }
                    cost_, output_toks_ = get_output_cost(
                        config.cost_file[config.model], enc, item
                    )
                    cost += cost_
                    system_console.print(
                        f"\n---------------------------------------\n# Assistant invoked tools: {[which_tool(tool['function']['arguments']) for tool in item['tool_calls']]}"
                    )
                    system_console.print(f"\nTotal cost: {config.cost_unit}{cost:.3f}")
                    output_toks += output_toks_

                    _histories.append(item)
                    for tool_call_id, toolcallargs in tool_call_args_by_id.items():
                        for toolindex, tool_args in toolcallargs.items():
                            try:
                                output_or_dones, cost_ = get_tool_output(
                                    json.loads(tool_args),
                                    enc,
                                    limit - cost,
                                    loop,
                                    max_tokens=8000,
                                )
                                output_or_done = output_or_dones[0]
                            except Exception as e:
                                # Report the failure back to the model as the
                                # tool output instead of aborting the session.
                                output_or_done = (
                                    f"GOT EXCEPTION while calling tool. Error: {e}"
                                )
                                tb = traceback.format_exc()
                                error_console.print(output_or_done + "\n" + tb)
                                cost_ = 0
                            cost += cost_
                            system_console.print(
                                f"\nTotal cost: {config.cost_unit}{cost:.3f}"
                            )

                            if isinstance(output_or_done, DoneFlag):
                                system_console.print(
                                    f"\n# Task marked done, with output {output_or_done.task_output}",
                                )
                                system_console.print(
                                    f"\nTotal cost: {config.cost_unit}{cost:.3f}"
                                )
                                return output_or_done.task_output, cost

                            output = output_or_done

                            if isinstance(output, ImageData):
                                # The image itself is attached to a follow-up
                                # user message; the tool message only refers
                                # to it by a generated id.
                                randomId = petname.Generate(2, "-")
                                image_part = {
                                    "type": "image_url",
                                    "image_url": {
                                        "url": output.dataurl,
                                        "detail": "auto",
                                    },
                                }
                                if not image_histories:
                                    image_histories.extend(
                                        [
                                            {
                                                "role": "assistant",
                                                "content": f"Share images with ids: {randomId}",
                                            },
                                            {
                                                "role": "user",
                                                "content": [image_part],
                                            },
                                        ]
                                    )
                                else:
                                    image_histories[0]["content"] += ", " + randomId
                                    second_content = image_histories[1]["content"]
                                    assert isinstance(second_content, list)
                                    second_content.append(image_part)

                                tool_content = f"Ask user for image id: {randomId}"
                            else:
                                tool_content = str(output)

                            item = {
                                "role": "tool",
                                "content": tool_content,
                                "tool_call_id": tool_call_id + str(toolindex),
                            }
                            cost_, output_toks_ = get_output_cost(
                                config.cost_file[config.model], enc, item
                            )
                            cost += cost_
                            output_toks += output_toks_

                            _histories.append(item)
                    # Tool output was added: the model answers next, not the user.
                    waiting_for_assistant = True
                    break
                elif choice.finish_reason:
                    assistant_console.print("")
                    item = {
                        "role": "assistant",
                        "content": full_response,
                    }
                    cost_, output_toks_ = get_output_cost(
                        config.cost_file[config.model], enc, item
                    )
                    cost += cost_
                    output_toks += output_toks_

                    system_console.print(f"\nTotal cost: {config.cost_unit}{cost:.3f}")
                    _histories.append(item)
                    break

                # Collect every tool-call fragment carried by this chunk.
                for tool_call in choice.delta.tool_calls or []:
                    if tool_call.function and tool_call.function.arguments:
                        tool_call_args_by_id[tool_call.id or ""][tool_call.index] += (
                            tool_call.function.arguments
                        )

                chunk_str = choice.delta.content or ""
                assistant_console.print(chunk_str, end="")
                full_response += chunk_str
        except KeyboardInterrupt:
            waiting_for_assistant = False
            input("Interrupted...enter to redo the current turn")
        else:
            history.extend(_histories)
            history.extend(image_histories)
            save_history(history, session_id)

    return "Couldn't finish the task", cost
464
+
465
+
466
# Start the command-line application only when this file is run as a script.
if __name__ == "__main__":
    app()
@@ -0,0 +1,67 @@
1
+ from typing import cast
2
+
3
+ from openai.types.chat import (
4
+ ChatCompletionAssistantMessageParam,
5
+ ChatCompletionMessage,
6
+ ChatCompletionMessageParam,
7
+ ParsedChatCompletionMessage,
8
+ )
9
+ from tokenizers import Tokenizer # type: ignore[import-untyped]
10
+
11
+ from wcgw.client.common import CostData, History
12
+
13
+
14
def get_input_cost(
    cost_map: CostData, enc: Tokenizer, history: History
) -> tuple[float, int]:
    """Estimate the input cost of sending ``history`` to the model.

    Tokens are counted per message from:

    * the string content, or the ``text`` entry of every part of a list
      content (parts without ``text``, such as images, are not counted);
    * the refusal text when the content is ``None`` or absent;
    * the arguments of any tool calls attached to the message, since they
      are sent back to the model along with the rest of the history.

    Args:
        cost_map: Pricing; ``cost_per_1m_input_tokens`` is read.
        enc: Tokenizer; ``len(enc.encode(text))`` is used as the token count.
        history: The messages that will be sent.

    Returns:
        ``(cost, input_tokens)``.

    Raises:
        ValueError: If a message has no content, refusal or tool calls, or if
            its content is neither a string, a list nor ``None``.
    """
    input_tokens = 0
    for msg in history:
        # `.get` because messages consisting only of tool calls may omit
        # the "content" key altogether.
        content = msg.get("content")
        refusal = msg.get("refusal")
        tool_calls = msg.get("tool_calls") or []

        if isinstance(content, list):
            for part in content:
                if "text" in part:
                    input_tokens += len(enc.encode(part["text"]))
        elif content is None:
            if refusal is not None:
                input_tokens += len(enc.encode(str(refusal)))
            elif not tool_calls:
                raise ValueError("Expected content or refusal to be present")
        elif not isinstance(content, str):
            raise ValueError(f"Expected content to be string, got {type(content)}")
        else:
            input_tokens += len(enc.encode(content))

        for tool_call in tool_calls:
            input_tokens += len(enc.encode(tool_call["function"]["arguments"]))

    cost = input_tokens * cost_map.cost_per_1m_input_tokens / 1_000_000
    return cost, input_tokens
35
+
36
+
37
def get_output_cost(
    cost_map: CostData,
    enc: Tokenizer,
    item: ChatCompletionMessage | ChatCompletionMessageParam,
) -> tuple[float, int]:
    """Estimate the output cost of one generated message.

    ``item`` is either a ``ChatCompletionMessage`` object or a message dict.
    Its content must be a string. Dict messages with role ``"tool"`` cost
    nothing. Otherwise the tokens of the content are counted, plus the tokens
    of the tool-call arguments when the message carries tool calls.

    Returns:
        ``(cost, output_tokens)``.

    Raises:
        ValueError: If the content is not a string.
    """
    if isinstance(item, ChatCompletionMessage):
        text = item.content
        if not isinstance(text, str):
            raise ValueError(f"Expected content to be string, got {type(text)}")
    else:
        text = item["content"]
        if not isinstance(text, str):
            raise ValueError(f"Expected content to be string, got {type(text)}")
        # Tool results are not model output, so they are not charged.
        if item["role"] == "tool":
            return 0, 0

    token_count = len(enc.encode(text))

    if "tool_calls" in item:
        assistant_item = cast(ChatCompletionAssistantMessageParam, item)
        for call in assistant_item["tool_calls"] or []:
            token_count += len(enc.encode(call["function"]["arguments"]))
    elif isinstance(item, ParsedChatCompletionMessage) and item.tool_calls:
        for parsed_call in item.tool_calls:
            token_count += len(enc.encode(parsed_call.function.arguments))

    total = token_count * cost_map.cost_per_1m_output_tokens / 1_000_000
    return total, token_count
File without changes