wcgw 0.0.5__tar.gz → 0.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wcgw might be problematic. Click here for more details.

@@ -0,0 +1 @@
1
+ 3.12
@@ -1,10 +1,11 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: wcgw
3
- Version: 0.0.5
3
+ Version: 0.0.7
4
4
  Summary: What could go wrong giving full shell access to chatgpt?
5
5
  Project-URL: Homepage, https://github.com/rusiaaman/wcgw
6
6
  Author-email: Aman Rusia <gapypi@arcfu.com>
7
- Requires-Python: >=3.7
7
+ Requires-Python: >=3.8
8
+ Requires-Dist: anthropic>=0.36.2
8
9
  Requires-Dist: fastapi>=0.115.0
9
10
  Requires-Dist: mypy>=1.11.2
10
11
  Requires-Dist: openai>=1.46.0
@@ -22,10 +23,18 @@ Requires-Dist: uvicorn>=0.31.0
22
23
  Requires-Dist: websockets>=13.1
23
24
  Description-Content-Type: text/markdown
24
25
 
25
- # Giving full shell access to Chatgpt web (What could go wrong?)
26
- Steps:
27
- 1. First run the client in any directory of choice
28
- 2. Use this custom gpt `https://chatgpt.com/g/g-Us0AAXkRh-wcgw-giving-shell-access` to let it interact with your shell.
26
+ # Shell access to chatgpt.com
27
+
28
+ ### 🚀 Highlights
29
+ - **Full Shell Access**: No restrictions, complete control.
30
+ - ⚡ **Create, Execute, Iterate**: Seamless workflow for development and execution.
31
+ - ⚡ **Interactive Command Handling**: Supports interactive commands with ease.
32
+
33
+
34
+ ### 🪜 Steps:
35
+ 1. Run the [cli client](https://github.com/rusiaaman/wcgw?tab=readme-ov-file#client) in any directory of choice.
36
+ 2. Share the generated id with the GPT: `https://chatgpt.com/g/g-Us0AAXkRh-wcgw-giving-shell-access`
37
+ 3. The custom GPT can now run any command on your cli
29
38
 
30
39
  ## Client
31
40
 
@@ -49,7 +58,7 @@ https://chatgpt.com/g/g-Us0AAXkRh-wcgw-giving-shell-access
49
58
 
50
59
  Add the user id that the client generated to the first message, along with the instructions.
51
60
 
52
- # How does it work?
61
+ # How it works
53
62
  Your commands are relayed through a server I've hosted at https://wcgw.arcfu.com. The code for that is at `src/relay/serve.py`.
54
63
 
55
64
  The user id that you share with chatgpt is added to the request it sends to the relay server, which holds a websocket with the terminal client.
@@ -1,7 +1,15 @@
1
- # Giving full shell access to Chatgpt web (What could go wrong?)
2
- Steps:
3
- 1. First run the client in any directory of choice
4
- 2. Use this custom gpt `https://chatgpt.com/g/g-Us0AAXkRh-wcgw-giving-shell-access` to let it interact with your shell.
1
+ # Shell access to chatgpt.com
2
+
3
+ ### 🚀 Highlights
4
+ - **Full Shell Access**: No restrictions, complete control.
5
+ - ⚡ **Create, Execute, Iterate**: Seamless workflow for development and execution.
6
+ - ⚡ **Interactive Command Handling**: Supports interactive commands with ease.
7
+
8
+
9
+ ### 🪜 Steps:
10
+ 1. Run the [cli client](https://github.com/rusiaaman/wcgw?tab=readme-ov-file#client) in any directory of choice.
11
+ 2. Share the generated id with the GPT: `https://chatgpt.com/g/g-Us0AAXkRh-wcgw-giving-shell-access`
12
+ 3. The custom GPT can now run any command on your cli
5
13
 
6
14
  ## Client
7
15
 
@@ -25,7 +33,7 @@ https://chatgpt.com/g/g-Us0AAXkRh-wcgw-giving-shell-access
25
33
 
26
34
  Add the user id that the client generated to the first message, along with the instructions.
27
35
 
28
- # How does it work?
36
+ # How it works
29
37
  Your commands are relayed through a server I've hosted at https://wcgw.arcfu.com. The code for that is at `src/relay/serve.py`.
30
38
 
31
39
  The user id that you share with chatgpt is added to the request it sends to the relay server, which holds a websocket with the terminal client.
@@ -1,10 +1,10 @@
1
1
  [project]
2
2
  authors = [{ name = "Aman Rusia", email = "gapypi@arcfu.com" }]
3
3
  name = "wcgw"
4
- version = "0.0.5"
4
+ version = "0.0.7"
5
5
  description = "What could go wrong giving full shell access to chatgpt?"
6
6
  readme = "README.md"
7
- requires-python = ">=3.7"
7
+ requires-python = ">=3.8"
8
8
  dependencies = [
9
9
  "openai>=1.46.0",
10
10
  "mypy>=1.11.2",
@@ -21,6 +21,7 @@ dependencies = [
21
21
  "fastapi>=0.115.0",
22
22
  "uvicorn>=0.31.0",
23
23
  "websockets>=13.1",
24
+ "anthropic>=0.36.2",
24
25
  ]
25
26
 
26
27
  [project.urls]
@@ -37,8 +38,8 @@ wcgw = "wcgw:listen"
37
38
 
38
39
  [tool.uv]
39
40
  dev-dependencies = [
40
- "ipython>=8.27.0",
41
41
  "mypy>=1.11.2",
42
42
  "types-toml>=0.10.8.20240310",
43
43
  "autoflake",
44
+ "ipython>=8.12.3",
44
45
  ]
@@ -1,7 +1,8 @@
1
1
  import asyncio
2
+ import base64
2
3
  import threading
3
4
  import time
4
- from typing import Callable, Coroutine, Literal, Optional, Sequence
5
+ from typing import Any, Callable, Coroutine, DefaultDict, Literal, Optional, Sequence
5
6
  from uuid import UUID
6
7
  import fastapi
7
8
  from fastapi import WebSocket, WebSocketDisconnect
@@ -36,6 +37,30 @@ clients: dict[UUID, Callable[[Mdata], Coroutine[None, None, None]]] = {}
36
37
  websockets: dict[UUID, WebSocket] = {}
37
38
  gpts: dict[UUID, Callable[[str], None]] = {}
38
39
 
40
+ images: DefaultDict[UUID, dict[str, dict[str, Any]]] = DefaultDict(dict)
41
+
42
+
43
+ @app.websocket("/register_serve_image/{uuid}")
44
+ async def register_serve_image(websocket: WebSocket, uuid: UUID) -> None:
45
+ raise Exception("Disabled")
46
+ await websocket.accept()
47
+ received_data = await websocket.receive_json()
48
+ name = received_data["name"]
49
+ image_b64 = received_data["image_b64"]
50
+ image_bytes = base64.b64decode(image_b64)
51
+ images[uuid][name] = {
52
+ "content": image_bytes,
53
+ "media_type": received_data["media_type"],
54
+ }
55
+
56
+
57
+ @app.get("/get_image/{uuid}/{name}")
58
+ async def get_image(uuid: UUID, name: str) -> fastapi.responses.Response:
59
+ return fastapi.responses.Response(
60
+ content=images[uuid][name]["content"],
61
+ media_type=images[uuid][name]["media_type"],
62
+ )
63
+
39
64
 
40
65
  @app.websocket("/register/{uuid}")
41
66
  async def register_websocket(websocket: WebSocket, uuid: UUID) -> None:
@@ -61,9 +86,34 @@ async def register_websocket(websocket: WebSocket, uuid: UUID) -> None:
61
86
  print(f"Client {uuid} disconnected")
62
87
 
63
88
 
64
- @app.post("/action")
65
- async def chatgpt_server(json_data: Mdata) -> str:
66
- user_id = json_data.user_id
89
+ @app.post("/write_file")
90
+ async def write_file(write_file_data: Writefile, user_id: UUID) -> str:
91
+ if user_id not in clients:
92
+ raise fastapi.HTTPException(
93
+ status_code=404, detail="User with the provided id not found"
94
+ )
95
+
96
+ results: Optional[str] = None
97
+
98
+ def put_results(result: str) -> None:
99
+ nonlocal results
100
+ results = result
101
+
102
+ gpts[user_id] = put_results
103
+
104
+ await clients[user_id](Mdata(data=write_file_data, user_id=user_id))
105
+
106
+ start_time = time.time()
107
+ while time.time() - start_time < 30:
108
+ if results is not None:
109
+ return results
110
+ await asyncio.sleep(0.1)
111
+
112
+ raise fastapi.HTTPException(status_code=500, detail="Timeout error")
113
+
114
+
115
+ @app.post("/execute_bash")
116
+ async def execute_bash(excute_bash_data: ExecuteBash, user_id: UUID) -> str:
67
117
  if user_id not in clients:
68
118
  raise fastapi.HTTPException(
69
119
  status_code=404, detail="User with the provided id not found"
@@ -77,7 +127,7 @@ async def chatgpt_server(json_data: Mdata) -> str:
77
127
 
78
128
  gpts[user_id] = put_results
79
129
 
80
- await clients[user_id](json_data)
130
+ await clients[user_id](Mdata(data=excute_bash_data, user_id=user_id))
81
131
 
82
132
  start_time = time.time()
83
133
  while time.time() - start_time < 30:
@@ -1,4 +1,6 @@
1
+ import base64
1
2
  import json
3
+ import mimetypes
2
4
  from pathlib import Path
3
5
  import sys
4
6
  import traceback
@@ -8,17 +10,21 @@ from openai import OpenAI
8
10
  from openai.types.chat import (
9
11
  ChatCompletionMessageParam,
10
12
  ChatCompletionAssistantMessageParam,
13
+ ChatCompletionUserMessageParam,
14
+ ChatCompletionContentPartParam,
11
15
  ChatCompletionMessage,
12
16
  ParsedChatCompletionMessage,
13
17
  )
14
18
  import rich
19
+ import petname
15
20
  from typer import Typer
16
21
  import uuid
17
22
 
18
- from .common import Models, discard_input
19
- from .common import CostData, History
23
+ from wcgw.common import Config, text_from_editor
24
+
25
+ from .common import Models
20
26
  from .openai_utils import get_input_cost, get_output_cost
21
- from .tools import ExecuteBash, GetShellOutputLastCommand
27
+ from .tools import ExecuteBash, ReadImage, ImageData
22
28
 
23
29
  from .tools import (
24
30
  BASH_CLF_OUTPUT,
@@ -34,40 +40,14 @@ from .tools import (
34
40
  import tiktoken
35
41
 
36
42
  from urllib import parse
37
- import subprocess
38
43
  import os
39
- import tempfile
40
44
 
41
45
  import toml
42
- from pydantic import BaseModel
43
46
 
44
47
 
45
48
  from dotenv import load_dotenv
46
49
 
47
-
48
- class Config(BaseModel):
49
- model: Models
50
- secondary_model: Models
51
- cost_limit: float
52
- cost_file: dict[Models, CostData]
53
- cost_unit: str = "$"
54
-
55
-
56
- def text_from_editor(console: rich.console.Console) -> str:
57
- # First consume all the input till now
58
- discard_input()
59
- console.print("\n---------------------------------------\n# User message")
60
- data = input()
61
- if data:
62
- return data
63
- editor = os.environ.get("EDITOR", "vim")
64
- with tempfile.NamedTemporaryFile(suffix=".tmp") as tf:
65
- subprocess.run([editor, tf.name], check=True)
66
- with open(tf.name, "r") as f:
67
- data = f.read()
68
- console.print(data)
69
- return data
70
-
50
+ History = list[ChatCompletionMessageParam]
71
51
 
72
52
  def save_history(history: History, session_id: str) -> None:
73
53
  myid = str(history[1]["content"]).replace("/", "_").replace(" ", "_").lower()[:60]
@@ -80,6 +60,38 @@ def save_history(history: History, session_id: str) -> None:
80
60
  json.dump(history, f, indent=3)
81
61
 
82
62
 
63
+ def parse_user_message_special(msg: str) -> ChatCompletionUserMessageParam:
64
+ # Search for lines starting with `%` and treat them as special commands
65
+ parts: list[ChatCompletionContentPartParam] = []
66
+ for line in msg.split("\n"):
67
+ if line.startswith("%"):
68
+ args = line[1:].strip().split(" ")
69
+ command = args[0]
70
+ assert command == 'image'
71
+ image_path = args[1]
72
+ with open(image_path, 'rb') as f:
73
+ image_bytes = f.read()
74
+ image_b64 = base64.b64encode(image_bytes).decode("utf-8")
75
+ image_type = mimetypes.guess_type(image_path)[0]
76
+ dataurl=f'data:{image_type};base64,{image_b64}'
77
+ parts.append({
78
+ 'type': 'image_url',
79
+ 'image_url': {
80
+ 'url': dataurl,
81
+ 'detail': 'auto'
82
+ }
83
+ })
84
+ else:
85
+ if len(parts) > 0 and parts[-1]['type'] == 'text':
86
+ parts[-1]['text'] += '\n' + line
87
+ else:
88
+ parts.append({'type': 'text', 'text': line})
89
+ return {
90
+ 'role': 'user',
91
+ 'content': parts
92
+ }
93
+
94
+
83
95
  app = Typer(pretty_exceptions_show_locals=False)
84
96
 
85
97
 
@@ -94,6 +106,7 @@ def loop(
94
106
  session_id = str(uuid.uuid4())[:6]
95
107
 
96
108
  history: History = []
109
+ waiting_for_assistant = False
97
110
  if resume:
98
111
  if resume == "latest":
99
112
  resume_path = sorted(Path(".wcgw").iterdir(), key=os.path.getmtime)[-1]
@@ -108,6 +121,7 @@ def loop(
108
121
  if history[1]["role"] != "user":
109
122
  raise ValueError("Invalid history file, second message should be user")
110
123
  first_message = ""
124
+ waiting_for_assistant = history[-1]['role'] != 'assistant'
111
125
 
112
126
  my_dir = os.path.dirname(__file__)
113
127
  config_file = os.path.join(my_dir, "..", "..", "config.toml")
@@ -134,19 +148,18 @@ Execute a bash script. Stateful (beware with subsequent calls).
134
148
  Execute commands using `execute_command` attribute.
135
149
  Do not use interactive commands like nano. Prefer writing simpler commands.
136
150
  Last line will always be `(exit <int code>)` except if
137
- the last line is `(waiting for input)` which will be the case if you've run any interactive command (which you shouldn't run) by mistake. You can then send input using `send_ascii` attributes.
151
+ the last line is `(pending)` if the program is still running or waiting for user inputs. You can then send input using `send_ascii` attributes. You get status by sending `send_ascii: [10]`.
138
152
  Optionally the last line is `(won't exit)` in which case you need to kill the process if you want to run a new command.
139
153
  Optionally `exit shell has restarted` is the output, in which case environment resets, you can run fresh commands.
140
154
  The first line might be `(...truncated)` if the output is too long.""",
141
155
  ),
142
- openai.pydantic_function_tool(
143
- GetShellOutputLastCommand,
144
- description="Get output of the last command run in the shell. Use this in case you want to know status of a running program.",
145
- ),
146
156
  openai.pydantic_function_tool(
147
157
  Writefile,
148
158
  description="Write content to a file. Provide file path and content. Use this instead of ExecuteBash for writing files.",
149
159
  ),
160
+ openai.pydantic_function_tool(
161
+ ReadImage, description="Read an image from the shell."
162
+ ),
150
163
  ]
151
164
  uname_sysname = os.uname().sysname
152
165
  uname_machine = os.uname().machine
@@ -165,12 +178,11 @@ System information:
165
178
  - Machine: {uname_machine}
166
179
  """
167
180
 
168
- has_tool_output = False
169
181
  if not history:
170
182
  history = [{"role": "system", "content": system}]
171
183
  else:
172
184
  if history[-1]["role"] == "tool":
173
- has_tool_output = True
185
+ waiting_for_assistant = True
174
186
 
175
187
  client = OpenAI()
176
188
 
@@ -189,16 +201,16 @@ System information:
189
201
  )
190
202
  break
191
203
 
192
- if not has_tool_output:
204
+ if not waiting_for_assistant:
193
205
  if first_message:
194
206
  msg = first_message
195
207
  first_message = ""
196
208
  else:
197
209
  msg = text_from_editor(user_console)
198
210
 
199
- history.append({"role": "user", "content": msg})
211
+ history.append(parse_user_message_special(msg))
200
212
  else:
201
- has_tool_output = False
213
+ waiting_for_assistant = False
202
214
 
203
215
  cost_, input_toks_ = get_input_cost(
204
216
  config.cost_file[config.model], enc, history
@@ -223,6 +235,7 @@ System information:
223
235
  _histories: History = []
224
236
  item: ChatCompletionMessageParam
225
237
  full_response: str = ""
238
+ image_histories: History = []
226
239
  try:
227
240
  for chunk in stream:
228
241
  if chunk.choices[0].finish_reason == "tool_calls":
@@ -236,7 +249,7 @@ System information:
236
249
  "type": "function",
237
250
  "function": {
238
251
  "arguments": tool_args,
239
- "name": "execute_bash",
252
+ "name": type(which_tool(tool_args)).__name__,
240
253
  },
241
254
  }
242
255
  for tool_call_id, toolcallargs in tool_call_args_by_id.items()
@@ -252,7 +265,7 @@ System information:
252
265
  )
253
266
  system_console.print(f"\nTotal cost: {config.cost_unit}{cost:.3f}")
254
267
  output_toks += output_toks_
255
-
268
+
256
269
  _histories.append(item)
257
270
  for tool_call_id, toolcallargs in tool_call_args_by_id.items():
258
271
  for toolindex, tool_args in toolcallargs.items():
@@ -284,13 +297,50 @@ System information:
284
297
  f"\nTotal cost: {config.cost_unit}{cost:.3f}"
285
298
  )
286
299
  return output_or_done.task_output, cost
300
+
287
301
  output = output_or_done
288
302
 
289
- item = {
290
- "role": "tool",
291
- "content": str(output),
292
- "tool_call_id": tool_call_id + str(toolindex),
293
- }
303
+ if isinstance(output, ImageData):
304
+ randomId = petname.Generate(2, "-")
305
+ if not image_histories:
306
+ image_histories.extend([
307
+ {
308
+ 'role': 'assistant',
309
+ 'content': f'Share images with ids: {randomId}'
310
+
311
+ },
312
+ {
313
+ 'role': 'user',
314
+ 'content': [{
315
+ 'type': 'image_url',
316
+ 'image_url': {
317
+ 'url': output.dataurl,
318
+ 'detail': 'auto'
319
+ }
320
+ }]
321
+ }]
322
+ )
323
+ else:
324
+ image_histories[0]['content'] += ', ' + randomId
325
+ image_histories[1]["content"].append({ # type: ignore
326
+ 'type': 'image_url',
327
+ 'image_url': {
328
+ 'url': output.dataurl,
329
+ 'detail': 'auto'
330
+ }
331
+ })
332
+
333
+ item = {
334
+ "role": "tool",
335
+ "content": f'Ask user for image id: {randomId}',
336
+ "tool_call_id": tool_call_id + str(toolindex),
337
+ }
338
+ else:
339
+ item = {
340
+ "role": "tool",
341
+ "content": str(output),
342
+ "tool_call_id": tool_call_id + str(toolindex),
343
+ }
294
344
  cost_, output_toks_ = get_output_cost(
295
345
  config.cost_file[config.model], enc, item
296
346
  )
@@ -298,7 +348,7 @@ System information:
298
348
  output_toks += output_toks_
299
349
 
300
350
  _histories.append(item)
301
- has_tool_output = True
351
+ waiting_for_assistant = True
302
352
  break
303
353
  elif chunk.choices[0].finish_reason:
304
354
  assistant_console.print("")
@@ -327,11 +377,11 @@ System information:
327
377
  assistant_console.print(chunk_str, end="")
328
378
  full_response += chunk_str
329
379
  except KeyboardInterrupt:
330
- has_tool_output = False
380
+ waiting_for_assistant = False
331
381
  input("Interrupted...enter to redo the current turn")
332
382
  else:
333
383
  history.extend(_histories)
334
-
384
+ history.extend(image_histories)
335
385
  save_history(history, session_id)
336
386
 
337
387
  return "Couldn't finish the task", cost
@@ -1,9 +1,13 @@
1
+ import os
1
2
  import select
3
+ import subprocess
2
4
  import sys
5
+ import tempfile
3
6
  import termios
4
7
  import tty
5
8
  from typing import Literal
6
9
  from pydantic import BaseModel
10
+ import rich
7
11
 
8
12
 
9
13
  class CostData(BaseModel):
@@ -11,14 +15,6 @@ class CostData(BaseModel):
11
15
  cost_per_1m_output_tokens: float
12
16
 
13
17
 
14
- from openai.types.chat import (
15
- ChatCompletionMessageParam,
16
- ChatCompletionAssistantMessageParam,
17
- ChatCompletionMessage,
18
- ParsedChatCompletionMessage,
19
- )
20
-
21
- History = list[ChatCompletionMessageParam]
22
18
  Models = Literal["gpt-4o-2024-08-06", "gpt-4o-mini"]
23
19
 
24
20
 
@@ -45,3 +41,27 @@ def discard_input() -> None:
45
41
  finally:
46
42
  # Restore old terminal settings
47
43
  termios.tcsetattr(fd, termios.TCSADRAIN, old_settings)
44
+
45
+
46
+ class Config(BaseModel):
47
+ model: Models
48
+ secondary_model: Models
49
+ cost_limit: float
50
+ cost_file: dict[Models, CostData]
51
+ cost_unit: str = "$"
52
+
53
+
54
+ def text_from_editor(console: rich.console.Console) -> str:
55
+ # First consume all the input till now
56
+ discard_input()
57
+ console.print("\n---------------------------------------\n# User message")
58
+ data = input()
59
+ if data:
60
+ return data
61
+ editor = os.environ.get("EDITOR", "vim")
62
+ with tempfile.NamedTemporaryFile(suffix=".tmp") as tf:
63
+ subprocess.run([editor, tf.name], check=True)
64
+ with open(tf.name, "r") as f:
65
+ data = f.read()
66
+ console.print(data)
67
+ return data
@@ -28,9 +28,19 @@ def get_input_cost(
28
28
  input_tokens = 0
29
29
  for msg in history:
30
30
  content = msg["content"]
31
- if not isinstance(content, str):
31
+ refusal = msg.get("refusal")
32
+ if isinstance(content, list):
33
+ for part in content:
34
+ if 'text' in part:
35
+ input_tokens += len(enc.encode(part['text']))
36
+ elif content is None:
37
+ if refusal is None:
38
+ raise ValueError("Expected content or refusal to be present")
39
+ input_tokens += len(enc.encode(str(refusal)))
40
+ elif not isinstance(content, str):
32
41
  raise ValueError(f"Expected content to be string, got {type(content)}")
33
- input_tokens += len(enc.encode(content))
42
+ else:
43
+ input_tokens += len(enc.encode(content))
34
44
  cost = input_tokens * cost_map.cost_per_1m_input_tokens / 1_000_000
35
45
  return cost, input_tokens
36
46