vnc-computer-use 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,42 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ build:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+
15
+ - uses: actions/setup-python@v5
16
+ with:
17
+ python-version: "3.12"
18
+
19
+ - name: Build package
20
+ run: |
21
+ pip install build
22
+ python -m build
23
+
24
+ - uses: actions/upload-artifact@v4
25
+ with:
26
+ name: dist
27
+ path: dist/
28
+
29
+ publish:
30
+ needs: build
31
+ runs-on: ubuntu-latest
32
+ environment: pypi
33
+ permissions:
34
+ id-token: write
35
+ steps:
36
+ - uses: actions/download-artifact@v4
37
+ with:
38
+ name: dist
39
+ path: dist/
40
+
41
+ - name: Publish to PyPI
42
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,6 @@
1
+ __pycache__/
2
+ *.pyc
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ *.egg
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 EYHN
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,92 @@
1
+ Metadata-Version: 2.4
2
+ Name: vnc-computer-use
3
+ Version: 0.1.0
4
+ Summary: VNC Computer-Use CLI - control a remote desktop via VNC for AI computer-use agents
5
+ Project-URL: Homepage, https://github.com/EYHN/vnc-computer-use
6
+ Project-URL: Repository, https://github.com/EYHN/vnc-computer-use
7
+ Project-URL: Issues, https://github.com/EYHN/vnc-computer-use/issues
8
+ Author: EYHN
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: ai,automation,computer-use,remote-desktop,vnc
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: POSIX
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: System :: Networking
23
+ Classifier: Topic :: Utilities
24
+ Requires-Python: >=3.10
25
+ Requires-Dist: vncdotool>=1.0.0
26
+ Description-Content-Type: text/markdown
27
+
28
+ # vnc-computer-use
29
+
30
+ VNC Computer-Use CLI - control a remote desktop via VNC for AI computer-use agents.
31
+
32
+ ## Installation
33
+
34
+ ```bash
35
+ pip install vnc-computer-use
36
+ ```
37
+
38
+ ## Usage
39
+
40
+ ### Connect to a VNC server
41
+
42
+ ```bash
43
+ vnc connect localhost::5900
44
+ vnc connect myhost:0 --password secret
45
+ vnc connect myhost:0 --username user --password pass # macOS Screen Sharing
46
+ ```
47
+
48
+ ### Interact with the remote desktop
49
+
50
+ ```bash
51
+ # Screenshots
52
+ vnc get_screenshot -o screen.png
53
+ vnc get_screenshot # returns base64 JSON
54
+
55
+ # Keyboard
56
+ vnc key enter
57
+ vnc key ctrl-c
58
+ vnc type "hello world"
59
+
60
+ # Mouse
61
+ vnc left_click 100 200
62
+ vnc right_click 300 400
63
+ vnc double_click 100 200
64
+ vnc mouse_move 500 500
65
+ vnc left_click_drag 100 200
66
+ vnc scroll down 100 200
67
+
68
+ # Info
69
+ vnc get_cursor_position
70
+ vnc get_screen_size
71
+ vnc status
72
+ ```
73
+
74
+ ### Multi-session support
75
+
76
+ ```bash
77
+ vnc connect host1::5900 --session work
78
+ vnc connect host2::5900 --session personal
79
+ vnc -s work get_screenshot -o work.png
80
+ vnc -s personal left_click 100 200
81
+ ```
82
+
83
+ ### Disconnect
84
+
85
+ ```bash
86
+ vnc disconnect
87
+ vnc disconnect --session work
88
+ ```
89
+
90
+ ## How it works
91
+
92
+ `vnc` launches a background daemon process that maintains a persistent VNC connection via [vncdotool](https://github.com/sibson/vncdotool). The CLI communicates with the daemon over a Unix domain socket, making each action fast without reconnection overhead.
@@ -0,0 +1,65 @@
1
+ # vnc-computer-use
2
+
3
+ VNC Computer-Use CLI - control a remote desktop via VNC for AI computer-use agents.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install vnc-computer-use
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ ### Connect to a VNC server
14
+
15
+ ```bash
16
+ vnc connect localhost::5900
17
+ vnc connect myhost:0 --password secret
18
+ vnc connect myhost:0 --username user --password pass # macOS Screen Sharing
19
+ ```
20
+
21
+ ### Interact with the remote desktop
22
+
23
+ ```bash
24
+ # Screenshots
25
+ vnc get_screenshot -o screen.png
26
+ vnc get_screenshot # returns base64 JSON
27
+
28
+ # Keyboard
29
+ vnc key enter
30
+ vnc key ctrl-c
31
+ vnc type "hello world"
32
+
33
+ # Mouse
34
+ vnc left_click 100 200
35
+ vnc right_click 300 400
36
+ vnc double_click 100 200
37
+ vnc mouse_move 500 500
38
+ vnc left_click_drag 100 200
39
+ vnc scroll down 100 200
40
+
41
+ # Info
42
+ vnc get_cursor_position
43
+ vnc get_screen_size
44
+ vnc status
45
+ ```
46
+
47
+ ### Multi-session support
48
+
49
+ ```bash
50
+ vnc connect host1::5900 --session work
51
+ vnc connect host2::5900 --session personal
52
+ vnc -s work get_screenshot -o work.png
53
+ vnc -s personal left_click 100 200
54
+ ```
55
+
56
+ ### Disconnect
57
+
58
+ ```bash
59
+ vnc disconnect
60
+ vnc disconnect --session work
61
+ ```
62
+
63
+ ## How it works
64
+
65
+ `vnc` launches a background daemon process that maintains a persistent VNC connection via [vncdotool](https://github.com/sibson/vncdotool). The CLI communicates with the daemon over a Unix domain socket, making each action fast without reconnection overhead.
@@ -0,0 +1,40 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "vnc-computer-use"
7
+ version = "0.1.0"
8
+ description = "VNC Computer-Use CLI - control a remote desktop via VNC for AI computer-use agents"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ { name = "EYHN" },
14
+ ]
15
+ keywords = ["vnc", "computer-use", "remote-desktop", "automation", "ai"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Environment :: Console",
19
+ "Intended Audience :: Developers",
20
+ "License :: OSI Approved :: MIT License",
21
+ "Operating System :: POSIX",
22
+ "Programming Language :: Python :: 3",
23
+ "Programming Language :: Python :: 3.10",
24
+ "Programming Language :: Python :: 3.11",
25
+ "Programming Language :: Python :: 3.12",
26
+ "Programming Language :: Python :: 3.13",
27
+ "Topic :: System :: Networking",
28
+ "Topic :: Utilities",
29
+ ]
30
+ dependencies = [
31
+ "vncdotool>=1.0.0",
32
+ ]
33
+
34
+ [project.urls]
35
+ Homepage = "https://github.com/EYHN/vnc-computer-use"
36
+ Repository = "https://github.com/EYHN/vnc-computer-use"
37
+ Issues = "https://github.com/EYHN/vnc-computer-use/issues"
38
+
39
+ [project.scripts]
40
+ vnc = "vnc_computer_use.cli:main"
@@ -0,0 +1,3 @@
1
+ """VNC Computer-Use CLI - control a remote desktop via VNC."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,417 @@
1
+ #!/usr/bin/env python3
2
+ """VNC Computer-Use CLI - control a remote desktop via VNC.
3
+
4
+ Use a mouse and keyboard to interact with a computer, and take screenshots.
5
+
6
+ * This is an interface to a desktop GUI. You do not have access to a terminal
7
+ or applications menu. You must click on desktop icons to start applications.
8
+ * Always prefer using keyboard shortcuts rather than clicking, where possible.
9
+ * Some applications may take time to start or process actions, so you may need
10
+ to wait and take successive screenshots to see the results of your actions.
11
+ E.g. if you click on Firefox and a window doesn't open, try taking another
12
+ screenshot.
13
+ * Whenever you intend to move the cursor to click on an element like an icon,
14
+ you should consult a screenshot to determine the coordinates of the element
15
+ before moving the cursor.
16
+ * If you tried clicking on a program or link but it failed to load, even after
17
+ waiting, try adjusting your cursor position so that the tip of the cursor
18
+ visually falls on the element that you want to click.
19
+ * Make sure to click any buttons, links, icons, etc with the cursor tip in the
20
+ center of the element. Don't click boxes on their edges unless asked.
21
+ """
22
+
23
+ import argparse
24
+ import base64
25
+ import json
26
+ import os
27
+ import signal
28
+ import socket
29
+ import subprocess
30
+ import sys
31
+ import time
32
+
33
# Session name used when the user does not pass --session / -s.
DEFAULT_SESSION = "default"
# Timeout applied to the client socket when talking to the daemon.
SOCKET_TIMEOUT = 60  # seconds

# Epilog printed by `vnc --help`: one entry per action sub-command.
# The scroll amount is a number of wheel steps (the daemon presses the wheel
# button that many times, 3 by default), not a pixel distance.
ACTION_DESCRIPTIONS = """\
Available actions:
  key                  Press a key or key-combination on the keyboard.
                       Examples: "enter", "tab", "ctrl-c", "alt-f4", "shift-a"
  type                 Type a string of text on the keyboard. Short text is typed
                       character by character; long text uses clipboard paste.
  mouse_move           Move the cursor to a specified (x, y) coordinate.
  left_click           Click the left mouse button. Optionally move to (x, y) first.
  left_click_drag      Click and drag the cursor to a specified (x, y) coordinate.
  right_click          Click the right mouse button. Optionally move to (x, y) first.
  middle_click         Click the middle mouse button. Optionally move to (x, y) first.
  double_click         Double-click the left mouse button. Optionally move to (x, y) first.
  scroll               Scroll the screen in a direction. Requires a coordinate.
                       Text param specifies direction and optional step count:
                       "up", "down", "left", "right", or "down:5" for 5 wheel steps
                       (default: 3).
  get_screenshot       Take a screenshot of the screen.
  get_cursor_position  Get the current (x, y) coordinate of the cursor.
  get_screen_size      Get the screen dimensions (width x height).
  status               Show daemon connection status.
"""
56
+
57
+
58
def session_socket_path(session):
    """Return the Unix-socket path on which the daemon of *session* listens."""
    sock_path = f"/tmp/vnc-cli-{session}.sock"
    return sock_path
60
+
61
+
62
def session_pid_file(session):
    """Return the path of the file holding the daemon PID of *session*."""
    pid_path = f"/tmp/vnc-cli-{session}.pid"
    return pid_path
64
+
65
+
66
def session_log_file(session):
    """Return the path of the file receiving the daemon output of *session*."""
    log_path = f"/tmp/vnc-cli-{session}.log"
    return log_path
68
+
69
+
70
def send_command(session, request):
    """Send one JSON request to a session's daemon and return the decoded reply.

    The request is written as a single newline-terminated JSON line to the
    session's Unix socket. The reply is read until a chunk containing a
    newline arrives or the daemon closes the connection.

    Args:
        session: Session name; selects the socket via ``session_socket_path``.
        request: JSON-serialisable object to send.

    Returns:
        The decoded JSON reply.

    On any failure (socket file missing, timeout, connection error, empty or
    malformed reply) a JSON ``{"error": ...}`` object is printed to stdout and
    the process exits with status 1, so callers always see JSON on stdout.
    """

    def fail(message):
        # Single exit path so every failure honours the JSON-on-stdout contract.
        print(json.dumps({"error": message}))
        sys.exit(1)

    socket_path = session_socket_path(session)
    if not os.path.exists(socket_path):
        fail(f"VNC session '{session}' not running. Run 'vnc connect <host> --session {session}' first.")

    chunks = []
    try:
        with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
            sock.settimeout(SOCKET_TIMEOUT)
            sock.connect(socket_path)
            sock.sendall(json.dumps(request).encode() + b"\n")

            while True:
                chunk = sock.recv(1 << 20)  # 1MB buffer
                if not chunk:
                    break
                chunks.append(chunk)
                if b"\n" in chunk:
                    break
    except socket.timeout:
        fail(f"VNC session '{session}' timed out.")
    except ConnectionRefusedError:
        fail(f"VNC session '{session}' not responding. Try 'vnc disconnect --session {session}' then reconnect.")
    except OSError as exc:
        # E.g. the socket file vanished between the existence check and
        # connect(), or the daemon dropped the connection mid-request.
        fail(f"VNC session '{session}' connection failed: {exc}")

    data = b"".join(chunks).strip()
    if not data:
        # The daemon closed the connection without writing a reply line.
        fail(f"VNC session '{session}' closed the connection without replying.")
    try:
        return json.loads(data)
    except ValueError as exc:  # JSONDecodeError and UnicodeDecodeError
        fail(f"VNC session '{session}' sent an invalid reply: {exc}")
104
+
105
+
106
def cmd_connect(args):
    """Start a background daemon for ``args.session`` connected to ``args.host``.

    Refuses to start when the session's PID file names a live process.
    Otherwise removes stale PID/socket files, spawns ``daemon.py`` (located
    next to this module) in a new session with its output redirected to the
    session log file, and polls for up to ~15 seconds (30 x 0.5 s) for the
    daemon's socket file to appear.

    Prints a JSON object describing the outcome. Exits with status 1 when the
    session is already running, the daemon exits early, or the wait times out.

    Args:
        args: Parsed arguments providing ``session``, ``host``, ``password``
            and ``username``.
    """
    session = args.session
    pid_file = session_pid_file(session)
    socket_path = session_socket_path(session)
    log_file = session_log_file(session)

    if os.path.exists(pid_file):
        try:
            with open(pid_file) as f:
                pid = int(f.read().strip())
        except (OSError, ValueError):
            # Unreadable or corrupt PID file: treat it as stale.
            pid = None

        # PIDs <= 0 address process groups in os.kill(); never probe them.
        if pid is not None and pid > 0:
            try:
                os.kill(pid, 0)  # signal 0: existence check only
                alive = True
            except ProcessLookupError:
                alive = False
            except PermissionError:
                # The process exists but belongs to another user.
                alive = True
            if alive:
                print(json.dumps({"error": f"VNC session '{session}' already running (PID {pid}). Use 'vnc disconnect --session {session}' first."}))
                sys.exit(1)

        try:
            os.unlink(pid_file)
        except FileNotFoundError:
            pass

    # Always drop a leftover socket file before spawning: the readiness poll
    # below only checks that the path exists, so a stale socket would make it
    # report success before the new daemon is actually listening.
    try:
        os.unlink(socket_path)
    except FileNotFoundError:
        pass

    daemon_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), "daemon.py")
    cmd = [sys.executable, daemon_script, args.host, "--session", session]
    # NOTE(review): the password is passed on the daemon's command line and is
    # therefore visible in the process list; changing that needs a matching
    # change in the daemon's argument handling.
    if args.password:
        cmd += ["--password", args.password]
    if args.username:
        cmd += ["--username", args.username]

    with open(log_file, "w") as log:
        proc = subprocess.Popen(cmd, stdout=log, stderr=log, start_new_session=True)

    for _ in range(30):
        if os.path.exists(socket_path):
            print(json.dumps({"ok": True, "session": session, "host": args.host, "pid": proc.pid}))
            return
        ret = proc.poll()
        if ret is not None:
            # The daemon died before listening; surface its log to the caller.
            err = ""
            try:
                with open(log_file) as f:
                    err = f.read()
            except (OSError, ValueError):
                pass
            print(json.dumps({"error": f"Daemon exited with code {ret}.", "log": err}))
            sys.exit(1)
        time.sleep(0.5)

    proc.terminate()
    print(json.dumps({"error": f"Timeout waiting for daemon to start. Check {log_file}"}))
    sys.exit(1)
153
+
154
+
155
def cmd_disconnect(args):
    """Stop the daemon of ``args.session`` and remove its PID and socket files.

    Sends SIGTERM to the PID recorded in the session's PID file, waits up to
    ~2 seconds (10 x 0.2 s) for the process to disappear, then unlinks the
    PID and socket files and prints a JSON result. When no PID file exists
    the session is reported as not running.

    A corrupt PID file, or one holding a PID <= 0, is treated as stale: no
    signal is sent and the files are still removed. If the process cannot be
    signalled for lack of permission, a JSON error is printed and the process
    exits with status 1 without touching the files.

    Args:
        args: Parsed arguments providing ``session``.
    """
    session = args.session
    pid_file = session_pid_file(session)
    socket_path = session_socket_path(session)

    if not os.path.exists(pid_file):
        print(json.dumps({"ok": True, "message": f"VNC session '{session}' was not running."}))
        return

    try:
        with open(pid_file) as f:
            pid = int(f.read().strip())
    except (OSError, ValueError):
        pid = None
    # os.kill() with pid 0 or a negative pid signals a whole process group
    # (pid 0 would be this CLI's own group), so never pass those through.
    if pid is not None and pid <= 0:
        pid = None

    if pid is not None:
        try:
            os.kill(pid, signal.SIGTERM)
            for _ in range(10):
                try:
                    os.kill(pid, 0)  # signal 0: existence check only
                except ProcessLookupError:
                    break
                time.sleep(0.2)
        except ProcessLookupError:
            # Already gone; only the files are left to clean up.
            pass
        except PermissionError:
            print(json.dumps({"error": f"Not permitted to stop VNC session '{session}' (PID {pid})."}))
            sys.exit(1)

    for path in (pid_file, socket_path):
        try:
            os.unlink(path)
        except OSError:
            # Best effort: the daemon normally removes these itself on exit.
            pass

    print(json.dumps({"ok": True, "session": session, "message": "disconnected"}))
185
+
186
+
187
def cmd_action(args):
    """Build a daemon request from parsed CLI arguments, send it, print the reply.

    Reads ``args.action``, ``args.coordinate``, ``args.text``, ``args.session``
    and, when present, ``args.output``. Validation failures print a JSON error
    object and exit with status 1 before anything is sent. For a successful
    ``get_screenshot`` the reply is reduced to either the written file path or
    the base64 PNG payload; every other reply is printed as received.
    """
    action = args.action
    payload = {"action": action}

    # Coordinate -> "x"/"y"
    coord = args.coordinate
    if coord is not None:
        if len(coord) != 2:
            print(json.dumps({"error": "coordinate requires exactly 2 values: x y"}))
            sys.exit(1)
        payload["x"], payload["y"] = coord[0], coord[1]

    # Text -> "text"
    if args.text is not None:
        payload["text"] = args.text

    # Screenshot target -> "output" (only the get_screenshot parser defines it)
    if getattr(args, "output", None):
        payload["output"] = os.path.abspath(args.output)

    # Per-action requirements
    coord_required = action in ("mouse_move", "left_click_drag", "scroll")
    coord_optional = action in ("left_click", "right_click", "middle_click", "double_click")
    text_required = action in ("key", "type", "scroll")
    has_coord = "x" in payload

    if coord_required and not has_coord:
        print(json.dumps({"error": f"coordinate required for {action}"}))
        sys.exit(1)
    if text_required and "text" not in payload:
        print(json.dumps({"error": f"text required for {action}"}))
        sys.exit(1)
    if has_coord and not (coord_required or coord_optional):
        # Actions that take no coordinate silently drop a stray one.
        payload.pop("x")
        payload.pop("y")

    reply = send_command(args.session, payload)

    # Everything except a successful screenshot is echoed verbatim.
    if action != "get_screenshot" or not reply.get("ok"):
        print(json.dumps(reply))
        return

    if "path" in reply:
        # The daemon already wrote the file itself.
        print(json.dumps({"ok": True, "path": reply["path"]}))
    elif "data" in reply:
        if not args.output:
            print(json.dumps({"ok": True, "format": "png", "data": reply["data"]}))
        else:
            # The daemon returned base64 although a file was requested:
            # decode and write it on the client side.
            target = os.path.abspath(args.output)
            image_bytes = base64.b64decode(reply["data"])
            with open(target, "wb") as f:
                f.write(image_bytes)
            print(json.dumps({"ok": True, "path": target}))
242
+
243
+
244
def _split_output_flags(tokens):
    """Split *tokens* into (``-o``/``--output`` flag-value pairs, everything else).

    An output flag that is the last token (no value follows) is kept among
    the remaining tokens rather than treated as a flag.
    """
    output_flags = []
    others = []
    j = 0
    while j < len(tokens):
        if tokens[j] in ("-o", "--output") and j + 1 < len(tokens):
            output_flags += tokens[j:j + 2]
            j += 2
        else:
            others.append(tokens[j])
            j += 1
    return output_flags, others


def _rewrite_argv():
    """Rewrite sys.argv to map positional shorthand to --text / --coordinate flags.

    Examples of the rewrite::

        vnc key ctrl-c              -> vnc key --text ctrl-c
        vnc type hello world        -> vnc type --text "hello world"
        vnc left_click 100 200      -> vnc left_click --coordinate 100 200
        vnc scroll down:5 100 200   -> vnc scroll --text down:5 --coordinate 100 200

    Leading global flags (``--session``/``-s`` with its value, and any other
    leading ``-`` token) are kept in front of the sub-command. ``sys.argv`` is
    left untouched when there is no sub-command or when the action's arguments
    already contain a flag other than ``-o``/``--output``. A trailing
    ``-s``/``--session`` without a value is left for argparse to report
    instead of raising ``IndexError`` here.
    """
    argv = sys.argv[1:]
    if not argv:
        return

    # Separate leading global flags from the sub-command and its arguments.
    global_flags = []
    i = 0
    while i < len(argv):
        token = argv[i]
        if token in ("--session", "-s"):
            # Slicing tolerates a missing value at the end of the line.
            global_flags += argv[i:i + 2]
            i += 2
        elif token.startswith("-"):
            global_flags.append(token)
            i += 1
        else:
            break
    rest = argv[i:]

    if not rest:
        return

    action = rest[0]
    action_args = rest[1:]

    text_actions = {"key", "type"}
    coord_actions = {
        "mouse_move", "left_click_drag",
        "left_click", "right_click", "middle_click", "double_click",
    }
    scroll_actions = {"scroll"}

    # Already has flags -- don't rewrite
    if any(a.startswith("-") for a in action_args if a not in ("-o", "--output")):
        return

    new_args = global_flags + [action]

    if action_args and action in (text_actions | coord_actions | scroll_actions):
        output_flags, parts = _split_output_flags(action_args)
        new_args += output_flags
        if action in text_actions:
            # vnc key enter / vnc type "hello"
            if parts:
                new_args += ["--text", " ".join(parts)]
        elif action in scroll_actions:
            # vnc scroll down:5 100 200
            if parts:
                new_args += ["--text", parts[0]]
            if len(parts) >= 3:
                new_args += ["--coordinate", parts[1], parts[2]]
        elif len(parts) >= 2:
            # vnc mouse_move 100 200 / vnc left_click 100 200
            new_args += ["--coordinate", parts[0], parts[1]]
    else:
        # get_screenshot, connect, disconnect, ...: pass arguments through.
        new_args += action_args

    sys.argv = [sys.argv[0]] + new_args
360
+
361
+
362
def main():
    """Command-line entry point: parse arguments and dispatch to a handler.

    ``sys.argv`` is first normalised by ``_rewrite_argv`` so positional
    shorthand becomes explicit flags. ``connect`` and ``disconnect`` have
    their own handlers; every other sub-command is forwarded to
    ``cmd_action`` with ``args.action`` set to the sub-command name. With no
    sub-command the help text is printed and the process exits with status 1.
    """
    _rewrite_argv()

    parser = argparse.ArgumentParser(
        prog="vnc",
        description=__doc__,
        epilog=ACTION_DESCRIPTIONS,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--session", "-s",
        default=DEFAULT_SESSION,
        help=f"Session name for multi-connection support (default: {DEFAULT_SESSION})",
    )
    subparsers = parser.add_subparsers(dest="command")

    # connect
    connect_parser = subparsers.add_parser("connect", help="Connect to VNC server and start daemon")
    connect_parser.add_argument("host", help="VNC host (e.g. localhost::5900 or localhost:0)")
    connect_parser.add_argument("--password", "-p", help="VNC password")
    connect_parser.add_argument("--username", "-u", help="VNC/ARD username (required for macOS Screen Sharing)")

    # disconnect
    subparsers.add_parser("disconnect", help="Stop VNC daemon")

    # Every computer-use action accepts the same --coordinate / --text pair;
    # only get_screenshot additionally accepts --output.
    action_names = (
        "key", "type", "mouse_move",
        "left_click", "left_click_drag", "right_click",
        "middle_click", "double_click", "scroll",
        "get_screenshot", "get_cursor_position",
        "get_screen_size", "status",
    )
    for name in action_names:
        action_parser = subparsers.add_parser(name)
        action_parser.add_argument(
            "--coordinate", "-c", type=int, nargs=2, metavar=("X", "Y"),
            help="(x, y) pixel coordinate on the screen",
        )
        action_parser.add_argument("--text", "-t", help="Text to type, key name, or scroll direction")
        if name == "get_screenshot":
            action_parser.add_argument("--output", "-o", help="Output file path (default: base64 JSON to stdout)")

    args = parser.parse_args()
    command = args.command

    if not command:
        parser.print_help()
        sys.exit(1)

    dedicated = {"connect": cmd_connect, "disconnect": cmd_disconnect}
    handler = dedicated.get(command)
    if handler is None:
        # Any remaining sub-command is a computer-use action.
        args.action = command
        handler = cmd_action
    handler(args)
414
+
415
+
416
+ if __name__ == "__main__":
417
+ main()
@@ -0,0 +1,305 @@
1
+ #!/usr/bin/env python3
2
+ """VNC CLI Daemon - maintains VNC connection and serves commands over Unix socket."""
3
+
4
+ import json
5
+ import os
6
+ import signal
7
+ import socketserver
8
+ import sys
9
+ import tempfile
10
+ import time
11
+ import base64
12
+ import threading
13
+ import traceback
14
+
15
# Session name used when --session is not passed to the daemon.
DEFAULT_SESSION = "default"


def session_socket_path(session):
    """Return the Unix-socket path on which the daemon of *session* listens."""
    sock_path = f"/tmp/vnc-cli-{session}.sock"
    return sock_path
20
+
21
+
22
def session_pid_file(session):
    """Return the path of the file in which the daemon of *session* records its PID."""
    pid_path = f"/tmp/vnc-cli-{session}.pid"
    return pid_path
24
+
25
+
26
def session_log_file(session):
    """Return the log-file path associated with *session*."""
    log_path = f"/tmp/vnc-cli-{session}.log"
    return log_path
28
+
29
# Threshold: texts shorter than this use keyPress; longer use paste()
PASTE_THRESHOLD = 32


class VNCController:
    """Wraps vncdotool client with local cursor tracking.

    Requests are dicts carrying an ``"action"`` key. ``handle`` dispatches
    each one to the matching ``_do_<action>`` method while holding a lock, so
    only one request drives the VNC client at a time. Every handler returns a
    JSON-serialisable dict: ``{"ok": True, ...}`` on success or
    ``{"error": "..."}`` on failure.

    The cursor position is tracked locally: it starts at (0, 0) and is
    updated whenever this class moves or drags the pointer.
    """

    # VNC scroll uses mouse buttons 4/5 (up/down) and 6/7 (left/right)
    _SCROLL_BUTTONS = {"up": 4, "down": 5, "left": 6, "right": 7}
    # Number of wheel presses when the scroll text carries no explicit amount.
    _DEFAULT_SCROLL_AMOUNT = 3

    def __init__(self, host, password=None, username=None, session=DEFAULT_SESSION):
        """Connect to *host* through ``vncdotool.api.connect``.

        Args:
            host: VNC server address, passed to vncdotool unchanged.
            password: Optional password forwarded to vncdotool.
            username: Optional username forwarded to vncdotool.
            session: Session name, reported by the ``status`` action.
        """
        # Imported here rather than at module level, so importing this module
        # does not itself require vncdotool.
        import vncdotool.api
        self._api = vncdotool.api
        self.client = self._api.connect(host, password=password, username=username)
        self.cursor_x = 0
        self.cursor_y = 0
        self.host = host
        self.session = session
        self._lock = threading.Lock()

    def shutdown(self):
        """Disconnect the client and shut down the vncdotool API (best effort)."""
        # Both steps are deliberately best-effort: this runs during process
        # teardown and may be invoked more than once.
        try:
            self.client.disconnect()
        except Exception:
            pass
        try:
            self._api.shutdown()
        except Exception:
            pass

    def handle(self, request):
        """Dispatch one decoded request and return its response dict.

        Non-dict requests and unknown actions yield an ``{"error": ...}``
        response. Any exception raised by a handler is printed to stderr with
        its traceback and reported as ``{"error": str(exc)}``.
        """
        if not isinstance(request, dict):
            # Valid JSON that is not an object (e.g. a list or a string)
            # has no "action" to dispatch on.
            return {"error": "Request must be a JSON object"}
        action = request.get("action")
        with self._lock:
            handler = getattr(self, f"_do_{action}", None)
            if handler is None:
                return {"error": f"Unknown action: {action}"}
            try:
                return handler(request)
            except Exception as e:
                traceback.print_exc(file=sys.stderr)
                return {"error": str(e)}

    # -- helpers ---------------------------------------------------------

    def _move_to(self, x, y):
        """Move the pointer to (x, y) and record it as the cursor position."""
        self.client.mouseMove(x, y)
        self.cursor_x, self.cursor_y = x, y

    def _click(self, req, button, presses=1):
        """Optionally move to the request's coordinates, then press *button*.

        With ``presses`` > 1 the presses are separated by a 0.1 s pause.
        """
        x, y = self._optional_coords(req)
        if x is not None:
            self._move_to(x, y)
        for n in range(presses):
            if n:
                self.client.pause(0.1)
            self.client.mousePress(button)
        return {"ok": True}

    def _require_coords(self, req):
        """Return ``(x, y)`` as ints; raise ``ValueError`` if either is missing."""
        x = req.get("x")
        y = req.get("y")
        if x is None or y is None:
            raise ValueError("x and y coordinates required")
        return int(x), int(y)

    def _optional_coords(self, req):
        """Return ``(x, y)`` as ints, or ``(None, None)`` unless both are present."""
        x = req.get("x")
        y = req.get("y")
        if x is not None and y is not None:
            return int(x), int(y)
        return None, None

    # -- keyboard --------------------------------------------------------

    def _do_key(self, req):
        """Press the key or key combination named by ``req["text"]``."""
        text = req.get("text")
        if not text:
            return {"error": "text required for key"}
        self.client.keyPress(text)
        return {"ok": True}

    def _do_type(self, req):
        """Type ``req["text"]``: key by key when short, via ``paste`` when long."""
        text = req.get("text")
        if not text:
            return {"error": "text required for type"}
        if len(text) >= PASTE_THRESHOLD:
            self.client.paste(text)
        else:
            for char in text:
                self.client.keyPress(char)
        return {"ok": True}

    # -- mouse -----------------------------------------------------------

    def _do_mouse_move(self, req):
        """Move the pointer to the required (x, y)."""
        x, y = self._require_coords(req)
        self._move_to(x, y)
        return {"ok": True}

    def _do_left_click(self, req):
        """Press button 1, optionally moving to (x, y) first."""
        return self._click(req, 1)

    def _do_right_click(self, req):
        """Press button 3, optionally moving to (x, y) first."""
        return self._click(req, 3)

    def _do_middle_click(self, req):
        """Press button 2, optionally moving to (x, y) first."""
        return self._click(req, 2)

    def _do_double_click(self, req):
        """Press button 1 twice with a 0.1 s pause, optionally moving first."""
        return self._click(req, 1, presses=2)

    def _do_left_click_drag(self, req):
        """Drag from the current pointer position to the required (x, y).

        ``req["step"]`` (default 10) is forwarded to ``mouseDrag``.
        """
        x, y = self._require_coords(req)
        step = int(req.get("step", 10))
        self.client.mouseDrag(x, y, step=step)
        self.cursor_x, self.cursor_y = x, y
        return {"ok": True}

    def _do_scroll(self, req):
        """Scroll by pressing the wheel button for the requested direction.

        ``req["text"]`` is ``"<direction>"`` or ``"<direction>:<amount>"``;
        the amount is the number of wheel presses (default 3). The direction
        and amount are validated before the pointer is moved, so a rejected
        request has no side effect.
        """
        text = req.get("text")
        if not text:
            return {"error": "text required for scroll. Use 'up', 'down', 'left', 'right', or 'down:500'"}

        # Parse direction and optional amount from text (e.g. "down" or "down:5")
        parts = text.split(":")
        direction = parts[0].lower()
        button = self._SCROLL_BUTTONS.get(direction)
        if button is None:
            return {"error": f"Invalid scroll direction: '{direction}'. Use 'up', 'down', 'left', or 'right'"}

        amount = self._DEFAULT_SCROLL_AMOUNT
        if len(parts) > 1:
            try:
                amount = int(parts[1])
            except ValueError:
                return {"error": f"Invalid scroll amount: '{parts[1]}'. Use a whole number, e.g. 'down:5'"}
            if amount < 0:
                return {"error": f"Invalid scroll amount: '{parts[1]}'. Use a whole number, e.g. 'down:5'"}

        x, y = self._optional_coords(req)
        if x is not None:
            self._move_to(x, y)

        for _ in range(amount):
            self.client.mousePress(button)
        return {"ok": True}

    # -- screen / info ---------------------------------------------------

    def _do_get_screenshot(self, req):
        """Capture the screen to ``req["output"]`` or return it as base64 PNG."""
        output = req.get("output")
        if output:
            # Write directly to the requested file path
            self.client.captureScreen(output)
            return {"ok": True, "path": output}

        # Fallback: return base64 via socket. mkstemp creates a private,
        # uniquely named file instead of a predictable path in the shared
        # temp directory.
        fd, tmpfile = tempfile.mkstemp(prefix="vnc-screenshot-", suffix=".png")
        os.close(fd)
        try:
            self.client.captureScreen(tmpfile)
            with open(tmpfile, "rb") as f:
                data = base64.b64encode(f.read()).decode("ascii")
            return {"ok": True, "data": data, "format": "png"}
        finally:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass

    def _do_get_cursor_position(self, req):
        """Return the locally tracked cursor position."""
        return {"ok": True, "x": self.cursor_x, "y": self.cursor_y}

    def _do_get_screen_size(self, req):
        """Refresh the screen and return its width and height."""
        self.client.refreshScreen()
        screen = self.client.screen
        return {"ok": True, "width": screen.size[0], "height": screen.size[1]}

    def _do_status(self, req):
        """Return host, session, daemon PID and tracked cursor position."""
        return {
            "ok": True,
            "host": self.host,
            "session": self.session,
            "pid": os.getpid(),
            "cursor_x": self.cursor_x,
            "cursor_y": self.cursor_y,
        }
208
+
209
+
210
class RequestHandler(socketserver.StreamRequestHandler):
    """Serve newline-delimited JSON requests on one client connection."""

    def handle(self):
        """Answer every non-blank request line with one JSON response line.

        Each line is decoded as JSON and passed to
        ``self.server.vnc_controller.handle``. A line that cannot be decoded
        is answered with ``{"error": "Invalid JSON: ..."}`` and the loop
        continues with the next line.
        """
        for line in self.rfile:
            line = line.strip()
            if not line:
                continue
            try:
                request = json.loads(line)
            except ValueError as e:
                # ValueError covers json.JSONDecodeError and also the
                # UnicodeDecodeError raised for bytes that are not valid
                # UTF-8, so malformed input always gets an error reply.
                response = {"error": f"Invalid JSON: {e}"}
            else:
                response = self.server.vnc_controller.handle(request)
            self.wfile.write(json.dumps(response).encode() + b"\n")
            self.wfile.flush()
224
+
225
+
226
class VNCDaemonServer(socketserver.ThreadingUnixStreamServer):
    """Threading Unix-socket server that carries the VNC controller.

    Request handlers reach the controller through ``self.server.vnc_controller``.
    """

    def __init__(self, socket_path, handler, vnc_controller):
        """Store *vnc_controller*, then let the base class set up *socket_path*."""
        # The controller is attached before the base class is initialised.
        self.vnc_controller = vnc_controller
        socketserver.ThreadingUnixStreamServer.__init__(self, socket_path, handler)
230
+
231
+
232
def cleanup(socket_path, pid_file):
    """Remove the socket file and the PID file, ignoring any that cannot be removed."""
    for leftover in (socket_path, pid_file):
        try:
            os.unlink(leftover)
        except OSError:
            # Already gone or not removable; the other file is still attempted.
            continue
241
+
242
+
243
def parse_arg(argv, flag):
    """Return the value following the first occurrence of *flag* in *argv*.

    Args:
        argv: Sequence of command-line tokens.
        flag: Option name to look for, e.g. ``"--password"``.

    Returns:
        The token right after *flag*, or ``None`` when *flag* is absent or is
        the last token (no value follows it).
    """
    try:
        idx = argv.index(flag)
    except ValueError:
        return None
    value_idx = idx + 1
    if value_idx >= len(argv):
        # Flag given without a value: report "not provided" instead of
        # raising IndexError.
        return None
    return argv[value_idx]
248
+
249
+
250
def main():
    """Run the daemon: connect to VNC, then serve requests on the session socket.

    Command line: ``<host[:display]> [--password <pw>] [--username <user>]
    [--session <name>]``. Progress and errors are written to stderr. Exits
    with status 1 on a usage error or when the VNC connection fails.

    After the VNC connection is established the PID file is written, SIGTERM
    and SIGINT handlers are installed, and the Unix-socket server runs until
    interrupted. On the way out, including when the socket cannot be bound,
    the VNC connection is shut down, the socket and PID files are removed,
    and the listening socket is closed.
    """
    if len(sys.argv) < 2:
        print("Usage: vnc_daemon.py <host[:display]> [--password <pw>] [--username <user>] [--session <name>]", file=sys.stderr)
        sys.exit(1)

    host = sys.argv[1]
    password = parse_arg(sys.argv, "--password")
    username = parse_arg(sys.argv, "--username")
    session = parse_arg(sys.argv, "--session") or DEFAULT_SESSION

    socket_path = session_socket_path(session)
    pid_file = session_pid_file(session)

    # Clean up stale socket
    if os.path.exists(socket_path):
        os.unlink(socket_path)

    # Connect to VNC
    print(f"[{session}] Connecting to VNC at {host}...", file=sys.stderr)
    try:
        controller = VNCController(host, password=password, username=username, session=session)
    except Exception as e:
        print(f"[{session}] Failed to connect to VNC: {e}", file=sys.stderr)
        sys.exit(1)
    print(f"[{session}] VNC connected.", file=sys.stderr)

    # Write PID file
    with open(pid_file, "w") as f:
        f.write(str(os.getpid()))

    def handle_signal(signum, frame):
        # Raising SystemExit in the main thread unwinds through the
        # try/finally below, which performs the actual cleanup exactly once.
        print(f"\n[{session}] Shutting down...", file=sys.stderr)
        sys.exit(0)

    server = None
    try:
        # Set up signal handlers for clean shutdown
        signal.signal(signal.SIGTERM, handle_signal)
        signal.signal(signal.SIGINT, handle_signal)

        # Start socket server. A bind failure propagates after the cleanup in
        # ``finally``, so no PID file or VNC connection is left behind.
        server = VNCDaemonServer(socket_path, RequestHandler, controller)
        print(f"[{session}] Listening on {socket_path}", file=sys.stderr)
        server.serve_forever()
    except KeyboardInterrupt:
        pass
    finally:
        controller.shutdown()
        cleanup(socket_path, pid_file)
        if server is not None:
            # serve_forever() has already returned in this thread, so
            # shutdown() would have nothing to stop; server_close() is what
            # releases the listening socket.
            server.server_close()
302
+
303
+
304
+ if __name__ == "__main__":
305
+ main()