windows-mcp 0.5.7__py3-none-any.whl → 0.5.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
windows_mcp/__main__.py CHANGED
@@ -1,312 +1,314 @@
1
- from windows_mcp.analytics import PostHogAnalytics, with_analytics
2
- from live_inspect.watch_cursor import WatchCursor
3
- from windows_mcp.desktop.service import Desktop
4
- from contextlib import asynccontextmanager
5
- from fastmcp.utilities.types import Image
6
- from mcp.types import ToolAnnotations
7
- from typing import Literal, Optional
8
- from humancursor import SystemCursor
9
- from fastmcp import FastMCP, Context
10
- from dotenv import load_dotenv
11
- from textwrap import dedent
12
- import pyautogui as pg
13
- import asyncio
14
- import click
15
- import os
16
-
17
- load_dotenv()
18
-
19
- pg.FAILSAFE=False
20
- pg.PAUSE=1.0
21
-
22
- desktop=Desktop()
23
- cursor=SystemCursor()
24
- watch_cursor=WatchCursor()
25
- windows_version=desktop.get_windows_version()
26
- default_language=desktop.get_default_language()
27
- screen_width,screen_height=desktop.get_resolution()
28
-
29
- instructions=dedent(f'''
30
- Windows MCP server provides tools to interact directly with the {windows_version} desktop,
31
- thus enabling to operate the desktop on the user's behalf.
32
- ''')
33
-
34
- # Initialize analytics at module level to be used in decorators
35
- if os.getenv("ANONYMIZED_TELEMETRY", "true").lower() == "false":
36
- analytics = None
37
- else:
38
- analytics = PostHogAnalytics()
39
-
40
- @asynccontextmanager
41
- async def lifespan(app: FastMCP):
42
- """Runs initialization code before the server starts and cleanup code after it shuts down."""
43
- try:
44
- watch_cursor.start()
45
- await asyncio.sleep(1) # Simulate startup latency
46
- yield
47
- finally:
48
- watch_cursor.stop()
49
- if analytics:
50
- await analytics.close()
51
-
52
- mcp=FastMCP(name='windows-mcp',instructions=instructions,lifespan=lifespan)
53
-
54
- @mcp.tool(
55
- name="App-Tool",
56
- description="Manages Windows applications with three modes: 'launch' (start app by name), 'resize' (set window position/size using window_loc=[x,y] and window_size=[width,height]), 'switch' (activate app by name). Essential for application lifecycle management.",
57
- annotations=ToolAnnotations(
58
- title="App Tool",
59
- readOnlyHint=False,
60
- destructiveHint=True,
61
- idempotentHint=False,
62
- openWorldHint=False
63
- )
64
- )
65
- @with_analytics(analytics, "App-Tool")
66
- def app_tool(mode:Literal['launch','resize','switch'],name:str|None=None,window_loc:list[int]|None=None,window_size:list[int]|None=None, ctx: Context = None):
67
- return desktop.app(mode,name,window_loc,window_size)
68
-
69
- @mcp.tool(
70
- name='Powershell-Tool',
71
- description='Execute PowerShell commands directly on the Windows system and return output with status code. Supports all PowerShell cmdlets, scripts, and system commands. Use for file operations, system queries, and administrative tasks.',
72
- annotations=ToolAnnotations(
73
- title="Powershell Tool",
74
- readOnlyHint=False,
75
- destructiveHint=True,
76
- idempotentHint=False,
77
- openWorldHint=True
78
- )
79
- )
80
- @with_analytics(analytics, "Powershell-Tool")
81
- def powershell_tool(command: str, ctx: Context = None) -> str:
82
- response,status_code=desktop.execute_command(command)
83
- return f'Response: {response}\nStatus Code: {status_code}'
84
-
85
- @mcp.tool(
86
- name='State-Tool',
87
- description='Captures complete desktop state including: system language, focused/opened apps, interactive elements (buttons, text fields, links, menus with coordinates), and scrollable areas. Set use_vision=True to include screenshot. Set use_dom=True for browser content to get web page elements instead of browser UI. Always call this first to understand the current desktop state before taking actions.',
88
- annotations=ToolAnnotations(
89
- title="State Tool",
90
- readOnlyHint=True,
91
- destructiveHint=False,
92
- idempotentHint=True,
93
- openWorldHint=False
94
- )
95
- )
96
- @with_analytics(analytics, "State-Tool")
97
- def state_tool(use_vision:bool=False,use_dom:bool=False, ctx: Context = None):
98
- # Calculate scale factor to cap resolution at 1080p (1920x1080)
99
- max_width, max_height = 1920, 1080
100
- scale_width = max_width / screen_width if screen_width > max_width else 1.0
101
- scale_height = max_height / screen_height if screen_height > max_height else 1.0
102
- scale = min(scale_width, scale_height) # Use the smaller scale to ensure both dimensions fit
103
-
104
- desktop_state=desktop.get_state(use_vision=use_vision,use_dom=use_dom,as_bytes=True,scale=scale)
105
- interactive_elements=desktop_state.tree_state.interactive_elements_to_string()
106
- scrollable_elements=desktop_state.tree_state.scrollable_elements_to_string()
107
- apps=desktop_state.apps_to_string()
108
- active_app=desktop_state.active_app_to_string()
109
- return [dedent(f'''
110
- Default Language of User:
111
- {default_language} with encoding: {desktop.encoding}
112
-
113
- Focused App:
114
- {active_app}
115
-
116
- Opened Apps:
117
- {apps}
118
-
119
- List of Interactive Elements:
120
- {interactive_elements or 'No interactive elements found.'}
121
-
122
- List of Scrollable Elements:
123
- {scrollable_elements or 'No scrollable elements found.'}
124
- ''')]+([Image(data=desktop_state.screenshot,format='png')] if use_vision else [])
125
-
126
- @mcp.tool(
127
- name='Click-Tool',
128
- description='Performs mouse clicks at specified coordinates [x, y]. Supports button types: left (default), right (context menu), middle. Supports clicks: 1 (single), 2 (double), 3 (triple). Always use coordinates from State-Tool output to ensure accuracy.',
129
- annotations=ToolAnnotations(
130
- title="Click Tool",
131
- readOnlyHint=False,
132
- destructiveHint=True,
133
- idempotentHint=False,
134
- openWorldHint=False
135
- )
136
- )
137
- @with_analytics(analytics, "Click-Tool")
138
- def click_tool(loc:list[int],button:Literal['left','right','middle']='left',clicks:int=1, ctx: Context = None)->str:
139
- if len(loc) != 2:
140
- raise ValueError("Location must be a list of exactly 2 integers [x, y]")
141
- x,y=loc[0],loc[1]
142
- desktop.click(loc=loc,button=button,clicks=clicks)
143
- num_clicks={1:'Single',2:'Double',3:'Triple'}
144
- return f'{num_clicks.get(clicks)} {button} clicked at ({x},{y}).'
145
-
146
- @mcp.tool(
147
- name='Type-Tool',
148
- description='Types text at specified coordinates [x, y]. Set clear=True to clear existing text first (Ctrl+A then type), clear=False to append. Set press_enter=True to submit after typing. Always click on the target input field first to ensure focus.',
149
- annotations=ToolAnnotations(
150
- title="Type Tool",
151
- readOnlyHint=False,
152
- destructiveHint=True,
153
- idempotentHint=False,
154
- openWorldHint=False
155
- )
156
- )
157
- @with_analytics(analytics, "Type-Tool")
158
- def type_tool(loc:list[int],text:str,clear:bool=False,press_enter:bool=False, ctx: Context = None)->str:
159
- if len(loc) != 2:
160
- raise ValueError("Location must be a list of exactly 2 integers [x, y]")
161
- x,y=loc[0],loc[1]
162
- desktop.type(loc=loc,text=text,clear=clear,press_enter=press_enter)
163
- return f'Typed {text} at ({x},{y}).'
164
-
165
- @mcp.tool(
166
- name='Scroll-Tool',
167
- description='Scrolls at coordinates [x, y] or current mouse position if loc=None. Type: vertical (default) or horizontal. Direction: up/down for vertical, left/right for horizontal. wheel_times controls amount (1 wheel ≈ 3-5 lines). Use for navigating long content, lists, and web pages.',
168
- annotations=ToolAnnotations(
169
- title="Scroll Tool",
170
- readOnlyHint=False,
171
- destructiveHint=False,
172
- idempotentHint=True,
173
- openWorldHint=False
174
- )
175
- )
176
- @with_analytics(analytics, "Scroll-Tool")
177
- def scroll_tool(loc:list[int]=None,type:Literal['horizontal','vertical']='vertical',direction:Literal['up','down','left','right']='down',wheel_times:int=1, ctx: Context = None)->str:
178
- if loc and len(loc) != 2:
179
- raise ValueError("Location must be a list of exactly 2 integers [x, y]")
180
- response=desktop.scroll(loc,type,direction,wheel_times)
181
- if response:
182
- return response
183
- return f'Scrolled {type} {direction} by {wheel_times} wheel times'+f' at ({loc[0]},{loc[1]}).' if loc else ''
184
-
185
- @mcp.tool(
186
- name='Drag-Tool',
187
- description='Performs drag-and-drop from current mouse position to destination coordinates [x, y]. Click or move to source position first, then call this tool with target coordinates. Use for moving files, reordering items, resizing windows, or any drag-drop UI interactions.',
188
- annotations=ToolAnnotations(
189
- title="Drag Tool",
190
- readOnlyHint=False,
191
- destructiveHint=True,
192
- idempotentHint=False,
193
- openWorldHint=False
194
- )
195
- )
196
- @with_analytics(analytics, "Drag-Tool")
197
- def drag_tool(to_loc:list[int], ctx: Context = None)->str:
198
- if len(to_loc) != 2:
199
- raise ValueError("to_loc must be a list of exactly 2 integers [x, y]")
200
- desktop.drag(to_loc)
201
- x2,y2=to_loc[0],to_loc[1]
202
- return f'Dragged the element to ({x2},{y2}).'
203
-
204
- @mcp.tool(
205
- name='Move-Tool',
206
- description='Moves mouse cursor to coordinates [x, y] without clicking. Use for hovering to reveal tooltips/menus, positioning cursor before drag operations, or triggering hover-based UI changes. Does not interact with elements.',
207
- annotations=ToolAnnotations(
208
- title="Move Tool",
209
- readOnlyHint=False,
210
- destructiveHint=False,
211
- idempotentHint=True,
212
- openWorldHint=False
213
- )
214
- )
215
- @with_analytics(analytics, "Move-Tool")
216
- def move_tool(to_loc:list[int], ctx: Context = None)->str:
217
- if len(to_loc) != 2:
218
- raise ValueError("to_loc must be a list of exactly 2 integers [x, y]")
219
- x,y=to_loc[0],to_loc[1]
220
- desktop.move(to_loc)
221
- return f'Moved the mouse pointer to ({x},{y}).'
222
-
223
- @mcp.tool(
224
- name='Shortcut-Tool',
225
- description='Executes keyboard shortcuts using key combinations separated by +. Examples: "ctrl+c" (copy), "ctrl+v" (paste), "alt+tab" (switch apps), "win+r" (Run dialog), "win" (Start menu), "ctrl+shift+esc" (Task Manager). Use for quick actions and system commands.',
226
- annotations=ToolAnnotations(
227
- title="Shortcut Tool",
228
- readOnlyHint=False,
229
- destructiveHint=True,
230
- idempotentHint=False,
231
- openWorldHint=False
232
- )
233
- )
234
- @with_analytics(analytics, "Shortcut-Tool")
235
- def shortcut_tool(shortcut:str, ctx: Context = None):
236
- desktop.shortcut(shortcut)
237
- return f"Pressed {shortcut}."
238
-
239
- @mcp.tool(
240
- name='Wait-Tool',
241
- description='Pauses execution for specified duration in seconds. Use when waiting for: applications to launch/load, UI animations to complete, page content to render, dialogs to appear, or between rapid actions. Helps ensure UI is ready before next interaction.',
242
- annotations=ToolAnnotations(
243
- title="Wait Tool",
244
- readOnlyHint=True,
245
- destructiveHint=False,
246
- idempotentHint=True,
247
- openWorldHint=False
248
- )
249
- )
250
- @with_analytics(analytics, "Wait-Tool")
251
- def wait_tool(duration:int, ctx: Context = None)->str:
252
- pg.sleep(duration)
253
- return f'Waited for {duration} seconds.'
254
-
255
- @mcp.tool(
256
- name='Scrape-Tool',
257
- description='Fetch content from a URL or the active browser tab. By default (use_dom=False), performs a lightweight HTTP request to the URL and returns markdown content of complete webpage. Note: Some websites may block automated HTTP requests. If this fails, open the page in a browser and retry with use_dom=True to extract visible text from the active tab\'s DOM within the viewport.',
258
- annotations=ToolAnnotations(
259
- title="Scrape Tool",
260
- readOnlyHint=True,
261
- destructiveHint=False,
262
- idempotentHint=True,
263
- openWorldHint=True
264
- )
265
- )
266
- @with_analytics(analytics, "Scrape-Tool")
267
- def scrape_tool(url:str,use_dom:bool=False, ctx: Context = None)->str:
268
- if not use_dom:
269
- content=desktop.scrape(url)
270
- return f'URL:{url}\nContent:\n{content}'
271
-
272
- desktop_state=desktop.get_state(use_vision=False,use_dom=use_dom)
273
- tree_state=desktop_state.tree_state
274
- if not tree_state.dom_info:
275
- return f'No DOM information found. Please open {url} in browser first.'
276
- dom_info=tree_state.dom_info
277
- vertical_scroll_percent=dom_info.vertical_scroll_percent
278
- content='\n'.join([node.text for node in tree_state.dom_informative_nodes])
279
- header_status = "Reached top" if vertical_scroll_percent <= 0 else "Scroll up to see more"
280
- footer_status = "Reached bottom" if vertical_scroll_percent >= 100 else "Scroll down to see more"
281
- return f'URL:{url}\nContent:\n[{header_status}]\n{content}\n[{footer_status}]'
282
-
283
-
284
- @click.command()
285
- @click.option(
286
- "--transport",
287
- help="The transport layer used by the MCP server.",
288
- type=click.Choice(['stdio','sse','streamable-http']),
289
- default='stdio'
290
- )
291
- @click.option(
292
- "--host",
293
- help="Host to bind the SSE/Streamable HTTP server.",
294
- default="localhost",
295
- type=str,
296
- show_default=True
297
- )
298
- @click.option(
299
- "--port",
300
- help="Port to bind the SSE/Streamable HTTP server.",
301
- default=8000,
302
- type=int,
303
- show_default=True
304
- )
305
- def main(transport, host, port):
306
- if transport=='stdio':
307
- mcp.run()
308
- else:
309
- mcp.run(transport=transport,host=host,port=port)
310
-
311
- if __name__ == "__main__":
312
- main()
1
+ from windows_mcp.analytics import PostHogAnalytics, with_analytics
2
+ from windows_mcp.watchdog.service import WatchDog
3
+ from windows_mcp.desktop.service import Desktop
4
+ from contextlib import asynccontextmanager
5
+ from fastmcp.utilities.types import Image
6
+ from mcp.types import ToolAnnotations
7
+ from typing import Literal, Optional
8
+ from humancursor import SystemCursor
9
+ from fastmcp import FastMCP, Context
10
+ from dotenv import load_dotenv
11
+ from textwrap import dedent
12
+ import pyautogui as pg
13
+ import asyncio
14
+ import click
15
+ import os
16
+
17
# Load variables from a .env file before the environment is read below.
load_dotenv()

# pyautogui configuration: disable the fail-safe and pause 1.0 s after each call.
pg.FAILSAFE = False
pg.PAUSE = 1.0

# Module-level singletons shared by the tool handlers in this file.
desktop = Desktop()
watchdog = WatchDog()
cursor = SystemCursor()

# Desktop facts queried once, at import time.
windows_version = desktop.get_windows_version()
default_language = desktop.get_default_language()
screen_width, screen_height = desktop.get_resolution()

# Register the desktop tree's focus-change handler as the watchdog's focus callback.
watchdog.set_focus_callback(desktop.tree._on_focus_change)

instructions = dedent(f'''
    Windows MCP server provides tools to interact directly with the {windows_version} desktop,
    thus enabling to operate the desktop on the user's behalf.
    ''')

# Initialize analytics at module level to be used in decorators.
# Telemetry stays enabled unless ANONYMIZED_TELEMETRY equals "false" (any letter case).
analytics = (
    None
    if os.getenv("ANONYMIZED_TELEMETRY", "true").lower() == "false"
    else PostHogAnalytics()
)
40
+
41
@asynccontextmanager
async def lifespan(app: FastMCP):
    """Run startup work before the server serves and cleanup after it shuts down.

    Starts the watchdog, waits one second, then yields control to the server.
    On exit (normal or through an exception) the watchdog is stopped and, when
    an analytics client exists, it is closed.

    Args:
        app: The FastMCP application this lifespan is attached to (unused here).
    """
    try:
        watchdog.start()
        await asyncio.sleep(1)  # Simulate startup latency
        yield
    finally:
        try:
            watchdog.stop()
        finally:
            # Close analytics even when stopping the watchdog raised, so a
            # failed shutdown step cannot leave the client open.
            if analytics:
                await analytics.close()


mcp = FastMCP(name='windows-mcp', instructions=instructions, lifespan=lifespan)
54
+
55
@mcp.tool(
    name="App-Tool",
    description="Manages Windows applications with three modes: 'launch' (start app by name), 'resize' (set window position/size using window_loc=[x,y] and window_size=[width,height]), 'switch' (activate app by name). Essential for application lifecycle management.",
    annotations=ToolAnnotations(
        title="App Tool",
        readOnlyHint=False,
        destructiveHint=True,
        idempotentHint=False,
        openWorldHint=False,
    ),
)
@with_analytics(analytics, "App-Tool")
def app_tool(
    mode: Literal['launch', 'resize', 'switch'],
    name: str | None = None,
    window_loc: list[int] | None = None,
    window_size: list[int] | None = None,
    ctx: Context = None,
):
    """Forward an app-management request to the desktop service.

    All four arguments are passed positionally to ``desktop.app`` and its
    result is returned unchanged. ``ctx`` is accepted but not read here.
    """
    outcome = desktop.app(mode, name, window_loc, window_size)
    return outcome
69
+
70
@mcp.tool(
    name='Powershell-Tool',
    description='Execute PowerShell commands directly on the Windows system and return output with status code. Supports all PowerShell cmdlets, scripts, and system commands. Use for file operations, system queries, and administrative tasks.',
    annotations=ToolAnnotations(
        title="Powershell Tool",
        readOnlyHint=False,
        destructiveHint=True,
        idempotentHint=False,
        openWorldHint=True,
    ),
)
@with_analytics(analytics, "Powershell-Tool")
def powershell_tool(command: str, ctx: Context = None) -> str:
    """Run ``command`` through the desktop service and report its output and status.

    Returns a two-line string: the response on the first line and the status
    code on the second. ``ctx`` is accepted but not read here.
    """
    output, exit_status = desktop.execute_command(command)
    return f'Response: {output}\nStatus Code: {exit_status}'
85
+
86
@mcp.tool(
    name='State-Tool',
    description='Captures complete desktop state including: system language, focused/opened apps, interactive elements (buttons, text fields, links, menus with coordinates), and scrollable areas. Set use_vision=True to include screenshot. Set use_dom=True for browser content to get web page elements instead of browser UI. Always call this first to understand the current desktop state before taking actions.',
    annotations=ToolAnnotations(
        title="State Tool",
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
@with_analytics(analytics, "State-Tool")
def state_tool(use_vision: bool = False, use_dom: bool = False, ctx: Context = None):
    """Describe the current desktop state as text, optionally with a screenshot.

    Args:
        use_vision: When true, a PNG ``Image`` built from the state's
            screenshot is appended to the result.
        use_dom: Forwarded to ``desktop.get_state``.
        ctx: Accepted but not read here.

    Returns:
        A list whose first item is the text report and whose optional second
        item is the screenshot image.
    """
    # Calculate scale factor to cap resolution at 1080p (1920x1080)
    max_width, max_height = 1920, 1080
    scale_width = max_width / screen_width if screen_width > max_width else 1.0
    scale_height = max_height / screen_height if screen_height > max_height else 1.0
    # Use the smaller scale to ensure both dimensions fit
    scale = min(scale_width, scale_height)

    desktop_state = desktop.get_state(
        use_vision=use_vision, use_dom=use_dom, as_bytes=True, scale=scale
    )
    interactive_elements = desktop_state.tree_state.interactive_elements_to_string()
    scrollable_elements = desktop_state.tree_state.scrollable_elements_to_string()
    apps = desktop_state.apps_to_string()
    active_app = desktop_state.active_app_to_string()

    # The report is assembled from explicit lines instead of dedent() on an
    # f-string: dedent() runs after interpolation, so multi-line values would
    # change the common margin and leave the template indentation in the output.
    # The empty first and last items keep the leading and trailing newline.
    report = '\n'.join([
        '',
        'Default Language of User:',
        f'{default_language} with encoding: {desktop.encoding}',
        '',
        'Focused App:',
        f'{active_app}',
        '',
        'Opened Apps:',
        f'{apps}',
        '',
        'List of Interactive Elements:',
        f"{interactive_elements or 'No interactive elements found.'}",
        '',
        'List of Scrollable Elements:',
        f"{scrollable_elements or 'No scrollable elements found.'}",
        '',
    ])

    result = [report]
    if use_vision:
        result.append(Image(data=desktop_state.screenshot, format='png'))
    return result
126
+
127
@mcp.tool(
    name='Click-Tool',
    description='Performs mouse clicks at specified coordinates [x, y]. Supports button types: left (default), right (context menu), middle. Supports clicks: 1 (single), 2 (double), 3 (triple). Always use coordinates from State-Tool output to ensure accuracy.',
    annotations=ToolAnnotations(
        title="Click Tool",
        readOnlyHint=False,
        destructiveHint=True,
        idempotentHint=False,
        openWorldHint=False,
    ),
)
@with_analytics(analytics, "Click-Tool")
def click_tool(
    loc: list[int],
    button: Literal['left', 'right', 'middle'] = 'left',
    clicks: int = 1,
    ctx: Context = None,
) -> str:
    """Click at ``loc`` with the given button and click count.

    Args:
        loc: Target coordinates as ``[x, y]``.
        button: Mouse button to use.
        clicks: Number of clicks forwarded to ``desktop.click``.
        ctx: Accepted but not read here.

    Returns:
        A sentence describing the click that was performed.

    Raises:
        ValueError: If ``loc`` does not contain exactly two items.
    """
    if len(loc) != 2:
        raise ValueError("Location must be a list of exactly 2 integers [x, y]")
    x, y = loc
    desktop.click(loc=loc, button=button, clicks=clicks)
    num_clicks = {1: 'Single', 2: 'Double', 3: 'Triple'}
    # Counts outside 1-3 previously rendered as "None <button> clicked ...";
    # fall back to a numeric label so the message stays meaningful.
    label = num_clicks.get(clicks, f'{clicks}x')
    return f'{label} {button} clicked at ({x},{y}).'
146
+
147
@mcp.tool(
    name='Type-Tool',
    description='Types text at specified coordinates [x, y]. Set clear=True to clear existing text first (Ctrl+A then type), clear=False to append. Set press_enter=True to submit after typing. Always click on the target input field first to ensure focus.',
    annotations=ToolAnnotations(
        title="Type Tool",
        readOnlyHint=False,
        destructiveHint=True,
        idempotentHint=False,
        openWorldHint=False,
    ),
)
@with_analytics(analytics, "Type-Tool")
def type_tool(
    loc: list[int],
    text: str,
    clear: bool = False,
    press_enter: bool = False,
    ctx: Context = None,
) -> str:
    """Type ``text`` at ``loc`` through the desktop service.

    ``clear`` and ``press_enter`` are forwarded unchanged to ``desktop.type``.
    Raises ``ValueError`` when ``loc`` does not hold exactly two items.
    ``ctx`` is accepted but not read here.
    """
    if len(loc) != 2:
        raise ValueError("Location must be a list of exactly 2 integers [x, y]")
    x, y = loc
    desktop.type(loc=loc, text=text, clear=clear, press_enter=press_enter)
    return f'Typed {text} at ({x},{y}).'
165
+
166
@mcp.tool(
    name='Scroll-Tool',
    description='Scrolls at coordinates [x, y] or current mouse position if loc=None. Type: vertical (default) or horizontal. Direction: up/down for vertical, left/right for horizontal. wheel_times controls amount (1 wheel ≈ 3-5 lines). Use for navigating long content, lists, and web pages.',
    annotations=ToolAnnotations(
        title="Scroll Tool",
        readOnlyHint=False,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
@with_analytics(analytics, "Scroll-Tool")
def scroll_tool(
    loc: list[int] = None,
    type: Literal['horizontal', 'vertical'] = 'vertical',
    direction: Literal['up', 'down', 'left', 'right'] = 'down',
    wheel_times: int = 1,
    ctx: Context = None,
) -> str:
    """Scroll through the desktop service and describe what was done.

    Args:
        loc: Optional ``[x, y]`` coordinates; forwarded as-is to ``desktop.scroll``.
        type: Scroll axis. The name shadows the builtin but is part of the
            tool's public schema, so it is kept.
        direction: Scroll direction.
        wheel_times: Number of wheel steps.
        ctx: Accepted but not read here.

    Returns:
        The response from ``desktop.scroll`` when it is truthy, otherwise a
        summary sentence that includes the coordinates when ``loc`` was given.

    Raises:
        ValueError: If ``loc`` is non-empty and does not hold exactly two items.
    """
    if loc and len(loc) != 2:
        raise ValueError("Location must be a list of exactly 2 integers [x, y]")
    response = desktop.scroll(loc, type, direction, wheel_times)
    if response:
        return response
    # The original `a + b if loc else ''` parsed as `(a + b) if loc else ''`,
    # returning an empty string whenever no location was supplied. Build the
    # summary first and append the location only when present.
    summary = f'Scrolled {type} {direction} by {wheel_times} wheel times'
    if loc:
        summary += f' at ({loc[0]},{loc[1]})'
    return summary + '.'
185
+
186
@mcp.tool(
    name='Drag-Tool',
    description='Performs drag-and-drop from current mouse position to destination coordinates [x, y]. Click or move to source position first, then call this tool with target coordinates. Use for moving files, reordering items, resizing windows, or any drag-drop UI interactions.',
    annotations=ToolAnnotations(
        title="Drag Tool",
        readOnlyHint=False,
        destructiveHint=True,
        idempotentHint=False,
        openWorldHint=False,
    ),
)
@with_analytics(analytics, "Drag-Tool")
def drag_tool(to_loc: list[int], ctx: Context = None) -> str:
    """Drag to ``to_loc`` through the desktop service and confirm the destination.

    Raises ``ValueError`` when ``to_loc`` does not hold exactly two items.
    ``ctx`` is accepted but not read here.
    """
    if len(to_loc) != 2:
        raise ValueError("to_loc must be a list of exactly 2 integers [x, y]")
    desktop.drag(to_loc)
    # Coordinates are read after the drag call, as in the original ordering.
    dest_x, dest_y = to_loc
    return f'Dragged the element to ({dest_x},{dest_y}).'
204
+
205
@mcp.tool(
    name='Move-Tool',
    description='Moves mouse cursor to coordinates [x, y] without clicking. Use for hovering to reveal tooltips/menus, positioning cursor before drag operations, or triggering hover-based UI changes. Does not interact with elements.',
    annotations=ToolAnnotations(
        title="Move Tool",
        readOnlyHint=False,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
@with_analytics(analytics, "Move-Tool")
def move_tool(to_loc: list[int], ctx: Context = None) -> str:
    """Move the pointer to ``to_loc`` through the desktop service.

    Raises ``ValueError`` when ``to_loc`` does not hold exactly two items.
    ``ctx`` is accepted but not read here.
    """
    if len(to_loc) != 2:
        raise ValueError("to_loc must be a list of exactly 2 integers [x, y]")
    target_x, target_y = to_loc
    desktop.move(to_loc)
    return f'Moved the mouse pointer to ({target_x},{target_y}).'
223
+
224
@mcp.tool(
    name='Shortcut-Tool',
    description='Executes keyboard shortcuts using key combinations separated by +. Examples: "ctrl+c" (copy), "ctrl+v" (paste), "alt+tab" (switch apps), "win+r" (Run dialog), "win" (Start menu), "ctrl+shift+esc" (Task Manager). Use for quick actions and system commands.',
    annotations=ToolAnnotations(
        title="Shortcut Tool",
        readOnlyHint=False,
        destructiveHint=True,
        idempotentHint=False,
        openWorldHint=False,
    ),
)
@with_analytics(analytics, "Shortcut-Tool")
def shortcut_tool(shortcut: str, ctx: Context = None):
    """Send ``shortcut`` to the desktop service and confirm which keys were pressed.

    ``ctx`` is accepted but not read here.
    """
    desktop.shortcut(shortcut)
    confirmation = f"Pressed {shortcut}."
    return confirmation
239
+
240
@mcp.tool(
    name='Wait-Tool',
    description='Pauses execution for specified duration in seconds. Use when waiting for: applications to launch/load, UI animations to complete, page content to render, dialogs to appear, or between rapid actions. Helps ensure UI is ready before next interaction.',
    annotations=ToolAnnotations(
        title="Wait Tool",
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
@with_analytics(analytics, "Wait-Tool")
def wait_tool(duration: int, ctx: Context = None) -> str:
    """Sleep for ``duration`` seconds via pyautogui, then report the wait.

    ``ctx`` is accepted but not read here.
    """
    pg.sleep(duration)
    notice = f'Waited for {duration} seconds.'
    return notice
255
+
256
@mcp.tool(
    name='Scrape-Tool',
    description='Fetch content from a URL or the active browser tab. By default (use_dom=False), performs a lightweight HTTP request to the URL and returns markdown content of complete webpage. Note: Some websites may block automated HTTP requests. If this fails, open the page in a browser and retry with use_dom=True to extract visible text from the active tab\'s DOM within the viewport.',
    annotations=ToolAnnotations(
        title="Scrape Tool",
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=True,
    ),
)
@with_analytics(analytics, "Scrape-Tool")
def scrape_tool(url: str, use_dom: bool = False, ctx: Context = None) -> str:
    """Return page content for ``url``, from a scrape or from the desktop DOM state.

    With ``use_dom`` false the content comes from ``desktop.scrape(url)``.
    Otherwise the text of the DOM informative nodes in the current desktop
    state is joined line by line and wrapped in scroll-position markers.
    ``ctx`` is accepted but not read here.
    """
    if not use_dom:
        scraped = desktop.scrape(url)
        return f'URL:{url}\nContent:\n{scraped}'

    tree_state = desktop.get_state(use_vision=False, use_dom=use_dom).tree_state
    if not tree_state.dom_info:
        return f'No DOM information found. Please open {url} in browser first.'

    scroll_percent = tree_state.dom_info.vertical_scroll_percent
    body = '\n'.join(node.text for node in tree_state.dom_informative_nodes)

    # Markers tell the reader whether more content exists above or below.
    if scroll_percent <= 0:
        header_status = "Reached top"
    else:
        header_status = "Scroll up to see more"
    if scroll_percent >= 100:
        footer_status = "Reached bottom"
    else:
        footer_status = "Scroll down to see more"

    return f'URL:{url}\nContent:\n[{header_status}]\n{body}\n[{footer_status}]'
283
+
284
+
285
@click.command()
@click.option(
    "--transport",
    help="The transport layer used by the MCP server.",
    type=click.Choice(['stdio', 'sse', 'streamable-http']),
    default='stdio',
)
@click.option(
    "--host",
    help="Host to bind the SSE/Streamable HTTP server.",
    default="localhost",
    type=str,
    show_default=True,
)
@click.option(
    "--port",
    help="Port to bind the SSE/Streamable HTTP server.",
    default=8000,
    type=int,
    show_default=True,
)
def main(transport, host, port):
    """Command-line entry point: run the MCP server on the chosen transport."""
    # Host and port are forwarded only for the non-stdio transports; the stdio
    # case calls run() with no arguments.
    if transport != 'stdio':
        mcp.run(transport=transport, host=host, port=port)
        return
    mcp.run()


if __name__ == "__main__":
    main()