windows-mcp 0.5.8__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
windows_mcp/__main__.py CHANGED
@@ -1,314 +1,299 @@
1
- from windows_mcp.analytics import PostHogAnalytics, with_analytics
2
- from windows_mcp.watchdog.service import WatchDog
3
- from windows_mcp.desktop.service import Desktop
4
- from contextlib import asynccontextmanager
5
- from fastmcp.utilities.types import Image
6
- from mcp.types import ToolAnnotations
7
- from typing import Literal, Optional
8
- from humancursor import SystemCursor
9
- from fastmcp import FastMCP, Context
10
- from dotenv import load_dotenv
11
- from textwrap import dedent
12
- import pyautogui as pg
13
- import asyncio
14
- import click
15
- import os
16
-
17
- load_dotenv()
18
-
19
- pg.FAILSAFE=False
20
- pg.PAUSE=1.0
21
-
22
- desktop=Desktop()
23
- watchdog=WatchDog()
24
- cursor=SystemCursor()
25
- windows_version=desktop.get_windows_version()
26
- default_language=desktop.get_default_language()
27
- screen_width,screen_height=desktop.get_resolution()
28
- watchdog.set_focus_callback(desktop.tree._on_focus_change)
29
-
30
- instructions=dedent(f'''
31
- Windows MCP server provides tools to interact directly with the {windows_version} desktop,
32
- thus enabling to operate the desktop on the user's behalf.
33
- ''')
34
-
35
- # Initialize analytics at module level to be used in decorators
36
- if os.getenv("ANONYMIZED_TELEMETRY", "true").lower() == "false":
37
- analytics = None
38
- else:
39
- analytics = PostHogAnalytics()
40
-
41
- @asynccontextmanager
42
- async def lifespan(app: FastMCP):
43
- """Runs initialization code before the server starts and cleanup code after it shuts down."""
44
- try:
45
- watchdog.start()
46
- await asyncio.sleep(1) # Simulate startup latency
47
- yield
48
- finally:
49
- watchdog.stop()
50
- if analytics:
51
- await analytics.close()
52
-
53
- mcp=FastMCP(name='windows-mcp',instructions=instructions,lifespan=lifespan)
54
-
55
- @mcp.tool(
56
- name="App-Tool",
57
- description="Manages Windows applications with three modes: 'launch' (start app by name), 'resize' (set window position/size using window_loc=[x,y] and window_size=[width,height]), 'switch' (activate app by name). Essential for application lifecycle management.",
58
- annotations=ToolAnnotations(
59
- title="App Tool",
60
- readOnlyHint=False,
61
- destructiveHint=True,
62
- idempotentHint=False,
63
- openWorldHint=False
64
- )
65
- )
66
- @with_analytics(analytics, "App-Tool")
67
- def app_tool(mode:Literal['launch','resize','switch'],name:str|None=None,window_loc:list[int]|None=None,window_size:list[int]|None=None, ctx: Context = None):
68
- return desktop.app(mode,name,window_loc,window_size)
69
-
70
- @mcp.tool(
71
- name='Powershell-Tool',
72
- description='Execute PowerShell commands directly on the Windows system and return output with status code. Supports all PowerShell cmdlets, scripts, and system commands. Use for file operations, system queries, and administrative tasks.',
73
- annotations=ToolAnnotations(
74
- title="Powershell Tool",
75
- readOnlyHint=False,
76
- destructiveHint=True,
77
- idempotentHint=False,
78
- openWorldHint=True
79
- )
80
- )
81
- @with_analytics(analytics, "Powershell-Tool")
82
- def powershell_tool(command: str, ctx: Context = None) -> str:
83
- response,status_code=desktop.execute_command(command)
84
- return f'Response: {response}\nStatus Code: {status_code}'
85
-
86
- @mcp.tool(
87
- name='State-Tool',
88
- description='Captures complete desktop state including: system language, focused/opened apps, interactive elements (buttons, text fields, links, menus with coordinates), and scrollable areas. Set use_vision=True to include screenshot. Set use_dom=True for browser content to get web page elements instead of browser UI. Always call this first to understand the current desktop state before taking actions.',
89
- annotations=ToolAnnotations(
90
- title="State Tool",
91
- readOnlyHint=True,
92
- destructiveHint=False,
93
- idempotentHint=True,
94
- openWorldHint=False
95
- )
96
- )
97
- @with_analytics(analytics, "State-Tool")
98
- def state_tool(use_vision:bool=False,use_dom:bool=False, ctx: Context = None):
99
- # Calculate scale factor to cap resolution at 1080p (1920x1080)
100
- max_width, max_height = 1920, 1080
101
- scale_width = max_width / screen_width if screen_width > max_width else 1.0
102
- scale_height = max_height / screen_height if screen_height > max_height else 1.0
103
- scale = min(scale_width, scale_height) # Use the smaller scale to ensure both dimensions fit
104
-
105
- desktop_state=desktop.get_state(use_vision=use_vision,use_dom=use_dom,as_bytes=True,scale=scale)
106
- interactive_elements=desktop_state.tree_state.interactive_elements_to_string()
107
- scrollable_elements=desktop_state.tree_state.scrollable_elements_to_string()
108
- apps=desktop_state.apps_to_string()
109
- active_app=desktop_state.active_app_to_string()
110
- return [dedent(f'''
111
- Default Language of User:
112
- {default_language} with encoding: {desktop.encoding}
113
-
114
- Focused App:
115
- {active_app}
116
-
117
- Opened Apps:
118
- {apps}
119
-
120
- List of Interactive Elements:
121
- {interactive_elements or 'No interactive elements found.'}
122
-
123
- List of Scrollable Elements:
124
- {scrollable_elements or 'No scrollable elements found.'}
125
- ''')]+([Image(data=desktop_state.screenshot,format='png')] if use_vision else [])
126
-
127
- @mcp.tool(
128
- name='Click-Tool',
129
- description='Performs mouse clicks at specified coordinates [x, y]. Supports button types: left (default), right (context menu), middle. Supports clicks: 1 (single), 2 (double), 3 (triple). Always use coordinates from State-Tool output to ensure accuracy.',
130
- annotations=ToolAnnotations(
131
- title="Click Tool",
132
- readOnlyHint=False,
133
- destructiveHint=True,
134
- idempotentHint=False,
135
- openWorldHint=False
136
- )
137
- )
138
- @with_analytics(analytics, "Click-Tool")
139
- def click_tool(loc:list[int],button:Literal['left','right','middle']='left',clicks:int=1, ctx: Context = None)->str:
140
- if len(loc) != 2:
141
- raise ValueError("Location must be a list of exactly 2 integers [x, y]")
142
- x,y=loc[0],loc[1]
143
- desktop.click(loc=loc,button=button,clicks=clicks)
144
- num_clicks={1:'Single',2:'Double',3:'Triple'}
145
- return f'{num_clicks.get(clicks)} {button} clicked at ({x},{y}).'
146
-
147
- @mcp.tool(
148
- name='Type-Tool',
149
- description='Types text at specified coordinates [x, y]. Set clear=True to clear existing text first (Ctrl+A then type), clear=False to append. Set press_enter=True to submit after typing. Always click on the target input field first to ensure focus.',
150
- annotations=ToolAnnotations(
151
- title="Type Tool",
152
- readOnlyHint=False,
153
- destructiveHint=True,
154
- idempotentHint=False,
155
- openWorldHint=False
156
- )
157
- )
158
- @with_analytics(analytics, "Type-Tool")
159
- def type_tool(loc:list[int],text:str,clear:bool=False,press_enter:bool=False, ctx: Context = None)->str:
160
- if len(loc) != 2:
161
- raise ValueError("Location must be a list of exactly 2 integers [x, y]")
162
- x,y=loc[0],loc[1]
163
- desktop.type(loc=loc,text=text,clear=clear,press_enter=press_enter)
164
- return f'Typed {text} at ({x},{y}).'
165
-
166
- @mcp.tool(
167
- name='Scroll-Tool',
168
- description='Scrolls at coordinates [x, y] or current mouse position if loc=None. Type: vertical (default) or horizontal. Direction: up/down for vertical, left/right for horizontal. wheel_times controls amount (1 wheel ≈ 3-5 lines). Use for navigating long content, lists, and web pages.',
169
- annotations=ToolAnnotations(
170
- title="Scroll Tool",
171
- readOnlyHint=False,
172
- destructiveHint=False,
173
- idempotentHint=True,
174
- openWorldHint=False
175
- )
176
- )
177
- @with_analytics(analytics, "Scroll-Tool")
178
- def scroll_tool(loc:list[int]=None,type:Literal['horizontal','vertical']='vertical',direction:Literal['up','down','left','right']='down',wheel_times:int=1, ctx: Context = None)->str:
179
- if loc and len(loc) != 2:
180
- raise ValueError("Location must be a list of exactly 2 integers [x, y]")
181
- response=desktop.scroll(loc,type,direction,wheel_times)
182
- if response:
183
- return response
184
- return f'Scrolled {type} {direction} by {wheel_times} wheel times'+f' at ({loc[0]},{loc[1]}).' if loc else ''
185
-
186
- @mcp.tool(
187
- name='Drag-Tool',
188
- description='Performs drag-and-drop from current mouse position to destination coordinates [x, y]. Click or move to source position first, then call this tool with target coordinates. Use for moving files, reordering items, resizing windows, or any drag-drop UI interactions.',
189
- annotations=ToolAnnotations(
190
- title="Drag Tool",
191
- readOnlyHint=False,
192
- destructiveHint=True,
193
- idempotentHint=False,
194
- openWorldHint=False
195
- )
196
- )
197
- @with_analytics(analytics, "Drag-Tool")
198
- def drag_tool(to_loc:list[int], ctx: Context = None)->str:
199
- if len(to_loc) != 2:
200
- raise ValueError("to_loc must be a list of exactly 2 integers [x, y]")
201
- desktop.drag(to_loc)
202
- x2,y2=to_loc[0],to_loc[1]
203
- return f'Dragged the element to ({x2},{y2}).'
204
-
205
- @mcp.tool(
206
- name='Move-Tool',
207
- description='Moves mouse cursor to coordinates [x, y] without clicking. Use for hovering to reveal tooltips/menus, positioning cursor before drag operations, or triggering hover-based UI changes. Does not interact with elements.',
208
- annotations=ToolAnnotations(
209
- title="Move Tool",
210
- readOnlyHint=False,
211
- destructiveHint=False,
212
- idempotentHint=True,
213
- openWorldHint=False
214
- )
215
- )
216
- @with_analytics(analytics, "Move-Tool")
217
- def move_tool(to_loc:list[int], ctx: Context = None)->str:
218
- if len(to_loc) != 2:
219
- raise ValueError("to_loc must be a list of exactly 2 integers [x, y]")
220
- x,y=to_loc[0],to_loc[1]
221
- desktop.move(to_loc)
222
- return f'Moved the mouse pointer to ({x},{y}).'
223
-
224
- @mcp.tool(
225
- name='Shortcut-Tool',
226
- description='Executes keyboard shortcuts using key combinations separated by +. Examples: "ctrl+c" (copy), "ctrl+v" (paste), "alt+tab" (switch apps), "win+r" (Run dialog), "win" (Start menu), "ctrl+shift+esc" (Task Manager). Use for quick actions and system commands.',
227
- annotations=ToolAnnotations(
228
- title="Shortcut Tool",
229
- readOnlyHint=False,
230
- destructiveHint=True,
231
- idempotentHint=False,
232
- openWorldHint=False
233
- )
234
- )
235
- @with_analytics(analytics, "Shortcut-Tool")
236
- def shortcut_tool(shortcut:str, ctx: Context = None):
237
- desktop.shortcut(shortcut)
238
- return f"Pressed {shortcut}."
239
-
240
- @mcp.tool(
241
- name='Wait-Tool',
242
- description='Pauses execution for specified duration in seconds. Use when waiting for: applications to launch/load, UI animations to complete, page content to render, dialogs to appear, or between rapid actions. Helps ensure UI is ready before next interaction.',
243
- annotations=ToolAnnotations(
244
- title="Wait Tool",
245
- readOnlyHint=True,
246
- destructiveHint=False,
247
- idempotentHint=True,
248
- openWorldHint=False
249
- )
250
- )
251
- @with_analytics(analytics, "Wait-Tool")
252
- def wait_tool(duration:int, ctx: Context = None)->str:
253
- pg.sleep(duration)
254
- return f'Waited for {duration} seconds.'
255
-
256
- @mcp.tool(
257
- name='Scrape-Tool',
258
- description='Fetch content from a URL or the active browser tab. By default (use_dom=False), performs a lightweight HTTP request to the URL and returns markdown content of complete webpage. Note: Some websites may block automated HTTP requests. If this fails, open the page in a browser and retry with use_dom=True to extract visible text from the active tab\'s DOM within the viewport.',
259
- annotations=ToolAnnotations(
260
- title="Scrape Tool",
261
- readOnlyHint=True,
262
- destructiveHint=False,
263
- idempotentHint=True,
264
- openWorldHint=True
265
- )
266
- )
267
- @with_analytics(analytics, "Scrape-Tool")
268
- def scrape_tool(url:str,use_dom:bool=False, ctx: Context = None)->str:
269
- if not use_dom:
270
- content=desktop.scrape(url)
271
- return f'URL:{url}\nContent:\n{content}'
272
-
273
- desktop_state=desktop.get_state(use_vision=False,use_dom=use_dom)
274
- tree_state=desktop_state.tree_state
275
- if not tree_state.dom_info:
276
- return f'No DOM information found. Please open {url} in browser first.'
277
- dom_info=tree_state.dom_info
278
- vertical_scroll_percent=dom_info.vertical_scroll_percent
279
- content='\n'.join([node.text for node in tree_state.dom_informative_nodes])
280
- header_status = "Reached top" if vertical_scroll_percent <= 0 else "Scroll up to see more"
281
- footer_status = "Reached bottom" if vertical_scroll_percent >= 100 else "Scroll down to see more"
282
- return f'URL:{url}\nContent:\n[{header_status}]\n{content}\n[{footer_status}]'
283
-
284
-
285
- @click.command()
286
- @click.option(
287
- "--transport",
288
- help="The transport layer used by the MCP server.",
289
- type=click.Choice(['stdio','sse','streamable-http']),
290
- default='stdio'
291
- )
292
- @click.option(
293
- "--host",
294
- help="Host to bind the SSE/Streamable HTTP server.",
295
- default="localhost",
296
- type=str,
297
- show_default=True
298
- )
299
- @click.option(
300
- "--port",
301
- help="Port to bind the SSE/Streamable HTTP server.",
302
- default=8000,
303
- type=int,
304
- show_default=True
305
- )
306
- def main(transport, host, port):
307
-
308
- if transport=='stdio':
309
- mcp.run()
310
- else:
311
- mcp.run(transport=transport,host=host,port=port)
312
-
313
- if __name__ == "__main__":
314
- main()
1
+ from windows_mcp.analytics import PostHogAnalytics, with_analytics
2
+ from windows_mcp.desktop.service import Desktop,Size
3
+ from windows_mcp.watchdog.service import WatchDog
4
+ from contextlib import asynccontextmanager
5
+ from fastmcp.utilities.types import Image
6
+ from mcp.types import ToolAnnotations
7
+ from typing import Literal, Optional
8
+ from fastmcp import FastMCP, Context
9
+ from dotenv import load_dotenv
10
+ from textwrap import dedent
11
+ import pyautogui as pg
12
+ import asyncio
13
+ import click
14
+ import os
15
+
16
# Pull settings from a local .env file into the process environment.
load_dotenv()

# Upper bound (1080p) applied to screenshots returned by the Snapshot tool.
MAX_IMAGE_WIDTH = 1920
MAX_IMAGE_HEIGHT = 1080

# pyautogui configuration: fail-safe disabled, PAUSE set to one second.
pg.FAILSAFE = False
pg.PAUSE = 1.0

# Shared service objects. They stay None until `lifespan` populates them
# when the server starts.
desktop: Optional[Desktop] = None
watchdog: Optional[WatchDog] = None
analytics: Optional[PostHogAnalytics] = None
screen_size: Optional[Size] = None

# Server-level instructions handed to FastMCP.
instructions = dedent('''
Windows MCP server provides tools to interact directly with the Windows desktop,
thus enabling to operate the desktop on the user's behalf.
''')
31
+
32
@asynccontextmanager
async def lifespan(app: FastMCP):
    """Build the shared service objects on startup and release them on shutdown.

    Populates the module-level ``desktop``, ``watchdog``, ``analytics`` and
    ``screen_size`` globals, starts the watchdog, yields control to the
    server, and on exit stops the watchdog and closes analytics.
    """
    global desktop, watchdog, analytics, screen_size

    # Telemetry stays on unless ANONYMIZED_TELEMETRY is set to "false"
    # (case-insensitive).
    # NOTE(review): the tool decorators in this module call
    # with_analytics(analytics, ...) at import time, when `analytics` is still
    # None -- confirm with_analytics does not capture that value.
    telemetry_setting = os.getenv("ANONYMIZED_TELEMETRY", "true")
    if telemetry_setting.lower() != "false":
        analytics = PostHogAnalytics()

    desktop = Desktop()
    watchdog = WatchDog()
    screen_size = desktop.get_screen_size()
    # Route watchdog focus notifications to the desktop's tree.
    watchdog.set_focus_callback(desktop.tree._on_focus_change)

    try:
        watchdog.start()
        await asyncio.sleep(1)  # Simulate startup latency
        yield
    finally:
        if watchdog:
            watchdog.stop()
        if analytics:
            await analytics.close()
54
+
55
# Server instance that the tool functions in this module register on.
mcp = FastMCP(
    name='windows-mcp',
    instructions=instructions,
    lifespan=lifespan,
)
56
+
57
@mcp.tool(
    name="App",
    description="Manages Windows applications with three modes: 'launch' (start app by name), 'resize' (set window position/size using window_loc=[x,y] and window_size=[width,height]), 'switch' (activate app by name). Essential for application lifecycle management.",
    annotations=ToolAnnotations(
        title="App",
        readOnlyHint=False,
        destructiveHint=True,
        idempotentHint=False,
        openWorldHint=False,
    ),
)
@with_analytics(analytics, "App-Tool")
def app_tool(
    mode: Literal['launch', 'resize', 'switch'],
    name: str | None = None,
    window_loc: list[int] | None = None,
    window_size: list[int] | None = None,
    ctx: Context = None,
):
    """Forward an application-management request to ``desktop.app``.

    All four arguments are passed through positionally and the result of
    ``desktop.app`` is returned unchanged. ``ctx`` is not used in the body.
    """
    outcome = desktop.app(mode, name, window_loc, window_size)
    return outcome
71
+
72
@mcp.tool(
    name='Shell',
    description='A comprehensive system tool for executing any PowerShell commands. Use it to navigate the file system, manage files and processes, and execute system-level operations. Capable of accessing web content (e.g., via Invoke-WebRequest), interacting with network resources, and performing complex administrative tasks. This tool provides full access to the underlying operating system capabilities, making it the primary interface for system automation, scripting, and deep system interaction.',
    annotations=ToolAnnotations(
        title="Shell",
        readOnlyHint=False,
        destructiveHint=True,
        idempotentHint=False,
        openWorldHint=True,
    ),
)
@with_analytics(analytics, "Powershell-Tool")
def powershell_tool(command: str, timeout: int = 10, ctx: Context = None) -> str:
    """Run ``command`` via ``desktop.execute_command`` and format the result.

    ``timeout`` is handed to ``desktop.execute_command`` as its second
    argument (presumably seconds -- confirm against the Desktop service).
    Returns a two-line string holding the response and the status code.
    """
    output, exit_status = desktop.execute_command(command, timeout)
    return f'Response: {output}\nStatus Code: {exit_status}'
87
+
88
@mcp.tool(
    name='Snapshot',
    description='Captures complete desktop state including: system language, focused/opened apps, interactive elements (buttons, text fields, links, menus with coordinates), and scrollable areas. Set use_vision=True to include screenshot. Set use_dom=True for browser content to get web page elements instead of browser UI. Always call this first to understand the current desktop state before taking actions.',
    annotations=ToolAnnotations(
        title="Snapshot",
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
@with_analytics(analytics, "State-Tool")
def state_tool(use_vision: bool = False, use_dom: bool = False, ctx: Context = None):
    """Describe the current desktop state, optionally with a screenshot.

    Returns a list whose first item is a text report (focused app, opened
    apps, interactive elements, scrollable elements). When ``use_vision`` is
    true a PNG ``Image`` built from the captured screenshot is appended.
    """
    # Shrink factor that keeps the capture within 1920x1080; an axis that
    # already fits contributes 1.0, and the smaller factor wins so both fit.
    width_ratio = 1.0
    if screen_size.width > MAX_IMAGE_WIDTH:
        width_ratio = MAX_IMAGE_WIDTH / screen_size.width
    height_ratio = 1.0
    if screen_size.height > MAX_IMAGE_HEIGHT:
        height_ratio = MAX_IMAGE_HEIGHT / screen_size.height
    scale = min(width_ratio, height_ratio)

    desktop_state = desktop.get_state(
        use_vision=use_vision,
        use_dom=use_dom,
        as_bytes=True,
        scale=scale,
    )
    tree_state = desktop_state.tree_state
    interactive_elements = tree_state.interactive_elements_to_string()
    scrollable_elements = tree_state.scrollable_elements_to_string()
    apps = desktop_state.apps_to_string()
    active_app = desktop_state.active_app_to_string()

    report = dedent(f'''
    Focused App:
    {active_app}

    Opened Apps:
    {apps}

    List of Interactive Elements:
    {interactive_elements or 'No interactive elements found.'}

    List of Scrollable Elements:
    {scrollable_elements or 'No scrollable elements found.'}
    ''')

    contents = [report]
    if use_vision:
        contents.append(Image(data=desktop_state.screenshot, format='png'))
    return contents
124
+
125
@mcp.tool(
    name='Click',
    description='Performs mouse clicks at specified coordinates [x, y]. Supports button types: left (default), right (context menu), middle. Supports clicks: 1 (single), 2 (double), 3 (triple). Always use coordinates from Snapshot output to ensure accuracy.',
    annotations=ToolAnnotations(
        title="Click",
        readOnlyHint=False,
        destructiveHint=True,
        idempotentHint=False,
        openWorldHint=False,
    ),
)
@with_analytics(analytics, "Click-Tool")
def click_tool(
    loc: list[int],
    button: Literal['left', 'right', 'middle'] = 'left',
    clicks: int = 1,
    ctx: Context = None,
) -> str:
    """Click at ``loc`` through ``desktop.click`` and report what was done.

    Args:
        loc: Target coordinates as ``[x, y]``.
        button: Mouse button to press.
        clicks: Number of clicks requested.
        ctx: Request context; not used in the body.

    Returns:
        A sentence naming the click kind, the button and the coordinates.

    Raises:
        ValueError: If ``loc`` does not contain exactly two items.
    """
    if len(loc) != 2:
        raise ValueError("Location must be a list of exactly 2 integers [x, y]")
    x, y = loc
    desktop.click(loc=loc, button=button, clicks=clicks)
    click_names = {1: 'Single', 2: 'Double', 3: 'Triple'}
    # A count outside 1-3 used to render as the literal text "None"; fall
    # back to the numeric count instead.
    label = click_names.get(clicks, f'{clicks}x')
    return f'{label} {button} clicked at ({x},{y}).'
144
+
145
@mcp.tool(
    name='Type',
    description='Types text at specified coordinates [x, y]. Set clear=True to clear existing text first (Ctrl+A then type), clear=False to append. Set press_enter=True to submit after typing. Always click on the target input field first to ensure focus.',
    annotations=ToolAnnotations(
        title="Type",
        readOnlyHint=False,
        destructiveHint=True,
        idempotentHint=False,
        openWorldHint=False,
    ),
)
@with_analytics(analytics, "Type-Tool")
def type_tool(
    loc: list[int],
    text: str,
    clear: bool = False,
    press_enter: bool = False,
    ctx: Context = None,
) -> str:
    """Type ``text`` at ``loc`` through ``desktop.type`` and confirm it.

    Raises ``ValueError`` when ``loc`` does not hold exactly two items.
    ``clear`` and ``press_enter`` are forwarded unchanged to ``desktop.type``.
    """
    if len(loc) != 2:
        raise ValueError("Location must be a list of exactly 2 integers [x, y]")
    x, y = loc
    desktop.type(loc=loc, text=text, clear=clear, press_enter=press_enter)
    return f'Typed {text} at ({x},{y}).'
163
+
164
@mcp.tool(
    name='Scroll',
    description='Scrolls at coordinates [x, y] or current mouse position if loc=None. Type: vertical (default) or horizontal. Direction: up/down for vertical, left/right for horizontal. wheel_times controls amount (1 wheel ≈ 3-5 lines). Use for navigating long content, lists, and web pages.',
    annotations=ToolAnnotations(
        title="Scroll",
        readOnlyHint=False,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
@with_analytics(analytics, "Scroll-Tool")
def scroll_tool(
    loc: list[int] | None = None,
    type: Literal['horizontal', 'vertical'] = 'vertical',
    direction: Literal['up', 'down', 'left', 'right'] = 'down',
    wheel_times: int = 1,
    ctx: Context = None,
) -> str:
    """Scroll through ``desktop.scroll`` and describe the action.

    Args:
        loc: Optional ``[x, y]`` coordinates; ``None`` is passed through to
            ``desktop.scroll`` as-is.
        type: Scroll axis. The name shadows the builtin but is part of the
            tool's public parameters, so it is kept.
        direction: Scroll direction.
        wheel_times: Number of wheel steps.
        ctx: Request context; not used in the body.

    Returns:
        Whatever ``desktop.scroll`` returns when that is truthy, otherwise a
        confirmation sentence.

    Raises:
        ValueError: If ``loc`` is given but does not contain exactly two items.
    """
    if loc and len(loc) != 2:
        raise ValueError("Location must be a list of exactly 2 integers [x, y]")
    response = desktop.scroll(loc, type, direction, wheel_times)
    if response:
        return response
    # The original one-liner `a + b if loc else ''` parsed as
    # `(a + b) if loc else ''`, so a scroll without coordinates returned an
    # empty string. Only the coordinate suffix is conditional.
    message = f'Scrolled {type} {direction} by {wheel_times} wheel times'
    if loc:
        message += f' at ({loc[0]},{loc[1]})'
    return message + '.'
183
+
184
@mcp.tool(
    name='Move',
    description='Moves mouse cursor to coordinates [x, y]. Set drag=True to perform a drag-and-drop operation from the current mouse position to the target coordinates. Default (drag=False) is a simple cursor move (hover).',
    annotations=ToolAnnotations(
        title="Move",
        readOnlyHint=False,
        # With drag=True this tool performs a drag-and-drop, which can change
        # state and is not safely repeatable, so the hints describe the
        # drag case rather than the plain hover case.
        destructiveHint=True,
        idempotentHint=False,
        openWorldHint=False,
    ),
)
@with_analytics(analytics, "Move-Tool")
def move_tool(loc: list[int], drag: bool = False, ctx: Context = None) -> str:
    """Move the pointer to ``loc``, or drag to it when ``drag`` is true.

    Args:
        loc: Target coordinates as ``[x, y]``.
        drag: When true call ``desktop.drag(loc)``; otherwise
            ``desktop.move(loc)``.
        ctx: Request context; not used in the body.

    Returns:
        A sentence confirming the drag or the move with the coordinates.

    Raises:
        ValueError: If ``loc`` does not contain exactly two items.
    """
    if len(loc) != 2:
        raise ValueError("loc must be a list of exactly 2 integers [x, y]")
    x, y = loc
    if drag:
        desktop.drag(loc)
        return f'Dragged to ({x},{y}).'
    desktop.move(loc)
    return f'Moved the mouse pointer to ({x},{y}).'
206
+
207
@mcp.tool(
    name='Shortcut',
    description='Executes keyboard shortcuts using key combinations separated by +. Examples: "ctrl+c" (copy), "ctrl+v" (paste), "alt+tab" (switch apps), "win+r" (Run dialog), "win" (Start menu), "ctrl+shift+esc" (Task Manager). Use for quick actions and system commands.',
    annotations=ToolAnnotations(
        title="Shortcut",
        readOnlyHint=False,
        destructiveHint=True,
        idempotentHint=False,
        openWorldHint=False,
    ),
)
@with_analytics(analytics, "Shortcut-Tool")
def shortcut_tool(shortcut: str, ctx: Context = None):
    """Send ``shortcut`` to ``desktop.shortcut`` and confirm the key press.

    ``ctx`` is not used in the body.
    """
    desktop.shortcut(shortcut)
    confirmation = f"Pressed {shortcut}."
    return confirmation
222
+
223
@mcp.tool(
    name='Wait',
    description='Pauses execution for specified duration in seconds. Use when waiting for: applications to launch/load, UI animations to complete, page content to render, dialogs to appear, or between rapid actions. Helps ensure UI is ready before next interaction.',
    annotations=ToolAnnotations(
        title="Wait",
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=False,
    ),
)
@with_analytics(analytics, "Wait-Tool")
def wait_tool(duration: int, ctx: Context = None) -> str:
    """Sleep for ``duration`` via ``pg.sleep`` and report the wait.

    ``ctx`` is not used in the body.
    """
    pg.sleep(duration)
    summary = f'Waited for {duration} seconds.'
    return summary
238
+
239
@mcp.tool(
    name='Scrape',
    description='Fetch content from a URL or the active browser tab. By default (use_dom=False), performs a lightweight HTTP request to the URL and returns markdown content of complete webpage. Note: Some websites may block automated HTTP requests. If this fails, open the page in a browser and retry with use_dom=True to extract visible text from the active tab\'s DOM within the viewport using the accessibility tree data.',
    annotations=ToolAnnotations(
        title="Scrape",
        readOnlyHint=True,
        destructiveHint=False,
        idempotentHint=True,
        openWorldHint=True,
    ),
)
@with_analytics(analytics, "Scrape-Tool")
def scrape_tool(url: str, use_dom: bool = False, ctx: Context = None) -> str:
    """Return page content for ``url`` from ``desktop.scrape`` or the desktop tree.

    With ``use_dom`` false the result of ``desktop.scrape(url)`` is returned
    under a ``URL:``/``Content:`` header. With ``use_dom`` true the text of
    ``tree_state.dom_informative_nodes`` is joined line by line and wrapped in
    markers saying whether the top or bottom of the page has been reached,
    based on ``dom_node.vertical_scroll_percent``.
    """
    if use_dom:
        tree_state = desktop.get_state(use_vision=False, use_dom=use_dom).tree_state
        page = tree_state.dom_node
        if not page:
            return f'No DOM information found. Please open {url} in browser first.'

        scroll_percent = page.vertical_scroll_percent
        visible_text = '\n'.join(node.text for node in tree_state.dom_informative_nodes)
        at_top = scroll_percent <= 0
        at_bottom = scroll_percent >= 100
        header_status = "Reached top" if at_top else "Scroll up to see more"
        footer_status = "Reached bottom" if at_bottom else "Scroll down to see more"
        return f'URL:{url}\nContent:\n[{header_status}]\n{visible_text}\n[{footer_status}]'

    fetched = desktop.scrape(url)
    return f'URL:{url}\nContent:\n{fetched}'
266
+
267
+
268
@click.command()
@click.option(
    "--transport",
    help="The transport layer used by the MCP server.",
    type=click.Choice(['stdio', 'sse', 'streamable-http']),
    default='stdio',
)
@click.option(
    "--host",
    help="Host to bind the SSE/Streamable HTTP server.",
    default="localhost",
    type=str,
    show_default=True,
)
@click.option(
    "--port",
    help="Port to bind the SSE/Streamable HTTP server.",
    default=8000,
    type=int,
    show_default=True,
)
def main(transport, host, port):
    """Start the MCP server on the transport chosen on the command line.

    ``host`` and ``port`` are only passed to ``mcp.run`` for the 'sse' and
    'streamable-http' transports. Any other transport value raises
    ``ValueError``.
    """
    if transport == 'stdio':
        mcp.run(transport=transport, show_banner=False)
    elif transport in ('sse', 'streamable-http'):
        mcp.run(transport=transport, host=host, port=port, show_banner=False)
    else:
        raise ValueError(f"Invalid transport: {transport}")


if __name__ == "__main__":
    main()