windows-mcp 0.6.0__tar.gz → 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. windows_mcp-0.6.1/.mcpbignore +22 -0
  2. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/PKG-INFO +4 -5
  3. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/README.md +3 -4
  4. windows_mcp-0.6.1/assets/logo.png +0 -0
  5. windows_mcp-0.6.1/manifest.json +114 -0
  6. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/pyproject.toml +1 -1
  7. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/__main__.py +57 -15
  8. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/desktop/service.py +182 -100
  9. windows_mcp-0.6.1/src/windows_mcp/desktop/views.py +77 -0
  10. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/tree/service.py +29 -29
  11. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/tree/utils.py +21 -21
  12. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/tree/views.py +9 -9
  13. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/uia/controls.py +0 -42
  14. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/uia/core.py +19 -929
  15. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/vdm/core.py +147 -135
  16. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/uv.lock +1 -1
  17. windows_mcp-0.6.0/.mcpbignore +0 -7
  18. windows_mcp-0.6.0/assets/logo.png +0 -0
  19. windows_mcp-0.6.0/manifest.json +0 -111
  20. windows_mcp-0.6.0/src/windows_mcp/desktop/views.py +0 -60
  21. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/.github/FUNDING.yml +0 -0
  22. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/.gitignore +0 -0
  23. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/.python-version +0 -0
  24. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/CONTRIBUTING.md +0 -0
  25. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/LICENSE.md +0 -0
  26. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/SECURITY.md +0 -0
  27. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/assets/screenshots/screenshot_1.png +0 -0
  28. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/assets/screenshots/screenshot_2.png +0 -0
  29. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/assets/screenshots/screenshot_3.png +0 -0
  30. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/server.json +0 -0
  31. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/__init__.py +0 -0
  32. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/analytics.py +0 -0
  33. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/desktop/__init__.py +0 -0
  34. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/desktop/config.py +0 -0
  35. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/tree/__init__.py +0 -0
  36. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/tree/cache_utils.py +0 -0
  37. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/tree/config.py +0 -0
  38. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/uia/__init__.py +0 -0
  39. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/uia/enums.py +0 -0
  40. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/uia/events.py +0 -0
  41. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/uia/patterns.py +0 -0
  42. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/vdm/__init__.py +0 -0
  43. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/watchdog/__init__.py +0 -0
  44. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/watchdog/event_handlers.py +0 -0
  45. {windows_mcp-0.6.0 → windows_mcp-0.6.1}/src/windows_mcp/watchdog/service.py +0 -0
@@ -0,0 +1,22 @@
1
+ # Virtual environment (uses uv run at runtime)
2
+ .venv/
3
+
4
+ # Demo videos (too large for bundle)
5
+ assets/demo*.mov
6
+ assets/*.mov
7
+
8
+ # Git
9
+ .git/
10
+
11
+ # Development files
12
+ __pycache__/
13
+ *.pyc
14
+ .pytest_cache/
15
+
16
+ # Editor files
17
+ .vscode/
18
+ .idea/
19
+ *.swp
20
+
21
+ # State files from audit
22
+ .clone-and-audit-state.json
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: windows-mcp
3
- Version: 0.6.0
3
+ Version: 0.6.1
4
4
  Summary: Lightweight MCP Server for interacting with Windows Operating System.
5
5
  Project-URL: homepage, https://github.com/CursorTouch
6
6
  Author-email: Jeomon George <jeogeoalukka@gmail.com>
@@ -73,7 +73,7 @@ Description-Content-Type: text/markdown
73
73
  mcp-name: io.github.CursorTouch/Windows-MCP
74
74
 
75
75
  ## Updates
76
- - Windows-MCP reached 1M+ Users in [Claude Desktop Extensiosn](https://claude.ai/directory).
76
+ - Windows-MCP reached `1M+ Users` in [Claude Desktop Extensions](https://claude.ai/directory).
77
77
  - Windows-MCP is now available on [PyPI](https://pypi.org/project/windows-mcp/) (thus supports `uvx windows-mcp`)
78
78
  - Windows-MCP is added to [MCP Registry](https://github.com/modelcontextprotocol/registry)
79
79
  - Try out 🪟[Windows-Use](https://github.com/CursorTouch/Windows-Use)!!, an agent built using Windows-MCP.
@@ -110,7 +110,7 @@ mcp-name: io.github.CursorTouch/Windows-MCP
110
110
  Easily adapt or extend tools to suit your unique automation or AI integration needs.
111
111
 
112
112
  - **Real-Time Interaction**
113
- Typical latency between actions (e.g., from one mouse click to the next) ranges from **0.4 to 1.2 secs**, and may slightly vary based on the number of active applications and system load, also the inferencing speed of the llm.
113
+ Typical latency between actions (e.g., from one mouse click to the next) ranges from **0.2 to 0.9 secs**, and may vary slightly with the number of active applications, the system load, and the inference speed of the LLM.
114
114
 
115
115
  - **DOM Mode for Browser Automation**
116
116
  Special `use_dom=True` mode for State-Tool that focuses exclusively on web page content, filtering out browser UI elements for cleaner, more efficient web automation.
@@ -347,8 +347,7 @@ MCP Client can access the following tools to interact with Windows:
347
347
  - `Click`: Click on the screen at the given coordinates.
348
348
  - `Type`: Type text on an element (optionally clears existing text).
349
349
  - `Scroll`: Scroll vertically or horizontally on the window or specific regions.
350
- - `Drag`: Drag from one point to another.
351
- - `Move`: Move mouse pointer.
350
+ - `Move`: Move mouse pointer or drag (set drag=True) to coordinates.
352
351
  - `Shortcut`: Press keyboard shortcuts (`Ctrl+c`, `Alt+Tab`, etc).
353
352
  - `Wait`: Pause for a defined duration.
354
353
  - `Snapshot`: Combined snapshot of default language, browser, active apps and interactive, textual and scrollable elements along with screenshot of the desktop. Supports `use_dom=True` for browser content extraction (web page elements only) and `use_vision=True` for including screenshots.
@@ -26,7 +26,7 @@
26
26
  mcp-name: io.github.CursorTouch/Windows-MCP
27
27
 
28
28
  ## Updates
29
- - Windows-MCP reached 1M+ Users in [Claude Desktop Extensiosn](https://claude.ai/directory).
29
+ - Windows-MCP reached `1M+ Users` in [Claude Desktop Extensions](https://claude.ai/directory).
30
30
  - Windows-MCP is now available on [PyPI](https://pypi.org/project/windows-mcp/) (thus supports `uvx windows-mcp`)
31
31
  - Windows-MCP is added to [MCP Registry](https://github.com/modelcontextprotocol/registry)
32
32
  - Try out 🪟[Windows-Use](https://github.com/CursorTouch/Windows-Use)!!, an agent built using Windows-MCP.
@@ -63,7 +63,7 @@ mcp-name: io.github.CursorTouch/Windows-MCP
63
63
  Easily adapt or extend tools to suit your unique automation or AI integration needs.
64
64
 
65
65
  - **Real-Time Interaction**
66
- Typical latency between actions (e.g., from one mouse click to the next) ranges from **0.4 to 1.2 secs**, and may slightly vary based on the number of active applications and system load, also the inferencing speed of the llm.
66
+ Typical latency between actions (e.g., from one mouse click to the next) ranges from **0.2 to 0.9 secs**, and may vary slightly with the number of active applications, the system load, and the inference speed of the LLM.
67
67
 
68
68
  - **DOM Mode for Browser Automation**
69
69
  Special `use_dom=True` mode for State-Tool that focuses exclusively on web page content, filtering out browser UI elements for cleaner, more efficient web automation.
@@ -300,8 +300,7 @@ MCP Client can access the following tools to interact with Windows:
300
300
  - `Click`: Click on the screen at the given coordinates.
301
301
  - `Type`: Type text on an element (optionally clears existing text).
302
302
  - `Scroll`: Scroll vertically or horizontally on the window or specific regions.
303
- - `Drag`: Drag from one point to another.
304
- - `Move`: Move mouse pointer.
303
+ - `Move`: Move mouse pointer or drag (set drag=True) to coordinates.
305
304
  - `Shortcut`: Press keyboard shortcuts (`Ctrl+c`, `Alt+Tab`, etc).
306
305
  - `Wait`: Pause for a defined duration.
307
306
  - `Snapshot`: Combined snapshot of default language, browser, active apps and interactive, textual and scrollable elements along with screenshot of the desktop. Supports `use_dom=True` for browser content extraction (web page elements only) and `use_vision=True` for including screenshots.
Binary file
@@ -0,0 +1,114 @@
1
+ {
2
+ "manifest_version": "0.2",
3
+ "name": "Windows-MCP",
4
+ "version": "0.6.0",
5
+ "description": "MCP Server that enables Claude to interact with Windows OS",
6
+ "long_description": "Windows MCP is an open-source project that enables seamless integration between AI agents and the Windows operating system. Acting as an MCP server, it bridges the gap between large language models (LLMs) and the Windows OS, allowing agents to perform tasks such as **file navigation, application control, UI interaction, QA testing, and more**.\n\n**KEY FEATURES**\n- **Seamless Windows Integration**: Interacts natively with Windows UI elements, opens applications, controls windows, simulates user input, and more.\n- **Use Any LLM (Vision Optional)**: Does not rely on traditional computer vision techniques or fine-tuned models. Works with any LLM, reducing complexity and setup time.\n- **Rich Toolset for UI Automation**: Includes tools for keyboard and mouse control, window management, and capturing window or UI state.\n- **Lightweight & Open-Source**: Minimal dependencies with full source code available under the MIT license.\n- **Customizable & Extendable**: Easily adapt or extend tools to suit custom automation workflows or AI integrations.\n- **Real-Time Interaction**: Typical latency between actions ranges from `0.2` to `0.9` seconds, depending on system load, active applications, and LLM inference speed.\n\n**MINIMUM REQUIREMENTS**\n- Python 3.13 or higher\n- UV Package Manager\nThis MCP server requires UV, a fast Python package manager.\nInstallation:\n`curl -LsSf https://astral.sh/uv/install.sh | sh`\nFor detailed installation instructions, [see the UV documentation](https://github.com/astral-sh/uv)",
7
+ "author": {
8
+ "name": "CursorTouch",
9
+ "url": "https://cursortouch.com/"
10
+ },
11
+ "homepage": "https://cursortouch.com/",
12
+ "documentation": "https://github.com/CursorTouch/Windows-MCP",
13
+ "icon": "assets/logo.png",
14
+ "screenshots": [
15
+ "assets/screenshots/screenshot_1.png",
16
+ "assets/screenshots/screenshot_2.png",
17
+ "assets/screenshots/screenshot_3.png"
18
+ ],
19
+ "server": {
20
+ "type": "python",
21
+ "entry_point": "./src/windows_mcp/__main__.py",
22
+ "mcp_config": {
23
+ "command": "uv",
24
+ "args": [
25
+ "--directory",
26
+ "${__dirname}",
27
+ "run",
28
+ "windows-mcp"
29
+ ],
30
+ "env": {
31
+ "ANONYMIZED_TELEMETRY": "${user_config.anonymized_telemetry}"
32
+ }
33
+ }
34
+ },
35
+ "user_config": {
36
+ "anonymized_telemetry": {
37
+ "type": "boolean",
38
+ "title": "Anonymized Telemetry",
39
+ "description": "Windows-MCP collects basic usage data to help improve the MCP server. No personal information, tool arguments, or tool outputs are tracked.",
40
+ "required": false,
41
+ "default": true
42
+ }
43
+ },
44
+ "tools": [
45
+ {
46
+ "name": "App",
47
+ "description": "Manages Windows applications with three modes: 'launch' (opens the prescribed application), 'resize' (adjusts active window size/position), 'switch' (brings specific window into focus)."
48
+ },
49
+ {
50
+ "name": "Shell",
51
+ "description": "A comprehensive system tool for executing any PowerShell commands. Use it to navigate the file system, manage files and processes, and execute system-level operations. Capable of accessing web content, interacting with network resources, and performing complex administrative tasks."
52
+ },
53
+ {
54
+ "name": "Snapshot",
55
+ "description": "Captures complete desktop state including: system language, focused/opened windows, interactive elements (buttons, text fields, links, menus with coordinates), and scrollable areas. Set use_vision=True to include screenshot. Set use_dom=True for browser content to get web page elements instead of browser UI. Always call this first to understand the current desktop state before taking actions."
56
+ },
57
+ {
58
+ "name": "Click",
59
+ "description": "Performs mouse clicks at specified coordinates [x, y]. Supports button types: 'left' for selection/activation, 'right' for context menus, 'middle'. Supports clicks: 0=hover only (no click), 1=single click (select/focus), 2=double click (open/activate)."
60
+ },
61
+ {
62
+ "name": "Type",
63
+ "description": "Types text at specified coordinates [x, y]. Set clear=True to clear existing text first, False to append. Set press_enter=True to submit after typing. Set caret_position to 'start' (beginning), 'end' (end), or 'idle' (default)."
64
+ },
65
+ {
66
+ "name": "Scroll",
67
+ "description": "Scrolls at coordinates [x, y] or current mouse position if loc=None. Type: vertical (default) or horizontal. Direction: up/down for vertical, left/right for horizontal. wheel_times controls amount (1 wheel ≈ 3-5 lines). Use for navigating long content, lists, and web pages."
68
+ },
69
+ {
70
+ "name": "Move",
71
+ "description": "Moves mouse cursor to coordinates [x, y]. Set drag=True to perform a drag-and-drop operation from the current mouse position to the target coordinates. Default (drag=False) is a simple cursor move (hover)."
72
+ },
73
+ {
74
+ "name": "Shortcut",
75
+ "description": "Executes keyboard shortcuts using key combinations separated by +. Examples: \"ctrl+c\" (copy), \"ctrl+v\" (paste), \"alt+tab\" (switch apps), \"win+r\" (Run dialog), \"win\" (Start menu), \"ctrl+shift+esc\" (Task Manager). Use for quick actions and system commands."
76
+ },
77
+ {
78
+ "name": "Wait",
79
+ "description": "Pauses execution for specified duration in seconds. Use when waiting for: applications to launch/load, UI animations to complete, page content to render, dialogs to appear, or between rapid actions. Helps ensure UI is ready before next interaction."
80
+ },
81
+ {
82
+ "name": "Scrape",
83
+ "description": "Fetch content from a URL or the active browser tab. By default, performs a lightweight HTTP request to the URL. If you need to extract text from the active tab's DOM within the viewport, ensure the page is open in a browser and use Snapshot with use_dom=True first, then the agent will handle extraction."
84
+ },
85
+ {
86
+ "name": "MultiSelect",
87
+ "description": "Selects multiple items such as files, folders, or checkboxes if press_ctrl=True, or performs multiple clicks if False."
88
+ },
89
+ {
90
+ "name": "MultiEdit",
91
+ "description": "Enters text into multiple input fields at specified coordinates [[x,y,text], ...]."
92
+ }
93
+ ],
94
+ "compatibility": {
95
+ "platforms": [
96
+ "win32"
97
+ ],
98
+ "runtimes": {
99
+ "python": ">=3.13"
100
+ }
101
+ },
102
+ "keywords": [
103
+ "windows",
104
+ "automation",
105
+ "ai",
106
+ "mcp",
107
+ "computer-use"
108
+ ],
109
+ "license": "MIT",
110
+ "repository": {
111
+ "type": "git",
112
+ "url": "https://github.com/CursorTouch/Windows-MCP"
113
+ }
114
+ }
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "windows-mcp"
3
- version = "0.6.0"
3
+ version = "0.6.1"
4
4
  description = "Lightweight MCP Server for interacting with Windows Operating System."
5
5
  authors = [
6
6
  { name = "Jeomon George", email = "jeogeoalukka@gmail.com" }
@@ -56,7 +56,7 @@ mcp=FastMCP(name='windows-mcp',instructions=instructions,lifespan=lifespan)
56
56
 
57
57
  @mcp.tool(
58
58
  name="App",
59
- description="Manages Windows applications with three modes: 'launch' (start app by name), 'resize' (set window position/size using window_loc=[x,y] and window_size=[width,height]), 'switch' (activate app by name). Essential for application lifecycle management.",
59
+ description="Manages Windows applications with three modes: 'launch' (opens the prescibed application), 'resize' (adjusts active window size/position), 'switch' (brings specific window into focus).",
60
60
  annotations=ToolAnnotations(
61
61
  title="App",
62
62
  readOnlyHint=False,
@@ -87,7 +87,7 @@ def powershell_tool(command: str,timeout:int=10, ctx: Context = None) -> str:
87
87
 
88
88
  @mcp.tool(
89
89
  name='Snapshot',
90
- description='Captures complete desktop state including: system language, focused/opened apps, interactive elements (buttons, text fields, links, menus with coordinates), and scrollable areas. Set use_vision=True to include screenshot. Set use_dom=True for browser content to get web page elements instead of browser UI. Always call this first to understand the current desktop state before taking actions.',
90
+ description='Captures complete desktop state including: system language, focused/opened windows, interactive elements (buttons, text fields, links, menus with coordinates), and scrollable areas. Set use_vision=True to include screenshot. Set use_dom=True for browser content to get web page elements instead of browser UI. Always call this first to understand the current desktop state before taking actions.',
91
91
  annotations=ToolAnnotations(
92
92
  title="Snapshot",
93
93
  readOnlyHint=True,
@@ -106,14 +106,22 @@ def state_tool(use_vision:bool=False,use_dom:bool=False, ctx: Context = None):
106
106
  desktop_state=desktop.get_state(use_vision=use_vision,use_dom=use_dom,as_bytes=True,scale=scale)
107
107
  interactive_elements=desktop_state.tree_state.interactive_elements_to_string()
108
108
  scrollable_elements=desktop_state.tree_state.scrollable_elements_to_string()
109
- apps=desktop_state.apps_to_string()
110
- active_app=desktop_state.active_app_to_string()
111
- return [dedent(f'''
112
- Focused App:
113
- {active_app}
109
+ windows=desktop_state.windows_to_string()
110
+ active_window=desktop_state.active_window_to_string()
111
+ active_desktop=desktop_state.active_desktop_to_string()
112
+ all_desktops=desktop_state.desktops_to_string()
113
+ return [dedent(f'''
114
+ Active Desktop:
115
+ {active_desktop}
114
116
 
115
- Opened Apps:
116
- {apps}
117
+ All Desktops:
118
+ {all_desktops}
119
+
120
+ Focused Window:
121
+ {active_window}
122
+
123
+ Opened Windows:
124
+ {windows}
117
125
 
118
126
  List of Interactive Elements:
119
127
  {interactive_elements or 'No interactive elements found.'}
@@ -124,7 +132,7 @@ def state_tool(use_vision:bool=False,use_dom:bool=False, ctx: Context = None):
124
132
 
125
133
  @mcp.tool(
126
134
  name='Click',
127
- description='Performs mouse clicks at specified coordinates [x, y]. Supports button types: left (default), right (context menu), middle. Supports clicks: 1 (single), 2 (double), 3 (triple). Always use coordinates from State-Tool output to ensure accuracy.',
135
+ description="Performs mouse clicks at specified coordinates [x, y]. Supports button types: 'left' for selection/activation, 'right' for context menus, 'middle'. Supports clicks: 0=hover only (no click), 1=single click (select/focus), 2=double click (open/activate).",
128
136
  annotations=ToolAnnotations(
129
137
  title="Click",
130
138
  readOnlyHint=False,
@@ -139,12 +147,12 @@ def click_tool(loc:list[int],button:Literal['left','right','middle']='left',clic
139
147
  raise ValueError("Location must be a list of exactly 2 integers [x, y]")
140
148
  x,y=loc[0],loc[1]
141
149
  desktop.click(loc=loc,button=button,clicks=clicks)
142
- num_clicks={1:'Single',2:'Double',3:'Triple'}
150
+ num_clicks={0:'Hover',1:'Single',2:'Double'}
143
151
  return f'{num_clicks.get(clicks)} {button} clicked at ({x},{y}).'
144
152
 
145
153
  @mcp.tool(
146
154
  name='Type',
147
- description='Types text at specified coordinates [x, y]. Set clear=True to clear existing text first (Ctrl+A then type), clear=False to append. Set press_enter=True to submit after typing. Always click on the target input field first to ensure focus.',
155
+ description="Types text at specified coordinates [x, y]. Set clear=True to clear existing text first, False to append. Set press_enter=True to submit after typing. Set caret_position to 'start' (beginning), 'end' (end), or 'idle' (default).",
148
156
  annotations=ToolAnnotations(
149
157
  title="Type",
150
158
  readOnlyHint=False,
@@ -154,11 +162,11 @@ def click_tool(loc:list[int],button:Literal['left','right','middle']='left',clic
154
162
  )
155
163
  )
156
164
  @with_analytics(analytics, "Type-Tool")
157
- def type_tool(loc:list[int],text:str,clear:bool=False,press_enter:bool=False, ctx: Context = None)->str:
165
+ def type_tool(loc:list[int],text:str,clear:bool|str=False,caret_position:Literal['start', 'idle', 'end']='idle',press_enter:bool|str=False, ctx: Context = None)->str:
158
166
  if len(loc) != 2:
159
167
  raise ValueError("Location must be a list of exactly 2 integers [x, y]")
160
168
  x,y=loc[0],loc[1]
161
- desktop.type(loc=loc,text=text,clear=clear,press_enter=press_enter)
169
+ desktop.type(loc=loc,text=text,caret_position=caret_position,clear=clear,press_enter=press_enter)
162
170
  return f'Typed {text} at ({x},{y}).'
163
171
 
164
172
  @mcp.tool(
@@ -262,7 +270,41 @@ def scrape_tool(url:str,use_dom:bool=False, ctx: Context = None)->str:
262
270
  content='\n'.join([node.text for node in tree_state.dom_informative_nodes])
263
271
  header_status = "Reached top" if vertical_scroll_percent <= 0 else "Scroll up to see more"
264
272
  footer_status = "Reached bottom" if vertical_scroll_percent >= 100 else "Scroll down to see more"
265
- return f'URL:{url}\nContent:\n[{header_status}]\n{content}\n[{footer_status}]'
273
+ return f'URL:{url}\nContent:\n{header_status}\n{content}\n{footer_status}'
274
+
275
+ @mcp.tool(
276
+ name='MultiSelect',
277
+ description="Selects multiple items such as files, folders, or checkboxes if press_ctrl=True, or performs multiple clicks if False.",
278
+ annotations=ToolAnnotations(
279
+ title="MultiSelect",
280
+ readOnlyHint=False,
281
+ destructiveHint=True,
282
+ idempotentHint=False,
283
+ openWorldHint=False
284
+ )
285
+ )
286
+ @with_analytics(analytics, "Multi-Select-Tool")
287
+ def multi_select_tool(locs:list[list[int]], press_ctrl:bool=True, ctx: Context = None)->str:
288
+ desktop.multi_select(press_ctrl,locs)
289
+ elements_str = '\n'.join([f"({loc[0]},{loc[1]})" for loc in locs])
290
+ return f"Multi-selected elements at:\n{elements_str}"
291
+
292
+ @mcp.tool(
293
+ name='MultiEdit',
294
+ description="Enters text into multiple input fields at specified coordinates [[x,y,text], ...].",
295
+ annotations=ToolAnnotations(
296
+ title="MultiEdit",
297
+ readOnlyHint=False,
298
+ destructiveHint=True,
299
+ idempotentHint=False,
300
+ openWorldHint=False
301
+ )
302
+ )
303
+ @with_analytics(analytics, "Multi-Edit-Tool")
304
+ def multi_edit_tool(locs:list[list], ctx: Context = None)->str:
305
+ desktop.multi_edit(locs)
306
+ elements_str = ', '.join([f"({e[0]},{e[1]}) with text '{e[2]}'" for e in locs])
307
+ return f"Multi-edited elements at: {elements_str}"
266
308
 
267
309
 
268
310
  @click.command()