windows-mcp 0.5.9__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- windows_mcp/__main__.py +57 -15
- windows_mcp/desktop/service.py +182 -100
- windows_mcp/desktop/views.py +32 -15
- windows_mcp/tree/service.py +29 -29
- windows_mcp/tree/utils.py +21 -21
- windows_mcp/tree/views.py +9 -9
- windows_mcp/uia/controls.py +0 -42
- windows_mcp/uia/core.py +19 -929
- windows_mcp/vdm/core.py +147 -135
- {windows_mcp-0.5.9.dist-info → windows_mcp-0.6.1.dist-info}/METADATA +4 -5
- {windows_mcp-0.5.9.dist-info → windows_mcp-0.6.1.dist-info}/RECORD +14 -14
- {windows_mcp-0.5.9.dist-info → windows_mcp-0.6.1.dist-info}/WHEEL +0 -0
- {windows_mcp-0.5.9.dist-info → windows_mcp-0.6.1.dist-info}/entry_points.txt +0 -0
- {windows_mcp-0.5.9.dist-info → windows_mcp-0.6.1.dist-info}/licenses/LICENSE.md +0 -0
windows_mcp/__main__.py
CHANGED
|
@@ -56,7 +56,7 @@ mcp=FastMCP(name='windows-mcp',instructions=instructions,lifespan=lifespan)
|
|
|
56
56
|
|
|
57
57
|
@mcp.tool(
|
|
58
58
|
name="App",
|
|
59
|
-
description="Manages Windows applications with three modes: 'launch' (
|
|
59
|
+
description="Manages Windows applications with three modes: 'launch' (opens the prescibed application), 'resize' (adjusts active window size/position), 'switch' (brings specific window into focus).",
|
|
60
60
|
annotations=ToolAnnotations(
|
|
61
61
|
title="App",
|
|
62
62
|
readOnlyHint=False,
|
|
@@ -87,7 +87,7 @@ def powershell_tool(command: str,timeout:int=10, ctx: Context = None) -> str:
|
|
|
87
87
|
|
|
88
88
|
@mcp.tool(
|
|
89
89
|
name='Snapshot',
|
|
90
|
-
description='Captures complete desktop state including: system language, focused/opened
|
|
90
|
+
description='Captures complete desktop state including: system language, focused/opened windows, interactive elements (buttons, text fields, links, menus with coordinates), and scrollable areas. Set use_vision=True to include screenshot. Set use_dom=True for browser content to get web page elements instead of browser UI. Always call this first to understand the current desktop state before taking actions.',
|
|
91
91
|
annotations=ToolAnnotations(
|
|
92
92
|
title="Snapshot",
|
|
93
93
|
readOnlyHint=True,
|
|
@@ -106,14 +106,22 @@ def state_tool(use_vision:bool=False,use_dom:bool=False, ctx: Context = None):
|
|
|
106
106
|
desktop_state=desktop.get_state(use_vision=use_vision,use_dom=use_dom,as_bytes=True,scale=scale)
|
|
107
107
|
interactive_elements=desktop_state.tree_state.interactive_elements_to_string()
|
|
108
108
|
scrollable_elements=desktop_state.tree_state.scrollable_elements_to_string()
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
109
|
+
windows=desktop_state.windows_to_string()
|
|
110
|
+
active_window=desktop_state.active_window_to_string()
|
|
111
|
+
active_desktop=desktop_state.active_desktop_to_string()
|
|
112
|
+
all_desktops=desktop_state.desktops_to_string()
|
|
113
|
+
return [dedent(f'''
|
|
114
|
+
Active Desktop:
|
|
115
|
+
{active_desktop}
|
|
114
116
|
|
|
115
|
-
|
|
116
|
-
{
|
|
117
|
+
All Desktops:
|
|
118
|
+
{all_desktops}
|
|
119
|
+
|
|
120
|
+
Focused Window:
|
|
121
|
+
{active_window}
|
|
122
|
+
|
|
123
|
+
Opened Windows:
|
|
124
|
+
{windows}
|
|
117
125
|
|
|
118
126
|
List of Interactive Elements:
|
|
119
127
|
{interactive_elements or 'No interactive elements found.'}
|
|
@@ -124,7 +132,7 @@ def state_tool(use_vision:bool=False,use_dom:bool=False, ctx: Context = None):
|
|
|
124
132
|
|
|
125
133
|
@mcp.tool(
|
|
126
134
|
name='Click',
|
|
127
|
-
description=
|
|
135
|
+
description="Performs mouse clicks at specified coordinates [x, y]. Supports button types: 'left' for selection/activation, 'right' for context menus, 'middle'. Supports clicks: 0=hover only (no click), 1=single click (select/focus), 2=double click (open/activate).",
|
|
128
136
|
annotations=ToolAnnotations(
|
|
129
137
|
title="Click",
|
|
130
138
|
readOnlyHint=False,
|
|
@@ -139,12 +147,12 @@ def click_tool(loc:list[int],button:Literal['left','right','middle']='left',clic
|
|
|
139
147
|
raise ValueError("Location must be a list of exactly 2 integers [x, y]")
|
|
140
148
|
x,y=loc[0],loc[1]
|
|
141
149
|
desktop.click(loc=loc,button=button,clicks=clicks)
|
|
142
|
-
num_clicks={1:'Single',2:'Double'
|
|
150
|
+
num_clicks={0:'Hover',1:'Single',2:'Double'}
|
|
143
151
|
return f'{num_clicks.get(clicks)} {button} clicked at ({x},{y}).'
|
|
144
152
|
|
|
145
153
|
@mcp.tool(
|
|
146
154
|
name='Type',
|
|
147
|
-
description=
|
|
155
|
+
description="Types text at specified coordinates [x, y]. Set clear=True to clear existing text first, False to append. Set press_enter=True to submit after typing. Set caret_position to 'start' (beginning), 'end' (end), or 'idle' (default).",
|
|
148
156
|
annotations=ToolAnnotations(
|
|
149
157
|
title="Type",
|
|
150
158
|
readOnlyHint=False,
|
|
@@ -154,11 +162,11 @@ def click_tool(loc:list[int],button:Literal['left','right','middle']='left',clic
|
|
|
154
162
|
)
|
|
155
163
|
)
|
|
156
164
|
@with_analytics(analytics, "Type-Tool")
|
|
157
|
-
def type_tool(loc:list[int],text:str,clear:bool=False,press_enter:bool=False, ctx: Context = None)->str:
|
|
165
|
+
def type_tool(loc:list[int],text:str,clear:bool|str=False,caret_position:Literal['start', 'idle', 'end']='idle',press_enter:bool|str=False, ctx: Context = None)->str:
|
|
158
166
|
if len(loc) != 2:
|
|
159
167
|
raise ValueError("Location must be a list of exactly 2 integers [x, y]")
|
|
160
168
|
x,y=loc[0],loc[1]
|
|
161
|
-
desktop.type(loc=loc,text=text,clear=clear,press_enter=press_enter)
|
|
169
|
+
desktop.type(loc=loc,text=text,caret_position=caret_position,clear=clear,press_enter=press_enter)
|
|
162
170
|
return f'Typed {text} at ({x},{y}).'
|
|
163
171
|
|
|
164
172
|
@mcp.tool(
|
|
@@ -262,7 +270,41 @@ def scrape_tool(url:str,use_dom:bool=False, ctx: Context = None)->str:
|
|
|
262
270
|
content='\n'.join([node.text for node in tree_state.dom_informative_nodes])
|
|
263
271
|
header_status = "Reached top" if vertical_scroll_percent <= 0 else "Scroll up to see more"
|
|
264
272
|
footer_status = "Reached bottom" if vertical_scroll_percent >= 100 else "Scroll down to see more"
|
|
265
|
-
return f'URL:{url}\nContent:\n
|
|
273
|
+
return f'URL:{url}\nContent:\n{header_status}\n{content}\n{footer_status}'
|
|
274
|
+
|
|
275
|
+
@mcp.tool(
|
|
276
|
+
name='MultiSelect',
|
|
277
|
+
description="Selects multiple items such as files, folders, or checkboxes if press_ctrl=True, or performs multiple clicks if False.",
|
|
278
|
+
annotations=ToolAnnotations(
|
|
279
|
+
title="MultiSelect",
|
|
280
|
+
readOnlyHint=False,
|
|
281
|
+
destructiveHint=True,
|
|
282
|
+
idempotentHint=False,
|
|
283
|
+
openWorldHint=False
|
|
284
|
+
)
|
|
285
|
+
)
|
|
286
|
+
@with_analytics(analytics, "Multi-Select-Tool")
|
|
287
|
+
def multi_select_tool(locs:list[list[int]], press_ctrl:bool=True, ctx: Context = None)->str:
|
|
288
|
+
desktop.multi_select(press_ctrl,locs)
|
|
289
|
+
elements_str = '\n'.join([f"({loc[0]},{loc[1]})" for loc in locs])
|
|
290
|
+
return f"Multi-selected elements at:\n{elements_str}"
|
|
291
|
+
|
|
292
|
+
@mcp.tool(
|
|
293
|
+
name='MultiEdit',
|
|
294
|
+
description="Enters text into multiple input fields at specified coordinates [[x,y,text], ...].",
|
|
295
|
+
annotations=ToolAnnotations(
|
|
296
|
+
title="MultiEdit",
|
|
297
|
+
readOnlyHint=False,
|
|
298
|
+
destructiveHint=True,
|
|
299
|
+
idempotentHint=False,
|
|
300
|
+
openWorldHint=False
|
|
301
|
+
)
|
|
302
|
+
)
|
|
303
|
+
@with_analytics(analytics, "Multi-Edit-Tool")
|
|
304
|
+
def multi_edit_tool(locs:list[list], ctx: Context = None)->str:
|
|
305
|
+
desktop.multi_edit(locs)
|
|
306
|
+
elements_str = ', '.join([f"({e[0]},{e[1]}) with text '{e[2]}'" for e in locs])
|
|
307
|
+
return f"Multi-edited elements at: {elements_str}"
|
|
266
308
|
|
|
267
309
|
|
|
268
310
|
@click.command()
|
windows_mcp/desktop/service.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
from windows_mcp.
|
|
2
|
-
from windows_mcp.desktop.views import DesktopState,
|
|
1
|
+
from windows_mcp.vdm.core import get_all_desktops, get_current_desktop, is_window_on_current_desktop
|
|
2
|
+
from windows_mcp.desktop.views import DesktopState, Window, Browser, Status, Size
|
|
3
|
+
from windows_mcp.desktop.config import PROCESS_PER_MONITOR_DPI_AWARE
|
|
3
4
|
from windows_mcp.tree.views import BoundingBox, TreeElementNode
|
|
4
5
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
5
6
|
from PIL import ImageGrab, ImageFont, ImageDraw, Image
|
|
@@ -46,24 +47,30 @@ class Desktop:
|
|
|
46
47
|
self.desktop_state=None
|
|
47
48
|
|
|
48
49
|
def get_state(self,use_annotation:bool=True,use_vision:bool=False,use_dom:bool=False,as_bytes:bool=False,scale:float=1.0)->DesktopState:
|
|
49
|
-
sleep(0.1)
|
|
50
50
|
start_time = time()
|
|
51
51
|
|
|
52
52
|
controls_handles=self.get_controls_handles() # Taskbar,Program Manager,Apps, Dialogs
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
53
|
+
windows,windows_handles=self.get_windows(controls_handles=controls_handles) # Apps
|
|
54
|
+
active_window=self.get_active_window(windows=windows) #Active Window
|
|
55
|
+
active_window_handle=active_window.handle if active_window else None
|
|
56
56
|
|
|
57
|
-
|
|
58
|
-
|
|
57
|
+
try:
|
|
58
|
+
active_desktop=get_current_desktop()
|
|
59
|
+
all_desktops=get_all_desktops()
|
|
60
|
+
except RuntimeError:
|
|
61
|
+
active_desktop = {'id': '00000000-0000-0000-0000-000000000000', 'name': 'Default Desktop'}
|
|
62
|
+
all_desktops = [active_desktop]
|
|
63
|
+
|
|
64
|
+
if active_window is not None and active_window in windows:
|
|
65
|
+
windows.remove(active_window)
|
|
59
66
|
|
|
60
|
-
logger.debug(f"Active
|
|
61
|
-
logger.debug(f"
|
|
67
|
+
logger.debug(f"Active window: {active_window or 'No Active Window Found'}")
|
|
68
|
+
logger.debug(f"Windows: {windows}")
|
|
62
69
|
|
|
63
70
|
#Preparing handles for Tree
|
|
64
|
-
|
|
71
|
+
other_windows_handles=list(controls_handles-windows_handles)
|
|
65
72
|
|
|
66
|
-
tree_state=self.tree.get_state(
|
|
73
|
+
tree_state=self.tree.get_state(active_window_handle,other_windows_handles,use_dom=use_dom)
|
|
67
74
|
|
|
68
75
|
if use_vision:
|
|
69
76
|
if use_annotation:
|
|
@@ -83,13 +90,20 @@ class Desktop:
|
|
|
83
90
|
else:
|
|
84
91
|
screenshot=None
|
|
85
92
|
|
|
86
|
-
self.desktop_state=DesktopState(
|
|
93
|
+
self.desktop_state=DesktopState(
|
|
94
|
+
active_window=active_window,
|
|
95
|
+
windows=windows,
|
|
96
|
+
active_desktop=active_desktop,
|
|
97
|
+
all_desktops=all_desktops,
|
|
98
|
+
screenshot=screenshot,
|
|
99
|
+
tree_state=tree_state
|
|
100
|
+
)
|
|
87
101
|
# Log the time taken to capture the state
|
|
88
102
|
end_time = time()
|
|
89
103
|
logger.info(f"Desktop State capture took {end_time - start_time:.2f} seconds")
|
|
90
104
|
return self.desktop_state
|
|
91
105
|
|
|
92
|
-
def
|
|
106
|
+
def get_window_status(self,control:uia.Control)->Status:
|
|
93
107
|
if uia.IsIconic(control.NativeWindowHandle):
|
|
94
108
|
return Status.MINIMIZED
|
|
95
109
|
elif uia.IsZoomed(control.NativeWindowHandle):
|
|
@@ -117,8 +131,8 @@ class Desktop:
|
|
|
117
131
|
try:
|
|
118
132
|
reader = csv.DictReader(io.StringIO(apps_info.strip()))
|
|
119
133
|
return {
|
|
120
|
-
row.get('Name').lower(): row.get('AppID')
|
|
121
|
-
for row in reader
|
|
134
|
+
row.get('Name', '').lower(): row.get('AppID', '')
|
|
135
|
+
for row in reader
|
|
122
136
|
if row.get('Name') and row.get('AppID')
|
|
123
137
|
}
|
|
124
138
|
except Exception as e:
|
|
@@ -129,10 +143,11 @@ class Desktop:
|
|
|
129
143
|
try:
|
|
130
144
|
encoded = base64.b64encode(command.encode("utf-16le")).decode("ascii")
|
|
131
145
|
result = subprocess.run(
|
|
132
|
-
['powershell', '-NoProfile', '-EncodedCommand', encoded],
|
|
146
|
+
['powershell', '-NoProfile', '-OutputFormat', 'Text', '-EncodedCommand', encoded],
|
|
133
147
|
capture_output=True, # No errors='ignore' - let subprocess return bytes
|
|
134
148
|
timeout=timeout,
|
|
135
|
-
cwd=os.path.expanduser(path='~')
|
|
149
|
+
cwd=os.path.expanduser(path='~'),
|
|
150
|
+
env=os.environ.copy() # Inherit environment variables including PATH
|
|
136
151
|
)
|
|
137
152
|
# Handle both bytes and str output (subprocess behavior varies by environment)
|
|
138
153
|
stdout = result.stdout
|
|
@@ -147,11 +162,11 @@ class Desktop:
|
|
|
147
162
|
except Exception as e:
|
|
148
163
|
return (f'Command execution failed: {type(e).__name__}: {e}', 1)
|
|
149
164
|
|
|
150
|
-
def
|
|
165
|
+
def is_window_browser(self,node:uia.Control):
|
|
151
166
|
'''Give any node of the app and it will return True if the app is a browser, False otherwise.'''
|
|
152
167
|
try:
|
|
153
168
|
process=Process(node.ProcessId)
|
|
154
|
-
return process.name()
|
|
169
|
+
return Browser.has_process(process.name())
|
|
155
170
|
except:
|
|
156
171
|
return False
|
|
157
172
|
|
|
@@ -162,32 +177,32 @@ class Desktop:
|
|
|
162
177
|
return "".join([row.get('DisplayName') for row in reader])
|
|
163
178
|
|
|
164
179
|
def resize_app(self,size:tuple[int,int]=None,loc:tuple[int,int]=None)->tuple[str,int]:
|
|
165
|
-
|
|
166
|
-
if
|
|
167
|
-
return "No active
|
|
168
|
-
if
|
|
169
|
-
return f"{
|
|
170
|
-
elif
|
|
171
|
-
return f"{
|
|
180
|
+
active_window=self.desktop_state.active_window
|
|
181
|
+
if active_window is None:
|
|
182
|
+
return "No active window found",1
|
|
183
|
+
if active_window.status==Status.MINIMIZED:
|
|
184
|
+
return f"{active_window.name} is minimized",1
|
|
185
|
+
elif active_window.status==Status.MAXIMIZED:
|
|
186
|
+
return f"{active_window.name} is maximized",1
|
|
172
187
|
else:
|
|
173
|
-
|
|
188
|
+
window_control=uia.ControlFromHandle(active_window.handle)
|
|
174
189
|
if loc is None:
|
|
175
|
-
x=
|
|
176
|
-
y=
|
|
190
|
+
x=window_control.BoundingRectangle.left
|
|
191
|
+
y=window_control.BoundingRectangle.top
|
|
177
192
|
loc=(x,y)
|
|
178
193
|
if size is None:
|
|
179
|
-
width=
|
|
180
|
-
height=
|
|
194
|
+
width=window_control.BoundingRectangle.width()
|
|
195
|
+
height=window_control.BoundingRectangle.height()
|
|
181
196
|
size=(width,height)
|
|
182
197
|
x,y=loc
|
|
183
198
|
width,height=size
|
|
184
|
-
|
|
185
|
-
return (f'{
|
|
199
|
+
window_control.MoveWindow(x,y,width,height)
|
|
200
|
+
return (f'{active_window.name} resized to {width}x{height} at {x},{y}.',0)
|
|
186
201
|
|
|
187
202
|
def is_app_running(self,name:str)->bool:
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
return process.extractOne(name,list(
|
|
203
|
+
windows, _ = self.get_windows()
|
|
204
|
+
windows_dict = {window.name: window for window in windows}
|
|
205
|
+
return process.extractOne(name,list(windows_dict.keys()),score_cutoff=60) is not None
|
|
191
206
|
|
|
192
207
|
def app(self,mode:Literal['launch','switch','resize'],name:Optional[str]=None,loc:Optional[tuple[int,int]]=None,size:Optional[tuple[int,int]]=None):
|
|
193
208
|
match mode:
|
|
@@ -207,7 +222,7 @@ class Desktop:
|
|
|
207
222
|
safe_name = re.escape(name)
|
|
208
223
|
if uia.WindowControl(RegexName=f'(?i).*{safe_name}.*').Exists(maxSearchSeconds=10):
|
|
209
224
|
launched = True
|
|
210
|
-
|
|
225
|
+
|
|
211
226
|
if launched:
|
|
212
227
|
return f'{name.title()} launched.'
|
|
213
228
|
return f'Launching {name.title()} sent, but window not detected yet.'
|
|
@@ -232,37 +247,41 @@ class Desktop:
|
|
|
232
247
|
app_name,_=matched_app
|
|
233
248
|
appid=apps_map.get(app_name)
|
|
234
249
|
if appid is None:
|
|
235
|
-
return (
|
|
250
|
+
return (f'{name.title()} not found in start menu.',1,0)
|
|
236
251
|
|
|
237
252
|
pid = 0
|
|
238
253
|
if os.path.exists(appid) or "\\" in appid:
|
|
239
254
|
# It's a file path, we can try to get the PID using PassThru
|
|
240
|
-
|
|
255
|
+
# Escape any single quotes and wrap in single quotes for PowerShell safety
|
|
256
|
+
safe_appid = appid.replace("'", "''")
|
|
257
|
+
command = f"Start-Process '{safe_appid}' -PassThru | Select-Object -ExpandProperty Id"
|
|
241
258
|
response, status = self.execute_command(command)
|
|
242
259
|
if status == 0 and response.strip().isdigit():
|
|
243
260
|
pid = int(response.strip())
|
|
244
261
|
else:
|
|
245
|
-
# It's an AUMID (Store App)
|
|
262
|
+
# It's an AUMID (Store App) - validate it only contains expected characters
|
|
263
|
+
if not appid.replace('\\', '').replace('_', '').replace('.', '').replace('-', '').isalnum():
|
|
264
|
+
return (f'Invalid app identifier: {appid}', 1, 0)
|
|
246
265
|
command = f'Start-Process "shell:AppsFolder\\{appid}"'
|
|
247
266
|
response, status = self.execute_command(command)
|
|
248
267
|
|
|
249
268
|
return response, status, pid
|
|
250
269
|
|
|
251
270
|
def switch_app(self,name:str):
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
if
|
|
271
|
+
windows={window.name:window for window in [self.desktop_state.active_window]+self.desktop_state.windows if window is not None}
|
|
272
|
+
matched_window:Optional[tuple[str,float]]=process.extractOne(name,list(windows.keys()),score_cutoff=70)
|
|
273
|
+
if matched_window is None:
|
|
255
274
|
return (f'Application {name.title()} not found.',1)
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
target_handle=
|
|
275
|
+
window_name,_=matched_window
|
|
276
|
+
window=windows.get(window_name)
|
|
277
|
+
target_handle=window.handle
|
|
259
278
|
|
|
260
279
|
if uia.IsIconic(target_handle):
|
|
261
280
|
uia.ShowWindow(target_handle, win32con.SW_RESTORE)
|
|
262
|
-
content=f'{
|
|
281
|
+
content=f'{window_name.title()} restored from Minimized state.'
|
|
263
282
|
else:
|
|
264
283
|
self.bring_window_to_top(target_handle)
|
|
265
|
-
content=f'Switched to {
|
|
284
|
+
content=f'Switched to {window_name.title()} window.'
|
|
266
285
|
return content,0
|
|
267
286
|
|
|
268
287
|
def bring_window_to_top(self, target_handle: int):
|
|
@@ -274,6 +293,14 @@ class Desktop:
|
|
|
274
293
|
win32gui.ShowWindow(target_handle, win32con.SW_RESTORE)
|
|
275
294
|
|
|
276
295
|
foreground_handle = win32gui.GetForegroundWindow()
|
|
296
|
+
|
|
297
|
+
# Validate both handles before proceeding
|
|
298
|
+
if not win32gui.IsWindow(foreground_handle):
|
|
299
|
+
# No valid foreground window, just try to set target as foreground
|
|
300
|
+
win32gui.SetForegroundWindow(target_handle)
|
|
301
|
+
win32gui.BringWindowToTop(target_handle)
|
|
302
|
+
return
|
|
303
|
+
|
|
277
304
|
foreground_thread, _ = win32process.GetWindowThreadProcessId(foreground_handle)
|
|
278
305
|
target_thread, _ = win32process.GetWindowThreadProcessId(target_handle)
|
|
279
306
|
|
|
@@ -322,7 +349,7 @@ class Desktop:
|
|
|
322
349
|
x,y=loc
|
|
323
350
|
pg.click(x,y,button=button,clicks=clicks,duration=0.1)
|
|
324
351
|
|
|
325
|
-
def type(self,loc:tuple[int,int],text:str,caret_position:Literal['start','
|
|
352
|
+
def type(self,loc:tuple[int,int],text:str,caret_position:Literal['start', 'idle', 'end']='idle',clear:bool|str=False,press_enter:bool|str=False):
|
|
326
353
|
x,y=loc
|
|
327
354
|
pg.leftClick(x,y)
|
|
328
355
|
if caret_position == 'start':
|
|
@@ -331,12 +358,16 @@ class Desktop:
|
|
|
331
358
|
pg.press('end')
|
|
332
359
|
else:
|
|
333
360
|
pass
|
|
334
|
-
|
|
361
|
+
|
|
362
|
+
# Handle both boolean and string 'true'/'false'
|
|
363
|
+
if clear is True or (isinstance(clear, str) and clear.lower() == 'true'):
|
|
335
364
|
pg.sleep(0.5)
|
|
336
365
|
pg.hotkey('ctrl','a')
|
|
337
366
|
pg.press('backspace')
|
|
367
|
+
|
|
338
368
|
pg.typewrite(text,interval=0.02)
|
|
339
|
-
|
|
369
|
+
|
|
370
|
+
if press_enter is True or (isinstance(press_enter, str) and press_enter.lower() == 'true'):
|
|
340
371
|
pg.press('enter')
|
|
341
372
|
|
|
342
373
|
def scroll(self,loc:tuple[int,int]=None,type:Literal['horizontal','vertical']='vertical',direction:Literal['up','down','left','right']='down',wheel_times:int=1)->str|None:
|
|
@@ -387,19 +418,19 @@ class Desktop:
|
|
|
387
418
|
else:
|
|
388
419
|
pg.press(''.join(shortcut))
|
|
389
420
|
|
|
390
|
-
def multi_select(self,press_ctrl:
|
|
391
|
-
if press_ctrl
|
|
421
|
+
def multi_select(self,press_ctrl:bool=False,locs:list[tuple[int,int]]=[]):
|
|
422
|
+
if press_ctrl:
|
|
392
423
|
pg.keyDown('ctrl')
|
|
393
|
-
for
|
|
394
|
-
x,y=
|
|
424
|
+
for loc in locs:
|
|
425
|
+
x,y=loc
|
|
395
426
|
pg.click(x,y,duration=0.2)
|
|
396
427
|
pg.sleep(0.5)
|
|
397
428
|
pg.keyUp('ctrl')
|
|
398
429
|
|
|
399
|
-
def multi_edit(self,
|
|
400
|
-
for
|
|
401
|
-
x,y,text=
|
|
402
|
-
self.type((x,y),text=text,clear=
|
|
430
|
+
def multi_edit(self,locs:list[tuple[int,int,str]]):
|
|
431
|
+
for loc in locs:
|
|
432
|
+
x,y,text=loc
|
|
433
|
+
self.type((x,y),text=text,clear=True)
|
|
403
434
|
|
|
404
435
|
def scrape(self,url:str)->str:
|
|
405
436
|
response=requests.get(url,timeout=10)
|
|
@@ -407,56 +438,105 @@ class Desktop:
|
|
|
407
438
|
content=markdownify(html=html)
|
|
408
439
|
return content
|
|
409
440
|
|
|
410
|
-
def
|
|
441
|
+
def get_window_from_element(self,element:uia.Control)->Window|None:
|
|
411
442
|
if element is None:
|
|
412
443
|
return None
|
|
413
444
|
top_window=element.GetTopLevelControl()
|
|
414
445
|
if top_window is None:
|
|
415
446
|
return None
|
|
416
447
|
handle=top_window.NativeWindowHandle
|
|
417
|
-
|
|
418
|
-
for
|
|
419
|
-
if
|
|
420
|
-
return
|
|
448
|
+
windows,_=self.get_windows()
|
|
449
|
+
for window in windows:
|
|
450
|
+
if window.handle==handle:
|
|
451
|
+
return window
|
|
421
452
|
return None
|
|
422
453
|
|
|
423
|
-
def
|
|
424
|
-
is_minimized=self.
|
|
425
|
-
size=
|
|
454
|
+
def is_window_visible(self,window:uia.Control)->bool:
|
|
455
|
+
is_minimized=self.get_window_status(window)!=Status.MINIMIZED
|
|
456
|
+
size=window.BoundingRectangle
|
|
426
457
|
area=size.width()*size.height()
|
|
427
|
-
is_overlay=self.
|
|
458
|
+
is_overlay=self.is_overlay_window(window)
|
|
428
459
|
return not is_overlay and is_minimized and area>10
|
|
429
460
|
|
|
430
|
-
def
|
|
461
|
+
def is_overlay_window(self,element:uia.Control) -> bool:
|
|
431
462
|
no_children = len(element.GetChildren()) == 0
|
|
432
463
|
is_name = "Overlay" in element.Name.strip()
|
|
433
464
|
return no_children or is_name
|
|
434
465
|
|
|
435
|
-
def get_controls_handles(self):
|
|
466
|
+
def get_controls_handles(self,optimized:bool=False):
|
|
436
467
|
handles = set()
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
468
|
+
# For even more faster results (still under development)
|
|
469
|
+
def callback(hwnd, _):
|
|
470
|
+
try:
|
|
471
|
+
# Validate handle before checking properties
|
|
472
|
+
if win32gui.IsWindow(hwnd) and win32gui.IsWindowVisible(hwnd) and is_window_on_current_desktop(hwnd):
|
|
473
|
+
handles.add(hwnd)
|
|
474
|
+
except Exception:
|
|
475
|
+
# Skip invalid handles without logging (common during window enumeration)
|
|
476
|
+
pass
|
|
477
|
+
|
|
478
|
+
win32gui.EnumWindows(callback, None)
|
|
479
|
+
|
|
480
|
+
if desktop_hwnd:= win32gui.FindWindow('Progman',None):
|
|
481
|
+
handles.add(desktop_hwnd)
|
|
482
|
+
if taskbar_hwnd:= win32gui.FindWindow('Shell_TrayWnd',None):
|
|
483
|
+
handles.add(taskbar_hwnd)
|
|
484
|
+
if secondary_taskbar_hwnd:= win32gui.FindWindow('Shell_SecondaryTrayWnd',None):
|
|
485
|
+
handles.add(secondary_taskbar_hwnd)
|
|
441
486
|
return handles
|
|
442
487
|
|
|
443
|
-
def
|
|
488
|
+
def get_active_window(self,windows:list[Window]|None=None)->Window|None:
|
|
444
489
|
try:
|
|
445
|
-
if
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
490
|
+
if windows is None:
|
|
491
|
+
windows,_=self.get_windows()
|
|
492
|
+
active_window=self.get_foreground_window()
|
|
493
|
+
if active_window.ClassName=="Progman":
|
|
494
|
+
return None
|
|
495
|
+
active_window_handle=active_window.NativeWindowHandle
|
|
496
|
+
for window in windows:
|
|
497
|
+
if window.handle!=active_window_handle:
|
|
450
498
|
continue
|
|
451
|
-
return
|
|
499
|
+
return window
|
|
500
|
+
# In case active window is not present in the windows list
|
|
501
|
+
return Window(**{
|
|
502
|
+
"name":active_window.Name,
|
|
503
|
+
"is_browser":self.is_window_browser(active_window),
|
|
504
|
+
"depth":0,
|
|
505
|
+
"bounding_box":BoundingBox(
|
|
506
|
+
left=active_window.BoundingRectangle.left,
|
|
507
|
+
top=active_window.BoundingRectangle.top,
|
|
508
|
+
right=active_window.BoundingRectangle.right,
|
|
509
|
+
bottom=active_window.BoundingRectangle.bottom,
|
|
510
|
+
width=active_window.BoundingRectangle.width(),
|
|
511
|
+
height=active_window.BoundingRectangle.height()
|
|
512
|
+
),
|
|
513
|
+
"status":self.get_window_status(active_window),
|
|
514
|
+
"handle":active_window_handle,
|
|
515
|
+
"process_id":active_window.ProcessId,
|
|
516
|
+
})
|
|
452
517
|
except Exception as ex:
|
|
453
|
-
logger.error(f"Error in
|
|
518
|
+
logger.error(f"Error in get_active_window: {ex}")
|
|
454
519
|
return None
|
|
520
|
+
|
|
521
|
+
def get_foreground_window(self)->uia.Control:
|
|
522
|
+
handle=uia.GetForegroundWindow()
|
|
523
|
+
active_window=self.get_window_from_element_handle(handle)
|
|
524
|
+
return active_window
|
|
525
|
+
|
|
526
|
+
def get_window_from_element_handle(self, element_handle: int) -> uia.Control:
|
|
527
|
+
current = uia.ControlFromHandle(element_handle)
|
|
528
|
+
root_handle = uia.GetRootControl().NativeWindowHandle
|
|
455
529
|
|
|
456
|
-
|
|
530
|
+
while True:
|
|
531
|
+
parent = current.GetParentControl()
|
|
532
|
+
if parent is None or parent.NativeWindowHandle == root_handle:
|
|
533
|
+
return current
|
|
534
|
+
current = parent
|
|
535
|
+
|
|
536
|
+
def get_windows(self,controls_handles:set[int]|None=None) -> tuple[list[Window],set[int]]:
|
|
457
537
|
try:
|
|
458
|
-
|
|
459
|
-
|
|
538
|
+
windows = []
|
|
539
|
+
window_handles = set()
|
|
460
540
|
controls_handles=controls_handles or self.get_controls_handles()
|
|
461
541
|
for depth, hwnd in enumerate(controls_handles):
|
|
462
542
|
try:
|
|
@@ -465,7 +545,7 @@ class Desktop:
|
|
|
465
545
|
continue
|
|
466
546
|
|
|
467
547
|
# Filter out Overlays (e.g. NVIDIA, Steam)
|
|
468
|
-
if self.
|
|
548
|
+
if self.is_overlay_window(child):
|
|
469
549
|
continue
|
|
470
550
|
|
|
471
551
|
if isinstance(child,(uia.WindowControl,uia.PaneControl)):
|
|
@@ -474,15 +554,14 @@ class Desktop:
|
|
|
474
554
|
continue
|
|
475
555
|
|
|
476
556
|
if window_pattern.CanMinimize and window_pattern.CanMaximize:
|
|
477
|
-
status = self.
|
|
557
|
+
status = self.get_window_status(child)
|
|
478
558
|
|
|
479
559
|
bounding_rect=child.BoundingRectangle
|
|
480
560
|
if bounding_rect.isempty() and status!=Status.MINIMIZED:
|
|
481
561
|
continue
|
|
482
562
|
|
|
483
|
-
|
|
563
|
+
windows.append(Window(**{
|
|
484
564
|
"name":child.Name,
|
|
485
|
-
"runtime_id":tuple(child.GetRuntimeId()),
|
|
486
565
|
"depth":depth,
|
|
487
566
|
"status":status,
|
|
488
567
|
"bounding_box":BoundingBox(
|
|
@@ -495,13 +574,13 @@ class Desktop:
|
|
|
495
574
|
),
|
|
496
575
|
"handle":child.NativeWindowHandle,
|
|
497
576
|
"process_id":child.ProcessId,
|
|
498
|
-
"is_browser":self.
|
|
577
|
+
"is_browser":self.is_window_browser(child)
|
|
499
578
|
}))
|
|
500
|
-
|
|
579
|
+
window_handles.add(child.NativeWindowHandle)
|
|
501
580
|
except Exception as ex:
|
|
502
|
-
logger.error(f"Error in
|
|
503
|
-
|
|
504
|
-
return
|
|
581
|
+
logger.error(f"Error in get_windows: {ex}")
|
|
582
|
+
windows = []
|
|
583
|
+
return windows,window_handles
|
|
505
584
|
|
|
506
585
|
def get_xpath_from_element(self,element:uia.Control):
|
|
507
586
|
current=element
|
|
@@ -556,9 +635,13 @@ class Desktop:
|
|
|
556
635
|
return "Local Account" if response.strip()=='Local' else "Microsoft Account" if status==0 else "Local Account"
|
|
557
636
|
|
|
558
637
|
def get_dpi_scaling(self):
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
638
|
+
try:
|
|
639
|
+
user32 = ctypes.windll.user32
|
|
640
|
+
dpi = user32.GetDpiForSystem()
|
|
641
|
+
return dpi / 96.0 if dpi > 0 else 1.0
|
|
642
|
+
except Exception:
|
|
643
|
+
# Fallback to standard DPI if system call fails
|
|
644
|
+
return 1.0
|
|
562
645
|
|
|
563
646
|
def get_screen_size(self)->Size:
|
|
564
647
|
width, height = uia.GetVirtualScreenSize()
|
|
@@ -568,12 +651,11 @@ class Desktop:
|
|
|
568
651
|
try:
|
|
569
652
|
return ImageGrab.grab(all_screens=True)
|
|
570
653
|
except Exception as e:
|
|
571
|
-
logger.warning(f"Failed to capture
|
|
654
|
+
logger.warning(f"Failed to capture virtual screen, using primary screen")
|
|
572
655
|
return pg.screenshot()
|
|
573
656
|
|
|
574
657
|
def get_annotated_screenshot(self, nodes: list[TreeElementNode]) -> Image.Image:
|
|
575
658
|
screenshot = self.get_screenshot()
|
|
576
|
-
sleep(0.10)
|
|
577
659
|
# Add padding
|
|
578
660
|
padding = 5
|
|
579
661
|
width = int(screenshot.width + (1.5 * padding))
|