windows-mcp 0.5.8__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,458 +1,638 @@
1
- from windows_mcp.desktop.config import BROWSER_NAMES, PROCESS_PER_MONITOR_DPI_AWARE
2
- from windows_mcp.desktop.views import DesktopState, App, Size, Status
3
- from windows_mcp.tree.service import Tree
4
- from locale import getpreferredencoding
5
- from contextlib import contextmanager
6
- from typing import Optional,Literal
7
- from markdownify import markdownify
8
- from fuzzywuzzy import process
9
- from psutil import Process
10
- from time import sleep
11
- from PIL import Image
12
- import win32process
13
- import subprocess
14
- import win32gui
15
- import win32con
16
- import requests
17
- import logging
18
- import base64
19
- import ctypes
20
- import csv
21
- import re
22
- import os
23
- import io
24
-
25
# Module-level logger with a simple "[LEVEL] message" console format.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
formatter = logging.Formatter('[%(levelname)s] %(message)s')
handler.setFormatter(formatter)
# Attach the console handler only once: re-executing this module (e.g. via
# importlib.reload) would otherwise stack handlers and duplicate every line.
if not logger.handlers:
    logger.addHandler(handler)

# Opt in to per-monitor DPI awareness; fall back to the older system-wide
# call when the shcore API is unavailable or fails.
try:
    ctypes.windll.shcore.SetProcessDpiAwareness(PROCESS_PER_MONITOR_DPI_AWARE)
except Exception:
    ctypes.windll.user32.SetProcessDPIAware()

# NOTE(review): these imports deliberately follow the DPI call, presumably so
# both libraries observe the DPI-aware process -- confirm before reordering.
import windows_mcp.uia as uia
import pyautogui as pg

# Disable pyautogui's move-to-corner abort and pause 1s after each pyautogui call.
pg.FAILSAFE = False
pg.PAUSE = 1.0
42
-
43
- class Desktop:
44
- def __init__(self):
45
- self.encoding=getpreferredencoding()
46
- self.tree=Tree(self)
47
- self.desktop_state=None
48
-
49
- def get_resolution(self)->tuple[int,int]:
50
- return pg.size()
51
-
52
- def get_state(self,use_vision:bool=False,use_dom:bool=False,as_bytes:bool=False,scale:float=1.0)->DesktopState:
53
- sleep(0.1)
54
- apps=self.get_apps()
55
- active_app=self.get_active_app()
56
- if active_app is not None and active_app in apps:
57
- apps.remove(active_app)
58
- logger.debug(f"Active app: {active_app}")
59
- logger.debug(f"Apps: {apps}")
60
- tree_state=self.tree.get_state(active_app,apps,use_dom=use_dom)
61
- if use_vision:
62
- screenshot=self.tree.get_annotated_screenshot(tree_state.interactive_nodes,scale=scale)
63
- if as_bytes:
64
- bytes_io=io.BytesIO()
65
- screenshot.save(bytes_io,format='PNG')
66
- screenshot=bytes_io.getvalue()
67
- else:
68
- screenshot=None
69
- self.desktop_state=DesktopState(apps= apps,active_app=active_app,screenshot=screenshot,tree_state=tree_state)
70
- return self.desktop_state
71
-
72
- def get_window_element_from_element(self,element:uia.Control)->uia.Control|None:
73
- while element is not None:
74
- if uia.IsTopLevelWindow(element.NativeWindowHandle):
75
- return element
76
- element = element.GetParentControl()
77
- return None
78
-
79
- def get_active_app(self)->App|None:
80
- try:
81
- handle=uia.GetForegroundWindow()
82
- for app in self.get_apps():
83
- if app.handle!=handle:
84
- continue
85
- return app
86
- except Exception as ex:
87
- logger.error(f"Error in get_active_app: {ex}")
88
- return None
89
-
90
- def get_app_status(self,control:uia.Control)->Status:
91
- if uia.IsIconic(control.NativeWindowHandle):
92
- return Status.MINIMIZED
93
- elif uia.IsZoomed(control.NativeWindowHandle):
94
- return Status.MAXIMIZED
95
- elif uia.IsWindowVisible(control.NativeWindowHandle):
96
- return Status.NORMAL
97
- else:
98
- return Status.HIDDEN
99
-
100
- def get_cursor_location(self)->tuple[int,int]:
101
- position=pg.position()
102
- return (position.x,position.y)
103
-
104
- def get_element_under_cursor(self)->uia.Control:
105
- return uia.ControlFromCursor()
106
-
107
- def get_apps_from_start_menu(self)->dict[str,str]:
108
- command='Get-StartApps | ConvertTo-Csv -NoTypeInformation'
109
- apps_info,_=self.execute_command(command)
110
- reader=csv.DictReader(io.StringIO(apps_info))
111
- return {row.get('Name').lower():row.get('AppID') for row in reader}
112
-
113
- def execute_command(self,command:str)->tuple[str,int]:
114
- try:
115
- encoded = base64.b64encode(command.encode("utf-16le")).decode("ascii")
116
- result = subprocess.run(
117
- ['powershell', '-NoProfile', '-EncodedCommand', encoded],
118
- capture_output=True,
119
- errors='ignore',
120
- timeout=25,
121
- cwd=os.path.expanduser(path='~')
122
- )
123
- stdout=result.stdout
124
- stderr=result.stderr
125
- return (stdout or stderr,result.returncode)
126
- except subprocess.TimeoutExpired:
127
- return ('Command execution timed out', 1)
128
- except Exception as e:
129
- return ('Command execution failed', 1)
130
-
131
- def is_app_browser(self,node:uia.Control):
132
- process=Process(node.ProcessId)
133
- return process.name() in BROWSER_NAMES
134
-
135
- def get_default_language(self)->str:
136
- command="Get-Culture | Select-Object Name,DisplayName | ConvertTo-Csv -NoTypeInformation"
137
- response,_=self.execute_command(command)
138
- reader=csv.DictReader(io.StringIO(response))
139
- return "".join([row.get('DisplayName') for row in reader])
140
-
141
- def resize_app(self,size:tuple[int,int]=None,loc:tuple[int,int]=None)->tuple[str,int]:
142
- active_app=self.desktop_state.active_app
143
- if active_app is None:
144
- return "No active app found",1
145
- if active_app.status==Status.MINIMIZED:
146
- return f"{active_app.name} is minimized",1
147
- elif active_app.status==Status.MAXIMIZED:
148
- return f"{active_app.name} is maximized",1
149
- else:
150
- app_control=uia.ControlFromHandle(active_app.handle)
151
- if loc is None:
152
- x=app_control.BoundingRectangle.left
153
- y=app_control.BoundingRectangle.top
154
- loc=(x,y)
155
- if size is None:
156
- width=app_control.BoundingRectangle.width()
157
- height=app_control.BoundingRectangle.height()
158
- size=(width,height)
159
- x,y=loc
160
- width,height=size
161
- app_control.MoveWindow(x,y,width,height)
162
- return (f'{active_app.name} resized to {width}x{height} at {x},{y}.',0)
163
-
164
- def is_app_running(self,name:str)->bool:
165
- apps={app.name:app for app in self.get_apps()}
166
- return process.extractOne(name,list(apps.keys()),score_cutoff=60) is not None
167
-
168
- def app(self,mode:Literal['launch','switch','resize'],name:Optional[str]=None,loc:Optional[tuple[int,int]]=None,size:Optional[tuple[int,int]]=None):
169
- match mode:
170
- case 'launch':
171
- response,status=self.launch_app(name)
172
- sleep(1.25)
173
- if status!=0:
174
- return response
175
- consecutive_waits=10
176
- for _ in range(consecutive_waits):
177
- if not self.is_app_running(name):
178
- sleep(1.25)
179
- else:
180
- return f'{name.title()} launched.'
181
- return f'Launching {name.title()} wait for it to come load.'
182
- case 'resize':
183
- response,status=self.resize_app(size=size,loc=loc)
184
- if status!=0:
185
- return response
186
- else:
187
- return response
188
- case 'switch':
189
- response,status=self.switch_app(name)
190
- if status!=0:
191
- return response
192
- else:
193
- return response
194
-
195
- def launch_app(self,name:str)->tuple[str,int]:
196
- apps_map=self.get_apps_from_start_menu()
197
- matched_app=process.extractOne(name,apps_map.keys(),score_cutoff=70)
198
- if matched_app is None:
199
- return (f'{name.title()} not found in start menu.',1)
200
- app_name,_=matched_app
201
- appid=apps_map.get(app_name)
202
- if appid is None:
203
- return (f'{name.title()} not found in start menu.',1)
204
- if appid.endswith('.exe'):
205
- command=f"Start-Process '{appid}'"
206
- else:
207
- command=f"Start-Process shell:AppsFolder\\{appid}"
208
- response,status=self.execute_command(command)
209
- return response,status
210
-
211
- def switch_app(self,name:str):
212
- apps={app.name:app for app in [self.desktop_state.active_app]+self.desktop_state.apps if app is not None}
213
- matched_app:Optional[tuple[str,float]]=process.extractOne(name,list(apps.keys()),score_cutoff=70)
214
- if matched_app is None:
215
- return (f'Application {name.title()} not found.',1)
216
- app_name,_=matched_app
217
- app=apps.get(app_name)
218
- target_handle=app.handle
219
-
220
- if uia.IsIconic(target_handle):
221
- uia.ShowWindow(target_handle, win32con.SW_RESTORE)
222
- content=f'{app_name.title()} restored from Minimized state.'
223
- else:
224
- self.bring_window_to_top(target_handle)
225
- content=f'Switched to {app_name.title()} window.'
226
- return content,0
227
-
228
- def bring_window_to_top(self,target_handle:int):
229
- foreground_handle=win32gui.GetForegroundWindow()
230
- foreground_thread,_=win32process.GetWindowThreadProcessId(foreground_handle)
231
- target_thread,_=win32process.GetWindowThreadProcessId(target_handle)
232
- try:
233
- ctypes.windll.user32.AllowSetForegroundWindow(-1)
234
- win32process.AttachThreadInput(foreground_thread,target_thread,True)
235
- win32gui.SetForegroundWindow(target_handle)
236
- win32gui.BringWindowToTop(target_handle)
237
- except Exception as e:
238
- logger.error(f'Failed to bring window to top: {e}')
239
- finally:
240
- win32process.AttachThreadInput(foreground_thread,target_thread,False)
241
-
242
- def get_element_handle_from_label(self,label:int)->uia.Control:
243
- tree_state=self.desktop_state.tree_state
244
- element_node=tree_state.interactive_nodes[label]
245
- xpath=element_node.xpath
246
- element_handle=self.get_element_from_xpath(xpath)
247
- return element_handle
248
-
249
- def get_coordinates_from_label(self,label:int)->tuple[int,int]:
250
- element_handle=self.get_element_handle_from_label(label)
251
- bounding_rectangle=element_handle.BoundingRectangle
252
- return bounding_rectangle.xcenter(),bounding_rectangle.ycenter()
253
-
254
- def click(self,loc:tuple[int,int],button:str='left',clicks:int=2):
255
- x,y=loc
256
- pg.click(x,y,button=button,clicks=clicks,duration=0.1)
257
-
258
- def type(self,loc:tuple[int,int],text:str,caret_position:Literal['start','end','none']='none',clear:Literal['true','false']='false',press_enter:Literal['true','false']='false'):
259
- x,y=loc
260
- pg.leftClick(x,y)
261
- if caret_position == 'start':
262
- pg.press('home')
263
- elif caret_position == 'end':
264
- pg.press('end')
265
- else:
266
- pass
267
- if clear=='true':
268
- pg.sleep(0.5)
269
- pg.hotkey('ctrl','a')
270
- pg.press('backspace')
271
- pg.typewrite(text,interval=0.02)
272
- if press_enter=='true':
273
- pg.press('enter')
274
-
275
- def scroll(self,loc:tuple[int,int]=None,type:Literal['horizontal','vertical']='vertical',direction:Literal['up','down','left','right']='down',wheel_times:int=1)->str|None:
276
- if loc:
277
- self.move(loc)
278
- match type:
279
- case 'vertical':
280
- match direction:
281
- case 'up':
282
- uia.WheelUp(wheel_times)
283
- case 'down':
284
- uia.WheelDown(wheel_times)
285
- case _:
286
- return 'Invalid direction. Use "up" or "down".'
287
- case 'horizontal':
288
- match direction:
289
- case 'left':
290
- pg.keyDown('Shift')
291
- pg.sleep(0.05)
292
- uia.WheelUp(wheel_times)
293
- pg.sleep(0.05)
294
- pg.keyUp('Shift')
295
- case 'right':
296
- pg.keyDown('Shift')
297
- pg.sleep(0.05)
298
- uia.WheelDown(wheel_times)
299
- pg.sleep(0.05)
300
- pg.keyUp('Shift')
301
- case _:
302
- return 'Invalid direction. Use "left" or "right".'
303
- case _:
304
- return 'Invalid type. Use "horizontal" or "vertical".'
305
- return None
306
-
307
- def drag(self,loc:tuple[int,int]):
308
- x,y=loc
309
- pg.sleep(0.5)
310
- pg.dragTo(x,y,duration=0.6)
311
-
312
- def move(self,loc:tuple[int,int]):
313
- x,y=loc
314
- pg.moveTo(x,y,duration=0.1)
315
-
316
- def shortcut(self,shortcut:str):
317
- shortcut=shortcut.split('+')
318
- if len(shortcut)>1:
319
- pg.hotkey(*shortcut)
320
- else:
321
- pg.press(''.join(shortcut))
322
-
323
- def multi_select(self,press_ctrl:Literal['true','false']='false',elements:list[tuple[int,int]|int]=[]):
324
- if press_ctrl=='true':
325
- pg.keyDown('ctrl')
326
- for element in elements:
327
- x,y=element
328
- pg.click(x,y,duration=0.2)
329
- pg.sleep(0.5)
330
- pg.keyUp('ctrl')
331
-
332
- def multi_edit(self,elements:list[tuple[int,int,str]|tuple[int,str]]):
333
- for element in elements:
334
- x,y,text=element
335
- self.type((x,y),text=text,clear='true')
336
-
337
- def scrape(self,url:str)->str:
338
- response=requests.get(url,timeout=10)
339
- html=response.text
340
- content=markdownify(html=html)
341
- return content
342
-
343
- def get_app_size(self,control:uia.Control):
344
- window=control.BoundingRectangle
345
- if window.isempty():
346
- return Size(width=0,height=0)
347
- return Size(width=window.width(),height=window.height())
348
-
349
- def is_app_visible(self,app)->bool:
350
- is_minimized=self.get_app_status(app)!=Status.MINIMIZED
351
- size=self.get_app_size(app)
352
- area=size.width*size.height
353
- is_overlay=self.is_overlay_app(app)
354
- return not is_overlay and is_minimized and area>10
355
-
356
- def is_overlay_app(self,element:uia.Control) -> bool:
357
- no_children = len(element.GetChildren()) == 0
358
- is_name = "Overlay" in element.Name.strip()
359
- return no_children or is_name
360
-
361
- def get_apps(self) -> list[App]:
362
- try:
363
- desktop = uia.GetRootControl() # Get the desktop control
364
- children = desktop.GetChildren()
365
- apps = []
366
- for depth, child in enumerate(children):
367
- if isinstance(child,(uia.WindowControl,uia.PaneControl)):
368
- window_pattern=child.GetPattern(uia.PatternId.WindowPattern)
369
- if (window_pattern is None):
370
- continue
371
- if window_pattern.CanMinimize and window_pattern.CanMaximize:
372
- status = self.get_app_status(child)
373
- size=self.get_app_size(child)
374
- apps.append(App(**{
375
- "name":child.Name,
376
- "depth":depth,
377
- "status":status,
378
- "size":size,
379
- "handle":child.NativeWindowHandle,
380
- "process_id":child.ProcessId
381
- }))
382
- except Exception as ex:
383
- logger.error(f"Error in get_apps: {ex}")
384
- apps = []
385
- return apps
386
-
387
- def get_xpath_from_element(self,element:uia.Control):
388
- current=element
389
- if current is None:
390
- return ""
391
- path_parts=[]
392
- while current is not None:
393
- parent=current.GetParentControl()
394
- if parent is None:
395
- # we are at the root node
396
- path_parts.append(f'{current.ControlTypeName}')
397
- break
398
- children=parent.GetChildren()
399
- same_type_children=["-".join(map(lambda x:str(x),child.GetRuntimeId())) for child in children if child.ControlType==current.ControlType]
400
- index=same_type_children.index("-".join(map(lambda x:str(x),current.GetRuntimeId())))
401
- if same_type_children:
402
- path_parts.append(f'{current.ControlTypeName}[{index+1}]')
403
- else:
404
- path_parts.append(f'{current.ControlTypeName}')
405
- current=parent
406
- path_parts.reverse()
407
- xpath="/".join(path_parts)
408
- return xpath
409
-
410
- def get_element_from_xpath(self,xpath:str)->uia.Control:
411
- pattern = re.compile(r'(\w+)(?:\[(\d+)\])?')
412
- parts=xpath.split("/")
413
- root=uia.GetRootControl()
414
- element=root
415
- for part in parts[1:]:
416
- match=pattern.fullmatch(part)
417
- if match is None:
418
- continue
419
- control_type, index=match.groups()
420
- index=int(index) if index else None
421
- children=element.GetChildren()
422
- same_type_children=list(filter(lambda x:x.ControlTypeName==control_type,children))
423
- if index:
424
- element=same_type_children[index-1]
425
- else:
426
- element=same_type_children[0]
427
- return element
428
-
429
- def get_windows_version(self)->str:
430
- response,status=self.execute_command("(Get-CimInstance Win32_OperatingSystem).Caption")
431
- if status==0:
432
- return response.strip()
433
- return "Windows"
434
-
435
- def get_user_account_type(self)->str:
436
- response,status=self.execute_command("(Get-LocalUser -Name $env:USERNAME).PrincipalSource")
437
- return "Local Account" if response.strip()=='Local' else "Microsoft Account" if status==0 else "Local Account"
438
-
439
- def get_dpi_scaling(self):
440
- user32 = ctypes.windll.user32
441
- dpi = user32.GetDpiForSystem()
442
- return dpi / 96.0
443
-
444
- def get_screen_size(self)->Size:
445
- width, height = uia.GetScreenSize()
446
- return Size(width=width,height=height)
447
-
448
- def get_screenshot(self)->Image.Image:
449
- return pg.screenshot()
450
-
451
- @contextmanager
452
- def auto_minimize(self):
453
- try:
454
- handle = uia.GetForegroundWindow()
455
- uia.ShowWindow(handle, win32con.SW_MINIMIZE)
456
- yield
457
- finally:
458
- uia.ShowWindow(handle, win32con.SW_RESTORE)
1
+ from windows_mcp.desktop.config import BROWSER_NAMES, PROCESS_PER_MONITOR_DPI_AWARE
2
+ from windows_mcp.desktop.views import DesktopState, App, Status, Size
3
+ from windows_mcp.tree.views import BoundingBox, TreeElementNode
4
+ from concurrent.futures import ThreadPoolExecutor, as_completed
5
+ from PIL import ImageGrab, ImageFont, ImageDraw, Image
6
+ from windows_mcp.tree.service import Tree
7
+ from locale import getpreferredencoding
8
+ from contextlib import contextmanager
9
+ from typing import Optional,Literal
10
+ from markdownify import markdownify
11
+ from fuzzywuzzy import process
12
+ from time import sleep,time
13
+ from psutil import Process
14
+ import win32process
15
+ import subprocess
16
+ import win32gui
17
+ import win32con
18
+ import requests
19
+ import logging
20
+ import base64
21
+ import ctypes
22
+ import csv
23
+ import re
24
+ import os
25
+ import io
26
+ import random
27
+
28
# Module-level logger; handler configuration is left to the application.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Opt in to per-monitor DPI awareness; fall back to the older system-wide
# call when the shcore API is unavailable or fails.
try:
    ctypes.windll.shcore.SetProcessDpiAwareness(PROCESS_PER_MONITOR_DPI_AWARE)
except Exception:
    ctypes.windll.user32.SetProcessDPIAware()

# NOTE(review): these imports deliberately follow the DPI call, presumably so
# both libraries observe the DPI-aware process -- confirm before reordering.
import windows_mcp.uia as uia
import pyautogui as pg

# Disable pyautogui's move-to-corner abort and pause 1s after each pyautogui call.
pg.FAILSAFE = False
pg.PAUSE = 1.0
41
+
42
+ class Desktop:
43
def __init__(self):
    """Record the locale's preferred encoding, build the UI tree helper and reset the state cache."""
    self.encoding: str = getpreferredencoding()
    self.tree: Tree = Tree(self)
    # Filled in by get_state(); None until the first capture.
    self.desktop_state: DesktopState | None = None
47
+
48
def get_state(self, use_annotation: bool = True, use_vision: bool = False, use_dom: bool = False, as_bytes: bool = False, scale: float = 1.0) -> DesktopState:
    """Capture apps, active app and UI tree; cache and return the snapshot.

    With *use_vision* a screenshot is attached: annotated with the
    interactive nodes when *use_annotation* is set, plain otherwise. It is
    resized by *scale* when that differs from 1.0 and encoded as PNG bytes
    when *as_bytes* is set.
    """
    sleep(0.1)
    started = time()

    # Top-level controls: taskbar, Program Manager, apps, dialogs.
    controls_handles = self.get_controls_handles()
    apps, apps_handles = self.get_apps(controls_handles=controls_handles)
    active_app = self.get_active_app(apps=apps)
    active_app_handle = active_app.handle if active_app else None

    if active_app is not None and active_app in apps:
        apps.remove(active_app)

    logger.debug(f"Active app: {active_app or 'No Active App Found'}")
    logger.debug(f"Apps: {apps}")

    # Every top-level handle that was not recognized as an app goes to the tree.
    other_apps_handles = list(controls_handles - apps_handles)
    tree_state = self.tree.get_state(active_app_handle, other_apps_handles, use_dom=use_dom)

    screenshot = None
    if use_vision:
        if use_annotation:
            screenshot = self.get_annotated_screenshot(nodes=tree_state.interactive_nodes)
        else:
            screenshot = self.get_screenshot()

        if scale != 1.0:
            scaled_size = (int(screenshot.width * scale), int(screenshot.height * scale))
            screenshot = screenshot.resize(scaled_size, Image.LANCZOS)

        if as_bytes:
            with io.BytesIO() as buffered:
                screenshot.save(buffered, format="PNG")
                screenshot = buffered.getvalue()

    self.desktop_state = DesktopState(apps=apps, active_app=active_app, screenshot=screenshot, tree_state=tree_state)
    # Log how long the capture took.
    logger.info(f"Desktop State capture took {time() - started:.2f} seconds")
    return self.desktop_state
91
+
92
def get_app_status(self, control: uia.Control) -> Status:
    """Classify a window as minimized, maximized, normal or hidden."""
    # Probes are tried in order; the first one that holds decides the status.
    probes = (
        (uia.IsIconic, Status.MINIMIZED),
        (uia.IsZoomed, Status.MAXIMIZED),
        (uia.IsWindowVisible, Status.NORMAL),
    )
    for probe, status in probes:
        if probe(control.NativeWindowHandle):
            return status
    return Status.HIDDEN
101
+
102
def get_cursor_location(self) -> tuple[int, int]:
    """Return the current cursor position as ``(x, y)``."""
    cursor = pg.position()
    return cursor.x, cursor.y
105
+
106
def get_element_under_cursor(self) -> uia.Control:
    """Return the UI Automation control located under the cursor."""
    control = uia.ControlFromCursor()
    return control
108
+
109
def get_apps_from_start_menu(self) -> dict[str, str]:
    """Map lower-cased Start-menu app names to their AppIDs.

    Rows missing a name or an AppID are skipped. Returns an empty dict when
    the PowerShell query fails or its CSV output cannot be parsed.
    """
    command = 'Get-StartApps | ConvertTo-Csv -NoTypeInformation'
    apps_info, status = self.execute_command(command)

    if status != 0 or not apps_info:
        logger.error(f"Failed to get apps from start menu: {apps_info}")
        return {}

    try:
        apps_map = {}
        for row in csv.DictReader(io.StringIO(apps_info.strip())):
            if row.get('Name') and row.get('AppID'):
                apps_map[row.get('Name').lower()] = row.get('AppID')
        return apps_map
    except Exception as e:
        logger.error(f"Error parsing start menu apps: {e}")
        return {}
127
+
128
def execute_command(self, command: str, timeout: int = 10) -> tuple[str, int]:
    """Run *command* in PowerShell and return ``(output, exit_code)``.

    The command is passed via ``-EncodedCommand`` (base64 of UTF-16LE).
    Output is stdout, or stderr when stdout is empty, decoded with
    ``self.encoding`` and undecodable bytes dropped. Timeouts and any other
    failure are reported as ``(message, 1)`` rather than raised.
    """
    def _as_text(stream):
        # subprocess returns bytes here (no text mode requested); tolerate str too.
        if isinstance(stream, bytes):
            return stream.decode(self.encoding, errors='ignore')
        return stream

    try:
        encoded = base64.b64encode(command.encode("utf-16le")).decode("ascii")
        argv = ['powershell', '-NoProfile', '-EncodedCommand', encoded]
        result = subprocess.run(
            argv,
            capture_output=True,
            timeout=timeout,
            cwd=os.path.expanduser('~'),
        )
        stdout = _as_text(result.stdout)
        stderr = _as_text(result.stderr)
        return (stdout or stderr, result.returncode)
    except subprocess.TimeoutExpired:
        return ('Command execution timed out', 1)
    except Exception as e:
        return (f'Command execution failed: {type(e).__name__}: {e}', 1)
149
+
150
def is_app_browser(self, node: uia.Control):
    """Return True if the process owning *node* is a known browser.

    Any node of the app may be passed. Returns False when the process cannot
    be inspected (for example it has already exited).
    """
    try:
        # Local name avoids shadowing the module-level fuzzywuzzy `process`.
        proc = Process(node.ProcessId)
        return proc.name() in BROWSER_NAMES
    except Exception:
        # `except Exception` rather than a bare `except`, so KeyboardInterrupt
        # and SystemExit still propagate.
        return False
157
+
158
def get_default_language(self) -> str:
    """Return the display name of the current culture.

    Returns an empty string when the PowerShell query fails or its output
    carries no ``DisplayName`` column, instead of raising from ``str.join``.
    """
    command = "Get-Culture | Select-Object Name,DisplayName | ConvertTo-Csv -NoTypeInformation"
    response, status = self.execute_command(command)
    if status != 0 or not response:
        # On failure `response` holds error text, not CSV.
        return ""
    reader = csv.DictReader(io.StringIO(response))
    return "".join(row.get('DisplayName') or "" for row in reader)
163
+
164
def resize_app(self, size: tuple[int, int] = None, loc: tuple[int, int] = None) -> tuple[str, int]:
    """Move/resize the active app recorded in the cached desktop state.

    Missing *size* or *loc* default to the window's current geometry.
    Returns ``(message, status)``: status 0 on success, 1 when there is no
    cached state or active app, or the window is minimized or maximized.
    """
    state = self.desktop_state
    # No snapshot yet means no known active app; report it instead of raising.
    active_app = state.active_app if state is not None else None
    if active_app is None:
        return "No active app found", 1
    if active_app.status == Status.MINIMIZED:
        return f"{active_app.name} is minimized", 1
    if active_app.status == Status.MAXIMIZED:
        return f"{active_app.name} is maximized", 1

    app_control = uia.ControlFromHandle(active_app.handle)
    if loc is None:
        rect = app_control.BoundingRectangle
        loc = (rect.left, rect.top)
    if size is None:
        rect = app_control.BoundingRectangle
        size = (rect.width(), rect.height())
    x, y = loc
    width, height = size
    app_control.MoveWindow(x, y, width, height)
    return (f'{active_app.name} resized to {width}x{height} at {x},{y}.', 0)
186
+
187
def is_app_running(self, name: str) -> bool:
    """Fuzzy-match *name* (cutoff 60) against the titles of the open apps."""
    open_apps, _ = self.get_apps()
    # dict.fromkeys de-duplicates titles while keeping their order.
    titles = list(dict.fromkeys(app.name for app in open_apps))
    best_match = process.extractOne(name, titles, score_cutoff=60)
    return best_match is not None
191
+
192
+ def app(self,mode:Literal['launch','switch','resize'],name:Optional[str]=None,loc:Optional[tuple[int,int]]=None,size:Optional[tuple[int,int]]=None):
193
+ match mode:
194
+ case 'launch':
195
+ response,status,pid=self.launch_app(name)
196
+ if status!=0:
197
+ return response
198
+
199
+ # Smart wait using UIA Exists (avoids manual Python loops)
200
+ launched = False
201
+ if pid > 0:
202
+ if uia.WindowControl(ProcessId=pid).Exists(maxSearchSeconds=10):
203
+ launched = True
204
+
205
+ if not launched:
206
+ # Fallback: Regex search for the window title
207
+ safe_name = re.escape(name)
208
+ if uia.WindowControl(RegexName=f'(?i).*{safe_name}.*').Exists(maxSearchSeconds=10):
209
+ launched = True
210
+
211
+ if launched:
212
+ return f'{name.title()} launched.'
213
+ return f'Launching {name.title()} sent, but window not detected yet.'
214
+ case 'resize':
215
+ response,status=self.resize_app(size=size,loc=loc)
216
+ if status!=0:
217
+ return response
218
+ else:
219
+ return response
220
+ case 'switch':
221
+ response,status=self.switch_app(name)
222
+ if status!=0:
223
+ return response
224
+ else:
225
+ return response
226
+
227
def launch_app(self, name: str) -> tuple[str, int, int]:
    """Start the Start-menu app that best matches *name*.

    Returns ``(response, status, pid)``. ``status`` is the PowerShell exit
    code, or 1 when no app matches. ``pid`` is the new process id when the
    app was started from a file path and PowerShell reported one, else 0.
    """
    apps_map = self.get_apps_from_start_menu()
    matched_app = process.extractOne(name, apps_map.keys(), score_cutoff=70)
    if matched_app is None:
        return (f'{name.title()} not found in start menu.', 1, 0)
    app_name, _ = matched_app
    appid = apps_map.get(app_name)
    if appid is None:
        # Same 3-tuple shape as every other return: callers unpack three values.
        return (f'{name.title()} not found in start menu.', 1, 0)

    pid = 0
    if os.path.exists(appid) or "\\" in appid:
        # File path: -PassThru lets PowerShell report the new process id.
        command = f'Start-Process "{appid}" -PassThru | Select-Object -ExpandProperty Id'
        response, status = self.execute_command(command)
        if status == 0 and response.strip().isdigit():
            pid = int(response.strip())
    else:
        # AUMID (Store app): launched through the AppsFolder shell namespace.
        command = f'Start-Process "shell:AppsFolder\\{appid}"'
        response, status = self.execute_command(command)

    return response, status, pid
250
+
251
def switch_app(self, name: str):
    """Bring the cached app best matching *name* (cutoff 70) to the front.

    Candidates come from the cached desktop state. A minimized window is
    restored; any other window is raised. Returns ``(message, status)``
    with status 0 on success and 1 when nothing matches.
    """
    state = self.desktop_state
    candidates = [state.active_app] + state.apps
    apps_by_name = {candidate.name: candidate for candidate in candidates if candidate is not None}
    matched_app: Optional[tuple[str, float]] = process.extractOne(name, list(apps_by_name.keys()), score_cutoff=70)
    if matched_app is None:
        return (f'Application {name.title()} not found.', 1)
    app_name, _ = matched_app
    target_handle = apps_by_name.get(app_name).handle

    if not uia.IsIconic(target_handle):
        self.bring_window_to_top(target_handle)
        return f'Switched to {app_name.title()} window.', 0

    uia.ShowWindow(target_handle, win32con.SW_RESTORE)
    return f'{app_name.title()} restored from Minimized state.', 0
267
+
268
def bring_window_to_top(self, target_handle: int):
    """Make *target_handle* the foreground window.

    Raises:
        ValueError: if *target_handle* is not a window.

    Any other failure is logged with its traceback and swallowed.
    """
    if not win32gui.IsWindow(target_handle):
        raise ValueError("Invalid window handle")

    def _raise_window():
        win32gui.SetForegroundWindow(target_handle)
        win32gui.BringWindowToTop(target_handle)

    try:
        if win32gui.IsIconic(target_handle):
            win32gui.ShowWindow(target_handle, win32con.SW_RESTORE)

        foreground_handle = win32gui.GetForegroundWindow()
        foreground_thread = win32process.GetWindowThreadProcessId(foreground_handle)[0]
        target_thread = win32process.GetWindowThreadProcessId(target_handle)[0]

        # Attaching input queues only makes sense for two distinct, valid threads.
        can_attach = foreground_thread and target_thread and foreground_thread != target_thread
        if not can_attach:
            _raise_window()
            return

        ctypes.windll.user32.AllowSetForegroundWindow(-1)

        attached = False
        try:
            win32process.AttachThreadInput(foreground_thread, target_thread, True)
            attached = True

            _raise_window()

            flags = win32con.SWP_NOMOVE | win32con.SWP_NOSIZE | win32con.SWP_SHOWWINDOW
            win32gui.SetWindowPos(target_handle, win32con.HWND_TOP, 0, 0, 0, 0, flags)
        finally:
            # Detach only what was actually attached.
            if attached:
                win32process.AttachThreadInput(foreground_thread, target_thread, False)

    except Exception as e:
        logger.exception(f"Failed to bring window to top: {e}")
308
+
309
def get_element_handle_from_label(self, label: int) -> uia.Control:
    """Resolve interactive-node index *label* from the cached tree to a live control."""
    node = self.desktop_state.tree_state.interactive_nodes[label]
    return self.get_element_from_xpath(node.xpath)
315
+
316
def get_coordinates_from_label(self, label: int) -> tuple[int, int]:
    """Return the center point of the element identified by *label*."""
    rect = self.get_element_handle_from_label(label).BoundingRectangle
    return rect.xcenter(), rect.ycenter()
320
+
321
def click(self, loc: tuple[int, int], button: str = 'left', clicks: int = 2):
    """Click *clicks* times at *loc* with the given mouse *button*."""
    target_x, target_y = loc
    pg.click(target_x, target_y, clicks=clicks, button=button, duration=0.1)
324
+
325
def type(self, loc: tuple[int, int], text: str, caret_position: Literal['start', 'end', 'none'] = 'none', clear: Literal['true', 'false'] = 'false', press_enter: Literal['true', 'false'] = 'false'):
    """Click at *loc*, optionally move the caret / clear the field, then type *text*.

    The boolean-like options are the strings ``'true'``/``'false'``.
    """
    target_x, target_y = loc
    pg.leftClick(target_x, target_y)

    # 'start' -> Home, 'end' -> End; anything else leaves the caret alone.
    if caret_position == 'start':
        caret_key = 'home'
    elif caret_position == 'end':
        caret_key = 'end'
    else:
        caret_key = None
    if caret_key is not None:
        pg.press(caret_key)

    if clear == 'true':
        # Select everything in the field and delete it.
        pg.sleep(0.5)
        pg.hotkey('ctrl', 'a')
        pg.press('backspace')

    pg.typewrite(text, interval=0.02)

    if press_enter == 'true':
        pg.press('enter')
341
+
342
+ def scroll(self,loc:tuple[int,int]=None,type:Literal['horizontal','vertical']='vertical',direction:Literal['up','down','left','right']='down',wheel_times:int=1)->str|None:
343
+ if loc:
344
+ self.move(loc)
345
+ match type:
346
+ case 'vertical':
347
+ match direction:
348
+ case 'up':
349
+ uia.WheelUp(wheel_times)
350
+ case 'down':
351
+ uia.WheelDown(wheel_times)
352
+ case _:
353
+ return 'Invalid direction. Use "up" or "down".'
354
+ case 'horizontal':
355
+ match direction:
356
+ case 'left':
357
+ pg.keyDown('Shift')
358
+ pg.sleep(0.05)
359
+ uia.WheelUp(wheel_times)
360
+ pg.sleep(0.05)
361
+ pg.keyUp('Shift')
362
+ case 'right':
363
+ pg.keyDown('Shift')
364
+ pg.sleep(0.05)
365
+ uia.WheelDown(wheel_times)
366
+ pg.sleep(0.05)
367
+ pg.keyUp('Shift')
368
+ case _:
369
+ return 'Invalid direction. Use "left" or "right".'
370
+ case _:
371
+ return 'Invalid type. Use "horizontal" or "vertical".'
372
+ return None
373
+
374
def drag(self, loc: tuple[int, int]):
    """Drag from the current cursor position to *loc* after a short pause."""
    dest_x, dest_y = loc
    pg.sleep(0.5)
    pg.dragTo(dest_x, dest_y, duration=0.6)
378
+
379
def move(self, loc: tuple[int, int]):
    """Move the cursor to *loc*."""
    dest_x, dest_y = loc
    pg.moveTo(dest_x, dest_y, duration=0.1)
382
+
383
def shortcut(self, shortcut: str):
    """Press a single key or a '+'-separated key combination (e.g. ``ctrl+c``)."""
    keys = shortcut.split('+')
    # split() always yields at least one item; a lone key is pressed directly.
    if len(keys) == 1:
        pg.press(keys[0])
        return
    pg.hotkey(*keys)
389
+
390
+ def multi_select(self,press_ctrl:Literal['true','false']='false',elements:list[tuple[int,int]|int]=[]):
391
+ if press_ctrl=='true':
392
+ pg.keyDown('ctrl')
393
+ for element in elements:
394
+ x,y=element
395
+ pg.click(x,y,duration=0.2)
396
+ pg.sleep(0.5)
397
+ pg.keyUp('ctrl')
398
+
399
+ def multi_edit(self,elements:list[tuple[int,int,str]|tuple[int,str]]):
400
+ for element in elements:
401
+ x,y,text=element
402
+ self.type((x,y),text=text,clear='true')
403
+
404
def scrape(self, url: str) -> str:
    """Fetch *url* (10-second timeout) and return its HTML converted to Markdown."""
    html = requests.get(url, timeout=10).text
    return markdownify(html=html)
409
+
410
def get_app_from_element(self, element: uia.Control) -> App | None:
    """Return the app whose window contains *element*, or None if there is none."""
    if element is None:
        return None
    top_window = element.GetTopLevelControl()
    if top_window is None:
        return None
    wanted_handle = top_window.NativeWindowHandle
    open_apps, _ = self.get_apps()
    return next((candidate for candidate in open_apps if candidate.handle == wanted_handle), None)
422
+
423
def is_app_visible(self, app: uia.Control) -> bool:
    """True for a non-overlay, non-minimized window whose area exceeds 10."""
    not_minimized = self.get_app_status(app) != Status.MINIMIZED
    rect = app.BoundingRectangle
    area = rect.width() * rect.height()
    overlay = self.is_overlay_app(app)
    return not overlay and not_minimized and area > 10
429
+
430
def is_overlay_app(self, element: uia.Control) -> bool:
    """Heuristic: treat a window with no children or 'Overlay' in its name as an overlay."""
    child_count = len(element.GetChildren())
    named_overlay = "Overlay" in element.Name.strip()
    return child_count == 0 or named_overlay
434
+
435
def get_controls_handles(self):
    """Return the native window handles of the root control's direct children.

    Returns:
        A ``set`` with the ``NativeWindowHandle`` of each child of
        ``uia.GetRootControl()``.
    """
    desktop_root = uia.GetRootControl()
    return {window.NativeWindowHandle for window in desktop_root.GetChildren()}
442
+
443
def get_active_app(self,apps:list[App]|None=None)->App|None:
    """Return the app that owns the current foreground window.

    Args:
        apps: Candidate apps. When ``None``, they are fetched with
            ``self.get_apps()``.

    Returns:
        The first app whose ``handle`` equals ``uia.GetForegroundWindow()``,
        or ``None`` when nothing matches or an exception occurs (the
        exception is logged, not raised).
    """
    try:
        candidates = self.get_apps()[0] if apps is None else apps
        foreground = uia.GetForegroundWindow()
        for candidate in candidates:
            if candidate.handle == foreground:
                return candidate
    except Exception as ex:
        logger.error(f"Error in get_active_app: {ex}")
    return None
455
+
456
def get_apps(self,controls_handles:set[int]|None=None) -> tuple[list[App],set[int]]:
    """Build ``App`` records for the top-level windows that look like applications.

    A handle is kept when its control is not an overlay, is a
    ``WindowControl`` or ``PaneControl``, exposes a window pattern that can
    both minimize and maximize, and either has a non-empty bounding
    rectangle or is minimized.

    Args:
        controls_handles: Window handles to inspect. When falsy (``None`` or
            empty), ``self.get_controls_handles()`` is used instead.

    Returns:
        ``(apps, handles)``: the collected ``App`` objects and the set of
        their native window handles. If an unexpected error occurs it is
        logged and ``apps`` is reset to an empty list; ``handles`` keeps
        whatever had been collected up to that point.
    """
    try:
        apps = []
        handles = set()
        controls_handles = controls_handles or self.get_controls_handles()
        # ``depth`` is simply the enumeration position of the handle.
        for depth, hwnd in enumerate(controls_handles):
            try:
                control = uia.ControlFromHandle(hwnd)
            except Exception:
                continue

            # Filter out Overlays (e.g. NVIDIA, Steam)
            if self.is_overlay_app(control):
                continue
            if not isinstance(control, (uia.WindowControl, uia.PaneControl)):
                continue

            window_pattern = control.GetPattern(uia.PatternId.WindowPattern)
            if window_pattern is None:
                continue
            if not (window_pattern.CanMinimize and window_pattern.CanMaximize):
                continue

            status = self.get_app_status(control)
            rect = control.BoundingRectangle
            # An empty rectangle is only acceptable for minimized windows.
            if rect.isempty() and status != Status.MINIMIZED:
                continue

            apps.append(App(
                name=control.Name,
                runtime_id=tuple(control.GetRuntimeId()),
                depth=depth,
                status=status,
                bounding_box=BoundingBox(
                    left=rect.left,
                    top=rect.top,
                    right=rect.right,
                    bottom=rect.bottom,
                    width=rect.width(),
                    height=rect.height(),
                ),
                handle=control.NativeWindowHandle,
                process_id=control.ProcessId,
                is_browser=self.is_app_browser(control),
            ))
            handles.add(control.NativeWindowHandle)
    except Exception as ex:
        logger.error(f"Error in get_apps: {ex}")
        apps = []
    return apps, handles
505
+
506
def get_xpath_from_element(self,element:uia.Control):
    """Build an XPath-like string locating ``element`` from the root control.

    Each segment is the control's ``ControlTypeName``. Non-root segments
    carry a 1-based index among the parent's children that share the same
    ``ControlType``, e.g. ``PaneControl/WindowControl[2]/ButtonControl[1]``.

    Args:
        element: The control to describe, or ``None``.

    Returns:
        The ``'/'``-joined path, or ``""`` when ``element`` is ``None``.
        If a parent does not list the control among its same-type children,
        that segment is emitted without an index instead of raising
        ``ValueError``.
    """
    if element is None:
        return ""

    def runtime_key(control):
        # Runtime ids are sequences; join them into one comparable string.
        return "-".join(map(str, control.GetRuntimeId()))

    segments = []
    current = element
    while current is not None:
        parent = current.GetParentControl()
        if parent is None:
            # we are at the root node
            segments.append(f'{current.ControlTypeName}')
            break
        sibling_keys = [
            runtime_key(child)
            for child in parent.GetChildren()
            if child.ControlType == current.ControlType
        ]
        try:
            position = sibling_keys.index(runtime_key(current))
        except ValueError:
            # The parent did not report this control among its same-type
            # children; fall back to an un-indexed segment.
            segments.append(f'{current.ControlTypeName}')
        else:
            segments.append(f'{current.ControlTypeName}[{position+1}]')
        current = parent
    segments.reverse()
    return "/".join(segments)
528
+
529
def get_element_from_xpath(self,xpath:str)->uia.Control:
    """Resolve an XPath-like string to a control, walking down from the root.

    The first segment is skipped, and the walk starts at
    ``uia.GetRootControl()``. Each remaining segment has the form
    ``TypeName`` or ``TypeName[n]`` with a 1-based ``n``. It selects the
    n-th child (the first when no index is given) whose ``ControlTypeName``
    equals ``TypeName``. Segments that do not match this form are ignored.

    Args:
        xpath: ``'/'``-separated path.

    Returns:
        The control reached after the last segment.

    Raises:
        IndexError: If a segment selects a child that does not exist.
    """
    segment_re = re.compile(r'(\w+)(?:\[(\d+)\])?')
    segments = xpath.split("/")
    element = uia.GetRootControl()
    for segment in segments[1:]:
        parsed = segment_re.fullmatch(segment)
        if parsed is None:
            continue
        control_type, raw_index = parsed.groups()
        index = int(raw_index) if raw_index else None
        candidates = [child for child in element.GetChildren() if child.ControlTypeName == control_type]
        # A missing (or zero) index selects the first candidate.
        element = candidates[index - 1] if index else candidates[0]
    return element
547
+
548
def get_windows_version(self)->str:
    """Return the operating system caption reported by ``Win32_OperatingSystem``.

    Returns:
        The stripped command output when ``self.execute_command`` reports
        status ``0``; otherwise the generic string ``"Windows"``.
    """
    response, status = self.execute_command("(Get-CimInstance Win32_OperatingSystem).Caption")
    return response.strip() if status == 0 else "Windows"
553
+
554
def get_user_account_type(self)->str:
    """Classify the current user as a local or a Microsoft account.

    Returns:
        ``"Local Account"`` when the queried ``PrincipalSource`` is
        ``Local`` or the command did not succeed. ``"Microsoft Account"``
        for any other output of a successful (status ``0``) command.
    """
    response, status = self.execute_command("(Get-LocalUser -Name $env:USERNAME).PrincipalSource")
    if response.strip() == 'Local':
        return "Local Account"
    if status == 0:
        return "Microsoft Account"
    return "Local Account"
557
+
558
def get_dpi_scaling(self):
    """Return the system DPI as a multiple of 96.

    Returns:
        ``GetDpiForSystem() / 96.0`` as a float (``1.0`` at 96 DPI).
    """
    system_dpi = ctypes.windll.user32.GetDpiForSystem()
    return system_dpi / 96.0
562
+
563
def get_screen_size(self)->Size:
    """Return the virtual screen dimensions reported by ``uia.GetVirtualScreenSize``.

    Returns:
        A ``Size`` holding the width and height.
    """
    screen_width, screen_height = uia.GetVirtualScreenSize()
    return Size(width=screen_width, height=screen_height)
566
+
567
def get_screenshot(self)->Image.Image:
    """Capture the screen, preferring a grab that spans every monitor.

    Returns:
        The image from ``ImageGrab.grab(all_screens=True)``. If that call
        raises, a warning is logged and ``pg.screenshot()`` is returned
        instead.
    """
    try:
        capture = ImageGrab.grab(all_screens=True)
    except Exception as e:
        logger.warning(f"Failed to capture all screens: {e}. Fallback to primary.")
        capture = pg.screenshot()
    return capture
573
+
574
def get_annotated_screenshot(self, nodes: list[TreeElementNode]) -> Image.Image:
    """Return a screenshot with a numbered, coloured box drawn around each node.

    The capture is pasted onto a white canvas with a 5 px offset. Each node
    then gets a randomly coloured rectangle around its ``bounding_box`` and
    a label showing its index in ``nodes``, placed above the box's
    top-right corner.

    Args:
        nodes: Elements to annotate; labels are their list positions.

    Returns:
        The padded, annotated image.
    """
    screenshot = self.get_screenshot()
    sleep(0.10)

    # Add padding
    padding = 5
    canvas_size = (
        int(screenshot.width + (1.5 * padding)),
        int(screenshot.height + (1.5 * padding)),
    )
    canvas = Image.new("RGB", canvas_size, color=(255, 255, 255))
    canvas.paste(screenshot, (padding, padding))

    draw = ImageDraw.Draw(canvas)
    font_size = 12
    try:
        font = ImageFont.truetype('arial.ttf', font_size)
    except IOError:
        font = ImageFont.load_default()

    left_offset, top_offset, _, _ = uia.GetVirtualScreenRect()

    def to_canvas(box):
        # Adjust for the virtual screen offset so coordinates map onto the
        # screenshot image, then shift by the padding.
        return (
            int(box.left - left_offset) + padding,
            int(box.top - top_offset) + padding,
            int(box.right - left_offset) + padding,
            int(box.bottom - top_offset) + padding,
        )

    def annotate(indexed_node):
        label, node = indexed_node
        box = node.bounding_box
        color = "#{:06x}".format(random.randint(0, 0xFFFFFF))
        outline = to_canvas(box)

        # Draw bounding box
        draw.rectangle(outline, outline=color, width=2)

        # Label dimensions
        caption = str(label)
        label_width = draw.textlength(caption, font=font)
        label_height = font_size
        _, top, right, _ = outline

        # Label position above bounding box
        label_x1 = right - label_width
        label_y1 = top - label_height - 4
        label_x2 = label_x1 + label_width
        label_y2 = label_y1 + label_height + 4

        # Draw label background and text
        draw.rectangle([(label_x1, label_y1), (label_x2, label_y2)], fill=color)
        draw.text((label_x1 + 2, label_y1 + 2), caption, fill=(255, 255, 255), font=font)

    # Draw annotations in parallel. The map results are never consumed, so
    # an exception raised while drawing one annotation is not propagated.
    with ThreadPoolExecutor() as executor:
        executor.map(annotate, enumerate(nodes))
    return canvas
630
+
631
@contextmanager
def auto_minimize(self):
    """Context manager that minimizes the foreground window for the ``with`` body.

    On entry, the window returned by ``uia.GetForegroundWindow()`` is
    minimized. On exit, whether normal or via an exception, the same window
    is restored.
    """
    # Resolve the handle before entering ``try`` so the ``finally`` clause
    # never references an unbound name if the lookup itself fails.
    handle = uia.GetForegroundWindow()
    try:
        uia.ShowWindow(handle, win32con.SW_MINIMIZE)
        yield
    finally:
        uia.ShowWindow(handle, win32con.SW_RESTORE)