windows-mcp 0.3.2__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
src/desktop/service.py ADDED
@@ -0,0 +1,454 @@
1
+ from src.desktop.config import BROWSER_NAMES, PROCESS_PER_MONITOR_DPI_AWARE
2
+ from src.desktop.views import DesktopState, App, Size, Status
3
+ from locale import getpreferredencoding
4
+ from contextlib import contextmanager
5
+ from typing import Optional,Literal
6
+ from markdownify import markdownify
7
+ from src.tree.service import Tree
8
+ from fuzzywuzzy import process
9
+ from psutil import Process
10
+ from time import sleep
11
+ from PIL import Image
12
+ import win32process
13
+ import subprocess
14
+ import win32gui
15
+ import win32con
16
+ import requests
17
+ import logging
18
+ import base64
19
+ import ctypes
20
+ import csv
21
+ import re
22
+ import os
23
+ import io
24
+
25
# Module-level logger: INFO threshold, "[LEVEL] message" lines via a stream handler.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
formatter = logging.Formatter('[%(levelname)s] %(message)s')
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.addHandler(handler)

# Request per-monitor DPI awareness; if the shcore call raises for any reason,
# fall back to the older user32 call.
try:
    ctypes.windll.shcore.SetProcessDpiAwareness(PROCESS_PER_MONITOR_DPI_AWARE)
except Exception:
    ctypes.windll.user32.SetProcessDPIAware()

# NOTE(review): these imports deliberately come after the DPI call above,
# presumably so both libraries work in DPI-aware coordinates - keep the order.
import uiautomation as uia
import pyautogui as pg

# pyautogui settings: fail-safe switched off, and a 1.0 pause configured.
pg.FAILSAFE = False
pg.PAUSE = 1.0
42
+
43
+ class Desktop:
44
+ def __init__(self):
45
+ self.encoding=getpreferredencoding()
46
+ self.tree=Tree(self)
47
+ self.desktop_state=None
48
+
49
+ def get_state(self,use_vision:bool=False,as_bytes:bool=False)->DesktopState:
50
+ sleep(0.1)
51
+ apps=self.get_apps()
52
+ active_app=self.get_active_app()
53
+ if active_app is not None:
54
+ apps.remove(active_app)
55
+ logger.debug(f"Active app: {active_app}")
56
+ logger.debug(f"Apps: {apps}")
57
+ tree_state=self.tree.get_state(active_app,apps)
58
+ if use_vision:
59
+ screenshot=self.tree.annotated_screenshot(tree_state.interactive_nodes)
60
+ if as_bytes:
61
+ bytes_io=io.BytesIO()
62
+ screenshot.save(bytes_io,format='PNG')
63
+ screenshot=bytes_io.getvalue()
64
+ else:
65
+ screenshot=None
66
+ self.desktop_state=DesktopState(apps= apps,active_app=active_app,screenshot=screenshot,tree_state=tree_state)
67
+ return self.desktop_state
68
+
69
+ def get_window_element_from_element(self,element:uia.Control)->uia.Control|None:
70
+ while element is not None:
71
+ if uia.IsTopLevelWindow(element.NativeWindowHandle):
72
+ return element
73
+ element = element.GetParentControl()
74
+ return None
75
+
76
+ def get_active_app(self)->App|None:
77
+ try:
78
+ handle=uia.GetForegroundWindow()
79
+ for app in self.get_apps():
80
+ if app.handle!=handle:
81
+ continue
82
+ return app
83
+ except Exception as ex:
84
+ logger.error(f"Error in get_active_app: {ex}")
85
+ return None
86
+
87
+ def get_app_status(self,control:uia.Control)->Status:
88
+ if uia.IsIconic(control.NativeWindowHandle):
89
+ return Status.MINIMIZED
90
+ elif uia.IsZoomed(control.NativeWindowHandle):
91
+ return Status.MAXIMIZED
92
+ elif uia.IsWindowVisible(control.NativeWindowHandle):
93
+ return Status.NORMAL
94
+ else:
95
+ return Status.HIDDEN
96
+
97
+ def get_cursor_location(self)->tuple[int,int]:
98
+ position=pg.position()
99
+ return (position.x,position.y)
100
+
101
+ def get_element_under_cursor(self)->uia.Control:
102
+ return uia.ControlFromCursor()
103
+
104
+ def get_apps_from_start_menu(self)->dict[str,str]:
105
+ command='Get-StartApps | ConvertTo-Csv -NoTypeInformation'
106
+ apps_info,_=self.execute_command(command)
107
+ reader=csv.DictReader(io.StringIO(apps_info))
108
+ return {row.get('Name').lower():row.get('AppID') for row in reader}
109
+
110
+ def execute_command(self,command:str)->tuple[str,int]:
111
+ try:
112
+ encoded = base64.b64encode(command.encode("utf-16le")).decode("ascii")
113
+ result = subprocess.run(
114
+ ['powershell', '-NoProfile', '-EncodedCommand', encoded],
115
+ capture_output=True,
116
+ errors='ignore',
117
+ timeout=25,
118
+ cwd=os.path.expanduser(path='~')
119
+ )
120
+ stdout=result.stdout
121
+ stderr=result.stderr
122
+ return (stdout or stderr,result.returncode)
123
+ except subprocess.TimeoutExpired:
124
+ return ('Command execution timed out', 1)
125
+ except Exception as e:
126
+ return ('Command execution failed', 1)
127
+
128
+ def is_app_browser(self,node:uia.Control):
129
+ process=Process(node.ProcessId)
130
+ return process.name() in BROWSER_NAMES
131
+
132
+ def get_default_language(self)->str:
133
+ command="Get-Culture | Select-Object Name,DisplayName | ConvertTo-Csv -NoTypeInformation"
134
+ response,_=self.execute_command(command)
135
+ reader=csv.DictReader(io.StringIO(response))
136
+ return "".join([row.get('DisplayName') for row in reader])
137
+
138
+ def resize_app(self,size:tuple[int,int]=None,loc:tuple[int,int]=None)->tuple[str,int]:
139
+ active_app=self.desktop_state.active_app
140
+ if active_app is None:
141
+ return "No active app found",1
142
+ if active_app.status==Status.MINIMIZED:
143
+ return f"{active_app.name} is minimized",1
144
+ elif active_app.status==Status.MAXIMIZED:
145
+ return f"{active_app.name} is maximized",1
146
+ else:
147
+ app_control=uia.ControlFromHandle(active_app.handle)
148
+ if loc is None:
149
+ x=app_control.BoundingRectangle.left
150
+ y=app_control.BoundingRectangle.top
151
+ loc=(x,y)
152
+ if size is None:
153
+ width=app_control.BoundingRectangle.width()
154
+ height=app_control.BoundingRectangle.height()
155
+ size=(width,height)
156
+ x,y=loc
157
+ width,height=size
158
+ app_control.MoveWindow(x,y,width,height)
159
+ return (f'{active_app.name} resized to {width}x{height} at {x},{y}.',0)
160
+
161
+ def is_app_running(self,name:str)->bool:
162
+ apps={app.name:app for app in self.get_apps()}
163
+ return process.extractOne(name,list(apps.keys()),score_cutoff=60) is not None
164
+
165
+ def app(self,mode:Literal['launch','switch','resize'],name:Optional[str]=None,loc:Optional[tuple[int,int]]=None,size:Optional[tuple[int,int]]=None):
166
+ match mode:
167
+ case 'launch':
168
+ response,status=self.launch_app(name)
169
+ sleep(1.25)
170
+ if status!=0:
171
+ return response
172
+ consecutive_waits=3
173
+ for _ in range(consecutive_waits):
174
+ if not self.is_app_running(name):
175
+ sleep(1.25)
176
+ else:
177
+ return f'{name.title()} launched.'
178
+ return f'Launching {name.title()} wait for it to come load.'
179
+ case 'resize':
180
+ response,status=self.resize_app(size=size,loc=loc)
181
+ if status!=0:
182
+ return response
183
+ else:
184
+ return response
185
+ case 'switch':
186
+ response,status=self.switch_app(name)
187
+ if status!=0:
188
+ return response
189
+ else:
190
+ return response
191
+
192
+ def launch_app(self,name:str)->tuple[str,int]:
193
+ apps_map=self.get_apps_from_start_menu()
194
+ matched_app=process.extractOne(name,apps_map.keys(),score_cutoff=70)
195
+ if matched_app is None:
196
+ return (f'{name.title()} not found in start menu.',1)
197
+ app_name,_=matched_app
198
+ appid=apps_map.get(app_name)
199
+ if appid is None:
200
+ return (name,f'{name.title()} not found in start menu.',1)
201
+ if name.endswith('.exe'):
202
+ response,status=self.execute_command(f'Start-Process {appid}')
203
+ else:
204
+ response,status=self.execute_command(f'Start-Process shell:AppsFolder\\{appid}')
205
+ return response,status
206
+
207
+ def switch_app(self,name:str):
208
+ apps={app.name:app for app in [self.desktop_state.active_app]+self.desktop_state.apps if app is not None}
209
+ matched_app:Optional[tuple[str,float]]=process.extractOne(name,list(apps.keys()),score_cutoff=70)
210
+ if matched_app is None:
211
+ return (f'Application {name.title()} not found.',1)
212
+ app_name,_=matched_app
213
+ app=apps.get(app_name)
214
+ target_handle=app.handle
215
+
216
+ if uia.IsIconic(target_handle):
217
+ uia.ShowWindow(target_handle, win32con.SW_RESTORE)
218
+ content=f'{app_name.title()} restored from Minimized state.'
219
+ else:
220
+ self.bring_window_to_top(target_handle)
221
+ content=f'Switched to {app_name.title()} window.'
222
+ return content,0
223
+
224
+ def bring_window_to_top(self,target_handle:int):
225
+ foreground_handle=win32gui.GetForegroundWindow()
226
+ foreground_thread,_=win32process.GetWindowThreadProcessId(foreground_handle)
227
+ target_thread,_=win32process.GetWindowThreadProcessId(target_handle)
228
+ try:
229
+ ctypes.windll.user32.AllowSetForegroundWindow(-1)
230
+ win32process.AttachThreadInput(foreground_thread,target_thread,True)
231
+ win32gui.SetForegroundWindow(target_handle)
232
+ win32gui.BringWindowToTop(target_handle)
233
+ except Exception as e:
234
+ logger.error(f'Failed to bring window to top: {e}')
235
+ finally:
236
+ win32process.AttachThreadInput(foreground_thread,target_thread,False)
237
+
238
+ def get_element_handle_from_label(self,label:int)->uia.Control:
239
+ tree_state=self.desktop_state.tree_state
240
+ element_node=tree_state.interactive_nodes[label]
241
+ xpath=element_node.xpath
242
+ element_handle=self.get_element_from_xpath(xpath)
243
+ return element_handle
244
+
245
+ def get_coordinates_from_label(self,label:int)->tuple[int,int]:
246
+ element_handle=self.get_element_handle_from_label(label)
247
+ bounding_rectangle=element_handle.BoundingRectangle
248
+ return bounding_rectangle.xcenter(),bounding_rectangle.ycenter()
249
+
250
+ def click(self,loc:tuple[int,int],button:str='left',clicks:int=2):
251
+ x,y=loc
252
+ pg.click(x,y,button=button,clicks=clicks,duration=0.1)
253
+
254
+ def type(self,loc:tuple[int,int],text:str,caret_position:Literal['start','end','none']='none',clear:Literal['true','false']='false',press_enter:Literal['true','false']='false'):
255
+ x,y=loc
256
+ pg.leftClick(x,y)
257
+ if caret_position == 'start':
258
+ pg.press('home')
259
+ elif caret_position == 'end':
260
+ pg.press('end')
261
+ else:
262
+ pass
263
+ if clear=='true':
264
+ pg.sleep(0.5)
265
+ pg.hotkey('ctrl','a')
266
+ pg.press('backspace')
267
+ pg.typewrite(text,interval=0.02)
268
+ if press_enter=='true':
269
+ pg.press('enter')
270
+
271
+ def scroll(self,loc:tuple[int,int]=None,type:Literal['horizontal','vertical']='vertical',direction:Literal['up','down','left','right']='down',wheel_times:int=1)->str|None:
272
+ if loc:
273
+ self.move(loc)
274
+ match type:
275
+ case 'vertical':
276
+ match direction:
277
+ case 'up':
278
+ uia.WheelUp(wheel_times)
279
+ case 'down':
280
+ uia.WheelDown(wheel_times)
281
+ case _:
282
+ return 'Invalid direction. Use "up" or "down".'
283
+ case 'horizontal':
284
+ match direction:
285
+ case 'left':
286
+ pg.keyDown('Shift')
287
+ pg.sleep(0.05)
288
+ uia.WheelUp(wheel_times)
289
+ pg.sleep(0.05)
290
+ pg.keyUp('Shift')
291
+ case 'right':
292
+ pg.keyDown('Shift')
293
+ pg.sleep(0.05)
294
+ uia.WheelDown(wheel_times)
295
+ pg.sleep(0.05)
296
+ pg.keyUp('Shift')
297
+ case _:
298
+ return 'Invalid direction. Use "left" or "right".'
299
+ case _:
300
+ return 'Invalid type. Use "horizontal" or "vertical".'
301
+ return None
302
+
303
+ def drag(self,loc:tuple[int,int]):
304
+ x,y=loc
305
+ pg.sleep(0.5)
306
+ pg.dragTo(x,y,duration=0.6)
307
+
308
+ def move(self,loc:tuple[int,int]):
309
+ x,y=loc
310
+ pg.moveTo(x,y,duration=0.1)
311
+
312
+ def shortcut(self,shortcut:str):
313
+ shortcut=shortcut.split('+')
314
+ if len(shortcut)>1:
315
+ pg.hotkey(*shortcut)
316
+ else:
317
+ pg.press(''.join(shortcut))
318
+
319
+ def multi_select(self,press_ctrl:Literal['true','false']='false',elements:list[tuple[int,int]|int]=[]):
320
+ if press_ctrl=='true':
321
+ pg.keyDown('ctrl')
322
+ for element in elements:
323
+ x,y=element
324
+ pg.click(x,y,duration=0.2)
325
+ pg.sleep(0.5)
326
+ pg.keyUp('ctrl')
327
+
328
+ def multi_edit(self,elements:list[tuple[int,int,str]|tuple[int,str]]):
329
+ for element in elements:
330
+ x,y,text=element
331
+ self.type((x,y),text=text,clear='true')
332
+
333
+ def scrape(self,url:str)->str:
334
+ response=requests.get(url,timeout=10)
335
+ html=response.text
336
+ content=markdownify(html=html)
337
+ return content
338
+
339
+ def get_app_size(self,control:uia.Control):
340
+ window=control.BoundingRectangle
341
+ if window.isempty():
342
+ return Size(width=0,height=0)
343
+ return Size(width=window.width(),height=window.height())
344
+
345
+ def is_app_visible(self,app)->bool:
346
+ is_minimized=self.get_app_status(app)!=Status.MINIMIZED
347
+ size=self.get_app_size(app)
348
+ area=size.width*size.height
349
+ is_overlay=self.is_overlay_app(app)
350
+ return not is_overlay and is_minimized and area>10
351
+
352
+ def is_overlay_app(self,element:uia.Control) -> bool:
353
+ no_children = len(element.GetChildren()) == 0
354
+ is_name = "Overlay" in element.Name.strip()
355
+ return no_children or is_name
356
+
357
+ def get_apps(self) -> list[App]:
358
+ try:
359
+ desktop = uia.GetRootControl() # Get the desktop control
360
+ children = desktop.GetChildren()
361
+ apps = []
362
+ for depth, child in enumerate(children):
363
+ if isinstance(child,(uia.WindowControl,uia.PaneControl)):
364
+ window_pattern=child.GetPattern(uia.PatternId.WindowPattern)
365
+ if (window_pattern is None):
366
+ continue
367
+ if window_pattern.CanMinimize and window_pattern.CanMaximize:
368
+ status = self.get_app_status(child)
369
+ size=self.get_app_size(child)
370
+ apps.append(App(**{
371
+ "name":child.Name,
372
+ "depth":depth,
373
+ "status":status,
374
+ "size":size,
375
+ "handle":child.NativeWindowHandle,
376
+ "process_id":child.ProcessId
377
+ }))
378
+ except Exception as ex:
379
+ logger.error(f"Error in get_apps: {ex}")
380
+ apps = []
381
+ return apps
382
+
383
+ def get_xpath_from_element(self,element:uia.Control):
384
+ current=element
385
+ if current is None:
386
+ return ""
387
+ path_parts=[]
388
+ while current is not None:
389
+ parent=current.GetParentControl()
390
+ if parent is None:
391
+ # we are at the root node
392
+ path_parts.append(f'{current.ControlTypeName}')
393
+ break
394
+ children=parent.GetChildren()
395
+ same_type_children=["-".join(map(lambda x:str(x),child.GetRuntimeId())) for child in children if child.ControlType==current.ControlType]
396
+ index=same_type_children.index("-".join(map(lambda x:str(x),current.GetRuntimeId())))
397
+ if same_type_children:
398
+ path_parts.append(f'{current.ControlTypeName}[{index+1}]')
399
+ else:
400
+ path_parts.append(f'{current.ControlTypeName}')
401
+ current=parent
402
+ path_parts.reverse()
403
+ xpath="/".join(path_parts)
404
+ return xpath
405
+
406
+ def get_element_from_xpath(self,xpath:str)->uia.Control:
407
+ pattern = re.compile(r'(\w+)(?:\[(\d+)\])?')
408
+ parts=xpath.split("/")
409
+ root=uia.GetRootControl()
410
+ element=root
411
+ for part in parts[1:]:
412
+ match=pattern.fullmatch(part)
413
+ if match is None:
414
+ continue
415
+ control_type, index=match.groups()
416
+ index=int(index) if index else None
417
+ children=element.GetChildren()
418
+ same_type_children=list(filter(lambda x:x.ControlTypeName==control_type,children))
419
+ if index:
420
+ element=same_type_children[index-1]
421
+ else:
422
+ element=same_type_children[0]
423
+ return element
424
+
425
+ def get_windows_version(self)->str:
426
+ response,status=self.execute_command("(Get-CimInstance Win32_OperatingSystem).Caption")
427
+ if status==0:
428
+ return response.strip()
429
+ return "Windows"
430
+
431
+ def get_user_account_type(self)->str:
432
+ response,status=self.execute_command("(Get-LocalUser -Name $env:USERNAME).PrincipalSource")
433
+ return "Local Account" if response.strip()=='Local' else "Microsoft Account" if status==0 else "Local Account"
434
+
435
+ def get_dpi_scaling(self):
436
+ user32 = ctypes.windll.user32
437
+ dpi = user32.GetDpiForSystem()
438
+ return dpi / 96.0
439
+
440
+ def get_screen_size(self)->Size:
441
+ width, height = uia.GetScreenSize()
442
+ return Size(width=width,height=height)
443
+
444
+ def get_screenshot(self)->Image.Image:
445
+ return pg.screenshot()
446
+
447
+ @contextmanager
448
+ def auto_minimize(self):
449
+ try:
450
+ handle = uia.GetForegroundWindow()
451
+ uia.ShowWindow(handle, win32con.SW_MINIMIZE)
452
+ yield
453
+ finally:
454
+ uia.ShowWindow(handle, win32con.SW_RESTORE)
src/desktop/views.py ADDED
@@ -0,0 +1,58 @@
1
+ from src.tree.views import TreeState
2
+ from dataclasses import dataclass
3
+ from tabulate import tabulate
4
+ from typing import Optional
5
+ from PIL.Image import Image
6
+ from enum import Enum
7
+
8
class Browser(Enum):
    """Browser display names."""

    CHROME = 'Chrome'
    EDGE = 'Edge'
    FIREFOX = 'Firefox'
12
+
13
class Status(Enum):
    """Window states, valued by their display label."""

    MAXIMIZED = 'Maximized'
    MINIMIZED = 'Minimized'
    NORMAL = 'Normal'
    HIDDEN = 'Hidden'
18
+
19
+
20
@dataclass
class App:
    """One top-level application window."""

    name: str
    depth: int
    status: Status
    size: 'Size'
    handle: int
    process_id: int

    def to_row(self):
        """Return the table cells for this app; process_id is not included."""
        dimensions = self.size
        return [
            self.name,
            self.depth,
            self.status.value,
            dimensions.width,
            dimensions.height,
            self.handle,
        ]
31
+
32
@dataclass
class Size:
    """Width and height pair."""

    width: int
    height: int

    def to_string(self):
        """Format as ``(width,height)`` with no space after the comma."""
        return '(' + f'{self.width},{self.height}' + ')'
39
+
40
@dataclass
class DesktopState:
    """Snapshot of the desktop: background apps, active app, screenshot and UI tree."""

    apps: list[App]
    active_app: Optional[App]
    # Desktop.get_state(as_bytes=True) stores PNG-encoded bytes here, so the
    # annotation has to admit bytes as well as an image object.
    screenshot: Image | bytes | None
    tree_state: TreeState

    def active_app_to_string(self):
        """Render the active app as a one-row table, or a placeholder message."""
        if self.active_app is None:
            return 'No active app found'
        headers = ["Name", "Depth", "Status", "Width", "Height", "Handle"]
        return tabulate([self.active_app.to_row()], headers=headers, tablefmt="simple")

    def apps_to_string(self):
        """Render the background apps as a table, or a placeholder message."""
        if not self.apps:
            return 'No apps running in background'
        headers = ["Name", "Depth", "Status", "Width", "Height", "Handle"]
        rows = [app.to_row() for app in self.apps]
        return tabulate(rows, headers=headers, tablefmt="simple")
src/tree/__init__.py ADDED
File without changes
src/tree/config.py ADDED
@@ -0,0 +1,51 @@
1
# Control type names grouped by the role they play in the UI tree.

# Controls grouped as interactive.
INTERACTIVE_CONTROL_TYPE_NAMES = {
    'ButtonControl',
    'ListItemControl',
    'MenuItemControl',
    'EditControl',
    'CheckBoxControl',
    'RadioButtonControl',
    'ComboBoxControl',
    'HyperlinkControl',
    'SplitButtonControl',
    'TabItemControl',
    'TreeItemControl',
    'DataItemControl',
    'HeaderItemControl',
    'TextBoxControl',
    'SpinnerControl',
    'ScrollBarControl',
}

# Document containers.
DOCUMENT_CONTROL_TYPE_NAMES = {'DocumentControl'}

# Containers grouped as structural.
STRUCTURAL_CONTROL_TYPE_NAMES = {
    'PaneControl',
    'GroupControl',
    'CustomControl',
}

# Controls grouped as informative.
INFORMATIVE_CONTROL_TYPE_NAMES = {
    'TextControl',
    'ImageControl',
    'StatusBarControl',
    # Currently disabled:
    # 'ProgressBarControl',
    # 'ToolTipControl',
    # 'TitleBarControl',
    # 'SeparatorControl',
    # 'HeaderControl',
    # 'HeaderItemControl',
}

# Action names grouped as defaults.
DEFAULT_ACTIONS = {
    'Click',
    'Press',
    'Jump',
    'Check',
    'Uncheck',
    'Double Click',
}

THREAD_MAX_RETRIES = 3