windows-mcp 0.5.7__py3-none-any.whl → 0.5.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,21 +1,21 @@
1
- from typing import Set
2
-
3
- BROWSER_NAMES=set([
4
- 'msedge.exe',
5
- 'chrome.exe',
6
- 'firefox.exe'
7
- ])
8
-
9
- AVOIDED_APPS:Set[str]=set([
10
- 'AgentUI'
11
- ])
12
-
13
- EXCLUDED_APPS:Set[str]=set([
14
- 'Progman',
15
- 'Shell_TrayWnd',
16
- 'Shell_SecondaryTrayWnd',
17
- 'Microsoft.UI.Content.PopupWindowSiteBridge',
18
- 'Windows.UI.Core.CoreWindow',
19
- ])
20
-
1
+ from typing import Set
2
+
3
+ BROWSER_NAMES=set([
4
+ 'msedge.exe',
5
+ 'chrome.exe',
6
+ 'firefox.exe'
7
+ ])
8
+
9
+ AVOIDED_APPS:Set[str]=set([
10
+ 'AgentUI'
11
+ ])
12
+
13
+ EXCLUDED_APPS:Set[str]=set([
14
+ 'Progman',
15
+ 'Shell_TrayWnd',
16
+ 'Shell_SecondaryTrayWnd',
17
+ 'Microsoft.UI.Content.PopupWindowSiteBridge',
18
+ 'Windows.UI.Core.CoreWindow',
19
+ ])
20
+
21
21
  PROCESS_PER_MONITOR_DPI_AWARE = 2
@@ -1,14 +1,16 @@
1
1
  from windows_mcp.desktop.config import BROWSER_NAMES, PROCESS_PER_MONITOR_DPI_AWARE
2
- from windows_mcp.desktop.views import DesktopState, App, Size, Status
2
+ from windows_mcp.desktop.views import DesktopState, App, Status, Size
3
+ from windows_mcp.tree.views import BoundingBox, TreeElementNode
4
+ from concurrent.futures import ThreadPoolExecutor, as_completed
5
+ from PIL import ImageGrab, ImageFont, ImageDraw, Image
3
6
  from windows_mcp.tree.service import Tree
4
7
  from locale import getpreferredencoding
5
8
  from contextlib import contextmanager
6
9
  from typing import Optional,Literal
7
10
  from markdownify import markdownify
8
11
  from fuzzywuzzy import process
12
+ from time import sleep,time
9
13
  from psutil import Process
10
- from time import sleep
11
- from PIL import Image
12
14
  import win32process
13
15
  import subprocess
14
16
  import win32gui
@@ -21,20 +23,17 @@ import csv
21
23
  import re
22
24
  import os
23
25
  import io
26
+ import random
24
27
 
25
28
  logger = logging.getLogger(__name__)
26
29
  logger.setLevel(logging.INFO)
27
- handler = logging.StreamHandler()
28
- formatter = logging.Formatter('[%(levelname)s] %(message)s')
29
- handler.setFormatter(formatter)
30
- logger.addHandler(handler)
31
30
 
32
31
  try:
33
32
  ctypes.windll.shcore.SetProcessDpiAwareness(PROCESS_PER_MONITOR_DPI_AWARE)
34
33
  except Exception:
35
34
  ctypes.windll.user32.SetProcessDPIAware()
36
35
 
37
- import uiautomation as uia
36
+ import windows_mcp.uia as uia
38
37
  import pyautogui as pg
39
38
 
40
39
  pg.FAILSAFE=False
@@ -46,47 +45,50 @@ class Desktop:
46
45
  self.tree=Tree(self)
47
46
  self.desktop_state=None
48
47
 
49
- def get_resolution(self)->tuple[int,int]:
50
- return pg.size()
51
-
52
- def get_state(self,use_vision:bool=False,use_dom:bool=False,as_bytes:bool=False,scale:float=1.0)->DesktopState:
48
+ def get_state(self,use_annotation:bool=True,use_vision:bool=False,use_dom:bool=False,as_bytes:bool=False,scale:float=1.0)->DesktopState:
53
49
  sleep(0.1)
54
- apps=self.get_apps()
55
- active_app=self.get_active_app()
50
+ start_time = time()
51
+
52
+ controls_handles=self.get_controls_handles() # Taskbar,Program Manager,Apps, Dialogs
53
+ apps,apps_handles=self.get_apps(controls_handles=controls_handles) # Apps
54
+ active_app=self.get_active_app(apps=apps) #Active App
55
+ active_app_handle=active_app.handle if active_app else None
56
+
56
57
  if active_app is not None and active_app in apps:
57
58
  apps.remove(active_app)
58
- logger.debug(f"Active app: {active_app}")
59
+
60
+ logger.debug(f"Active app: {active_app or 'No Active App Found'}")
59
61
  logger.debug(f"Apps: {apps}")
60
- tree_state=self.tree.get_state(active_app,apps,use_dom=use_dom)
62
+
63
+ #Preparing handles for Tree
64
+ other_apps_handles=list(controls_handles-apps_handles)
65
+
66
+ tree_state=self.tree.get_state(active_app_handle,other_apps_handles,use_dom=use_dom)
67
+
61
68
  if use_vision:
62
- screenshot=self.tree.get_annotated_screenshot(tree_state.interactive_nodes,scale=scale)
69
+ if use_annotation:
70
+ nodes=tree_state.interactive_nodes
71
+ screenshot=self.get_annotated_screenshot(nodes=nodes)
72
+ else:
73
+ screenshot=self.get_screenshot()
74
+
75
+ if scale != 1.0:
76
+ screenshot = screenshot.resize((int(screenshot.width * scale), int(screenshot.height * scale)), Image.LANCZOS)
77
+
63
78
  if as_bytes:
64
- bytes_io=io.BytesIO()
65
- screenshot.save(bytes_io,format='PNG')
66
- screenshot=bytes_io.getvalue()
79
+ buffered = io.BytesIO()
80
+ screenshot.save(buffered, format="PNG")
81
+ screenshot = buffered.getvalue()
82
+ buffered.close()
67
83
  else:
68
84
  screenshot=None
85
+
69
86
  self.desktop_state=DesktopState(apps= apps,active_app=active_app,screenshot=screenshot,tree_state=tree_state)
87
+ # Log the time taken to capture the state
88
+ end_time = time()
89
+ logger.info(f"Desktop State capture took {end_time - start_time:.2f} seconds")
70
90
  return self.desktop_state
71
91
 
72
- def get_window_element_from_element(self,element:uia.Control)->uia.Control|None:
73
- while element is not None:
74
- if uia.IsTopLevelWindow(element.NativeWindowHandle):
75
- return element
76
- element = element.GetParentControl()
77
- return None
78
-
79
- def get_active_app(self)->App|None:
80
- try:
81
- handle=uia.GetForegroundWindow()
82
- for app in self.get_apps():
83
- if app.handle!=handle:
84
- continue
85
- return app
86
- except Exception as ex:
87
- logger.error(f"Error in get_active_app: {ex}")
88
- return None
89
-
90
92
  def get_app_status(self,control:uia.Control)->Status:
91
93
  if uia.IsIconic(control.NativeWindowHandle):
92
94
  return Status.MINIMIZED
@@ -106,31 +108,52 @@ class Desktop:
106
108
 
107
109
  def get_apps_from_start_menu(self)->dict[str,str]:
108
110
  command='Get-StartApps | ConvertTo-Csv -NoTypeInformation'
109
- apps_info,_=self.execute_command(command)
110
- reader=csv.DictReader(io.StringIO(apps_info))
111
- return {row.get('Name').lower():row.get('AppID') for row in reader}
111
+ apps_info, status = self.execute_command(command)
112
+
113
+ if status != 0 or not apps_info:
114
+ logger.error(f"Failed to get apps from start menu: {apps_info}")
115
+ return {}
116
+
117
+ try:
118
+ reader = csv.DictReader(io.StringIO(apps_info.strip()))
119
+ return {
120
+ row.get('Name').lower(): row.get('AppID')
121
+ for row in reader
122
+ if row.get('Name') and row.get('AppID')
123
+ }
124
+ except Exception as e:
125
+ logger.error(f"Error parsing start menu apps: {e}")
126
+ return {}
112
127
 
113
- def execute_command(self,command:str)->tuple[str,int]:
128
+ def execute_command(self, command: str,timeout:int=10) -> tuple[str, int]:
114
129
  try:
115
130
  encoded = base64.b64encode(command.encode("utf-16le")).decode("ascii")
116
131
  result = subprocess.run(
117
132
  ['powershell', '-NoProfile', '-EncodedCommand', encoded],
118
- capture_output=True,
119
- errors='ignore',
120
- timeout=25,
133
+ capture_output=True, # No errors='ignore' - let subprocess return bytes
134
+ timeout=timeout,
121
135
  cwd=os.path.expanduser(path='~')
122
136
  )
123
- stdout=result.stdout
124
- stderr=result.stderr
125
- return (stdout or stderr,result.returncode)
137
+ # Handle both bytes and str output (subprocess behavior varies by environment)
138
+ stdout = result.stdout
139
+ stderr = result.stderr
140
+ if isinstance(stdout, bytes):
141
+ stdout = stdout.decode(self.encoding, errors='ignore')
142
+ if isinstance(stderr, bytes):
143
+ stderr = stderr.decode(self.encoding, errors='ignore')
144
+ return (stdout or stderr, result.returncode)
126
145
  except subprocess.TimeoutExpired:
127
146
  return ('Command execution timed out', 1)
128
147
  except Exception as e:
129
- return ('Command execution failed', 1)
148
+ return (f'Command execution failed: {type(e).__name__}: {e}', 1)
130
149
 
131
150
  def is_app_browser(self,node:uia.Control):
132
- process=Process(node.ProcessId)
133
- return process.name() in BROWSER_NAMES
151
+ '''Give any node of the app and it will return True if the app is a browser, False otherwise.'''
152
+ try:
153
+ process=Process(node.ProcessId)
154
+ return process.name() in BROWSER_NAMES
155
+ except:
156
+ return False
134
157
 
135
158
  def get_default_language(self)->str:
136
159
  command="Get-Culture | Select-Object Name,DisplayName | ConvertTo-Csv -NoTypeInformation"
@@ -162,23 +185,32 @@ class Desktop:
162
185
  return (f'{active_app.name} resized to {width}x{height} at {x},{y}.',0)
163
186
 
164
187
  def is_app_running(self,name:str)->bool:
165
- apps={app.name:app for app in self.get_apps()}
166
- return process.extractOne(name,list(apps.keys()),score_cutoff=60) is not None
188
+ apps, _ = self.get_apps()
189
+ apps_dict = {app.name: app for app in apps}
190
+ return process.extractOne(name,list(apps_dict.keys()),score_cutoff=60) is not None
167
191
 
168
192
  def app(self,mode:Literal['launch','switch','resize'],name:Optional[str]=None,loc:Optional[tuple[int,int]]=None,size:Optional[tuple[int,int]]=None):
169
193
  match mode:
170
194
  case 'launch':
171
- response,status=self.launch_app(name)
172
- sleep(1.25)
195
+ response,status,pid=self.launch_app(name)
173
196
  if status!=0:
174
197
  return response
175
- consecutive_waits=10
176
- for _ in range(consecutive_waits):
177
- if not self.is_app_running(name):
178
- sleep(1.25)
179
- else:
180
- return f'{name.title()} launched.'
181
- return f'Launching {name.title()} wait for it to come load.'
198
+
199
+ # Smart wait using UIA Exists (avoids manual Python loops)
200
+ launched = False
201
+ if pid > 0:
202
+ if uia.WindowControl(ProcessId=pid).Exists(maxSearchSeconds=10):
203
+ launched = True
204
+
205
+ if not launched:
206
+ # Fallback: Regex search for the window title
207
+ safe_name = re.escape(name)
208
+ if uia.WindowControl(RegexName=f'(?i).*{safe_name}.*').Exists(maxSearchSeconds=10):
209
+ launched = True
210
+
211
+ if launched:
212
+ return f'{name.title()} launched.'
213
+ return f'Launching {name.title()} sent, but window not detected yet.'
182
214
  case 'resize':
183
215
  response,status=self.resize_app(size=size,loc=loc)
184
216
  if status!=0:
@@ -192,21 +224,29 @@ class Desktop:
192
224
  else:
193
225
  return response
194
226
 
195
- def launch_app(self,name:str)->tuple[str,int]:
227
+ def launch_app(self,name:str)->tuple[str,int,int]:
196
228
  apps_map=self.get_apps_from_start_menu()
197
229
  matched_app=process.extractOne(name,apps_map.keys(),score_cutoff=70)
198
230
  if matched_app is None:
199
- return (f'{name.title()} not found in start menu.',1)
231
+ return (f'{name.title()} not found in start menu.',1,0)
200
232
  app_name,_=matched_app
201
233
  appid=apps_map.get(app_name)
202
234
  if appid is None:
203
- return (f'{name.title()} not found in start menu.',1)
204
- if appid.endswith('.exe'):
205
- command=f"Start-Process '{appid}'"
235
+ return (name,f'{name.title()} not found in start menu.',1,0)
236
+
237
+ pid = 0
238
+ if os.path.exists(appid) or "\\" in appid:
239
+ # It's a file path, we can try to get the PID using PassThru
240
+ command = f'Start-Process "{appid}" -PassThru | Select-Object -ExpandProperty Id'
241
+ response, status = self.execute_command(command)
242
+ if status == 0 and response.strip().isdigit():
243
+ pid = int(response.strip())
206
244
  else:
207
- command=f"Start-Process shell:AppsFolder\\{appid}"
208
- response,status=self.execute_command(command)
209
- return response,status
245
+ # It's an AUMID (Store App)
246
+ command = f'Start-Process "shell:AppsFolder\\{appid}"'
247
+ response, status = self.execute_command(command)
248
+
249
+ return response, status, pid
210
250
 
211
251
  def switch_app(self,name:str):
212
252
  apps={app.name:app for app in [self.desktop_state.active_app]+self.desktop_state.apps if app is not None}
@@ -225,19 +265,46 @@ class Desktop:
225
265
  content=f'Switched to {app_name.title()} window.'
226
266
  return content,0
227
267
 
228
- def bring_window_to_top(self,target_handle:int):
229
- foreground_handle=win32gui.GetForegroundWindow()
230
- foreground_thread,_=win32process.GetWindowThreadProcessId(foreground_handle)
231
- target_thread,_=win32process.GetWindowThreadProcessId(target_handle)
268
+ def bring_window_to_top(self, target_handle: int):
269
+ if not win32gui.IsWindow(target_handle):
270
+ raise ValueError("Invalid window handle")
271
+
232
272
  try:
273
+ if win32gui.IsIconic(target_handle):
274
+ win32gui.ShowWindow(target_handle, win32con.SW_RESTORE)
275
+
276
+ foreground_handle = win32gui.GetForegroundWindow()
277
+ foreground_thread, _ = win32process.GetWindowThreadProcessId(foreground_handle)
278
+ target_thread, _ = win32process.GetWindowThreadProcessId(target_handle)
279
+
280
+ if not foreground_thread or not target_thread or foreground_thread == target_thread:
281
+ win32gui.SetForegroundWindow(target_handle)
282
+ win32gui.BringWindowToTop(target_handle)
283
+ return
284
+
233
285
  ctypes.windll.user32.AllowSetForegroundWindow(-1)
234
- win32process.AttachThreadInput(foreground_thread,target_thread,True)
235
- win32gui.SetForegroundWindow(target_handle)
236
- win32gui.BringWindowToTop(target_handle)
286
+
287
+ attached = False
288
+ try:
289
+ win32process.AttachThreadInput(foreground_thread, target_thread, True)
290
+ attached = True
291
+
292
+ win32gui.SetForegroundWindow(target_handle)
293
+ win32gui.BringWindowToTop(target_handle)
294
+
295
+ win32gui.SetWindowPos(
296
+ target_handle,
297
+ win32con.HWND_TOP,
298
+ 0, 0, 0, 0,
299
+ win32con.SWP_NOMOVE | win32con.SWP_NOSIZE | win32con.SWP_SHOWWINDOW
300
+ )
301
+
302
+ finally:
303
+ if attached:
304
+ win32process.AttachThreadInput(foreground_thread, target_thread, False)
305
+
237
306
  except Exception as e:
238
- logger.error(f'Failed to bring window to top: {e}')
239
- finally:
240
- win32process.AttachThreadInput(foreground_thread,target_thread,False)
307
+ logger.exception(f"Failed to bring window to top: {e}")
241
308
 
242
309
  def get_element_handle_from_label(self,label:int)->uia.Control:
243
310
  tree_state=self.desktop_state.tree_state
@@ -340,16 +407,23 @@ class Desktop:
340
407
  content=markdownify(html=html)
341
408
  return content
342
409
 
343
- def get_app_size(self,control:uia.Control):
344
- window=control.BoundingRectangle
345
- if window.isempty():
346
- return Size(width=0,height=0)
347
- return Size(width=window.width(),height=window.height())
410
+ def get_app_from_element(self,element:uia.Control)->App|None:
411
+ if element is None:
412
+ return None
413
+ top_window=element.GetTopLevelControl()
414
+ if top_window is None:
415
+ return None
416
+ handle=top_window.NativeWindowHandle
417
+ apps,_=self.get_apps()
418
+ for app in apps:
419
+ if app.handle==handle:
420
+ return app
421
+ return None
348
422
 
349
- def is_app_visible(self,app)->bool:
423
+ def is_app_visible(self,app:uia.Control)->bool:
350
424
  is_minimized=self.get_app_status(app)!=Status.MINIMIZED
351
- size=self.get_app_size(app)
352
- area=size.width*size.height
425
+ size=app.BoundingRectangle
426
+ area=size.width()*size.height()
353
427
  is_overlay=self.is_overlay_app(app)
354
428
  return not is_overlay and is_minimized and area>10
355
429
 
@@ -357,32 +431,77 @@ class Desktop:
357
431
  no_children = len(element.GetChildren()) == 0
358
432
  is_name = "Overlay" in element.Name.strip()
359
433
  return no_children or is_name
434
+
435
+ def get_controls_handles(self):
436
+ handles = set()
437
+ root=uia.GetRootControl()
438
+ children=root.GetChildren()
439
+ for child in children:
440
+ handles.add(child.NativeWindowHandle)
441
+ return handles
442
+
443
+ def get_active_app(self,apps:list[App]|None=None)->App|None:
444
+ try:
445
+ if apps is None:
446
+ apps,_=self.get_apps()
447
+ handle=uia.GetForegroundWindow()
448
+ for app in apps:
449
+ if app.handle!=handle:
450
+ continue
451
+ return app
452
+ except Exception as ex:
453
+ logger.error(f"Error in get_active_app: {ex}")
454
+ return None
360
455
 
361
- def get_apps(self) -> list[App]:
456
+ def get_apps(self,controls_handles:set[int]|None=None) -> tuple[list[App],set[int]]:
362
457
  try:
363
- desktop = uia.GetRootControl() # Get the desktop control
364
- children = desktop.GetChildren()
365
458
  apps = []
366
- for depth, child in enumerate(children):
459
+ handles = set()
460
+ controls_handles=controls_handles or self.get_controls_handles()
461
+ for depth, hwnd in enumerate(controls_handles):
462
+ try:
463
+ child = uia.ControlFromHandle(hwnd)
464
+ except Exception:
465
+ continue
466
+
467
+ # Filter out Overlays (e.g. NVIDIA, Steam)
468
+ if self.is_overlay_app(child):
469
+ continue
470
+
367
471
  if isinstance(child,(uia.WindowControl,uia.PaneControl)):
368
472
  window_pattern=child.GetPattern(uia.PatternId.WindowPattern)
369
473
  if (window_pattern is None):
370
474
  continue
475
+
371
476
  if window_pattern.CanMinimize and window_pattern.CanMaximize:
372
477
  status = self.get_app_status(child)
373
- size=self.get_app_size(child)
478
+
479
+ bounding_rect=child.BoundingRectangle
480
+ if bounding_rect.isempty() and status!=Status.MINIMIZED:
481
+ continue
482
+
374
483
  apps.append(App(**{
375
484
  "name":child.Name,
485
+ "runtime_id":tuple(child.GetRuntimeId()),
376
486
  "depth":depth,
377
487
  "status":status,
378
- "size":size,
488
+ "bounding_box":BoundingBox(
489
+ left=bounding_rect.left,
490
+ top=bounding_rect.top,
491
+ right=bounding_rect.right,
492
+ bottom=bounding_rect.bottom,
493
+ width=bounding_rect.width(),
494
+ height=bounding_rect.height()
495
+ ),
379
496
  "handle":child.NativeWindowHandle,
380
- "process_id":child.ProcessId
497
+ "process_id":child.ProcessId,
498
+ "is_browser":self.is_app_browser(child)
381
499
  }))
500
+ handles.add(child.NativeWindowHandle)
382
501
  except Exception as ex:
383
502
  logger.error(f"Error in get_apps: {ex}")
384
503
  apps = []
385
- return apps
504
+ return apps,handles
386
505
 
387
506
  def get_xpath_from_element(self,element:uia.Control):
388
507
  current=element
@@ -442,11 +561,72 @@ class Desktop:
442
561
  return dpi / 96.0
443
562
 
444
563
  def get_screen_size(self)->Size:
445
- width, height = uia.GetScreenSize()
564
+ width, height = uia.GetVirtualScreenSize()
446
565
  return Size(width=width,height=height)
447
566
 
448
567
  def get_screenshot(self)->Image.Image:
449
- return pg.screenshot()
568
+ try:
569
+ return ImageGrab.grab(all_screens=True)
570
+ except Exception as e:
571
+ logger.warning(f"Failed to capture all screens: {e}. Fallback to primary.")
572
+ return pg.screenshot()
573
+
574
+ def get_annotated_screenshot(self, nodes: list[TreeElementNode]) -> Image.Image:
575
+ screenshot = self.get_screenshot()
576
+ sleep(0.10)
577
+ # Add padding
578
+ padding = 5
579
+ width = int(screenshot.width + (1.5 * padding))
580
+ height = int(screenshot.height + (1.5 * padding))
581
+ padded_screenshot = Image.new("RGB", (width, height), color=(255, 255, 255))
582
+ padded_screenshot.paste(screenshot, (padding, padding))
583
+
584
+ draw = ImageDraw.Draw(padded_screenshot)
585
+ font_size = 12
586
+ try:
587
+ font = ImageFont.truetype('arial.ttf', font_size)
588
+ except IOError:
589
+ font = ImageFont.load_default()
590
+
591
+ def get_random_color():
592
+ return "#{:06x}".format(random.randint(0, 0xFFFFFF))
593
+
594
+ left_offset, top_offset, _, _ = uia.GetVirtualScreenRect()
595
+
596
+ def draw_annotation(label, node: TreeElementNode):
597
+ box = node.bounding_box
598
+ color = get_random_color()
599
+
600
+ # Scale and pad the bounding box also clip the bounding box
601
+ # Adjust for virtual screen offset so coordinates map to the screenshot image
602
+ adjusted_box = (
603
+ int(box.left - left_offset) + padding,
604
+ int(box.top - top_offset) + padding,
605
+ int(box.right - left_offset) + padding,
606
+ int(box.bottom - top_offset) + padding
607
+ )
608
+ # Draw bounding box
609
+ draw.rectangle(adjusted_box, outline=color, width=2)
610
+
611
+ # Label dimensions
612
+ label_width = draw.textlength(str(label), font=font)
613
+ label_height = font_size
614
+ left, top, right, bottom = adjusted_box
615
+
616
+ # Label position above bounding box
617
+ label_x1 = right - label_width
618
+ label_y1 = top - label_height - 4
619
+ label_x2 = label_x1 + label_width
620
+ label_y2 = label_y1 + label_height + 4
621
+
622
+ # Draw label background and text
623
+ draw.rectangle([(label_x1, label_y1), (label_x2, label_y2)], fill=color)
624
+ draw.text((label_x1 + 2, label_y1 + 2), str(label), fill=(255, 255, 255), font=font)
625
+
626
+ # Draw annotations in parallel
627
+ with ThreadPoolExecutor() as executor:
628
+ executor.map(draw_annotation, range(len(nodes)), nodes)
629
+ return padded_screenshot
450
630
 
451
631
  @contextmanager
452
632
  def auto_minimize(self):
@@ -455,4 +635,4 @@ class Desktop:
455
635
  uia.ShowWindow(handle, win32con.SW_MINIMIZE)
456
636
  yield
457
637
  finally:
458
- uia.ShowWindow(handle, win32con.SW_RESTORE)
638
+ uia.ShowWindow(handle, win32con.SW_RESTORE)