windows-mcp 0.5.8__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,601 +1,543 @@
1
- from windows_mcp.tree.config import INTERACTIVE_CONTROL_TYPE_NAMES,DOCUMENT_CONTROL_TYPE_NAMES,INFORMATIVE_CONTROL_TYPE_NAMES, DEFAULT_ACTIONS, THREAD_MAX_RETRIES
2
- from windows_mcp.tree.views import TreeElementNode, ScrollElementNode, TextElementNode, Center, BoundingBox, TreeState, DOMInfo
3
- from windows_mcp.uia import Control,ImageControl,ScrollPattern,WindowControl,Rect,GetRootControl,PatternId
4
- from concurrent.futures import ThreadPoolExecutor, as_completed
5
- from windows_mcp.tree.utils import random_point_within_bounding_box
6
- from PIL import Image, ImageFont, ImageDraw
7
- from typing import TYPE_CHECKING,Optional
8
- from windows_mcp.desktop.views import App
9
- from time import sleep,time
10
- import logging
11
- import random
12
-
13
- logger = logging.getLogger(__name__)
14
- logger.setLevel(logging.INFO)
15
- handler = logging.StreamHandler()
16
- formatter = logging.Formatter('[%(levelname)s] %(message)s')
17
- handler.setFormatter(formatter)
18
- logger.addHandler(handler)
19
-
20
- if TYPE_CHECKING:
21
- from windows_mcp.desktop.service import Desktop
22
-
23
- class Tree:
24
- def __init__(self,desktop:'Desktop'):
25
- self.desktop=desktop
26
- self.screen_size=self.desktop.get_screen_size()
27
- self.dom_info:Optional[DOMInfo]=None
28
- self.dom_bounding_box:BoundingBox=None
29
- self.screen_box=BoundingBox(
30
- top=0, left=0, bottom=self.screen_size.height, right=self.screen_size.width,
31
- width=self.screen_size.width, height=self.screen_size.height
32
- )
33
-
34
- def get_state(self,active_app:App,other_apps:list[App],use_dom:bool=False)->TreeState:
35
- root=GetRootControl()
36
- other_apps_handle=set(map(lambda other_app: other_app.handle,other_apps))
37
- apps=list(filter(lambda app:app.NativeWindowHandle not in other_apps_handle,root.GetChildren()))
38
- del other_apps_handle
39
- if active_app:
40
- apps=list(filter(lambda app:app.ClassName!='Progman',apps))
41
- interactive_nodes,scrollable_nodes,dom_informative_nodes=self.get_appwise_nodes(apps=apps,use_dom=use_dom)
42
- return TreeState(dom_info=self.dom_info,interactive_nodes=interactive_nodes,scrollable_nodes=scrollable_nodes,dom_informative_nodes=dom_informative_nodes)
43
-
44
- def get_appwise_nodes(self,apps:list[Control],use_dom:bool=False)-> tuple[list[TreeElementNode],list[ScrollElementNode],list[TextElementNode]]:
45
- interactive_nodes, scrollable_nodes,dom_informative_nodes = [], [], []
46
- with ThreadPoolExecutor() as executor:
47
- retry_counts = {app: 0 for app in apps}
48
- future_to_app = {
49
- executor.submit(
50
- self.get_nodes, app,
51
- self.desktop.is_app_browser(app),
52
- use_dom
53
- ): app
54
- for app in apps
55
- }
56
- while future_to_app: # keep running until no pending futures
57
- for future in as_completed(list(future_to_app)):
58
- app = future_to_app.pop(future) # remove completed future
59
- try:
60
- result = future.result()
61
- if result:
62
- element_nodes, scroll_nodes,informative_nodes = result
63
- interactive_nodes.extend(element_nodes)
64
- scrollable_nodes.extend(scroll_nodes)
65
- dom_informative_nodes.extend(informative_nodes)
66
- except Exception as e:
67
- retry_counts[app] += 1
68
- logger.debug(f"Error in processing node {app.Name}, retry attempt {retry_counts[app]}\nError: {e}")
69
- if retry_counts[app] < THREAD_MAX_RETRIES:
70
- logger.debug(f"Retrying {app.Name} for the {retry_counts[app]}th time")
71
- new_future = executor.submit(self.get_nodes, app, self.desktop.is_app_browser(app),use_dom)
72
- future_to_app[new_future] = app
73
- else:
74
- logger.error(f"Task failed completely for {app.Name} after {THREAD_MAX_RETRIES} retries")
75
- return interactive_nodes,scrollable_nodes,dom_informative_nodes
76
-
77
- def iou_bounding_box(self,window_box: Rect,element_box: Rect,) -> BoundingBox:
78
- # Step 1: Intersection of element and window (existing logic)
79
- intersection_left = max(window_box.left, element_box.left)
80
- intersection_top = max(window_box.top, element_box.top)
81
- intersection_right = min(window_box.right, element_box.right)
82
- intersection_bottom = min(window_box.bottom, element_box.bottom)
83
-
84
- # Step 2: Clamp to screen boundaries (new addition)
85
- intersection_left = max(self.screen_box.left, intersection_left)
86
- intersection_top = max(self.screen_box.top, intersection_top)
87
- intersection_right = min(self.screen_box.right, intersection_right)
88
- intersection_bottom = min(self.screen_box.bottom, intersection_bottom)
89
-
90
- # Step 3: Validate intersection
91
- if (intersection_right > intersection_left and intersection_bottom > intersection_top):
92
- bounding_box = BoundingBox(
93
- left=intersection_left,
94
- top=intersection_top,
95
- right=intersection_right,
96
- bottom=intersection_bottom,
97
- width=intersection_right - intersection_left,
98
- height=intersection_bottom - intersection_top
99
- )
100
- else:
101
- # No valid visible intersection (either outside window or screen)
102
- bounding_box = BoundingBox(
103
- left=0,
104
- top=0,
105
- right=0,
106
- bottom=0,
107
- width=0,
108
- height=0
109
- )
110
- return bounding_box
111
-
112
- def get_nodes(self, node: Control, is_browser:bool=False,use_dom:bool=False) -> tuple[list[TreeElementNode],list[ScrollElementNode]]:
113
- window_bounding_box=node.BoundingRectangle
114
-
115
- def is_element_visible(node:Control,threshold:int=0):
116
- is_control=node.IsControlElement
117
- box=node.BoundingRectangle
118
- if box.isempty():
119
- return False
120
- width=box.width()
121
- height=box.height()
122
- area=width*height
123
- is_offscreen=(not node.IsOffscreen) or node.ControlTypeName in ['EditControl']
124
- return area > threshold and is_offscreen and is_control
125
-
126
- def is_element_enabled(node:Control):
127
- try:
128
- return node.IsEnabled
129
- except Exception:
130
- return False
131
-
132
- def is_default_action(node:Control):
133
- legacy_pattern=node.GetLegacyIAccessiblePattern()
134
- default_action=legacy_pattern.DefaultAction.title()
135
- if default_action in DEFAULT_ACTIONS:
136
- return True
137
- return False
138
-
139
- def is_element_image(node:Control):
140
- if isinstance(node,ImageControl):
141
- if node.LocalizedControlType=='graphic' or not node.IsKeyboardFocusable:
142
- return True
143
- return False
144
-
145
- def is_element_text(node:Control):
146
- try:
147
- if node.ControlTypeName in INFORMATIVE_CONTROL_TYPE_NAMES:
148
- if is_element_visible(node) and is_element_enabled(node) and not is_element_image(node):
149
- return True
150
- except Exception:
151
- return False
152
- return False
153
-
154
- def is_window_modal(node:WindowControl):
155
- try:
156
- window_pattern=node.GetWindowPattern()
157
- return window_pattern.IsModal
158
- except Exception:
159
- return False
160
-
161
- def is_keyboard_focusable(node:Control):
162
- try:
163
- if node.ControlTypeName in set(['EditControl','ButtonControl','CheckBoxControl','RadioButtonControl','TabItemControl']):
164
- return True
165
- return node.IsKeyboardFocusable
166
- except Exception:
167
- return False
168
-
169
- def element_has_child_element(node:Control,control_type:str,child_control_type:str):
170
- if node.LocalizedControlType==control_type:
171
- first_child=node.GetFirstChildControl()
172
- if first_child is None:
173
- return False
174
- return first_child.LocalizedControlType==child_control_type
175
-
176
- def group_has_no_name(node:Control):
177
- try:
178
- if node.ControlTypeName=='GroupControl':
179
- if not node.Name.strip():
180
- return True
181
- return False
182
- except Exception:
183
- return False
184
-
185
- def is_element_scrollable(node:Control):
186
- try:
187
- if (node.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES|INFORMATIVE_CONTROL_TYPE_NAMES) or node.IsOffscreen:
188
- return False
189
- scroll_pattern:ScrollPattern=node.GetPattern(PatternId.ScrollPattern)
190
- if scroll_pattern is None:
191
- return False
192
- return scroll_pattern.VerticallyScrollable
193
- except Exception:
194
- return False
195
-
196
- def is_element_interactive(node:Control):
197
- try:
198
- if is_browser and node.ControlTypeName in set(['DataItemControl','ListItemControl']) and not is_keyboard_focusable(node):
199
- return False
200
- elif not is_browser and node.ControlTypeName=="ImageControl" and is_keyboard_focusable(node):
201
- return True
202
- elif node.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES|DOCUMENT_CONTROL_TYPE_NAMES:
203
- return is_element_visible(node) and is_element_enabled(node) and (not is_element_image(node) or is_keyboard_focusable(node))
204
- elif node.ControlTypeName=='GroupControl':
205
- if is_browser:
206
- return is_element_visible(node) and is_element_enabled(node) and (is_default_action(node) or is_keyboard_focusable(node))
207
- # else:
208
- # return is_element_visible and is_element_enabled(node) and is_default_action(node)
209
- except Exception:
210
- return False
211
- return False
212
-
213
- def dom_correction(node:Control):
214
- if element_has_child_element(node,'list item','link') or element_has_child_element(node,'item','link'):
215
- dom_interactive_nodes.pop()
216
- return None
217
- elif node.ControlTypeName=='GroupControl':
218
- dom_interactive_nodes.pop()
219
- if is_keyboard_focusable(node):
220
- child=node
221
- try:
222
- while child.GetFirstChildControl() is not None:
223
- if child.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES:
224
- return None
225
- child=child.GetFirstChildControl()
226
- except Exception:
227
- return None
228
- if child.ControlTypeName!='TextControl':
229
- return None
230
- legacy_pattern=node.GetLegacyIAccessiblePattern()
231
- value=legacy_pattern.Value
232
- element_bounding_box = node.BoundingRectangle
233
- bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
234
- center = bounding_box.get_center()
235
- is_focused=node.HasKeyboardFocus
236
- dom_interactive_nodes.append(TreeElementNode(**{
237
- 'name':child.Name.strip(),
238
- 'control_type':node.LocalizedControlType,
239
- 'value':value,
240
- 'shortcut':node.AcceleratorKey,
241
- 'bounding_box':bounding_box,
242
- 'xpath':'',
243
- 'center':center,
244
- 'app_name':app_name,
245
- 'is_focused':is_focused
246
- }))
247
- elif element_has_child_element(node,'link','heading'):
248
- dom_interactive_nodes.pop()
249
- node=node.GetFirstChildControl()
250
- control_type='link'
251
- legacy_pattern=node.GetLegacyIAccessiblePattern()
252
- value=legacy_pattern.Value
253
- element_bounding_box = node.BoundingRectangle
254
- bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
255
- center = bounding_box.get_center()
256
- is_focused=node.HasKeyboardFocus
257
- dom_interactive_nodes.append(TreeElementNode(**{
258
- 'name':node.Name.strip(),
259
- 'control_type':control_type,
260
- 'value':node.Name.strip(),
261
- 'shortcut':node.AcceleratorKey,
262
- 'bounding_box':bounding_box,
263
- 'xpath':'',
264
- 'center':center,
265
- 'app_name':app_name,
266
- 'is_focused':is_focused
267
- }))
268
-
269
- def tree_traversal(node: Control,is_dom:bool=False,is_dialog:bool=False):
270
- # Checks to skip the nodes that are not interactive
271
- if node.IsOffscreen and (node.ControlTypeName not in set(["GroupControl","EditControl","TitleBarControl"])) and node.ClassName not in set(["Popup","Windows.UI.Core.CoreComponentInputSource"]):
272
- return None
273
-
274
- if is_element_scrollable(node):
275
- scroll_pattern:ScrollPattern=node.GetPattern(PatternId.ScrollPattern)
276
- box = node.BoundingRectangle
277
- # Get the center
278
- x,y=random_point_within_bounding_box(node=node,scale_factor=0.8)
279
- center = Center(x=x,y=y)
280
- scrollable_nodes.append(ScrollElementNode(**{
281
- 'name':node.Name.strip() or node.AutomationId or node.LocalizedControlType.capitalize() or "''",
282
- 'app_name':app_name,
283
- 'control_type':node.LocalizedControlType.title(),
284
- 'bounding_box':BoundingBox(**{
285
- 'left':box.left,
286
- 'top':box.top,
287
- 'right':box.right,
288
- 'bottom':box.bottom,
289
- 'width':box.width(),
290
- 'height':box.height()
291
- }),
292
- 'center':center,
293
- 'xpath':'',
294
- 'horizontal_scrollable':scroll_pattern.HorizontallyScrollable,
295
- 'horizontal_scroll_percent':scroll_pattern.HorizontalScrollPercent if scroll_pattern.HorizontallyScrollable else 0,
296
- 'vertical_scrollable':scroll_pattern.VerticallyScrollable,
297
- 'vertical_scroll_percent':scroll_pattern.VerticalScrollPercent if scroll_pattern.VerticallyScrollable else 0,
298
- 'is_focused':node.HasKeyboardFocus
299
- }))
300
-
301
- if is_element_interactive(node):
302
- legacy_pattern=node.GetLegacyIAccessiblePattern()
303
- value=legacy_pattern.Value.strip() if legacy_pattern.Value is not None else ""
304
- is_focused=node.HasKeyboardFocus
305
- name=node.Name.strip()
306
- element_bounding_box = node.BoundingRectangle
307
- if is_browser and is_dom:
308
- bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
309
- center = bounding_box.get_center()
310
- tree_node=TreeElementNode(**{
311
- 'name':name,
312
- 'control_type':node.LocalizedControlType.title(),
313
- 'value':value,
314
- 'shortcut':node.AcceleratorKey,
315
- 'bounding_box':bounding_box,
316
- 'center':center,
317
- 'xpath':'',
318
- 'app_name':app_name,
319
- 'is_focused':is_focused
320
- })
321
- dom_interactive_nodes.append(tree_node)
322
- dom_correction(node=node)
323
- else:
324
- bounding_box=self.iou_bounding_box(window_bounding_box,element_bounding_box)
325
- center = bounding_box.get_center()
326
- tree_node=TreeElementNode(**{
327
- 'name':name,
328
- 'control_type':node.LocalizedControlType.title(),
329
- 'value':value,
330
- 'shortcut':node.AcceleratorKey,
331
- 'bounding_box':bounding_box,
332
- 'center':center,
333
- 'xpath':'',
334
- 'app_name':app_name,
335
- 'is_focused':is_focused
336
- })
337
- interactive_nodes.append(tree_node)
338
- elif is_element_text(node):
339
- dom_informative_nodes.append(TextElementNode(
340
- text=node.Name.strip(),
341
- ))
342
-
343
- children=node.GetChildren()
344
-
345
- # Recursively traverse the tree the right to left for normal apps and for DOM traverse from left to right
346
- for child in (children if is_dom else children[::-1]):
347
- # Incrementally building the xpath
348
-
349
- # Check if the child is a DOM element
350
- if is_browser and child.AutomationId == "RootWebArea":
351
- bounding_box=child.BoundingRectangle
352
- self.dom_bounding_box=BoundingBox(left=bounding_box.left,top=bounding_box.top,
353
- right=bounding_box.right,bottom=bounding_box.bottom,width=bounding_box.width(),
354
- height=bounding_box.height())
355
- scroll_pattern=child.GetPattern(PatternId.ScrollPattern)
356
- self.dom_info=DOMInfo(
357
- horizontal_scrollable=scroll_pattern.HorizontallyScrollable,
358
- horizontal_scroll_percent=scroll_pattern.HorizontalScrollPercent if scroll_pattern.HorizontallyScrollable else 0,
359
- vertical_scrollable=scroll_pattern.VerticallyScrollable,
360
- vertical_scroll_percent=scroll_pattern.VerticalScrollPercent if scroll_pattern.VerticallyScrollable else 0
361
- )
362
- # enter DOM subtree
363
- tree_traversal(child, is_dom=True, is_dialog=is_dialog)
364
- # Check if the child is a dialog
365
- elif isinstance(child,WindowControl):
366
- if not child.IsOffscreen:
367
- if is_dom:
368
- bounding_box=child.BoundingRectangle
369
- if bounding_box.width() > 0.8*self.dom_bounding_box.width:
370
- # Because this window element covers the majority of the screen
371
- dom_interactive_nodes.clear()
372
- else:
373
- if is_window_modal(child):
374
- # Because this window element is modal
375
- interactive_nodes.clear()
376
- # enter dialog subtree
377
- tree_traversal(child, is_dom=is_dom, is_dialog=True)
378
- else:
379
- # normal non-dialog children
380
- tree_traversal(child, is_dom=is_dom, is_dialog=is_dialog)
381
-
382
- interactive_nodes, dom_interactive_nodes, scrollable_nodes, dom_informative_nodes = [], [], [], []
383
- app_name=node.Name.strip()
384
- match node.ClassName:
385
- case "Progman":
386
- app_name="Desktop"
387
- case 'Shell_TrayWnd'|'Shell_SecondaryTrayWnd':
388
- app_name="Taskbar"
389
- case 'Microsoft.UI.Content.PopupWindowSiteBridge':
390
- app_name="Context Menu"
391
- case _:
392
- pass
393
- tree_traversal(node,is_dom=False,is_dialog=False)
394
-
395
- logger.debug(f'Interactive nodes:{len(interactive_nodes)}')
396
- logger.debug(f'DOM interactive nodes:{len(dom_interactive_nodes)}')
397
- logger.debug(f'Scrollable nodes:{len(scrollable_nodes)}')
398
-
399
- if use_dom:
400
- if is_browser:
401
- return (dom_interactive_nodes,scrollable_nodes,dom_informative_nodes)
402
- else:
403
- return ([],[],[])
404
- else:
405
- return (interactive_nodes+dom_interactive_nodes,scrollable_nodes,dom_informative_nodes)
406
-
407
- def _on_focus_change(self, sender:'ctypes.POINTER(IUIAutomationElement)'):
408
- """Handle focus change events."""
409
- # Debounce duplicate events
410
- current_time = time()
411
- element = Control.CreateControlFromElement(sender)
412
- runtime_id=element.GetRuntimeId()
413
- event_key = tuple(runtime_id)
414
- if hasattr(self, '_last_focus_event') and self._last_focus_event:
415
- last_key, last_time = self._last_focus_event
416
- if last_key == event_key and (current_time - last_time) < 1.0:
417
- return None
418
- self._last_focus_event = (event_key, current_time)
419
-
420
- try:
421
- logger.debug(f"[WatchDog] Focus changed to: '{element.Name}' ({element.ControlTypeName})")
422
- except Exception:
423
- pass
424
-
425
- def _on_structure_change(self, sender:'ctypes.POINTER(IUIAutomationElement)', changeType:int, runtime_id:list[int]):
426
- """Handle structure change events."""
427
- try:
428
- # Debounce duplicate events
429
- current_time = time()
430
- event_key = (changeType, tuple(runtime_id))
431
- if hasattr(self, '_last_structure_event') and self._last_structure_event:
432
- last_key, last_time = self._last_structure_event
433
- if last_key == event_key and (current_time - last_time) < 5.0:
434
- return None
435
- self._last_structure_event = (event_key, current_time)
436
-
437
- node = Control.CreateControlFromElement(sender)
438
-
439
- match StructureChangeType(changeType):
440
- case StructureChangeType.StructureChangeType_ChildAdded|StructureChangeType.StructureChangeType_ChildrenBulkAdded:
441
- interactive_nodes=[]
442
- app=self.desktop.get_app_from_element(node)
443
- app_name=self.app_name_correction(app.name if app else node.Name.strip())
444
- is_browser=app.is_browser if app else False
445
- if isinstance(node,WindowControl|PaneControl):
446
- #Subtree traversal
447
- window_bounding_box=app.bounding_box if app else node.BoundingRectangle
448
- self.tree_traversal(node,window_bounding_box,app_name,is_browser,interactive_nodes=interactive_nodes)
449
- else:
450
- #If element is interactive take it else skip it
451
- if not self.is_element_interactive(node=node,is_browser=is_browser):
452
- return None
453
- legacy_pattern=node.GetLegacyIAccessiblePattern()
454
- value=legacy_pattern.Value.strip() if legacy_pattern.Value is not None else ""
455
- cursor_type=AccessibleRoleNames.get(legacy_pattern.Role, "Default")
456
- runtime_id=node.GetRuntimeId()
457
- is_focused=node.HasKeyboardFocus
458
- name=node.Name.strip()
459
- element_bounding_box = node.BoundingRectangle
460
- bounding_box=self.iou_bounding_box(window_bounding_box,element_bounding_box)
461
- center = bounding_box.get_center()
462
-
463
- interactive_nodes.append(TreeElementNode(
464
- name=name,
465
- control_type=cursor_type,
466
- bounding_box=bounding_box,
467
- center=center,
468
- runtime_id=runtime_id,
469
- app_name=app_name,
470
- value=value,
471
- shortcut="",
472
- xpath="",
473
- is_focused=is_focused
474
- ))
475
- if self.tree_state:
476
- existing_ids={n.runtime_id for n in self.tree_state.interactive_nodes}
477
- interactive_nodes=[n for n in interactive_nodes if n.runtime_id not in existing_ids]
478
- self.tree_state.interactive_nodes.extend(interactive_nodes)
479
- case StructureChangeType.StructureChangeType_ChildrenBulkRemoved | StructureChangeType.StructureChangeType_ChildRemoved:
480
- if changeType == StructureChangeType.StructureChangeType_ChildRemoved and self.tree_state:
481
- if isinstance(node,WindowControl|PaneControl):
482
- parent_bounding_box=BoundingBox.from_bounding_rectangle(node.BoundingRectangle)
483
- # Remove nodes spatially contained in the parent (heuristic for "is descendant")
484
- def is_contained(n:'TreeElementNode'):
485
- cx, cy = n.center.x, n.center.y
486
- return (parent_bounding_box.left <= cx <= parent_bounding_box.right and
487
- parent_bounding_box.top <= cy <= parent_bounding_box.bottom)
488
- self.tree_state.interactive_nodes = list(filter(lambda n:not is_contained(n),self.tree_state.interactive_nodes))
489
- else:
490
- target_runtime_id = tuple(runtime_id)
491
- self.tree_state.interactive_nodes = list(filter(lambda n:n.runtime_id != target_runtime_id,self.tree_state.interactive_nodes))
492
- case StructureChangeType.StructureChangeType_ChildrenInvalidated:
493
- #Rebuild subtree
494
- parent_bounding_box=BoundingBox.from_bounding_rectangle(node.BoundingRectangle)
495
- app=self.desktop.get_app_from_element(node)
496
- app_name=self.app_name_correction(app.name if app else node.Name.strip())
497
- is_browser=app.is_browser if app else False
498
- window_bounding_box=app.bounding_box if app else parent_bounding_box
499
- interactive_nodes=[]
500
- self.tree_traversal(node,window_bounding_box,app_name,is_browser,interactive_nodes=interactive_nodes)
501
-
502
- # Remove nodes spatially contained in the parent (heuristic for "is descendant")
503
- def is_contained(n:'TreeElementNode'):
504
- cx, cy = n.center.x, n.center.y
505
- return (parent_bounding_box.left <= cx <= parent_bounding_box.right and
506
- parent_bounding_box.top <= cy <= parent_bounding_box.bottom)
507
-
508
- if self.tree_state:
509
- self.tree_state.interactive_nodes = list(filter(lambda n:not is_contained(n),self.tree_state.interactive_nodes))
510
- self.tree_state.interactive_nodes.extend(interactive_nodes)
511
- case StructureChangeType.StructureChangeType_ChildrenReordered:
512
- app=self.desktop.get_app_from_element(node)
513
- app_name=self.app_name_correction(app.name if app else node.Name.strip())
514
- is_browser=app.is_browser if app else False
515
- window_bounding_box=app.bounding_box if app else node.BoundingRectangle
516
- interactive_nodes=[]
517
- self.tree_traversal(node,window_bounding_box,app_name,is_browser,interactive_nodes=interactive_nodes)
518
-
519
- # Update existing nodes
520
- fresh_nodes_map = {n.runtime_id: n for n in interactive_nodes}
521
- def update_node(existing_node:'TreeElementNode'):
522
- if new_node:=fresh_nodes_map.get(existing_node.runtime_id):
523
- existing_node.update_from_node(new_node)
524
- list(map(update_node,self.tree_state.interactive_nodes))
525
- except Exception as e:
526
- logger.debug(f"[WatchDog] Structure changed with error: {e}, StructureChangeType={StructureChangeType(changeType).name}")
527
-
528
- try:
529
- logger.debug(f"[WatchDog] Structure changed: Type={StructureChangeType(changeType).name} RuntimeID={tuple(runtime_id)} Sender: '{node.Name}' ({node.ControlTypeName})")
530
- except Exception:
531
- pass
532
-
533
- def _on_property_change(self, sender:'ctypes.POINTER(IUIAutomationElement)', propertyId:int, newValue):
534
- """Handle property change events."""
535
- try:
536
- element = Control.CreateControlFromElement(sender)
537
- logger.debug(f"[WatchDog] Property changed: ID={propertyId} Value={newValue} Element: '{element.Name}' ({element.ControlTypeName})")
538
- except Exception:
539
- pass
540
-
541
- def get_annotated_screenshot(self, nodes: list[TreeElementNode],scale:float=1.0) -> Image.Image:
542
- screenshot = self.desktop.get_screenshot()
543
- sleep(0.10)
544
-
545
- original_width = screenshot.width
546
- original_height = screenshot.height
547
-
548
- scaled_width = int(original_width * scale)
549
- scaled_height = int(original_height * scale)
550
- screenshot = screenshot.resize((scaled_width, scaled_height), Image.Resampling.LANCZOS)
551
-
552
- # Add padding
553
- padding = 5
554
- width = int(screenshot.width + (1.5 * padding))
555
- height = int(screenshot.height + (1.5 * padding))
556
- padded_screenshot = Image.new("RGB", (width, height), color=(255, 255, 255))
557
- padded_screenshot.paste(screenshot, (padding, padding))
558
-
559
- draw = ImageDraw.Draw(padded_screenshot)
560
- font_size = 12
561
- try:
562
- font = ImageFont.truetype('arial.ttf', font_size)
563
- except IOError:
564
- font = ImageFont.load_default()
565
-
566
- def get_random_color():
567
- return "#{:06x}".format(random.randint(0, 0xFFFFFF))
568
-
569
- def draw_annotation(label, node: TreeElementNode):
570
- box = node.bounding_box
571
- color = get_random_color()
572
-
573
- # Scale and pad the bounding box coordinates
574
- adjusted_box = (
575
- int(box.left * scale) + padding,
576
- int(box.top * scale) + padding,
577
- int(box.right * scale) + padding,
578
- int(box.bottom * scale) + padding
579
- )
580
- # Draw bounding box
581
- draw.rectangle(adjusted_box, outline=color, width=2)
582
-
583
- # Label dimensions
584
- label_width = draw.textlength(str(label), font=font)
585
- label_height = font_size
586
- left, top, right, bottom = adjusted_box
587
-
588
- # Label position above bounding box
589
- label_x1 = right - label_width
590
- label_y1 = top - label_height - 4
591
- label_x2 = label_x1 + label_width
592
- label_y2 = label_y1 + label_height + 4
593
-
594
- # Draw label background and text
595
- draw.rectangle([(label_x1, label_y1), (label_x2, label_y2)], fill=color)
596
- draw.text((label_x1 + 2, label_y1 + 2), str(label), fill=(255, 255, 255), font=font)
597
-
598
- # Draw annotations in parallel
599
- with ThreadPoolExecutor() as executor:
600
- executor.map(draw_annotation, range(len(nodes)), nodes)
601
- return padded_screenshot
1
+ from windows_mcp.uia import Control,ImageControl,ScrollPattern,WindowControl,Rect,GetRootControl,PatternId,AccessibleRoleNames,PaneControl,GroupControl,StructureChangeType,TreeScope,ControlFromHandle
2
+ from windows_mcp.tree.config import INTERACTIVE_CONTROL_TYPE_NAMES,DOCUMENT_CONTROL_TYPE_NAMES,INFORMATIVE_CONTROL_TYPE_NAMES, DEFAULT_ACTIONS, INTERACTIVE_ROLES, THREAD_MAX_RETRIES
3
+ from windows_mcp.tree.views import TreeElementNode, ScrollElementNode, TextElementNode, Center, BoundingBox, TreeState
4
+ from windows_mcp.tree.cache_utils import CacheRequestFactory,CachedControlHelper
5
+ from windows_mcp.tree.utils import random_point_within_bounding_box
6
+ from concurrent.futures import ThreadPoolExecutor, as_completed
7
+ from typing import TYPE_CHECKING,Optional,Any
8
+ from time import sleep,time
9
+ import threading
10
+ import logging
11
+ import random
12
+ import weakref
13
+ import comtypes
14
+
15
+ logger = logging.getLogger(__name__)
16
+ logger.setLevel(logging.INFO)
17
+
18
+ if TYPE_CHECKING:
19
+ from windows_mcp.desktop.service import Desktop
20
+
21
+ class Tree:
22
+ def __init__(self,desktop:'Desktop'):
23
+ self.desktop=weakref.proxy(desktop)
24
+ self.screen_size=desktop.get_screen_size()
25
+ self.dom:Optional[Control]=None
26
+ self.dom_bounding_box:BoundingBox=None
27
+ self.screen_box=BoundingBox(
28
+ top=0, left=0, bottom=self.screen_size.height, right=self.screen_size.width,
29
+ width=self.screen_size.width, height=self.screen_size.height
30
+ )
31
+ self.tree_state=None
32
+
33
+
34
+ def get_state(self,active_app_handle:int|None,other_apps_handles:list[int],use_dom:bool=False)->TreeState:
35
+ # Reset DOM state to prevent leaks and stale data
36
+ self.dom = None
37
+ self.dom_bounding_box = None
38
+ start_time = time()
39
+
40
+ active_app_flag=False
41
+ if active_app_handle:
42
+ active_app_flag=True
43
+ apps_handles=[active_app_handle]+other_apps_handles
44
+ else:
45
+ apps_handles=other_apps_handles
46
+
47
+ interactive_nodes,scrollable_nodes,dom_informative_nodes=self.get_appwise_nodes(apps_handles=apps_handles,active_app_flag=active_app_flag,use_dom=use_dom)
48
+ root_node=TreeElementNode(
49
+ name="Desktop",
50
+ control_type="PaneControl",
51
+ bounding_box=self.screen_box,
52
+ center=self.screen_box.get_center(),
53
+ app_name="Desktop",
54
+ xpath='',
55
+ value='',
56
+ shortcut='',
57
+ is_focused=False
58
+ )
59
+ if self.dom:
60
+ scroll_pattern:ScrollPattern=self.dom.GetPattern(PatternId.ScrollPattern)
61
+ dom_node=ScrollElementNode(
62
+ name="DOM",
63
+ control_type="DocumentControl",
64
+ bounding_box=self.dom_bounding_box,
65
+ center=self.dom_bounding_box.get_center(),
66
+ horizontal_scrollable=scroll_pattern.HorizontallyScrollable if scroll_pattern else False,
67
+ horizontal_scroll_percent=scroll_pattern.HorizontalScrollPercent if scroll_pattern and scroll_pattern.HorizontallyScrollable else 0,
68
+ vertical_scrollable=scroll_pattern.VerticallyScrollable if scroll_pattern else False,
69
+ vertical_scroll_percent=scroll_pattern.VerticalScrollPercent if scroll_pattern and scroll_pattern.VerticallyScrollable else 0,
70
+ xpath='',
71
+ app_name="DOM",
72
+ is_focused=False
73
+ )
74
+ else:
75
+ dom_node=None
76
+ self.tree_state=TreeState(root_node=root_node,dom_node=dom_node,interactive_nodes=interactive_nodes,scrollable_nodes=scrollable_nodes,dom_informative_nodes=dom_informative_nodes)
77
+ end_time = time()
78
+ logger.info(f"Tree State capture took {end_time - start_time:.2f} seconds")
79
+ return self.tree_state
80
+
81
def get_appwise_nodes(self,apps_handles:list[int],active_app_flag:bool,use_dom:bool=False) -> tuple[list[TreeElementNode],list[ScrollElementNode],list[TextElementNode]]:
    """Collect the UI nodes of several windows, one worker task per handle.

    Args:
        apps_handles: Window handles to traverse.
        active_app_flag: When true, handles whose window class is
            ``'Progman'`` are skipped.
        use_dom: Forwarded unchanged to ``get_nodes``.

    Returns:
        A tuple ``(interactive_nodes, scrollable_nodes, dom_informative_nodes)``
        holding the concatenated results of every task that succeeded.
        Results are appended in completion order, not in handle order.

    A task that raises is resubmitted until it has failed
    ``THREAD_MAX_RETRIES`` times; after that the handle is dropped and an
    error is logged. Failures never propagate to the caller.
    """
    interactive_nodes, scrollable_nodes, dom_informative_nodes = [], [], []

    # Resolve the browser flag on the calling thread so that only plain
    # values (handle, bool) are handed to the worker threads.
    task_inputs = []
    browser_flags = {}
    for handle in apps_handles:
        is_browser = False
        try:
            # Temporary control used only for this property check; it is
            # never passed to a worker.
            temp_node = ControlFromHandle(handle)
            if active_app_flag and temp_node.ClassName=='Progman':
                continue
            is_browser = self.desktop.is_app_browser(temp_node)
        except Exception:
            # Best effort: if the probe fails the handle is still queued as
            # a non-browser window and get_nodes reports the real error.
            pass
        task_inputs.append((handle, is_browser))
        # Keep the first flag seen for a handle so retries reuse exactly
        # what the first submission used.
        browser_flags.setdefault(handle, is_browser)

    with ThreadPoolExecutor() as executor:
        retry_counts = dict.fromkeys(apps_handles, 0)
        future_to_handle = {
            executor.submit(self.get_nodes, handle, is_browser, use_dom): handle
            for handle, is_browser in task_inputs
        }
        # Retries are added to future_to_handle while a batch is draining,
        # so keep looping until nothing is pending.
        while future_to_handle:
            for future in as_completed(list(future_to_handle)):
                handle = future_to_handle.pop(future)
                try:
                    result = future.result()
                    if result:
                        element_nodes, scroll_nodes, info_nodes = result
                        interactive_nodes.extend(element_nodes)
                        scrollable_nodes.extend(scroll_nodes)
                        dom_informative_nodes.extend(info_nodes)
                except Exception as e:
                    retry_counts[handle] += 1
                    logger.debug(f"Error in processing handle {handle}, retry attempt {retry_counts[handle]}\nError: {e}")
                    if retry_counts[handle] < THREAD_MAX_RETRIES:
                        new_future = executor.submit(self.get_nodes, handle, browser_flags.get(handle, False), use_dom)
                        future_to_handle[new_future] = handle
                    else:
                        logger.error(f"Task failed completely for handle {handle} after {THREAD_MAX_RETRIES} retries")
    return interactive_nodes,scrollable_nodes,dom_informative_nodes
126
+
127
def iou_bounding_box(self,window_box: Rect,element_box: Rect,) -> BoundingBox:
    """Clip an element rectangle to its window and to the screen.

    Despite the name, this returns the intersection rectangle, not an
    intersection-over-union ratio.

    Args:
        window_box: Rectangle of the containing window (or DOM area).
        element_box: Rectangle of the element.

    Returns:
        The part of ``element_box`` that lies inside both ``window_box`` and
        ``self.screen_box``. If that region is empty or degenerate, an
        all-zero ``BoundingBox`` is returned instead.
    """
    screen = self.screen_box

    # Innermost edges of the three rectangles give their common region.
    left = max(screen.left, window_box.left, element_box.left)
    top = max(screen.top, window_box.top, element_box.top)
    right = min(screen.right, window_box.right, element_box.right)
    bottom = min(screen.bottom, window_box.bottom, element_box.bottom)

    # Nothing visible: the element is outside the window or the screen.
    if right <= left or bottom <= top:
        return BoundingBox(left=0, top=0, right=0, bottom=0, width=0, height=0)

    return BoundingBox(
        left=left,
        top=top,
        right=right,
        bottom=bottom,
        width=right - left,
        height=bottom - top,
    )
161
+
162
+
163
+
164
def element_has_child_element(self, node:Control,control_type:str,child_control_type:str) -> bool:
    """Tell whether ``node`` has a given type and its first child another.

    Args:
        node: Control to inspect.
        control_type: Expected ``LocalizedControlType`` of ``node``.
        child_control_type: Expected ``LocalizedControlType`` of the first
            child of ``node``.

    Returns:
        True only when ``node`` matches ``control_type``, has a first child,
        and that child matches ``child_control_type``; False otherwise.
    """
    if node.LocalizedControlType!=control_type:
        # Explicit False instead of falling off the end with None.
        return False
    first_child=node.GetFirstChildControl()
    if first_child is None:
        return False
    return first_child.LocalizedControlType==child_control_type
170
+
171
+ def _dom_correction(self, node:Control, dom_interactive_nodes:list[TreeElementNode], app_name:str):
172
+ if self.element_has_child_element(node,'list item','link') or self.element_has_child_element(node,'item','link'):
173
+ dom_interactive_nodes.pop()
174
+ return None
175
+ elif node.ControlTypeName=='GroupControl':
176
+ dom_interactive_nodes.pop()
177
+ # Inlined is_keyboard_focusable logic for correction
178
+ control_type_name_check = node.CachedControlTypeName
179
+ is_kb_focusable = False
180
+ if control_type_name_check in set(['EditControl','ButtonControl','CheckBoxControl','RadioButtonControl','TabItemControl']):
181
+ is_kb_focusable = True
182
+ else:
183
+ is_kb_focusable = node.CachedIsKeyboardFocusable
184
+
185
+ if is_kb_focusable:
186
+ child=node
187
+ try:
188
+ while child.GetFirstChildControl() is not None:
189
+ if child.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES:
190
+ return None
191
+ child=child.GetFirstChildControl()
192
+ except Exception:
193
+ return None
194
+ if child.ControlTypeName!='TextControl':
195
+ return None
196
+ legacy_pattern=node.GetLegacyIAccessiblePattern()
197
+ value=legacy_pattern.Value
198
+ element_bounding_box = node.BoundingRectangle
199
+ bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
200
+ center = bounding_box.get_center()
201
+ is_focused=node.HasKeyboardFocus
202
+ dom_interactive_nodes.append(TreeElementNode(**{
203
+ 'name':child.Name.strip(),
204
+ 'control_type':node.LocalizedControlType,
205
+ 'value':value,
206
+ 'shortcut':node.AcceleratorKey,
207
+ 'bounding_box':bounding_box,
208
+ 'xpath':'',
209
+ 'center':center,
210
+ 'app_name':app_name,
211
+ 'is_focused':is_focused
212
+ }))
213
+ elif self.element_has_child_element(node,'link','heading'):
214
+ dom_interactive_nodes.pop()
215
+ node=node.GetFirstChildControl()
216
+ control_type='link'
217
+ legacy_pattern=node.GetLegacyIAccessiblePattern()
218
+ value=legacy_pattern.Value
219
+ element_bounding_box = node.BoundingRectangle
220
+ bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
221
+ center = bounding_box.get_center()
222
+ is_focused=node.HasKeyboardFocus
223
+ dom_interactive_nodes.append(TreeElementNode(**{
224
+ 'name':node.Name.strip(),
225
+ 'control_type':control_type,
226
+ 'value':node.Name.strip(),
227
+ 'shortcut':node.AcceleratorKey,
228
+ 'bounding_box':bounding_box,
229
+ 'xpath':'',
230
+ 'center':center,
231
+ 'app_name':app_name,
232
+ 'is_focused':is_focused
233
+ }))
234
+
235
def _has_interactive_role(self, node:Control) -> bool:
    """Return True when the node's legacy accessible role is interactive.

    The role is read from the LegacyIAccessible pattern, mapped through
    ``AccessibleRoleNames`` and tested against ``INTERACTIVE_ROLES``. Any
    failure while querying the pattern counts as "not interactive".
    """
    try:
        legacy_pattern = node.GetLegacyIAccessiblePattern()
        return AccessibleRoleNames.get(legacy_pattern.Role, "Default") in INTERACTIVE_ROLES
    except Exception:
        return False

def tree_traversal(self, node: Control, window_bounding_box:Rect, app_name:str, is_browser:bool,
                   interactive_nodes:Optional[list[TreeElementNode]]=None, scrollable_nodes:Optional[list[ScrollElementNode]]=None,
                   dom_interactive_nodes:Optional[list[TreeElementNode]]=None, dom_informative_nodes:Optional[list[TextElementNode]]=None,
                   is_dom:bool=False, is_dialog:bool=False,
                   element_cache_req:Optional[Any]=None, children_cache_req:Optional[Any]=None):
    """Walk the UI tree below ``node`` and fill the result lists in place.

    For every visited control the method may append:

    * a ``ScrollElementNode`` to ``scrollable_nodes`` when the control
      exposes a vertically scrollable ScrollPattern,
    * a ``TreeElementNode`` to ``dom_interactive_nodes`` (inside a browser
      DOM) or ``interactive_nodes`` (everywhere else) when the control is
      visible, enabled and judged interactive,
    * a ``TextElementNode`` to ``dom_informative_nodes`` for informative
      (non-image) controls inside a browser DOM.

    Args:
        node: Control to inspect. It is wrapped through
            ``CachedControlHelper.build_cached_control`` when it has no
            ``_is_cached`` attribute and ``element_cache_req`` is given.
        window_bounding_box: Rectangle used to clip elements outside the DOM.
        app_name: Application name stored on every produced node.
        is_browser: Whether the window belongs to a browser.
        interactive_nodes: Output list; ``None`` skips the interactive check.
        scrollable_nodes: Output list; ``None`` skips the scroll check.
        dom_interactive_nodes: Output list for interactive DOM elements.
        dom_informative_nodes: Output list; ``None`` skips the text check.
        is_dom: True once the traversal is inside the ``RootWebArea`` subtree.
        is_dialog: Set to True below a ``WindowControl`` child; it is only
            forwarded to recursive calls.
        element_cache_req: Cache request used to build the cached control.
        children_cache_req: Cache request used to fetch the children.

    Side effects:
        ``self.dom_bounding_box`` and ``self.dom`` are overwritten when a
        browser child with automation id ``"RootWebArea"`` is found. Output
        lists may be cleared when a large DOM dialog or a modal window is
        encountered.

    Raises:
        Any exception raised during traversal is logged and re-raised.
    """
    try:
        # Build cached control if caching is enabled
        if not hasattr(node, '_is_cached') and element_cache_req:
            node = CachedControlHelper.build_cached_control(node, element_cache_req)

        is_offscreen = node.CachedIsOffscreen
        control_type_name = node.CachedControlTypeName
        in_dom = is_browser and is_dom

        # Scrollable check: only containers, i.e. controls that are neither
        # interactive nor informative, and only when they are on screen.
        if (scrollable_nodes is not None
                and control_type_name not in INTERACTIVE_CONTROL_TYPE_NAMES
                and control_type_name not in INFORMATIVE_CONTROL_TYPE_NAMES
                and not is_offscreen):
            try:
                scroll_pattern:ScrollPattern=node.GetPattern(PatternId.ScrollPattern)
                if scroll_pattern and scroll_pattern.VerticallyScrollable:
                    box = node.CachedBoundingRectangle
                    x,y=random_point_within_bounding_box(node=node,scale_factor=0.8)
                    center = Center(x=x,y=y)
                    name = node.CachedName
                    automation_id = node.CachedAutomationId
                    localized_control_type = node.CachedLocalizedControlType
                    has_keyboard_focus = node.CachedHasKeyboardFocus
                    scrollable_nodes.append(ScrollElementNode(
                        name=name.strip() or automation_id or localized_control_type.capitalize() or "''",
                        control_type=localized_control_type.title(),
                        bounding_box=BoundingBox(
                            left=box.left,
                            top=box.top,
                            right=box.right,
                            bottom=box.bottom,
                            width=box.width(),
                            height=box.height(),
                        ),
                        center=center,
                        xpath='',
                        horizontal_scrollable=scroll_pattern.HorizontallyScrollable,
                        horizontal_scroll_percent=scroll_pattern.HorizontalScrollPercent if scroll_pattern.HorizontallyScrollable else 0,
                        vertical_scrollable=scroll_pattern.VerticallyScrollable,
                        vertical_scroll_percent=scroll_pattern.VerticalScrollPercent if scroll_pattern.VerticallyScrollable else 0,
                        app_name=app_name,
                        is_focused=has_keyboard_focus,
                    ))
            except Exception:
                # Best effort: a control whose scroll pattern cannot be
                # queried is simply not reported as scrollable.
                pass

        # Interactive and informative checks
        is_control_element = node.CachedIsControlElement
        element_bounding_box = node.CachedBoundingRectangle
        area = element_bounding_box.width() * element_bounding_box.height()

        # Edit controls are kept even when reported offscreen.
        is_visible = (area > 0) and (not is_offscreen or control_type_name == 'EditControl') and is_control_element

        if is_visible and node.CachedIsEnabled:
            # These control types are treated as focusable without asking
            # the provider.
            if control_type_name in {'EditControl','ButtonControl','CheckBoxControl','RadioButtonControl','TabItemControl'}:
                is_keyboard_focusable = True
            else:
                is_keyboard_focusable = node.CachedIsKeyboardFocusable

            # An ImageControl counts as a plain image when it is a 'graphic'
            # or cannot take keyboard focus.
            is_plain_image = (
                control_type_name == 'ImageControl'
                and (not is_keyboard_focusable or node.CachedLocalizedControlType == 'graphic')
            )

            # Interactive check
            if interactive_nodes is not None:
                is_interactive = False
                if is_browser and control_type_name in {'DataItemControl','ListItemControl'} and not is_keyboard_focusable:
                    is_interactive = False
                elif not is_browser and control_type_name == "ImageControl" and is_keyboard_focusable:
                    is_interactive = True
                elif control_type_name in INTERACTIVE_CONTROL_TYPE_NAMES or control_type_name in DOCUMENT_CONTROL_TYPE_NAMES:
                    if self._has_interactive_role(node) and (not is_plain_image or is_keyboard_focusable):
                        is_interactive = True
                elif control_type_name == 'GroupControl':
                    if is_browser:
                        is_role_interactive = self._has_interactive_role(node)

                        is_default_action = False
                        try:
                            legacy_pattern = node.GetLegacyIAccessiblePattern()
                            if legacy_pattern.DefaultAction.title() in DEFAULT_ACTIONS:
                                is_default_action = True
                        except Exception:
                            # No usable default action: rely on focusability.
                            pass

                        if is_role_interactive and (is_default_action or is_keyboard_focusable):
                            is_interactive = True

                if is_interactive:
                    legacy_pattern=node.GetLegacyIAccessiblePattern()
                    # Read the value once; every access is a provider call.
                    raw_value = legacy_pattern.Value
                    value = raw_value.strip() if raw_value is not None else ""
                    is_focused = node.CachedHasKeyboardFocus
                    name = node.CachedName.strip()
                    localized_control_type = node.CachedLocalizedControlType
                    accelerator_key = node.CachedAcceleratorKey

                    # DOM elements are clipped to the web area, everything
                    # else to the window.
                    clip_box = self.dom_bounding_box if in_dom else window_bounding_box
                    bounding_box=self.iou_bounding_box(clip_box,element_bounding_box)
                    center = bounding_box.get_center()
                    tree_node=TreeElementNode(
                        name=name,
                        control_type=localized_control_type.title(),
                        value=value,
                        shortcut=accelerator_key,
                        bounding_box=bounding_box,
                        center=center,
                        xpath='',
                        app_name=app_name,
                        is_focused=is_focused,
                    )
                    if in_dom:
                        dom_interactive_nodes.append(tree_node)
                        self._dom_correction(node, dom_interactive_nodes, app_name)
                    else:
                        interactive_nodes.append(tree_node)

            # Informative check: text is only collected inside a browser DOM.
            if dom_informative_nodes is not None:
                is_text = control_type_name in INFORMATIVE_CONTROL_TYPE_NAMES and not is_plain_image
                if is_text and in_dom:
                    dom_informative_nodes.append(TextElementNode(
                        text=node.CachedName.strip(),
                    ))

        # Cached children retrieval
        children = CachedControlHelper.get_cached_children(node, children_cache_req)

        # Normal apps are traversed right to left, the DOM left to right.
        for child in (children if is_dom else children[::-1]):
            if is_browser and child.CachedAutomationId=="RootWebArea":
                # The child is the root of the web page: remember its
                # rectangle and switch to DOM mode below it.
                bounding_box=child.CachedBoundingRectangle
                self.dom_bounding_box=BoundingBox(left=bounding_box.left,top=bounding_box.top,
                    right=bounding_box.right,bottom=bounding_box.bottom,width=bounding_box.width(),
                    height=bounding_box.height())
                self.dom=child
                self.tree_traversal(child, window_bounding_box, app_name, is_browser, interactive_nodes, scrollable_nodes, dom_interactive_nodes, dom_informative_nodes, is_dom=True, is_dialog=is_dialog, element_cache_req=element_cache_req, children_cache_req=children_cache_req)
            elif isinstance(child,WindowControl):
                # The child is a dialog.
                if not child.CachedIsOffscreen:
                    if is_dom:
                        bounding_box=child.CachedBoundingRectangle
                        if bounding_box.width() > 0.8*self.dom_bounding_box.width:
                            # The dialog spans most of the web area, so the
                            # DOM elements collected so far are discarded.
                            dom_interactive_nodes.clear()
                    else:
                        is_modal = False
                        try:
                            window_pattern = child.GetWindowPattern()
                            is_modal = window_pattern.IsModal
                        except Exception:
                            # Unknown modality is treated as non-modal.
                            pass

                        if is_modal:
                            # A modal window discards the elements collected
                            # so far.
                            interactive_nodes.clear()
                # Enter the dialog subtree.
                self.tree_traversal(child, window_bounding_box, app_name, is_browser, interactive_nodes, scrollable_nodes, dom_interactive_nodes, dom_informative_nodes, is_dom=is_dom, is_dialog=True, element_cache_req=element_cache_req, children_cache_req=children_cache_req)
            else:
                # Normal non-dialog children.
                self.tree_traversal(child, window_bounding_box, app_name, is_browser, interactive_nodes, scrollable_nodes, dom_interactive_nodes, dom_informative_nodes, is_dom=is_dom, is_dialog=is_dialog, element_cache_req=element_cache_req, children_cache_req=children_cache_req)
    except Exception as e:
        logger.error(f"Error in tree_traversal: {e}", exc_info=True)
        raise
464
+
465
def app_name_correction(self,app_name:str)->str:
    """Map well-known shell window names to readable labels.

    Args:
        app_name: Name reported for the window.

    Returns:
        ``"Desktop"``, ``"Taskbar"`` or ``"Context Menu"`` for the known
        shell names handled below; any other value is returned unchanged.
    """
    if app_name == "Progman":
        return "Desktop"
    if app_name in ('Shell_TrayWnd', 'Shell_SecondaryTrayWnd'):
        return "Taskbar"
    if app_name == 'Microsoft.UI.Content.PopupWindowSiteBridge':
        return "Context Menu"
    return app_name
475
+
476
def get_nodes(self, handle: int, is_browser:bool=False, use_dom:bool=False) -> tuple[list[TreeElementNode],list[ScrollElementNode],list[TextElementNode]]:
    """Traverse one window and return the nodes found in it.

    COM is initialised at the start of the call and uninitialised when it
    ends; the control is re-created from ``handle`` in between.

    Args:
        handle: Window handle of the application to traverse.
        is_browser: Whether the window belongs to a browser.
        use_dom: When true, only DOM content is returned: the DOM nodes for
            a browser and three empty lists for any other window.

    Returns:
        ``(interactive, scrollable, informative)`` node lists. Without
        ``use_dom`` the DOM interactive nodes are appended to the regular
        interactive nodes.

    Raises:
        Exception: If no control can be created from ``handle``; any error
            raised during traversal is logged and re-raised as well.
    """
    # Initialise outside the try block so the finally clause only balances
    # an initialisation that actually succeeded.
    comtypes.CoInitialize()
    try:
        # Rehydrate the Control from the handle within this call's COM context
        node = ControlFromHandle(handle)
        if not node:
            raise Exception("Failed to create Control from handle")

        # Fresh cache requests for this traversal session
        element_cache_req = CacheRequestFactory.create_tree_traversal_cache()
        element_cache_req.TreeScope = TreeScope.TreeScope_Element

        children_cache_req = CacheRequestFactory.create_tree_traversal_cache()
        children_cache_req.TreeScope = TreeScope.TreeScope_Element | TreeScope.TreeScope_Children

        window_bounding_box=node.BoundingRectangle

        interactive_nodes, dom_interactive_nodes, dom_informative_nodes, scrollable_nodes = [], [], [], []
        app_name=self.app_name_correction(node.Name.strip())

        self.tree_traversal(node, window_bounding_box, app_name, is_browser, interactive_nodes, scrollable_nodes, dom_interactive_nodes, dom_informative_nodes, is_dom=False, is_dialog=False, element_cache_req=element_cache_req, children_cache_req=children_cache_req)
        logger.debug(f'App name:{app_name}')
        logger.debug(f'Interactive nodes:{len(interactive_nodes)}')
        if is_browser:
            logger.debug(f'DOM interactive nodes:{len(dom_interactive_nodes)}')
            logger.debug(f'DOM informative nodes:{len(dom_informative_nodes)}')
        logger.debug(f'Scrollable nodes:{len(scrollable_nodes)}')

        if use_dom:
            if is_browser:
                return (dom_interactive_nodes,scrollable_nodes,dom_informative_nodes)
            return ([],[],[])

        interactive_nodes.extend(dom_interactive_nodes)
        return (interactive_nodes,scrollable_nodes,dom_informative_nodes)
    except Exception as e:
        # Report the handle rather than node.Name: the control may not exist
        # when the failure happened, and touching it here would replace the
        # real error with a NameError/AttributeError.
        logger.error(f"Error getting nodes for handle {handle}: {e}")
        raise
    finally:
        comtypes.CoUninitialize()
518
+
519
+ def _on_focus_change(self, sender:'ctypes.POINTER(IUIAutomationElement)'):
520
+ """Handle focus change events."""
521
+ # Debounce duplicate events
522
+ current_time = time()
523
+ element = Control.CreateControlFromElement(sender)
524
+ runtime_id=element.GetRuntimeId()
525
+ event_key = tuple(runtime_id)
526
+ if hasattr(self, '_last_focus_event') and self._last_focus_event:
527
+ last_key, last_time = self._last_focus_event
528
+ if last_key == event_key and (current_time - last_time) < 1.0:
529
+ return None
530
+ self._last_focus_event = (event_key, current_time)
531
+
532
+ try:
533
+ logger.debug(f"[WatchDog] Focus changed to: '{element.Name}' ({element.ControlTypeName})")
534
+ except Exception:
535
+ pass
536
+
537
+ def _on_property_change(self, sender:'ctypes.POINTER(IUIAutomationElement)', propertyId:int, newValue):
538
+ """Handle property change events."""
539
+ try:
540
+ element = Control.CreateControlFromElement(sender)
541
+ logger.debug(f"[WatchDog] Property changed: ID={propertyId} Value={newValue} Element: '{element.Name}' ({element.ControlTypeName})")
542
+ except Exception:
543
+ pass