windows-mcp 0.5.7__py3-none-any.whl → 0.5.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,77 +1,127 @@
1
- from windows_mcp.tree.config import INTERACTIVE_CONTROL_TYPE_NAMES,DOCUMENT_CONTROL_TYPE_NAMES,INFORMATIVE_CONTROL_TYPE_NAMES, DEFAULT_ACTIONS, THREAD_MAX_RETRIES
2
- from windows_mcp.tree.views import TreeElementNode, ScrollElementNode, TextElementNode, Center, BoundingBox, TreeState, DOMInfo
3
- from uiautomation import Control,ImageControl,ScrollPattern,WindowControl,Rect,GetRootControl,PatternId
4
- from concurrent.futures import ThreadPoolExecutor, as_completed
1
+ from windows_mcp.uia import Control,ImageControl,ScrollPattern,WindowControl,Rect,GetRootControl,PatternId,AccessibleRoleNames,PaneControl,GroupControl,StructureChangeType,TreeScope,ControlFromHandle
2
+ from windows_mcp.tree.config import INTERACTIVE_CONTROL_TYPE_NAMES,DOCUMENT_CONTROL_TYPE_NAMES,INFORMATIVE_CONTROL_TYPE_NAMES, DEFAULT_ACTIONS, INTERACTIVE_ROLES, THREAD_MAX_RETRIES
3
+ from windows_mcp.tree.views import TreeElementNode, ScrollElementNode, TextElementNode, Center, BoundingBox, TreeState
4
+ from windows_mcp.tree.cache_utils import CacheRequestFactory,CachedControlHelper
5
5
  from windows_mcp.tree.utils import random_point_within_bounding_box
6
- from PIL import Image, ImageFont, ImageDraw
7
- from typing import TYPE_CHECKING,Optional
8
- from windows_mcp.desktop.views import App
9
- from time import sleep
6
+ from concurrent.futures import ThreadPoolExecutor, as_completed
7
+ from typing import TYPE_CHECKING,Optional,Any
8
+ from time import sleep,time
9
+ import threading
10
10
  import logging
11
11
  import random
12
+ import weakref
13
+ import comtypes
12
14
 
13
15
  logger = logging.getLogger(__name__)
14
16
  logger.setLevel(logging.INFO)
15
- handler = logging.StreamHandler()
16
- formatter = logging.Formatter('[%(levelname)s] %(message)s')
17
- handler.setFormatter(formatter)
18
- logger.addHandler(handler)
19
17
 
20
18
  if TYPE_CHECKING:
21
19
  from windows_mcp.desktop.service import Desktop
22
20
 
23
21
  class Tree:
24
22
  def __init__(self,desktop:'Desktop'):
25
- self.desktop=desktop
26
- self.screen_size=self.desktop.get_screen_size()
27
- self.dom_info:Optional[DOMInfo]=None
23
+ self.desktop=weakref.proxy(desktop)
24
+ self.screen_size=desktop.get_screen_size()
25
+ self.dom:Optional[Control]=None
28
26
  self.dom_bounding_box:BoundingBox=None
29
27
  self.screen_box=BoundingBox(
30
28
  top=0, left=0, bottom=self.screen_size.height, right=self.screen_size.width,
31
- width=self.screen_size.width, height=self.screen_size.height
29
+ width=self.screen_size.width, height=self.screen_size.height
32
30
  )
31
+ self.tree_state=None
33
32
 
34
- def get_state(self,active_app:App,other_apps:list[App],use_dom:bool=False)->TreeState:
35
- root=GetRootControl()
36
- other_apps_handle=set(map(lambda other_app: other_app.handle,other_apps))
37
- apps=list(filter(lambda app:app.NativeWindowHandle not in other_apps_handle,root.GetChildren()))
38
- del other_apps_handle
39
- if active_app:
40
- apps=list(filter(lambda app:app.ClassName!='Progman',apps))
41
- interactive_nodes,scrollable_nodes,dom_informative_nodes=self.get_appwise_nodes(apps=apps,use_dom=use_dom)
42
- return TreeState(dom_info=self.dom_info,interactive_nodes=interactive_nodes,scrollable_nodes=scrollable_nodes,dom_informative_nodes=dom_informative_nodes)
43
33
 
44
- def get_appwise_nodes(self,apps:list[Control],use_dom:bool=False)-> tuple[list[TreeElementNode],list[ScrollElementNode],list[TextElementNode]]:
45
- interactive_nodes, scrollable_nodes,dom_informative_nodes = [], [], []
34
+ def get_state(self,active_app_handle:int|None,other_apps_handles:list[int],use_dom:bool=False)->TreeState:
35
+ # Reset DOM state to prevent leaks and stale data
36
+ self.dom = None
37
+ self.dom_bounding_box = None
38
+ start_time = time()
39
+
40
+ active_app_flag=False
41
+ if active_app_handle:
42
+ active_app_flag=True
43
+ apps_handles=[active_app_handle]+other_apps_handles
44
+ else:
45
+ apps_handles=other_apps_handles
46
+
47
+ interactive_nodes,scrollable_nodes,dom_informative_nodes=self.get_appwise_nodes(apps_handles=apps_handles,active_app_flag=active_app_flag,use_dom=use_dom)
48
+ root_node=TreeElementNode(
49
+ name="Desktop",
50
+ control_type="PaneControl",
51
+ bounding_box=self.screen_box,
52
+ center=self.screen_box.get_center(),
53
+ app_name="Desktop",
54
+ xpath='',
55
+ value='',
56
+ shortcut='',
57
+ is_focused=False
58
+ )
59
+ if self.dom:
60
+ scroll_pattern:ScrollPattern=self.dom.GetPattern(PatternId.ScrollPattern)
61
+ dom_node=ScrollElementNode(
62
+ name="DOM",
63
+ control_type="DocumentControl",
64
+ bounding_box=self.dom_bounding_box,
65
+ center=self.dom_bounding_box.get_center(),
66
+ horizontal_scrollable=scroll_pattern.HorizontallyScrollable if scroll_pattern else False,
67
+ horizontal_scroll_percent=scroll_pattern.HorizontalScrollPercent if scroll_pattern and scroll_pattern.HorizontallyScrollable else 0,
68
+ vertical_scrollable=scroll_pattern.VerticallyScrollable if scroll_pattern else False,
69
+ vertical_scroll_percent=scroll_pattern.VerticalScrollPercent if scroll_pattern and scroll_pattern.VerticallyScrollable else 0,
70
+ xpath='',
71
+ app_name="DOM",
72
+ is_focused=False
73
+ )
74
+ else:
75
+ dom_node=None
76
+ self.tree_state=TreeState(root_node=root_node,dom_node=dom_node,interactive_nodes=interactive_nodes,scrollable_nodes=scrollable_nodes,dom_informative_nodes=dom_informative_nodes)
77
+ end_time = time()
78
+ logger.info(f"Tree State capture took {end_time - start_time:.2f} seconds")
79
+ return self.tree_state
80
+
81
+ def get_appwise_nodes(self,apps_handles:list[int],active_app_flag:bool,use_dom:bool=False) -> tuple[list[TreeElementNode],list[ScrollElementNode],list[TextElementNode]]:
82
+ interactive_nodes, scrollable_nodes, dom_informative_nodes = [], [], []
83
+
84
+ # Pre-calculate browser status in main thread to pass simple types to workers
85
+ task_inputs = []
86
+ for handle in apps_handles:
87
+ is_browser = False
88
+ try:
89
+ # Use temporary control for property check in main thread
90
+ # This is safe as we don't pass this specific COM object to the thread
91
+ temp_node = ControlFromHandle(handle)
92
+ if active_app_flag and temp_node.ClassName=='Progman':
93
+ continue
94
+ is_browser = self.desktop.is_app_browser(temp_node)
95
+ except Exception:
96
+ pass
97
+ task_inputs.append((handle, is_browser))
98
+
46
99
  with ThreadPoolExecutor() as executor:
47
- retry_counts = {app: 0 for app in apps}
48
- future_to_app = {
49
- executor.submit(
50
- self.get_nodes, app,
51
- self.desktop.is_app_browser(app),
52
- use_dom
53
- ): app
54
- for app in apps
100
+ retry_counts = {handle: 0 for handle in apps_handles}
101
+ future_to_handle = {
102
+ executor.submit(self.get_nodes, handle, is_browser, use_dom): handle
103
+ for handle, is_browser in task_inputs
55
104
  }
56
- while future_to_app: # keep running until no pending futures
57
- for future in as_completed(list(future_to_app)):
58
- app = future_to_app.pop(future) # remove completed future
105
+ while future_to_handle: # keep running until no pending futures
106
+ for future in as_completed(list(future_to_handle)):
107
+ handle = future_to_handle.pop(future) # remove completed future
59
108
  try:
60
109
  result = future.result()
61
110
  if result:
62
- element_nodes, scroll_nodes,informative_nodes = result
111
+ element_nodes, scroll_nodes, info_nodes = result
63
112
  interactive_nodes.extend(element_nodes)
64
113
  scrollable_nodes.extend(scroll_nodes)
65
- dom_informative_nodes.extend(informative_nodes)
114
+ dom_informative_nodes.extend(info_nodes)
66
115
  except Exception as e:
67
- retry_counts[app] += 1
68
- logger.debug(f"Error in processing node {app.Name}, retry attempt {retry_counts[app]}\nError: {e}")
69
- if retry_counts[app] < THREAD_MAX_RETRIES:
70
- logger.debug(f"Retrying {app.Name} for the {retry_counts[app]}th time")
71
- new_future = executor.submit(self.get_nodes, app, self.desktop.is_app_browser(app),use_dom)
72
- future_to_app[new_future] = app
116
+ retry_counts[handle] += 1
117
+ logger.debug(f"Error in processing handle {handle}, retry attempt {retry_counts[handle]}\nError: {e}")
118
+ if retry_counts[handle] < THREAD_MAX_RETRIES:
119
+ # Need to find is_browser again for retry
120
+ is_browser = next((ib for h, ib in task_inputs if h == handle), False)
121
+ new_future = executor.submit(self.get_nodes, handle, is_browser, use_dom)
122
+ future_to_handle[new_future] = handle
73
123
  else:
74
- logger.error(f"Task failed completely for {app.Name} after {THREAD_MAX_RETRIES} retries")
124
+ logger.error(f"Task failed completely for handle {handle} after {THREAD_MAX_RETRIES} retries")
75
125
  return interactive_nodes,scrollable_nodes,dom_informative_nodes
76
126
 
77
127
  def iou_bounding_box(self,window_box: Rect,element_box: Rect,) -> BoundingBox:
@@ -109,145 +159,40 @@ class Tree:
109
159
  )
110
160
  return bounding_box
111
161
 
112
- def get_nodes(self, node: Control, is_browser:bool=False,use_dom:bool=False) -> tuple[list[TreeElementNode],list[ScrollElementNode]]:
113
- window_bounding_box=node.BoundingRectangle
114
162
 
115
- def is_element_visible(node:Control,threshold:int=0):
116
- is_control=node.IsControlElement
117
- box=node.BoundingRectangle
118
- if box.isempty():
119
- return False
120
- width=box.width()
121
- height=box.height()
122
- area=width*height
123
- is_offscreen=(not node.IsOffscreen) or node.ControlTypeName in ['EditControl']
124
- return area > threshold and is_offscreen and is_control
125
-
126
- def is_element_enabled(node:Control):
127
- try:
128
- return node.IsEnabled
129
- except Exception:
130
- return False
131
-
132
- def is_default_action(node:Control):
133
- legacy_pattern=node.GetLegacyIAccessiblePattern()
134
- default_action=legacy_pattern.DefaultAction.title()
135
- if default_action in DEFAULT_ACTIONS:
136
- return True
137
- return False
138
-
139
- def is_element_image(node:Control):
140
- if isinstance(node,ImageControl):
141
- if node.LocalizedControlType=='graphic' or not node.IsKeyboardFocusable:
142
- return True
143
- return False
144
-
145
- def is_element_text(node:Control):
146
- try:
147
- if node.ControlTypeName in INFORMATIVE_CONTROL_TYPE_NAMES:
148
- if is_element_visible(node) and is_element_enabled(node) and not is_element_image(node):
149
- return True
150
- except Exception:
151
- return False
152
- return False
153
-
154
- def is_window_modal(node:WindowControl):
155
- try:
156
- window_pattern=node.GetWindowPattern()
157
- return window_pattern.IsModal
158
- except Exception:
159
- return False
160
-
161
- def is_keyboard_focusable(node:Control):
162
- try:
163
- if node.ControlTypeName in set(['EditControl','ButtonControl','CheckBoxControl','RadioButtonControl','TabItemControl']):
164
- return True
165
- return node.IsKeyboardFocusable
166
- except Exception:
167
- return False
168
-
169
- def element_has_child_element(node:Control,control_type:str,child_control_type:str):
170
- if node.LocalizedControlType==control_type:
171
- first_child=node.GetFirstChildControl()
172
- if first_child is None:
173
- return False
174
- return first_child.LocalizedControlType==child_control_type
175
-
176
- def group_has_no_name(node:Control):
177
- try:
178
- if node.ControlTypeName=='GroupControl':
179
- if not node.Name.strip():
180
- return True
181
- return False
182
- except Exception:
183
- return False
184
-
185
- def is_element_scrollable(node:Control):
186
- try:
187
- if (node.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES|INFORMATIVE_CONTROL_TYPE_NAMES) or node.IsOffscreen:
188
- return False
189
- scroll_pattern:ScrollPattern=node.GetPattern(PatternId.ScrollPattern)
190
- if scroll_pattern is None:
191
- return False
192
- return scroll_pattern.VerticallyScrollable
193
- except Exception:
194
- return False
195
-
196
- def is_element_interactive(node:Control):
197
- try:
198
- if is_browser and node.ControlTypeName in set(['DataItemControl','ListItemControl']) and not is_keyboard_focusable(node):
199
- return False
200
- elif not is_browser and node.ControlTypeName=="ImageControl" and is_keyboard_focusable(node):
201
- return True
202
- elif node.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES|DOCUMENT_CONTROL_TYPE_NAMES:
203
- return is_element_visible(node) and is_element_enabled(node) and (not is_element_image(node) or is_keyboard_focusable(node))
204
- elif node.ControlTypeName=='GroupControl':
205
- if is_browser:
206
- return is_element_visible(node) and is_element_enabled(node) and (is_default_action(node) or is_keyboard_focusable(node))
207
- # else:
208
- # return is_element_visible and is_element_enabled(node) and is_default_action(node)
209
- except Exception:
163
+
164
+ def element_has_child_element(self, node:Control,control_type:str,child_control_type:str):
165
+ if node.LocalizedControlType==control_type:
166
+ first_child=node.GetFirstChildControl()
167
+ if first_child is None:
210
168
  return False
211
- return False
212
-
213
- def dom_correction(node:Control):
214
- if element_has_child_element(node,'list item','link') or element_has_child_element(node,'item','link'):
215
- dom_interactive_nodes.pop()
216
- return None
217
- elif node.ControlTypeName=='GroupControl':
218
- dom_interactive_nodes.pop()
219
- if is_keyboard_focusable(node):
220
- child=node
221
- try:
222
- while child.GetFirstChildControl() is not None:
223
- if child.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES:
224
- return None
225
- child=child.GetFirstChildControl()
226
- except Exception:
227
- return None
228
- if child.ControlTypeName!='TextControl':
229
- return None
230
- legacy_pattern=node.GetLegacyIAccessiblePattern()
231
- value=legacy_pattern.Value
232
- element_bounding_box = node.BoundingRectangle
233
- bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
234
- center = bounding_box.get_center()
235
- is_focused=node.HasKeyboardFocus
236
- dom_interactive_nodes.append(TreeElementNode(**{
237
- 'name':child.Name.strip(),
238
- 'control_type':node.LocalizedControlType,
239
- 'value':value,
240
- 'shortcut':node.AcceleratorKey,
241
- 'bounding_box':bounding_box,
242
- 'xpath':'',
243
- 'center':center,
244
- 'app_name':app_name,
245
- 'is_focused':is_focused
246
- }))
247
- elif element_has_child_element(node,'link','heading'):
248
- dom_interactive_nodes.pop()
249
- node=node.GetFirstChildControl()
250
- control_type='link'
169
+ return first_child.LocalizedControlType==child_control_type
170
+
171
+ def _dom_correction(self, node:Control, dom_interactive_nodes:list[TreeElementNode], app_name:str):
172
+ if self.element_has_child_element(node,'list item','link') or self.element_has_child_element(node,'item','link'):
173
+ dom_interactive_nodes.pop()
174
+ return None
175
+ elif node.ControlTypeName=='GroupControl':
176
+ dom_interactive_nodes.pop()
177
+ # Inlined is_keyboard_focusable logic for correction
178
+ control_type_name_check = node.CachedControlTypeName
179
+ is_kb_focusable = False
180
+ if control_type_name_check in set(['EditControl','ButtonControl','CheckBoxControl','RadioButtonControl','TabItemControl']):
181
+ is_kb_focusable = True
182
+ else:
183
+ is_kb_focusable = node.CachedIsKeyboardFocusable
184
+
185
+ if is_kb_focusable:
186
+ child=node
187
+ try:
188
+ while child.GetFirstChildControl() is not None:
189
+ if child.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES:
190
+ return None
191
+ child=child.GetFirstChildControl()
192
+ except Exception:
193
+ return None
194
+ if child.ControlTypeName!='TextControl':
195
+ return None
251
196
  legacy_pattern=node.GetLegacyIAccessiblePattern()
252
197
  value=legacy_pattern.Value
253
198
  element_bounding_box = node.BoundingRectangle
@@ -255,9 +200,9 @@ class Tree:
255
200
  center = bounding_box.get_center()
256
201
  is_focused=node.HasKeyboardFocus
257
202
  dom_interactive_nodes.append(TreeElementNode(**{
258
- 'name':node.Name.strip(),
259
- 'control_type':control_type,
260
- 'value':node.Name.strip(),
203
+ 'name':child.Name.strip(),
204
+ 'control_type':node.LocalizedControlType,
205
+ 'value':value,
261
206
  'shortcut':node.AcceleratorKey,
262
207
  'bounding_box':bounding_box,
263
208
  'xpath':'',
@@ -265,203 +210,334 @@ class Tree:
265
210
  'app_name':app_name,
266
211
  'is_focused':is_focused
267
212
  }))
213
+ elif self.element_has_child_element(node,'link','heading'):
214
+ dom_interactive_nodes.pop()
215
+ node=node.GetFirstChildControl()
216
+ control_type='link'
217
+ legacy_pattern=node.GetLegacyIAccessiblePattern()
218
+ value=legacy_pattern.Value
219
+ element_bounding_box = node.BoundingRectangle
220
+ bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
221
+ center = bounding_box.get_center()
222
+ is_focused=node.HasKeyboardFocus
223
+ dom_interactive_nodes.append(TreeElementNode(**{
224
+ 'name':node.Name.strip(),
225
+ 'control_type':control_type,
226
+ 'value':node.Name.strip(),
227
+ 'shortcut':node.AcceleratorKey,
228
+ 'bounding_box':bounding_box,
229
+ 'xpath':'',
230
+ 'center':center,
231
+ 'app_name':app_name,
232
+ 'is_focused':is_focused
233
+ }))
234
+
235
+ def tree_traversal(self, node: Control, window_bounding_box:Rect, app_name:str, is_browser:bool,
236
+ interactive_nodes:Optional[list[TreeElementNode]]=None, scrollable_nodes:Optional[list[ScrollElementNode]]=None,
237
+ dom_interactive_nodes:Optional[list[TreeElementNode]]=None, dom_informative_nodes:Optional[list[TextElementNode]]=None,
238
+ is_dom:bool=False, is_dialog:bool=False,
239
+ element_cache_req:Optional[Any]=None, children_cache_req:Optional[Any]=None):
240
+ try:
241
+ # Build cached control if caching is enabled
242
+ if not hasattr(node, '_is_cached') and element_cache_req:
243
+ node = CachedControlHelper.build_cached_control(node, element_cache_req)
268
244
 
269
- def tree_traversal(node: Control,is_dom:bool=False,is_dialog:bool=False):
270
245
  # Checks to skip the nodes that are not interactive
271
- if node.IsOffscreen and (node.ControlTypeName not in set(["GroupControl","EditControl","TitleBarControl"])) and node.ClassName not in set(["Popup","Windows.UI.Core.CoreComponentInputSource"]):
272
- return None
246
+ is_offscreen = node.CachedIsOffscreen
247
+ control_type_name = node.CachedControlTypeName
248
+ class_name = node.CachedClassName
273
249
 
274
- if is_element_scrollable(node):
275
- scroll_pattern:ScrollPattern=node.GetPattern(PatternId.ScrollPattern)
276
- box = node.BoundingRectangle
277
- # Get the center
278
- x,y=random_point_within_bounding_box(node=node,scale_factor=0.8)
279
- center = Center(x=x,y=y)
280
- scrollable_nodes.append(ScrollElementNode(**{
281
- 'name':node.Name.strip() or node.AutomationId or node.LocalizedControlType.capitalize() or "''",
282
- 'app_name':app_name,
283
- 'control_type':node.LocalizedControlType.title(),
284
- 'bounding_box':BoundingBox(**{
285
- 'left':box.left,
286
- 'top':box.top,
287
- 'right':box.right,
288
- 'bottom':box.bottom,
289
- 'width':box.width(),
290
- 'height':box.height()
291
- }),
292
- 'center':center,
293
- 'xpath':'',
294
- 'horizontal_scrollable':scroll_pattern.HorizontallyScrollable,
295
- 'horizontal_scroll_percent':scroll_pattern.HorizontalScrollPercent if scroll_pattern.HorizontallyScrollable else 0,
296
- 'vertical_scrollable':scroll_pattern.VerticallyScrollable,
297
- 'vertical_scroll_percent':scroll_pattern.VerticalScrollPercent if scroll_pattern.VerticallyScrollable else 0,
298
- 'is_focused':node.HasKeyboardFocus
299
- }))
300
-
301
- if is_element_interactive(node):
302
- legacy_pattern=node.GetLegacyIAccessiblePattern()
303
- value=legacy_pattern.Value.strip() if legacy_pattern.Value is not None else ""
304
- is_focused=node.HasKeyboardFocus
305
- name=node.Name.strip()
306
- element_bounding_box = node.BoundingRectangle
307
- if is_browser and is_dom:
308
- bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
309
- center = bounding_box.get_center()
310
- tree_node=TreeElementNode(**{
311
- 'name':name,
312
- 'control_type':node.LocalizedControlType.title(),
313
- 'value':value,
314
- 'shortcut':node.AcceleratorKey,
315
- 'bounding_box':bounding_box,
316
- 'center':center,
317
- 'xpath':'',
318
- 'app_name':app_name,
319
- 'is_focused':is_focused
320
- })
321
- dom_interactive_nodes.append(tree_node)
322
- dom_correction(node=node)
323
- else:
324
- bounding_box=self.iou_bounding_box(window_bounding_box,element_bounding_box)
325
- center = bounding_box.get_center()
326
- tree_node=TreeElementNode(**{
327
- 'name':name,
328
- 'control_type':node.LocalizedControlType.title(),
329
- 'value':value,
330
- 'shortcut':node.AcceleratorKey,
331
- 'bounding_box':bounding_box,
332
- 'center':center,
333
- 'xpath':'',
334
- 'app_name':app_name,
335
- 'is_focused':is_focused
336
- })
337
- interactive_nodes.append(tree_node)
338
- elif is_element_text(node):
339
- dom_informative_nodes.append(TextElementNode(
340
- text=node.Name.strip(),
341
- ))
250
+ # Scrollable check
251
+ if scrollable_nodes is not None:
252
+ if (control_type_name not in (INTERACTIVE_CONTROL_TYPE_NAMES|INFORMATIVE_CONTROL_TYPE_NAMES)) and not is_offscreen:
253
+ try:
254
+ scroll_pattern:ScrollPattern=node.GetPattern(PatternId.ScrollPattern)
255
+ if scroll_pattern and scroll_pattern.VerticallyScrollable:
256
+ box = node.CachedBoundingRectangle
257
+ x,y=random_point_within_bounding_box(node=node,scale_factor=0.8)
258
+ center = Center(x=x,y=y)
259
+ name = node.CachedName
260
+ automation_id = node.CachedAutomationId
261
+ localized_control_type = node.CachedLocalizedControlType
262
+ has_keyboard_focus = node.CachedHasKeyboardFocus
263
+ scrollable_nodes.append(ScrollElementNode(**{
264
+ 'name':name.strip() or automation_id or localized_control_type.capitalize() or "''",
265
+ 'control_type':localized_control_type.title(),
266
+ 'bounding_box':BoundingBox(**{
267
+ 'left':box.left,
268
+ 'top':box.top,
269
+ 'right':box.right,
270
+ 'bottom':box.bottom,
271
+ 'width':box.width(),
272
+ 'height':box.height()
273
+ }),
274
+ 'center':center,
275
+ 'xpath':'',
276
+ 'horizontal_scrollable':scroll_pattern.HorizontallyScrollable,
277
+ 'horizontal_scroll_percent':scroll_pattern.HorizontalScrollPercent if scroll_pattern.HorizontallyScrollable else 0,
278
+ 'vertical_scrollable':scroll_pattern.VerticallyScrollable,
279
+ 'vertical_scroll_percent':scroll_pattern.VerticalScrollPercent if scroll_pattern.VerticallyScrollable else 0,
280
+ 'app_name':app_name,
281
+ 'is_focused':has_keyboard_focus
282
+ }))
283
+ except Exception:
284
+ pass
285
+
286
+ # Interactive and Informative checks
287
+ # Pre-calculate common properties
288
+ is_control_element = node.CachedIsControlElement
289
+ element_bounding_box = node.CachedBoundingRectangle
290
+ width = element_bounding_box.width()
291
+ height = element_bounding_box.height()
292
+ area = width * height
293
+
294
+ # Is Visible Check
295
+ is_visible = (area > 0) and (not is_offscreen or control_type_name == 'EditControl') and is_control_element
342
296
 
343
- children=node.GetChildren()
297
+ if is_visible:
298
+ is_enabled = node.CachedIsEnabled
299
+ if is_enabled:
300
+ # Determine is_keyboard_focusable
301
+ if control_type_name in set(['EditControl','ButtonControl','CheckBoxControl','RadioButtonControl','TabItemControl']):
302
+ is_keyboard_focusable = True
303
+ else:
304
+ is_keyboard_focusable = node.CachedIsKeyboardFocusable
305
+
306
+ # Interactive Check
307
+ if interactive_nodes is not None:
308
+ is_interactive = False
309
+ if is_browser and control_type_name in set(['DataItemControl','ListItemControl']) and not is_keyboard_focusable:
310
+ is_interactive = False
311
+ elif not is_browser and control_type_name == "ImageControl" and is_keyboard_focusable:
312
+ is_interactive = True
313
+ elif control_type_name in (INTERACTIVE_CONTROL_TYPE_NAMES|DOCUMENT_CONTROL_TYPE_NAMES):
314
+ # Role check
315
+ try:
316
+ legacy_pattern = node.GetLegacyIAccessiblePattern()
317
+ is_role_interactive = AccessibleRoleNames.get(legacy_pattern.Role, "Default") in INTERACTIVE_ROLES
318
+ except Exception:
319
+ is_role_interactive = False
320
+
321
+ # Image check
322
+ is_image = False
323
+ if control_type_name == 'ImageControl': # approximated
324
+ localized = node.CachedLocalizedControlType
325
+ if localized == 'graphic' or not is_keyboard_focusable:
326
+ is_image = True
327
+
328
+ if is_role_interactive and (not is_image or is_keyboard_focusable):
329
+ is_interactive = True
330
+
331
+ elif control_type_name == 'GroupControl':
332
+ if is_browser:
333
+ try:
334
+ legacy_pattern = node.GetLegacyIAccessiblePattern()
335
+ is_role_interactive = AccessibleRoleNames.get(legacy_pattern.Role, "Default") in INTERACTIVE_ROLES
336
+ except Exception:
337
+ is_role_interactive = False
338
+
339
+ is_default_action = False
340
+ try:
341
+ legacy_pattern = node.GetLegacyIAccessiblePattern()
342
+ if legacy_pattern.DefaultAction.title() in DEFAULT_ACTIONS:
343
+ is_default_action = True
344
+ except: pass
345
+
346
+ if is_role_interactive and (is_default_action or is_keyboard_focusable):
347
+ is_interactive = True
348
+
349
+ if is_interactive:
350
+ legacy_pattern=node.GetLegacyIAccessiblePattern()
351
+ value=legacy_pattern.Value.strip() if legacy_pattern.Value is not None else ""
352
+ is_focused = node.CachedHasKeyboardFocus
353
+ name = node.CachedName.strip()
354
+ localized_control_type = node.CachedLocalizedControlType
355
+ accelerator_key = node.CachedAcceleratorKey
356
+
357
+ if is_browser and is_dom:
358
+ bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
359
+ center = bounding_box.get_center()
360
+ tree_node=TreeElementNode(**{
361
+ 'name':name,
362
+ 'control_type':localized_control_type.title(),
363
+ 'value':value,
364
+ 'shortcut':accelerator_key,
365
+ 'bounding_box':bounding_box,
366
+ 'center':center,
367
+ 'xpath':'',
368
+ 'app_name':app_name,
369
+ 'is_focused':is_focused
370
+ })
371
+ dom_interactive_nodes.append(tree_node)
372
+ self._dom_correction(node, dom_interactive_nodes, app_name)
373
+ else:
374
+ bounding_box=self.iou_bounding_box(window_bounding_box,element_bounding_box)
375
+ center = bounding_box.get_center()
376
+ tree_node=TreeElementNode(**{
377
+ 'name':name,
378
+ 'control_type':localized_control_type.title(),
379
+ 'value':value,
380
+ 'shortcut':accelerator_key,
381
+ 'bounding_box':bounding_box,
382
+ 'center':center,
383
+ 'xpath':'',
384
+ 'app_name':app_name,
385
+ 'is_focused':is_focused
386
+ })
387
+ interactive_nodes.append(tree_node)
388
+
389
+ # Informative Check
390
+ if dom_informative_nodes is not None:
391
+ # is_element_text check
392
+ is_text = False
393
+ if control_type_name in INFORMATIVE_CONTROL_TYPE_NAMES:
394
+ # is_element_image check
395
+ is_image_check = False
396
+ if control_type_name == 'ImageControl':
397
+ localized = node.CachedLocalizedControlType
398
+
399
+ # Check keyboard focusable again if not established, but reuse
400
+ if not is_keyboard_focusable:
401
+ # If localized is graphic OR not focusable -> image
402
+ # wait, is_element_image: if localized=='graphic' or not focusable -> True
403
+ if localized == 'graphic':
404
+ is_image_check = True
405
+ else:
406
+ is_image_check = True # not focusable
407
+ elif localized == 'graphic':
408
+ is_image_check = True
344
409
 
410
+ if not is_image_check:
411
+ is_text = True
412
+
413
+ if is_text:
414
+ if is_browser and is_dom:
415
+ name = node.CachedName
416
+ dom_informative_nodes.append(TextElementNode(
417
+ text=name.strip(),
418
+ ))
419
+
420
+ # Phase 3: Cached Children Retrieval
421
+ children = CachedControlHelper.get_cached_children(node, children_cache_req)
422
+
345
423
  # Recursively traverse the tree the right to left for normal apps and for DOM traverse from left to right
346
424
  for child in (children if is_dom else children[::-1]):
347
425
  # Incrementally building the xpath
348
426
 
349
427
  # Check if the child is a DOM element
350
- if is_browser and child.AutomationId == "RootWebArea":
351
- bounding_box=child.BoundingRectangle
428
+ if is_browser and child.CachedAutomationId=="RootWebArea":
429
+ bounding_box=child.CachedBoundingRectangle
352
430
  self.dom_bounding_box=BoundingBox(left=bounding_box.left,top=bounding_box.top,
353
431
  right=bounding_box.right,bottom=bounding_box.bottom,width=bounding_box.width(),
354
432
  height=bounding_box.height())
355
- scroll_pattern=child.GetPattern(PatternId.ScrollPattern)
356
- self.dom_info=DOMInfo(
357
- horizontal_scrollable=scroll_pattern.HorizontallyScrollable,
358
- horizontal_scroll_percent=scroll_pattern.HorizontalScrollPercent if scroll_pattern.HorizontallyScrollable else 0,
359
- vertical_scrollable=scroll_pattern.VerticallyScrollable,
360
- vertical_scroll_percent=scroll_pattern.VerticalScrollPercent if scroll_pattern.VerticallyScrollable else 0
361
- )
433
+ self.dom=child
362
434
  # enter DOM subtree
363
- tree_traversal(child, is_dom=True, is_dialog=is_dialog)
435
+ self.tree_traversal(child, window_bounding_box, app_name, is_browser, interactive_nodes, scrollable_nodes, dom_interactive_nodes, dom_informative_nodes, is_dom=True, is_dialog=is_dialog, element_cache_req=element_cache_req, children_cache_req=children_cache_req)
364
436
  # Check if the child is a dialog
365
437
  elif isinstance(child,WindowControl):
366
- if not child.IsOffscreen:
438
+ if not child.CachedIsOffscreen:
367
439
  if is_dom:
368
- bounding_box=child.BoundingRectangle
440
+ bounding_box=child.CachedBoundingRectangle
369
441
  if bounding_box.width() > 0.8*self.dom_bounding_box.width:
370
442
  # Because this window element covers the majority of the screen
371
443
  dom_interactive_nodes.clear()
372
444
  else:
373
- if is_window_modal(child):
445
+ # Inline is_window_modal
446
+ is_modal = False
447
+ try:
448
+ window_pattern = child.GetWindowPattern()
449
+ is_modal = window_pattern.IsModal
450
+ except Exception:
451
+ pass
452
+
453
+ if is_modal:
374
454
  # Because this window element is modal
375
455
  interactive_nodes.clear()
376
456
  # enter dialog subtree
377
- tree_traversal(child, is_dom=is_dom, is_dialog=True)
457
+ self.tree_traversal(child, window_bounding_box, app_name, is_browser, interactive_nodes, scrollable_nodes, dom_interactive_nodes, dom_informative_nodes, is_dom=is_dom, is_dialog=True, element_cache_req=element_cache_req, children_cache_req=children_cache_req)
378
458
  else:
379
459
  # normal non-dialog children
380
- tree_traversal(child, is_dom=is_dom, is_dialog=is_dialog)
460
+ self.tree_traversal(child, window_bounding_box, app_name, is_browser, interactive_nodes, scrollable_nodes, dom_interactive_nodes, dom_informative_nodes, is_dom=is_dom, is_dialog=is_dialog, element_cache_req=element_cache_req, children_cache_req=children_cache_req)
461
+ except Exception as e:
462
+ logger.error(f"Error in tree_traversal: {e}", exc_info=True)
463
+ raise
381
464
 
382
- interactive_nodes, dom_interactive_nodes, scrollable_nodes, dom_informative_nodes = [], [], [], []
383
- app_name=node.Name.strip()
384
- match node.ClassName:
465
+ def app_name_correction(self,app_name:str)->str:
466
+ match app_name:
385
467
  case "Progman":
386
- app_name="Desktop"
468
+ return "Desktop"
387
469
  case 'Shell_TrayWnd'|'Shell_SecondaryTrayWnd':
388
- app_name="Taskbar"
470
+ return "Taskbar"
389
471
  case 'Microsoft.UI.Content.PopupWindowSiteBridge':
390
- app_name="Context Menu"
472
+ return "Context Menu"
391
473
  case _:
392
- pass
393
- tree_traversal(node,is_dom=False,is_dialog=False)
474
+ return app_name
475
+
476
def get_nodes(self, handle: int, is_browser:bool=False, use_dom:bool=False) -> tuple[list[TreeElementNode],list[ScrollElementNode],list[TextElementNode]]:
    """Collect the interactive, scrollable and informative nodes of one window.

    The window is re-created from its native handle inside the calling
    thread's COM context, then walked with ``tree_traversal``.

    Args:
        handle: Native window handle to rehydrate into a Control.
        is_browser: Whether the window belongs to a browser; forwarded to
            ``tree_traversal`` and used to decide what ``use_dom`` returns.
        use_dom: When true, return only DOM nodes (and only for browsers;
            non-browser windows yield three empty lists).

    Returns:
        A ``(interactive, scrollable, informative)`` tuple of node lists.

    Raises:
        Exception: Any failure during COM initialisation, control creation
            or traversal is logged (where applicable) and re-raised.
    """
    # Initialise COM *before* the try block: if this call fails, the
    # finally clause must not issue an unbalanced CoUninitialize().
    comtypes.CoInitialize()
    # Bound up-front so the error handler can always refer to it.
    node = None
    try:
        # Rehydrate Control from handle within the thread's COM context
        node = ControlFromHandle(handle)
        if not node:
            raise Exception("Failed to create Control from handle")

        # Create fresh cache requests for this traversal session
        element_cache_req = CacheRequestFactory.create_tree_traversal_cache()
        element_cache_req.TreeScope = TreeScope.TreeScope_Element

        children_cache_req = CacheRequestFactory.create_tree_traversal_cache()
        children_cache_req.TreeScope = TreeScope.TreeScope_Element | TreeScope.TreeScope_Children

        window_bounding_box = node.BoundingRectangle

        interactive_nodes, dom_interactive_nodes, dom_informative_nodes, scrollable_nodes = [], [], [], []

        # The identifiers handled by app_name_correction ("Progman",
        # "Shell_TrayWnd", ...) are window class names, which is what the
        # previous implementation matched on. Try the class name first and
        # fall back to the (corrected) window title otherwise.
        class_name = node.ClassName
        corrected = self.app_name_correction(class_name)
        if corrected != class_name:
            app_name = corrected
        else:
            app_name = self.app_name_correction(node.Name.strip())

        self.tree_traversal(node, window_bounding_box, app_name, is_browser, interactive_nodes, scrollable_nodes, dom_interactive_nodes, dom_informative_nodes, is_dom=False, is_dialog=False, element_cache_req=element_cache_req, children_cache_req=children_cache_req)
        logger.debug(f'App name:{app_name}')
        logger.debug(f'Interactive nodes:{len(interactive_nodes)}')
        if is_browser:
            logger.debug(f'DOM interactive nodes:{len(dom_interactive_nodes)}')
            logger.debug(f'DOM informative nodes:{len(dom_informative_nodes)}')
        logger.debug(f'Scrollable nodes:{len(scrollable_nodes)}')

        if use_dom:
            if is_browser:
                return (dom_interactive_nodes,scrollable_nodes,dom_informative_nodes)
            return ([],[],[])

        interactive_nodes.extend(dom_interactive_nodes)
        return (interactive_nodes,scrollable_nodes,dom_informative_nodes)
    except Exception as e:
        # Describing the window must never mask the original error: the
        # control may be missing, or reading its Name may itself fail.
        try:
            window_label = node.Name if node is not None else f"handle {handle}"
        except Exception:
            window_label = f"handle {handle}"
        logger.error(f"Error getting nodes for {window_label}: {e}")
        raise
    finally:
        comtypes.CoUninitialize()
413
518
 
414
- scaled_width = int(original_width * scale)
415
- scaled_height = int(original_height * scale)
416
- screenshot = screenshot.resize((scaled_width, scaled_height), Image.Resampling.LANCZOS)
417
-
418
- # Add padding
419
- padding = 5
420
- width = int(screenshot.width + (1.5 * padding))
421
- height = int(screenshot.height + (1.5 * padding))
422
- padded_screenshot = Image.new("RGB", (width, height), color=(255, 255, 255))
423
- padded_screenshot.paste(screenshot, (padding, padding))
519
def _on_focus_change(self, sender:'ctypes.POINTER(IUIAutomationElement)'):
    """Handle focus change events.

    Duplicate events for the same element (same runtime id) arriving less
    than one second apart are ignored; otherwise the event is recorded in
    ``self._last_focus_event`` and logged at debug level.

    Args:
        sender: The UI Automation element that received focus.

    Returns:
        None. The handler is best-effort and never raises for element
        lookup failures.
    """
    current_time = time()

    # The element may already be gone by the time the event is delivered,
    # so resolving it is guarded just like in the property-change handler.
    try:
        element = Control.CreateControlFromElement(sender)
        event_key = tuple(element.GetRuntimeId())
    except Exception:
        logger.debug("[WatchDog] Could not resolve focused element", exc_info=True)
        return None

    # Debounce duplicate events
    previous_event = getattr(self, '_last_focus_event', None)
    if previous_event:
        last_key, last_time = previous_event
        if last_key == event_key and (current_time - last_time) < 1.0:
            return None
    self._last_focus_event = (event_key, current_time)

    # Reading Name/ControlTypeName is only needed for the debug message.
    if not logger.isEnabledFor(logging.DEBUG):
        return None
    try:
        logger.debug(f"[WatchDog] Focus changed to: '{element.Name}' ({element.ControlTypeName})")
    except Exception:
        logger.debug("[WatchDog] Could not describe focused element", exc_info=True)
    return None
+ pass
459
536
 
460
- # Draw label background and text
461
- draw.rectangle([(label_x1, label_y1), (label_x2, label_y2)], fill=color)
462
- draw.text((label_x1 + 2, label_y1 + 2), str(label), fill=(255, 255, 255), font=font)
463
-
464
- # Draw annotations in parallel
465
- with ThreadPoolExecutor() as executor:
466
- executor.map(draw_annotation, range(len(nodes)), nodes)
467
- return padded_screenshot
537
def _on_property_change(self, sender:'ctypes.POINTER(IUIAutomationElement)', propertyId:int, newValue):
    """Handle property change events.

    The handler's only effect is a debug log line describing the changed
    property and the element it belongs to.

    Args:
        sender: The UI Automation element whose property changed.
        propertyId: Identifier of the property that changed.
        newValue: The property's new value.
    """
    # Nothing below has an effect unless debug logging is on, so skip
    # wrapping the element and reading its properties in that case.
    if not logger.isEnabledFor(logging.DEBUG):
        return
    try:
        element = Control.CreateControlFromElement(sender)
        logger.debug(f"[WatchDog] Property changed: ID={propertyId} Value={newValue} Element: '{element.Name}' ({element.ControlTypeName})")
    except Exception:
        # Best-effort: the element may have vanished; record why instead
        # of swallowing the failure silently.
        logger.debug("[WatchDog] Could not describe property change event", exc_info=True)