windows-mcp 0.5.7__py3-none-any.whl → 0.5.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- windows_mcp/__main__.py +314 -312
- windows_mcp/analytics.py +175 -171
- windows_mcp/desktop/config.py +20 -20
- windows_mcp/desktop/service.py +457 -457
- windows_mcp/desktop/views.py +57 -57
- windows_mcp/tree/config.py +50 -50
- windows_mcp/tree/service.py +600 -466
- windows_mcp/tree/utils.py +21 -21
- windows_mcp/tree/views.py +115 -115
- windows_mcp/uia/__init__.py +4 -0
- windows_mcp/uia/controls.py +4781 -0
- windows_mcp/uia/core.py +3269 -0
- windows_mcp/uia/enums.py +1963 -0
- windows_mcp/uia/events.py +83 -0
- windows_mcp/uia/patterns.py +2106 -0
- windows_mcp/watchdog/__init__.py +1 -0
- windows_mcp/watchdog/event_handlers.py +51 -0
- windows_mcp/watchdog/service.py +188 -0
- {windows_mcp-0.5.7.dist-info → windows_mcp-0.5.8.dist-info}/METADATA +4 -4
- windows_mcp-0.5.8.dist-info/RECORD +26 -0
- windows_mcp-0.5.7.dist-info/RECORD +0 -17
- {windows_mcp-0.5.7.dist-info → windows_mcp-0.5.8.dist-info}/WHEEL +0 -0
- {windows_mcp-0.5.7.dist-info → windows_mcp-0.5.8.dist-info}/entry_points.txt +0 -0
- {windows_mcp-0.5.7.dist-info → windows_mcp-0.5.8.dist-info}/licenses/LICENSE.md +0 -0
windows_mcp/tree/service.py
CHANGED
|
@@ -1,467 +1,601 @@
|
|
|
1
|
-
from windows_mcp.tree.config import INTERACTIVE_CONTROL_TYPE_NAMES,DOCUMENT_CONTROL_TYPE_NAMES,INFORMATIVE_CONTROL_TYPE_NAMES, DEFAULT_ACTIONS, THREAD_MAX_RETRIES
|
|
2
|
-
from windows_mcp.tree.views import TreeElementNode, ScrollElementNode, TextElementNode, Center, BoundingBox, TreeState, DOMInfo
|
|
3
|
-
from
|
|
4
|
-
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
5
|
-
from windows_mcp.tree.utils import random_point_within_bounding_box
|
|
6
|
-
from PIL import Image, ImageFont, ImageDraw
|
|
7
|
-
from typing import TYPE_CHECKING,Optional
|
|
8
|
-
from windows_mcp.desktop.views import App
|
|
9
|
-
from time import sleep
|
|
10
|
-
import logging
|
|
11
|
-
import random
|
|
12
|
-
|
|
13
|
-
logger = logging.getLogger(__name__)
|
|
14
|
-
logger.setLevel(logging.INFO)
|
|
15
|
-
handler = logging.StreamHandler()
|
|
16
|
-
formatter = logging.Formatter('[%(levelname)s] %(message)s')
|
|
17
|
-
handler.setFormatter(formatter)
|
|
18
|
-
logger.addHandler(handler)
|
|
19
|
-
|
|
20
|
-
if TYPE_CHECKING:
|
|
21
|
-
from windows_mcp.desktop.service import Desktop
|
|
22
|
-
|
|
23
|
-
class Tree:
|
|
24
|
-
def __init__(self,desktop:'Desktop'):
|
|
25
|
-
self.desktop=desktop
|
|
26
|
-
self.screen_size=self.desktop.get_screen_size()
|
|
27
|
-
self.dom_info:Optional[DOMInfo]=None
|
|
28
|
-
self.dom_bounding_box:BoundingBox=None
|
|
29
|
-
self.screen_box=BoundingBox(
|
|
30
|
-
top=0, left=0, bottom=self.screen_size.height, right=self.screen_size.width,
|
|
31
|
-
width=self.screen_size.width, height=self.screen_size.height
|
|
32
|
-
)
|
|
33
|
-
|
|
34
|
-
def get_state(self,active_app:App,other_apps:list[App],use_dom:bool=False)->TreeState:
|
|
35
|
-
root=GetRootControl()
|
|
36
|
-
other_apps_handle=set(map(lambda other_app: other_app.handle,other_apps))
|
|
37
|
-
apps=list(filter(lambda app:app.NativeWindowHandle not in other_apps_handle,root.GetChildren()))
|
|
38
|
-
del other_apps_handle
|
|
39
|
-
if active_app:
|
|
40
|
-
apps=list(filter(lambda app:app.ClassName!='Progman',apps))
|
|
41
|
-
interactive_nodes,scrollable_nodes,dom_informative_nodes=self.get_appwise_nodes(apps=apps,use_dom=use_dom)
|
|
42
|
-
return TreeState(dom_info=self.dom_info,interactive_nodes=interactive_nodes,scrollable_nodes=scrollable_nodes,dom_informative_nodes=dom_informative_nodes)
|
|
43
|
-
|
|
44
|
-
def get_appwise_nodes(self,apps:list[Control],use_dom:bool=False)-> tuple[list[TreeElementNode],list[ScrollElementNode],list[TextElementNode]]:
|
|
45
|
-
interactive_nodes, scrollable_nodes,dom_informative_nodes = [], [], []
|
|
46
|
-
with ThreadPoolExecutor() as executor:
|
|
47
|
-
retry_counts = {app: 0 for app in apps}
|
|
48
|
-
future_to_app = {
|
|
49
|
-
executor.submit(
|
|
50
|
-
self.get_nodes, app,
|
|
51
|
-
self.desktop.is_app_browser(app),
|
|
52
|
-
use_dom
|
|
53
|
-
): app
|
|
54
|
-
for app in apps
|
|
55
|
-
}
|
|
56
|
-
while future_to_app: # keep running until no pending futures
|
|
57
|
-
for future in as_completed(list(future_to_app)):
|
|
58
|
-
app = future_to_app.pop(future) # remove completed future
|
|
59
|
-
try:
|
|
60
|
-
result = future.result()
|
|
61
|
-
if result:
|
|
62
|
-
element_nodes, scroll_nodes,informative_nodes = result
|
|
63
|
-
interactive_nodes.extend(element_nodes)
|
|
64
|
-
scrollable_nodes.extend(scroll_nodes)
|
|
65
|
-
dom_informative_nodes.extend(informative_nodes)
|
|
66
|
-
except Exception as e:
|
|
67
|
-
retry_counts[app] += 1
|
|
68
|
-
logger.debug(f"Error in processing node {app.Name}, retry attempt {retry_counts[app]}\nError: {e}")
|
|
69
|
-
if retry_counts[app] < THREAD_MAX_RETRIES:
|
|
70
|
-
logger.debug(f"Retrying {app.Name} for the {retry_counts[app]}th time")
|
|
71
|
-
new_future = executor.submit(self.get_nodes, app, self.desktop.is_app_browser(app),use_dom)
|
|
72
|
-
future_to_app[new_future] = app
|
|
73
|
-
else:
|
|
74
|
-
logger.error(f"Task failed completely for {app.Name} after {THREAD_MAX_RETRIES} retries")
|
|
75
|
-
return interactive_nodes,scrollable_nodes,dom_informative_nodes
|
|
76
|
-
|
|
77
|
-
def iou_bounding_box(self,window_box: Rect,element_box: Rect,) -> BoundingBox:
|
|
78
|
-
# Step 1: Intersection of element and window (existing logic)
|
|
79
|
-
intersection_left = max(window_box.left, element_box.left)
|
|
80
|
-
intersection_top = max(window_box.top, element_box.top)
|
|
81
|
-
intersection_right = min(window_box.right, element_box.right)
|
|
82
|
-
intersection_bottom = min(window_box.bottom, element_box.bottom)
|
|
83
|
-
|
|
84
|
-
# Step 2: Clamp to screen boundaries (new addition)
|
|
85
|
-
intersection_left = max(self.screen_box.left, intersection_left)
|
|
86
|
-
intersection_top = max(self.screen_box.top, intersection_top)
|
|
87
|
-
intersection_right = min(self.screen_box.right, intersection_right)
|
|
88
|
-
intersection_bottom = min(self.screen_box.bottom, intersection_bottom)
|
|
89
|
-
|
|
90
|
-
# Step 3: Validate intersection
|
|
91
|
-
if (intersection_right > intersection_left and intersection_bottom > intersection_top):
|
|
92
|
-
bounding_box = BoundingBox(
|
|
93
|
-
left=intersection_left,
|
|
94
|
-
top=intersection_top,
|
|
95
|
-
right=intersection_right,
|
|
96
|
-
bottom=intersection_bottom,
|
|
97
|
-
width=intersection_right - intersection_left,
|
|
98
|
-
height=intersection_bottom - intersection_top
|
|
99
|
-
)
|
|
100
|
-
else:
|
|
101
|
-
# No valid visible intersection (either outside window or screen)
|
|
102
|
-
bounding_box = BoundingBox(
|
|
103
|
-
left=0,
|
|
104
|
-
top=0,
|
|
105
|
-
right=0,
|
|
106
|
-
bottom=0,
|
|
107
|
-
width=0,
|
|
108
|
-
height=0
|
|
109
|
-
)
|
|
110
|
-
return bounding_box
|
|
111
|
-
|
|
112
|
-
def get_nodes(self, node: Control, is_browser:bool=False,use_dom:bool=False) -> tuple[list[TreeElementNode],list[ScrollElementNode]]:
|
|
113
|
-
window_bounding_box=node.BoundingRectangle
|
|
114
|
-
|
|
115
|
-
def is_element_visible(node:Control,threshold:int=0):
|
|
116
|
-
is_control=node.IsControlElement
|
|
117
|
-
box=node.BoundingRectangle
|
|
118
|
-
if box.isempty():
|
|
119
|
-
return False
|
|
120
|
-
width=box.width()
|
|
121
|
-
height=box.height()
|
|
122
|
-
area=width*height
|
|
123
|
-
is_offscreen=(not node.IsOffscreen) or node.ControlTypeName in ['EditControl']
|
|
124
|
-
return area > threshold and is_offscreen and is_control
|
|
125
|
-
|
|
126
|
-
def is_element_enabled(node:Control):
|
|
127
|
-
try:
|
|
128
|
-
return node.IsEnabled
|
|
129
|
-
except Exception:
|
|
130
|
-
return False
|
|
131
|
-
|
|
132
|
-
def is_default_action(node:Control):
|
|
133
|
-
legacy_pattern=node.GetLegacyIAccessiblePattern()
|
|
134
|
-
default_action=legacy_pattern.DefaultAction.title()
|
|
135
|
-
if default_action in DEFAULT_ACTIONS:
|
|
136
|
-
return True
|
|
137
|
-
return False
|
|
138
|
-
|
|
139
|
-
def is_element_image(node:Control):
|
|
140
|
-
if isinstance(node,ImageControl):
|
|
141
|
-
if node.LocalizedControlType=='graphic' or not node.IsKeyboardFocusable:
|
|
142
|
-
return True
|
|
143
|
-
return False
|
|
144
|
-
|
|
145
|
-
def is_element_text(node:Control):
|
|
146
|
-
try:
|
|
147
|
-
if node.ControlTypeName in INFORMATIVE_CONTROL_TYPE_NAMES:
|
|
148
|
-
if is_element_visible(node) and is_element_enabled(node) and not is_element_image(node):
|
|
149
|
-
return True
|
|
150
|
-
except Exception:
|
|
151
|
-
return False
|
|
152
|
-
return False
|
|
153
|
-
|
|
154
|
-
def is_window_modal(node:WindowControl):
|
|
155
|
-
try:
|
|
156
|
-
window_pattern=node.GetWindowPattern()
|
|
157
|
-
return window_pattern.IsModal
|
|
158
|
-
except Exception:
|
|
159
|
-
return False
|
|
160
|
-
|
|
161
|
-
def is_keyboard_focusable(node:Control):
|
|
162
|
-
try:
|
|
163
|
-
if node.ControlTypeName in set(['EditControl','ButtonControl','CheckBoxControl','RadioButtonControl','TabItemControl']):
|
|
164
|
-
return True
|
|
165
|
-
return node.IsKeyboardFocusable
|
|
166
|
-
except Exception:
|
|
167
|
-
return False
|
|
168
|
-
|
|
169
|
-
def element_has_child_element(node:Control,control_type:str,child_control_type:str):
|
|
170
|
-
if node.LocalizedControlType==control_type:
|
|
171
|
-
first_child=node.GetFirstChildControl()
|
|
172
|
-
if first_child is None:
|
|
173
|
-
return False
|
|
174
|
-
return first_child.LocalizedControlType==child_control_type
|
|
175
|
-
|
|
176
|
-
def group_has_no_name(node:Control):
|
|
177
|
-
try:
|
|
178
|
-
if node.ControlTypeName=='GroupControl':
|
|
179
|
-
if not node.Name.strip():
|
|
180
|
-
return True
|
|
181
|
-
return False
|
|
182
|
-
except Exception:
|
|
183
|
-
return False
|
|
184
|
-
|
|
185
|
-
def is_element_scrollable(node:Control):
|
|
186
|
-
try:
|
|
187
|
-
if (node.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES|INFORMATIVE_CONTROL_TYPE_NAMES) or node.IsOffscreen:
|
|
188
|
-
return False
|
|
189
|
-
scroll_pattern:ScrollPattern=node.GetPattern(PatternId.ScrollPattern)
|
|
190
|
-
if scroll_pattern is None:
|
|
191
|
-
return False
|
|
192
|
-
return scroll_pattern.VerticallyScrollable
|
|
193
|
-
except Exception:
|
|
194
|
-
return False
|
|
195
|
-
|
|
196
|
-
def is_element_interactive(node:Control):
|
|
197
|
-
try:
|
|
198
|
-
if is_browser and node.ControlTypeName in set(['DataItemControl','ListItemControl']) and not is_keyboard_focusable(node):
|
|
199
|
-
return False
|
|
200
|
-
elif not is_browser and node.ControlTypeName=="ImageControl" and is_keyboard_focusable(node):
|
|
201
|
-
return True
|
|
202
|
-
elif node.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES|DOCUMENT_CONTROL_TYPE_NAMES:
|
|
203
|
-
return is_element_visible(node) and is_element_enabled(node) and (not is_element_image(node) or is_keyboard_focusable(node))
|
|
204
|
-
elif node.ControlTypeName=='GroupControl':
|
|
205
|
-
if is_browser:
|
|
206
|
-
return is_element_visible(node) and is_element_enabled(node) and (is_default_action(node) or is_keyboard_focusable(node))
|
|
207
|
-
# else:
|
|
208
|
-
# return is_element_visible and is_element_enabled(node) and is_default_action(node)
|
|
209
|
-
except Exception:
|
|
210
|
-
return False
|
|
211
|
-
return False
|
|
212
|
-
|
|
213
|
-
def dom_correction(node:Control):
|
|
214
|
-
if element_has_child_element(node,'list item','link') or element_has_child_element(node,'item','link'):
|
|
215
|
-
dom_interactive_nodes.pop()
|
|
216
|
-
return None
|
|
217
|
-
elif node.ControlTypeName=='GroupControl':
|
|
218
|
-
dom_interactive_nodes.pop()
|
|
219
|
-
if is_keyboard_focusable(node):
|
|
220
|
-
child=node
|
|
221
|
-
try:
|
|
222
|
-
while child.GetFirstChildControl() is not None:
|
|
223
|
-
if child.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES:
|
|
224
|
-
return None
|
|
225
|
-
child=child.GetFirstChildControl()
|
|
226
|
-
except Exception:
|
|
227
|
-
return None
|
|
228
|
-
if child.ControlTypeName!='TextControl':
|
|
229
|
-
return None
|
|
230
|
-
legacy_pattern=node.GetLegacyIAccessiblePattern()
|
|
231
|
-
value=legacy_pattern.Value
|
|
232
|
-
element_bounding_box = node.BoundingRectangle
|
|
233
|
-
bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
|
|
234
|
-
center = bounding_box.get_center()
|
|
235
|
-
is_focused=node.HasKeyboardFocus
|
|
236
|
-
dom_interactive_nodes.append(TreeElementNode(**{
|
|
237
|
-
'name':child.Name.strip(),
|
|
238
|
-
'control_type':node.LocalizedControlType,
|
|
239
|
-
'value':value,
|
|
240
|
-
'shortcut':node.AcceleratorKey,
|
|
241
|
-
'bounding_box':bounding_box,
|
|
242
|
-
'xpath':'',
|
|
243
|
-
'center':center,
|
|
244
|
-
'app_name':app_name,
|
|
245
|
-
'is_focused':is_focused
|
|
246
|
-
}))
|
|
247
|
-
elif element_has_child_element(node,'link','heading'):
|
|
248
|
-
dom_interactive_nodes.pop()
|
|
249
|
-
node=node.GetFirstChildControl()
|
|
250
|
-
control_type='link'
|
|
251
|
-
legacy_pattern=node.GetLegacyIAccessiblePattern()
|
|
252
|
-
value=legacy_pattern.Value
|
|
253
|
-
element_bounding_box = node.BoundingRectangle
|
|
254
|
-
bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
|
|
255
|
-
center = bounding_box.get_center()
|
|
256
|
-
is_focused=node.HasKeyboardFocus
|
|
257
|
-
dom_interactive_nodes.append(TreeElementNode(**{
|
|
258
|
-
'name':node.Name.strip(),
|
|
259
|
-
'control_type':control_type,
|
|
260
|
-
'value':node.Name.strip(),
|
|
261
|
-
'shortcut':node.AcceleratorKey,
|
|
262
|
-
'bounding_box':bounding_box,
|
|
263
|
-
'xpath':'',
|
|
264
|
-
'center':center,
|
|
265
|
-
'app_name':app_name,
|
|
266
|
-
'is_focused':is_focused
|
|
267
|
-
}))
|
|
268
|
-
|
|
269
|
-
def tree_traversal(node: Control,is_dom:bool=False,is_dialog:bool=False):
|
|
270
|
-
# Checks to skip the nodes that are not interactive
|
|
271
|
-
if node.IsOffscreen and (node.ControlTypeName not in set(["GroupControl","EditControl","TitleBarControl"])) and node.ClassName not in set(["Popup","Windows.UI.Core.CoreComponentInputSource"]):
|
|
272
|
-
return None
|
|
273
|
-
|
|
274
|
-
if is_element_scrollable(node):
|
|
275
|
-
scroll_pattern:ScrollPattern=node.GetPattern(PatternId.ScrollPattern)
|
|
276
|
-
box = node.BoundingRectangle
|
|
277
|
-
# Get the center
|
|
278
|
-
x,y=random_point_within_bounding_box(node=node,scale_factor=0.8)
|
|
279
|
-
center = Center(x=x,y=y)
|
|
280
|
-
scrollable_nodes.append(ScrollElementNode(**{
|
|
281
|
-
'name':node.Name.strip() or node.AutomationId or node.LocalizedControlType.capitalize() or "''",
|
|
282
|
-
'app_name':app_name,
|
|
283
|
-
'control_type':node.LocalizedControlType.title(),
|
|
284
|
-
'bounding_box':BoundingBox(**{
|
|
285
|
-
'left':box.left,
|
|
286
|
-
'top':box.top,
|
|
287
|
-
'right':box.right,
|
|
288
|
-
'bottom':box.bottom,
|
|
289
|
-
'width':box.width(),
|
|
290
|
-
'height':box.height()
|
|
291
|
-
}),
|
|
292
|
-
'center':center,
|
|
293
|
-
'xpath':'',
|
|
294
|
-
'horizontal_scrollable':scroll_pattern.HorizontallyScrollable,
|
|
295
|
-
'horizontal_scroll_percent':scroll_pattern.HorizontalScrollPercent if scroll_pattern.HorizontallyScrollable else 0,
|
|
296
|
-
'vertical_scrollable':scroll_pattern.VerticallyScrollable,
|
|
297
|
-
'vertical_scroll_percent':scroll_pattern.VerticalScrollPercent if scroll_pattern.VerticallyScrollable else 0,
|
|
298
|
-
'is_focused':node.HasKeyboardFocus
|
|
299
|
-
}))
|
|
300
|
-
|
|
301
|
-
if is_element_interactive(node):
|
|
302
|
-
legacy_pattern=node.GetLegacyIAccessiblePattern()
|
|
303
|
-
value=legacy_pattern.Value.strip() if legacy_pattern.Value is not None else ""
|
|
304
|
-
is_focused=node.HasKeyboardFocus
|
|
305
|
-
name=node.Name.strip()
|
|
306
|
-
element_bounding_box = node.BoundingRectangle
|
|
307
|
-
if is_browser and is_dom:
|
|
308
|
-
bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
|
|
309
|
-
center = bounding_box.get_center()
|
|
310
|
-
tree_node=TreeElementNode(**{
|
|
311
|
-
'name':name,
|
|
312
|
-
'control_type':node.LocalizedControlType.title(),
|
|
313
|
-
'value':value,
|
|
314
|
-
'shortcut':node.AcceleratorKey,
|
|
315
|
-
'bounding_box':bounding_box,
|
|
316
|
-
'center':center,
|
|
317
|
-
'xpath':'',
|
|
318
|
-
'app_name':app_name,
|
|
319
|
-
'is_focused':is_focused
|
|
320
|
-
})
|
|
321
|
-
dom_interactive_nodes.append(tree_node)
|
|
322
|
-
dom_correction(node=node)
|
|
323
|
-
else:
|
|
324
|
-
bounding_box=self.iou_bounding_box(window_bounding_box,element_bounding_box)
|
|
325
|
-
center = bounding_box.get_center()
|
|
326
|
-
tree_node=TreeElementNode(**{
|
|
327
|
-
'name':name,
|
|
328
|
-
'control_type':node.LocalizedControlType.title(),
|
|
329
|
-
'value':value,
|
|
330
|
-
'shortcut':node.AcceleratorKey,
|
|
331
|
-
'bounding_box':bounding_box,
|
|
332
|
-
'center':center,
|
|
333
|
-
'xpath':'',
|
|
334
|
-
'app_name':app_name,
|
|
335
|
-
'is_focused':is_focused
|
|
336
|
-
})
|
|
337
|
-
interactive_nodes.append(tree_node)
|
|
338
|
-
elif is_element_text(node):
|
|
339
|
-
dom_informative_nodes.append(TextElementNode(
|
|
340
|
-
text=node.Name.strip(),
|
|
341
|
-
))
|
|
342
|
-
|
|
343
|
-
children=node.GetChildren()
|
|
344
|
-
|
|
345
|
-
# Recursively traverse the tree the right to left for normal apps and for DOM traverse from left to right
|
|
346
|
-
for child in (children if is_dom else children[::-1]):
|
|
347
|
-
# Incrementally building the xpath
|
|
348
|
-
|
|
349
|
-
# Check if the child is a DOM element
|
|
350
|
-
if is_browser and child.AutomationId == "RootWebArea":
|
|
351
|
-
bounding_box=child.BoundingRectangle
|
|
352
|
-
self.dom_bounding_box=BoundingBox(left=bounding_box.left,top=bounding_box.top,
|
|
353
|
-
right=bounding_box.right,bottom=bounding_box.bottom,width=bounding_box.width(),
|
|
354
|
-
height=bounding_box.height())
|
|
355
|
-
scroll_pattern=child.GetPattern(PatternId.ScrollPattern)
|
|
356
|
-
self.dom_info=DOMInfo(
|
|
357
|
-
horizontal_scrollable=scroll_pattern.HorizontallyScrollable,
|
|
358
|
-
horizontal_scroll_percent=scroll_pattern.HorizontalScrollPercent if scroll_pattern.HorizontallyScrollable else 0,
|
|
359
|
-
vertical_scrollable=scroll_pattern.VerticallyScrollable,
|
|
360
|
-
vertical_scroll_percent=scroll_pattern.VerticalScrollPercent if scroll_pattern.VerticallyScrollable else 0
|
|
361
|
-
)
|
|
362
|
-
# enter DOM subtree
|
|
363
|
-
tree_traversal(child, is_dom=True, is_dialog=is_dialog)
|
|
364
|
-
# Check if the child is a dialog
|
|
365
|
-
elif isinstance(child,WindowControl):
|
|
366
|
-
if not child.IsOffscreen:
|
|
367
|
-
if is_dom:
|
|
368
|
-
bounding_box=child.BoundingRectangle
|
|
369
|
-
if bounding_box.width() > 0.8*self.dom_bounding_box.width:
|
|
370
|
-
# Because this window element covers the majority of the screen
|
|
371
|
-
dom_interactive_nodes.clear()
|
|
372
|
-
else:
|
|
373
|
-
if is_window_modal(child):
|
|
374
|
-
# Because this window element is modal
|
|
375
|
-
interactive_nodes.clear()
|
|
376
|
-
# enter dialog subtree
|
|
377
|
-
tree_traversal(child, is_dom=is_dom, is_dialog=True)
|
|
378
|
-
else:
|
|
379
|
-
# normal non-dialog children
|
|
380
|
-
tree_traversal(child, is_dom=is_dom, is_dialog=is_dialog)
|
|
381
|
-
|
|
382
|
-
interactive_nodes, dom_interactive_nodes, scrollable_nodes, dom_informative_nodes = [], [], [], []
|
|
383
|
-
app_name=node.Name.strip()
|
|
384
|
-
match node.ClassName:
|
|
385
|
-
case "Progman":
|
|
386
|
-
app_name="Desktop"
|
|
387
|
-
case 'Shell_TrayWnd'|'Shell_SecondaryTrayWnd':
|
|
388
|
-
app_name="Taskbar"
|
|
389
|
-
case 'Microsoft.UI.Content.PopupWindowSiteBridge':
|
|
390
|
-
app_name="Context Menu"
|
|
391
|
-
case _:
|
|
392
|
-
pass
|
|
393
|
-
tree_traversal(node,is_dom=False,is_dialog=False)
|
|
394
|
-
|
|
395
|
-
logger.debug(f'Interactive nodes:{len(interactive_nodes)}')
|
|
396
|
-
logger.debug(f'DOM interactive nodes:{len(dom_interactive_nodes)}')
|
|
397
|
-
logger.debug(f'Scrollable nodes:{len(scrollable_nodes)}')
|
|
398
|
-
|
|
399
|
-
if use_dom:
|
|
400
|
-
if is_browser:
|
|
401
|
-
return (dom_interactive_nodes,scrollable_nodes,dom_informative_nodes)
|
|
402
|
-
else:
|
|
403
|
-
return ([],[],[])
|
|
404
|
-
else:
|
|
405
|
-
return (interactive_nodes+dom_interactive_nodes,scrollable_nodes,dom_informative_nodes)
|
|
406
|
-
|
|
407
|
-
def
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
try:
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
1
|
+
from windows_mcp.tree.config import INTERACTIVE_CONTROL_TYPE_NAMES,DOCUMENT_CONTROL_TYPE_NAMES,INFORMATIVE_CONTROL_TYPE_NAMES, DEFAULT_ACTIONS, THREAD_MAX_RETRIES
|
|
2
|
+
from windows_mcp.tree.views import TreeElementNode, ScrollElementNode, TextElementNode, Center, BoundingBox, TreeState, DOMInfo
|
|
3
|
+
from windows_mcp.uia import Control,ImageControl,ScrollPattern,WindowControl,Rect,GetRootControl,PatternId
|
|
4
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
5
|
+
from windows_mcp.tree.utils import random_point_within_bounding_box
|
|
6
|
+
from PIL import Image, ImageFont, ImageDraw
|
|
7
|
+
from typing import TYPE_CHECKING,Optional
|
|
8
|
+
from windows_mcp.desktop.views import App
|
|
9
|
+
from time import sleep,time
|
|
10
|
+
import logging
|
|
11
|
+
import random
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
logger.setLevel(logging.INFO)
|
|
15
|
+
handler = logging.StreamHandler()
|
|
16
|
+
formatter = logging.Formatter('[%(levelname)s] %(message)s')
|
|
17
|
+
handler.setFormatter(formatter)
|
|
18
|
+
logger.addHandler(handler)
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from windows_mcp.desktop.service import Desktop
|
|
22
|
+
|
|
23
|
+
class Tree:
|
|
24
|
+
def __init__(self,desktop:'Desktop'):
|
|
25
|
+
self.desktop=desktop
|
|
26
|
+
self.screen_size=self.desktop.get_screen_size()
|
|
27
|
+
self.dom_info:Optional[DOMInfo]=None
|
|
28
|
+
self.dom_bounding_box:BoundingBox=None
|
|
29
|
+
self.screen_box=BoundingBox(
|
|
30
|
+
top=0, left=0, bottom=self.screen_size.height, right=self.screen_size.width,
|
|
31
|
+
width=self.screen_size.width, height=self.screen_size.height
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
def get_state(self,active_app:App,other_apps:list[App],use_dom:bool=False)->TreeState:
|
|
35
|
+
root=GetRootControl()
|
|
36
|
+
other_apps_handle=set(map(lambda other_app: other_app.handle,other_apps))
|
|
37
|
+
apps=list(filter(lambda app:app.NativeWindowHandle not in other_apps_handle,root.GetChildren()))
|
|
38
|
+
del other_apps_handle
|
|
39
|
+
if active_app:
|
|
40
|
+
apps=list(filter(lambda app:app.ClassName!='Progman',apps))
|
|
41
|
+
interactive_nodes,scrollable_nodes,dom_informative_nodes=self.get_appwise_nodes(apps=apps,use_dom=use_dom)
|
|
42
|
+
return TreeState(dom_info=self.dom_info,interactive_nodes=interactive_nodes,scrollable_nodes=scrollable_nodes,dom_informative_nodes=dom_informative_nodes)
|
|
43
|
+
|
|
44
|
+
def get_appwise_nodes(self,apps:list[Control],use_dom:bool=False)-> tuple[list[TreeElementNode],list[ScrollElementNode],list[TextElementNode]]:
|
|
45
|
+
interactive_nodes, scrollable_nodes,dom_informative_nodes = [], [], []
|
|
46
|
+
with ThreadPoolExecutor() as executor:
|
|
47
|
+
retry_counts = {app: 0 for app in apps}
|
|
48
|
+
future_to_app = {
|
|
49
|
+
executor.submit(
|
|
50
|
+
self.get_nodes, app,
|
|
51
|
+
self.desktop.is_app_browser(app),
|
|
52
|
+
use_dom
|
|
53
|
+
): app
|
|
54
|
+
for app in apps
|
|
55
|
+
}
|
|
56
|
+
while future_to_app: # keep running until no pending futures
|
|
57
|
+
for future in as_completed(list(future_to_app)):
|
|
58
|
+
app = future_to_app.pop(future) # remove completed future
|
|
59
|
+
try:
|
|
60
|
+
result = future.result()
|
|
61
|
+
if result:
|
|
62
|
+
element_nodes, scroll_nodes,informative_nodes = result
|
|
63
|
+
interactive_nodes.extend(element_nodes)
|
|
64
|
+
scrollable_nodes.extend(scroll_nodes)
|
|
65
|
+
dom_informative_nodes.extend(informative_nodes)
|
|
66
|
+
except Exception as e:
|
|
67
|
+
retry_counts[app] += 1
|
|
68
|
+
logger.debug(f"Error in processing node {app.Name}, retry attempt {retry_counts[app]}\nError: {e}")
|
|
69
|
+
if retry_counts[app] < THREAD_MAX_RETRIES:
|
|
70
|
+
logger.debug(f"Retrying {app.Name} for the {retry_counts[app]}th time")
|
|
71
|
+
new_future = executor.submit(self.get_nodes, app, self.desktop.is_app_browser(app),use_dom)
|
|
72
|
+
future_to_app[new_future] = app
|
|
73
|
+
else:
|
|
74
|
+
logger.error(f"Task failed completely for {app.Name} after {THREAD_MAX_RETRIES} retries")
|
|
75
|
+
return interactive_nodes,scrollable_nodes,dom_informative_nodes
|
|
76
|
+
|
|
77
|
+
def iou_bounding_box(self,window_box: Rect,element_box: Rect,) -> BoundingBox:
|
|
78
|
+
# Step 1: Intersection of element and window (existing logic)
|
|
79
|
+
intersection_left = max(window_box.left, element_box.left)
|
|
80
|
+
intersection_top = max(window_box.top, element_box.top)
|
|
81
|
+
intersection_right = min(window_box.right, element_box.right)
|
|
82
|
+
intersection_bottom = min(window_box.bottom, element_box.bottom)
|
|
83
|
+
|
|
84
|
+
# Step 2: Clamp to screen boundaries (new addition)
|
|
85
|
+
intersection_left = max(self.screen_box.left, intersection_left)
|
|
86
|
+
intersection_top = max(self.screen_box.top, intersection_top)
|
|
87
|
+
intersection_right = min(self.screen_box.right, intersection_right)
|
|
88
|
+
intersection_bottom = min(self.screen_box.bottom, intersection_bottom)
|
|
89
|
+
|
|
90
|
+
# Step 3: Validate intersection
|
|
91
|
+
if (intersection_right > intersection_left and intersection_bottom > intersection_top):
|
|
92
|
+
bounding_box = BoundingBox(
|
|
93
|
+
left=intersection_left,
|
|
94
|
+
top=intersection_top,
|
|
95
|
+
right=intersection_right,
|
|
96
|
+
bottom=intersection_bottom,
|
|
97
|
+
width=intersection_right - intersection_left,
|
|
98
|
+
height=intersection_bottom - intersection_top
|
|
99
|
+
)
|
|
100
|
+
else:
|
|
101
|
+
# No valid visible intersection (either outside window or screen)
|
|
102
|
+
bounding_box = BoundingBox(
|
|
103
|
+
left=0,
|
|
104
|
+
top=0,
|
|
105
|
+
right=0,
|
|
106
|
+
bottom=0,
|
|
107
|
+
width=0,
|
|
108
|
+
height=0
|
|
109
|
+
)
|
|
110
|
+
return bounding_box
|
|
111
|
+
|
|
112
|
+
def get_nodes(self, node: Control, is_browser:bool=False,use_dom:bool=False) -> tuple[list[TreeElementNode],list[ScrollElementNode]]:
|
|
113
|
+
window_bounding_box=node.BoundingRectangle
|
|
114
|
+
|
|
115
|
+
def is_element_visible(node:Control,threshold:int=0):
|
|
116
|
+
is_control=node.IsControlElement
|
|
117
|
+
box=node.BoundingRectangle
|
|
118
|
+
if box.isempty():
|
|
119
|
+
return False
|
|
120
|
+
width=box.width()
|
|
121
|
+
height=box.height()
|
|
122
|
+
area=width*height
|
|
123
|
+
is_offscreen=(not node.IsOffscreen) or node.ControlTypeName in ['EditControl']
|
|
124
|
+
return area > threshold and is_offscreen and is_control
|
|
125
|
+
|
|
126
|
+
def is_element_enabled(node:Control):
|
|
127
|
+
try:
|
|
128
|
+
return node.IsEnabled
|
|
129
|
+
except Exception:
|
|
130
|
+
return False
|
|
131
|
+
|
|
132
|
+
def is_default_action(node:Control):
|
|
133
|
+
legacy_pattern=node.GetLegacyIAccessiblePattern()
|
|
134
|
+
default_action=legacy_pattern.DefaultAction.title()
|
|
135
|
+
if default_action in DEFAULT_ACTIONS:
|
|
136
|
+
return True
|
|
137
|
+
return False
|
|
138
|
+
|
|
139
|
+
def is_element_image(node:Control):
|
|
140
|
+
if isinstance(node,ImageControl):
|
|
141
|
+
if node.LocalizedControlType=='graphic' or not node.IsKeyboardFocusable:
|
|
142
|
+
return True
|
|
143
|
+
return False
|
|
144
|
+
|
|
145
|
+
def is_element_text(node:Control):
|
|
146
|
+
try:
|
|
147
|
+
if node.ControlTypeName in INFORMATIVE_CONTROL_TYPE_NAMES:
|
|
148
|
+
if is_element_visible(node) and is_element_enabled(node) and not is_element_image(node):
|
|
149
|
+
return True
|
|
150
|
+
except Exception:
|
|
151
|
+
return False
|
|
152
|
+
return False
|
|
153
|
+
|
|
154
|
+
def is_window_modal(node:WindowControl):
|
|
155
|
+
try:
|
|
156
|
+
window_pattern=node.GetWindowPattern()
|
|
157
|
+
return window_pattern.IsModal
|
|
158
|
+
except Exception:
|
|
159
|
+
return False
|
|
160
|
+
|
|
161
|
+
def is_keyboard_focusable(node:Control):
|
|
162
|
+
try:
|
|
163
|
+
if node.ControlTypeName in set(['EditControl','ButtonControl','CheckBoxControl','RadioButtonControl','TabItemControl']):
|
|
164
|
+
return True
|
|
165
|
+
return node.IsKeyboardFocusable
|
|
166
|
+
except Exception:
|
|
167
|
+
return False
|
|
168
|
+
|
|
169
|
+
def element_has_child_element(node:Control,control_type:str,child_control_type:str):
|
|
170
|
+
if node.LocalizedControlType==control_type:
|
|
171
|
+
first_child=node.GetFirstChildControl()
|
|
172
|
+
if first_child is None:
|
|
173
|
+
return False
|
|
174
|
+
return first_child.LocalizedControlType==child_control_type
|
|
175
|
+
|
|
176
|
+
def group_has_no_name(node:Control):
|
|
177
|
+
try:
|
|
178
|
+
if node.ControlTypeName=='GroupControl':
|
|
179
|
+
if not node.Name.strip():
|
|
180
|
+
return True
|
|
181
|
+
return False
|
|
182
|
+
except Exception:
|
|
183
|
+
return False
|
|
184
|
+
|
|
185
|
+
def is_element_scrollable(node:Control):
|
|
186
|
+
try:
|
|
187
|
+
if (node.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES|INFORMATIVE_CONTROL_TYPE_NAMES) or node.IsOffscreen:
|
|
188
|
+
return False
|
|
189
|
+
scroll_pattern:ScrollPattern=node.GetPattern(PatternId.ScrollPattern)
|
|
190
|
+
if scroll_pattern is None:
|
|
191
|
+
return False
|
|
192
|
+
return scroll_pattern.VerticallyScrollable
|
|
193
|
+
except Exception:
|
|
194
|
+
return False
|
|
195
|
+
|
|
196
|
+
def is_element_interactive(node:Control):
|
|
197
|
+
try:
|
|
198
|
+
if is_browser and node.ControlTypeName in set(['DataItemControl','ListItemControl']) and not is_keyboard_focusable(node):
|
|
199
|
+
return False
|
|
200
|
+
elif not is_browser and node.ControlTypeName=="ImageControl" and is_keyboard_focusable(node):
|
|
201
|
+
return True
|
|
202
|
+
elif node.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES|DOCUMENT_CONTROL_TYPE_NAMES:
|
|
203
|
+
return is_element_visible(node) and is_element_enabled(node) and (not is_element_image(node) or is_keyboard_focusable(node))
|
|
204
|
+
elif node.ControlTypeName=='GroupControl':
|
|
205
|
+
if is_browser:
|
|
206
|
+
return is_element_visible(node) and is_element_enabled(node) and (is_default_action(node) or is_keyboard_focusable(node))
|
|
207
|
+
# else:
|
|
208
|
+
# return is_element_visible and is_element_enabled(node) and is_default_action(node)
|
|
209
|
+
except Exception:
|
|
210
|
+
return False
|
|
211
|
+
return False
|
|
212
|
+
|
|
213
|
+
def dom_correction(node: 'Control'):
    """Fix up the DOM node that was just appended for *node*.

    Called right after a node has been pushed onto ``dom_interactive_nodes``.
    Depending on the shape of *node* the last entry is removed and, in two
    cases, replaced:

    * ``list item``/``item`` whose first child is a ``link``: the entry is
      dropped.
    * ``GroupControl``: the entry is dropped. If the group is keyboard
      focusable and its first-child chain contains no interactive control
      type and ends in a ``TextControl``, a new entry is appended that
      carries the text leaf's name and the group's geometry.
    * ``link`` whose first child is a ``heading``: the entry is replaced
      by one built from the first child, labelled as a ``link``.

    Always returns None.
    """
    if element_has_child_element(node, 'list item', 'link') or element_has_child_element(node, 'item', 'link'):
        dom_interactive_nodes.pop()
        return None

    if node.ControlTypeName == 'GroupControl':
        dom_interactive_nodes.pop()
        if not is_keyboard_focusable(node):
            return None

        # Descend along first children until a leaf is reached.
        leaf = node
        try:
            while leaf.GetFirstChildControl() is not None:
                if leaf.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES:
                    return None
                leaf = leaf.GetFirstChildControl()
        except Exception:
            return None
        if leaf.ControlTypeName != 'TextControl':
            return None

        group_pattern = node.GetLegacyIAccessiblePattern()
        value = group_pattern.Value
        group_rect = node.BoundingRectangle
        bounding_box = self.iou_bounding_box(self.dom_bounding_box, group_rect)
        center = bounding_box.get_center()
        is_focused = node.HasKeyboardFocus
        dom_interactive_nodes.append(TreeElementNode(
            name=leaf.Name.strip(),
            control_type=node.LocalizedControlType,
            value=value,
            shortcut=node.AcceleratorKey,
            bounding_box=bounding_box,
            xpath='',
            center=center,
            app_name=app_name,
            is_focused=is_focused,
        ))
        return None

    if element_has_child_element(node, 'link', 'heading'):
        dom_interactive_nodes.pop()
        inner = node.GetFirstChildControl()
        # The legacy value is still queried as before, although the entry's
        # value is taken from the child's name.
        _ = inner.GetLegacyIAccessiblePattern().Value
        inner_rect = inner.BoundingRectangle
        bounding_box = self.iou_bounding_box(self.dom_bounding_box, inner_rect)
        center = bounding_box.get_center()
        is_focused = inner.HasKeyboardFocus
        dom_interactive_nodes.append(TreeElementNode(
            name=inner.Name.strip(),
            control_type='link',
            value=inner.Name.strip(),
            shortcut=inner.AcceleratorKey,
            bounding_box=bounding_box,
            xpath='',
            center=center,
            app_name=app_name,
            is_focused=is_focused,
        ))
    return None
|
|
268
|
+
|
|
269
|
+
def tree_traversal(node: 'Control', is_dom: bool = False, is_dialog: bool = False):
    """Walk the subtree under *node* and fill the enclosing result lists.

    Scrollable controls go to ``scrollable_nodes``. Interactive controls go
    to ``dom_interactive_nodes`` when inside a browser DOM and to
    ``interactive_nodes`` otherwise. Text controls that are not
    interactive go to ``dom_informative_nodes``.

    Args:
        node: Control to inspect; its children are visited recursively.
        is_dom: True once the walk is inside a browser's ``RootWebArea``.
        is_dialog: True once the walk is inside a ``WindowControl`` child.
            It is only forwarded to recursive calls here.
    """
    # Offscreen nodes are pruned unless their type or class is exempt.
    exempt_types = {"GroupControl", "EditControl", "TitleBarControl"}
    exempt_classes = {"Popup", "Windows.UI.Core.CoreComponentInputSource"}
    if node.IsOffscreen and node.ControlTypeName not in exempt_types and node.ClassName not in exempt_classes:
        return None

    if is_element_scrollable(node):
        scroll_pattern = node.GetPattern(PatternId.ScrollPattern)
        box = node.BoundingRectangle
        # Pick a random point inside the control, scaled to 0.8.
        x, y = random_point_within_bounding_box(node=node, scale_factor=0.8)
        center = Center(x=x, y=y)
        scrollable_nodes.append(ScrollElementNode(
            name=node.Name.strip() or node.AutomationId or node.LocalizedControlType.capitalize() or "''",
            app_name=app_name,
            control_type=node.LocalizedControlType.title(),
            bounding_box=BoundingBox(
                left=box.left,
                top=box.top,
                right=box.right,
                bottom=box.bottom,
                width=box.width(),
                height=box.height(),
            ),
            center=center,
            xpath='',
            horizontal_scrollable=scroll_pattern.HorizontallyScrollable,
            horizontal_scroll_percent=scroll_pattern.HorizontalScrollPercent if scroll_pattern.HorizontallyScrollable else 0,
            vertical_scrollable=scroll_pattern.VerticallyScrollable,
            vertical_scroll_percent=scroll_pattern.VerticalScrollPercent if scroll_pattern.VerticallyScrollable else 0,
            is_focused=node.HasKeyboardFocus,
        ))

    if is_element_interactive(node):
        legacy_pattern = node.GetLegacyIAccessiblePattern()
        value = legacy_pattern.Value.strip() if legacy_pattern.Value is not None else ""
        is_focused = node.HasKeyboardFocus
        name = node.Name.strip()
        element_bounding_box = node.BoundingRectangle
        inside_dom = is_browser and is_dom
        # DOM elements are clipped to the web area, others to the window.
        clip_box = self.dom_bounding_box if inside_dom else window_bounding_box
        bounding_box = self.iou_bounding_box(clip_box, element_bounding_box)
        center = bounding_box.get_center()
        tree_node = TreeElementNode(
            name=name,
            control_type=node.LocalizedControlType.title(),
            value=value,
            shortcut=node.AcceleratorKey,
            bounding_box=bounding_box,
            center=center,
            xpath='',
            app_name=app_name,
            is_focused=is_focused,
        )
        if inside_dom:
            dom_interactive_nodes.append(tree_node)
            dom_correction(node=node)
        else:
            interactive_nodes.append(tree_node)
    elif is_element_text(node):
        dom_informative_nodes.append(TextElementNode(
            text=node.Name.strip(),
        ))

    children = node.GetChildren()

    # Normal apps are walked right to left, DOM content left to right.
    ordered_children = children if is_dom else children[::-1]
    for child in ordered_children:
        # The browser's web content root: record its geometry and scroll state.
        if is_browser and child.AutomationId == "RootWebArea":
            root_rect = child.BoundingRectangle
            self.dom_bounding_box = BoundingBox(
                left=root_rect.left,
                top=root_rect.top,
                right=root_rect.right,
                bottom=root_rect.bottom,
                width=root_rect.width(),
                height=root_rect.height(),
            )
            root_scroll = child.GetPattern(PatternId.ScrollPattern)
            self.dom_info = DOMInfo(
                horizontal_scrollable=root_scroll.HorizontallyScrollable,
                horizontal_scroll_percent=root_scroll.HorizontalScrollPercent if root_scroll.HorizontallyScrollable else 0,
                vertical_scrollable=root_scroll.VerticallyScrollable,
                vertical_scroll_percent=root_scroll.VerticalScrollPercent if root_scroll.VerticallyScrollable else 0,
            )
            # Enter the DOM subtree.
            tree_traversal(child, is_dom=True, is_dialog=is_dialog)
            continue

        # A nested window is treated as a dialog.
        if isinstance(child, WindowControl):
            if not child.IsOffscreen:
                if is_dom:
                    dialog_rect = child.BoundingRectangle
                    if dialog_rect.width() > 0.8*self.dom_bounding_box.width:
                        # This window covers the majority of the web area.
                        dom_interactive_nodes.clear()
                elif is_window_modal(child):
                    # A modal window hides everything collected so far.
                    interactive_nodes.clear()
            # NOTE(review): the source's indentation was lost; the dialog
            # recursion is assumed to run for offscreen windows too.
            tree_traversal(child, is_dom=is_dom, is_dialog=True)
            continue

        # Ordinary, non-dialog child.
        tree_traversal(child, is_dom=is_dom, is_dialog=is_dialog)
|
|
381
|
+
|
|
382
|
+
interactive_nodes, dom_interactive_nodes, scrollable_nodes, dom_informative_nodes = [], [], [], []
|
|
383
|
+
app_name=node.Name.strip()
|
|
384
|
+
match node.ClassName:
|
|
385
|
+
case "Progman":
|
|
386
|
+
app_name="Desktop"
|
|
387
|
+
case 'Shell_TrayWnd'|'Shell_SecondaryTrayWnd':
|
|
388
|
+
app_name="Taskbar"
|
|
389
|
+
case 'Microsoft.UI.Content.PopupWindowSiteBridge':
|
|
390
|
+
app_name="Context Menu"
|
|
391
|
+
case _:
|
|
392
|
+
pass
|
|
393
|
+
tree_traversal(node,is_dom=False,is_dialog=False)
|
|
394
|
+
|
|
395
|
+
logger.debug(f'Interactive nodes:{len(interactive_nodes)}')
|
|
396
|
+
logger.debug(f'DOM interactive nodes:{len(dom_interactive_nodes)}')
|
|
397
|
+
logger.debug(f'Scrollable nodes:{len(scrollable_nodes)}')
|
|
398
|
+
|
|
399
|
+
if use_dom:
|
|
400
|
+
if is_browser:
|
|
401
|
+
return (dom_interactive_nodes,scrollable_nodes,dom_informative_nodes)
|
|
402
|
+
else:
|
|
403
|
+
return ([],[],[])
|
|
404
|
+
else:
|
|
405
|
+
return (interactive_nodes+dom_interactive_nodes,scrollable_nodes,dom_informative_nodes)
|
|
406
|
+
|
|
407
|
+
def _on_focus_change(self, sender:'ctypes.POINTER(IUIAutomationElement)'):
    """Handle a focus-changed event by logging the newly focused element.

    Args:
        sender: Raw automation element that received focus.

    Events for the same runtime id arriving less than one second after
    the previous one are dropped. The handler never raises: an element
    that cannot be resolved is logged at debug level and ignored, in line
    with the structure- and property-change handlers.
    """
    current_time = time()

    # Resolving the element and its runtime id can fail if the element has
    # already gone away; that must not escape from the event callback.
    try:
        element = Control.CreateControlFromElement(sender)
        event_key = tuple(element.GetRuntimeId())
    except Exception as e:
        logger.debug(f"[WatchDog] Focus change ignored, element could not be resolved: {e}")
        return None

    # Debounce duplicate events
    last_event = getattr(self, '_last_focus_event', None)
    if last_event:
        last_key, last_time = last_event
        if last_key == event_key and (current_time - last_time) < 1.0:
            return None
    self._last_focus_event = (event_key, current_time)

    try:
        logger.debug(f"[WatchDog] Focus changed to: '{element.Name}' ({element.ControlTypeName})")
    except Exception:
        # Best effort: reading the properties for the log line may fail.
        pass
|
|
424
|
+
|
|
425
|
+
def _on_structure_change(self, sender:'ctypes.POINTER(IUIAutomationElement)', changeType:int, runtime_id:list[int]):
|
|
426
|
+
"""Handle structure change events."""
|
|
427
|
+
try:
|
|
428
|
+
# Debounce duplicate events
|
|
429
|
+
current_time = time()
|
|
430
|
+
event_key = (changeType, tuple(runtime_id))
|
|
431
|
+
if hasattr(self, '_last_structure_event') and self._last_structure_event:
|
|
432
|
+
last_key, last_time = self._last_structure_event
|
|
433
|
+
if last_key == event_key and (current_time - last_time) < 5.0:
|
|
434
|
+
return None
|
|
435
|
+
self._last_structure_event = (event_key, current_time)
|
|
436
|
+
|
|
437
|
+
node = Control.CreateControlFromElement(sender)
|
|
438
|
+
|
|
439
|
+
match StructureChangeType(changeType):
|
|
440
|
+
case StructureChangeType.StructureChangeType_ChildAdded|StructureChangeType.StructureChangeType_ChildrenBulkAdded:
|
|
441
|
+
interactive_nodes=[]
|
|
442
|
+
app=self.desktop.get_app_from_element(node)
|
|
443
|
+
app_name=self.app_name_correction(app.name if app else node.Name.strip())
|
|
444
|
+
is_browser=app.is_browser if app else False
|
|
445
|
+
if isinstance(node,WindowControl|PaneControl):
|
|
446
|
+
#Subtree traversal
|
|
447
|
+
window_bounding_box=app.bounding_box if app else node.BoundingRectangle
|
|
448
|
+
self.tree_traversal(node,window_bounding_box,app_name,is_browser,interactive_nodes=interactive_nodes)
|
|
449
|
+
else:
|
|
450
|
+
#If element is interactive take it else skip it
|
|
451
|
+
if not self.is_element_interactive(node=node,is_browser=is_browser):
|
|
452
|
+
return None
|
|
453
|
+
legacy_pattern=node.GetLegacyIAccessiblePattern()
|
|
454
|
+
value=legacy_pattern.Value.strip() if legacy_pattern.Value is not None else ""
|
|
455
|
+
cursor_type=AccessibleRoleNames.get(legacy_pattern.Role, "Default")
|
|
456
|
+
runtime_id=node.GetRuntimeId()
|
|
457
|
+
is_focused=node.HasKeyboardFocus
|
|
458
|
+
name=node.Name.strip()
|
|
459
|
+
element_bounding_box = node.BoundingRectangle
|
|
460
|
+
bounding_box=self.iou_bounding_box(window_bounding_box,element_bounding_box)
|
|
461
|
+
center = bounding_box.get_center()
|
|
462
|
+
|
|
463
|
+
interactive_nodes.append(TreeElementNode(
|
|
464
|
+
name=name,
|
|
465
|
+
control_type=cursor_type,
|
|
466
|
+
bounding_box=bounding_box,
|
|
467
|
+
center=center,
|
|
468
|
+
runtime_id=runtime_id,
|
|
469
|
+
app_name=app_name,
|
|
470
|
+
value=value,
|
|
471
|
+
shortcut="",
|
|
472
|
+
xpath="",
|
|
473
|
+
is_focused=is_focused
|
|
474
|
+
))
|
|
475
|
+
if self.tree_state:
|
|
476
|
+
existing_ids={n.runtime_id for n in self.tree_state.interactive_nodes}
|
|
477
|
+
interactive_nodes=[n for n in interactive_nodes if n.runtime_id not in existing_ids]
|
|
478
|
+
self.tree_state.interactive_nodes.extend(interactive_nodes)
|
|
479
|
+
case StructureChangeType.StructureChangeType_ChildrenBulkRemoved | StructureChangeType.StructureChangeType_ChildRemoved:
|
|
480
|
+
if changeType == StructureChangeType.StructureChangeType_ChildRemoved and self.tree_state:
|
|
481
|
+
if isinstance(node,WindowControl|PaneControl):
|
|
482
|
+
parent_bounding_box=BoundingBox.from_bounding_rectangle(node.BoundingRectangle)
|
|
483
|
+
# Remove nodes spatially contained in the parent (heuristic for "is descendant")
|
|
484
|
+
def is_contained(n:'TreeElementNode'):
|
|
485
|
+
cx, cy = n.center.x, n.center.y
|
|
486
|
+
return (parent_bounding_box.left <= cx <= parent_bounding_box.right and
|
|
487
|
+
parent_bounding_box.top <= cy <= parent_bounding_box.bottom)
|
|
488
|
+
self.tree_state.interactive_nodes = list(filter(lambda n:not is_contained(n),self.tree_state.interactive_nodes))
|
|
489
|
+
else:
|
|
490
|
+
target_runtime_id = tuple(runtime_id)
|
|
491
|
+
self.tree_state.interactive_nodes = list(filter(lambda n:n.runtime_id != target_runtime_id,self.tree_state.interactive_nodes))
|
|
492
|
+
case StructureChangeType.StructureChangeType_ChildrenInvalidated:
|
|
493
|
+
#Rebuild subtree
|
|
494
|
+
parent_bounding_box=BoundingBox.from_bounding_rectangle(node.BoundingRectangle)
|
|
495
|
+
app=self.desktop.get_app_from_element(node)
|
|
496
|
+
app_name=self.app_name_correction(app.name if app else node.Name.strip())
|
|
497
|
+
is_browser=app.is_browser if app else False
|
|
498
|
+
window_bounding_box=app.bounding_box if app else parent_bounding_box
|
|
499
|
+
interactive_nodes=[]
|
|
500
|
+
self.tree_traversal(node,window_bounding_box,app_name,is_browser,interactive_nodes=interactive_nodes)
|
|
501
|
+
|
|
502
|
+
# Remove nodes spatially contained in the parent (heuristic for "is descendant")
|
|
503
|
+
def is_contained(n:'TreeElementNode'):
|
|
504
|
+
cx, cy = n.center.x, n.center.y
|
|
505
|
+
return (parent_bounding_box.left <= cx <= parent_bounding_box.right and
|
|
506
|
+
parent_bounding_box.top <= cy <= parent_bounding_box.bottom)
|
|
507
|
+
|
|
508
|
+
if self.tree_state:
|
|
509
|
+
self.tree_state.interactive_nodes = list(filter(lambda n:not is_contained(n),self.tree_state.interactive_nodes))
|
|
510
|
+
self.tree_state.interactive_nodes.extend(interactive_nodes)
|
|
511
|
+
case StructureChangeType.StructureChangeType_ChildrenReordered:
|
|
512
|
+
app=self.desktop.get_app_from_element(node)
|
|
513
|
+
app_name=self.app_name_correction(app.name if app else node.Name.strip())
|
|
514
|
+
is_browser=app.is_browser if app else False
|
|
515
|
+
window_bounding_box=app.bounding_box if app else node.BoundingRectangle
|
|
516
|
+
interactive_nodes=[]
|
|
517
|
+
self.tree_traversal(node,window_bounding_box,app_name,is_browser,interactive_nodes=interactive_nodes)
|
|
518
|
+
|
|
519
|
+
# Update existing nodes
|
|
520
|
+
fresh_nodes_map = {n.runtime_id: n for n in interactive_nodes}
|
|
521
|
+
def update_node(existing_node:'TreeElementNode'):
|
|
522
|
+
if new_node:=fresh_nodes_map.get(existing_node.runtime_id):
|
|
523
|
+
existing_node.update_from_node(new_node)
|
|
524
|
+
list(map(update_node,self.tree_state.interactive_nodes))
|
|
525
|
+
except Exception as e:
|
|
526
|
+
logger.debug(f"[WatchDog] Structure changed with error: {e}, StructureChangeType={StructureChangeType(changeType).name}")
|
|
527
|
+
|
|
528
|
+
try:
|
|
529
|
+
logger.debug(f"[WatchDog] Structure changed: Type={StructureChangeType(changeType).name} RuntimeID={tuple(runtime_id)} Sender: '{node.Name}' ({node.ControlTypeName})")
|
|
530
|
+
except Exception:
|
|
531
|
+
pass
|
|
532
|
+
|
|
533
|
+
def _on_property_change(self, sender:'ctypes.POINTER(IUIAutomationElement)', propertyId:int, newValue):
    """Log a property-changed event at debug level.

    Args:
        sender: Raw automation element whose property changed.
        propertyId: Numeric id of the changed property.
        newValue: New value of the property.

    Purely best effort: any failure while resolving or describing the
    element is ignored.
    """
    try:
        element = Control.CreateControlFromElement(sender)
    except Exception:
        return None

    try:
        logger.debug(f"[WatchDog] Property changed: ID={propertyId} Value={newValue} Element: '{element.Name}' ({element.ControlTypeName})")
    except Exception:
        return None
    return None
|
|
540
|
+
|
|
541
|
+
def get_annotated_screenshot(self, nodes: list[TreeElementNode],scale:float=1.0) -> Image.Image:
    """Return a screenshot with a numbered, colored box drawn around each node.

    Args:
        nodes: Elements to annotate; each label is the node's index in
            this list.
        scale: Factor applied to the screenshot size and to every
            bounding box before drawing.

    Returns:
        A new RGB image: the scaled screenshot on a white canvas, offset
        by a 5 pixel padding, with the annotations drawn on top.
    """
    screenshot = self.desktop.get_screenshot()
    # NOTE(review): purpose of this pause is not visible here; kept as is.
    sleep(0.10)

    scaled_size = (int(screenshot.width * scale), int(screenshot.height * scale))
    screenshot = screenshot.resize(scaled_size, Image.Resampling.LANCZOS)

    # Add padding
    padding = 5
    width = int(screenshot.width + (1.5 * padding))
    height = int(screenshot.height + (1.5 * padding))
    padded_screenshot = Image.new("RGB", (width, height), color=(255, 255, 255))
    padded_screenshot.paste(screenshot, (padding, padding))

    draw = ImageDraw.Draw(padded_screenshot)
    font_size = 12
    try:
        font = ImageFont.truetype('arial.ttf', font_size)
    except IOError:
        font = ImageFont.load_default()

    def get_random_color():
        return "#{:06x}".format(random.randint(0, 0xFFFFFF))

    def draw_annotation(label, node: TreeElementNode):
        box = node.bounding_box
        color = get_random_color()

        # Scale and pad the bounding box coordinates
        adjusted_box = (
            int(box.left * scale) + padding,
            int(box.top * scale) + padding,
            int(box.right * scale) + padding,
            int(box.bottom * scale) + padding
        )
        # Draw bounding box
        draw.rectangle(adjusted_box, outline=color, width=2)

        # Label dimensions
        label_width = draw.textlength(str(label), font=font)
        label_height = font_size
        left, top, right, bottom = adjusted_box

        # Label sits above the box, right-aligned. It is clamped to the
        # canvas so labels of boxes at the top or left edge stay visible.
        label_x1 = max(right - label_width, 0)
        label_y1 = max(top - label_height - 4, 0)
        label_x2 = label_x1 + label_width
        label_y2 = label_y1 + label_height + 4

        # Draw label background and text
        draw.rectangle([(label_x1, label_y1), (label_x2, label_y2)], fill=color)
        draw.text((label_x1 + 2, label_y1 + 2), str(label), fill=(255, 255, 255), font=font)

    # Draw sequentially: every annotation mutates the same canvas, so a
    # thread pool makes the overlap order nondeterministic, and an
    # unconsumed `executor.map` silently discards drawing errors.
    for label, node in enumerate(nodes):
        try:
            draw_annotation(label, node)
        except Exception as e:
            # One bad node must not cost the whole screenshot.
            logger.debug(f"Failed to annotate node {label}: {e}")
    return padded_screenshot
|