windows-mcp 0.5.7__py3-none-any.whl → 0.5.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,467 +1,601 @@
1
- from windows_mcp.tree.config import INTERACTIVE_CONTROL_TYPE_NAMES,DOCUMENT_CONTROL_TYPE_NAMES,INFORMATIVE_CONTROL_TYPE_NAMES, DEFAULT_ACTIONS, THREAD_MAX_RETRIES
2
- from windows_mcp.tree.views import TreeElementNode, ScrollElementNode, TextElementNode, Center, BoundingBox, TreeState, DOMInfo
3
- from uiautomation import Control,ImageControl,ScrollPattern,WindowControl,Rect,GetRootControl,PatternId
4
- from concurrent.futures import ThreadPoolExecutor, as_completed
5
- from windows_mcp.tree.utils import random_point_within_bounding_box
6
- from PIL import Image, ImageFont, ImageDraw
7
- from typing import TYPE_CHECKING,Optional
8
- from windows_mcp.desktop.views import App
9
- from time import sleep
10
- import logging
11
- import random
12
-
13
- logger = logging.getLogger(__name__)
14
- logger.setLevel(logging.INFO)
15
- handler = logging.StreamHandler()
16
- formatter = logging.Formatter('[%(levelname)s] %(message)s')
17
- handler.setFormatter(formatter)
18
- logger.addHandler(handler)
19
-
20
- if TYPE_CHECKING:
21
- from windows_mcp.desktop.service import Desktop
22
-
23
- class Tree:
24
- def __init__(self,desktop:'Desktop'):
25
- self.desktop=desktop
26
- self.screen_size=self.desktop.get_screen_size()
27
- self.dom_info:Optional[DOMInfo]=None
28
- self.dom_bounding_box:BoundingBox=None
29
- self.screen_box=BoundingBox(
30
- top=0, left=0, bottom=self.screen_size.height, right=self.screen_size.width,
31
- width=self.screen_size.width, height=self.screen_size.height
32
- )
33
-
34
- def get_state(self,active_app:App,other_apps:list[App],use_dom:bool=False)->TreeState:
35
- root=GetRootControl()
36
- other_apps_handle=set(map(lambda other_app: other_app.handle,other_apps))
37
- apps=list(filter(lambda app:app.NativeWindowHandle not in other_apps_handle,root.GetChildren()))
38
- del other_apps_handle
39
- if active_app:
40
- apps=list(filter(lambda app:app.ClassName!='Progman',apps))
41
- interactive_nodes,scrollable_nodes,dom_informative_nodes=self.get_appwise_nodes(apps=apps,use_dom=use_dom)
42
- return TreeState(dom_info=self.dom_info,interactive_nodes=interactive_nodes,scrollable_nodes=scrollable_nodes,dom_informative_nodes=dom_informative_nodes)
43
-
44
- def get_appwise_nodes(self,apps:list[Control],use_dom:bool=False)-> tuple[list[TreeElementNode],list[ScrollElementNode],list[TextElementNode]]:
45
- interactive_nodes, scrollable_nodes,dom_informative_nodes = [], [], []
46
- with ThreadPoolExecutor() as executor:
47
- retry_counts = {app: 0 for app in apps}
48
- future_to_app = {
49
- executor.submit(
50
- self.get_nodes, app,
51
- self.desktop.is_app_browser(app),
52
- use_dom
53
- ): app
54
- for app in apps
55
- }
56
- while future_to_app: # keep running until no pending futures
57
- for future in as_completed(list(future_to_app)):
58
- app = future_to_app.pop(future) # remove completed future
59
- try:
60
- result = future.result()
61
- if result:
62
- element_nodes, scroll_nodes,informative_nodes = result
63
- interactive_nodes.extend(element_nodes)
64
- scrollable_nodes.extend(scroll_nodes)
65
- dom_informative_nodes.extend(informative_nodes)
66
- except Exception as e:
67
- retry_counts[app] += 1
68
- logger.debug(f"Error in processing node {app.Name}, retry attempt {retry_counts[app]}\nError: {e}")
69
- if retry_counts[app] < THREAD_MAX_RETRIES:
70
- logger.debug(f"Retrying {app.Name} for the {retry_counts[app]}th time")
71
- new_future = executor.submit(self.get_nodes, app, self.desktop.is_app_browser(app),use_dom)
72
- future_to_app[new_future] = app
73
- else:
74
- logger.error(f"Task failed completely for {app.Name} after {THREAD_MAX_RETRIES} retries")
75
- return interactive_nodes,scrollable_nodes,dom_informative_nodes
76
-
77
- def iou_bounding_box(self,window_box: Rect,element_box: Rect,) -> BoundingBox:
78
- # Step 1: Intersection of element and window (existing logic)
79
- intersection_left = max(window_box.left, element_box.left)
80
- intersection_top = max(window_box.top, element_box.top)
81
- intersection_right = min(window_box.right, element_box.right)
82
- intersection_bottom = min(window_box.bottom, element_box.bottom)
83
-
84
- # Step 2: Clamp to screen boundaries (new addition)
85
- intersection_left = max(self.screen_box.left, intersection_left)
86
- intersection_top = max(self.screen_box.top, intersection_top)
87
- intersection_right = min(self.screen_box.right, intersection_right)
88
- intersection_bottom = min(self.screen_box.bottom, intersection_bottom)
89
-
90
- # Step 3: Validate intersection
91
- if (intersection_right > intersection_left and intersection_bottom > intersection_top):
92
- bounding_box = BoundingBox(
93
- left=intersection_left,
94
- top=intersection_top,
95
- right=intersection_right,
96
- bottom=intersection_bottom,
97
- width=intersection_right - intersection_left,
98
- height=intersection_bottom - intersection_top
99
- )
100
- else:
101
- # No valid visible intersection (either outside window or screen)
102
- bounding_box = BoundingBox(
103
- left=0,
104
- top=0,
105
- right=0,
106
- bottom=0,
107
- width=0,
108
- height=0
109
- )
110
- return bounding_box
111
-
112
- def get_nodes(self, node: Control, is_browser:bool=False,use_dom:bool=False) -> tuple[list[TreeElementNode],list[ScrollElementNode]]:
113
- window_bounding_box=node.BoundingRectangle
114
-
115
- def is_element_visible(node:Control,threshold:int=0):
116
- is_control=node.IsControlElement
117
- box=node.BoundingRectangle
118
- if box.isempty():
119
- return False
120
- width=box.width()
121
- height=box.height()
122
- area=width*height
123
- is_offscreen=(not node.IsOffscreen) or node.ControlTypeName in ['EditControl']
124
- return area > threshold and is_offscreen and is_control
125
-
126
- def is_element_enabled(node:Control):
127
- try:
128
- return node.IsEnabled
129
- except Exception:
130
- return False
131
-
132
- def is_default_action(node:Control):
133
- legacy_pattern=node.GetLegacyIAccessiblePattern()
134
- default_action=legacy_pattern.DefaultAction.title()
135
- if default_action in DEFAULT_ACTIONS:
136
- return True
137
- return False
138
-
139
- def is_element_image(node:Control):
140
- if isinstance(node,ImageControl):
141
- if node.LocalizedControlType=='graphic' or not node.IsKeyboardFocusable:
142
- return True
143
- return False
144
-
145
- def is_element_text(node:Control):
146
- try:
147
- if node.ControlTypeName in INFORMATIVE_CONTROL_TYPE_NAMES:
148
- if is_element_visible(node) and is_element_enabled(node) and not is_element_image(node):
149
- return True
150
- except Exception:
151
- return False
152
- return False
153
-
154
- def is_window_modal(node:WindowControl):
155
- try:
156
- window_pattern=node.GetWindowPattern()
157
- return window_pattern.IsModal
158
- except Exception:
159
- return False
160
-
161
- def is_keyboard_focusable(node:Control):
162
- try:
163
- if node.ControlTypeName in set(['EditControl','ButtonControl','CheckBoxControl','RadioButtonControl','TabItemControl']):
164
- return True
165
- return node.IsKeyboardFocusable
166
- except Exception:
167
- return False
168
-
169
- def element_has_child_element(node:Control,control_type:str,child_control_type:str):
170
- if node.LocalizedControlType==control_type:
171
- first_child=node.GetFirstChildControl()
172
- if first_child is None:
173
- return False
174
- return first_child.LocalizedControlType==child_control_type
175
-
176
- def group_has_no_name(node:Control):
177
- try:
178
- if node.ControlTypeName=='GroupControl':
179
- if not node.Name.strip():
180
- return True
181
- return False
182
- except Exception:
183
- return False
184
-
185
- def is_element_scrollable(node:Control):
186
- try:
187
- if (node.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES|INFORMATIVE_CONTROL_TYPE_NAMES) or node.IsOffscreen:
188
- return False
189
- scroll_pattern:ScrollPattern=node.GetPattern(PatternId.ScrollPattern)
190
- if scroll_pattern is None:
191
- return False
192
- return scroll_pattern.VerticallyScrollable
193
- except Exception:
194
- return False
195
-
196
- def is_element_interactive(node:Control):
197
- try:
198
- if is_browser and node.ControlTypeName in set(['DataItemControl','ListItemControl']) and not is_keyboard_focusable(node):
199
- return False
200
- elif not is_browser and node.ControlTypeName=="ImageControl" and is_keyboard_focusable(node):
201
- return True
202
- elif node.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES|DOCUMENT_CONTROL_TYPE_NAMES:
203
- return is_element_visible(node) and is_element_enabled(node) and (not is_element_image(node) or is_keyboard_focusable(node))
204
- elif node.ControlTypeName=='GroupControl':
205
- if is_browser:
206
- return is_element_visible(node) and is_element_enabled(node) and (is_default_action(node) or is_keyboard_focusable(node))
207
- # else:
208
- # return is_element_visible and is_element_enabled(node) and is_default_action(node)
209
- except Exception:
210
- return False
211
- return False
212
-
213
- def dom_correction(node:Control):
214
- if element_has_child_element(node,'list item','link') or element_has_child_element(node,'item','link'):
215
- dom_interactive_nodes.pop()
216
- return None
217
- elif node.ControlTypeName=='GroupControl':
218
- dom_interactive_nodes.pop()
219
- if is_keyboard_focusable(node):
220
- child=node
221
- try:
222
- while child.GetFirstChildControl() is not None:
223
- if child.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES:
224
- return None
225
- child=child.GetFirstChildControl()
226
- except Exception:
227
- return None
228
- if child.ControlTypeName!='TextControl':
229
- return None
230
- legacy_pattern=node.GetLegacyIAccessiblePattern()
231
- value=legacy_pattern.Value
232
- element_bounding_box = node.BoundingRectangle
233
- bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
234
- center = bounding_box.get_center()
235
- is_focused=node.HasKeyboardFocus
236
- dom_interactive_nodes.append(TreeElementNode(**{
237
- 'name':child.Name.strip(),
238
- 'control_type':node.LocalizedControlType,
239
- 'value':value,
240
- 'shortcut':node.AcceleratorKey,
241
- 'bounding_box':bounding_box,
242
- 'xpath':'',
243
- 'center':center,
244
- 'app_name':app_name,
245
- 'is_focused':is_focused
246
- }))
247
- elif element_has_child_element(node,'link','heading'):
248
- dom_interactive_nodes.pop()
249
- node=node.GetFirstChildControl()
250
- control_type='link'
251
- legacy_pattern=node.GetLegacyIAccessiblePattern()
252
- value=legacy_pattern.Value
253
- element_bounding_box = node.BoundingRectangle
254
- bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
255
- center = bounding_box.get_center()
256
- is_focused=node.HasKeyboardFocus
257
- dom_interactive_nodes.append(TreeElementNode(**{
258
- 'name':node.Name.strip(),
259
- 'control_type':control_type,
260
- 'value':node.Name.strip(),
261
- 'shortcut':node.AcceleratorKey,
262
- 'bounding_box':bounding_box,
263
- 'xpath':'',
264
- 'center':center,
265
- 'app_name':app_name,
266
- 'is_focused':is_focused
267
- }))
268
-
269
- def tree_traversal(node: Control,is_dom:bool=False,is_dialog:bool=False):
270
- # Checks to skip the nodes that are not interactive
271
- if node.IsOffscreen and (node.ControlTypeName not in set(["GroupControl","EditControl","TitleBarControl"])) and node.ClassName not in set(["Popup","Windows.UI.Core.CoreComponentInputSource"]):
272
- return None
273
-
274
- if is_element_scrollable(node):
275
- scroll_pattern:ScrollPattern=node.GetPattern(PatternId.ScrollPattern)
276
- box = node.BoundingRectangle
277
- # Get the center
278
- x,y=random_point_within_bounding_box(node=node,scale_factor=0.8)
279
- center = Center(x=x,y=y)
280
- scrollable_nodes.append(ScrollElementNode(**{
281
- 'name':node.Name.strip() or node.AutomationId or node.LocalizedControlType.capitalize() or "''",
282
- 'app_name':app_name,
283
- 'control_type':node.LocalizedControlType.title(),
284
- 'bounding_box':BoundingBox(**{
285
- 'left':box.left,
286
- 'top':box.top,
287
- 'right':box.right,
288
- 'bottom':box.bottom,
289
- 'width':box.width(),
290
- 'height':box.height()
291
- }),
292
- 'center':center,
293
- 'xpath':'',
294
- 'horizontal_scrollable':scroll_pattern.HorizontallyScrollable,
295
- 'horizontal_scroll_percent':scroll_pattern.HorizontalScrollPercent if scroll_pattern.HorizontallyScrollable else 0,
296
- 'vertical_scrollable':scroll_pattern.VerticallyScrollable,
297
- 'vertical_scroll_percent':scroll_pattern.VerticalScrollPercent if scroll_pattern.VerticallyScrollable else 0,
298
- 'is_focused':node.HasKeyboardFocus
299
- }))
300
-
301
- if is_element_interactive(node):
302
- legacy_pattern=node.GetLegacyIAccessiblePattern()
303
- value=legacy_pattern.Value.strip() if legacy_pattern.Value is not None else ""
304
- is_focused=node.HasKeyboardFocus
305
- name=node.Name.strip()
306
- element_bounding_box = node.BoundingRectangle
307
- if is_browser and is_dom:
308
- bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
309
- center = bounding_box.get_center()
310
- tree_node=TreeElementNode(**{
311
- 'name':name,
312
- 'control_type':node.LocalizedControlType.title(),
313
- 'value':value,
314
- 'shortcut':node.AcceleratorKey,
315
- 'bounding_box':bounding_box,
316
- 'center':center,
317
- 'xpath':'',
318
- 'app_name':app_name,
319
- 'is_focused':is_focused
320
- })
321
- dom_interactive_nodes.append(tree_node)
322
- dom_correction(node=node)
323
- else:
324
- bounding_box=self.iou_bounding_box(window_bounding_box,element_bounding_box)
325
- center = bounding_box.get_center()
326
- tree_node=TreeElementNode(**{
327
- 'name':name,
328
- 'control_type':node.LocalizedControlType.title(),
329
- 'value':value,
330
- 'shortcut':node.AcceleratorKey,
331
- 'bounding_box':bounding_box,
332
- 'center':center,
333
- 'xpath':'',
334
- 'app_name':app_name,
335
- 'is_focused':is_focused
336
- })
337
- interactive_nodes.append(tree_node)
338
- elif is_element_text(node):
339
- dom_informative_nodes.append(TextElementNode(
340
- text=node.Name.strip(),
341
- ))
342
-
343
- children=node.GetChildren()
344
-
345
- # Recursively traverse the tree the right to left for normal apps and for DOM traverse from left to right
346
- for child in (children if is_dom else children[::-1]):
347
- # Incrementally building the xpath
348
-
349
- # Check if the child is a DOM element
350
- if is_browser and child.AutomationId == "RootWebArea":
351
- bounding_box=child.BoundingRectangle
352
- self.dom_bounding_box=BoundingBox(left=bounding_box.left,top=bounding_box.top,
353
- right=bounding_box.right,bottom=bounding_box.bottom,width=bounding_box.width(),
354
- height=bounding_box.height())
355
- scroll_pattern=child.GetPattern(PatternId.ScrollPattern)
356
- self.dom_info=DOMInfo(
357
- horizontal_scrollable=scroll_pattern.HorizontallyScrollable,
358
- horizontal_scroll_percent=scroll_pattern.HorizontalScrollPercent if scroll_pattern.HorizontallyScrollable else 0,
359
- vertical_scrollable=scroll_pattern.VerticallyScrollable,
360
- vertical_scroll_percent=scroll_pattern.VerticalScrollPercent if scroll_pattern.VerticallyScrollable else 0
361
- )
362
- # enter DOM subtree
363
- tree_traversal(child, is_dom=True, is_dialog=is_dialog)
364
- # Check if the child is a dialog
365
- elif isinstance(child,WindowControl):
366
- if not child.IsOffscreen:
367
- if is_dom:
368
- bounding_box=child.BoundingRectangle
369
- if bounding_box.width() > 0.8*self.dom_bounding_box.width:
370
- # Because this window element covers the majority of the screen
371
- dom_interactive_nodes.clear()
372
- else:
373
- if is_window_modal(child):
374
- # Because this window element is modal
375
- interactive_nodes.clear()
376
- # enter dialog subtree
377
- tree_traversal(child, is_dom=is_dom, is_dialog=True)
378
- else:
379
- # normal non-dialog children
380
- tree_traversal(child, is_dom=is_dom, is_dialog=is_dialog)
381
-
382
- interactive_nodes, dom_interactive_nodes, scrollable_nodes, dom_informative_nodes = [], [], [], []
383
- app_name=node.Name.strip()
384
- match node.ClassName:
385
- case "Progman":
386
- app_name="Desktop"
387
- case 'Shell_TrayWnd'|'Shell_SecondaryTrayWnd':
388
- app_name="Taskbar"
389
- case 'Microsoft.UI.Content.PopupWindowSiteBridge':
390
- app_name="Context Menu"
391
- case _:
392
- pass
393
- tree_traversal(node,is_dom=False,is_dialog=False)
394
-
395
- logger.debug(f'Interactive nodes:{len(interactive_nodes)}')
396
- logger.debug(f'DOM interactive nodes:{len(dom_interactive_nodes)}')
397
- logger.debug(f'Scrollable nodes:{len(scrollable_nodes)}')
398
-
399
- if use_dom:
400
- if is_browser:
401
- return (dom_interactive_nodes,scrollable_nodes,dom_informative_nodes)
402
- else:
403
- return ([],[],[])
404
- else:
405
- return (interactive_nodes+dom_interactive_nodes,scrollable_nodes,dom_informative_nodes)
406
-
407
- def get_annotated_screenshot(self, nodes: list[TreeElementNode],scale:float=1.0) -> Image.Image:
408
- screenshot = self.desktop.get_screenshot()
409
- sleep(0.10)
410
-
411
- original_width = screenshot.width
412
- original_height = screenshot.height
413
-
414
- scaled_width = int(original_width * scale)
415
- scaled_height = int(original_height * scale)
416
- screenshot = screenshot.resize((scaled_width, scaled_height), Image.Resampling.LANCZOS)
417
-
418
- # Add padding
419
- padding = 5
420
- width = int(screenshot.width + (1.5 * padding))
421
- height = int(screenshot.height + (1.5 * padding))
422
- padded_screenshot = Image.new("RGB", (width, height), color=(255, 255, 255))
423
- padded_screenshot.paste(screenshot, (padding, padding))
424
-
425
- draw = ImageDraw.Draw(padded_screenshot)
426
- font_size = 12
427
- try:
428
- font = ImageFont.truetype('arial.ttf', font_size)
429
- except IOError:
430
- font = ImageFont.load_default()
431
-
432
- def get_random_color():
433
- return "#{:06x}".format(random.randint(0, 0xFFFFFF))
434
-
435
- def draw_annotation(label, node: TreeElementNode):
436
- box = node.bounding_box
437
- color = get_random_color()
438
-
439
- # Scale and pad the bounding box coordinates
440
- adjusted_box = (
441
- int(box.left * scale) + padding,
442
- int(box.top * scale) + padding,
443
- int(box.right * scale) + padding,
444
- int(box.bottom * scale) + padding
445
- )
446
- # Draw bounding box
447
- draw.rectangle(adjusted_box, outline=color, width=2)
448
-
449
- # Label dimensions
450
- label_width = draw.textlength(str(label), font=font)
451
- label_height = font_size
452
- left, top, right, bottom = adjusted_box
453
-
454
- # Label position above bounding box
455
- label_x1 = right - label_width
456
- label_y1 = top - label_height - 4
457
- label_x2 = label_x1 + label_width
458
- label_y2 = label_y1 + label_height + 4
459
-
460
- # Draw label background and text
461
- draw.rectangle([(label_x1, label_y1), (label_x2, label_y2)], fill=color)
462
- draw.text((label_x1 + 2, label_y1 + 2), str(label), fill=(255, 255, 255), font=font)
463
-
464
- # Draw annotations in parallel
465
- with ThreadPoolExecutor() as executor:
466
- executor.map(draw_annotation, range(len(nodes)), nodes)
1
+ from windows_mcp.tree.config import INTERACTIVE_CONTROL_TYPE_NAMES,DOCUMENT_CONTROL_TYPE_NAMES,INFORMATIVE_CONTROL_TYPE_NAMES, DEFAULT_ACTIONS, THREAD_MAX_RETRIES
2
+ from windows_mcp.tree.views import TreeElementNode, ScrollElementNode, TextElementNode, Center, BoundingBox, TreeState, DOMInfo
3
+ from windows_mcp.uia import Control,ImageControl,ScrollPattern,WindowControl,Rect,GetRootControl,PatternId
4
+ from concurrent.futures import ThreadPoolExecutor, as_completed
5
+ from windows_mcp.tree.utils import random_point_within_bounding_box
6
+ from PIL import Image, ImageFont, ImageDraw
7
+ from typing import TYPE_CHECKING,Optional
8
+ from windows_mcp.desktop.views import App
9
+ from time import sleep,time
10
+ import logging
11
+ import random
12
+
13
+ logger = logging.getLogger(__name__)
14
+ logger.setLevel(logging.INFO)
15
+ handler = logging.StreamHandler()
16
+ formatter = logging.Formatter('[%(levelname)s] %(message)s')
17
+ handler.setFormatter(formatter)
18
+ logger.addHandler(handler)
19
+
20
+ if TYPE_CHECKING:
21
+ from windows_mcp.desktop.service import Desktop
22
+
23
+ class Tree:
24
+ def __init__(self,desktop:'Desktop'):
25
+ self.desktop=desktop
26
+ self.screen_size=self.desktop.get_screen_size()
27
+ self.dom_info:Optional[DOMInfo]=None
28
+ self.dom_bounding_box:BoundingBox=None
29
+ self.screen_box=BoundingBox(
30
+ top=0, left=0, bottom=self.screen_size.height, right=self.screen_size.width,
31
+ width=self.screen_size.width, height=self.screen_size.height
32
+ )
33
+
34
+ def get_state(self,active_app:App,other_apps:list[App],use_dom:bool=False)->TreeState:
35
+ root=GetRootControl()
36
+ other_apps_handle=set(map(lambda other_app: other_app.handle,other_apps))
37
+ apps=list(filter(lambda app:app.NativeWindowHandle not in other_apps_handle,root.GetChildren()))
38
+ del other_apps_handle
39
+ if active_app:
40
+ apps=list(filter(lambda app:app.ClassName!='Progman',apps))
41
+ interactive_nodes,scrollable_nodes,dom_informative_nodes=self.get_appwise_nodes(apps=apps,use_dom=use_dom)
42
+ return TreeState(dom_info=self.dom_info,interactive_nodes=interactive_nodes,scrollable_nodes=scrollable_nodes,dom_informative_nodes=dom_informative_nodes)
43
+
44
+ def get_appwise_nodes(self,apps:list[Control],use_dom:bool=False)-> tuple[list[TreeElementNode],list[ScrollElementNode],list[TextElementNode]]:
45
+ interactive_nodes, scrollable_nodes,dom_informative_nodes = [], [], []
46
+ with ThreadPoolExecutor() as executor:
47
+ retry_counts = {app: 0 for app in apps}
48
+ future_to_app = {
49
+ executor.submit(
50
+ self.get_nodes, app,
51
+ self.desktop.is_app_browser(app),
52
+ use_dom
53
+ ): app
54
+ for app in apps
55
+ }
56
+ while future_to_app: # keep running until no pending futures
57
+ for future in as_completed(list(future_to_app)):
58
+ app = future_to_app.pop(future) # remove completed future
59
+ try:
60
+ result = future.result()
61
+ if result:
62
+ element_nodes, scroll_nodes,informative_nodes = result
63
+ interactive_nodes.extend(element_nodes)
64
+ scrollable_nodes.extend(scroll_nodes)
65
+ dom_informative_nodes.extend(informative_nodes)
66
+ except Exception as e:
67
+ retry_counts[app] += 1
68
+ logger.debug(f"Error in processing node {app.Name}, retry attempt {retry_counts[app]}\nError: {e}")
69
+ if retry_counts[app] < THREAD_MAX_RETRIES:
70
+ logger.debug(f"Retrying {app.Name} for the {retry_counts[app]}th time")
71
+ new_future = executor.submit(self.get_nodes, app, self.desktop.is_app_browser(app),use_dom)
72
+ future_to_app[new_future] = app
73
+ else:
74
+ logger.error(f"Task failed completely for {app.Name} after {THREAD_MAX_RETRIES} retries")
75
+ return interactive_nodes,scrollable_nodes,dom_informative_nodes
76
+
77
+ def iou_bounding_box(self,window_box: Rect,element_box: Rect,) -> BoundingBox:
78
+ # Step 1: Intersection of element and window (existing logic)
79
+ intersection_left = max(window_box.left, element_box.left)
80
+ intersection_top = max(window_box.top, element_box.top)
81
+ intersection_right = min(window_box.right, element_box.right)
82
+ intersection_bottom = min(window_box.bottom, element_box.bottom)
83
+
84
+ # Step 2: Clamp to screen boundaries (new addition)
85
+ intersection_left = max(self.screen_box.left, intersection_left)
86
+ intersection_top = max(self.screen_box.top, intersection_top)
87
+ intersection_right = min(self.screen_box.right, intersection_right)
88
+ intersection_bottom = min(self.screen_box.bottom, intersection_bottom)
89
+
90
+ # Step 3: Validate intersection
91
+ if (intersection_right > intersection_left and intersection_bottom > intersection_top):
92
+ bounding_box = BoundingBox(
93
+ left=intersection_left,
94
+ top=intersection_top,
95
+ right=intersection_right,
96
+ bottom=intersection_bottom,
97
+ width=intersection_right - intersection_left,
98
+ height=intersection_bottom - intersection_top
99
+ )
100
+ else:
101
+ # No valid visible intersection (either outside window or screen)
102
+ bounding_box = BoundingBox(
103
+ left=0,
104
+ top=0,
105
+ right=0,
106
+ bottom=0,
107
+ width=0,
108
+ height=0
109
+ )
110
+ return bounding_box
111
+
112
+ def get_nodes(self, node: Control, is_browser:bool=False,use_dom:bool=False) -> tuple[list[TreeElementNode],list[ScrollElementNode]]:
113
+ window_bounding_box=node.BoundingRectangle
114
+
115
+ def is_element_visible(node:Control,threshold:int=0):
116
+ is_control=node.IsControlElement
117
+ box=node.BoundingRectangle
118
+ if box.isempty():
119
+ return False
120
+ width=box.width()
121
+ height=box.height()
122
+ area=width*height
123
+ is_offscreen=(not node.IsOffscreen) or node.ControlTypeName in ['EditControl']
124
+ return area > threshold and is_offscreen and is_control
125
+
126
+ def is_element_enabled(node:Control):
127
+ try:
128
+ return node.IsEnabled
129
+ except Exception:
130
+ return False
131
+
132
+ def is_default_action(node:Control):
133
+ legacy_pattern=node.GetLegacyIAccessiblePattern()
134
+ default_action=legacy_pattern.DefaultAction.title()
135
+ if default_action in DEFAULT_ACTIONS:
136
+ return True
137
+ return False
138
+
139
+ def is_element_image(node:Control):
140
+ if isinstance(node,ImageControl):
141
+ if node.LocalizedControlType=='graphic' or not node.IsKeyboardFocusable:
142
+ return True
143
+ return False
144
+
145
+ def is_element_text(node:Control):
146
+ try:
147
+ if node.ControlTypeName in INFORMATIVE_CONTROL_TYPE_NAMES:
148
+ if is_element_visible(node) and is_element_enabled(node) and not is_element_image(node):
149
+ return True
150
+ except Exception:
151
+ return False
152
+ return False
153
+
154
+ def is_window_modal(node:WindowControl):
155
+ try:
156
+ window_pattern=node.GetWindowPattern()
157
+ return window_pattern.IsModal
158
+ except Exception:
159
+ return False
160
+
161
+ def is_keyboard_focusable(node:Control):
162
+ try:
163
+ if node.ControlTypeName in set(['EditControl','ButtonControl','CheckBoxControl','RadioButtonControl','TabItemControl']):
164
+ return True
165
+ return node.IsKeyboardFocusable
166
+ except Exception:
167
+ return False
168
+
169
+ def element_has_child_element(node:Control,control_type:str,child_control_type:str):
170
+ if node.LocalizedControlType==control_type:
171
+ first_child=node.GetFirstChildControl()
172
+ if first_child is None:
173
+ return False
174
+ return first_child.LocalizedControlType==child_control_type
175
+
176
+ def group_has_no_name(node:Control):
177
+ try:
178
+ if node.ControlTypeName=='GroupControl':
179
+ if not node.Name.strip():
180
+ return True
181
+ return False
182
+ except Exception:
183
+ return False
184
+
185
+ def is_element_scrollable(node:Control):
186
+ try:
187
+ if (node.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES|INFORMATIVE_CONTROL_TYPE_NAMES) or node.IsOffscreen:
188
+ return False
189
+ scroll_pattern:ScrollPattern=node.GetPattern(PatternId.ScrollPattern)
190
+ if scroll_pattern is None:
191
+ return False
192
+ return scroll_pattern.VerticallyScrollable
193
+ except Exception:
194
+ return False
195
+
196
+ def is_element_interactive(node:Control):
197
+ try:
198
+ if is_browser and node.ControlTypeName in set(['DataItemControl','ListItemControl']) and not is_keyboard_focusable(node):
199
+ return False
200
+ elif not is_browser and node.ControlTypeName=="ImageControl" and is_keyboard_focusable(node):
201
+ return True
202
+ elif node.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES|DOCUMENT_CONTROL_TYPE_NAMES:
203
+ return is_element_visible(node) and is_element_enabled(node) and (not is_element_image(node) or is_keyboard_focusable(node))
204
+ elif node.ControlTypeName=='GroupControl':
205
+ if is_browser:
206
+ return is_element_visible(node) and is_element_enabled(node) and (is_default_action(node) or is_keyboard_focusable(node))
207
+ # else:
208
+ # return is_element_visible and is_element_enabled(node) and is_default_action(node)
209
+ except Exception:
210
+ return False
211
+ return False
212
+
213
+ def dom_correction(node:Control):
214
+ if element_has_child_element(node,'list item','link') or element_has_child_element(node,'item','link'):
215
+ dom_interactive_nodes.pop()
216
+ return None
217
+ elif node.ControlTypeName=='GroupControl':
218
+ dom_interactive_nodes.pop()
219
+ if is_keyboard_focusable(node):
220
+ child=node
221
+ try:
222
+ while child.GetFirstChildControl() is not None:
223
+ if child.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES:
224
+ return None
225
+ child=child.GetFirstChildControl()
226
+ except Exception:
227
+ return None
228
+ if child.ControlTypeName!='TextControl':
229
+ return None
230
+ legacy_pattern=node.GetLegacyIAccessiblePattern()
231
+ value=legacy_pattern.Value
232
+ element_bounding_box = node.BoundingRectangle
233
+ bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
234
+ center = bounding_box.get_center()
235
+ is_focused=node.HasKeyboardFocus
236
+ dom_interactive_nodes.append(TreeElementNode(**{
237
+ 'name':child.Name.strip(),
238
+ 'control_type':node.LocalizedControlType,
239
+ 'value':value,
240
+ 'shortcut':node.AcceleratorKey,
241
+ 'bounding_box':bounding_box,
242
+ 'xpath':'',
243
+ 'center':center,
244
+ 'app_name':app_name,
245
+ 'is_focused':is_focused
246
+ }))
247
+ elif element_has_child_element(node,'link','heading'):
248
+ dom_interactive_nodes.pop()
249
+ node=node.GetFirstChildControl()
250
+ control_type='link'
251
+ legacy_pattern=node.GetLegacyIAccessiblePattern()
252
+ value=legacy_pattern.Value
253
+ element_bounding_box = node.BoundingRectangle
254
+ bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
255
+ center = bounding_box.get_center()
256
+ is_focused=node.HasKeyboardFocus
257
+ dom_interactive_nodes.append(TreeElementNode(**{
258
+ 'name':node.Name.strip(),
259
+ 'control_type':control_type,
260
+ 'value':node.Name.strip(),
261
+ 'shortcut':node.AcceleratorKey,
262
+ 'bounding_box':bounding_box,
263
+ 'xpath':'',
264
+ 'center':center,
265
+ 'app_name':app_name,
266
+ 'is_focused':is_focused
267
+ }))
268
+
269
+ def tree_traversal(node: Control,is_dom:bool=False,is_dialog:bool=False):
270
+ # Checks to skip the nodes that are not interactive
271
+ if node.IsOffscreen and (node.ControlTypeName not in set(["GroupControl","EditControl","TitleBarControl"])) and node.ClassName not in set(["Popup","Windows.UI.Core.CoreComponentInputSource"]):
272
+ return None
273
+
274
+ if is_element_scrollable(node):
275
+ scroll_pattern:ScrollPattern=node.GetPattern(PatternId.ScrollPattern)
276
+ box = node.BoundingRectangle
277
+ # Get the center
278
+ x,y=random_point_within_bounding_box(node=node,scale_factor=0.8)
279
+ center = Center(x=x,y=y)
280
+ scrollable_nodes.append(ScrollElementNode(**{
281
+ 'name':node.Name.strip() or node.AutomationId or node.LocalizedControlType.capitalize() or "''",
282
+ 'app_name':app_name,
283
+ 'control_type':node.LocalizedControlType.title(),
284
+ 'bounding_box':BoundingBox(**{
285
+ 'left':box.left,
286
+ 'top':box.top,
287
+ 'right':box.right,
288
+ 'bottom':box.bottom,
289
+ 'width':box.width(),
290
+ 'height':box.height()
291
+ }),
292
+ 'center':center,
293
+ 'xpath':'',
294
+ 'horizontal_scrollable':scroll_pattern.HorizontallyScrollable,
295
+ 'horizontal_scroll_percent':scroll_pattern.HorizontalScrollPercent if scroll_pattern.HorizontallyScrollable else 0,
296
+ 'vertical_scrollable':scroll_pattern.VerticallyScrollable,
297
+ 'vertical_scroll_percent':scroll_pattern.VerticalScrollPercent if scroll_pattern.VerticallyScrollable else 0,
298
+ 'is_focused':node.HasKeyboardFocus
299
+ }))
300
+
301
+ if is_element_interactive(node):
302
+ legacy_pattern=node.GetLegacyIAccessiblePattern()
303
+ value=legacy_pattern.Value.strip() if legacy_pattern.Value is not None else ""
304
+ is_focused=node.HasKeyboardFocus
305
+ name=node.Name.strip()
306
+ element_bounding_box = node.BoundingRectangle
307
+ if is_browser and is_dom:
308
+ bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
309
+ center = bounding_box.get_center()
310
+ tree_node=TreeElementNode(**{
311
+ 'name':name,
312
+ 'control_type':node.LocalizedControlType.title(),
313
+ 'value':value,
314
+ 'shortcut':node.AcceleratorKey,
315
+ 'bounding_box':bounding_box,
316
+ 'center':center,
317
+ 'xpath':'',
318
+ 'app_name':app_name,
319
+ 'is_focused':is_focused
320
+ })
321
+ dom_interactive_nodes.append(tree_node)
322
+ dom_correction(node=node)
323
+ else:
324
+ bounding_box=self.iou_bounding_box(window_bounding_box,element_bounding_box)
325
+ center = bounding_box.get_center()
326
+ tree_node=TreeElementNode(**{
327
+ 'name':name,
328
+ 'control_type':node.LocalizedControlType.title(),
329
+ 'value':value,
330
+ 'shortcut':node.AcceleratorKey,
331
+ 'bounding_box':bounding_box,
332
+ 'center':center,
333
+ 'xpath':'',
334
+ 'app_name':app_name,
335
+ 'is_focused':is_focused
336
+ })
337
+ interactive_nodes.append(tree_node)
338
+ elif is_element_text(node):
339
+ dom_informative_nodes.append(TextElementNode(
340
+ text=node.Name.strip(),
341
+ ))
342
+
343
+ children=node.GetChildren()
344
+
345
+ # Recursively traverse the tree the right to left for normal apps and for DOM traverse from left to right
346
+ for child in (children if is_dom else children[::-1]):
347
+ # Incrementally building the xpath
348
+
349
+ # Check if the child is a DOM element
350
+ if is_browser and child.AutomationId == "RootWebArea":
351
+ bounding_box=child.BoundingRectangle
352
+ self.dom_bounding_box=BoundingBox(left=bounding_box.left,top=bounding_box.top,
353
+ right=bounding_box.right,bottom=bounding_box.bottom,width=bounding_box.width(),
354
+ height=bounding_box.height())
355
+ scroll_pattern=child.GetPattern(PatternId.ScrollPattern)
356
+ self.dom_info=DOMInfo(
357
+ horizontal_scrollable=scroll_pattern.HorizontallyScrollable,
358
+ horizontal_scroll_percent=scroll_pattern.HorizontalScrollPercent if scroll_pattern.HorizontallyScrollable else 0,
359
+ vertical_scrollable=scroll_pattern.VerticallyScrollable,
360
+ vertical_scroll_percent=scroll_pattern.VerticalScrollPercent if scroll_pattern.VerticallyScrollable else 0
361
+ )
362
+ # enter DOM subtree
363
+ tree_traversal(child, is_dom=True, is_dialog=is_dialog)
364
+ # Check if the child is a dialog
365
+ elif isinstance(child,WindowControl):
366
+ if not child.IsOffscreen:
367
+ if is_dom:
368
+ bounding_box=child.BoundingRectangle
369
+ if bounding_box.width() > 0.8*self.dom_bounding_box.width:
370
+ # Because this window element covers the majority of the screen
371
+ dom_interactive_nodes.clear()
372
+ else:
373
+ if is_window_modal(child):
374
+ # Because this window element is modal
375
+ interactive_nodes.clear()
376
+ # enter dialog subtree
377
+ tree_traversal(child, is_dom=is_dom, is_dialog=True)
378
+ else:
379
+ # normal non-dialog children
380
+ tree_traversal(child, is_dom=is_dom, is_dialog=is_dialog)
381
+
382
+ interactive_nodes, dom_interactive_nodes, scrollable_nodes, dom_informative_nodes = [], [], [], []
383
+ app_name=node.Name.strip()
384
+ match node.ClassName:
385
+ case "Progman":
386
+ app_name="Desktop"
387
+ case 'Shell_TrayWnd'|'Shell_SecondaryTrayWnd':
388
+ app_name="Taskbar"
389
+ case 'Microsoft.UI.Content.PopupWindowSiteBridge':
390
+ app_name="Context Menu"
391
+ case _:
392
+ pass
393
+ tree_traversal(node,is_dom=False,is_dialog=False)
394
+
395
+ logger.debug(f'Interactive nodes:{len(interactive_nodes)}')
396
+ logger.debug(f'DOM interactive nodes:{len(dom_interactive_nodes)}')
397
+ logger.debug(f'Scrollable nodes:{len(scrollable_nodes)}')
398
+
399
+ if use_dom:
400
+ if is_browser:
401
+ return (dom_interactive_nodes,scrollable_nodes,dom_informative_nodes)
402
+ else:
403
+ return ([],[],[])
404
+ else:
405
+ return (interactive_nodes+dom_interactive_nodes,scrollable_nodes,dom_informative_nodes)
406
+
407
def _on_focus_change(self, sender:'ctypes.POINTER(IUIAutomationElement)'):
    """Handle a focus-changed event by logging the newly focused element.

    Args:
        sender: Raw automation element pointer delivered with the event; it is
            wrapped with ``Control.CreateControlFromElement``.

    Returns:
        Always ``None``. Events for the same runtime id that arrive less than
        one second after the previously recorded one are dropped.
    """
    # Resolve the element defensively, like the sibling handlers do: if the
    # element cannot be wrapped or queried, the event is dropped instead of
    # letting the exception escape the callback.
    try:
        element = Control.CreateControlFromElement(sender)
        event_key = tuple(element.GetRuntimeId())
    except Exception as e:
        logger.debug(f"[WatchDog] Focus change ignored, element could not be resolved: {e}")
        return None

    # Debounce duplicate events
    current_time = time()
    last_event = getattr(self, '_last_focus_event', None)
    if last_event:
        last_key, last_time = last_event
        if last_key == event_key and (current_time - last_time) < 1.0:
            return None
    self._last_focus_event = (event_key, current_time)

    # Logging is best effort: reading Name/ControlTypeName may itself fail.
    try:
        logger.debug(f"[WatchDog] Focus changed to: '{element.Name}' ({element.ControlTypeName})")
    except Exception:
        pass
424
+
425
+ def _on_structure_change(self, sender:'ctypes.POINTER(IUIAutomationElement)', changeType:int, runtime_id:list[int]):
426
+ """Handle structure change events."""
427
+ try:
428
+ # Debounce duplicate events
429
+ current_time = time()
430
+ event_key = (changeType, tuple(runtime_id))
431
+ if hasattr(self, '_last_structure_event') and self._last_structure_event:
432
+ last_key, last_time = self._last_structure_event
433
+ if last_key == event_key and (current_time - last_time) < 5.0:
434
+ return None
435
+ self._last_structure_event = (event_key, current_time)
436
+
437
+ node = Control.CreateControlFromElement(sender)
438
+
439
+ match StructureChangeType(changeType):
440
+ case StructureChangeType.StructureChangeType_ChildAdded|StructureChangeType.StructureChangeType_ChildrenBulkAdded:
441
+ interactive_nodes=[]
442
+ app=self.desktop.get_app_from_element(node)
443
+ app_name=self.app_name_correction(app.name if app else node.Name.strip())
444
+ is_browser=app.is_browser if app else False
445
+ if isinstance(node,WindowControl|PaneControl):
446
+ #Subtree traversal
447
+ window_bounding_box=app.bounding_box if app else node.BoundingRectangle
448
+ self.tree_traversal(node,window_bounding_box,app_name,is_browser,interactive_nodes=interactive_nodes)
449
+ else:
450
+ #If element is interactive take it else skip it
451
+ if not self.is_element_interactive(node=node,is_browser=is_browser):
452
+ return None
453
+ legacy_pattern=node.GetLegacyIAccessiblePattern()
454
+ value=legacy_pattern.Value.strip() if legacy_pattern.Value is not None else ""
455
+ cursor_type=AccessibleRoleNames.get(legacy_pattern.Role, "Default")
456
+ runtime_id=node.GetRuntimeId()
457
+ is_focused=node.HasKeyboardFocus
458
+ name=node.Name.strip()
459
+ element_bounding_box = node.BoundingRectangle
460
+ bounding_box=self.iou_bounding_box(window_bounding_box,element_bounding_box)
461
+ center = bounding_box.get_center()
462
+
463
+ interactive_nodes.append(TreeElementNode(
464
+ name=name,
465
+ control_type=cursor_type,
466
+ bounding_box=bounding_box,
467
+ center=center,
468
+ runtime_id=runtime_id,
469
+ app_name=app_name,
470
+ value=value,
471
+ shortcut="",
472
+ xpath="",
473
+ is_focused=is_focused
474
+ ))
475
+ if self.tree_state:
476
+ existing_ids={n.runtime_id for n in self.tree_state.interactive_nodes}
477
+ interactive_nodes=[n for n in interactive_nodes if n.runtime_id not in existing_ids]
478
+ self.tree_state.interactive_nodes.extend(interactive_nodes)
479
+ case StructureChangeType.StructureChangeType_ChildrenBulkRemoved | StructureChangeType.StructureChangeType_ChildRemoved:
480
+ if changeType == StructureChangeType.StructureChangeType_ChildRemoved and self.tree_state:
481
+ if isinstance(node,WindowControl|PaneControl):
482
+ parent_bounding_box=BoundingBox.from_bounding_rectangle(node.BoundingRectangle)
483
+ # Remove nodes spatially contained in the parent (heuristic for "is descendant")
484
+ def is_contained(n:'TreeElementNode'):
485
+ cx, cy = n.center.x, n.center.y
486
+ return (parent_bounding_box.left <= cx <= parent_bounding_box.right and
487
+ parent_bounding_box.top <= cy <= parent_bounding_box.bottom)
488
+ self.tree_state.interactive_nodes = list(filter(lambda n:not is_contained(n),self.tree_state.interactive_nodes))
489
+ else:
490
+ target_runtime_id = tuple(runtime_id)
491
+ self.tree_state.interactive_nodes = list(filter(lambda n:n.runtime_id != target_runtime_id,self.tree_state.interactive_nodes))
492
+ case StructureChangeType.StructureChangeType_ChildrenInvalidated:
493
+ #Rebuild subtree
494
+ parent_bounding_box=BoundingBox.from_bounding_rectangle(node.BoundingRectangle)
495
+ app=self.desktop.get_app_from_element(node)
496
+ app_name=self.app_name_correction(app.name if app else node.Name.strip())
497
+ is_browser=app.is_browser if app else False
498
+ window_bounding_box=app.bounding_box if app else parent_bounding_box
499
+ interactive_nodes=[]
500
+ self.tree_traversal(node,window_bounding_box,app_name,is_browser,interactive_nodes=interactive_nodes)
501
+
502
+ # Remove nodes spatially contained in the parent (heuristic for "is descendant")
503
+ def is_contained(n:'TreeElementNode'):
504
+ cx, cy = n.center.x, n.center.y
505
+ return (parent_bounding_box.left <= cx <= parent_bounding_box.right and
506
+ parent_bounding_box.top <= cy <= parent_bounding_box.bottom)
507
+
508
+ if self.tree_state:
509
+ self.tree_state.interactive_nodes = list(filter(lambda n:not is_contained(n),self.tree_state.interactive_nodes))
510
+ self.tree_state.interactive_nodes.extend(interactive_nodes)
511
+ case StructureChangeType.StructureChangeType_ChildrenReordered:
512
+ app=self.desktop.get_app_from_element(node)
513
+ app_name=self.app_name_correction(app.name if app else node.Name.strip())
514
+ is_browser=app.is_browser if app else False
515
+ window_bounding_box=app.bounding_box if app else node.BoundingRectangle
516
+ interactive_nodes=[]
517
+ self.tree_traversal(node,window_bounding_box,app_name,is_browser,interactive_nodes=interactive_nodes)
518
+
519
+ # Update existing nodes
520
+ fresh_nodes_map = {n.runtime_id: n for n in interactive_nodes}
521
+ def update_node(existing_node:'TreeElementNode'):
522
+ if new_node:=fresh_nodes_map.get(existing_node.runtime_id):
523
+ existing_node.update_from_node(new_node)
524
+ list(map(update_node,self.tree_state.interactive_nodes))
525
+ except Exception as e:
526
+ logger.debug(f"[WatchDog] Structure changed with error: {e}, StructureChangeType={StructureChangeType(changeType).name}")
527
+
528
+ try:
529
+ logger.debug(f"[WatchDog] Structure changed: Type={StructureChangeType(changeType).name} RuntimeID={tuple(runtime_id)} Sender: '{node.Name}' ({node.ControlTypeName})")
530
+ except Exception:
531
+ pass
532
+
533
+ def _on_property_change(self, sender:'ctypes.POINTER(IUIAutomationElement)', propertyId:int, newValue):
534
+ """Handle property change events."""
535
+ try:
536
+ element = Control.CreateControlFromElement(sender)
537
+ logger.debug(f"[WatchDog] Property changed: ID={propertyId} Value={newValue} Element: '{element.Name}' ({element.ControlTypeName})")
538
+ except Exception:
539
+ pass
540
+
541
def get_annotated_screenshot(self, nodes: list[TreeElementNode],scale:float=1.0) -> Image.Image:
    """Return a screenshot with a numbered, colored box drawn around each node.

    Args:
        nodes: Elements to annotate. Each node is labelled with its index in
            this list, and its ``bounding_box`` supplies the rectangle.
        scale: Factor applied to the screenshot size and to every bounding
            box coordinate before drawing.

    Returns:
        A new RGB image: the scaled screenshot on a white canvas with a
        ``padding``-pixel border on every side, annotations drawn on top.
    """
    screenshot = self.desktop.get_screenshot()
    sleep(0.10)  # NOTE(review): purpose of this pause is not visible here; kept as-is

    scaled_width = int(screenshot.width * scale)
    scaled_height = int(screenshot.height * scale)
    screenshot = screenshot.resize((scaled_width, scaled_height), Image.Resampling.LANCZOS)

    # Add padding. The canvas must grow by 2 * padding: the screenshot is
    # pasted at (padding, padding), so a smaller canvas clips its right and
    # bottom edges.
    padding = 5
    width = screenshot.width + 2 * padding
    height = screenshot.height + 2 * padding
    padded_screenshot = Image.new("RGB", (width, height), color=(255, 255, 255))
    padded_screenshot.paste(screenshot, (padding, padding))

    draw = ImageDraw.Draw(padded_screenshot)
    font_size = 12
    try:
        font = ImageFont.truetype('arial.ttf', font_size)
    except IOError:
        # Font file not available: fall back to Pillow's built-in font.
        font = ImageFont.load_default()

    def get_random_color():
        return "#{:06x}".format(random.randint(0, 0xFFFFFF))

    def draw_annotation(label, node: TreeElementNode):
        box = node.bounding_box
        color = get_random_color()

        # Scale and pad the bounding box coordinates
        adjusted_box = (
            int(box.left * scale) + padding,
            int(box.top * scale) + padding,
            int(box.right * scale) + padding,
            int(box.bottom * scale) + padding
        )
        # Draw bounding box
        draw.rectangle(adjusted_box, outline=color, width=2)

        # Label dimensions
        label_width = draw.textlength(str(label), font=font)
        label_height = font_size
        left, top, right, bottom = adjusted_box

        # Label position above bounding box, right-aligned with it
        label_x1 = right - label_width
        label_y1 = top - label_height - 4
        label_x2 = label_x1 + label_width
        label_y2 = label_y1 + label_height + 4

        # Draw label background and text
        draw.rectangle([(label_x1, label_y1), (label_x2, label_y2)], fill=color)
        draw.text((label_x1 + 2, label_y1 + 2), str(label), fill=(255, 255, 255), font=font)

    # Draw sequentially: all annotations target the same image through one
    # ImageDraw object, so worker threads would only race on shared state.
    # A failing node is logged and skipped instead of being lost silently.
    for label, node in enumerate(nodes):
        try:
            draw_annotation(label, node)
        except Exception as e:
            logger.debug(f"Failed to draw annotation {label}: {e}")

    return padded_screenshot