windows-mcp 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- main.py +263 -0
- src/__init__.py +0 -0
- src/desktop/__init__.py +0 -0
- src/desktop/config.py +21 -0
- src/desktop/service.py +454 -0
- src/desktop/views.py +58 -0
- src/tree/__init__.py +0 -0
- src/tree/config.py +51 -0
- src/tree/service.py +444 -0
- src/tree/utils.py +22 -0
- src/tree/views.py +102 -0
- windows_mcp-0.5.2.dist-info/METADATA +388 -0
- windows_mcp-0.5.2.dist-info/RECORD +16 -0
- windows_mcp-0.5.2.dist-info/WHEEL +4 -0
- windows_mcp-0.5.2.dist-info/entry_points.txt +2 -0
- windows_mcp-0.5.2.dist-info/licenses/LICENSE.md +21 -0
src/tree/service.py
ADDED
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
from src.tree.config import INTERACTIVE_CONTROL_TYPE_NAMES,DOCUMENT_CONTROL_TYPE_NAMES,INFORMATIVE_CONTROL_TYPE_NAMES, DEFAULT_ACTIONS, THREAD_MAX_RETRIES
|
|
2
|
+
from uiautomation import Control,ImageControl,ScrollPattern,WindowControl,Rect,GetRootControl,PatternId
|
|
3
|
+
from src.tree.views import TreeElementNode, ScrollElementNode, Center, BoundingBox, TreeState
|
|
4
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
5
|
+
from src.tree.utils import random_point_within_bounding_box
|
|
6
|
+
from PIL import Image, ImageFont, ImageDraw
|
|
7
|
+
from src.desktop.views import App
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
from time import sleep
|
|
10
|
+
import logging
|
|
11
|
+
import random
|
|
12
|
+
|
|
13
|
+
# Module-level logger for the tree service: records at INFO and above are
# emitted through a dedicated stream handler as "[LEVEL] message".
logger = logging.getLogger(__name__)
formatter = logging.Formatter('[%(levelname)s] %(message)s')
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from src.desktop.service import Desktop
|
|
22
|
+
|
|
23
|
+
class Tree:
|
|
24
|
+
def __init__(self, desktop: 'Desktop'):
    """Bind the tree to a desktop and cache the full-screen rectangle.

    Args:
        desktop: Object providing ``get_screen_size()``; the returned value
            is read through its ``width`` and ``height`` attributes.
    """
    self.desktop = desktop
    size = self.desktop.get_screen_size()
    # Set while traversing a browser window; no DOM area is known yet.
    self.dom_bounding_box: BoundingBox = None
    screen_width, screen_height = size.width, size.height
    # Screen rectangle anchored at the origin, used to clip element boxes.
    self.screen_box = BoundingBox(
        left=0,
        top=0,
        right=screen_width,
        bottom=screen_height,
        width=screen_width,
        height=screen_height,
    )
|
|
32
|
+
|
|
33
|
+
def get_state(self, active_app: App, other_apps: list[App]) -> TreeState:
    """Collect interactive and scrollable elements of the relevant windows.

    Children of the UI Automation root whose native window handle belongs to
    one of ``other_apps`` are skipped. When ``active_app`` is truthy the
    ``Progman`` window is skipped as well.

    Args:
        active_app: The foreground application, or a falsy value if none.
        other_apps: Applications whose windows must not be traversed; only
            their ``handle`` attribute is read.

    Returns:
        A ``TreeState`` holding the collected nodes.
    """
    root = GetRootControl()
    excluded_handles = {other.handle for other in other_apps}
    windows = [
        window for window in root.GetChildren()
        if window.NativeWindowHandle not in excluded_handles
    ]
    if active_app:
        windows = [window for window in windows if window.ClassName != 'Progman']
    interactive_nodes, scrollable_nodes = self.get_appwise_nodes(apps=windows)
    return TreeState(interactive_nodes=interactive_nodes, scrollable_nodes=scrollable_nodes)
|
|
42
|
+
|
|
43
|
+
def get_appwise_nodes(self, apps: list[Control]) -> tuple[list[TreeElementNode], list[ScrollElementNode]]:
    """Traverse every window in ``apps`` on a thread pool and merge the results.

    Each window is handed to ``self.get_nodes``. A task that raises is
    re-submitted until its failure count reaches ``THREAD_MAX_RETRIES``;
    after that the window is given up on and an error is logged.

    Args:
        apps: Top-level window controls to traverse.

    Returns:
        ``(interactive_nodes, scrollable_nodes)`` accumulated over all
        windows, in task-completion order.
    """
    collected_interactive, collected_scrollable = [], []

    def schedule(pool, window):
        # The browser flag is resolved on the calling thread before queuing.
        return pool.submit(self.get_nodes, window, self.desktop.is_app_browser(window))

    with ThreadPoolExecutor() as pool:
        attempts = dict.fromkeys(apps, 0)
        pending = {}
        for window in apps:
            pending[schedule(pool, window)] = window

        # Retries add new futures to `pending`, so loop until it drains.
        while pending:
            for finished in as_completed(list(pending)):
                window = pending.pop(finished)
                try:
                    outcome = finished.result()
                    if outcome:
                        found_interactive, found_scrollable = outcome
                        collected_interactive.extend(found_interactive)
                        collected_scrollable.extend(found_scrollable)
                except Exception as e:
                    attempts[window] += 1
                    logger.debug(f"Error in processing node {window.Name}, retry attempt {attempts[window]}\nError: {e}")
                    if attempts[window] >= THREAD_MAX_RETRIES:
                        logger.error(f"Task failed completely for {window.Name} after {THREAD_MAX_RETRIES} retries")
                    else:
                        pending[schedule(pool, window)] = window
    return collected_interactive, collected_scrollable
|
|
72
|
+
|
|
73
|
+
def iou_bounding_box(self, window_box: Rect, element_box: Rect,) -> BoundingBox:
    """Clip an element rectangle to its window and to the screen.

    Despite the name, this computes a plain intersection (no union ratio):
    the overlap of ``element_box``, ``window_box`` and ``self.screen_box``.

    Args:
        window_box: Rectangle of the containing window (or DOM area); only
            ``left``/``top``/``right``/``bottom`` are read.
        element_box: Rectangle of the element; same attributes are read.

    Returns:
        The visible part of the element as a ``BoundingBox``, or an all-zero
        box when the three rectangles do not overlap with positive area.
    """
    screen = self.screen_box
    # The innermost edges of the three rectangles bound the visible region.
    left = max(screen.left, window_box.left, element_box.left)
    top = max(screen.top, window_box.top, element_box.top)
    right = min(screen.right, window_box.right, element_box.right)
    bottom = min(screen.bottom, window_box.bottom, element_box.bottom)

    if right <= left or bottom <= top:
        # Nothing visible: outside the window, off-screen, or degenerate.
        return BoundingBox(left=0, top=0, right=0, bottom=0, width=0, height=0)

    return BoundingBox(
        left=left,
        top=top,
        right=right,
        bottom=bottom,
        width=right - left,
        height=bottom - top,
    )
|
|
107
|
+
|
|
108
|
+
def get_nodes(self, node: Control, is_browser:bool=False) -> tuple[list[TreeElementNode],list[ScrollElementNode]]:
|
|
109
|
+
window_bounding_box=node.BoundingRectangle
|
|
110
|
+
|
|
111
|
+
def is_element_visible(node:Control,threshold:int=0):
|
|
112
|
+
is_control=node.IsControlElement
|
|
113
|
+
box=node.BoundingRectangle
|
|
114
|
+
if box.isempty():
|
|
115
|
+
return False
|
|
116
|
+
width=box.width()
|
|
117
|
+
height=box.height()
|
|
118
|
+
area=width*height
|
|
119
|
+
is_offscreen=(not node.IsOffscreen) or node.ControlTypeName in ['EditControl']
|
|
120
|
+
return area > threshold and is_offscreen and is_control
|
|
121
|
+
|
|
122
|
+
def is_element_enabled(node:Control):
|
|
123
|
+
try:
|
|
124
|
+
return node.IsEnabled
|
|
125
|
+
except Exception:
|
|
126
|
+
return False
|
|
127
|
+
|
|
128
|
+
def is_default_action(node:Control):
|
|
129
|
+
legacy_pattern=node.GetLegacyIAccessiblePattern()
|
|
130
|
+
default_action=legacy_pattern.DefaultAction.title()
|
|
131
|
+
if default_action in DEFAULT_ACTIONS:
|
|
132
|
+
return True
|
|
133
|
+
return False
|
|
134
|
+
|
|
135
|
+
def is_element_image(node:Control):
|
|
136
|
+
if isinstance(node,ImageControl):
|
|
137
|
+
if node.LocalizedControlType=='graphic' or not node.IsKeyboardFocusable:
|
|
138
|
+
return True
|
|
139
|
+
return False
|
|
140
|
+
|
|
141
|
+
def is_element_text(node:Control):
|
|
142
|
+
try:
|
|
143
|
+
if node.ControlTypeName in INFORMATIVE_CONTROL_TYPE_NAMES:
|
|
144
|
+
if is_element_visible(node) and is_element_enabled(node) and not is_element_image(node):
|
|
145
|
+
return True
|
|
146
|
+
except Exception:
|
|
147
|
+
return False
|
|
148
|
+
return False
|
|
149
|
+
|
|
150
|
+
def is_window_modal(node:WindowControl):
|
|
151
|
+
try:
|
|
152
|
+
window_pattern=node.GetWindowPattern()
|
|
153
|
+
return window_pattern.IsModal
|
|
154
|
+
except Exception:
|
|
155
|
+
return False
|
|
156
|
+
|
|
157
|
+
def is_keyboard_focusable(node:Control):
|
|
158
|
+
try:
|
|
159
|
+
if node.ControlTypeName in set(['EditControl','ButtonControl','CheckBoxControl','RadioButtonControl','TabItemControl']):
|
|
160
|
+
return True
|
|
161
|
+
return node.IsKeyboardFocusable
|
|
162
|
+
except Exception:
|
|
163
|
+
return False
|
|
164
|
+
|
|
165
|
+
def element_has_child_element(node:Control,control_type:str,child_control_type:str):
|
|
166
|
+
if node.LocalizedControlType==control_type:
|
|
167
|
+
first_child=node.GetFirstChildControl()
|
|
168
|
+
if first_child is None:
|
|
169
|
+
return False
|
|
170
|
+
return first_child.LocalizedControlType==child_control_type
|
|
171
|
+
|
|
172
|
+
def group_has_no_name(node:Control):
|
|
173
|
+
try:
|
|
174
|
+
if node.ControlTypeName=='GroupControl':
|
|
175
|
+
if not node.Name.strip():
|
|
176
|
+
return True
|
|
177
|
+
return False
|
|
178
|
+
except Exception:
|
|
179
|
+
return False
|
|
180
|
+
|
|
181
|
+
def is_element_scrollable(node:Control):
|
|
182
|
+
try:
|
|
183
|
+
if (node.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES|INFORMATIVE_CONTROL_TYPE_NAMES) or node.IsOffscreen:
|
|
184
|
+
return False
|
|
185
|
+
scroll_pattern:ScrollPattern=node.GetPattern(PatternId.ScrollPattern)
|
|
186
|
+
if scroll_pattern is None:
|
|
187
|
+
return False
|
|
188
|
+
return scroll_pattern.VerticallyScrollable
|
|
189
|
+
except Exception:
|
|
190
|
+
return False
|
|
191
|
+
|
|
192
|
+
def is_element_interactive(node:Control):
|
|
193
|
+
try:
|
|
194
|
+
if is_browser and node.ControlTypeName in set(['DataItemControl','ListItemControl']) and not is_keyboard_focusable(node):
|
|
195
|
+
return False
|
|
196
|
+
elif not is_browser and node.ControlTypeName=="ImageControl" and is_keyboard_focusable(node):
|
|
197
|
+
return True
|
|
198
|
+
elif node.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES|DOCUMENT_CONTROL_TYPE_NAMES:
|
|
199
|
+
return is_element_visible(node) and is_element_enabled(node) and (not is_element_image(node) or is_keyboard_focusable(node))
|
|
200
|
+
elif node.ControlTypeName=='GroupControl':
|
|
201
|
+
if is_browser:
|
|
202
|
+
return is_element_visible(node) and is_element_enabled(node) and (is_default_action(node) or is_keyboard_focusable(node))
|
|
203
|
+
# else:
|
|
204
|
+
# return is_element_visible and is_element_enabled(node) and is_default_action(node)
|
|
205
|
+
except Exception:
|
|
206
|
+
return False
|
|
207
|
+
return False
|
|
208
|
+
|
|
209
|
+
def dom_correction(node:Control):
|
|
210
|
+
if element_has_child_element(node,'list item','link') or element_has_child_element(node,'item','link'):
|
|
211
|
+
dom_interactive_nodes.pop()
|
|
212
|
+
return None
|
|
213
|
+
elif node.ControlTypeName=='GroupControl':
|
|
214
|
+
dom_interactive_nodes.pop()
|
|
215
|
+
if is_keyboard_focusable(node):
|
|
216
|
+
child=node
|
|
217
|
+
try:
|
|
218
|
+
while child.GetFirstChildControl() is not None:
|
|
219
|
+
if child.ControlTypeName in INTERACTIVE_CONTROL_TYPE_NAMES:
|
|
220
|
+
return None
|
|
221
|
+
child=child.GetFirstChildControl()
|
|
222
|
+
except Exception:
|
|
223
|
+
return None
|
|
224
|
+
if child.ControlTypeName!='TextControl':
|
|
225
|
+
return None
|
|
226
|
+
legacy_pattern=node.GetLegacyIAccessiblePattern()
|
|
227
|
+
value=legacy_pattern.Value
|
|
228
|
+
element_bounding_box = node.BoundingRectangle
|
|
229
|
+
bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
|
|
230
|
+
center = bounding_box.get_center()
|
|
231
|
+
is_focused=node.HasKeyboardFocus
|
|
232
|
+
dom_interactive_nodes.append(TreeElementNode(**{
|
|
233
|
+
'name':child.Name.strip(),
|
|
234
|
+
'control_type':node.LocalizedControlType,
|
|
235
|
+
'value':value,
|
|
236
|
+
'shortcut':node.AcceleratorKey,
|
|
237
|
+
'bounding_box':bounding_box,
|
|
238
|
+
'xpath':'',
|
|
239
|
+
'center':center,
|
|
240
|
+
'app_name':app_name,
|
|
241
|
+
'is_focused':is_focused
|
|
242
|
+
}))
|
|
243
|
+
elif element_has_child_element(node,'link','heading'):
|
|
244
|
+
dom_interactive_nodes.pop()
|
|
245
|
+
node=node.GetFirstChildControl()
|
|
246
|
+
control_type='link'
|
|
247
|
+
legacy_pattern=node.GetLegacyIAccessiblePattern()
|
|
248
|
+
value=legacy_pattern.Value
|
|
249
|
+
element_bounding_box = node.BoundingRectangle
|
|
250
|
+
bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
|
|
251
|
+
center = bounding_box.get_center()
|
|
252
|
+
is_focused=node.HasKeyboardFocus
|
|
253
|
+
dom_interactive_nodes.append(TreeElementNode(**{
|
|
254
|
+
'name':node.Name.strip(),
|
|
255
|
+
'control_type':control_type,
|
|
256
|
+
'value':node.Name.strip(),
|
|
257
|
+
'shortcut':node.AcceleratorKey,
|
|
258
|
+
'bounding_box':bounding_box,
|
|
259
|
+
'xpath':'',
|
|
260
|
+
'center':center,
|
|
261
|
+
'app_name':app_name,
|
|
262
|
+
'is_focused':is_focused
|
|
263
|
+
}))
|
|
264
|
+
|
|
265
|
+
def tree_traversal(node: Control,is_dom:bool=False,is_dialog:bool=False):
|
|
266
|
+
# Checks to skip the nodes that are not interactive
|
|
267
|
+
if node.IsOffscreen and (node.ControlTypeName not in set(["GroupControl","EditControl","TitleBarControl"])) and node.ClassName not in set(["Popup","Windows.UI.Core.CoreComponentInputSource"]):
|
|
268
|
+
return None
|
|
269
|
+
|
|
270
|
+
if is_element_scrollable(node):
|
|
271
|
+
scroll_pattern:ScrollPattern=node.GetPattern(PatternId.ScrollPattern)
|
|
272
|
+
box = node.BoundingRectangle
|
|
273
|
+
# Get the center
|
|
274
|
+
x,y=random_point_within_bounding_box(node=node,scale_factor=0.8)
|
|
275
|
+
center = Center(x=x,y=y)
|
|
276
|
+
scrollable_nodes.append(ScrollElementNode(**{
|
|
277
|
+
'name':node.Name.strip() or node.AutomationId or node.LocalizedControlType.capitalize() or "''",
|
|
278
|
+
'app_name':app_name,
|
|
279
|
+
'control_type':node.LocalizedControlType.title(),
|
|
280
|
+
'bounding_box':BoundingBox(**{
|
|
281
|
+
'left':box.left,
|
|
282
|
+
'top':box.top,
|
|
283
|
+
'right':box.right,
|
|
284
|
+
'bottom':box.bottom,
|
|
285
|
+
'width':box.width(),
|
|
286
|
+
'height':box.height()
|
|
287
|
+
}),
|
|
288
|
+
'center':center,
|
|
289
|
+
'xpath':'',
|
|
290
|
+
'horizontal_scrollable':scroll_pattern.HorizontallyScrollable,
|
|
291
|
+
'horizontal_scroll_percent':scroll_pattern.HorizontalScrollPercent if scroll_pattern.HorizontallyScrollable else 0,
|
|
292
|
+
'vertical_scrollable':scroll_pattern.VerticallyScrollable,
|
|
293
|
+
'vertical_scroll_percent':scroll_pattern.VerticalScrollPercent if scroll_pattern.VerticallyScrollable else 0,
|
|
294
|
+
'is_focused':node.HasKeyboardFocus
|
|
295
|
+
}))
|
|
296
|
+
|
|
297
|
+
if is_element_interactive(node):
|
|
298
|
+
legacy_pattern=node.GetLegacyIAccessiblePattern()
|
|
299
|
+
value=legacy_pattern.Value.strip() if legacy_pattern.Value is not None else ""
|
|
300
|
+
is_focused=node.HasKeyboardFocus
|
|
301
|
+
name=node.Name.strip()
|
|
302
|
+
element_bounding_box = node.BoundingRectangle
|
|
303
|
+
if is_browser and is_dom:
|
|
304
|
+
bounding_box=self.iou_bounding_box(self.dom_bounding_box,element_bounding_box)
|
|
305
|
+
center = bounding_box.get_center()
|
|
306
|
+
tree_node=TreeElementNode(**{
|
|
307
|
+
'name':name,
|
|
308
|
+
'control_type':node.LocalizedControlType.title(),
|
|
309
|
+
'value':value,
|
|
310
|
+
'shortcut':node.AcceleratorKey,
|
|
311
|
+
'bounding_box':bounding_box,
|
|
312
|
+
'center':center,
|
|
313
|
+
'xpath':'',
|
|
314
|
+
'app_name':app_name,
|
|
315
|
+
'is_focused':is_focused
|
|
316
|
+
})
|
|
317
|
+
dom_interactive_nodes.append(tree_node)
|
|
318
|
+
dom_correction(node=node)
|
|
319
|
+
else:
|
|
320
|
+
bounding_box=self.iou_bounding_box(window_bounding_box,element_bounding_box)
|
|
321
|
+
center = bounding_box.get_center()
|
|
322
|
+
tree_node=TreeElementNode(**{
|
|
323
|
+
'name':name,
|
|
324
|
+
'control_type':node.LocalizedControlType.title(),
|
|
325
|
+
'value':value,
|
|
326
|
+
'shortcut':node.AcceleratorKey,
|
|
327
|
+
'bounding_box':bounding_box,
|
|
328
|
+
'center':center,
|
|
329
|
+
'xpath':'',
|
|
330
|
+
'app_name':app_name,
|
|
331
|
+
'is_focused':is_focused
|
|
332
|
+
})
|
|
333
|
+
interactive_nodes.append(tree_node)
|
|
334
|
+
# elif is_element_text(node):
|
|
335
|
+
# informative_nodes.append(TextElementNode(
|
|
336
|
+
# name=node.Name.strip() or "''",
|
|
337
|
+
# app_name=app_name
|
|
338
|
+
# ))
|
|
339
|
+
|
|
340
|
+
children=node.GetChildren()
|
|
341
|
+
|
|
342
|
+
# Recursively traverse the tree the right to left for normal apps and for DOM traverse from left to right
|
|
343
|
+
for child in (children if is_dom else children[::-1]):
|
|
344
|
+
# Incrementally building the xpath
|
|
345
|
+
|
|
346
|
+
# Check if the child is a DOM element
|
|
347
|
+
if is_browser and child.ClassName == "Chrome_RenderWidgetHostHWND":
|
|
348
|
+
bounding_box=child.BoundingRectangle
|
|
349
|
+
self.dom_bounding_box=BoundingBox(left=bounding_box.left,top=bounding_box.top,
|
|
350
|
+
right=bounding_box.right,bottom=bounding_box.bottom,width=bounding_box.width(),
|
|
351
|
+
height=bounding_box.height())
|
|
352
|
+
# enter DOM subtree
|
|
353
|
+
tree_traversal(child, is_dom=True, is_dialog=is_dialog)
|
|
354
|
+
# Check if the child is a dialog
|
|
355
|
+
elif isinstance(child,WindowControl):
|
|
356
|
+
if not child.IsOffscreen:
|
|
357
|
+
if is_dom:
|
|
358
|
+
bounding_box=child.BoundingRectangle
|
|
359
|
+
if bounding_box.width() > 0.8*self.dom_bounding_box.width:
|
|
360
|
+
# Because this window element covers the majority of the screen
|
|
361
|
+
dom_interactive_nodes.clear()
|
|
362
|
+
else:
|
|
363
|
+
if is_window_modal(child):
|
|
364
|
+
# Because this window element is modal
|
|
365
|
+
interactive_nodes.clear()
|
|
366
|
+
# enter dialog subtree
|
|
367
|
+
tree_traversal(child, is_dom=is_dom, is_dialog=True)
|
|
368
|
+
else:
|
|
369
|
+
# normal non-dialog children
|
|
370
|
+
tree_traversal(child, is_dom=is_dom, is_dialog=is_dialog)
|
|
371
|
+
|
|
372
|
+
interactive_nodes, dom_interactive_nodes, scrollable_nodes = [], [], []
|
|
373
|
+
app_name=node.Name.strip()
|
|
374
|
+
match node.ClassName:
|
|
375
|
+
case "Progman":
|
|
376
|
+
app_name="Desktop"
|
|
377
|
+
case 'Shell_TrayWnd'|'Shell_SecondaryTrayWnd':
|
|
378
|
+
app_name="Taskbar"
|
|
379
|
+
case 'Microsoft.UI.Content.PopupWindowSiteBridge':
|
|
380
|
+
app_name="Context Menu"
|
|
381
|
+
case _:
|
|
382
|
+
pass
|
|
383
|
+
tree_traversal(node,is_dom=False,is_dialog=False)
|
|
384
|
+
|
|
385
|
+
logger.debug(f'Interactive nodes:{len(interactive_nodes)}')
|
|
386
|
+
logger.debug(f'DOM interactive nodes:{len(dom_interactive_nodes)}')
|
|
387
|
+
logger.debug(f'Scrollable nodes:{len(scrollable_nodes)}')
|
|
388
|
+
|
|
389
|
+
interactive_nodes.extend(dom_interactive_nodes)
|
|
390
|
+
return (interactive_nodes,scrollable_nodes)
|
|
391
|
+
|
|
392
|
+
def annotated_screenshot(self, nodes: list[TreeElementNode]) -> Image.Image:
    """Return a padded screenshot with a labelled box drawn around each node.

    Each node is outlined in a random colour and tagged with its index in
    ``nodes``; the tag sits just above the box, right-aligned.

    Annotations are drawn sequentially in list order. All of them go through
    one shared ``ImageDraw`` object, so drawing from several threads gave a
    nondeterministic overlap order, and exceptions raised in the workers
    were never surfaced. A failure on one node is logged and the remaining
    nodes are still drawn.

    Args:
        nodes: Elements to annotate; only ``bounding_box`` is read.

    Returns:
        A new RGB image: the screenshot pasted onto a white canvas at an
        offset of ``padding`` pixels, plus the annotations.
    """
    screenshot = self.desktop.get_screenshot()
    # NOTE(review): purpose of this pause is not evident from this file; kept as is.
    sleep(0.10)

    # White canvas slightly larger than the screenshot.
    padding = 5
    width = int(screenshot.width + (1.5 * padding))
    height = int(screenshot.height + (1.5 * padding))
    padded_screenshot = Image.new("RGB", (width, height), color=(255, 255, 255))
    padded_screenshot.paste(screenshot, (padding, padding))

    draw = ImageDraw.Draw(padded_screenshot)
    font_size = 12
    try:
        font = ImageFont.truetype('arial.ttf', font_size)
    except IOError:
        # Arial is not available: fall back to Pillow's built-in font.
        font = ImageFont.load_default()

    def get_random_color():
        return "#{:06x}".format(random.randint(0, 0xFFFFFF))

    def draw_annotation(label, node: TreeElementNode):
        box = node.bounding_box
        color = get_random_color()

        # Shift the box by the padding so it lines up with the pasted screenshot.
        adjusted_box = (
            int(box.left) + padding,
            int(box.top) + padding,
            int(box.right) + padding,
            int(box.bottom) + padding,
        )
        draw.rectangle(adjusted_box, outline=color, width=2)

        # Label dimensions
        label_width = draw.textlength(str(label), font=font)
        label_height = font_size
        left, top, right, bottom = adjusted_box

        # Label sits above the box, flush with its right edge.
        label_x1 = right - label_width
        label_y1 = top - label_height - 4
        label_x2 = label_x1 + label_width
        label_y2 = label_y1 + label_height + 4

        draw.rectangle([(label_x1, label_y1), (label_x2, label_y2)], fill=color)
        draw.text((label_x1 + 2, label_y1 + 2), str(label), fill=(255, 255, 255), font=font)

    for label, node in enumerate(nodes):
        try:
            draw_annotation(label, node)
        except Exception:
            # Best effort: one bad element must not lose the whole screenshot.
            logger.warning("Failed to draw annotation for element %s", label, exc_info=True)
    return padded_screenshot
|
src/tree/utils.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import random
|
|
2
|
+
from uiautomation import Control
|
|
3
|
+
|
|
4
|
+
def random_point_within_bounding_box(node: Control, scale_factor: float = 1.0) -> tuple[int, int]:
    """
    Generate a random point within a scaled-down bounding box.

    The box is shrunk around its centre by ``scale_factor`` and a point is
    drawn uniformly from the integer coordinates it covers. The right and
    bottom edges are treated as exclusive, so the point always lies inside
    the rectangle even when ``scale_factor`` is 1.0. A box with zero or
    negative size yields its (scaled) top-left corner instead of raising.

    Args:
        node (Control): The node with a bounding rectangle
        scale_factor (float, optional): The factor to scale down the bounding box. Defaults to 1.0.

    Returns:
        tuple: A random point (x, y) within the scaled-down bounding box
    """
    box = node.BoundingRectangle
    width, height = box.width(), box.height()
    scaled_width = int(width * scale_factor)
    scaled_height = int(height * scale_factor)
    scaled_left = box.left + (width - scaled_width) // 2
    scaled_top = box.top + (height - scaled_height) // 2
    # randint includes both ends, hence the "- 1"; clamp for empty boxes.
    x = random.randint(scaled_left, scaled_left + max(scaled_width - 1, 0))
    y = random.randint(scaled_top, scaled_top + max(scaled_height - 1, 0))
    return (x, y)
|
src/tree/views.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
from dataclasses import dataclass,field
|
|
2
|
+
from tabulate import tabulate
|
|
3
|
+
|
|
4
|
+
@dataclass
class TreeState:
    """Snapshot of the elements found on the desktop, with table renderers."""

    interactive_nodes: list['TreeElementNode'] = field(default_factory=list)
    scrollable_nodes: list['ScrollElementNode'] = field(default_factory=list)

    def interactive_elements_to_string(self) -> str:
        """Render the interactive nodes as a table labelled from 0, or a placeholder text when there are none."""
        if not self.interactive_nodes:
            return "No interactive elements"
        table_rows = []
        for label, element in enumerate(self.interactive_nodes):
            table_rows.append(element.to_row(label))
        column_names = ["Label", "App Name", "ControlType", "Name", "Value", "Shortcut", "Coordinates" ,"IsFocused"]
        return tabulate(table_rows, headers=column_names, tablefmt="simple")

    def scrollable_elements_to_string(self) -> str:
        """Render the scrollable nodes as a table, or a placeholder text when there are none.

        Labels continue after the interactive nodes: the first scrollable
        node is labelled ``len(self.interactive_nodes)``.
        """
        if not self.scrollable_nodes:
            return "No scrollable elements"
        label_offset = len(self.interactive_nodes)
        table_rows = []
        for position, element in enumerate(self.scrollable_nodes):
            table_rows.append(element.to_row(position, label_offset))
        column_names = [
            "Label", "App Name", "ControlType", "Name", "Coordinates",
            "Horizontal Scrollable", "Horizontal Scroll Percent(%)", "Vertical Scrollable", "Vertical Scroll Percent(%)", "IsFocused"
        ]
        return tabulate(table_rows, headers=column_names, tablefmt="simple")
|
|
26
|
+
|
|
27
|
+
@dataclass
class BoundingBox:
    """Axis-aligned rectangle stored both as edges and as size."""

    left: int
    top: int
    right: int
    bottom: int
    width: int
    height: int

    def get_center(self) -> 'Center':
        """Return the midpoint, using floor division of width and height."""
        center_x = self.left + self.width // 2
        center_y = self.top + self.height // 2
        return Center(x=center_x, y=center_y)

    def xywh_to_string(self):
        """Format as ``(left,top,width,height)``."""
        return '({},{},{},{})'.format(self.left, self.top, self.width, self.height)

    def xyxy_to_string(self):
        """Format as ``(x1,y1,x2,y2)`` using the corners from ``convert_xywh_to_xyxy``."""
        return '({},{},{},{})'.format(*self.convert_xywh_to_xyxy())

    def convert_xywh_to_xyxy(self) -> tuple[int, int, int, int]:
        """Return ``(x1, y1, x2, y2)``; the second corner is derived from left/top plus width/height, not from ``right``/``bottom``."""
        far_x = self.left + self.width
        far_y = self.top + self.height
        return self.left, self.top, far_x, far_y
|
|
50
|
+
|
|
51
|
+
@dataclass
class Center:
    """A point in screen coordinates."""

    x: int
    y: int

    def to_string(self) -> str:
        """Format as ``(x,y)`` with no spaces."""
        return '({},{})'.format(self.x, self.y)
|
|
58
|
+
|
|
59
|
+
@dataclass
class TreeElementNode:
    """One interactive UI element and the data reported about it."""

    name: str
    control_type: str
    app_name: str
    value: str
    shortcut: str
    bounding_box: BoundingBox
    center: Center
    xpath: str
    is_focused: bool

    def to_row(self, index: int):
        """Return the table row for this element, led by its label ``index``.

        The bounding box and xpath are not part of the row; the position is
        reported as the formatted centre point.
        """
        coordinates = self.center.to_string()
        return [
            index,
            self.app_name,
            self.control_type,
            self.name,
            self.value,
            self.shortcut,
            coordinates,
            self.is_focused,
        ]
|
|
73
|
+
|
|
74
|
+
@dataclass
class ScrollElementNode:
    """One scrollable UI element and its scroll state."""

    name: str
    control_type: str
    xpath: str
    app_name: str
    bounding_box: BoundingBox
    center: Center
    horizontal_scrollable: bool
    horizontal_scroll_percent: float
    vertical_scrollable: bool
    vertical_scroll_percent: float
    is_focused: bool

    def to_row(self, index: int, base_index: int):
        """Return the table row for this element.

        Args:
            index: Position of the element among the scrollable nodes.
            base_index: Offset added to ``index`` to form the label.
        """
        label = base_index + index
        coordinates = self.center.to_string()
        horizontal = [self.horizontal_scrollable, self.horizontal_scroll_percent]
        vertical = [self.vertical_scrollable, self.vertical_scroll_percent]
        return [label, self.app_name, self.control_type, self.name, coordinates, *horizontal, *vertical, self.is_focused]
|
|
101
|
+
|
|
102
|
+
ElementNode=TreeElementNode|ScrollElementNode
|