sunholo 0.74.0__py3-none-any.whl → 0.74.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,12 @@ from ..logging import log
8
8
 
9
9
  from ..utils.parsers import get_clean_website_name
10
10
 
11
+ try:
12
+ from playwright.sync_api import sync_playwright, Response
13
+ except ImportError:
14
+ sync_playwright = None
15
+ Response = None
16
+
11
17
  class BrowseWebWithImagePromptsBot:
12
18
  """
13
19
  BrowseWebWithImagePromptsBot is a base class for creating bots that interact with web pages using Playwright.
@@ -114,8 +120,10 @@ class BrowseWebWithImagePromptsBot:
114
120
  except ImportError as err:
115
121
  print(err)
116
122
  sync_playwright = None
123
+
117
124
  if not sync_playwright:
118
125
  raise ImportError("playright needed for BrowseWebWithImagePromptsBot class - install via `pip install sunholo[tools]`")
126
+
119
127
  self.session_id = session_id or datetime.now().strftime("%Y%m%d%H%M%S")
120
128
  self.website_name = website_name
121
129
  self.browser_type = browser_type
@@ -124,6 +132,7 @@ class BrowseWebWithImagePromptsBot:
124
132
  self.screenshot_dir = f"browser_tool/{get_clean_website_name(website_name)}/{session_id}"
125
133
  os.makedirs(self.screenshot_dir, exist_ok=True)
126
134
  self.cookie_file = os.path.join(self.screenshot_dir, "cookies.json")
135
+ self.action_log_file = os.path.join(self.screenshot_dir, "action_log.json")
127
136
  self.playwright = sync_playwright().start()
128
137
 
129
138
  if browser_type == 'chromium':
@@ -138,7 +147,7 @@ class BrowseWebWithImagePromptsBot:
138
147
  self.context = self.browser.new_context()
139
148
  self.page = self.context.new_page()
140
149
  self.load_cookies()
141
- self.actions_log = []
150
+ self.action_log = []
142
151
  self.session_goal = None
143
152
  self.session_screenshots = []
144
153
 
@@ -152,72 +161,176 @@ class BrowseWebWithImagePromptsBot:
152
161
  cookies = self.context.cookies()
153
162
  with open(self.cookie_file, 'w') as f:
154
163
  json.dump(cookies, f)
164
+
165
+ def save_action_log(self):
166
+ with open(self.action_log_file, 'w') as f:
167
+ json.dump(self.action_log, f)
168
+
169
+ def load_action_log(self):
170
+ if os.path.exists(self.action_log_file):
171
+ with open(self.action_log_file, 'r') as f:
172
+ action_log = json.load(f)
173
+ self.action_log = action_log
155
174
 
156
175
  def navigate(self, url):
176
+ def handle_response(response: Response): # type: ignore
177
+ status = response.status
178
+ url = response.url
179
+ if 300 <= status < 400:
180
+ log.info(f"Redirecting from {url}")
157
181
  try:
158
- self.page.goto(url)
182
+ self.page.on("response", handle_response)
183
+
184
+ previous_url = self.page.url
185
+
186
+ response = self.page.goto(url)
187
+ status = response.status
188
+ if status != 200:
189
+ log.error(f"Failed to navigate to {url}: HTTP {status}")
190
+ self.action_log.append(f"Tried to navigate to {url} but failed: HTTP {status} - browsing back to {previous_url}")
191
+ url = previous_url
192
+ self.page.goto(previous_url)
193
+
159
194
  self.page.wait_for_load_state()
160
195
  log.info(f'Navigated to {url}')
161
- self.actions_log.append(f"Navigated to {url}")
196
+ self.action_log.append(f"Navigated to {url}")
197
+
162
198
  except Exception as err:
163
199
  log.warning(f"navigate failed with {str(err)}")
164
- self.actions_log.append(f"Tried to navigate to {url} but got an error")
200
+ self.action_log.append(f"Tried to navigate to {url} but got an error")
201
+
202
+ def get_locator(self, selector, by_text=True):
203
+ if by_text:
204
+ elements = self.page.locator(f"text={selector}").all()
205
+ if elements:
206
+ return elements[0]
207
+ else:
208
+ log.warning(f"No elements found with text: {selector}")
209
+ return None
210
+ else:
211
+ return self.page.locator(selector)
212
+
213
+ def click(self, selector, by_text=True):
214
+ (x,y)=(0,0)
165
215
 
216
+ element = self.get_locator(selector, by_text=by_text)
217
+ if element is None:
218
+ self.action_log.append(f"Tried to click on text {selector} but it was not a valid location to click")
219
+ return (x,y)
166
220
 
167
- def click(self, selector):
168
221
  try:
169
- self.page.click(selector)
222
+ bounding_box = element.bounding_box()
223
+ if bounding_box:
224
+ x = bounding_box['x'] + bounding_box['width'] / 2
225
+ y = bounding_box['y'] + bounding_box['height'] / 2
226
+ except Exception as err:
227
+ log.warning(f"Could not do bounding box - {str(err)}")
228
+
229
+ try:
230
+ element.click()
170
231
  self.page.wait_for_load_state()
171
- log.info(f"Clicked on element with selector {selector}")
172
- self.actions_log.append(f"Clicked on element with selector {selector}")
232
+ log.info(f"Clicked on element with selector {selector} at {x=},{y=}")
233
+ self.action_log.append(f"Clicked on element with selector {selector} at {x=},{y=}")
234
+
235
+ return (x,y)
236
+
173
237
  except Exception as err:
174
238
  log.warning(f"click failed with {str(err)}")
175
- self.actions_log.append(f"Tried to click on element with selector {selector} but got an error")
239
+ self.action_log.append(f"Tried to click on element with selector {selector} at {x=},{y=} but got an error")
240
+
241
+ return (x,y)
176
242
 
177
- def scroll(self, direction='down', amount=1):
243
+ def scroll(self, direction='down', amount=100):
178
244
  try:
179
- for _ in range(amount):
180
- if direction == 'down':
181
- self.page.evaluate("window.scrollBy(0, window.innerHeight)")
182
- elif direction == 'up':
183
- self.page.evaluate("window.scrollBy(0, -window.innerHeight)")
184
- elif direction == 'left':
185
- self.page.evaluate("window.scrollBy(-window.innerWidth, 0)")
186
- elif direction == 'right':
187
- self.page.evaluate("window.scrollBy(window.innerWidth, 0)")
188
- self.page.wait_for_timeout(500)
189
- log.info(f"Scrolled {direction} by {amount} page heights")
190
- self.actions_log.append(f"Scrolled {direction} by {amount} page heights")
245
+ if direction == 'down':
246
+ self.page.mouse.wheel(0, amount)
247
+ elif direction == 'up':
248
+ self.page.mouse.wheel(0, -amount)
249
+ elif direction == 'left':
250
+ self.page.mouse.wheel(-amount, 0)
251
+ elif direction == 'right':
252
+ self.page.mouse.wheel(amount, 0)
253
+ self.page.wait_for_timeout(500)
254
+ log.info(f"Scrolled {direction} by {amount} pixels")
255
+ self.action_log.append(f"Scrolled {direction} by {amount} pixels")
191
256
  except Exception as err:
192
257
  log.warning(f"Scrolled failed with {str(err)}")
193
- self.actions_log.append(f"Tried to scroll {direction} by {amount} page heights but got an error")
258
+ self.action_log.append(f"Tried to scroll {direction} by {amount} pixels but got an error")
194
259
 
260
+ def type_text(self, selector, text, by_text=True):
261
+ (x,y)=(0,0)
262
+ element = self.get_locator(selector, by_text=by_text)
263
+ if element is None:
264
+ self.action_log.append(f"Tried to type {text} via website text: {selector} but it was not a valid location to add text")
265
+ return (x,y)
195
266
 
196
- def type_text(self, selector, text):
197
267
  try:
198
- self.page.fill(selector, text)
268
+ bounding_box = element.bounding_box()
269
+ if bounding_box:
270
+ x = bounding_box['x'] + bounding_box['width'] / 2
271
+ y = bounding_box['y'] + bounding_box['height'] / 2
272
+ except Exception as err:
273
+ log.warning(f"Could not do bounding box - {str(err)}")
274
+
275
+ try:
276
+ element.fill(text)
199
277
  self.page.wait_for_load_state()
200
- log.info(f"Typed text '{text}' into element with selector {selector}")
201
- self.actions_log.append(f"Typed text '{text}' into element with selector {selector}")
278
+ log.info(f"Typed text '{text}' into element with selector {selector} at {x=},{y=}")
279
+ self.action_log.append(f"Typed text '{text}' into element with selector {selector} at {x=},{y=}")
280
+
281
+ return (x, y)
282
+
202
283
  except Exception as err:
203
284
  log.warning(f"Typed text failed with {str(err)}")
204
- self.actions_log.append(f"Tried to type text '{text}' into element with selector {selector} but got an error")
285
+ self.action_log.append(f"Tried to type text '{text}' into element with selector {selector} at {x=},{y=} but got an error")
205
286
 
206
- def take_screenshot(self, final=False):
287
+ return (x, y)
288
+
289
+ def take_screenshot(self, final=False, full_page=False, mark_action=None):
207
290
  timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
208
- parsed_url = urllib.parse.urlparse({self.page.url})
291
+ parsed_url = urllib.parse.urlparse(self.page.url)
292
+
209
293
  url_path = parsed_url.path
294
+ if url_path == "/":
295
+ url_path = "index.html"
210
296
  if final:
211
297
  screenshot_path = os.path.join(self.screenshot_dir, f"final/{timestamp}_{url_path}.png")
212
298
  else:
213
299
  screenshot_path = os.path.join(self.screenshot_dir, f"{timestamp}_{url_path}.png")
214
- self.page.screenshot(path=screenshot_path)
300
+ self.page.screenshot(path=screenshot_path, full_page=full_page)
301
+
302
+ if mark_action:
303
+ self.mark_screenshot(screenshot_path, mark_action)
304
+
215
305
  log.info(f"Screenshot {self.page.url} taken and saved to {screenshot_path}")
216
- #self.actions_log.append(f"Screenshot {self.page.url} taken and saved to {screenshot_path}")
306
+ #self.action_log.append(f"Screenshot {self.page.url} taken and saved to {screenshot_path}")
217
307
  self.session_screenshots.append(screenshot_path)
218
308
 
219
309
  return screenshot_path
220
310
 
311
+ def mark_screenshot(self, screenshot_path, mark_action):
312
+ """
313
+ Marks the screenshot with the specified action.
314
+
315
+ Parameters:
316
+ screenshot_path (str): The path to the screenshot.
317
+ mark_action (dict): Action details for marking the screenshot.
318
+ """
319
+ from PIL import Image, ImageDraw
320
+
321
+ image = Image.open(screenshot_path)
322
+ draw = ImageDraw.Draw(image)
323
+
324
+ if mark_action['type'] == 'click':
325
+ x, y = mark_action['position']
326
+ radius = 10
327
+ draw.ellipse((x-radius, y-radius, x+radius, y+radius), outline='red', width=3)
328
+ elif mark_action['type'] == 'type':
329
+ x, y = mark_action['position']
330
+ draw.rectangle((x-5, y-5, x+5, y+5), outline='blue', width=3)
331
+
332
+ image.save(screenshot_path)
333
+
221
334
  def get_latest_screenshot_path(self):
222
335
  screenshots = sorted(
223
336
  [f for f in os.listdir(self.screenshot_dir) if f.startswith('screenshot_')],
@@ -230,7 +343,7 @@ class BrowseWebWithImagePromptsBot:
230
343
 
231
344
  def create_prompt_vars(self, last_message):
232
345
  prompt = {
233
- "last_actions": self.actions_log,
346
+ "last_actions": self.action_log,
234
347
  "session_goal": self.session_goal,
235
348
  "last_message": last_message
236
349
  }
@@ -291,23 +404,30 @@ This method should be implemented by subclasses: `def send_prompt_to_llm(self, p
291
404
  if not isinstance(instructions, list):
292
405
  log.error(f"{instructions} {type(instructions)}")
293
406
  for instruction in instructions:
407
+ mark_action = None
294
408
  if not isinstance(instruction, dict):
295
409
  log.error(f"{instruction} {type(instruction)}")
296
410
  action = instruction['action']
297
411
  if action == 'navigate':
298
412
  self.navigate(instruction['url'])
299
413
  elif action == 'click':
300
- self.click(instruction['selector'])
414
+ x,y = self.click(instruction['selector'])
415
+ if (x,y) != (0,0):
416
+ mark_action = {'type':'click', 'position': (x,y)}
301
417
  elif action == 'scroll':
302
- self.scroll(instruction.get('direction', 'down'), instruction.get('amount', 1))
418
+ self.scroll(instruction.get('direction', 'down'),
419
+ int(instruction.get('amount', 1))
420
+ )
303
421
  elif action == 'type':
304
- self.type_text(instruction['selector'], instruction['text'])
422
+ x,y = self.type_text(instruction['selector'], instruction['text'])
423
+ if (x,y) != (0,0):
424
+ mark_action = {'type':'type', 'position': (x,y)}
305
425
  self.steps += 1
306
426
  if self.steps >= self.max_steps:
307
427
  log.warning(f"Reached the maximum number of steps: {self.max_steps}")
308
428
  return
309
429
 
310
- screenshot_path = self.take_screenshot()
430
+ screenshot_path = self.take_screenshot(mark_action=mark_action)
311
431
  next_browser_instructions = self.send_screenshot_to_llm(
312
432
  screenshot_path,
313
433
  last_message=last_message)
@@ -330,8 +450,10 @@ This method should be implemented by subclasses: `def send_prompt_to_llm(self, p
330
450
  if 'new_instructions' not in next_instructions:
331
451
  log.error('Browser status: "in-progress" but no new_instructions')
332
452
  last_message = next_instructions['message']
333
- log.info(f'Browser message: {last_message}')
334
- next_instructions = self.execute_instructions(next_instructions['new_instructions'], last_message=last_message)
453
+ self.action_log.append(last_message)
454
+ next_instructions = self.execute_instructions(
455
+ next_instructions['new_instructions'],
456
+ last_message=last_message)
335
457
  else:
336
458
  log.info(f'Session finished due to status={next_instructions["status"]}')
337
459
  in_session=False
@@ -344,12 +466,14 @@ This method should be implemented by subclasses: `def send_prompt_to_llm(self, p
344
466
  log.info("Session finished")
345
467
  final_path = self.take_screenshot(final=True)
346
468
  self.close()
469
+ self.save_action_log()
347
470
 
348
471
  return {
349
472
  "website": self.website_name,
350
- "log": self.actions_log,
473
+ "log": self.action_log,
351
474
  "next_instructions": next_instructions,
352
475
  "session_screenshots": self.session_screenshots,
353
476
  "final_page": final_path,
477
+ "session_goal": self.session_goal
354
478
  }
355
479
 
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.74.0
3
+ Version: 0.74.1
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.74.0.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.74.1.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -99,7 +99,7 @@ sunholo/streaming/streaming.py,sha256=9z6pXINEopuL_Z1RnmgXAoZJum9dzyuOxqYtEYnjf8
99
99
  sunholo/summarise/__init__.py,sha256=MZk3dblUMODcPb1crq4v-Z508NrFIpkSWNf9FIO8BcU,38
100
100
  sunholo/summarise/summarise.py,sha256=C3HhjepTjUhUC8FLk4jMQIBvq1BcORniwuTFHjPVhVo,3784
101
101
  sunholo/tools/__init__.py,sha256=5NuYpwwTX81qGUWvgwfItoSLXteNnp7KjgD7IPZUFjI,53
102
- sunholo/tools/web_browser.py,sha256=FupCEFaOtn4Adf7eaF1wAubYIF_gOiHTlbj1qwLdi8o,15297
102
+ sunholo/tools/web_browser.py,sha256=ElwIBtVptyYcPd0wo7WXLNYCC02FJL_Lv3cfTzOJpnQ,19663
103
103
  sunholo/utils/__init__.py,sha256=Hv02T5L2zYWvCso5hzzwm8FQogwBq0OgtUbN_7Quzqc,89
104
104
  sunholo/utils/api_key.py,sha256=Ct4bIAQZxzPEw14hP586LpVxBAVi_W9Serpy0BK-7KI,244
105
105
  sunholo/utils/big_context.py,sha256=gJIP7_ZL-YSLhOMq8jmFTMqH1wq8eB1NK7oKPeZAq2s,5578
@@ -117,9 +117,9 @@ sunholo/vertex/extensions_class.py,sha256=4PsUM9dSYrIPpq9bZ3K2rL9MRb_rlqAgnMsW0o
117
117
  sunholo/vertex/init.py,sha256=-w7b9GKsyJnAJpYHYz6_zBUtmeJeLXlEkgOfwoe4DEI,2715
118
118
  sunholo/vertex/memory_tools.py,sha256=pomHrDKqvY8MZxfUqoEwhdlpCvSGP6KmFJMVKOimXjs,6842
119
119
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
120
- sunholo-0.74.0.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
121
- sunholo-0.74.0.dist-info/METADATA,sha256=y18aO3XDpmIm0YqXiXmCQhvkuDx950rhG05YJt0Uvcc,7010
122
- sunholo-0.74.0.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
123
- sunholo-0.74.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
124
- sunholo-0.74.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
125
- sunholo-0.74.0.dist-info/RECORD,,
120
+ sunholo-0.74.1.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
121
+ sunholo-0.74.1.dist-info/METADATA,sha256=6QFlkGilosGyFUklfh5uzkTD4ghMdfCNuzwyLmiSyCE,7010
122
+ sunholo-0.74.1.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
123
+ sunholo-0.74.1.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
124
+ sunholo-0.74.1.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
125
+ sunholo-0.74.1.dist-info/RECORD,,