sunholo 0.74.0__py3-none-any.whl → 0.74.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,11 +3,18 @@ import base64
3
3
  import json
4
4
  from datetime import datetime
5
5
  import urllib.parse
6
+ import time
6
7
 
7
8
  from ..logging import log
8
9
 
9
10
  from ..utils.parsers import get_clean_website_name
10
11
 
12
+ try:
13
+ from playwright.sync_api import sync_playwright, Response
14
+ except ImportError:
15
+ sync_playwright = None
16
+ Response = None
17
+
11
18
  class BrowseWebWithImagePromptsBot:
12
19
  """
13
20
  BrowseWebWithImagePromptsBot is a base class for creating bots that interact with web pages using Playwright.
@@ -114,8 +121,10 @@ class BrowseWebWithImagePromptsBot:
114
121
  except ImportError as err:
115
122
  print(err)
116
123
  sync_playwright = None
124
+
117
125
  if not sync_playwright:
118
126
  raise ImportError("playright needed for BrowseWebWithImagePromptsBot class - install via `pip install sunholo[tools]`")
127
+
119
128
  self.session_id = session_id or datetime.now().strftime("%Y%m%d%H%M%S")
120
129
  self.website_name = website_name
121
130
  self.browser_type = browser_type
@@ -124,6 +133,7 @@ class BrowseWebWithImagePromptsBot:
124
133
  self.screenshot_dir = f"browser_tool/{get_clean_website_name(website_name)}/{session_id}"
125
134
  os.makedirs(self.screenshot_dir, exist_ok=True)
126
135
  self.cookie_file = os.path.join(self.screenshot_dir, "cookies.json")
136
+ self.action_log_file = os.path.join(self.screenshot_dir, "action_log.json")
127
137
  self.playwright = sync_playwright().start()
128
138
 
129
139
  if browser_type == 'chromium':
@@ -138,7 +148,7 @@ class BrowseWebWithImagePromptsBot:
138
148
  self.context = self.browser.new_context()
139
149
  self.page = self.context.new_page()
140
150
  self.load_cookies()
141
- self.actions_log = []
151
+ self.action_log = []
142
152
  self.session_goal = None
143
153
  self.session_screenshots = []
144
154
 
@@ -152,72 +162,176 @@ class BrowseWebWithImagePromptsBot:
152
162
  cookies = self.context.cookies()
153
163
  with open(self.cookie_file, 'w') as f:
154
164
  json.dump(cookies, f)
165
+
166
+ def save_action_log(self):
167
+ with open(self.action_log_file, 'w') as f:
168
+ json.dump(self.action_log, f)
169
+
170
+ def load_action_log(self):
171
+ if os.path.exists(self.action_log_file):
172
+ with open(self.action_log_file, 'r') as f:
173
+ action_log = json.load(f)
174
+ self.action_log = action_log
155
175
 
156
176
  def navigate(self, url):
177
+ def handle_response(response: Response): # type: ignore
178
+ status = response.status
179
+ url = response.url
180
+ if 300 <= status < 400:
181
+ log.info(f"Redirecting from {url}")
157
182
  try:
158
- self.page.goto(url)
183
+ self.page.on("response", handle_response)
184
+
185
+ previous_url = self.page.url
186
+
187
+ response = self.page.goto(url)
188
+ status = response.status
189
+ if status != 200:
190
+ log.error(f"Failed to navigate to {url}: HTTP {status}")
191
+ self.action_log.append(f"Tried to navigate to {url} but failed: HTTP {status} - browsing back to {previous_url}")
192
+ url = previous_url
193
+ self.page.goto(previous_url)
194
+
159
195
  self.page.wait_for_load_state()
160
196
  log.info(f'Navigated to {url}')
161
- self.actions_log.append(f"Navigated to {url}")
197
+ self.action_log.append(f"Navigated to {url}")
198
+
162
199
  except Exception as err:
163
200
  log.warning(f"navigate failed with {str(err)}")
164
- self.actions_log.append(f"Tried to navigate to {url} but got an error")
201
+ self.action_log.append(f"Tried to navigate to {url} but got an error")
202
+
203
+ def get_locator(self, selector, by_text=True):
204
+ if by_text:
205
+ elements = self.page.locator(f"text={selector}").all()
206
+ if elements:
207
+ return elements[0]
208
+ else:
209
+ log.warning(f"No elements found with text: {selector}")
210
+ return None
211
+ else:
212
+ return self.page.locator(selector)
213
+
214
+ def click(self, selector, by_text=True):
215
+ (x,y)=(0,0)
165
216
 
217
+ element = self.get_locator(selector, by_text=by_text)
218
+ if element is None:
219
+ self.action_log.append(f"Tried to click on text {selector} but it was not a valid location to click")
220
+ return (x,y)
166
221
 
167
- def click(self, selector):
168
222
  try:
169
- self.page.click(selector)
223
+ bounding_box = element.bounding_box()
224
+ if bounding_box:
225
+ x = bounding_box['x'] + bounding_box['width'] / 2
226
+ y = bounding_box['y'] + bounding_box['height'] / 2
227
+ except Exception as err:
228
+ log.warning(f"Could not do bounding box - {str(err)}")
229
+
230
+ try:
231
+ element.click()
170
232
  self.page.wait_for_load_state()
171
- log.info(f"Clicked on element with selector {selector}")
172
- self.actions_log.append(f"Clicked on element with selector {selector}")
233
+ log.info(f"Clicked on element with selector {selector} at {x=},{y=}")
234
+ self.action_log.append(f"Clicked on element with selector {selector} at {x=},{y=}")
235
+
236
+ return (x,y)
237
+
173
238
  except Exception as err:
174
239
  log.warning(f"click failed with {str(err)}")
175
- self.actions_log.append(f"Tried to click on element with selector {selector} but got an error")
240
+ self.action_log.append(f"Tried to click on element with selector {selector} at {x=},{y=} but got an error")
176
241
 
177
- def scroll(self, direction='down', amount=1):
242
+ return (x,y)
243
+
244
+ def scroll(self, direction='down', amount=100):
178
245
  try:
179
- for _ in range(amount):
180
- if direction == 'down':
181
- self.page.evaluate("window.scrollBy(0, window.innerHeight)")
182
- elif direction == 'up':
183
- self.page.evaluate("window.scrollBy(0, -window.innerHeight)")
184
- elif direction == 'left':
185
- self.page.evaluate("window.scrollBy(-window.innerWidth, 0)")
186
- elif direction == 'right':
187
- self.page.evaluate("window.scrollBy(window.innerWidth, 0)")
188
- self.page.wait_for_timeout(500)
189
- log.info(f"Scrolled {direction} by {amount} page heights")
190
- self.actions_log.append(f"Scrolled {direction} by {amount} page heights")
246
+ if direction == 'down':
247
+ self.page.mouse.wheel(0, amount)
248
+ elif direction == 'up':
249
+ self.page.mouse.wheel(0, -amount)
250
+ elif direction == 'left':
251
+ self.page.mouse.wheel(-amount, 0)
252
+ elif direction == 'right':
253
+ self.page.mouse.wheel(amount, 0)
254
+ self.page.wait_for_timeout(500)
255
+ log.info(f"Scrolled {direction} by {amount} pixels")
256
+ self.action_log.append(f"Scrolled {direction} by {amount} pixels")
191
257
  except Exception as err:
192
258
  log.warning(f"Scrolled failed with {str(err)}")
193
- self.actions_log.append(f"Tried to scroll {direction} by {amount} page heights but got an error")
259
+ self.action_log.append(f"Tried to scroll {direction} by {amount} pixels but got an error")
194
260
 
261
+ def type_text(self, selector, text, by_text=True):
262
+ (x,y)=(0,0)
263
+ element = self.get_locator(selector, by_text=by_text)
264
+ if element is None:
265
+ self.action_log.append(f"Tried to type {text} via website text: {selector} but it was not a valid location to add text")
266
+ return (x,y)
195
267
 
196
- def type_text(self, selector, text):
197
268
  try:
198
- self.page.fill(selector, text)
269
+ bounding_box = element.bounding_box()
270
+ if bounding_box:
271
+ x = bounding_box['x'] + bounding_box['width'] / 2
272
+ y = bounding_box['y'] + bounding_box['height'] / 2
273
+ except Exception as err:
274
+ log.warning(f"Could not do bounding box - {str(err)}")
275
+
276
+ try:
277
+ element.fill(text)
199
278
  self.page.wait_for_load_state()
200
- log.info(f"Typed text '{text}' into element with selector {selector}")
201
- self.actions_log.append(f"Typed text '{text}' into element with selector {selector}")
279
+ log.info(f"Typed text '{text}' into element with selector {selector} at {x=},{y=}")
280
+ self.action_log.append(f"Typed text '{text}' into element with selector {selector} at {x=},{y=}")
281
+
282
+ return (x, y)
283
+
202
284
  except Exception as err:
203
285
  log.warning(f"Typed text failed with {str(err)}")
204
- self.actions_log.append(f"Tried to type text '{text}' into element with selector {selector} but got an error")
286
+ self.action_log.append(f"Tried to type text '{text}' into element with selector {selector} at {x=},{y=} but got an error")
205
287
 
206
- def take_screenshot(self, final=False):
288
+ return (x, y)
289
+
290
+ def take_screenshot(self, final=False, full_page=False, mark_action=None):
207
291
  timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
208
- parsed_url = urllib.parse.urlparse({self.page.url})
292
+ parsed_url = urllib.parse.urlparse(self.page.url)
293
+
209
294
  url_path = parsed_url.path
295
+ if url_path == "/":
296
+ url_path = "index.html"
210
297
  if final:
211
298
  screenshot_path = os.path.join(self.screenshot_dir, f"final/{timestamp}_{url_path}.png")
212
299
  else:
213
300
  screenshot_path = os.path.join(self.screenshot_dir, f"{timestamp}_{url_path}.png")
214
- self.page.screenshot(path=screenshot_path)
301
+ self.page.screenshot(path=screenshot_path, full_page=full_page)
302
+
303
+ if mark_action:
304
+ self.mark_screenshot(screenshot_path, mark_action)
305
+
215
306
  log.info(f"Screenshot {self.page.url} taken and saved to {screenshot_path}")
216
- #self.actions_log.append(f"Screenshot {self.page.url} taken and saved to {screenshot_path}")
307
+ #self.action_log.append(f"Screenshot {self.page.url} taken and saved to {screenshot_path}")
217
308
  self.session_screenshots.append(screenshot_path)
218
309
 
219
310
  return screenshot_path
220
311
 
312
+ def mark_screenshot(self, screenshot_path, mark_action):
313
+ """
314
+ Marks the screenshot with the specified action.
315
+
316
+ Parameters:
317
+ screenshot_path (str): The path to the screenshot.
318
+ mark_action (dict): Action details for marking the screenshot.
319
+ """
320
+ from PIL import Image, ImageDraw
321
+
322
+ image = Image.open(screenshot_path)
323
+ draw = ImageDraw.Draw(image)
324
+
325
+ if mark_action['type'] == 'click':
326
+ x, y = mark_action['position']
327
+ radius = 10
328
+ draw.ellipse((x-radius, y-radius, x+radius, y+radius), outline='red', width=3)
329
+ elif mark_action['type'] == 'type':
330
+ x, y = mark_action['position']
331
+ draw.rectangle((x-5, y-5, x+5, y+5), outline='blue', width=3)
332
+
333
+ image.save(screenshot_path)
334
+
221
335
  def get_latest_screenshot_path(self):
222
336
  screenshots = sorted(
223
337
  [f for f in os.listdir(self.screenshot_dir) if f.startswith('screenshot_')],
@@ -230,7 +344,7 @@ class BrowseWebWithImagePromptsBot:
230
344
 
231
345
  def create_prompt_vars(self, last_message):
232
346
  prompt = {
233
- "last_actions": self.actions_log,
347
+ "last_actions": self.action_log,
234
348
  "session_goal": self.session_goal,
235
349
  "last_message": last_message
236
350
  }
@@ -241,12 +355,18 @@ class BrowseWebWithImagePromptsBot:
241
355
  output = response
242
356
  elif isinstance(response, str):
243
357
  output = json.loads(response)
244
-
245
- #TODO: more validation
246
- log.info(f'Response: {output=}')
358
+ elif isinstance(response, list):
359
+ log.warning(f'Response was a list, assuming its only new_instructions: {response=}')
360
+ output['new_instructions'] = response
361
+ output['status'] = 'in-progress'
362
+ output['message'] = 'No message was received, which is a mistake by the assistant'
363
+ else:
364
+ log.warning(f'Unknown response: {response=} {type(response)}')
365
+ output = None
247
366
 
248
367
  if 'status' not in output:
249
368
  log.error(f'Response did not contain status')
369
+
250
370
 
251
371
  if 'new_instructions' not in output:
252
372
  log.warning(f'Response did not include new_instructions')
@@ -278,6 +398,8 @@ This method should be implemented by subclasses: `def send_prompt_to_llm(self, p
278
398
  self.save_cookies()
279
399
  self.browser.close()
280
400
  self.playwright.stop()
401
+ self.save_action_log()
402
+ self.create_gif_from_pngs()
281
403
 
282
404
  def execute_instructions(self, instructions: list, last_message: str=None):
283
405
  if not instructions:
@@ -291,28 +413,72 @@ This method should be implemented by subclasses: `def send_prompt_to_llm(self, p
291
413
  if not isinstance(instructions, list):
292
414
  log.error(f"{instructions} {type(instructions)}")
293
415
  for instruction in instructions:
416
+ mark_action = None
294
417
  if not isinstance(instruction, dict):
295
418
  log.error(f"{instruction} {type(instruction)}")
296
419
  action = instruction['action']
297
420
  if action == 'navigate':
298
421
  self.navigate(instruction['url'])
299
422
  elif action == 'click':
300
- self.click(instruction['selector'])
423
+ x,y = self.click(instruction['selector'])
424
+ if (x,y) != (0,0):
425
+ mark_action = {'type':'click', 'position': (x,y)}
301
426
  elif action == 'scroll':
302
- self.scroll(instruction.get('direction', 'down'), instruction.get('amount', 1))
427
+ self.scroll(instruction.get('direction', 'down'),
428
+ int(instruction.get('amount', 1))
429
+ )
303
430
  elif action == 'type':
304
- self.type_text(instruction['selector'], instruction['text'])
431
+ x,y = self.type_text(instruction['selector'], instruction['text'])
432
+ if (x,y) != (0,0):
433
+ mark_action = {'type':'type', 'position': (x,y)}
305
434
  self.steps += 1
306
435
  if self.steps >= self.max_steps:
307
436
  log.warning(f"Reached the maximum number of steps: {self.max_steps}")
308
437
  return
309
-
310
- screenshot_path = self.take_screenshot()
438
+ time.sleep(2)
439
+ screenshot_path = self.take_screenshot(mark_action=mark_action)
311
440
  next_browser_instructions = self.send_screenshot_to_llm(
312
441
  screenshot_path,
313
442
  last_message=last_message)
314
443
 
315
444
  return next_browser_instructions
445
+
446
+ def create_gif_from_pngs(self, frame_duration=500):
447
+ """
448
+ Creates a GIF from a folder of PNG images.
449
+
450
+ Args:
451
+ folder_path (str): The path to the folder containing PNG images.
452
+ output_gif_path (str): The path where the output GIF will be saved.
453
+ duration (int): Duration between frames in milliseconds.
454
+
455
+ Example:
456
+ create_gif_from_pngs('/path/to/png_folder', '/path/to/output.gif', duration=500)
457
+ """
458
+ from PIL import Image
459
+
460
+ folder_path=self.screenshot_dir
461
+ output_gif_path = os.path.join(self.screenshot_dir, "session.gif")
462
+
463
+ # List all PNG files in the folder
464
+ png_files = [f for f in sorted(os.listdir(folder_path)) if f.endswith('.png')]
465
+
466
+ # Open images and store them in a list
467
+ images = [Image.open(os.path.join(folder_path, file)) for file in png_files]
468
+
469
+ duration = len(images) * frame_duration
470
+ # Save images as a GIF
471
+ if images:
472
+ images[0].save(
473
+ output_gif_path,
474
+ save_all=True,
475
+ append_images=images[1:],
476
+ duration=duration,
477
+ loop=0
478
+ )
479
+ print(f"GIF saved at {output_gif_path}")
480
+ else:
481
+ print("No PNG images found in the folder.")
316
482
 
317
483
  def start_session(self, instructions, session_goal):
318
484
  self.session_goal = session_goal
@@ -322,6 +488,9 @@ This method should be implemented by subclasses: `def send_prompt_to_llm(self, p
322
488
 
323
489
  next_instructions = self.execute_instructions(instructions)
324
490
 
491
+ # load previous actions from same session
492
+ self.load_action_log()
493
+
325
494
  in_session = True
326
495
  while in_session:
327
496
  if next_instructions and 'status' in next_instructions:
@@ -330,8 +499,10 @@ This method should be implemented by subclasses: `def send_prompt_to_llm(self, p
330
499
  if 'new_instructions' not in next_instructions:
331
500
  log.error('Browser status: "in-progress" but no new_instructions')
332
501
  last_message = next_instructions['message']
333
- log.info(f'Browser message: {last_message}')
334
- next_instructions = self.execute_instructions(next_instructions['new_instructions'], last_message=last_message)
502
+ self.action_log.append(last_message)
503
+ next_instructions = self.execute_instructions(
504
+ next_instructions['new_instructions'],
505
+ last_message=last_message)
335
506
  else:
336
507
  log.info(f'Session finished due to status={next_instructions["status"]}')
337
508
  in_session=False
@@ -342,14 +513,16 @@ This method should be implemented by subclasses: `def send_prompt_to_llm(self, p
342
513
  break
343
514
 
344
515
  log.info("Session finished")
345
- final_path = self.take_screenshot(final=True)
516
+ final_screenshot = self.take_screenshot()
517
+
346
518
  self.close()
347
519
 
348
520
  return {
349
521
  "website": self.website_name,
350
- "log": self.actions_log,
522
+ "log": self.action_log,
351
523
  "next_instructions": next_instructions,
352
524
  "session_screenshots": self.session_screenshots,
353
- "final_page": final_path,
525
+ "final_screenshot": final_screenshot,
526
+ "session_goal": self.session_goal
354
527
  }
355
528
 
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.74.0
3
+ Version: 0.74.2
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.74.0.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.74.2.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -99,7 +99,7 @@ sunholo/streaming/streaming.py,sha256=9z6pXINEopuL_Z1RnmgXAoZJum9dzyuOxqYtEYnjf8
99
99
  sunholo/summarise/__init__.py,sha256=MZk3dblUMODcPb1crq4v-Z508NrFIpkSWNf9FIO8BcU,38
100
100
  sunholo/summarise/summarise.py,sha256=C3HhjepTjUhUC8FLk4jMQIBvq1BcORniwuTFHjPVhVo,3784
101
101
  sunholo/tools/__init__.py,sha256=5NuYpwwTX81qGUWvgwfItoSLXteNnp7KjgD7IPZUFjI,53
102
- sunholo/tools/web_browser.py,sha256=FupCEFaOtn4Adf7eaF1wAubYIF_gOiHTlbj1qwLdi8o,15297
102
+ sunholo/tools/web_browser.py,sha256=FqFD9uI1VQ9ui10evIqxTgor_xqM9LhGt16Fz3EmP-w,21533
103
103
  sunholo/utils/__init__.py,sha256=Hv02T5L2zYWvCso5hzzwm8FQogwBq0OgtUbN_7Quzqc,89
104
104
  sunholo/utils/api_key.py,sha256=Ct4bIAQZxzPEw14hP586LpVxBAVi_W9Serpy0BK-7KI,244
105
105
  sunholo/utils/big_context.py,sha256=gJIP7_ZL-YSLhOMq8jmFTMqH1wq8eB1NK7oKPeZAq2s,5578
@@ -117,9 +117,9 @@ sunholo/vertex/extensions_class.py,sha256=4PsUM9dSYrIPpq9bZ3K2rL9MRb_rlqAgnMsW0o
117
117
  sunholo/vertex/init.py,sha256=-w7b9GKsyJnAJpYHYz6_zBUtmeJeLXlEkgOfwoe4DEI,2715
118
118
  sunholo/vertex/memory_tools.py,sha256=pomHrDKqvY8MZxfUqoEwhdlpCvSGP6KmFJMVKOimXjs,6842
119
119
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
120
- sunholo-0.74.0.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
121
- sunholo-0.74.0.dist-info/METADATA,sha256=y18aO3XDpmIm0YqXiXmCQhvkuDx950rhG05YJt0Uvcc,7010
122
- sunholo-0.74.0.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
123
- sunholo-0.74.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
124
- sunholo-0.74.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
125
- sunholo-0.74.0.dist-info/RECORD,,
120
+ sunholo-0.74.2.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
121
+ sunholo-0.74.2.dist-info/METADATA,sha256=BVh6lN_0n6RL1KsMmzybJEAvSno1p1zc7iRqnh12iJQ,7010
122
+ sunholo-0.74.2.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
123
+ sunholo-0.74.2.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
124
+ sunholo-0.74.2.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
125
+ sunholo-0.74.2.dist-info/RECORD,,