sunholo 0.73.2__py3-none-any.whl → 0.74.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sunholo/cli/chat_vac.py CHANGED
@@ -8,6 +8,8 @@ from ..qna.parsers import parse_output
8
8
  from ..gcs.add_file import add_file_to_gcs
9
9
  from .run_proxy import clean_proxy_list, start_proxy, stop_proxy
10
10
  from ..invoke import invoke_vac
11
+ from ..utils.big_context import has_text_extension, merge_text_files, load_gitignore_patterns, build_file_tree
12
+ import tempfile
11
13
 
12
14
  import uuid
13
15
  import os
@@ -24,6 +26,55 @@ from rich.panel import Panel
24
26
  from rich.text import Text
25
27
  from rich.table import Table
26
28
 
29
+ def read_and_add_to_user_input(user_input):
30
+ read_input = None
31
+
32
+ path = user_input.split(" ", 1)[1] if " " in user_input else None
33
+ if not path:
34
+ console.print("[bold red]Please provide a valid file or folder path.[/bold red]")
35
+ return None
36
+
37
+ if os.path.isfile(path):
38
+ if not has_text_extension(path):
39
+ console.print("[bold red]Unsupported file type. Please provide a text file or preprocess to text, or use !upload (e.g. images) or `sunholo embed`.[/bold red]")
40
+ return None
41
+
42
+ try:
43
+ with open(path, 'r', encoding='utf-8') as file:
44
+ file_content = file.read()
45
+ read_input = file_content
46
+ console.print(f"[bold yellow]File content from {path} read into user_input: [{len(read_input.split())}] words[/bold yellow]")
47
+ except FileNotFoundError:
48
+ console.print("[bold red]File not found. Please check the path and try again.[/bold red]")
49
+ return None
50
+ except IOError:
51
+ console.print("[bold red]File could not be read. Please ensure it is a readable text file.[/bold red]")
52
+ return None
53
+ elif os.path.isdir(path):
54
+ patterns = []
55
+ gitignore_path = os.path.join(path, '.gitignore')
56
+
57
+ if os.path.exists(gitignore_path):
58
+ patterns = load_gitignore_patterns(gitignore_path)
59
+
60
+ try:
61
+ with tempfile.NamedTemporaryFile(delete=False, mode='w+', encoding='utf-8') as temp_file:
62
+ temp_file_path = temp_file.name
63
+ file_tree = merge_text_files(path, temp_file_path, patterns)
64
+ console.print(f"[bold yellow]Contents of the folder '{path}' have been merged add added to input.[/bold yellow]")
65
+ console.print("\n".join(file_tree))
66
+ temp_file.seek(0)
67
+ read_input = temp_file.read()
68
+ console.print(f"[bold yellow]Total words: [{len(read_input.split())}] - watch out for high token costs! Use !clear_read to reset[/bold yellow]")
69
+ os.remove(temp_file_path) # Clean up the temporary file
70
+ except Exception as e:
71
+ console.print(f"[bold red]An error occurred while reading the folder: {str(e)}[/bold red]")
72
+ return None
73
+ else:
74
+ console.print("[bold red]The provided path is neither a file nor a folder. Please check the path and try again.[/bold red]")
75
+ return None
76
+
77
+ return read_input
27
78
 
28
79
  def get_service_url(vac_name, project, region, no_config=False):
29
80
 
@@ -67,6 +118,8 @@ def stream_chat_session(service_url, service_name, stream=True):
67
118
  chat_history = []
68
119
  agent_name = ConfigManager(service_name).vacConfig("agent")
69
120
  file_reply = None
121
+ read_file = None
122
+ read_file_count = None
70
123
  while True:
71
124
  session_id = str(uuid.uuid4())
72
125
  user_input = Prompt.ask("[bold cyan]You[/bold cyan]")
@@ -81,9 +134,26 @@ def stream_chat_session(service_url, service_name, stream=True):
81
134
 
82
135
  if special_reply:
83
136
  console.print(f"[bold yellow]{service_name}:[/bold yellow] {special_reply}", end='\n')
84
- continue
85
-
86
- if user_input.lower().startswith("upload"):
137
+ continue
138
+
139
+ if user_input.lower().startswith("!read"):
140
+ read_file = read_and_add_to_user_input(user_input)
141
+ if read_file:
142
+ read_file_count = len(read_file.split())
143
+ continue
144
+
145
+ if user_input.lower().startswith("!ls"):
146
+ items = os.listdir(os.getcwd())
147
+ for item in items:
148
+ console.print(item)
149
+ continue
150
+
151
+ if user_input.lower().startswith("!tree"):
152
+ tree = build_file_tree(os.getcwd(), patterns=[])
153
+ console.print(tree)
154
+ continue
155
+
156
+ if user_input.lower().startswith("!upload"):
87
157
  file_path = user_input.split(" ", 1)[1] if " " in user_input else None
88
158
  if not file_path:
89
159
  console.print("[bold red]Please provide a valid file path.[/bold red]")
@@ -95,7 +165,7 @@ def stream_chat_session(service_url, service_name, stream=True):
95
165
  console.print("[bold red]Invalid file upload[/bold red]")
96
166
  continue
97
167
 
98
- console.print(f"[bold yellow]{service_name}:[/bold yellow] Uploaded {file_path} to {file_reply} - image will be sent each reply until you issue 'clear_upload' ", end='\n')
168
+ console.print(f"[bold yellow]{service_name}:[/bold yellow] Uploaded {file_path} to {file_reply} - image will be sent each reply until you issue '!clear_upload' ", end='\n')
99
169
 
100
170
  except FileNotFoundError:
101
171
  console.print("[bold red]File not found. Please check the path and try again.[/bold red]")
@@ -103,10 +173,29 @@ def stream_chat_session(service_url, service_name, stream=True):
103
173
  # file_reply stays for each message from now on
104
174
  continue
105
175
 
106
- if user_input.lower().startswith("clear_upload"):
176
+ if user_input.lower().startswith("!clear_upload"):
107
177
  console.print("[bold yellow]File upload path cleared.[/bold yellow]")
108
178
  file_path = None
179
+ continue
180
+
181
+ if user_input.lower().startswith("!clear_read"):
182
+ console.print("[bold yellow]Read in file(s) cleared.[/bold yellow]")
183
+ read_file = None
184
+ read_file_count = None
185
+ continue
186
+
187
+ if user_input.lower().startswith("!"):
188
+ console.print("[bold red]Could find no valid chat command for you, sorry[/bold red]")
189
+ continue
190
+
191
+ if read_file:
192
+ user_input = f"<user added file>{read_file}</user added file>\n{user_input}"
109
193
 
194
+ # guardrail
195
+ if len(user_input)> 1000000:
196
+ console.print("[bold red]Over 1 million characters in user_input, aborting as probably unintentional. Use API directly instead.[/bold red]")
197
+ continue
198
+
110
199
  if not stream:
111
200
  vac_response = send_to_qa(user_input,
112
201
  vector_name=service_name,
@@ -166,8 +255,15 @@ def stream_chat_session(service_url, service_name, stream=True):
166
255
  response_started = False
167
256
  vac_response = ""
168
257
 
169
- # point or star?
170
- with console.status(f"[bold orange]Thinking...{file_reply}[/bold orange]", spinner="star") as status:
258
+
259
+ thinking = "[bold orange]Thinking...[/bold orange]"
260
+ if file_reply:
261
+ thinking = f"[bold orange]Thinking with upload {file_reply} - issue !clear_upload to remove...[/bold orange]"
262
+
263
+ if read_file:
264
+ thinking = f"{thinking} - [bold orange]additional [{read_file_count}] words added via !read_file contents - issue !clear_read to remove[/bold orange]"
265
+
266
+ with console.status(thinking, spinner="star") as status:
171
267
  for token in stream_response():
172
268
  if not response_started:
173
269
  status.stop()
sunholo/tools/__init__.py CHANGED
@@ -0,0 +1 @@
1
+ from .web_browser import BrowseWebWithImagePromptsBot
@@ -2,23 +2,72 @@ import os
2
2
  import base64
3
3
  import json
4
4
  from datetime import datetime
5
- try:
6
- from playwright.sync_api import sync_playwright
7
- except ImportError:
8
- sync_playwright = None
5
+ import urllib.parse
6
+
7
+ from ..logging import log
8
+
9
+ from ..utils.parsers import get_clean_website_name
9
10
 
10
11
  class BrowseWebWithImagePromptsBot:
11
12
  """
12
- Examples:
13
+ BrowseWebWithImagePromptsBot is a base class for creating bots that interact with web pages using Playwright.
14
+ The bot can perform actions such as navigating, clicking, scrolling, typing text, and taking screenshots.
15
+ It also supports cookie management to maintain session state across interactions.
16
+
17
+ Methods:
18
+ - __init__(session_id, website_name, browser_type='chromium', headless=True):
19
+ Initializes the bot with the given session ID, website name, browser type, and headless mode.
20
+ Supported browser types: 'chromium', 'firefox', 'webkit'.
21
+
22
+ - load_cookies():
23
+ Loads cookies from a file and adds them to the browser context.
24
+
25
+ - save_cookies():
26
+ Saves the current cookies to a file.
27
+
28
+ - navigate(url):
29
+ Navigates to the specified URL.
30
+
31
+ - click(selector):
32
+ Clicks on the element specified by the selector.
33
+
34
+ - scroll(direction='down', amount=1):
35
+ Scrolls the page in the specified direction ('down', 'up', 'left', 'right') by the specified amount.
36
+
37
+ - type_text(selector, text):
38
+ Types the specified text into the element specified by the selector.
39
+
40
+ - take_screenshot():
41
+ Takes a screenshot and saves it with a timestamp in the session-specific directory. Returns the path to the screenshot.
42
+
43
+ - get_latest_screenshot_path():
44
+ Retrieves the path to the most recent screenshot in the session-specific directory.
45
+
46
+ - create_prompt_vars(current_action_description, session_goal):
47
+ Creates a dictionary of prompt variables from the current action description and session goal.
48
+
49
+ - send_screenshot_to_llm(screenshot_path, current_action_description="", session_goal=""):
50
+ Encodes the screenshot in base64, creates prompt variables, and sends them to the LLM. Returns the new instructions from the LLM.
51
+
52
+ - send_prompt_to_llm(prompt_vars, screenshot_base64):
53
+ Abstract method to be implemented by subclasses. Sends the prompt variables and screenshot to the LLM and returns the response.
54
+
55
+ - close():
56
+ Saves cookies, closes the browser, and stops Playwright.
57
+
58
+ - execute_instructions(instructions):
59
+ Executes the given set of instructions, takes a screenshot after each step, and sends the screenshot to the LLM for further instructions.
60
+
61
+ Example usage:
13
62
 
14
63
  ```python
15
64
  class ProductionBot(BrowseWebWithImagePromptsBot):
16
- def send_prompt_to_llm(self, prompt, screenshot_base64):
65
+ def send_prompt_to_llm(self, prompt_vars, screenshot_base64):
17
66
  # Implement the actual logic to send the prompt and screenshot to the LLM and return the response
18
67
  api_url = "https://api.example.com/process" # Replace with the actual LLM API endpoint
19
68
  headers = {"Content-Type": "application/json"}
20
69
  data = {
21
- "prompt": prompt,
70
+ "prompt": prompt_vars,
22
71
  "screenshot": screenshot_base64
23
72
  }
24
73
  response = requests.post(api_url, headers=headers, data=json.dumps(data))
@@ -31,7 +80,7 @@ class BrowseWebWithImagePromptsBot:
31
80
  website_name = data.get('website_name')
32
81
  browser_type = data.get('browser_type', 'chromium')
33
82
  current_action_description = data.get('current_action_description', "")
34
- next_goal = data.get('next_goal', "")
83
+ session_goal = data.get('session_goal', "")
35
84
 
36
85
  bot = ProductionBot(session_id=session_id, website_name=website_name, browser_type=browser_type, headless=True)
37
86
 
@@ -39,12 +88,13 @@ class BrowseWebWithImagePromptsBot:
39
88
  initial_instructions = data.get('instructions')
40
89
  if initial_instructions:
41
90
  bot.execute_instructions(initial_instructions)
91
+ else:
92
+ bot.execute_instructions([{'action':'navigate', 'url': website_name}])
42
93
 
43
- # Take initial screenshot and send to LLM if no instructions provided
44
- if not initial_instructions:
45
- screenshot_path = bot.take_screenshot()
46
- new_instructions = bot.send_screenshot_to_llm(screenshot_path, current_action_description, next_goal)
47
- bot.execute_instructions(new_instructions)
94
+ # Take initial screenshot and send to LLM
95
+ screenshot_path = bot.take_screenshot()
96
+ new_instructions = bot.send_screenshot_to_llm(screenshot_path, current_action_description, session_goal)
97
+ bot.execute_instructions(new_instructions)
48
98
 
49
99
  # Take final screenshot
50
100
  bot.take_screenshot()
@@ -57,13 +107,21 @@ class BrowseWebWithImagePromptsBot:
57
107
  app.run(host='0.0.0.0', port=8080)
58
108
  ```
59
109
  """
60
- def __init__(self, session_id, website_name, browser_type='chromium', headless=True):
110
+ #class BrowseWebWithImagePromptsBot:
111
+ def __init__(self, session_id, website_name, browser_type='chromium', headless=True, max_steps=10):
112
+ try:
113
+ from playwright.sync_api import sync_playwright
114
+ except ImportError as err:
115
+ print(err)
116
+ sync_playwright = None
61
117
  if not sync_playwright:
62
118
  raise ImportError("playright needed for BrowseWebWithImagePromptsBot class - install via `pip install sunholo[tools]`")
63
- self.session_id = session_id
119
+ self.session_id = session_id or datetime.now().strftime("%Y%m%d%H%M%S")
64
120
  self.website_name = website_name
65
121
  self.browser_type = browser_type
66
- self.screenshot_dir = f"{website_name}_{session_id}"
122
+ self.max_steps = max_steps
123
+ self.steps = 0
124
+ self.screenshot_dir = f"browser_tool/{get_clean_website_name(website_name)}/{session_id}"
67
125
  os.makedirs(self.screenshot_dir, exist_ok=True)
68
126
  self.cookie_file = os.path.join(self.screenshot_dir, "cookies.json")
69
127
  self.playwright = sync_playwright().start()
@@ -80,6 +138,9 @@ class BrowseWebWithImagePromptsBot:
80
138
  self.context = self.browser.new_context()
81
139
  self.page = self.context.new_page()
82
140
  self.load_cookies()
141
+ self.actions_log = []
142
+ self.session_goal = None
143
+ self.session_screenshots = []
83
144
 
84
145
  def load_cookies(self):
85
146
  if os.path.exists(self.cookie_file):
@@ -93,29 +154,68 @@ class BrowseWebWithImagePromptsBot:
93
154
  json.dump(cookies, f)
94
155
 
95
156
  def navigate(self, url):
96
- self.page.goto(url)
157
+ try:
158
+ self.page.goto(url)
159
+ self.page.wait_for_load_state()
160
+ log.info(f'Navigated to {url}')
161
+ self.actions_log.append(f"Navigated to {url}")
162
+ except Exception as err:
163
+ log.warning(f"navigate failed with {str(err)}")
164
+ self.actions_log.append(f"Tried to navigate to {url} but got an error")
165
+
97
166
 
98
167
  def click(self, selector):
99
- self.page.click(selector)
168
+ try:
169
+ self.page.click(selector)
170
+ self.page.wait_for_load_state()
171
+ log.info(f"Clicked on element with selector {selector}")
172
+ self.actions_log.append(f"Clicked on element with selector {selector}")
173
+ except Exception as err:
174
+ log.warning(f"click failed with {str(err)}")
175
+ self.actions_log.append(f"Tried to click on element with selector {selector} but got an error")
100
176
 
101
177
  def scroll(self, direction='down', amount=1):
102
- for _ in range(amount):
103
- if direction == 'down':
104
- self.page.evaluate("window.scrollBy(0, window.innerHeight)")
105
- elif direction == 'up':
106
- self.page.evaluate("window.scrollBy(0, -window.innerHeight)")
107
- elif direction == 'left':
108
- self.page.evaluate("window.scrollBy(-window.innerWidth, 0)")
109
- elif direction == 'right':
110
- self.page.evaluate("window.scrollBy(window.innerWidth, 0)")
178
+ try:
179
+ for _ in range(amount):
180
+ if direction == 'down':
181
+ self.page.evaluate("window.scrollBy(0, window.innerHeight)")
182
+ elif direction == 'up':
183
+ self.page.evaluate("window.scrollBy(0, -window.innerHeight)")
184
+ elif direction == 'left':
185
+ self.page.evaluate("window.scrollBy(-window.innerWidth, 0)")
186
+ elif direction == 'right':
187
+ self.page.evaluate("window.scrollBy(window.innerWidth, 0)")
188
+ self.page.wait_for_timeout(500)
189
+ log.info(f"Scrolled {direction} by {amount} page heights")
190
+ self.actions_log.append(f"Scrolled {direction} by {amount} page heights")
191
+ except Exception as err:
192
+ log.warning(f"Scrolled failed with {str(err)}")
193
+ self.actions_log.append(f"Tried to scroll {direction} by {amount} page heights but got an error")
194
+
111
195
 
112
196
  def type_text(self, selector, text):
113
- self.page.fill(selector, text)
197
+ try:
198
+ self.page.fill(selector, text)
199
+ self.page.wait_for_load_state()
200
+ log.info(f"Typed text '{text}' into element with selector {selector}")
201
+ self.actions_log.append(f"Typed text '{text}' into element with selector {selector}")
202
+ except Exception as err:
203
+ log.warning(f"Typed text failed with {str(err)}")
204
+ self.actions_log.append(f"Tried to type text '{text}' into element with selector {selector} but got an error")
114
205
 
115
- def take_screenshot(self):
116
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
117
- screenshot_path = os.path.join(self.screenshot_dir, f"screenshot_{timestamp}.png")
206
+ def take_screenshot(self, final=False):
207
+ timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
208
+ parsed_url = urllib.parse.urlparse({self.page.url})
209
+ url_path = parsed_url.path
210
+ if final:
211
+ screenshot_path = os.path.join(self.screenshot_dir, f"final/{timestamp}_{url_path}.png")
212
+ else:
213
+ screenshot_path = os.path.join(self.screenshot_dir, f"{timestamp}_{url_path}.png")
118
214
  self.page.screenshot(path=screenshot_path)
215
+ log.info(f"Screenshot {self.page.url} taken and saved to {screenshot_path}")
216
+ #self.actions_log.append(f"Screenshot {self.page.url} taken and saved to {screenshot_path}")
217
+ self.session_screenshots.append(screenshot_path)
218
+
119
219
  return screenshot_path
120
220
 
121
221
  def get_latest_screenshot_path(self):
@@ -128,31 +228,71 @@ class BrowseWebWithImagePromptsBot:
128
228
  return os.path.join(self.screenshot_dir, screenshots[0])
129
229
  return None
130
230
 
131
- def create_prompt_vars(self, current_action_description, next_goal):
231
+ def create_prompt_vars(self, last_message):
132
232
  prompt = {
133
- "current_action_description": current_action_description,
134
- "next_goal": next_goal,
233
+ "last_actions": self.actions_log,
234
+ "session_goal": self.session_goal,
235
+ "last_message": last_message
135
236
  }
136
237
  return prompt
238
+
239
+ def check_llm_response(self, response):
240
+ if isinstance(response, dict):
241
+ output = response
242
+ elif isinstance(response, str):
243
+ output = json.loads(response)
244
+
245
+ #TODO: more validation
246
+ log.info(f'Response: {output=}')
247
+
248
+ if 'status' not in output:
249
+ log.error(f'Response did not contain status')
250
+
251
+ if 'new_instructions' not in output:
252
+ log.warning(f'Response did not include new_instructions')
253
+
254
+ if 'message' not in output:
255
+ log.warning(f'Response did not include message')
256
+
257
+ return output
137
258
 
138
- def send_screenshot_to_llm(self, screenshot_path, current_action_description="", next_goal=""):
259
+ def send_screenshot_to_llm(self, screenshot_path, last_message):
139
260
  with open(screenshot_path, "rb") as image_file:
140
261
  encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
141
262
 
142
- prompt_vars = self.create_prompt(current_action_description, next_goal)
263
+ prompt_vars = self.create_prompt_vars(last_message)
143
264
  response = self.send_prompt_to_llm(prompt_vars, encoded_image) # Sending prompt and image separately
144
- return json.loads(response)
265
+
266
+ return self.check_llm_response(response)
145
267
 
146
268
  def send_prompt_to_llm(self, prompt_vars, screenshot_base64):
147
- raise NotImplementedError("This method should be implemented by subclasses: `def send_prompt_to_llm(self, prompt_vars, screenshot_base64)`")
269
+ raise NotImplementedError("""
270
+ This method should be implemented by subclasses: `def send_prompt_to_llm(self, prompt_vars, screenshot_base64)`")
271
+ prompt = {
272
+ "last_actions": self.action_log,
273
+ "session_goal": self.session_goal,
274
+ }
275
+ """)
148
276
 
149
277
  def close(self):
150
278
  self.save_cookies()
151
279
  self.browser.close()
152
280
  self.playwright.stop()
153
281
 
154
- def execute_instructions(self, instructions):
282
+ def execute_instructions(self, instructions: list, last_message: str=None):
283
+ if not instructions:
284
+ log.info("No instructions found, returning immediately")
285
+ return
286
+
287
+ if self.steps >= self.max_steps:
288
+ log.warning(f"Reached the maximum number of steps: {self.max_steps}")
289
+ return
290
+
291
+ if not isinstance(instructions, list):
292
+ log.error(f"{instructions} {type(instructions)}")
155
293
  for instruction in instructions:
294
+ if not isinstance(instruction, dict):
295
+ log.error(f"{instruction} {type(instruction)}")
156
296
  action = instruction['action']
157
297
  if action == 'navigate':
158
298
  self.navigate(instruction['url'])
@@ -162,8 +302,54 @@ class BrowseWebWithImagePromptsBot:
162
302
  self.scroll(instruction.get('direction', 'down'), instruction.get('amount', 1))
163
303
  elif action == 'type':
164
304
  self.type_text(instruction['selector'], instruction['text'])
165
- screenshot_path = self.take_screenshot()
166
- new_instructions = self.send_screenshot_to_llm(screenshot_path, instruction.get('description', ''), instruction.get('next_goal', ''))
167
- if new_instructions:
168
- self.execute_instructions(new_instructions)
305
+ self.steps += 1
306
+ if self.steps >= self.max_steps:
307
+ log.warning(f"Reached the maximum number of steps: {self.max_steps}")
308
+ return
309
+
310
+ screenshot_path = self.take_screenshot()
311
+ next_browser_instructions = self.send_screenshot_to_llm(
312
+ screenshot_path,
313
+ last_message=last_message)
314
+
315
+ return next_browser_instructions
316
+
317
+ def start_session(self, instructions, session_goal):
318
+ self.session_goal = session_goal
319
+
320
+ if not instructions:
321
+ instructions = [{'action': 'navigate', 'url': self.website_name}]
322
+
323
+ next_instructions = self.execute_instructions(instructions)
324
+
325
+ in_session = True
326
+ while in_session:
327
+ if next_instructions and 'status' in next_instructions:
328
+ if next_instructions['status'] == 'in-progress':
329
+ log.info(f'Browser message: {next_instructions.get('message')}')
330
+ if 'new_instructions' not in next_instructions:
331
+ log.error('Browser status: "in-progress" but no new_instructions')
332
+ last_message = next_instructions['message']
333
+ log.info(f'Browser message: {last_message}')
334
+ next_instructions = self.execute_instructions(next_instructions['new_instructions'], last_message=last_message)
335
+ else:
336
+ log.info(f'Session finished due to status={next_instructions["status"]}')
337
+ in_session=False
338
+ break
339
+ else:
340
+ log.info('Session finished due to next_instructions being empty')
341
+ in_session=False
342
+ break
343
+
344
+ log.info("Session finished")
345
+ final_path = self.take_screenshot(final=True)
346
+ self.close()
347
+
348
+ return {
349
+ "website": self.website_name,
350
+ "log": self.actions_log,
351
+ "next_instructions": next_instructions,
352
+ "session_screenshots": self.session_screenshots,
353
+ "final_page": final_path,
354
+ }
169
355
 
sunholo/utils/parsers.py CHANGED
@@ -13,6 +13,7 @@
13
13
  # limitations under the License.
14
14
  import re
15
15
  import hashlib
16
+ import urllib.parse
16
17
 
17
18
  def validate_extension_id(ext_id):
18
19
  """
@@ -183,4 +184,8 @@ def escape_braces(text):
183
184
  # Replace single braces with double braces
184
185
  text = re.sub(r'(?<!{){(?!{)', '{{', text) # Replace '{' with '{{' if not already double braced
185
186
  text = re.sub(r'(?<!})}(?!})', '}}', text) # Replace '}' with '}}' if not already double braced
186
- return text
187
+ return text
188
+
189
+ def get_clean_website_name(url):
190
+ parsed_url = urllib.parse.urlparse(url)
191
+ return parsed_url.netloc
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.73.2
3
+ Version: 0.74.0
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.73.2.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.74.0.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -58,6 +58,7 @@ Requires-Dist: pg8000 ; extra == 'all'
58
58
  Requires-Dist: pgvector ; extra == 'all'
59
59
  Requires-Dist: pillow ; extra == 'all'
60
60
  Requires-Dist: playwright ; extra == 'all'
61
+ Requires-Dist: psutil ; extra == 'all'
61
62
  Requires-Dist: psycopg2-binary ; extra == 'all'
62
63
  Requires-Dist: pypdf ; extra == 'all'
63
64
  Requires-Dist: python-socketio ; extra == 'all'
@@ -67,7 +68,7 @@ Requires-Dist: supabase ; extra == 'all'
67
68
  Requires-Dist: tabulate ; extra == 'all'
68
69
  Requires-Dist: tantivy ; extra == 'all'
69
70
  Requires-Dist: tiktoken ; extra == 'all'
70
- Requires-Dist: unstructured[local-inference] ; extra == 'all'
71
+ Requires-Dist: unstructured[local-inference] ==0.14.9 ; extra == 'all'
71
72
  Provides-Extra: anthropic
72
73
  Requires-Dist: langchain-anthropic >=0.1.13 ; extra == 'anthropic'
73
74
  Provides-Extra: cli
@@ -114,10 +115,11 @@ Requires-Dist: tiktoken ; extra == 'openai'
114
115
  Provides-Extra: pipeline
115
116
  Requires-Dist: GitPython ; extra == 'pipeline'
116
117
  Requires-Dist: lark ; extra == 'pipeline'
118
+ Requires-Dist: psutil ; extra == 'pipeline'
117
119
  Requires-Dist: pypdf ; extra == 'pipeline'
118
120
  Requires-Dist: pytesseract ; extra == 'pipeline'
119
121
  Requires-Dist: tabulate ; extra == 'pipeline'
120
- Requires-Dist: unstructured[local-inference] ; extra == 'pipeline'
122
+ Requires-Dist: unstructured[local-inference] ==0.14.9 ; extra == 'pipeline'
121
123
  Provides-Extra: tools
122
124
  Requires-Dist: openapi-spec-validator ; extra == 'tools'
123
125
  Requires-Dist: playwright ; extra == 'tools'
@@ -33,7 +33,7 @@ sunholo/chunker/pdfs.py,sha256=daCZ1xjn1YvxlifIyxskWNpLJLe-Q9D_Jq12MWx3tZo,2473
33
33
  sunholo/chunker/publish.py,sha256=tiO615A2uo_ZjzdFDzNH1PL_1kJeLMUQwLJ4w67rNIc,2932
34
34
  sunholo/chunker/splitter.py,sha256=jtGfi_ZdhVdyFhfw0e4ynEpmwIyrxQtV63OituYWy6o,6729
35
35
  sunholo/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
- sunholo/cli/chat_vac.py,sha256=nDhBwUW7RHj7s4qhUcSK2FK4ae031iM71eKQ_WLBxgc,18695
36
+ sunholo/cli/chat_vac.py,sha256=MjwGJQUJOkHV4vLAlhyYVQ02JoI5pE7zaLSSaBfcTco,23019
37
37
  sunholo/cli/cli.py,sha256=u70fcSQzQx2iPvE23SVCVYRFabmZ-XtgEd6vHcrABi0,3725
38
38
  sunholo/cli/cli_init.py,sha256=JMZ9AX2cPDZ-_mv3adiv2ToFVNyRPtjk9Biszl1kiR0,2358
39
39
  sunholo/cli/configs.py,sha256=QUM9DvKOdZmEQRM5uI3Nh887T0YDiSMr7O240zTLqws,4546
@@ -98,8 +98,8 @@ sunholo/streaming/stream_lookup.py,sha256=uTTUjf96mV7OCc-Sc8N09Fpu5g0T_mD_HbSziv
98
98
  sunholo/streaming/streaming.py,sha256=9z6pXINEopuL_Z1RnmgXAoZJum9dzyuOxqYtEYnjf8w,16405
99
99
  sunholo/summarise/__init__.py,sha256=MZk3dblUMODcPb1crq4v-Z508NrFIpkSWNf9FIO8BcU,38
100
100
  sunholo/summarise/summarise.py,sha256=C3HhjepTjUhUC8FLk4jMQIBvq1BcORniwuTFHjPVhVo,3784
101
- sunholo/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
102
- sunholo/tools/web_browser.py,sha256=NgsAeVcndl-vMAbAfIzDJ8eRfCh5LDZan16OCNEKFmI,7094
101
+ sunholo/tools/__init__.py,sha256=5NuYpwwTX81qGUWvgwfItoSLXteNnp7KjgD7IPZUFjI,53
102
+ sunholo/tools/web_browser.py,sha256=FupCEFaOtn4Adf7eaF1wAubYIF_gOiHTlbj1qwLdi8o,15297
103
103
  sunholo/utils/__init__.py,sha256=Hv02T5L2zYWvCso5hzzwm8FQogwBq0OgtUbN_7Quzqc,89
104
104
  sunholo/utils/api_key.py,sha256=Ct4bIAQZxzPEw14hP586LpVxBAVi_W9Serpy0BK-7KI,244
105
105
  sunholo/utils/big_context.py,sha256=gJIP7_ZL-YSLhOMq8jmFTMqH1wq8eB1NK7oKPeZAq2s,5578
@@ -108,7 +108,7 @@ sunholo/utils/config_class.py,sha256=uyAsPXdxOY47CbQ8RifhUDL2BlxWP2QI-DIWBNlv6yk
108
108
  sunholo/utils/config_schema.py,sha256=Wv-ncitzljOhgbDaq9qnFqH5LCuxNv59dTGDWgd1qdk,4189
109
109
  sunholo/utils/gcp.py,sha256=uueODEpA-P6O15-t0hmcGC9dONLO_hLfzSsSoQnkUss,4854
110
110
  sunholo/utils/gcp_project.py,sha256=0ozs6tzI4qEvEeXb8MxLnCdEVoWKxlM6OH05htj7_tc,1325
111
- sunholo/utils/parsers.py,sha256=z98cQ1v2_ScnqHxCtApNeAN2the8MdvS6RpKL6vWyOU,5287
111
+ sunholo/utils/parsers.py,sha256=aCIT08VjVbu8E3BAxepIiqFQa8zwu4bTgEstU_qjyg8,5414
112
112
  sunholo/utils/timedelta.py,sha256=BbLabEx7_rbErj_YbNM0MBcaFN76DC4PTe4zD2ucezg,493
113
113
  sunholo/utils/user_ids.py,sha256=SQd5_H7FE7vcTZp9AQuQDWBXd4FEEd7TeVMQe1H4Ny8,292
114
114
  sunholo/utils/version.py,sha256=P1QAJQdZfT2cMqdTSmXmcxrD2PssMPEGM-WI6083Fck,237
@@ -117,9 +117,9 @@ sunholo/vertex/extensions_class.py,sha256=4PsUM9dSYrIPpq9bZ3K2rL9MRb_rlqAgnMsW0o
117
117
  sunholo/vertex/init.py,sha256=-w7b9GKsyJnAJpYHYz6_zBUtmeJeLXlEkgOfwoe4DEI,2715
118
118
  sunholo/vertex/memory_tools.py,sha256=pomHrDKqvY8MZxfUqoEwhdlpCvSGP6KmFJMVKOimXjs,6842
119
119
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
120
- sunholo-0.73.2.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
121
- sunholo-0.73.2.dist-info/METADATA,sha256=TUj-qcbRdSqJj0mt0Nz3mhoiz2oYwRgWEfVt3vDqgag,6909
122
- sunholo-0.73.2.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
123
- sunholo-0.73.2.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
124
- sunholo-0.73.2.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
125
- sunholo-0.73.2.dist-info/RECORD,,
120
+ sunholo-0.74.0.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
121
+ sunholo-0.74.0.dist-info/METADATA,sha256=y18aO3XDpmIm0YqXiXmCQhvkuDx950rhG05YJt0Uvcc,7010
122
+ sunholo-0.74.0.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
123
+ sunholo-0.74.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
124
+ sunholo-0.74.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
125
+ sunholo-0.74.0.dist-info/RECORD,,