sunholo 0.71.10__py3-none-any.whl → 0.71.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -87,8 +87,14 @@ class DiscoveryEngineClient:
87
87
  location=self.location,
88
88
  collection=collection,
89
89
  )
90
+
91
def create_data_store(self, type="chunk", chunk_size: int = 500, collection: str = "default_collection"):
    """Create a data store of the requested kind.

    Only chunk data stores are handled here: the call is forwarded to
    ``create_data_store_chunk`` and whatever that method returns is
    returned unchanged.

    Args:
        type: Kind of data store to create. Only ``"chunk"`` is accepted.
        chunk_size: Passed through to ``create_data_store_chunk``.
        collection: Passed through to ``create_data_store_chunk``.

    Raises:
        NotImplementedError: If ``type`` is anything other than ``"chunk"``.
    """
    # Guard first: every non-chunk kind is still unimplemented.
    if type != "chunk":
        raise NotImplementedError("Not done yet - non-chunk data stores.")

    return self.create_data_store_chunk(chunk_size, collection)
90
96
 
91
- def create_data_store(
97
+ def create_data_store_chunk(
92
98
  self, chunk_size: int = 500,
93
99
  collection: str = "default_collection"
94
100
  ) -> str:
@@ -166,7 +172,6 @@ class DiscoveryEngineClient:
166
172
  num_next_chunks: int = 3,
167
173
  page_size: int = 10,
168
174
  parse_chunks_to_string: bool = True,
169
- doc_or_chunks: str = "CHUNKS", # or DOCUMENTS
170
175
  serving_config: str = "default_serving_config",
171
176
  ):
172
177
  """Retrieves chunks or documents based on a query.
@@ -178,6 +183,7 @@ class DiscoveryEngineClient:
178
183
  num_next_chunks (int, optional): Number of next chunks to return for context (default is 3).
179
184
  page_size (int, optional): The maximum number of results to return per page (default is 10).
180
185
  parse_chunks_to_string: If True will put chunks in one big string, False will return object
186
+ serving_config: The resource name of the Search serving config
181
187
 
182
188
  Returns:
183
189
  discoveryengine.SearchResponse: The search response object containing the search results.
@@ -198,12 +204,13 @@ class DiscoveryEngineClient:
198
204
  serving_config
199
205
  )
200
206
 
207
+
201
208
  search_request = discoveryengine.SearchRequest(
202
209
  serving_config=serving_config_path,
203
210
  query=query,
204
211
  page_size=page_size,
205
212
  content_search_spec=discoveryengine.SearchRequest.ContentSearchSpec(
206
- search_result_mode=doc_or_chunks, # CHUNKS or DOCUMENTS
213
+ search_result_mode="CHUNKS",
207
214
  chunk_spec=discoveryengine.SearchRequest.ContentSearchSpec.ChunkSpec(
208
215
  num_previous_chunks=num_previous_chunks,
209
216
  num_next_chunks=num_next_chunks,
@@ -270,7 +277,9 @@ class DiscoveryEngineClient:
270
277
  search_tier=None,
271
278
  search_add_ons=None,
272
279
  ) -> str:
273
-
280
+ """
281
+ You only need this if calling Data Store via Vertex Tools.
282
+ """
274
283
  # The full resource name of the collection
275
284
  # e.g. projects/{project}/locations/{location}/collections/default_collection
276
285
  parent = self.data_store_path()
@@ -300,8 +309,13 @@ class DiscoveryEngineClient:
300
309
  )
301
310
 
302
311
  # Make the request
303
- operation = self.engine_client.create_engine(request=request)
312
+ try:
313
+ operation = self.engine_client.create_engine(request=request)
314
+ except AlreadyExists as err:
315
+ log.info(f"Engine already exists: - {str(err)}")
304
316
 
317
+ return engine_id
318
+
305
319
  log.info(f"Waiting for create vertex ai search operation to complete: {operation.operation.name}")
306
320
  response = operation.result()
307
321
 
File without changes
@@ -0,0 +1,169 @@
1
+ import os
2
+ import base64
3
+ import json
4
+ from datetime import datetime
5
+ try:
6
+ from playwright.sync_api import sync_playwright
7
+ except ImportError:
8
+ sync_playwright = None
9
+
10
class BrowseWebWithImagePromptsBot:
    """
    Drive a Playwright browser and ask an LLM, via screenshots, what to do next.

    After each executed instruction the bot takes a screenshot, sends it
    (base64 encoded) together with prompt variables to an LLM through
    `send_prompt_to_llm`, and executes whatever instructions come back.
    Subclasses must implement `send_prompt_to_llm`; the base implementation
    raises `NotImplementedError`.

    Screenshots and a `cookies.json` file are kept in a directory named
    `{website_name}_{session_id}`, created relative to the current working
    directory.

    Examples:

    ```python
    class ProductionBot(BrowseWebWithImagePromptsBot):
        def send_prompt_to_llm(self, prompt_vars, screenshot_base64):
            # Implement the actual logic to send the prompt and screenshot to the LLM and return the response
            api_url = "https://api.example.com/process"  # Replace with the actual LLM API endpoint
            headers = {"Content-Type": "application/json"}
            data = {
                "prompt": prompt_vars,
                "screenshot": screenshot_base64
            }
            response = requests.post(api_url, headers=headers, data=json.dumps(data))
            # Must be a JSON string: send_screenshot_to_llm() runs json.loads() on it
            return response.text

    @app.route('/run-bot', methods=['POST'])
    def run_bot():
        data = request.json
        session_id = data.get('session_id')
        website_name = data.get('website_name')
        browser_type = data.get('browser_type', 'chromium')
        current_action_description = data.get('current_action_description', "")
        next_goal = data.get('next_goal', "")

        bot = ProductionBot(session_id=session_id, website_name=website_name, browser_type=browser_type, headless=True)

        initial_instructions = data.get('instructions')
        new_instructions = None
        if initial_instructions:
            # Initial instructions were provided: run them
            bot.execute_instructions(initial_instructions)
        else:
            # Otherwise take an initial screenshot and ask the LLM what to do
            screenshot_path = bot.take_screenshot()
            new_instructions = bot.send_screenshot_to_llm(screenshot_path, current_action_description, next_goal)
            bot.execute_instructions(new_instructions)

        # Take final screenshot
        bot.take_screenshot()

        bot.close()

        return jsonify({"status": "completed", "new_instructions": new_instructions})

    if __name__ == "__main__":
        app.run(host='0.0.0.0', port=8080)
    ```
    """

    def __init__(self, session_id, website_name, browser_type='chromium', headless=True):
        """Start Playwright, launch the requested browser and restore saved cookies.

        Args:
            session_id: Identifier used in the name of the working directory.
            website_name: Name used in the name of the working directory.
            browser_type: One of 'chromium', 'firefox' or 'webkit'.
            headless: Passed to the browser's `launch()` call.

        Raises:
            ImportError: If playwright could not be imported.
            ValueError: If `browser_type` is not one of the supported values.
        """
        if not sync_playwright:
            raise ImportError("playwright needed for BrowseWebWithImagePromptsBot class - install via `pip install sunholo[tools]`")
        # Validate before any side effect so a bad argument cannot leave a
        # started Playwright driver behind.
        if browser_type not in ('chromium', 'firefox', 'webkit'):
            raise ValueError(f"Unsupported browser type: {browser_type}")

        self.session_id = session_id
        self.website_name = website_name
        self.browser_type = browser_type
        self.screenshot_dir = f"{website_name}_{session_id}"
        os.makedirs(self.screenshot_dir, exist_ok=True)
        self.cookie_file = os.path.join(self.screenshot_dir, "cookies.json")
        self.playwright = sync_playwright().start()

        try:
            # The supported names match the attributes on the Playwright object.
            self.browser = getattr(self.playwright, browser_type).launch(headless=headless)
            self.context = self.browser.new_context()
            self.page = self.context.new_page()
            self.load_cookies()
        except Exception:
            # Do not leak the driver when launch or setup fails.
            self.playwright.stop()
            raise

    def load_cookies(self):
        """Add the cookies stored in `self.cookie_file` to the context, if the file exists."""
        if os.path.exists(self.cookie_file):
            with open(self.cookie_file, 'r') as f:
                cookies = json.load(f)
                self.context.add_cookies(cookies)

    def save_cookies(self):
        """Write the context's current cookies to `self.cookie_file` as JSON."""
        cookies = self.context.cookies()
        with open(self.cookie_file, 'w') as f:
            json.dump(cookies, f)

    def navigate(self, url):
        """Load `url` in the page."""
        self.page.goto(url)

    def click(self, selector):
        """Click the element matching `selector`."""
        self.page.click(selector)

    def scroll(self, direction='down', amount=1):
        """Scroll the window by one viewport per step, `amount` times.

        `direction` may be 'down', 'up', 'left' or 'right'; any other value
        is silently ignored.
        """
        for _ in range(amount):
            if direction == 'down':
                self.page.evaluate("window.scrollBy(0, window.innerHeight)")
            elif direction == 'up':
                self.page.evaluate("window.scrollBy(0, -window.innerHeight)")
            elif direction == 'left':
                self.page.evaluate("window.scrollBy(-window.innerWidth, 0)")
            elif direction == 'right':
                self.page.evaluate("window.scrollBy(window.innerWidth, 0)")

    def type_text(self, selector, text):
        """Fill the element matching `selector` with `text`."""
        self.page.fill(selector, text)

    def take_screenshot(self):
        """Save a screenshot of the page into `self.screenshot_dir` and return its path."""
        # Microseconds are included so that screenshots taken within the same
        # second (e.g. after consecutive fast instructions) do not overwrite
        # each other.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        screenshot_path = os.path.join(self.screenshot_dir, f"screenshot_{timestamp}.png")
        self.page.screenshot(path=screenshot_path)
        return screenshot_path

    def get_latest_screenshot_path(self):
        """Return the path of the most recently modified screenshot, or None if there is none."""
        screenshots = [f for f in os.listdir(self.screenshot_dir) if f.startswith('screenshot_')]
        if not screenshots:
            return None
        latest = max(
            screenshots,
            key=lambda x: os.path.getmtime(os.path.join(self.screenshot_dir, x)),
        )
        return os.path.join(self.screenshot_dir, latest)

    def create_prompt_vars(self, current_action_description, next_goal):
        """Bundle the two descriptions into the dict handed to `send_prompt_to_llm`."""
        prompt = {
            "current_action_description": current_action_description,
            "next_goal": next_goal,
        }
        return prompt

    def send_screenshot_to_llm(self, screenshot_path, current_action_description="", next_goal=""):
        """Send a screenshot plus prompt variables to the LLM and return the parsed reply.

        The image file is read and base64 encoded, then passed with the prompt
        variables to `send_prompt_to_llm`. The reply must be a JSON string; it
        is decoded with `json.loads` and the result is returned.
        """
        with open(screenshot_path, "rb") as image_file:
            encoded_image = base64.b64encode(image_file.read()).decode('utf-8')

        # Previously called the non-existent `self.create_prompt`, which made
        # every call fail with AttributeError.
        prompt_vars = self.create_prompt_vars(current_action_description, next_goal)
        response = self.send_prompt_to_llm(prompt_vars, encoded_image)  # Sending prompt and image separately
        return json.loads(response)

    def send_prompt_to_llm(self, prompt_vars, screenshot_base64):
        """Hook for subclasses: send the prompt and image to an LLM, return a JSON string."""
        raise NotImplementedError("This method should be implemented by subclasses: `def send_prompt_to_llm(self, prompt_vars, screenshot_base64)`")

    def close(self):
        """Save cookies, then close the browser and stop Playwright.

        The browser and Playwright are shut down even if saving the cookies
        (or closing the browser) raises; the exception still propagates.
        """
        try:
            self.save_cookies()
        finally:
            try:
                self.browser.close()
            finally:
                self.playwright.stop()

    def execute_instructions(self, instructions):
        """Run each instruction, then ask the LLM for follow-up instructions.

        Each instruction is a dict with an 'action' key ('navigate', 'click',
        'scroll' or 'type') plus the keys that action needs ('url',
        'selector', 'direction'/'amount', 'selector'/'text'). Unrecognised
        actions perform no browser operation.

        After every instruction a screenshot is taken and sent to the LLM
        along with the instruction's optional 'description' and 'next_goal'.
        A non-empty reply is executed recursively before the next instruction
        in `instructions` is processed.

        NOTE: there is no recursion limit here; the sequence only ends when
        the LLM returns an empty reply.
        """
        for instruction in instructions:
            action = instruction['action']
            if action == 'navigate':
                self.navigate(instruction['url'])
            elif action == 'click':
                self.click(instruction['selector'])
            elif action == 'scroll':
                self.scroll(instruction.get('direction', 'down'), instruction.get('amount', 1))
            elif action == 'type':
                self.type_text(instruction['selector'], instruction['text'])
            screenshot_path = self.take_screenshot()
            new_instructions = self.send_screenshot_to_llm(screenshot_path, instruction.get('description', ''), instruction.get('next_goal', ''))
            if new_instructions:
                self.execute_instructions(new_instructions)
169
+
@@ -1,3 +1,3 @@
1
1
  from .init import init_vertex
2
2
  from .memory_tools import get_vertex_memories, print_grounding_response, get_google_search_grounding
3
- from .safety import vertex_safety
3
+ from .safety import vertex_safety, genai_safety
@@ -88,6 +88,10 @@ def get_vertex_memories(vector_name):
88
88
 
89
89
  try:
90
90
  project_id = value.get('project_id') or get_gcp_project()
91
+ if value.get('chunks'):
92
+ log.warning("Data stores for chunks do not work with Tools yet, call data store directly instead")
93
+ continue
94
+
91
95
  de = DiscoveryEngineClient(vector_name, project_id=project_id)
92
96
  log.info(f"Found vectorstore {vectorstore}")
93
97
 
sunholo/vertex/safety.py CHANGED
@@ -1,10 +1,37 @@
1
- try:
2
- from vertexai.generative_models import (
1
+
2
+
3
def genai_safety(threshold: str = "BLOCK_ONLY_HIGH"):
    """
    Build safety settings for google.generativeai, applying one block
    threshold to the harassment, hate speech, sexually explicit and dangerous
    content categories.

    Accepted values for `threshold`:

    BLOCK_ONLY_HIGH - block when high probability of unsafe content is detected
    BLOCK_MEDIUM_AND_ABOVE - block when medium or high probability of content is detected
    BLOCK_LOW_AND_ABOVE - block when low, medium, or high probability of unsafe content is detected
    BLOCK_NONE - no block, but need to be on an allow list to use

    Returns a dict mapping each HarmCategory to the chosen HarmBlockThreshold.
    Raises ValueError for any other `threshold` value.
    """
    # Imported lazily so the module can be imported without google.generativeai installed.
    from google.generativeai.types import (
        HarmCategory,
        HarmBlockThreshold
    )

    accepted_names = (
        'BLOCK_ONLY_HIGH',
        'BLOCK_MEDIUM_AND_ABOVE',
        'BLOCK_LOW_AND_ABOVE',
        'BLOCK_NONE',
    )
    if threshold not in accepted_names:
        raise ValueError("Invalid threshold")

    # Each accepted name is also the name of the enum member it selects.
    level = getattr(HarmBlockThreshold, threshold)

    categories = (
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
    return {category: level for category in categories}
34
+
8
35
 
9
36
  def vertex_safety(threshold: str = "BLOCK_ONLY_HIGH"):
10
37
  """
@@ -13,6 +40,10 @@ def vertex_safety(threshold: str = "BLOCK_ONLY_HIGH"):
13
40
  BLOCK_LOW_AND_ABOVE - block when low, medium, or high probability of unsafe content is detected
14
41
  BLOCK_NONE - no block, but need to be on an allow list to use
15
42
  """
43
+ from vertexai.generative_models import (
44
+ HarmCategory,
45
+ HarmBlockThreshold,
46
+ )
16
47
 
17
48
  if threshold == 'BLOCK_ONLY_HIGH':
18
49
  thresh = HarmBlockThreshold.BLOCK_ONLY_HIGH
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.71.10
3
+ Version: 0.71.12
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.71.10.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.71.12.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -124,6 +124,16 @@ This is the Sunholo Python project, a comprehensive toolkit for working with lan
124
124
 
125
125
  Please refer to the website for full documentation at https://dev.sunholo.com/
126
126
 
127
+ ## Tests via pytest
128
+
129
+ If loading from GitHub, run tests:
130
+
131
+ ```bash
132
+ pip install pytest
133
+ pip install . --use-feature=in-tree-build
134
+ pytest tests
135
+ ```
136
+
127
137
  ## Demos
128
138
 
129
139
  Using https://github.com/charmbracelet/vhs
@@ -63,7 +63,7 @@ sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3
63
63
  sunholo/discovery_engine/__init__.py,sha256=qUKWzuHApDRJIUoynukVdGRBEq8eC9T7l9a3bWckgI0,59
64
64
  sunholo/discovery_engine/chunker_handler.py,sha256=H1HHDqWMCkchJER1_oU9TOLxqf2PygiMO6CL3uKZP64,4563
65
65
  sunholo/discovery_engine/create_new.py,sha256=7oZG78T6lW0EspRzlo7-qRyXFSuFxDn2dfSAVEaqlqY,978
66
- sunholo/discovery_engine/discovery_engine_client.py,sha256=-e3rzmqYcVmb-Q7scq-ae1NF7hQGfJCIU1ym2Cu2u6g,17167
66
+ sunholo/discovery_engine/discovery_engine_client.py,sha256=YYsFeaW41l8jmWCruQnYxJGKEYBZ7dduTBDhdxI63hQ,17719
67
67
  sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
68
68
  sunholo/embedder/embed_chunk.py,sha256=d_dIzeNF630Q0Ar-u1hxos60s0tLIImJccAvuo_LTIw,6814
69
69
  sunholo/gcs/__init__.py,sha256=DtVw_AZwQn-IguR5BJuIi2XJeF_FQXizhJikzRNrXiE,50
@@ -96,6 +96,8 @@ sunholo/streaming/stream_lookup.py,sha256=uTTUjf96mV7OCc-Sc8N09Fpu5g0T_mD_HbSziv
96
96
  sunholo/streaming/streaming.py,sha256=9z6pXINEopuL_Z1RnmgXAoZJum9dzyuOxqYtEYnjf8w,16405
97
97
  sunholo/summarise/__init__.py,sha256=MZk3dblUMODcPb1crq4v-Z508NrFIpkSWNf9FIO8BcU,38
98
98
  sunholo/summarise/summarise.py,sha256=C3HhjepTjUhUC8FLk4jMQIBvq1BcORniwuTFHjPVhVo,3784
99
+ sunholo/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
100
+ sunholo/tools/web_browser.py,sha256=NgsAeVcndl-vMAbAfIzDJ8eRfCh5LDZan16OCNEKFmI,7094
99
101
  sunholo/utils/__init__.py,sha256=G11nN_6ATjxpuMfG_BvcUr9UU8onPIgkpTK6CjOcbr8,48
100
102
  sunholo/utils/api_key.py,sha256=Ct4bIAQZxzPEw14hP586LpVxBAVi_W9Serpy0BK-7KI,244
101
103
  sunholo/utils/big_context.py,sha256=gJIP7_ZL-YSLhOMq8jmFTMqH1wq8eB1NK7oKPeZAq2s,5578
@@ -107,15 +109,15 @@ sunholo/utils/parsers.py,sha256=z98cQ1v2_ScnqHxCtApNeAN2the8MdvS6RpKL6vWyOU,5287
107
109
  sunholo/utils/timedelta.py,sha256=BbLabEx7_rbErj_YbNM0MBcaFN76DC4PTe4zD2ucezg,493
108
110
  sunholo/utils/user_ids.py,sha256=SQd5_H7FE7vcTZp9AQuQDWBXd4FEEd7TeVMQe1H4Ny8,292
109
111
  sunholo/utils/version.py,sha256=P1QAJQdZfT2cMqdTSmXmcxrD2PssMPEGM-WI6083Fck,237
110
- sunholo/vertex/__init__.py,sha256=dZa4xWYo-KU6br7SHZJPzfB4T6ICGkw_FshDs1tvnCA,165
112
+ sunholo/vertex/__init__.py,sha256=36ogRu_aP2OqPniHjtd6Pb0iBtHpBBkvX692eU42J20,179
111
113
  sunholo/vertex/extensions.py,sha256=d-Ikt9gHFf-jUMPmyU-xHwYe22QtEyr90Ua1LDKgTws,11026
112
114
  sunholo/vertex/extensions_class.py,sha256=0-XMrMvfhMN380ZdGXl11Mt7R9kCu9rB4Vduiflk8QA,9202
113
115
  sunholo/vertex/init.py,sha256=RLjQppTUwubWgwf2PoAke-EtcwlVkFPaPMYvUsMw1KQ,2029
114
- sunholo/vertex/memory_tools.py,sha256=sipBI7TFttbYzobSWS_1TzWFVTPnJckz3NvLVbTepMc,6345
115
- sunholo/vertex/safety.py,sha256=3meAX0HyGZYrH7rXPUAHxtI_3w_zoy_RX7Shtkoa660,1275
116
- sunholo-0.71.10.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
117
- sunholo-0.71.10.dist-info/METADATA,sha256=LIiaYAQSZmRw9v6rhqq3i-GZjIP_K9zZItlJ-j9cxW0,6615
118
- sunholo-0.71.10.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
119
- sunholo-0.71.10.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
120
- sunholo-0.71.10.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
121
- sunholo-0.71.10.dist-info/RECORD,,
116
+ sunholo/vertex/memory_tools.py,sha256=FLTbNX_YbpxxUxZHAsXEihlUgLELfLOfxsdEkwDm_GI,6546
117
+ sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
118
+ sunholo-0.71.12.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
119
+ sunholo-0.71.12.dist-info/METADATA,sha256=7ELMJTOfh4YUo1BPQV2khVCSZbBuWoVc3reFFo6robc,6759
120
+ sunholo-0.71.12.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
121
+ sunholo-0.71.12.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
122
+ sunholo-0.71.12.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
123
+ sunholo-0.71.12.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (70.1.1)
2
+ Generator: setuptools (70.2.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5