sunholo 0.78.4__py3-none-any.whl → 0.79.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -175,6 +175,10 @@ Be careful not to add any speculation or any details that are not covered in the
      bucket_name = os.getenv("DOC_BUCKET")
      if not bucket_name:
          raise ValueError("No DOC_BUCKET configured for summary")
+
+     if bucket_name.startswith("gs://"):
+         bucket_name = bucket_name[len("gs://"):]
+
      with tempfile.NamedTemporaryFile(mode='w+', delete=False) as temp_file:
          temp_file.write(summary)
          temp_file.flush()
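
The new guard normalizes a `DOC_BUCKET` value such as `gs://my-bucket` down to the bare bucket name before uploading the summary. A standalone sketch of the same normalization (the helper name and example values are illustrative, not part of the package):

```python
def strip_gs_prefix(bucket_name: str) -> str:
    """Drop an optional gs:// scheme so the storage client receives a bare bucket name."""
    if bucket_name.startswith("gs://"):
        bucket_name = bucket_name[len("gs://"):]
    return bucket_name

assert strip_gs_prefix("gs://my-bucket") == "my-bucket"
assert strip_gs_prefix("my-bucket") == "my-bucket"
```
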
sunholo/components/llm.py CHANGED
@@ -16,15 +16,22 @@ from ..utils import load_config_key, ConfigManager
 
  import os
 
- def pick_llm(vector_name):
+ def pick_llm(vector_name:str=None, config:ConfigManager=None):
+
+     if config is None:
+         if vector_name is None:
+             raise ValueError("config and vector_name was None")
+         config = ConfigManager(vector_name)
+
      log.debug('Picking llm')
 
-     llm_str = load_config_key("llm", vector_name, kind="vacConfig")
+     llm_str = config.vacConfig("llm")
 
      if llm_str == 'openai':
-         llm_chat = get_llm_chat(vector_name)
-         llm = get_llm_chat(vector_name, model="gpt-3.5-turbo-16k") # TODO: fix it needs llm_chat and not llm
-         embeddings = get_embeddings(vector_name)
+         llm_chat = get_llm_chat(config=config)
+         llm = get_llm_chat(model="gpt-3.5-turbo-16k", config=config) # TODO: fix it needs llm_chat and not llm
+         embeddings = get_embeddings(config=config)
+
          log.debug("Chose OpenAI")
      elif llm_str == 'vertex':
          llm = get_llm_chat(vector_name) # TODO: fix it needs llm_chat and not llm
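
This hunk introduces the pattern repeated throughout 0.79.0: each component helper now accepts either a `vector_name` (from which a `ConfigManager` is built internally) or an already-constructed `config`, and raises if both are `None`. A sketch of the two call styles, assuming a valid vacConfig exists for the illustrative vector name `"my_vac"`:

```python
from sunholo.utils import ConfigManager
from sunholo.components.llm import pick_llm, get_llm_chat

# Backwards-compatible style: pass the vector name and let each call resolve its own config.
picked = pick_llm("my_vac")

# New style: build the ConfigManager once and reuse it across component calls.
config = ConfigManager("my_vac")
picked = pick_llm(config=config)
chat = get_llm_chat(model="gpt-3.5-turbo-16k", config=config)
```
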
@@ -62,7 +69,9 @@ def llm_str_to_llm(llm_str, model=None, vector_name=None, config=None):
      if llm_str is None:
          raise NotImplementedError("llm_str was None")
 
-     if vector_name:
+     if config is None:
+         if vector_name is None:
+             raise ValueError("vector_name and config was None")
          config = ConfigManager(vector_name)
 
      if llm_str == 'openai':
@@ -125,10 +134,16 @@ def get_llm(vector_name=None, model=None, config=None):
      log.debug(f"Chose LLM: {llm_str}")
      return llm_str_to_llm(llm_str, model=model, config=config)
 
- def get_llm_chat(vector_name, model=None):
-     llm_str = load_config_key("llm", vector_name, kind="vacConfig")
+ def get_llm_chat(vector_name:str=None, model=None, config:ConfigManager=None):
+
+     if config is None:
+         if vector_name is None:
+             raise ValueError("config and vector_name was None")
+         config = ConfigManager(vector_name)
+
+     llm_str = config.vacConfig("llm")
      if not model:
-         model = load_config_key("model", vector_name, kind="vacConfig")
+         model = config.vacConfig("model")
 
      log.debug(f"Chose LLM: {llm_str}")
      # Configure LLMs based on llm_str
@@ -167,7 +182,7 @@ def get_llm_chat(vector_name, model=None):
          return ChatAnthropic(model_name = model, temperature=0)
      elif llm_str == 'azure':
          from langchain_openai import AzureChatOpenAI
-         azure_config = load_config_key("azure", vector_name, kind="vacConfig")
+         azure_config = config.vacConfig("azure")
          if not azure_config:
              raise ValueError("Need to configure azure.config if llm='azure'")
 
@@ -209,22 +224,37 @@ def get_llm_chat(vector_name, model=None):
      if llm_str is None:
          raise NotImplementedError(f'No llm implemented for {llm_str}')
 
- def get_embeddings(vector_name):
+ def get_embeddings(vector_name=None, config:ConfigManager=None):
+
+     if not config:
+         if not vector_name:
+             raise ValueError(f"config and vector_name was None: {vector_name}")
+         config = ConfigManager(vector_name)
+
 
      llm_str = None
-     embed_dict = load_config_key("embedder", vector_name, kind="vacConfig")
+     embed_dict = config.vacConfig("embedder")
 
      if embed_dict:
          llm_str = embed_dict.get('llm')
 
      if llm_str is None:
-         llm_str = load_config_key("llm", vector_name, kind="vacConfig")
+         llm_str = config.vacConfig("llm")
+
+     if llm_str is None:
+         raise ValueError(f"llm_str was None: {llm_str}")
 
-     return pick_embedding(llm_str, vector_name=vector_name)
+     return pick_embedding(llm_str, config=config)
 
 
  #TODO: specify model
- def pick_embedding(llm_str: str, vector_name: str=None):
+ def pick_embedding(llm_str: str, vector_name: str=None, config: ConfigManager=None):
+
+     if not config:
+         if not vector_name:
+             raise ValueError(f"config and vector_name was None {vector_name}")
+         config = ConfigManager(vector_name)
+
      # get embedding directly from llm_str
      # Configure embeddings based on llm_str
      if llm_str == 'openai':
@@ -244,7 +274,7 @@ def pick_embedding(llm_str: str, vector_name: str=None):
      elif llm_str == 'azure':
          from langchain_openai import AzureOpenAIEmbeddings
 
-         azure_config = load_config_key("azure", vector_name, kind="vacConfig")
+         azure_config = config.vacConfig("azure")
          if not azure_config:
              raise ValueError("Need to configure azure.config if llm='azure'")
 
sunholo/components/retriever.py CHANGED
@@ -26,8 +26,14 @@ from langchain.retrievers import ContextualCompressionRetriever
 
 
 
- def load_memories(vector_name):
-     memories = ConfigManager(vector_name).vacConfig("memory")
+ def load_memories(vector_name:str=None, config:ConfigManager=None):
+     if config is None:
+         if vector_name is None:
+             raise ValueError("vector_name and config were none")
+         config = ConfigManager(vector_name)
+
+     memories = config.vacConfig("memory")
+
      log.info(f"Found memory settings for {vector_name}: {memories}")
      if not memories or len(memories) == 0:
          log.info(f"No memory settings found for {vector_name}")
sunholo/database/alloydb.py CHANGED
@@ -232,7 +232,7 @@ async def load_alloydb_sql_async(sql, vector_name)
      return documents
 
  def and_or_ilike(sources, search_type="OR", operator="ILIKE"):
-     unique_sources = set(sources)
+     unique_sources = set(sources.split())
      # Choose the delimiter based on the search_type argument
      delimiter = ' AND ' if search_type.upper() == "AND" else ' OR '
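
Splitting `sources` before building the set fixes the case where a plain string was passed: `set(sources)` would have produced a set of individual characters, whereas `set(sources.split())` yields one entry per whitespace-separated source. A sketch of the resulting behaviour (condition order may vary because a set is used):

```python
from sunholo.database.alloydb import and_or_ilike

and_or_ilike("report.pdf notes.txt", search_type="AND")
# -> "TRIM(source) ILIKE '%report.pdf%' AND TRIM(source) ILIKE '%notes.txt%'"
#    (the two conditions may appear in either order)

and_or_ilike("")
# -> "" (and, as the next hunk shows, an empty string rather than the old empty list)
```
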
 
@@ -240,14 +240,14 @@ def and_or_ilike(sources, search_type="OR", operator="ILIKE"):
      conditions = delimiter.join(f"TRIM(source) {operator} '%{source}%'" for source in unique_sources)
      if not conditions:
          log.warning("Alloydb doc query found no like_patterns")
-         return []
+         return ""
 
      return conditions
 
  def _get_sources_from_docstore(sources, vector_name, search_type="OR"):
      if not sources:
          log.warning("No sources found for alloydb fetch")
-         return []
+         return ""
 
      table_name = f"{vector_name}_docstore"
 
@@ -263,10 +263,37 @@ def _get_sources_from_docstore(sources, vector_name, search_type="OR"):
 
      return query
 
+ def _list_sources_from_docstore(sources, vector_name, search_type="OR"):
+     table_name = f"{vector_name}_docstore"
+
+
+     if sources:
+         conditions = and_or_ilike(sources, search_type=search_type)
+         query = f"""
+             SELECT DISTINCT langchain_metadata->>'objectId' AS objectId
+             FROM {table_name}
+             WHERE {conditions}
+             ORDER BY langchain_metadata->>'objectId' ASC
+             LIMIT 500;
+         """
+     else:
+         query = f"""
+             SELECT DISTINCT langchain_metadata->>'objectId' AS objectId
+             FROM {table_name}
+             ORDER BY langchain_metadata->>'objectId' ASC
+             LIMIT 500;
+         """
+
+     return query
 
- async def get_sources_from_docstore_async(sources, vector_name, search_type="OR"):
+
+ async def get_sources_from_docstore_async(sources, vector_name, search_type="OR", just_source_name=False):
 
-     query = _get_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
+     if just_source_name:
+         query = _list_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
+     else:
+         query = _get_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
+
      if not query:
          return []
 
@@ -274,9 +301,13 @@ async def get_sources_from_docstore_async(sources, vector_name, search_type="OR"
 
      return documents
 
- def get_sources_from_docstore(sources, vector_name, search_type="OR"):
+ def get_sources_from_docstore(sources, vector_name, search_type="OR", just_source_name=False):
 
-     query = _get_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
+     if just_source_name:
+         query = _list_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
+     else:
+         query = _get_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
+
      if not query:
          return []
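
Both the async and sync fetchers now accept `just_source_name`; when set, the query built by `_list_sources_from_docstore` returns at most 500 distinct `objectId` values from the docstore's `langchain_metadata` instead of full rows. A usage sketch (the vector name and source filter are illustrative):

```python
from sunholo.database.alloydb import get_sources_from_docstore

# Full documents, as before:
docs = get_sources_from_docstore("report.pdf", vector_name="my_vac")

# Only the distinct objectId listing (capped at 500 rows):
names = get_sources_from_docstore("report.pdf", vector_name="my_vac", just_source_name=True)

# With no source filter, the listing branch still builds an unfiltered query
# (per _list_sources_from_docstore above):
all_names = get_sources_from_docstore("", vector_name="my_vac", just_source_name=True)
```
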
 
@@ -303,3 +334,5 @@ def delete_sources_from_alloydb(sources, vector_name):
      DELETE FROM {vector_name}_vectorstore_{vector_length}
      WHERE {conditions}
      """
+
+     return query
sunholo/embedder/embed_chunk.py CHANGED
@@ -22,6 +22,7 @@ from langchain.schema import Document
  from ..components import get_embeddings, pick_vectorstore, load_memories, pick_embedding
  from ..logging import log
  from ..database.uuid import generate_uuid_from_object_id
+ from ..utils import ConfigManager
 
  def embed_pubsub_chunk(data: dict):
      """Triggered from a message on a Cloud Pub/Sub topic "embed_chunk" topic
@@ -63,6 +64,9 @@ def embed_pubsub_chunk(data: dict):
          log.error(msg)
          return msg
 
+     config = ConfigManager(vector_name)
+     log.info(f"{config=}")
+
      log.info(f"Embedding: {vector_name} page_content: {page_content[:30]}...[{len(page_content)}] - {metadata}")
 
      if 'eventTime' not in metadata:
@@ -102,9 +106,9 @@ def embed_pubsub_chunk(data: dict):
      doc = Document(page_content=page_content, metadata=metadata)
 
      # init embedding and vector store
-     embeddings = get_embeddings(vector_name)
+     embeddings = get_embeddings(config=config)
 
-     memories = load_memories(vector_name)
+     memories = load_memories(config=config)
      vectorstore_list = []
      for memory in memories: # Iterate over the list
          for key, value in memory.items():
@@ -114,7 +118,7 @@ def embed_pubsub_chunk(data: dict):
              # check if vectorstore specific embedding is available
              embed_llm = value.get('llm')
              if embed_llm:
-                 embeddings = pick_embedding(embed_llm)
+                 embeddings = pick_embedding(embed_llm, config=config)
              # check if read only
              read_only = value.get('read_only')
              if read_only:
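
Taken together, the embedder hunks build a single `ConfigManager` per Pub/Sub message and thread it through the component helpers instead of re-resolving configuration from `vector_name` on every call. A condensed sketch of that flow (message parsing and vector store setup omitted; the vector name is illustrative):

```python
from sunholo.utils import ConfigManager
from sunholo.components import get_embeddings, load_memories, pick_embedding

vector_name = "my_vac"  # in embed_pubsub_chunk this comes from the Pub/Sub message

config = ConfigManager(vector_name)
embeddings = get_embeddings(config=config)
memories = load_memories(config=config)

for memory in memories:
    for key, value in memory.items():
        embed_llm = value.get('llm')
        if embed_llm:
            # per-vectorstore embedding override, still sharing the same config
            embeddings = pick_embedding(embed_llm, config=config)
```
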
sunholo/utils/config_class.py CHANGED
@@ -37,6 +37,10 @@ class ConfigManager:
          self.local_config_folder = local_config_folder
          self.configs_by_kind = self.load_all_configs()
 
+         test_agent = self.vacConfig("agent")
+         if not test_agent:
+             print(f"WARNING: No vacConfig.agent found for {self.vector_name} - are you in right folder? {local_config_folder=} {self.config_folder=}")
+
      def load_all_configs(self):
          """
          Load all configuration files from the specified directories into a dictionary.
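
The added constructor check is a diagnostic for a common failure mode: instantiating `ConfigManager` from a directory where the vacConfig cannot be found, so no `agent` entry exists for the vector name. A minimal sketch of what callers see (the vector name is illustrative):

```python
from sunholo.utils import ConfigManager

config = ConfigManager("my_vac")
# If the loaded vacConfig has no agent entry for "my_vac", 0.79.0 prints:
#   WARNING: No vacConfig.agent found for my_vac - are you in right folder? ...
# Construction otherwise proceeds as before, e.g.:
llm_name = config.vacConfig("llm")
```
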
@@ -121,7 +125,7 @@ class ConfigManager:
          self.config_cache[filename] = (config, datetime.now())
          log.debug(f"Loaded and cached {config_file}")
          if is_local:
-             log.warning(f"Local configuration override for {filename}")
+             log.warning(f"Local configuration override for {filename} via {self.local_config_folder}")
          return config
 
      def _check_and_reload_configs(self):
@@ -1,9 +1,9 @@
  Metadata-Version: 2.1
  Name: sunholo
- Version: 0.78.4
+ Version: 0.79.0
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
  Home-page: https://github.com/sunholo-data/sunholo-py
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.78.4.tar.gz
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.79.0.tar.gz
  Author: Holosun ApS
  Author-email: multivac@sunholo.com
  License: Apache License, Version 2.0
@@ -31,7 +31,7 @@ sunholo/bots/github_webhook.py,sha256=5pQPRLM_wxxcILVaIzUDV8Kt7Arcm2dL1r1kMMHA52
  sunholo/bots/webapp.py,sha256=EIMxdAJ_xtufwJmvnn7N_Fb_1hZ9DjhJ0Kf_hp02vEU,1926
  sunholo/chunker/__init__.py,sha256=A5canS0XPgisHu0OZ7sVdILgEHGzgH9kpkDi4oBwLZk,135
  sunholo/chunker/azure.py,sha256=iZ0mXjei0cILsLuSUnZK0mmUUsQNiC3ZQr1iX8q5IeY,3263
- sunholo/chunker/doc_handling.py,sha256=rIyknpzDyj5A0u_DqSQVD_CXLRNZPOU6TCL4bhCdjOI,8563
+ sunholo/chunker/doc_handling.py,sha256=AV-HU4FePKsk1mPASc3XOhJrqwdxnvEKc0GSpPTswMA,8714
  sunholo/chunker/encode_metadata.py,sha256=SYHaqKcr4lCzwmrzUGhgX4_l4pzDv7wAeNCw7a461MA,1912
  sunholo/chunker/images.py,sha256=Xmh1vwHrVhoXm5iH2dhCc52O8YgdzE8KrDSdL-pGnp8,1861
  sunholo/chunker/loaders.py,sha256=xiToUVgPz2ZzcqpUAq7aNP3PTenb_rBUAFzu0JPycIg,10268
@@ -54,11 +54,11 @@ sunholo/cli/sun_rich.py,sha256=UpMqeJ0C8i0pkue1AHnnyyX0bFJ9zZeJ7HBR6yhuA8A,54
  sunholo/cli/swagger.py,sha256=absYKAU-7Yd2eiVNUY-g_WLl2zJfeRUNdWQ0oH8M_HM,1564
  sunholo/cli/vertex.py,sha256=8130YCarxHL1UC3aqblNmUwGZTXbkdL4Y_FOnZJsWiI,2056
  sunholo/components/__init__.py,sha256=IDoylb74zFKo6NIS3RQqUl0PDFBGVxM1dfUmO7OJ44U,176
- sunholo/components/llm.py,sha256=QTTpqUhfj7u9Ty9-E-XL8dpg4fp19z64FdRC1zbTHVo,10698
- sunholo/components/retriever.py,sha256=BFUw_6turT3CQJZWv_uXylmH5fHdb0gKfKJrQ_j6MGY,6533
+ sunholo/components/llm.py,sha256=XhSFuvthK35LDirX-zUbeLrLU8ccLSGxdJOOQovBGEM,11481
+ sunholo/components/retriever.py,sha256=F-wgZMpGJ8mGxJMAHA7HNgDwEhnvq1Pd6EGnTuBFlY8,6719
  sunholo/components/vectorstore.py,sha256=zUJ90L1S4IyxLB0JUWopeuwVjcsSqdhj1QreEfsJhsE,5548
  sunholo/database/__init__.py,sha256=Zz0Shcq-CtStf9rJGIYB_Ybzb8rY_Q9mfSj-nviM490,241
- sunholo/database/alloydb.py,sha256=d9W0pbZB0jTVIGF5OVaQ6kXHo-X3-6e9NpWNmV5e9UY,10464
+ sunholo/database/alloydb.py,sha256=ZZGDA6DBSoWouDFi69LvTT1DgiiBz3aSR6u1hFO-IZY,11520
  sunholo/database/alloydb_client.py,sha256=AYA0SSaBy-1XEfeZI97sMGehfrwnfbwZ8sE0exzI2E0,7254
  sunholo/database/database.py,sha256=UDHkceiEvJmS3esQX2LYEjEMrHcogN_JHuJXoVWCH3M,7354
  sunholo/database/lancedb.py,sha256=2rAbJVusMrm5TPtVTsUtmwn0z1iZ_wvbKhc6eyT6ClE,708
@@ -75,7 +75,7 @@ sunholo/discovery_engine/chunker_handler.py,sha256=fDqvXeXr58s6TB75MMIGKKEg42T21
  sunholo/discovery_engine/create_new.py,sha256=7oZG78T6lW0EspRzlo7-qRyXFSuFxDn2dfSAVEaqlqY,978
  sunholo/discovery_engine/discovery_engine_client.py,sha256=YYsFeaW41l8jmWCruQnYxJGKEYBZ7dduTBDhdxI63hQ,17719
  sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
- sunholo/embedder/embed_chunk.py,sha256=8BJ90tR0_JbCcsVCzrtPdZn6sVys0OhXSxLszlve_ko,6819
+ sunholo/embedder/embed_chunk.py,sha256=FFr5pDvFCsWNS5JnTjuf1aCpg4Qlut83wqndneavnj8,6944
  sunholo/gcs/__init__.py,sha256=SZvbsMFDko40sIRHTHppA37IijvJTae54vrhooEF5-4,90
  sunholo/gcs/add_file.py,sha256=m-iQeYAmdXxy2EJ1uMmM3gx-eKbTcNpfsAyRd4sL_hA,7120
  sunholo/gcs/download_folder.py,sha256=mfntDA3Gl-7quMK9_eSTWvUOY1330jF--1cb62C0K1E,1607
@@ -116,7 +116,7 @@ sunholo/utils/__init__.py,sha256=Hv02T5L2zYWvCso5hzzwm8FQogwBq0OgtUbN_7Quzqc,89
  sunholo/utils/api_key.py,sha256=Ct4bIAQZxzPEw14hP586LpVxBAVi_W9Serpy0BK-7KI,244
  sunholo/utils/big_context.py,sha256=gJIP7_ZL-YSLhOMq8jmFTMqH1wq8eB1NK7oKPeZAq2s,5578
  sunholo/utils/config.py,sha256=XOH2pIvHs6QLnCwVAn7RuyRyV10TfbCEXabSjuEhKdo,8947
- sunholo/utils/config_class.py,sha256=4fm2Bwn_zFhVJBiUnMBzfCA5LKhTcBMU3mzhf5seXrw,8553
+ sunholo/utils/config_class.py,sha256=GP58SfYYn32dSTUnyixKGujgF2DZRctc23ZhFRvDTZ8,8808
  sunholo/utils/config_schema.py,sha256=Wv-ncitzljOhgbDaq9qnFqH5LCuxNv59dTGDWgd1qdk,4189
  sunholo/utils/gcp.py,sha256=uueODEpA-P6O15-t0hmcGC9dONLO_hLfzSsSoQnkUss,4854
  sunholo/utils/gcp_project.py,sha256=0ozs6tzI4qEvEeXb8MxLnCdEVoWKxlM6OH05htj7_tc,1325
@@ -132,9 +132,9 @@ sunholo/vertex/init.py,sha256=uyg76EqS39jWJ2gxMqXOLWP6MQ2hc81wFdwgG86ZoCM,2868
  sunholo/vertex/memory_tools.py,sha256=pomHrDKqvY8MZxfUqoEwhdlpCvSGP6KmFJMVKOimXjs,6842
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
- sunholo-0.78.4.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
- sunholo-0.78.4.dist-info/METADATA,sha256=Ic8NbVh3Y5f0OZxVu18aB9QaI5PcIamIG3DDPzH6_6o,7348
- sunholo-0.78.4.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
- sunholo-0.78.4.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
- sunholo-0.78.4.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
- sunholo-0.78.4.dist-info/RECORD,,
+ sunholo-0.79.0.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+ sunholo-0.79.0.dist-info/METADATA,sha256=-sMhBNEnku2Fz06P5Ihk8N6KP-VlQ37zCidev8zEMjU,7348
+ sunholo-0.79.0.dist-info/WHEEL,sha256=Rp8gFpivVLXx-k3U95ozHnQw8yDcPxmhOpn_Gx8d5nc,91
+ sunholo-0.79.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+ sunholo-0.79.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+ sunholo-0.79.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (70.3.0)
+ Generator: setuptools (72.0.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
 