sunholo 0.125.1__py3-none-any.whl → 0.126.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,13 +35,16 @@ def process_chunker_data(message_data, metadata, vector_name):
35
35
  # checks if only a llamaindex chunking/embedder, return early as no other processing needed
36
36
  llamacheck = llamaindex_chunker_check(message_data, metadata, vector_name)
37
37
  if llamacheck:
38
+
38
39
  return llamacheck
39
40
 
40
41
  # if only a discovery engine memory, return early as no other processing needed
41
42
  discovery_check = discovery_engine_chunker_check(message_data, metadata, config=config, process=False)
42
43
  if discovery_check:
44
+
43
45
  return discovery_check
44
46
 
47
+ log.info(f"Discovery engine and llamaindex checks passed - continuing to other memory types for {metadata}")
45
48
  chunks = []
46
49
 
47
50
  if message_data.startswith("gs://"):
@@ -610,19 +610,20 @@ class AlloyDBClient:
610
610
  if items and isinstance(items[0], dict):
611
611
  # If the first item is a dictionary, we need to create columns for all its keys
612
612
  sample_item = items[0]
613
- # Flatten the dictionary with the key as prefix
614
- flattened_item = self._flatten_dict_for_schema(sample_item, key, "_")
615
613
 
616
- # Add columns for all keys in the flattened dictionary
617
- for item_key, item_value in flattened_item.items():
618
- item_column = self._get_column_definition(item_key, item_value)
619
- columns.append(item_column)
614
+ # Create columns for all keys in the sample item
615
+ for item_key, item_value in sample_item.items():
616
+ column_key = f"{key}_{item_key}"
617
+ column_type = self._get_sql_type(item_value)
618
+ columns.append(f'"{column_key}" {column_type}')
620
619
  else:
621
620
  # If items are simple values, just add a column for the list key itself
622
- columns.append(self._get_column_definition(key, items[0] if items else None))
621
+ column_type = self._get_sql_type(items[0] if items else None)
622
+ columns.append(f'"{key}" {column_type}')
623
623
  else:
624
624
  # Regular handling for non-list fields
625
- columns.append(self._get_column_definition(key, value))
625
+ column_type = self._get_sql_type(value)
626
+ columns.append(f'"{key}" {column_type}')
626
627
 
627
628
  # Add metadata columns
628
629
  columns.extend([
@@ -656,41 +657,40 @@ class AlloyDBClient:
656
657
  for user in users:
657
658
  grant_sql = f'GRANT SELECT, INSERT, UPDATE, DELETE ON TABLE "{table_name}" TO "{user}";'
658
659
  if self.engine_type == "pg8000":
659
- self.execute_sql(grant_sql)
660
+ self._execute_sql_pg8000(grant_sql)
660
661
  else:
661
662
  await self._execute_sql_async_langchain(grant_sql)
662
663
 
663
664
  return result
664
665
 
665
- def _get_column_definition(self, key, value):
666
def _get_sql_type(self, value):
    """
    Helper method to determine SQL type from a Python value.

    Args:
        value: The value to determine the column type

    Returns:
        str: SQL type ("TEXT", "JSONB", "BOOLEAN", "INTEGER" or "NUMERIC")
    """
    if value is None:
        # For unknown types (None), default to TEXT
        return "TEXT"
    if isinstance(value, (dict, list)):
        # Nested objects and arrays are both stored as JSONB
        return "JSONB"
    if isinstance(value, bool):
        # bool must be tested before int: bool is a subclass of int, so
        # checking int first would classify True/False as INTEGER and make
        # this branch unreachable.
        return "BOOLEAN"
    if isinstance(value, int):
        return "INTEGER"
    if isinstance(value, float):
        return "NUMERIC"
    # Default to TEXT for strings and other types
    return "TEXT"
694
694
 
695
695
  def _flatten_dict_for_schema(self, nested_dict, parent_key='', separator='.'):
696
696
  """
@@ -789,6 +789,8 @@ class AlloyDBClient:
789
789
  primary_list_key = next(iter(expandable_lists))
790
790
  primary_list = expandable_lists[primary_list_key]
791
791
 
792
+ log.info(f"Expanding list '{primary_list_key}' with {len(primary_list)} items into separate rows")
793
+
792
794
  # For each item in the primary list, create a new row
793
795
  for item_idx, item in enumerate(primary_list):
794
796
  # Create a copy of the regular data
@@ -797,7 +799,12 @@ class AlloyDBClient:
797
799
  # Add the current item from the primary list
798
800
  if isinstance(item, dict):
799
801
  # If it's a dictionary, flatten it with the primary key as prefix
800
- flattened_item = self._flatten_dict(item, primary_list_key, "_")
802
+ flattened_item = {}
803
+ for k, v in item.items():
804
+ flattened_key = f"{primary_list_key}_{k}"
805
+ flattened_item[flattened_key] = v
806
+
807
+ # Update row data with flattened item
801
808
  row_data.update(flattened_item)
802
809
  else:
803
810
  # If it's a simple value, just add it with the list key
@@ -809,8 +816,12 @@ class AlloyDBClient:
809
816
  # Insert this row
810
817
  result = await self._insert_single_row(table_name, row_data, metadata)
811
818
  results.append(result)
819
+
820
+ return results
812
821
 
813
- return results
822
+ # If we somehow get here (shouldn't happen), fall back to single row insert
823
+ return await self._insert_single_row(table_name, regular_data, metadata)
824
+
814
825
 
815
826
  async def _insert_single_row(self, table_name: str, data: dict, metadata: dict = None):
816
827
  """
@@ -100,9 +100,10 @@ def do_discovery_engine(message_data:str, metadata:dict, config:ConfigManager=No
100
100
  log.warning("Only gs:// data is supported for Discovery Engine")
101
101
 
102
102
 
103
- def check_discovery_engine_in_memory(config:ConfigManager):
103
+ def check_discovery_engine_in_memory(config:ConfigManager) -> int:
104
104
  memories = config.vacConfig("memory")
105
105
 
106
+ discovery_engine_memories = 0
106
107
  for memory in memories: # Iterate over the list
107
108
  for key, value in memory.items(): # Now iterate over the dictionary
108
109
  log.info(f"Found memory {key}")
@@ -110,16 +111,16 @@ def check_discovery_engine_in_memory(config:ConfigManager):
110
111
  if vectorstore:
111
112
  if vectorstore == "discovery_engine" or vectorstore == "vertex_ai_search":
112
113
  log.info(f"Found vectorstore {vectorstore}")
113
- return True
114
+ discovery_engine_memories += 1
114
115
 
115
- return False
116
+ return discovery_engine_memories
116
117
 
117
118
  def check_write_memories(config:ConfigManager):
118
119
  write_mem = []
119
120
  memories = config.vacConfig("memory")
120
121
  for memory in memories:
121
122
  for key, value in memory.items():
122
- if value.get('read_only'):
123
+ if value and value.get('read_only'):
123
124
  continue
124
125
  write_mem.append(memory)
125
126
 
@@ -142,21 +143,25 @@ def discovery_engine_chunker_check(message_data,
142
143
  return None
143
144
 
144
145
  total_memories = len(check_write_memories(config))
145
- llama = None
146
- if check_discovery_engine_in_memory(config) and process:
147
- llama = do_discovery_engine(message_data, metadata, config=config)
148
- log.info(f"Processed discovery engine: {llama}")
146
+ total_discovery_memories = check_discovery_engine_in_memory(config)
149
147
 
150
- # If discovery engine is the only entry, return
151
- if llama and total_memories == 1:
152
-
153
- return llama
154
-
155
- # If not processing and only memory, do not process further
156
- if not process and total_memories == 1:
148
+ if not process and total_memories == total_discovery_memories:
149
+ log.info(f"Do not process discovery engine, and only memory found is discovery engine for {metadata} - stopping")
150
+
157
151
  return metadata
152
+
153
+ if total_discovery_memories > 0:
154
+ log.info(f"Process discovery engine for {metadata}")
155
+ disc_meta = do_discovery_engine(message_data, metadata, config=config)
156
+ log.info(f"Processed discovery engine: {disc_meta}")
157
+
158
+ # If discovery engine is the only entry, return
159
+ if total_discovery_memories == total_memories:
160
+ log.info(f"Process discovery engine was only type found in {metadata} - stopping")
161
+
162
+ return disc_meta
158
163
 
159
- elif llama:
160
- log.info("Discovery Engine found but not the only memory, continuing with other processes.")
164
+ elif disc_meta:
165
+ log.info("Discovery Engine found but not the only memory, continuing with other processes - returning None")
161
166
 
162
167
  return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sunholo
3
- Version: 0.125.1
3
+ Version: 0.126.0
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Author-email: Holosun ApS <multivac@sunholo.com>
6
6
  License: Apache License, Version 2.0
@@ -38,7 +38,7 @@ sunholo/chunker/images.py,sha256=id2PBu6XyGEOtgafq2v0c9_O6kxaC_pYFMnbsIitkSg,186
38
38
  sunholo/chunker/loaders.py,sha256=KEFPHBr32DPJnRUNjOecLiZKsFD-Gk3BFIKyXSVHgbY,11143
39
39
  sunholo/chunker/message_data.py,sha256=bpb8QWQttqazm5lr7fTFJ5JDwf-P0SQ5cOIf6NikNyI,10836
40
40
  sunholo/chunker/pdfs.py,sha256=xwbuMJrbypcyPXfZ8tiUidWeMr80C2NhfTC1mwa8SHY,2477
41
- sunholo/chunker/process_chunker_data.py,sha256=xjOAf1FvHDwQaBm7kgDzLQUnwm6AW8qf4fTrwDnwmtc,3613
41
+ sunholo/chunker/process_chunker_data.py,sha256=soFl5blyHBryiAkeoalBsPnkU0wDg6bu8rZZrb3LGFc,3728
42
42
  sunholo/chunker/publish.py,sha256=IDud-NhRcEZFv9GkyWJFRKwfptIU052kSPKEx8AYW68,2943
43
43
  sunholo/chunker/pubsub.py,sha256=48bhuAcszN7LGe3-ksPSLHHhq0uKxiXOrizck5qpcP0,1012
44
44
  sunholo/chunker/splitter.py,sha256=RfekLPkjhCcNd1PFXIj_FxusJMJ8_3cyWl7bsYvtQ0g,7068
@@ -60,7 +60,7 @@ sunholo/components/retriever.py,sha256=Wmchv3huAM4w7DIS-a5Lp9Hi7M8pE6vZdxgseiT9S
60
60
  sunholo/components/vectorstore.py,sha256=k7GS1Y5c6ZGXSDAJvyCes6dTjhDAi0fjGbVLqpyfzBc,5918
61
61
  sunholo/database/__init__.py,sha256=bpB5Nk21kwqYj-qdVnvNgXjLsbflnH4g-San7OHMqR4,283
62
62
  sunholo/database/alloydb.py,sha256=x1zUMB-EVWbE2Zvp4nAs2Z-tB_kOZmS45H2lwVHdYnk,11678
63
- sunholo/database/alloydb_client.py,sha256=IszkoMgL_OBBg6YCWBIq2v5blIX9TApdlxPgNREDt1o,34429
63
+ sunholo/database/alloydb_client.py,sha256=Ih_9nd_5fXzki02fNhssn5Grg5-GcQrfmNjqv1wLK7A,34788
64
64
  sunholo/database/database.py,sha256=VqhZdkXUNdvWn8sUcUV3YNby1JDVf7IykPVXWBtxo9U,7361
65
65
  sunholo/database/lancedb.py,sha256=DyfZntiFKBlVPaFooNN1Z6Pl-LAs4nxWKKuq8GBqN58,715
66
66
  sunholo/database/static_dbs.py,sha256=8cvcMwUK6c32AS2e_WguKXWMkFf5iN3g9WHzsh0C07Q,442
@@ -72,7 +72,7 @@ sunholo/database/sql/sb/delete_source_row.sql,sha256=r6fEuUKdbiLHCDGKSbKINDCpJjs
72
72
  sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUtYL5cCxbC2mj_c,255
73
73
  sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
74
74
  sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8RJ9OyKdQ,130
75
- sunholo/discovery_engine/chunker_handler.py,sha256=2775W5wHzdkYXqkRMlwh8MRbas20wcMnnnNngo0ljms,6160
75
+ sunholo/discovery_engine/chunker_handler.py,sha256=qznc5lUkYv-Au18Nl4dJSY0qF0RpNqJMigzWdVrmOdM,6573
76
76
  sunholo/discovery_engine/cli.py,sha256=KGVle5rkLL49oF9TQhrGI--8017IvvLOEoYur545Qb0,12790
77
77
  sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
78
78
  sunholo/discovery_engine/discovery_engine_client.py,sha256=NjIcP10I2-8yj6QZKrxGzNbh3SqQ5vGYsq9OwxCpWas,36935
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
168
168
  sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
169
169
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
170
170
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
171
- sunholo-0.125.1.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
172
- sunholo-0.125.1.dist-info/METADATA,sha256=KFpMxsW_gucPhBoSwzZU8UHg9tnFEMe3Lv254hgsKqE,10001
173
- sunholo-0.125.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
174
- sunholo-0.125.1.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
175
- sunholo-0.125.1.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
176
- sunholo-0.125.1.dist-info/RECORD,,
171
+ sunholo-0.126.0.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
172
+ sunholo-0.126.0.dist-info/METADATA,sha256=8qEuySuUqgo6zCfDjUB0aThQRZci-Mx5vEQPpfS2-wE,10001
173
+ sunholo-0.126.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
174
+ sunholo-0.126.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
175
+ sunholo-0.126.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
176
+ sunholo-0.126.0.dist-info/RECORD,,