sunholo 0.125.1__py3-none-any.whl → 0.126.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/chunker/process_chunker_data.py +3 -0
- sunholo/database/alloydb_client.py +33 -22
- sunholo/discovery_engine/chunker_handler.py +22 -17
- {sunholo-0.125.1.dist-info → sunholo-0.126.0.dist-info}/METADATA +1 -1
- {sunholo-0.125.1.dist-info → sunholo-0.126.0.dist-info}/RECORD +9 -9
- {sunholo-0.125.1.dist-info → sunholo-0.126.0.dist-info}/WHEEL +0 -0
- {sunholo-0.125.1.dist-info → sunholo-0.126.0.dist-info}/entry_points.txt +0 -0
- {sunholo-0.125.1.dist-info → sunholo-0.126.0.dist-info}/licenses/LICENSE.txt +0 -0
- {sunholo-0.125.1.dist-info → sunholo-0.126.0.dist-info}/top_level.txt +0 -0
@@ -35,13 +35,16 @@ def process_chunker_data(message_data, metadata, vector_name):
|
|
35
35
|
# checks if only a llamaindex chunking/embedder, return early as no other processing needed
|
36
36
|
llamacheck = llamaindex_chunker_check(message_data, metadata, vector_name)
|
37
37
|
if llamacheck:
|
38
|
+
|
38
39
|
return llamacheck
|
39
40
|
|
40
41
|
# if only a discovery engine memory, return early as no other processing needed
|
41
42
|
discovery_check = discovery_engine_chunker_check(message_data, metadata, config=config, process=False)
|
42
43
|
if discovery_check:
|
44
|
+
|
43
45
|
return discovery_check
|
44
46
|
|
47
|
+
log.info(f"Discovery engine and llamaindex checks passed - continuing to other memory types for {metadata}")
|
45
48
|
chunks = []
|
46
49
|
|
47
50
|
if message_data.startswith("gs://"):
|
@@ -610,19 +610,20 @@ class AlloyDBClient:
|
|
610
610
|
if items and isinstance(items[0], dict):
|
611
611
|
# If the first item is a dictionary, we need to create columns for all its keys
|
612
612
|
sample_item = items[0]
|
613
|
-
# Flatten the dictionary with the key as prefix
|
614
|
-
flattened_item = self._flatten_dict_for_schema(sample_item, key, "_")
|
615
613
|
|
616
|
-
#
|
617
|
-
for item_key, item_value in
|
618
|
-
|
619
|
-
|
614
|
+
# Create columns for all keys in the sample item
|
615
|
+
for item_key, item_value in sample_item.items():
|
616
|
+
column_key = f"{key}_{item_key}"
|
617
|
+
column_type = self._get_sql_type(item_value)
|
618
|
+
columns.append(f'"{column_key}" {column_type}')
|
620
619
|
else:
|
621
620
|
# If items are simple values, just add a column for the list key itself
|
622
|
-
|
621
|
+
column_type = self._get_sql_type(items[0] if items else None)
|
622
|
+
columns.append(f'"{key}" {column_type}')
|
623
623
|
else:
|
624
624
|
# Regular handling for non-list fields
|
625
|
-
|
625
|
+
column_type = self._get_sql_type(value)
|
626
|
+
columns.append(f'"{key}" {column_type}')
|
626
627
|
|
627
628
|
# Add metadata columns
|
628
629
|
columns.extend([
|
@@ -656,41 +657,40 @@ class AlloyDBClient:
|
|
656
657
|
for user in users:
|
657
658
|
grant_sql = f'GRANT SELECT, INSERT, UPDATE, DELETE ON TABLE "{table_name}" TO "{user}";'
|
658
659
|
if self.engine_type == "pg8000":
|
659
|
-
self.
|
660
|
+
self._execute_sql_pg8000(grant_sql)
|
660
661
|
else:
|
661
662
|
await self._execute_sql_async_langchain(grant_sql)
|
662
663
|
|
663
664
|
return result
|
664
665
|
|
665
|
-
def
|
666
|
+
def _get_sql_type(self, value):
|
666
667
|
"""
|
667
|
-
Helper method to
|
668
|
+
Helper method to determine SQL type from a Python value.
|
668
669
|
|
669
670
|
Args:
|
670
|
-
key (str): The column name
|
671
671
|
value: The value to determine the column type
|
672
672
|
|
673
673
|
Returns:
|
674
|
-
str: SQL
|
674
|
+
str: SQL type
|
675
675
|
"""
|
676
676
|
if value is None:
|
677
677
|
# For unknown types (None), default to TEXT
|
678
|
-
return
|
678
|
+
return "TEXT"
|
679
679
|
elif isinstance(value, dict):
|
680
680
|
# For nested objects, store as JSONB
|
681
|
-
return
|
681
|
+
return "JSONB"
|
682
682
|
elif isinstance(value, list):
|
683
683
|
# For arrays, store as JSONB
|
684
|
-
return
|
684
|
+
return "JSONB"
|
685
685
|
elif isinstance(value, int):
|
686
|
-
return
|
686
|
+
return "INTEGER"
|
687
687
|
elif isinstance(value, float):
|
688
|
-
return
|
688
|
+
return "NUMERIC"
|
689
689
|
elif isinstance(value, bool):
|
690
|
-
return
|
690
|
+
return "BOOLEAN"
|
691
691
|
else:
|
692
692
|
# Default to TEXT for strings and other types
|
693
|
-
return
|
693
|
+
return "TEXT"
|
694
694
|
|
695
695
|
def _flatten_dict_for_schema(self, nested_dict, parent_key='', separator='.'):
|
696
696
|
"""
|
@@ -789,6 +789,8 @@ class AlloyDBClient:
|
|
789
789
|
primary_list_key = next(iter(expandable_lists))
|
790
790
|
primary_list = expandable_lists[primary_list_key]
|
791
791
|
|
792
|
+
log.info(f"Expanding list '{primary_list_key}' with {len(primary_list)} items into separate rows")
|
793
|
+
|
792
794
|
# For each item in the primary list, create a new row
|
793
795
|
for item_idx, item in enumerate(primary_list):
|
794
796
|
# Create a copy of the regular data
|
@@ -797,7 +799,12 @@ class AlloyDBClient:
|
|
797
799
|
# Add the current item from the primary list
|
798
800
|
if isinstance(item, dict):
|
799
801
|
# If it's a dictionary, flatten it with the primary key as prefix
|
800
|
-
flattened_item =
|
802
|
+
flattened_item = {}
|
803
|
+
for k, v in item.items():
|
804
|
+
flattened_key = f"{primary_list_key}_{k}"
|
805
|
+
flattened_item[flattened_key] = v
|
806
|
+
|
807
|
+
# Update row data with flattened item
|
801
808
|
row_data.update(flattened_item)
|
802
809
|
else:
|
803
810
|
# If it's a simple value, just add it with the list key
|
@@ -809,8 +816,12 @@ class AlloyDBClient:
|
|
809
816
|
# Insert this row
|
810
817
|
result = await self._insert_single_row(table_name, row_data, metadata)
|
811
818
|
results.append(result)
|
819
|
+
|
820
|
+
return results
|
812
821
|
|
813
|
-
|
822
|
+
# If we somehow get here (shouldn't happen), fall back to single row insert
|
823
|
+
return await self._insert_single_row(table_name, regular_data, metadata)
|
824
|
+
|
814
825
|
|
815
826
|
async def _insert_single_row(self, table_name: str, data: dict, metadata: dict = None):
|
816
827
|
"""
|
@@ -100,9 +100,10 @@ def do_discovery_engine(message_data:str, metadata:dict, config:ConfigManager=No
|
|
100
100
|
log.warning("Only gs:// data is supported for Discovery Engine")
|
101
101
|
|
102
102
|
|
103
|
-
def check_discovery_engine_in_memory(config:ConfigManager):
|
103
|
+
def check_discovery_engine_in_memory(config:ConfigManager) -> int:
|
104
104
|
memories = config.vacConfig("memory")
|
105
105
|
|
106
|
+
discovery_engine_memories = 0
|
106
107
|
for memory in memories: # Iterate over the list
|
107
108
|
for key, value in memory.items(): # Now iterate over the dictionary
|
108
109
|
log.info(f"Found memory {key}")
|
@@ -110,16 +111,16 @@ def check_discovery_engine_in_memory(config:ConfigManager):
|
|
110
111
|
if vectorstore:
|
111
112
|
if vectorstore == "discovery_engine" or vectorstore == "vertex_ai_search":
|
112
113
|
log.info(f"Found vectorstore {vectorstore}")
|
113
|
-
|
114
|
+
discovery_engine_memories += 1
|
114
115
|
|
115
|
-
return
|
116
|
+
return discovery_engine_memories
|
116
117
|
|
117
118
|
def check_write_memories(config:ConfigManager):
|
118
119
|
write_mem = []
|
119
120
|
memories = config.vacConfig("memory")
|
120
121
|
for memory in memories:
|
121
122
|
for key, value in memory.items():
|
122
|
-
if value.get('read_only'):
|
123
|
+
if value and value.get('read_only'):
|
123
124
|
continue
|
124
125
|
write_mem.append(memory)
|
125
126
|
|
@@ -142,21 +143,25 @@ def discovery_engine_chunker_check(message_data,
|
|
142
143
|
return None
|
143
144
|
|
144
145
|
total_memories = len(check_write_memories(config))
|
145
|
-
|
146
|
-
if check_discovery_engine_in_memory(config) and process:
|
147
|
-
llama = do_discovery_engine(message_data, metadata, config=config)
|
148
|
-
log.info(f"Processed discovery engine: {llama}")
|
146
|
+
total_discovery_memories = check_discovery_engine_in_memory(config)
|
149
147
|
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
return llama
|
154
|
-
|
155
|
-
# If not processing and only memory, do not process further
|
156
|
-
if not process and total_memories == 1:
|
148
|
+
if not process and total_memories == total_discovery_memories:
|
149
|
+
log.info(f"Do not process discovery engine, and only memory found is discovery engine for {metadata} - stopping")
|
150
|
+
|
157
151
|
return metadata
|
152
|
+
|
153
|
+
if total_discovery_memories > 0:
|
154
|
+
log.info(f"Process discovery engine for {metadata}")
|
155
|
+
disc_meta = do_discovery_engine(message_data, metadata, config=config)
|
156
|
+
log.info(f"Processed discovery engine: {disc_meta}")
|
157
|
+
|
158
|
+
# If discovery engine is the only entry, return
|
159
|
+
if total_discovery_memories == total_memories:
|
160
|
+
log.info(f"Process discovery engine was only type found in {metadata} - stopping")
|
161
|
+
|
162
|
+
return disc_meta
|
158
163
|
|
159
|
-
elif
|
160
|
-
log.info("Discovery Engine found but not the only memory, continuing with other processes
|
164
|
+
elif disc_meta:
|
165
|
+
log.info("Discovery Engine found but not the only memory, continuing with other processes - returning None")
|
161
166
|
|
162
167
|
return None
|
@@ -38,7 +38,7 @@ sunholo/chunker/images.py,sha256=id2PBu6XyGEOtgafq2v0c9_O6kxaC_pYFMnbsIitkSg,186
|
|
38
38
|
sunholo/chunker/loaders.py,sha256=KEFPHBr32DPJnRUNjOecLiZKsFD-Gk3BFIKyXSVHgbY,11143
|
39
39
|
sunholo/chunker/message_data.py,sha256=bpb8QWQttqazm5lr7fTFJ5JDwf-P0SQ5cOIf6NikNyI,10836
|
40
40
|
sunholo/chunker/pdfs.py,sha256=xwbuMJrbypcyPXfZ8tiUidWeMr80C2NhfTC1mwa8SHY,2477
|
41
|
-
sunholo/chunker/process_chunker_data.py,sha256=
|
41
|
+
sunholo/chunker/process_chunker_data.py,sha256=soFl5blyHBryiAkeoalBsPnkU0wDg6bu8rZZrb3LGFc,3728
|
42
42
|
sunholo/chunker/publish.py,sha256=IDud-NhRcEZFv9GkyWJFRKwfptIU052kSPKEx8AYW68,2943
|
43
43
|
sunholo/chunker/pubsub.py,sha256=48bhuAcszN7LGe3-ksPSLHHhq0uKxiXOrizck5qpcP0,1012
|
44
44
|
sunholo/chunker/splitter.py,sha256=RfekLPkjhCcNd1PFXIj_FxusJMJ8_3cyWl7bsYvtQ0g,7068
|
@@ -60,7 +60,7 @@ sunholo/components/retriever.py,sha256=Wmchv3huAM4w7DIS-a5Lp9Hi7M8pE6vZdxgseiT9S
|
|
60
60
|
sunholo/components/vectorstore.py,sha256=k7GS1Y5c6ZGXSDAJvyCes6dTjhDAi0fjGbVLqpyfzBc,5918
|
61
61
|
sunholo/database/__init__.py,sha256=bpB5Nk21kwqYj-qdVnvNgXjLsbflnH4g-San7OHMqR4,283
|
62
62
|
sunholo/database/alloydb.py,sha256=x1zUMB-EVWbE2Zvp4nAs2Z-tB_kOZmS45H2lwVHdYnk,11678
|
63
|
-
sunholo/database/alloydb_client.py,sha256=
|
63
|
+
sunholo/database/alloydb_client.py,sha256=Ih_9nd_5fXzki02fNhssn5Grg5-GcQrfmNjqv1wLK7A,34788
|
64
64
|
sunholo/database/database.py,sha256=VqhZdkXUNdvWn8sUcUV3YNby1JDVf7IykPVXWBtxo9U,7361
|
65
65
|
sunholo/database/lancedb.py,sha256=DyfZntiFKBlVPaFooNN1Z6Pl-LAs4nxWKKuq8GBqN58,715
|
66
66
|
sunholo/database/static_dbs.py,sha256=8cvcMwUK6c32AS2e_WguKXWMkFf5iN3g9WHzsh0C07Q,442
|
@@ -72,7 +72,7 @@ sunholo/database/sql/sb/delete_source_row.sql,sha256=r6fEuUKdbiLHCDGKSbKINDCpJjs
|
|
72
72
|
sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUtYL5cCxbC2mj_c,255
|
73
73
|
sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
|
74
74
|
sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8RJ9OyKdQ,130
|
75
|
-
sunholo/discovery_engine/chunker_handler.py,sha256=
|
75
|
+
sunholo/discovery_engine/chunker_handler.py,sha256=qznc5lUkYv-Au18Nl4dJSY0qF0RpNqJMigzWdVrmOdM,6573
|
76
76
|
sunholo/discovery_engine/cli.py,sha256=KGVle5rkLL49oF9TQhrGI--8017IvvLOEoYur545Qb0,12790
|
77
77
|
sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
|
78
78
|
sunholo/discovery_engine/discovery_engine_client.py,sha256=NjIcP10I2-8yj6QZKrxGzNbh3SqQ5vGYsq9OwxCpWas,36935
|
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
|
|
168
168
|
sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
|
169
169
|
sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
|
170
170
|
sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
|
171
|
-
sunholo-0.
|
172
|
-
sunholo-0.
|
173
|
-
sunholo-0.
|
174
|
-
sunholo-0.
|
175
|
-
sunholo-0.
|
176
|
-
sunholo-0.
|
171
|
+
sunholo-0.126.0.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
172
|
+
sunholo-0.126.0.dist-info/METADATA,sha256=8qEuySuUqgo6zCfDjUB0aThQRZci-Mx5vEQPpfS2-wE,10001
|
173
|
+
sunholo-0.126.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
174
|
+
sunholo-0.126.0.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
175
|
+
sunholo-0.126.0.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
176
|
+
sunholo-0.126.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|