sunholo 0.125.2__py3-none-any.whl → 0.126.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/chunker/process_chunker_data.py +5 -2
- sunholo/discovery_engine/chunker_handler.py +33 -24
- {sunholo-0.125.2.dist-info → sunholo-0.126.1.dist-info}/METADATA +1 -1
- {sunholo-0.125.2.dist-info → sunholo-0.126.1.dist-info}/RECORD +8 -8
- {sunholo-0.125.2.dist-info → sunholo-0.126.1.dist-info}/WHEEL +0 -0
- {sunholo-0.125.2.dist-info → sunholo-0.126.1.dist-info}/entry_points.txt +0 -0
- {sunholo-0.125.2.dist-info → sunholo-0.126.1.dist-info}/licenses/LICENSE.txt +0 -0
- {sunholo-0.125.2.dist-info → sunholo-0.126.1.dist-info}/top_level.txt +0 -0
@@ -35,13 +35,16 @@ def process_chunker_data(message_data, metadata, vector_name):
|
|
35
35
|
# checks if only a llamaindex chunking/embedder, return early as no other processing needed
|
36
36
|
llamacheck = llamaindex_chunker_check(message_data, metadata, vector_name)
|
37
37
|
if llamacheck:
|
38
|
-
|
38
|
+
log.info(f"No further chunker required for {message_data}")
|
39
|
+
return None
|
39
40
|
|
40
41
|
# if only a discovery engine memory, return early as no other processing needed
|
41
42
|
discovery_check = discovery_engine_chunker_check(message_data, metadata, config=config, process=False)
|
42
43
|
if discovery_check:
|
43
|
-
|
44
|
+
log.info(f"No further chunker required for {message_data}")
|
45
|
+
return None
|
44
46
|
|
47
|
+
log.info(f"Discovery engine and llamaindex checks passed - continuing to other memory types for {metadata}")
|
45
48
|
chunks = []
|
46
49
|
|
47
50
|
if message_data.startswith("gs://"):
|
@@ -36,20 +36,24 @@ def do_discovery_engine(message_data:str, metadata:dict, config:ConfigManager=No
|
|
36
36
|
log.info(f"Found vectorstore {vectorstore}")
|
37
37
|
if value.get('read_only'):
|
38
38
|
continue
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
39
|
+
|
40
|
+
project_id = value.get("project_id")
|
41
|
+
location = value.get("location", "eu")
|
42
|
+
if not project_id:
|
43
|
+
gcp_config = config.vacConfig("gcp_config")
|
44
|
+
if not gcp_config:
|
45
|
+
project_id = get_gcp_project()
|
46
|
+
else:
|
47
|
+
project_id = gcp_config.get("project_id")
|
44
48
|
|
45
49
|
if not project_id:
|
46
50
|
raise ValueError("Couldn't retrieve project_id for vertex_ai_search")
|
47
|
-
|
51
|
+
|
48
52
|
corpus = DiscoveryEngineClient(
|
49
53
|
data_store_id=config.vector_name,
|
50
|
-
project_id=project_id
|
54
|
+
project_id=project_id,
|
51
55
|
# location needs to be 'eu' or 'us' which doesn't work with other configurations
|
52
|
-
|
56
|
+
location=location
|
53
57
|
)
|
54
58
|
|
55
59
|
corpuses.append(corpus)
|
@@ -100,9 +104,10 @@ def do_discovery_engine(message_data:str, metadata:dict, config:ConfigManager=No
|
|
100
104
|
log.warning("Only gs:// data is supported for Discovery Engine")
|
101
105
|
|
102
106
|
|
103
|
-
def check_discovery_engine_in_memory(config:ConfigManager):
|
107
|
+
def check_discovery_engine_in_memory(config:ConfigManager) -> int:
|
104
108
|
memories = config.vacConfig("memory")
|
105
109
|
|
110
|
+
discovery_engine_memories = 0
|
106
111
|
for memory in memories: # Iterate over the list
|
107
112
|
for key, value in memory.items(): # Now iterate over the dictionary
|
108
113
|
log.info(f"Found memory {key}")
|
@@ -110,16 +115,16 @@ def check_discovery_engine_in_memory(config:ConfigManager):
|
|
110
115
|
if vectorstore:
|
111
116
|
if vectorstore == "discovery_engine" or vectorstore == "vertex_ai_search":
|
112
117
|
log.info(f"Found vectorstore {vectorstore}")
|
113
|
-
|
118
|
+
discovery_engine_memories += 1
|
114
119
|
|
115
|
-
return
|
120
|
+
return discovery_engine_memories
|
116
121
|
|
117
122
|
def check_write_memories(config:ConfigManager):
|
118
123
|
write_mem = []
|
119
124
|
memories = config.vacConfig("memory")
|
120
125
|
for memory in memories:
|
121
126
|
for key, value in memory.items():
|
122
|
-
if value.get('read_only'):
|
127
|
+
if value and value.get('read_only'):
|
123
128
|
continue
|
124
129
|
write_mem.append(memory)
|
125
130
|
|
@@ -142,21 +147,25 @@ def discovery_engine_chunker_check(message_data,
|
|
142
147
|
return None
|
143
148
|
|
144
149
|
total_memories = len(check_write_memories(config))
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
log.info(f"
|
150
|
+
total_discovery_memories = check_discovery_engine_in_memory(config)
|
151
|
+
|
152
|
+
if not process and total_memories == total_discovery_memories:
|
153
|
+
log.info(f"Do not process discovery engine, and only memory found is discovery engine for {metadata} - stopping")
|
154
|
+
|
155
|
+
return metadata
|
156
|
+
|
157
|
+
if total_discovery_memories > 0:
|
158
|
+
log.info(f"Process discovery engine for {metadata}")
|
159
|
+
disc_meta = do_discovery_engine(message_data, metadata, config=config)
|
160
|
+
log.info(f"Processed discovery engine: {disc_meta}")
|
149
161
|
|
150
162
|
# If discovery engine is the only entry, return
|
151
|
-
if
|
163
|
+
if total_discovery_memories == total_memories:
|
164
|
+
log.info(f"Process discovery engine was only type found in {metadata} - stopping")
|
152
165
|
|
153
|
-
return
|
154
|
-
|
155
|
-
# If not processing and only memory, do not process further
|
156
|
-
if not process and total_memories == 1:
|
157
|
-
return metadata
|
166
|
+
return disc_meta
|
158
167
|
|
159
|
-
elif
|
160
|
-
log.info("Discovery Engine found but not the only memory, continuing with other processes
|
168
|
+
elif disc_meta:
|
169
|
+
log.info("Discovery Engine found but not the only memory, continuing with other processes - returning None")
|
161
170
|
|
162
171
|
return None
|
@@ -38,7 +38,7 @@ sunholo/chunker/images.py,sha256=id2PBu6XyGEOtgafq2v0c9_O6kxaC_pYFMnbsIitkSg,186
|
|
38
38
|
sunholo/chunker/loaders.py,sha256=KEFPHBr32DPJnRUNjOecLiZKsFD-Gk3BFIKyXSVHgbY,11143
|
39
39
|
sunholo/chunker/message_data.py,sha256=bpb8QWQttqazm5lr7fTFJ5JDwf-P0SQ5cOIf6NikNyI,10836
|
40
40
|
sunholo/chunker/pdfs.py,sha256=xwbuMJrbypcyPXfZ8tiUidWeMr80C2NhfTC1mwa8SHY,2477
|
41
|
-
sunholo/chunker/process_chunker_data.py,sha256=
|
41
|
+
sunholo/chunker/process_chunker_data.py,sha256=kdFkNYxt0HwVCCNzhFR37prfMm-2bU8Zxnl0XcRbyd0,3845
|
42
42
|
sunholo/chunker/publish.py,sha256=IDud-NhRcEZFv9GkyWJFRKwfptIU052kSPKEx8AYW68,2943
|
43
43
|
sunholo/chunker/pubsub.py,sha256=48bhuAcszN7LGe3-ksPSLHHhq0uKxiXOrizck5qpcP0,1012
|
44
44
|
sunholo/chunker/splitter.py,sha256=RfekLPkjhCcNd1PFXIj_FxusJMJ8_3cyWl7bsYvtQ0g,7068
|
@@ -72,7 +72,7 @@ sunholo/database/sql/sb/delete_source_row.sql,sha256=r6fEuUKdbiLHCDGKSbKINDCpJjs
|
|
72
72
|
sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUtYL5cCxbC2mj_c,255
|
73
73
|
sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
|
74
74
|
sunholo/discovery_engine/__init__.py,sha256=hLgqRDJ22Aov9o2QjAEfsVgnL3kMdM-g5p8RJ9OyKdQ,130
|
75
|
-
sunholo/discovery_engine/chunker_handler.py,sha256=
|
75
|
+
sunholo/discovery_engine/chunker_handler.py,sha256=ZplseIFT1sW48DtKtujB9e8cOD2gWwb-yi-K0CvLkEU,6655
|
76
76
|
sunholo/discovery_engine/cli.py,sha256=KGVle5rkLL49oF9TQhrGI--8017IvvLOEoYur545Qb0,12790
|
77
77
|
sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
|
78
78
|
sunholo/discovery_engine/discovery_engine_client.py,sha256=NjIcP10I2-8yj6QZKrxGzNbh3SqQ5vGYsq9OwxCpWas,36935
|
@@ -168,9 +168,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
|
|
168
168
|
sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
|
169
169
|
sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
|
170
170
|
sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
|
171
|
-
sunholo-0.
|
172
|
-
sunholo-0.
|
173
|
-
sunholo-0.
|
174
|
-
sunholo-0.
|
175
|
-
sunholo-0.
|
176
|
-
sunholo-0.
|
171
|
+
sunholo-0.126.1.dist-info/licenses/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
172
|
+
sunholo-0.126.1.dist-info/METADATA,sha256=HULIOhd32X0GAYPLPHV92eWFYJEe-vpwLzDNsy-LIDY,10001
|
173
|
+
sunholo-0.126.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
174
|
+
sunholo-0.126.1.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
175
|
+
sunholo-0.126.1.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
176
|
+
sunholo-0.126.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|