sunholo 0.119.9__py3-none-any.whl → 0.119.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/chunker/loaders.py +3 -4
- sunholo/discovery_engine/get_ai_search_chunks.py +28 -11
- {sunholo-0.119.9.dist-info → sunholo-0.119.11.dist-info}/METADATA +2 -1
- {sunholo-0.119.9.dist-info → sunholo-0.119.11.dist-info}/RECORD +8 -8
- {sunholo-0.119.9.dist-info → sunholo-0.119.11.dist-info}/LICENSE.txt +0 -0
- {sunholo-0.119.9.dist-info → sunholo-0.119.11.dist-info}/WHEEL +0 -0
- {sunholo-0.119.9.dist-info → sunholo-0.119.11.dist-info}/entry_points.txt +0 -0
- {sunholo-0.119.9.dist-info → sunholo-0.119.11.dist-info}/top_level.txt +0 -0
sunholo/chunker/loaders.py
CHANGED
|
@@ -160,9 +160,8 @@ def read_gdrive_to_document(url: str, metadata: dict = None):
|
|
|
160
160
|
return docs
|
|
161
161
|
|
|
162
162
|
def read_url_to_document(url: str, metadata: dict = None):
|
|
163
|
+
from langchain_unstructured import UnstructuredLoader
|
|
163
164
|
|
|
164
|
-
if not UnstructuredLoader:
|
|
165
|
-
raise ImportError("UnstructuredLoader requires 'langchain_unstructured' to be installed")
|
|
166
165
|
unstructured_kwargs = {"skip_infer_table_types": [],
|
|
167
166
|
"extract_image_block_types": ["Image", "Table"]
|
|
168
167
|
}
|
|
@@ -179,7 +178,7 @@ def read_url_to_document(url: str, metadata: dict = None):
|
|
|
179
178
|
return docs
|
|
180
179
|
|
|
181
180
|
def read_file_to_documents(gs_file: pathlib.Path, metadata: dict = None):
|
|
182
|
-
|
|
181
|
+
from langchain_unstructured import UnstructuredLoader
|
|
183
182
|
docs = []
|
|
184
183
|
pdf_path = str(pathlib.Path(gs_file))
|
|
185
184
|
|
|
@@ -276,7 +275,7 @@ def read_file_to_documents(gs_file: pathlib.Path, metadata: dict = None):
|
|
|
276
275
|
return docs
|
|
277
276
|
|
|
278
277
|
def convert_to_txt_and_extract(gs_file, split=False):
|
|
279
|
-
|
|
278
|
+
from langchain_unstructured import UnstructuredLoader
|
|
280
279
|
if not UnstructuredLoader:
|
|
281
280
|
raise ImportError("UnstructuredLoader requires 'langchain_unstructured' to be installed")
|
|
282
281
|
|
|
@@ -5,12 +5,13 @@ from .discovery_engine_client import DiscoveryEngineClient
|
|
|
5
5
|
from ..components import load_memories
|
|
6
6
|
import traceback
|
|
7
7
|
|
|
8
|
-
def get_all_chunks(question:str, config:ConfigManager):
|
|
8
|
+
def get_all_chunks(question:str, config:ConfigManager, filter_str=None):
|
|
9
9
|
"""
|
|
10
10
|
Look through a config memory key and find all Vertex AI Search retrievers, call them and return a joined string of chunks
|
|
11
11
|
|
|
12
12
|
args: question - question to search similarity for
|
|
13
13
|
config: A ConfigManager object
|
|
14
|
+
filter_str: A filter that will restrict ai search via its metadata. See https://cloud.google.com/generative-ai-app-builder/docs/filter-search-metadata
|
|
14
15
|
|
|
15
16
|
returns: a big string of chunks
|
|
16
17
|
"""
|
|
@@ -39,7 +40,7 @@ def get_all_chunks(question:str, config:ConfigManager):
|
|
|
39
40
|
project_id = gcp_config.get('project_id')
|
|
40
41
|
serving_config = value.get('serving_config')
|
|
41
42
|
|
|
42
|
-
chunk = get_chunks(question, vector_name, num_chunks, project_id=project_id, serving_config=serving_config)
|
|
43
|
+
chunk = get_chunks(question, vector_name, num_chunks, filter_str=filter_str, project_id=project_id, serving_config=serving_config)
|
|
43
44
|
if chunk:
|
|
44
45
|
chunks.append(chunk)
|
|
45
46
|
if chunks:
|
|
@@ -48,23 +49,30 @@ def get_all_chunks(question:str, config:ConfigManager):
|
|
|
48
49
|
log.warning(f"No chunks found for {vector_name}")
|
|
49
50
|
return None
|
|
50
51
|
|
|
51
|
-
def get_chunks(question, vector_name, num_chunks, project_id=None, serving_config=None):
|
|
52
|
+
def get_chunks(question, vector_name, num_chunks, filter_str=None, project_id=None, serving_config=None):
|
|
52
53
|
if serving_config is None:
|
|
53
54
|
serving_config = "default_serving_config"
|
|
54
55
|
de = DiscoveryEngineClient(vector_name, project_id=project_id or get_gcp_project(include_config=True))
|
|
55
56
|
try:
|
|
56
|
-
|
|
57
|
+
if filter_str:
|
|
58
|
+
return de.search_with_filters(query=question,
|
|
59
|
+
filter_str=filter_str,
|
|
60
|
+
num_previous_chunks=num_chunks,
|
|
61
|
+
num_next_chunks=num_chunks)
|
|
62
|
+
else:
|
|
63
|
+
return de.get_chunks(question, num_previous_chunks=num_chunks, num_next_chunks=num_chunks, serving_config=serving_config)
|
|
57
64
|
except Exception as err:
|
|
58
|
-
log.error(f"No discovery engine chunks found: {str(err)}")
|
|
65
|
+
log.error(f"No discovery engine chunks found: {str(err)} {traceback.format_exc()}")
|
|
59
66
|
|
|
60
67
|
|
|
61
68
|
|
|
62
|
-
async def async_get_all_chunks(question:str, config:ConfigManager):
|
|
69
|
+
async def async_get_all_chunks(question:str, config:ConfigManager, filter_str=None):
|
|
63
70
|
"""
|
|
64
71
|
Look through a config memory key and find all Vertex AI Search retrievers, call them and return a joined string of chunks
|
|
65
72
|
|
|
66
73
|
args: question - question to search similarity for
|
|
67
74
|
config: A ConfigManager object
|
|
75
|
+
filter_str: A filter that will restrict ai search via its metadata. See https://cloud.google.com/generative-ai-app-builder/docs/filter-search-metadata
|
|
68
76
|
|
|
69
77
|
returns: a big string of chunks
|
|
70
78
|
"""
|
|
@@ -87,10 +95,16 @@ async def async_get_all_chunks(question:str, config:ConfigManager):
|
|
|
87
95
|
continue
|
|
88
96
|
else:
|
|
89
97
|
vector_name = new_vector_name
|
|
90
|
-
|
|
98
|
+
|
|
99
|
+
project_id = value.get('project_id') or get_gcp_project(include_config=True)
|
|
91
100
|
num_chunks = value.get('num_chunks') or 3
|
|
92
101
|
|
|
93
|
-
chunk = await async_get_chunks(question,
|
|
102
|
+
chunk = await async_get_chunks(question,
|
|
103
|
+
vector_name=vector_name,
|
|
104
|
+
num_chunks=num_chunks,
|
|
105
|
+
filter_str=filter_str,
|
|
106
|
+
project_id=project_id)
|
|
107
|
+
|
|
94
108
|
if chunk:
|
|
95
109
|
chunks.append(chunk)
|
|
96
110
|
if chunks:
|
|
@@ -99,9 +113,12 @@ async def async_get_all_chunks(question:str, config:ConfigManager):
|
|
|
99
113
|
log.warning(f"No chunks found for {vector_name}")
|
|
100
114
|
return None
|
|
101
115
|
|
|
102
|
-
async def async_get_chunks(question, vector_name, num_chunks):
|
|
103
|
-
de = DiscoveryEngineClient(vector_name, project_id=
|
|
116
|
+
async def async_get_chunks(question, vector_name, num_chunks, filter_str, project_id=None):
|
|
117
|
+
de = DiscoveryEngineClient(vector_name, project_id=project_id)
|
|
104
118
|
try:
|
|
105
|
-
return await de.
|
|
119
|
+
return await de.search_with_filters(query=question,
|
|
120
|
+
filter_str=filter_str,
|
|
121
|
+
num_previous_chunks=num_chunks,
|
|
122
|
+
num_next_chunks=num_chunks)
|
|
106
123
|
except Exception as err:
|
|
107
124
|
log.error(f"No discovery engine chunks found: {str(err)} {traceback.format_exc()}")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: sunholo
|
|
3
|
-
Version: 0.119.9
|
|
3
|
+
Version: 0.119.11
|
|
4
4
|
Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
|
|
5
5
|
Author-email: Holosun ApS <multivac@sunholo.com>
|
|
6
6
|
License: Apache License, Version 2.0
|
|
@@ -95,6 +95,7 @@ Requires-Dist: langchain; extra == "langchain"
|
|
|
95
95
|
Requires-Dist: langchain_experimental; extra == "langchain"
|
|
96
96
|
Requires-Dist: langchain-community; extra == "langchain"
|
|
97
97
|
Requires-Dist: langsmith; extra == "langchain"
|
|
98
|
+
Requires-Dist: langchain-unstructured; extra == "langchain"
|
|
98
99
|
Provides-Extra: azure
|
|
99
100
|
Requires-Dist: azure-identity; extra == "azure"
|
|
100
101
|
Requires-Dist: azure-storage-blob; extra == "azure"
|
|
@@ -35,7 +35,7 @@ sunholo/chunker/azure.py,sha256=MVF9_-QdKUoJqlpEJ49pv2sdjMDxEiMNxzmO7w5nWDQ,3270
|
|
|
35
35
|
sunholo/chunker/doc_handling.py,sha256=t_lDazHfJbs4Q2Ruq2MvBBeJRfsjjQkzMxKuX8qQKBI,9087
|
|
36
36
|
sunholo/chunker/encode_metadata.py,sha256=hxxd9KU35Xi0Z_EL8kt_oD66pKfBLhEjBImC16ew-Eo,1919
|
|
37
37
|
sunholo/chunker/images.py,sha256=id2PBu6XyGEOtgafq2v0c9_O6kxaC_pYFMnbsIitkSg,1868
|
|
38
|
-
sunholo/chunker/loaders.py,sha256=
|
|
38
|
+
sunholo/chunker/loaders.py,sha256=KEFPHBr32DPJnRUNjOecLiZKsFD-Gk3BFIKyXSVHgbY,11143
|
|
39
39
|
sunholo/chunker/message_data.py,sha256=bpb8QWQttqazm5lr7fTFJ5JDwf-P0SQ5cOIf6NikNyI,10836
|
|
40
40
|
sunholo/chunker/pdfs.py,sha256=xwbuMJrbypcyPXfZ8tiUidWeMr80C2NhfTC1mwa8SHY,2477
|
|
41
41
|
sunholo/chunker/process_chunker_data.py,sha256=xjOAf1FvHDwQaBm7kgDzLQUnwm6AW8qf4fTrwDnwmtc,3613
|
|
@@ -76,7 +76,7 @@ sunholo/discovery_engine/chunker_handler.py,sha256=CYsyZLz_VZ0S_TcVlYmPxYz3JiAiP
|
|
|
76
76
|
sunholo/discovery_engine/cli.py,sha256=KGVle5rkLL49oF9TQhrGI--8017IvvLOEoYur545Qb0,12790
|
|
77
77
|
sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
|
|
78
78
|
sunholo/discovery_engine/discovery_engine_client.py,sha256=0h3h_Vy8vCcvfSBhGl8qSgVzDakOmsaPPFqHIGh3mv8,33053
|
|
79
|
-
sunholo/discovery_engine/get_ai_search_chunks.py,sha256=
|
|
79
|
+
sunholo/discovery_engine/get_ai_search_chunks.py,sha256=IDb9cm4X5XqQbUytZVQrBsm_oiSQxe3KjBAxaSZ3JyQ,5548
|
|
80
80
|
sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
|
|
81
81
|
sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
|
|
82
82
|
sunholo/embedder/embed_metadata.py,sha256=2ziUIdVwnbCUU8gOwQWEvkrRcyp-7IeyZfSsWNkMquA,866
|
|
@@ -166,9 +166,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
|
|
|
166
166
|
sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
|
|
167
167
|
sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
|
|
168
168
|
sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
|
|
169
|
-
sunholo-0.119.
|
|
170
|
-
sunholo-0.119.
|
|
171
|
-
sunholo-0.119.
|
|
172
|
-
sunholo-0.119.
|
|
173
|
-
sunholo-0.119.
|
|
174
|
-
sunholo-0.119.
|
|
169
|
+
sunholo-0.119.11.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
|
170
|
+
sunholo-0.119.11.dist-info/METADATA,sha256=gj6We8d7T27etGlhriYOvyJmNLF-bX0hbHui6sc_zmo,9715
|
|
171
|
+
sunholo-0.119.11.dist-info/WHEEL,sha256=nn6H5-ilmfVryoAQl3ZQ2l8SH5imPWFpm1A5FgEuFV4,91
|
|
172
|
+
sunholo-0.119.11.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
|
173
|
+
sunholo-0.119.11.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
|
174
|
+
sunholo-0.119.11.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|