sunholo 0.119.9__py3-none-any.whl → 0.119.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -160,9 +160,8 @@ def read_gdrive_to_document(url: str, metadata: dict = None):
160
160
  return docs
161
161
 
162
162
  def read_url_to_document(url: str, metadata: dict = None):
163
+ from langchain_unstructured import UnstructuredLoader
163
164
 
164
- if not UnstructuredLoader:
165
- raise ImportError("UnstructuredLoader requires 'langchain_unstructured' to be installed")
166
165
  unstructured_kwargs = {"skip_infer_table_types": [],
167
166
  "extract_image_block_types": ["Image", "Table"]
168
167
  }
@@ -179,7 +178,7 @@ def read_url_to_document(url: str, metadata: dict = None):
179
178
  return docs
180
179
 
181
180
  def read_file_to_documents(gs_file: pathlib.Path, metadata: dict = None):
182
-
181
+ from langchain_unstructured import UnstructuredLoader
183
182
  docs = []
184
183
  pdf_path = str(pathlib.Path(gs_file))
185
184
 
@@ -276,7 +275,7 @@ def read_file_to_documents(gs_file: pathlib.Path, metadata: dict = None):
276
275
  return docs
277
276
 
278
277
  def convert_to_txt_and_extract(gs_file, split=False):
279
-
278
+ from langchain_unstructured import UnstructuredLoader
280
279
  if not UnstructuredLoader:
281
280
  raise ImportError("UnstructuredLoader requires 'langchain_unstructured' to be installed")
282
281
 
@@ -5,12 +5,13 @@ from .discovery_engine_client import DiscoveryEngineClient
5
5
  from ..components import load_memories
6
6
  import traceback
7
7
 
8
- def get_all_chunks(question:str, config:ConfigManager):
8
+ def get_all_chunks(question:str, config:ConfigManager, filter_str=None):
9
9
  """
10
10
  Look through a config memory key and find all Vertex AI Search retrievers, call them and return a joined string of chunks
11
11
 
12
12
  args: question - question to search similarity for
13
13
  config: A ConfigManager object
14
+ filter_str: A filter that will restrict ai search via its metadata. See https://cloud.google.com/generative-ai-app-builder/docs/filter-search-metadata
14
15
 
15
16
  returns: a big string of chunks
16
17
  """
@@ -39,7 +40,7 @@ def get_all_chunks(question:str, config:ConfigManager):
39
40
  project_id = gcp_config.get('project_id')
40
41
  serving_config = value.get('serving_config')
41
42
 
42
- chunk = get_chunks(question, vector_name, num_chunks, project_id=project_id, serving_config=serving_config)
43
+ chunk = get_chunks(question, vector_name, num_chunks, filter_str=filter_str, project_id=project_id, serving_config=serving_config)
43
44
  if chunk:
44
45
  chunks.append(chunk)
45
46
  if chunks:
@@ -48,23 +49,30 @@ def get_all_chunks(question:str, config:ConfigManager):
48
49
  log.warning(f"No chunks found for {vector_name}")
49
50
  return None
50
51
 
51
- def get_chunks(question, vector_name, num_chunks, project_id=None, serving_config=None):
52
+ def get_chunks(question, vector_name, num_chunks, filter_str=None, project_id=None, serving_config=None):
52
53
  if serving_config is None:
53
54
  serving_config = "default_serving_config"
54
55
  de = DiscoveryEngineClient(vector_name, project_id=project_id or get_gcp_project(include_config=True))
55
56
  try:
56
- return de.get_chunks(question, num_previous_chunks=num_chunks, num_next_chunks=num_chunks, serving_config=serving_config)
57
+ if filter_str:
58
+ return de.search_with_filters(query=question,
59
+ filter_str=filter_str,
60
+ num_previous_chunks=num_chunks,
61
+ num_next_chunks=num_chunks)
62
+ else:
63
+ return de.get_chunks(question, num_previous_chunks=num_chunks, num_next_chunks=num_chunks, serving_config=serving_config)
57
64
  except Exception as err:
58
- log.error(f"No discovery engine chunks found: {str(err)}")
65
+ log.error(f"No discovery engine chunks found: {str(err)} {traceback.format_exc()}")
59
66
 
60
67
 
61
68
 
62
- async def async_get_all_chunks(question:str, config:ConfigManager):
69
+ async def async_get_all_chunks(question:str, config:ConfigManager, filter_str=None):
63
70
  """
64
71
  Look through a config memory key and find all Vertex AI Search retrievers, call them and return a joined string of chunks
65
72
 
66
73
  args: question - question to search similarity for
67
74
  config: A ConfigManager object
75
+ filter_str: A filter that will restrict ai search via its metadata. See https://cloud.google.com/generative-ai-app-builder/docs/filter-search-metadata
68
76
 
69
77
  returns: a big string of chunks
70
78
  """
@@ -87,10 +95,16 @@ async def async_get_all_chunks(question:str, config:ConfigManager):
87
95
  continue
88
96
  else:
89
97
  vector_name = new_vector_name
90
-
98
+
99
+ project_id = value.get('project_id') or get_gcp_project(include_config=True)
91
100
  num_chunks = value.get('num_chunks') or 3
92
101
 
93
- chunk = await async_get_chunks(question, vector_name, num_chunks)
102
+ chunk = await async_get_chunks(question,
103
+ vector_name=vector_name,
104
+ num_chunks=num_chunks,
105
+ filter_str=filter_str,
106
+ project_id=project_id)
107
+
94
108
  if chunk:
95
109
  chunks.append(chunk)
96
110
  if chunks:
@@ -99,9 +113,12 @@ async def async_get_all_chunks(question:str, config:ConfigManager):
99
113
  log.warning(f"No chunks found for {vector_name}")
100
114
  return None
101
115
 
102
- async def async_get_chunks(question, vector_name, num_chunks):
103
- de = DiscoveryEngineClient(vector_name, project_id=get_gcp_project(include_config=True))
116
+ async def async_get_chunks(question, vector_name, num_chunks, filter_str, project_id=None):
117
+ de = DiscoveryEngineClient(vector_name, project_id=project_id)
104
118
  try:
105
- return await de.async_get_chunks(question, num_previous_chunks=num_chunks, num_next_chunks=num_chunks)
119
+ return await de.search_with_filters(query=question,
120
+ filter_str=filter_str,
121
+ num_previous_chunks=num_chunks,
122
+ num_next_chunks=num_chunks)
106
123
  except Exception as err:
107
124
  log.error(f"No discovery engine chunks found: {str(err)} {traceback.format_exc()}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: sunholo
3
- Version: 0.119.9
3
+ Version: 0.119.11
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Author-email: Holosun ApS <multivac@sunholo.com>
6
6
  License: Apache License, Version 2.0
@@ -95,6 +95,7 @@ Requires-Dist: langchain; extra == "langchain"
95
95
  Requires-Dist: langchain_experimental; extra == "langchain"
96
96
  Requires-Dist: langchain-community; extra == "langchain"
97
97
  Requires-Dist: langsmith; extra == "langchain"
98
+ Requires-Dist: langchain-unstructured; extra == "langchain"
98
99
  Provides-Extra: azure
99
100
  Requires-Dist: azure-identity; extra == "azure"
100
101
  Requires-Dist: azure-storage-blob; extra == "azure"
@@ -35,7 +35,7 @@ sunholo/chunker/azure.py,sha256=MVF9_-QdKUoJqlpEJ49pv2sdjMDxEiMNxzmO7w5nWDQ,3270
35
35
  sunholo/chunker/doc_handling.py,sha256=t_lDazHfJbs4Q2Ruq2MvBBeJRfsjjQkzMxKuX8qQKBI,9087
36
36
  sunholo/chunker/encode_metadata.py,sha256=hxxd9KU35Xi0Z_EL8kt_oD66pKfBLhEjBImC16ew-Eo,1919
37
37
  sunholo/chunker/images.py,sha256=id2PBu6XyGEOtgafq2v0c9_O6kxaC_pYFMnbsIitkSg,1868
38
- sunholo/chunker/loaders.py,sha256=5NXrMxV-WdbFpxeLhFzccw0_zhf1UQ7yKFFeaMkc9Bc,11105
38
+ sunholo/chunker/loaders.py,sha256=KEFPHBr32DPJnRUNjOecLiZKsFD-Gk3BFIKyXSVHgbY,11143
39
39
  sunholo/chunker/message_data.py,sha256=bpb8QWQttqazm5lr7fTFJ5JDwf-P0SQ5cOIf6NikNyI,10836
40
40
  sunholo/chunker/pdfs.py,sha256=xwbuMJrbypcyPXfZ8tiUidWeMr80C2NhfTC1mwa8SHY,2477
41
41
  sunholo/chunker/process_chunker_data.py,sha256=xjOAf1FvHDwQaBm7kgDzLQUnwm6AW8qf4fTrwDnwmtc,3613
@@ -76,7 +76,7 @@ sunholo/discovery_engine/chunker_handler.py,sha256=CYsyZLz_VZ0S_TcVlYmPxYz3JiAiP
76
76
  sunholo/discovery_engine/cli.py,sha256=KGVle5rkLL49oF9TQhrGI--8017IvvLOEoYur545Qb0,12790
77
77
  sunholo/discovery_engine/create_new.py,sha256=WUi4_xh_dFaGX3xA9jkNKZhaR6LCELjMPeRb0hyj4FU,1226
78
78
  sunholo/discovery_engine/discovery_engine_client.py,sha256=0h3h_Vy8vCcvfSBhGl8qSgVzDakOmsaPPFqHIGh3mv8,33053
79
- sunholo/discovery_engine/get_ai_search_chunks.py,sha256=hsFGOQugSeTMPEaQ16XTs_D45F8NABBm2IsAEdTk7kQ,4316
79
+ sunholo/discovery_engine/get_ai_search_chunks.py,sha256=IDb9cm4X5XqQbUytZVQrBsm_oiSQxe3KjBAxaSZ3JyQ,5548
80
80
  sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
81
81
  sunholo/embedder/embed_chunk.py,sha256=did2pKkWM2o0KkRcb0H9l2x_WjCq6OyuHDxGbITFKPM,6530
82
82
  sunholo/embedder/embed_metadata.py,sha256=2ziUIdVwnbCUU8gOwQWEvkrRcyp-7IeyZfSsWNkMquA,866
@@ -166,9 +166,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
166
166
  sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
167
167
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
168
168
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
169
- sunholo-0.119.9.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
170
- sunholo-0.119.9.dist-info/METADATA,sha256=XDQ0g73OJrI5qnlk8SuhbYxaUPF7yuAZQyMUUqkZ0_4,9654
171
- sunholo-0.119.9.dist-info/WHEEL,sha256=nn6H5-ilmfVryoAQl3ZQ2l8SH5imPWFpm1A5FgEuFV4,91
172
- sunholo-0.119.9.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
173
- sunholo-0.119.9.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
174
- sunholo-0.119.9.dist-info/RECORD,,
169
+ sunholo-0.119.11.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
170
+ sunholo-0.119.11.dist-info/METADATA,sha256=gj6We8d7T27etGlhriYOvyJmNLF-bX0hbHui6sc_zmo,9715
171
+ sunholo-0.119.11.dist-info/WHEEL,sha256=nn6H5-ilmfVryoAQl3ZQ2l8SH5imPWFpm1A5FgEuFV4,91
172
+ sunholo-0.119.11.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
173
+ sunholo-0.119.11.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
174
+ sunholo-0.119.11.dist-info/RECORD,,