sunholo 0.79.4__py3-none-any.whl → 0.79.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -113,8 +113,9 @@ def choose_splitter(extension: str, chunk_size: int=1024, chunk_overlap:int=200,
113
113
 
114
114
  if vector_name:
115
115
  # check if there is a chunking configuration
116
- from ..utils import load_config_key
117
- chunk_config = load_config_key("chunker", vector_name=vector_name, kind="vacConfig")
116
+ from ..utils import ConfigManager
117
+ config = ConfigManager(vector_name)
118
+ chunk_config = config.vacConfig("chunker")
118
119
  if chunk_config:
119
120
  if chunk_config.get("type") == "semantic":
120
121
  embedding_str = chunk_config.get("llm")
@@ -124,7 +125,7 @@ def choose_splitter(extension: str, chunk_size: int=1024, chunk_overlap:int=200,
124
125
  log.info(f"Semantic chunking for {vector_name}")
125
126
  from langchain_experimental.text_splitter import SemanticChunker
126
127
  from ..components import pick_embedding
127
- embeddings = pick_embedding(embedding_str)
128
+ embeddings = pick_embedding(embedding_str, config=config)
128
129
  semantic_splitter = SemanticChunker(
129
130
  embeddings, breakpoint_threshold_type="percentile"
130
131
  )
@@ -43,6 +43,8 @@ class LlamaIndexVertexCorpusManager:
43
43
  self.config = config
44
44
  self.project_id = project_id
45
45
  self.location = location
46
+ self.corpus_display_name = ""
47
+ self.corpus = ""
46
48
 
47
49
  if config:
48
50
  self.project_id = self.config.vacConfig('project_id') or project_id
@@ -104,6 +106,88 @@ class LlamaIndexVertexCorpusManager:
104
106
 
105
107
  return response
106
108
 
109
def list_files(self, corpus_display_name: str):
    """
    List the files stored in a corpus.

    Args:
        corpus_display_name (str): The display name of the corpus to list.

    Returns:
        The result of ``rag.list_files`` for the resolved corpus.

    Raises:
        ValueError: If no corpus with that display name could be found.
    """
    corpus = self.find_corpus_from_list(corpus_display_name)
    if not corpus:
        # The lookup yields nothing usable when no corpus matches; fail with
        # a clear message instead of an AttributeError on `corpus.name`.
        raise ValueError(
            f"Could not find corpus with display name: {corpus_display_name}"
        )

    files = rag.list_files(corpus_name=corpus.name)

    log.info(f"--Files in {corpus.name}:\n{files}")

    return files
116
+
117
def find_file_from_list(self, display_name: str, corpus_display_name: str):
    """
    Look up a single file inside a corpus by the file's display name.

    Args:
        display_name (str): The display name of the file to look for.
        corpus_display_name (str): The display name of the corpus to search.

    Returns:
        The first file whose ``display_name`` matches, otherwise None.
    """
    for candidate in self.list_files(corpus_display_name):
        if display_name != candidate.display_name:
            continue

        log.info(f"Found existing file with display name: {display_name}")
        return candidate

    return None
136
+
137
def get_file(self, file_display_name: str = None, file_name: str = None, corpus_display_name: str = None):
    """
    Fetch a file either by its display name or by its file name.

    Args:
        file_display_name (str): Display name of the file. When given, the
            file is searched for in the corpus and ``file_name`` is ignored.
        file_name (str): File name. A value starting with ``projects/`` is
            passed to ``rag.get_file`` on its own; anything else is passed
            together with the resolved corpus name.
        corpus_display_name (str): Display name of the corpus. Required
            unless ``file_name`` starts with ``projects/``.

    Returns:
        The file object, or None when a display-name search finds nothing.

    Raises:
        ValueError: If neither ``file_display_name`` nor ``file_name`` is
            given, if ``corpus_display_name`` is needed but missing, or if
            the corpus cannot be found.
    """
    if file_display_name:
        if not corpus_display_name:
            raise ValueError("Must supply corpus_display_name when using file_display_name")

        rag_file = self.find_file_from_list(file_display_name, corpus_display_name)
        if rag_file is None:
            # Avoid the misleading "Found None ..." message on a miss.
            log.info(f"No file found via display name: {file_display_name}")
        else:
            log.info(f"Found {rag_file} via display name: {file_display_name}")

        return rag_file

    if not file_name:
        raise ValueError("Need to supply one of file_display_name or file_name")

    if file_name.startswith("projects/"):
        # This branch only needs the file name, so skip the corpus lookup.
        rag_file = rag.get_file(name=file_name)
    else:
        # Validate before resolving the corpus so a missing argument is
        # reported as such rather than as a failed lookup.
        if not corpus_display_name:
            raise ValueError("Must supply corpus_display_name if not a full file_name")

        corpus = self.find_corpus_from_list(corpus_display_name)
        if not corpus:
            raise ValueError(
                f"Could not find corpus with display name: {corpus_display_name}"
            )
        rag_file = rag.get_file(name=file_name, corpus_name=corpus.name)

    log.info(f"Found {rag_file}")

    return rag_file
159
+
160
def delete_file(self, file_name, corpus_display_name: str = None):
    """
    Delete a file from a corpus.

    Args:
        file_name (str): File name. A value starting with ``projects/`` is
            passed to ``rag.delete_file`` on its own; anything else is
            passed together with the resolved corpus name.
        corpus_display_name (str): Display name of the corpus. Required
            unless ``file_name`` starts with ``projects/``.

    Returns:
        True once ``rag.delete_file`` has returned without raising.

    Raises:
        ValueError: If ``file_name`` is empty, if ``corpus_display_name`` is
            needed but missing, or if the corpus cannot be found.
    """
    if not file_name:
        raise ValueError("Need to supply file_name")

    if file_name.startswith("projects/"):
        # This branch only needs the file name, so skip the corpus lookup.
        rag.delete_file(name=file_name)
    else:
        # Validate before resolving the corpus so a missing argument is
        # reported as such rather than as a failed lookup.
        if not corpus_display_name:
            raise ValueError("Must supply corpus_display_name if not a full file_name")

        corpus = self.find_corpus_from_list(corpus_display_name)
        if not corpus:
            raise ValueError(
                f"Could not find corpus with display name: {corpus_display_name}"
            )
        rag.delete_file(name=file_name, corpus_name=corpus.name)

    log.info(f"File {file_name} deleted.")

    return True
173
+
174
def query_corpus(self, query: str, corpus_disply_name: str, similarity_top_k: int = 10, vector_distance_threshold: float = 0.5):
    """
    Run a retrieval query against a corpus.

    Args:
        query (str): The query text.
        corpus_disply_name (str): Display name of the corpus to query. The
            spelling is kept as-is because callers pass it by keyword.
        similarity_top_k (int): Forwarded to ``rag.retrieval_query``.
            Defaults to 10.
        vector_distance_threshold (float): Forwarded to
            ``rag.retrieval_query``. Defaults to 0.5.

    Returns:
        The response from ``rag.retrieval_query``.

    Raises:
        ValueError: If no corpus with that display name could be found.
    """
    corpus = self.find_corpus_from_list(corpus_disply_name)
    if not corpus:
        # Fail with a clear message instead of an AttributeError on
        # `corpus.name` when the lookup finds nothing.
        raise ValueError(
            f"Could not find corpus with display name: {corpus_disply_name}"
        )

    response = rag.retrieval_query(
        rag_resources=[
            rag.RagResource(
                rag_corpus=corpus.name,
                # Supply IDs from `rag.list_files()`.
                # rag_file_ids=["rag-file-1", "rag-file-2", ...],
            )
        ],
        text=query,
        similarity_top_k=similarity_top_k,
        vector_distance_threshold=vector_distance_threshold,
    )

    return response
190
+
107
191
  def list_corpora(self):
108
192
  """
109
193
  List all VertexAI Corpus for the project/location
@@ -120,10 +204,13 @@ class LlamaIndexVertexCorpusManager:
120
204
  Returns:
121
205
  The found corpus object if it exists, otherwise None.
122
206
  """
207
+ if display_name == self.corpus_display_name:
208
+ return self.corpus
123
209
  corpora = self.list_corpora()
124
210
  for corp in corpora:
125
211
  if display_name == corp.display_name:
126
212
  log.info(f"Found existing corpus with display name: {display_name}")
213
+ self.corpus = corp
127
214
  return corp
128
215
  return None
129
216
 
@@ -211,6 +298,29 @@ def llamaindex_command(args):
211
298
  manager.upload_file(filename=args.filename, corpus_display_name=args.display_name, description=args.description)
212
299
  elif args.action == "upload_text":
213
300
  manager.upload_text(text=args.text, corpus_display_name=args.display_name, description=args.description)
301
+ elif args.action == "list_files":
302
+ files = manager.list_files(corpus_display_name=args.display_name)
303
+ if files:
304
+ console.print(files)
305
+ else:
306
+ console.print("No files found for {args.display_name}")
307
+ elif args.action == "get_file":
308
+ file = manager.get_file(file_display_name=args.file_name, corpus_display_name=args.display_name)
309
+ console.print(file)
310
+ return file
311
+ elif args.action == "delete_file":
312
+ deleted = manager.delete_file(args.file_name, corpus_display_name=args.display_name)
313
+ if deleted:
314
+ console.print(f"Deleted {args.file_name}")
315
+ else:
316
+ console.print(f"ERROR: Could not delete {args.file_name}")
317
+
318
+ elif args.action == "query":
319
+ answer = manager.query_corpus(args.query, corpus_disply_name=args.display_name)
320
+ if answer:
321
+ console.print(answer)
322
+ else:
323
+ console.print(f"No answer found for {args.query} in {args.display_name}")
214
324
  else:
215
325
  console.print(f"Unknown action: {args.action}")
216
326
 
@@ -270,5 +380,27 @@ def setup_llamaindex_subparser(subparsers):
270
380
  upload_text_parser.add_argument('text', help='The text content to upload')
271
381
  upload_text_parser.add_argument('--description', help='Description of the text upload', default=None)
272
382
 
383
+ # LlamaIndex list_files command
384
+ list_files_parser = llamaindex_subparsers.add_parser('list_files', help='List all files in a corpus')
385
+ list_files_parser.add_argument('display_name', help='The name of the corpus')
386
+ list_files_parser.add_argument('vac', nargs='?', default="global", help='The VAC config to set it up for')
387
+
388
+ # LlamaIndex get_file command
389
+ get_file_parser = llamaindex_subparsers.add_parser('get_file', help='Get a file from a corpus')
390
+ get_file_parser.add_argument('display_name', help='The name of the corpus')
391
+ get_file_parser.add_argument('file_name', help='The name of the file to get')
392
+ get_file_parser.add_argument('vac', nargs='?', default="global", help='The VAC config to set it up for')
393
+
394
+ # LlamaIndex delete_file command
395
+ delete_file_parser = llamaindex_subparsers.add_parser('delete_file', help='Delete a file from a corpus')
396
+ delete_file_parser.add_argument('display_name', help='The name of the corpus')
397
+ delete_file_parser.add_argument('file_name', help='The name of the file to delete')
398
+ delete_file_parser.add_argument('vac', nargs='?', default="global", help='The VAC config to set it up for')
399
+
400
+ # LlamaIndex query command
401
+ query_parser = llamaindex_subparsers.add_parser('query', help='Query a corpus')
402
+ query_parser.add_argument('display_name', help='The name of the corpus')
403
+ query_parser.add_argument('query', help='The query string')
404
+ query_parser.add_argument('vac', nargs='?', default="global", help='The VAC config to set it up for')
405
+
273
406
  llamaindex_parser.set_defaults(func=llamaindex_command)
274
-
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.79.4
3
+ Version: 0.79.6
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.79.4.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.79.6.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -40,7 +40,7 @@ sunholo/chunker/pdfs.py,sha256=njDPop751GMHi3cOwIKd2Yct-_lWR2gqcB7WykfHphs,2480
40
40
  sunholo/chunker/process_chunker_data.py,sha256=OnMvXHRv3rGpFsU50FyUNkNIwC1D8TkhaWWbn72yQss,3523
41
41
  sunholo/chunker/publish.py,sha256=AX5u-fcyDytED67IfizMzvOMcYPXEo6XBJvyk_7maK8,2939
42
42
  sunholo/chunker/pubsub.py,sha256=48bhuAcszN7LGe3-ksPSLHHhq0uKxiXOrizck5qpcP0,1012
43
- sunholo/chunker/splitter.py,sha256=liruCE6fqzPdQ8dwvoD5EjMgWe0PW5OmO-hKHmiG9Nk,6736
43
+ sunholo/chunker/splitter.py,sha256=QLAEsJOpEYFZr9-UGZUuAlNVyjfCWb8jvzCHg0rVShE,6751
44
44
  sunholo/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
45
  sunholo/cli/chat_vac.py,sha256=UNLzZYAejjEvMR0EjpxIHXyvUpBDoqThohzzFL_m3Yg,23026
46
46
  sunholo/cli/cli.py,sha256=yuY7SLFiYDUKqJDOXy7jL1l6P0UVPMuAZK9bXXTG8ck,3939
@@ -90,7 +90,7 @@ sunholo/langfuse/prompts.py,sha256=27BsVfihM6-h1jscbkGSO4HsATl-d4ZN6tcNCVztWoY,1
90
90
  sunholo/llamaindex/__init__.py,sha256=DlY_cHWCsVEV1C5WBgDdHRgOMlJc8pDoCRukUJ8PT9w,88
91
91
  sunholo/llamaindex/get_files.py,sha256=6rhXCDqQ_lrIapISQ_OYQDjiSATXvS_9m3qq53-oIl0,781
92
92
  sunholo/llamaindex/import_files.py,sha256=Bnic5wz8c61af9Kwq8KSrNBbc4imYnzMtBCb2jzSImI,6224
93
- sunholo/llamaindex/llamaindex_class.py,sha256=45N_xU0gDdgUPNv5m36DopRmG9JHFErwmgssKZ3wOqA,11058
93
+ sunholo/llamaindex/llamaindex_class.py,sha256=USb8zmF0DuEdaDjplJHvAItL4Ud2eE0iZ5ITwJ5Wflo,16763
94
94
  sunholo/lookup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
95
95
  sunholo/lookup/model_lookup.yaml,sha256=O7o-jP53MLA06C8pI-ILwERShO-xf6z_258wtpZBv6A,739
96
96
  sunholo/patches/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -132,9 +132,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
132
132
  sunholo/vertex/memory_tools.py,sha256=pgSahVDh7GPEulu3nl-w0jb5lTClb4TCnVxPnMokNZY,7533
133
133
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
134
134
  sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
135
- sunholo-0.79.4.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
136
- sunholo-0.79.4.dist-info/METADATA,sha256=-mV_Y5Er002Y0OS4lEz913p-XMfqTuifDtkRMlExgA4,7348
137
- sunholo-0.79.4.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
138
- sunholo-0.79.4.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
139
- sunholo-0.79.4.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
140
- sunholo-0.79.4.dist-info/RECORD,,
135
+ sunholo-0.79.6.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
136
+ sunholo-0.79.6.dist-info/METADATA,sha256=ykRVzCPMucoQbLQqvS9EydN33TnwAp5M4xfmK8rUPzc,7348
137
+ sunholo-0.79.6.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
138
+ sunholo-0.79.6.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
139
+ sunholo-0.79.6.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
140
+ sunholo-0.79.6.dist-info/RECORD,,