sunholo 0.79.4__py3-none-any.whl → 0.79.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/chunker/splitter.py +4 -3
- sunholo/llamaindex/llamaindex_class.py +133 -1
- {sunholo-0.79.4.dist-info → sunholo-0.79.6.dist-info}/METADATA +2 -2
- {sunholo-0.79.4.dist-info → sunholo-0.79.6.dist-info}/RECORD +8 -8
- {sunholo-0.79.4.dist-info → sunholo-0.79.6.dist-info}/LICENSE.txt +0 -0
- {sunholo-0.79.4.dist-info → sunholo-0.79.6.dist-info}/WHEEL +0 -0
- {sunholo-0.79.4.dist-info → sunholo-0.79.6.dist-info}/entry_points.txt +0 -0
- {sunholo-0.79.4.dist-info → sunholo-0.79.6.dist-info}/top_level.txt +0 -0
sunholo/chunker/splitter.py
CHANGED
|
@@ -113,8 +113,9 @@ def choose_splitter(extension: str, chunk_size: int=1024, chunk_overlap:int=200,
|
|
|
113
113
|
|
|
114
114
|
if vector_name:
|
|
115
115
|
# check if there is a chunking configuration
|
|
116
|
-
from ..utils import
|
|
117
|
-
|
|
116
|
+
from ..utils import ConfigManager
|
|
117
|
+
config = ConfigManager(vector_name)
|
|
118
|
+
chunk_config = config.vacConfig("chunker")
|
|
118
119
|
if chunk_config:
|
|
119
120
|
if chunk_config.get("type") == "semantic":
|
|
120
121
|
embedding_str = chunk_config.get("llm")
|
|
@@ -124,7 +125,7 @@ def choose_splitter(extension: str, chunk_size: int=1024, chunk_overlap:int=200,
|
|
|
124
125
|
log.info(f"Semantic chunking for {vector_name}")
|
|
125
126
|
from langchain_experimental.text_splitter import SemanticChunker
|
|
126
127
|
from ..components import pick_embedding
|
|
127
|
-
embeddings = pick_embedding(embedding_str)
|
|
128
|
+
embeddings = pick_embedding(embedding_str, config=config)
|
|
128
129
|
semantic_splitter = SemanticChunker(
|
|
129
130
|
embeddings, breakpoint_threshold_type="percentile"
|
|
130
131
|
)
|
|
@@ -43,6 +43,8 @@ class LlamaIndexVertexCorpusManager:
|
|
|
43
43
|
self.config = config
|
|
44
44
|
self.project_id = project_id
|
|
45
45
|
self.location = location
|
|
46
|
+
self.corpus_display_name = ""
|
|
47
|
+
self.corpus = ""
|
|
46
48
|
|
|
47
49
|
if config:
|
|
48
50
|
self.project_id = self.config.vacConfig('project_id') or project_id
|
|
@@ -104,6 +106,88 @@ class LlamaIndexVertexCorpusManager:
|
|
|
104
106
|
|
|
105
107
|
return response
|
|
106
108
|
|
|
109
|
+
def list_files(self, corpus_display_name:str):
|
|
110
|
+
corpus = self.find_corpus_from_list(corpus_display_name)
|
|
111
|
+
files = rag.list_files(corpus_name=corpus.name)
|
|
112
|
+
|
|
113
|
+
log.info(f"--Files in {corpus.name}:\n{files}")
|
|
114
|
+
|
|
115
|
+
return files
|
|
116
|
+
|
|
117
|
+
def find_file_from_list(self, display_name: str, corpus_display_name:str):
|
|
118
|
+
"""
|
|
119
|
+
Finds a file from the list of files by its display name.
|
|
120
|
+
|
|
121
|
+
Args:
|
|
122
|
+
display_name (str): The display name of the file.
|
|
123
|
+
corpus_display_name (str): The display name of the corpus to look within
|
|
124
|
+
|
|
125
|
+
Returns:
|
|
126
|
+
The found file object if it exists, otherwise None.
|
|
127
|
+
"""
|
|
128
|
+
files = self.list_files(corpus_display_name)
|
|
129
|
+
for file in files:
|
|
130
|
+
if display_name == file.display_name:
|
|
131
|
+
log.info(f"Found existing file with display name: {display_name}")
|
|
132
|
+
|
|
133
|
+
return file
|
|
134
|
+
|
|
135
|
+
return None
|
|
136
|
+
|
|
137
|
+
def get_file(self, file_display_name:str=None, file_name:str=None, corpus_display_name:str=None):
|
|
138
|
+
|
|
139
|
+
if file_display_name:
|
|
140
|
+
rag_file = self.find_file_from_list(file_display_name, corpus_display_name)
|
|
141
|
+
log.info(f"Found {rag_file} via display name: {file_display_name}")
|
|
142
|
+
|
|
143
|
+
return rag_file
|
|
144
|
+
|
|
145
|
+
if not file_name:
|
|
146
|
+
raise ValueError("Need to supply one of file_display_name or file_name")
|
|
147
|
+
|
|
148
|
+
corpus = self.find_corpus_from_list(corpus_display_name)
|
|
149
|
+
if file_name.startswith("projects/"):
|
|
150
|
+
rag_file = rag.get_file(name=file_name)
|
|
151
|
+
else:
|
|
152
|
+
if not corpus_display_name:
|
|
153
|
+
raise ValueError("Must supply corpus_display_name if not a full file_name")
|
|
154
|
+
rag_file = rag.get_file(name=file_name, corpus_name=corpus.name)
|
|
155
|
+
|
|
156
|
+
log.info(f"Found {rag_file}")
|
|
157
|
+
|
|
158
|
+
return rag_file
|
|
159
|
+
|
|
160
|
+
def delete_file(self, file_name, corpus_display_name:str):
|
|
161
|
+
|
|
162
|
+
corpus = self.find_corpus_from_list(corpus_display_name)
|
|
163
|
+
if file_name.startswith("projects/"):
|
|
164
|
+
rag.delete_file(name=file_name)
|
|
165
|
+
else:
|
|
166
|
+
if not corpus_display_name:
|
|
167
|
+
raise ValueError("Must supply corpus_display_name if not a full file_name")
|
|
168
|
+
rag.delete_file(name=file_name, corpus_name=corpus.name)
|
|
169
|
+
|
|
170
|
+
log.info(f"File {file_name} deleted.")
|
|
171
|
+
|
|
172
|
+
return True
|
|
173
|
+
|
|
174
|
+
def query_corpus(self, query:str, corpus_disply_name:str):
|
|
175
|
+
corpus = self.find_corpus_from_list(corpus_disply_name)
|
|
176
|
+
response = rag.retrieval_query(
|
|
177
|
+
rag_resources=[
|
|
178
|
+
rag.RagResource(
|
|
179
|
+
rag_corpus=corpus.name,
|
|
180
|
+
# Supply IDs from `rag.list_files()`.
|
|
181
|
+
# rag_file_ids=["rag-file-1", "rag-file-2", ...],
|
|
182
|
+
)
|
|
183
|
+
],
|
|
184
|
+
text=query,
|
|
185
|
+
similarity_top_k=10, # Optional
|
|
186
|
+
vector_distance_threshold=0.5, # Optional
|
|
187
|
+
)
|
|
188
|
+
|
|
189
|
+
return response
|
|
190
|
+
|
|
107
191
|
def list_corpora(self):
|
|
108
192
|
"""
|
|
109
193
|
List all VertexAI Corpus for the project/location
|
|
@@ -120,10 +204,13 @@ class LlamaIndexVertexCorpusManager:
|
|
|
120
204
|
Returns:
|
|
121
205
|
The found corpus object if it exists, otherwise None.
|
|
122
206
|
"""
|
|
207
|
+
if display_name == self.corpus_display_name:
|
|
208
|
+
return self.corpus
|
|
123
209
|
corpora = self.list_corpora()
|
|
124
210
|
for corp in corpora:
|
|
125
211
|
if display_name == corp.display_name:
|
|
126
212
|
log.info(f"Found existing corpus with display name: {display_name}")
|
|
213
|
+
self.corpus = corp
|
|
127
214
|
return corp
|
|
128
215
|
return None
|
|
129
216
|
|
|
@@ -211,6 +298,29 @@ def llamaindex_command(args):
|
|
|
211
298
|
manager.upload_file(filename=args.filename, corpus_display_name=args.display_name, description=args.description)
|
|
212
299
|
elif args.action == "upload_text":
|
|
213
300
|
manager.upload_text(text=args.text, corpus_display_name=args.display_name, description=args.description)
|
|
301
|
+
elif args.action == "list_files":
|
|
302
|
+
files = manager.list_files(corpus_display_name=args.display_name)
|
|
303
|
+
if files:
|
|
304
|
+
console.print(files)
|
|
305
|
+
else:
|
|
306
|
+
console.print("No files found for {args.display_name}")
|
|
307
|
+
elif args.action == "get_file":
|
|
308
|
+
file = manager.get_file(file_display_name=args.file_name, corpus_display_name=args.display_name)
|
|
309
|
+
console.print(file)
|
|
310
|
+
return file
|
|
311
|
+
elif args.action == "delete_file":
|
|
312
|
+
deleted = manager.delete_file(args.file_name, corpus_display_name=args.display_name)
|
|
313
|
+
if deleted:
|
|
314
|
+
console.print(f"Deleted {args.file_name}")
|
|
315
|
+
else:
|
|
316
|
+
console.print(f"ERROR: Could not delete {args.file_name}")
|
|
317
|
+
|
|
318
|
+
elif args.action == "query":
|
|
319
|
+
answer = manager.query_corpus(args.query, corpus_disply_name=args.display_name)
|
|
320
|
+
if answer:
|
|
321
|
+
console.print(answer)
|
|
322
|
+
else:
|
|
323
|
+
console.print(f"No answer found for {args.query} in {args.display_name}")
|
|
214
324
|
else:
|
|
215
325
|
console.print(f"Unknown action: {args.action}")
|
|
216
326
|
|
|
@@ -270,5 +380,27 @@ def setup_llamaindex_subparser(subparsers):
|
|
|
270
380
|
upload_text_parser.add_argument('text', help='The text content to upload')
|
|
271
381
|
upload_text_parser.add_argument('--description', help='Description of the text upload', default=None)
|
|
272
382
|
|
|
383
|
+
# LlamaIndex list_files command
|
|
384
|
+
list_files_parser = llamaindex_subparsers.add_parser('list_files', help='List all files in a corpus')
|
|
385
|
+
list_files_parser.add_argument('display_name', help='The name of the corpus')
|
|
386
|
+
list_files_parser.add_argument('vac', nargs='?', default="global", help='The VAC config to set it up for')
|
|
387
|
+
|
|
388
|
+
# LlamaIndex get_file command
|
|
389
|
+
get_file_parser = llamaindex_subparsers.add_parser('get_file', help='Get a file from a corpus')
|
|
390
|
+
get_file_parser.add_argument('display_name', help='The name of the corpus')
|
|
391
|
+
get_file_parser.add_argument('file_name', help='The name of the file to get')
|
|
392
|
+
get_file_parser.add_argument('vac', nargs='?', default="global", help='The VAC config to set it up for')
|
|
393
|
+
|
|
394
|
+
# LlamaIndex delete_file command
|
|
395
|
+
delete_file_parser = llamaindex_subparsers.add_parser('delete_file', help='Delete a file from a corpus')
|
|
396
|
+
delete_file_parser.add_argument('display_name', help='The name of the corpus')
|
|
397
|
+
delete_file_parser.add_argument('file_name', help='The name of the file to delete')
|
|
398
|
+
delete_file_parser.add_argument('vac', nargs='?', default="global", help='The VAC config to set it up for')
|
|
399
|
+
|
|
400
|
+
# LlamaIndex query command
|
|
401
|
+
query_parser = llamaindex_subparsers.add_parser('query', help='Query a corpus')
|
|
402
|
+
query_parser.add_argument('display_name', help='The name of the corpus')
|
|
403
|
+
query_parser.add_argument('query', help='The query string')
|
|
404
|
+
query_parser.add_argument('vac', nargs='?', default="global", help='The VAC config to set it up for')
|
|
405
|
+
|
|
273
406
|
llamaindex_parser.set_defaults(func=llamaindex_command)
|
|
274
|
-
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sunholo
|
|
3
|
-
Version: 0.79.4
|
|
3
|
+
Version: 0.79.6
|
|
4
4
|
Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
|
|
5
5
|
Home-page: https://github.com/sunholo-data/sunholo-py
|
|
6
|
-
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.79.4.tar.gz
|
|
6
|
+
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.79.6.tar.gz
|
|
7
7
|
Author: Holosun ApS
|
|
8
8
|
Author-email: multivac@sunholo.com
|
|
9
9
|
License: Apache License, Version 2.0
|
|
@@ -40,7 +40,7 @@ sunholo/chunker/pdfs.py,sha256=njDPop751GMHi3cOwIKd2Yct-_lWR2gqcB7WykfHphs,2480
|
|
|
40
40
|
sunholo/chunker/process_chunker_data.py,sha256=OnMvXHRv3rGpFsU50FyUNkNIwC1D8TkhaWWbn72yQss,3523
|
|
41
41
|
sunholo/chunker/publish.py,sha256=AX5u-fcyDytED67IfizMzvOMcYPXEo6XBJvyk_7maK8,2939
|
|
42
42
|
sunholo/chunker/pubsub.py,sha256=48bhuAcszN7LGe3-ksPSLHHhq0uKxiXOrizck5qpcP0,1012
|
|
43
|
-
sunholo/chunker/splitter.py,sha256=
|
|
43
|
+
sunholo/chunker/splitter.py,sha256=QLAEsJOpEYFZr9-UGZUuAlNVyjfCWb8jvzCHg0rVShE,6751
|
|
44
44
|
sunholo/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
45
|
sunholo/cli/chat_vac.py,sha256=UNLzZYAejjEvMR0EjpxIHXyvUpBDoqThohzzFL_m3Yg,23026
|
|
46
46
|
sunholo/cli/cli.py,sha256=yuY7SLFiYDUKqJDOXy7jL1l6P0UVPMuAZK9bXXTG8ck,3939
|
|
@@ -90,7 +90,7 @@ sunholo/langfuse/prompts.py,sha256=27BsVfihM6-h1jscbkGSO4HsATl-d4ZN6tcNCVztWoY,1
|
|
|
90
90
|
sunholo/llamaindex/__init__.py,sha256=DlY_cHWCsVEV1C5WBgDdHRgOMlJc8pDoCRukUJ8PT9w,88
|
|
91
91
|
sunholo/llamaindex/get_files.py,sha256=6rhXCDqQ_lrIapISQ_OYQDjiSATXvS_9m3qq53-oIl0,781
|
|
92
92
|
sunholo/llamaindex/import_files.py,sha256=Bnic5wz8c61af9Kwq8KSrNBbc4imYnzMtBCb2jzSImI,6224
|
|
93
|
-
sunholo/llamaindex/llamaindex_class.py,sha256=
|
|
93
|
+
sunholo/llamaindex/llamaindex_class.py,sha256=USb8zmF0DuEdaDjplJHvAItL4Ud2eE0iZ5ITwJ5Wflo,16763
|
|
94
94
|
sunholo/lookup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
95
95
|
sunholo/lookup/model_lookup.yaml,sha256=O7o-jP53MLA06C8pI-ILwERShO-xf6z_258wtpZBv6A,739
|
|
96
96
|
sunholo/patches/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -132,9 +132,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
|
|
|
132
132
|
sunholo/vertex/memory_tools.py,sha256=pgSahVDh7GPEulu3nl-w0jb5lTClb4TCnVxPnMokNZY,7533
|
|
133
133
|
sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
|
|
134
134
|
sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
|
|
135
|
-
sunholo-0.79.4.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
|
136
|
-
sunholo-0.79.
|
|
137
|
-
sunholo-0.79.4.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
138
|
-
sunholo-0.79.4.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
|
139
|
-
sunholo-0.79.4.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
|
140
|
-
sunholo-0.79.4.dist-info/RECORD,,
|
|
135
|
+
sunholo-0.79.6.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
|
136
|
+
sunholo-0.79.6.dist-info/METADATA,sha256=ykRVzCPMucoQbLQqvS9EydN33TnwAp5M4xfmK8rUPzc,7348
|
|
137
|
+
sunholo-0.79.6.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
138
|
+
sunholo-0.79.6.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
|
139
|
+
sunholo-0.79.6.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
|
140
|
+
sunholo-0.79.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|