sunholo 0.113.3__py3-none-any.whl → 0.114.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/chunker/doc_handling.py +14 -4
- sunholo/chunker/loaders.py +51 -50
- sunholo/chunker/message_data.py +4 -2
- sunholo/chunker/publish.py +5 -2
- sunholo/chunker/splitter.py +7 -2
- sunholo/components/retriever.py +11 -7
- sunholo/embedder/embed_chunk.py +5 -2
- sunholo/genai/process_funcs_cls.py +255 -205
- sunholo/streaming/content_buffer.py +7 -2
- sunholo/summarise/summarise.py +18 -8
- {sunholo-0.113.3.dist-info → sunholo-0.114.2.dist-info}/METADATA +7 -2
- {sunholo-0.113.3.dist-info → sunholo-0.114.2.dist-info}/RECORD +16 -16
- {sunholo-0.113.3.dist-info → sunholo-0.114.2.dist-info}/LICENSE.txt +0 -0
- {sunholo-0.113.3.dist-info → sunholo-0.114.2.dist-info}/WHEEL +0 -0
- {sunholo-0.113.3.dist-info → sunholo-0.114.2.dist-info}/entry_points.txt +0 -0
- {sunholo-0.113.3.dist-info → sunholo-0.114.2.dist-info}/top_level.txt +0 -0
sunholo/chunker/doc_handling.py
CHANGED
@@ -12,10 +12,14 @@ import tempfile
 import traceback
 import json
 import os
-
-
-from langchain.prompts import PromptTemplate
-from langchain_core.output_parsers import StrOutputParser
+try:
+    from langchain.docstore.document import Document
+    from langchain.prompts import PromptTemplate
+    from langchain_core.output_parsers import StrOutputParser
+except ImportError:
+    Document = None
+    PromptTemplate = None
+    StrOutputParser = None
 
 def send_doc_to_docstore(docs, vector_name):
 
@@ -53,6 +57,9 @@ def send_doc_to_docstore(docs, vector_name):
 
 def create_big_doc(docs):
 
+    if not Document:
+        raise ImportError("Document object requires langchain installed")
+
     if not docs:
         return None, None, None
 
@@ -107,6 +114,9 @@ def create_big_doc(docs):
 
 def summarise_docs(docs, vector_name, summary_threshold_default=10000, model_limit_default=25000):
 
+    if not PromptTemplate or not StrOutputParser:
+        raise ImportError("PromptTemplate and StrOutputParser requires langchain installed")
+
     if not docs:
         return None
 
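Note: this release converts the module-level langchain imports above into optional dependencies. A minimal sketch of the pattern, not part of the diff itself (it only assumes langchain may be absent at runtime):

    # Optional-import pattern used throughout 0.114.x: fall back to None at
    # import time, then fail loudly at call time if the dependency is needed.
    try:
        from langchain.docstore.document import Document
    except ImportError:
        Document = None  # langchain not installed

    def create_big_doc(docs):
        if not Document:
            raise ImportError("Document object requires langchain installed")
        # ... the rest of the function can now safely use Document ...

This keeps `import sunholo` working for users who never touch the chunker, while giving a clear error the moment a langchain-backed feature is actually called.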
sunholo/chunker/loaders.py
CHANGED
@@ -13,11 +13,12 @@
 # limitations under the License.
 try:
     from langchain_unstructured import UnstructuredLoader
+    from langchain_community.document_loaders import GitLoader
+    from langchain_community.document_loaders import GoogleDriveLoader
 except ImportError:
     UnstructuredLoader = None
-
-
-from langchain_community.document_loaders import GoogleDriveLoader
+    GitLoader=None
+    GoogleDriveLoader=None
 
 from ..custom_logging import log
 from .pdfs import read_pdf_file
@@ -43,56 +44,56 @@ def convert_to_txt(file_path):
         shutil.copyfile(file_path, txt_file)
     return txt_file
 
+if GoogleDriveLoader is not None:
+    class MyGoogleDriveLoader(GoogleDriveLoader):
+        url: Optional[str] = Field(None)
 
-
-
-
-    def __init__(self, url, *args, **kwargs):
-        super().__init__(*args, **kwargs, file_ids=['dummy']) # Pass dummy value
-        self.url = url
-
-    def _extract_id(self, url):
-        parsed_url = urlparse(unquote(url))
-        path_parts = parsed_url.path.split('/')
-
-        # Iterate over the parts
-        for part in path_parts:
-            # IDs are typically alphanumeric and at least a few characters long
-            # So let's say that to be an ID, a part has to be at least 15 characters long
-            if all(char.isalnum() or char in ['_', '-'] for char in part) and len(part) >= 15:
-                return part
-
-        # Return None if no ID was found
-        return None
+        def __init__(self, url, *args, **kwargs):
+            super().__init__(*args, **kwargs, file_ids=['dummy']) # Pass dummy value
+            self.url = url
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    #
-
-
-
-
-
-
+        def _extract_id(self, url):
+            parsed_url = urlparse(unquote(url))
+            path_parts = parsed_url.path.split('/')
+
+            # Iterate over the parts
+            for part in path_parts:
+                # IDs are typically alphanumeric and at least a few characters long
+                # So let's say that to be an ID, a part has to be at least 15 characters long
+                if all(char.isalnum() or char in ['_', '-'] for char in part) and len(part) >= 15:
+                    return part
+
+            # Return None if no ID was found
+            return None
+
+        def load_from_url(self, url: str):
+            id = self._extract_id(url)
+            from googleapiclient.errors import HttpError
+            from googleapiclient.discovery import build
+
+            # Identify type of URL
+            try:
+                service = build("drive", "v3", credentials=self._load_credentials())
+                file = service.files().get(fileId=id).execute()
+            except HttpError as err:
+                log.error(f"Error loading file {url}: {str(err)}")
+                raise
+
+            mime_type = file["mimeType"]
+
+            if "folder" in mime_type:
+                # If it's a folder, load documents from the folder
+                return self._load_documents_from_folder(id)
             else:
-
+                # If it's not a folder, treat it as a single file
+                if mime_type == "application/vnd.google-apps.document":
+                    return [self._load_document_from_id(id)]
+                elif mime_type == "application/vnd.google-apps.spreadsheet":
+                    return self._load_sheet_from_id(id)
+                elif mime_type == "application/pdf":
+                    return [self._load_file_from_id(id)]
+                else:
+                    return []
 
 def ignore_files(filepath):
     """Returns True if the given path's file extension is found within
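Note: MyGoogleDriveLoader is now defined only when its base class imported successfully, because `class Sub(Base)` raises at module import time if Base is None. A minimal sketch of that guard (names taken from the diff; bodies elided):

    try:
        from langchain_community.document_loaders import GoogleDriveLoader
    except ImportError:
        GoogleDriveLoader = None  # optional dependency missing

    if GoogleDriveLoader is not None:
        # Safe: the base class exists, so the subclass can be created.
        class MyGoogleDriveLoader(GoogleDriveLoader):
            ...

Callers should then check for the name before use, e.g. `if GoogleDriveLoader is None: raise ImportError(...)`, since the class simply does not exist when the extra is not installed.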
sunholo/chunker/message_data.py
CHANGED
@@ -29,8 +29,10 @@ try:
 except ImportError:
     BlobServiceClient = None
 
-
-
+try:
+    from langchain.schema import Document
+except ImportError:
+    Document = None
 
 from .splitter import chunk_doc_to_docs
 from .pdfs import split_pdf_to_pages
sunholo/chunker/publish.py
CHANGED
@@ -3,8 +3,11 @@ from ..pubsub import PubSubManager
 from ..utils.parsers import contains_url, extract_urls
 from ..utils.gcp_project import get_gcp_project
 
-
-
+try:
+    from langchain.schema import Document
+except ImportError:
+    Document=None
+
 def publish_if_urls(the_content, vector_name):
     """
     Extracts URLs and puts them in a queue for processing on PubSub
sunholo/chunker/splitter.py
CHANGED
@@ -13,12 +13,17 @@
 # limitations under the License.
 from ..custom_logging import log
 from ..utils.parsers import remove_whitespace
-
-import langchain.text_splitter as text_splitter
+
 from .images import upload_doc_images
 from .doc_handling import send_doc_to_docstore, summarise_docs
 from ..database.uuid import generate_uuid_from_object_id
 
+try:
+    from langchain.schema import Document
+    import langchain.text_splitter as text_splitter
+except ImportError:
+    Document=None
+    text_splitter=None
 
 def chunk_doc_to_docs(documents: list, extension: str = ".md", min_size: int = 800, vector_name=None, **kwargs):
     """Turns a Document object into a list of many Document chunks.
sunholo/components/retriever.py
CHANGED
@@ -17,13 +17,17 @@ from ..utils import ConfigManager
 from .llm import get_embeddings
 from ..utils.gcp_project import get_gcp_project
 
-
-from langchain.retrievers import MergerRetriever
-# https://python.langchain.com/docs/integrations/retrievers/merger_retriever
-from langchain_community.document_transformers import EmbeddingsRedundantFilter
-from langchain.retrievers.document_compressors import DocumentCompressorPipeline
-from langchain.retrievers import ContextualCompressionRetriever
-
+try:
+    from langchain.retrievers import MergerRetriever
+    # https://python.langchain.com/docs/integrations/retrievers/merger_retriever
+    from langchain_community.document_transformers import EmbeddingsRedundantFilter
+    from langchain.retrievers.document_compressors import DocumentCompressorPipeline
+    from langchain.retrievers import ContextualCompressionRetriever
+except ImportError:
+    MergerRetriever=None
+    EmbeddingsRedundantFilter=None
+    DocumentCompressorPipeline=None
+    ContextualCompressionRetriever=None
 
 
 def load_memories(vector_name:str=None, config:ConfigManager=None):
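Note: unlike doc_handling.py, this module adds no runtime guard of its own, so callers see the None fallbacks directly. A hedged sketch of a caller-side check (the extra name comes from the METADATA diff further below):

    if MergerRetriever is None:
        # langchain extras are missing; see METADATA: pip install sunholo[langchain]
        raise ImportError("Retriever features require langchain: install sunholo[langchain]")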
sunholo/embedder/embed_chunk.py
CHANGED
@@ -17,8 +17,11 @@ import json
 import datetime
 import uuid
 
-
-
+try:
+    from langchain.schema import Document
+except ImportError:
+    Document = None
+
 from ..components import get_embeddings, pick_vectorstore, load_memories, pick_embedding
 from ..custom_logging import log
 from ..database.uuid import generate_uuid_from_object_id
sunholo/genai/process_funcs_cls.py
CHANGED
@@ -12,21 +12,21 @@ from collections import deque
 try:
     import google.generativeai as genai
     import proto
-    from google.generativeai.types import RequestOptions, GenerateContentResponse
     from google.api_core import retry
     from google.generativeai import ChatSession
     from google.api_core.exceptions import RetryError
+    from google.generativeai.types import RequestOptions, GenerateContentResponse
 except ImportError:
     genai = None
     ChatSession = None
+    GenerateContentResponse = None
 
 from .images import extract_gs_images_and_genai_upload
 
 if TYPE_CHECKING:
     from google.generativeai.protos import Part
     from google.generativeai import ChatSession
-
-
+    from google.generativeai.types import RequestOptions, GenerateContentResponse
 
 class GenAIFunctionProcessor:
     """
@@ -89,9 +89,19 @@ class GenAIFunctionProcessor:
         self.trace = trace
         self.parent_observation_id = parent_observation_id
 
+        # agent loops
         self.last_api_requests_and_responses = []
         self._validate_functions()
 
+        self.loop_span = None
+        self.token_queue = []
+        self.loop_text = ""
+        self.loop_content = []
+        self.loop_guardrail = 0
+        self.big_result = []
+        self.usage_metadata = {}
+        self.functions_called =[]
+
     def construct_tools(self) -> dict:
         """
         Constructs a dictionary of tools (functions) specific to the application.
@@ -249,7 +259,6 @@ class GenAIFunctionProcessor:
         """
         api_requests_and_responses = []
 
-
         if not full_response:
             log.info("No response was found to process")
             return api_requests_and_responses
@@ -272,7 +281,7 @@ class GenAIFunctionProcessor:
             params_obj = {key: val for key, val in fn.args.items()}
 
             params = ', '.join(f'{key}={val}' for key, val in params_obj.items())
-            log.info(f"Executing {function_name} with params {params} (Total Characters: {len(params)})")
+            log.info(f"== Executing {function_name} with params {params} (Total Characters: {len(params)})")
             if len(params)>8000:
                 log.warning(f"Total parameters are over 8000 characters - it may not work properly: {params[:10000]}....[{len(params)}]")
 
@@ -413,6 +422,202 @@ class GenAIFunctionProcessor:
         # If it's a primitive value, return it as is
         return value
 
+    """
+    self.loop_span = None
+    self.token_queue = None
+    self.loop_chat = None
+    self.loop_text = None
+    self.loop_content = None
+    self.loop_guardrail = None
+    """
+
+    def _loop_update_content(self):
+        if self.loop_text:
+            # update content relying on gemini chat history, and the parsed function result objects
+            if self.loop_executed_responses:
+                self.loop_content = self.loop_executed_responses
+            else:
+                self.loop_content = [f"[{self.loop_guardrail}] Agent: {self.loop_text}"]
+            # if text includes gs:// try to download it
+            image_uploads = extract_gs_images_and_genai_upload(self.loop_text)
+            if image_uploads:
+                for img in image_uploads:
+                    log.info(f"Adding {img=}")
+                    self.loop_content.append(img)
+                    self.loop_content.append(f"{img.name} was created by agent and added")
+            log.info(f"[{self.loop_guardrail}] Updated content:\n{self.loop_text}")
+            self.big_result.append(self.loop_text)
+        else:
+            log.warning(f"[{self.loop_guardrail}] No content created this loop")
+            self.loop_content = [f"[{self.loop_guardrail}] Agent: ERROR - No response was found for loop [{self.loop_guardrail}]"]
+
+    def _loop_handle_executed_responses(self, response):
+        try:
+            self.loop_executed_responses = self.process_funcs(response, loop_span=self.loop_span)
+        except Exception as err:
+            log.error(f"Error in executions: {str(err)}")
+            self.token_queue.append(f"{str(err)} for {response=}")
+
+        log.info(f"[{self.loop_guardrail}] {self.loop_executed_responses=}")
+
+        if self.loop_executed_responses:
+            self.token_queue.append("\n-- Agent Actions:\n")
+            fn_exec = self.loop_span.span(name="function_actions", input=self.loop_executed_responses) if self.loop_span else None
+            for executed_response in self.loop_executed_responses:
+                token = ""
+                fn = executed_response.function_response.name
+                fn_args = executed_response.function_response.response.get("args")
+                fn_result = executed_response.function_response.response["result"]
+                fn_log = f"{fn}({fn_args})"
+                log.info(fn_log)
+                self.functions_called.append(fn_log)
+                self.token_queue.append(f"\n-- {fn_log} ...executing...\n") if fn != "decide_to_go_on" else ""
+                while self.token_queue:
+                    token = self.token_queue.popleft()
+                    self.loop_callback.on_llm_new_token(token=token)
+
+                log.info(f"{fn_log} created a result={type(fn_result)=}")
+                fn_exec_one = fn_exec.span(name=fn, input=fn_args) if fn_exec else None
+
+                fn_result_json = None
+                # Convert MapComposite to a standard Python dictionary
+                if isinstance(fn_result, proto.marshal.collections.maps.MapComposite):
+                    fn_result_json = self.convert_composite_to_native(fn_result)
+                elif isinstance(fn_result, proto.marshal.collections.repeated.RepeatedComposite):
+                    fn_result = self.convert_composite_to_native(fn_result)
+                elif isinstance(fn_result, dict):
+                    fn_result_json = fn_result
+                elif isinstance(fn_result, str):
+                    try:
+                        if isinstance(fn_result_json, str):
+                            fn_result_json = json.loads(fn_result_json)
+                    except json.JSONDecodeError:
+                        log.warning(f"{fn_result} was not JSON decoded")
+                    except Exception as err:
+                        log.warning(f"{fn_result} was not json decoded due to unknown exception: {str(err)} {traceback.format_exc()}")
+                else:
+                    log.warning(f"Unrecognised type for {fn_log}: {type(fn_result)}")
+
+                # should be a string or a dict by now
+                log.info(f"Processed {fn_log} to {fn_result_json=} type: {type(fn_result_json)}")
+
+                if fn == "decide_to_go_on":
+                    log.info(f"{fn_result_json=} {type(fn_result)}")
+                    if fn_result_json:
+                        token = f"\n{'STOPPING' if not fn_result_json.get('go_on') else 'CONTINUE'}: {fn_result_json.get('chat_summary')}\n"
+                    else:
+                        log.warning(f"{fn_result_json} did not work for decide_to_go_on")
+                        token = f"Error calling decide_to_go_on with {fn_result=}\n"
+                else:
+
+                    token = f"--- {fn_log} result --- \n"
+                    # if json dict we look for keys to extract
+                    if fn_result_json:
+                        log.info(f"{fn_result_json} dict parsing")
+                        if fn_result_json.get('stdout'):
+                            text = fn_result_json.get('stdout')
+                            token += self.remove_invisible_characters(text)
+                        if fn_result_json.get('stderr'):
+                            text = fn_result_json.get('stdout')
+                            token += self.remove_invisible_characters(text)
+                        # If neither 'stdout' nor 'stderr' is present, dump the entire JSON
+                        if 'stdout' not in fn_result_json and 'stderr' not in fn_result_json:
+                            log.info(f"No recognised keys ('stdout' or 'stderr') in dict: {fn_result_json=} - dumping it all")
+                            token += f"{json.dumps(fn_result_json, indent=2)}\n" # Added `indent=2` for readability
+                    else:
+                        # probably a string, just return it
+                        log.info(f"{fn_result_json} non-dict (String?) parsing")
+                        token += f"{self.remove_invisible_characters(fn_result)}\n--- end ---\n"
+
+                self.loop_text += token
+                self.token_queue.append(token)
+                fn_exec_one.end(output=token) if fn_exec_one else None
+            fn_exec.end(output=self.loop_text) if fn_exec else None
+
+        else:
+            token = f"\n[{self.loop_guardrail}] No function executions were performed\n"
+            self.token_queue.append(token)
+            self.loop_text += token
+
+    def _loop_output_text(self, response:GenerateContentResponse):
+        if not response:
+            return
+
+        for chunk in response:
+            if not chunk:
+                continue
+
+            log.debug(f"[{self.loop_guardrail}] {chunk=}")
+            try:
+                if hasattr(chunk, 'text') and isinstance(chunk.text, str):
+                    token = chunk.text
+                    self.token_queue.append(token)
+                    self.loop_text += token
+                else:
+                    log.info("skipping chunk with no text")
+
+            except ValueError as err:
+                self.token_queue.append(f"{str(err)} for {chunk=}")
+
+    def _loop_metadata(self, response:GenerateContentResponse, gen=None):
+        loop_metadata = None
+        if response:
+            loop_metadata = response.usage_metadata
+            if loop_metadata:
+                self.usage_metadata = {
+                    "prompt_token_count": self.usage_metadata["prompt_token_count"] + (loop_metadata.prompt_token_count or 0),
+                    "candidates_token_count": self.usage_metadata["candidates_token_count"] + (loop_metadata.candidates_token_count or 0),
+                    "total_token_count": self.usage_metadata["total_token_count"] + (loop_metadata.total_token_count or 0),
+                }
+                self.token_queue.append((
+                    "\n-- Agent response -- "
+                    f"Loop tokens: [{loop_metadata.prompt_token_count}]/[{self.usage_metadata['prompt_token_count']}] "
+                    f"Session tokens: [{loop_metadata.total_token_count}]/[{self.usage_metadata['total_token_count']}] \n"
+                ))
+            gen.end(output=response.to_dict()) if gen else None
+        else:
+            gen.end(output="No response received") if gen else None
+
+        return loop_metadata
+
+    def _loop_call_agent(self, chat:ChatSession):
+        response=None
+        gen=None
+        try:
+            self.token_queue.append("\n= Calling Agent =\n")
+            loop_content = self.loop_content
+            gen = self.loop_span.generation(
+                name=f"loop_{self.loop_guardrail}",
+                model=self.model_name,
+                input = {'content': self.loop_content},
+            ) if self.loop_span else None
+
+            log.info(f"{loop_content=}")
+            response: GenerateContentResponse = chat.send_message(loop_content, request_options=RequestOptions(
+                retry=retry.Retry(
+                    initial=1,
+                    multiplier=2,
+                    maximum=10,
+                    timeout=60
+                )
+            ))
+        except RetryError as err:
+            msg = f"Retry error - lets try again if its occured less than twice: {str(err)}"
+            log.warning(msg)
+            self.token_queue.append(msg)
+            self.loop_text += msg
+
+        except Exception as e:
+            msg = f"Error sending {loop_content} to model: {str(e)}"
+            if "finish_reason: 10" in str(e):
+                msg = (f"I encounted an error on the previous step when sending this data: {json.dumps(loop_content)}"
+                       " -- Can you examine what was sent and identify why? If possible correct it so we can answer the original user question.")
+            log.error(msg + f"{traceback.format_exc()}")
+            self.token_queue.append(msg)
+            self.loop_text += msg
+
+        return response, gen
+
     def run_agent_loop(self, chat:ChatSession, content:list, callback=None, guardrail_max=10, loop_return=3): # type: ignore
         """
         Runs the agent loop, sending messages to the orchestrator, processing responses, and executing functions.
@@ -429,17 +634,14 @@ class GenAIFunctionProcessor:
         """
         if not callback:
             callback = self.IOCallback()
-
-
-        usage_metadata = {
+        self.big_result = []
+        self.usage_metadata = {
             "prompt_token_count": 0,
             "candidates_token_count": 0,
             "total_token_count": 0
        }
-
-
-        # Initialize token queue to ensure sequential processing
-        token_queue = deque()
+
+        self.functions_called =[]
 
         span = self.trace.span(
             name=f"GenAIFunctionProcesser_{self.__class__.__name__}",
@@ -447,225 +649,73 @@ class GenAIFunctionProcessor:
             input = {'content': content},
         ) if self.trace else None
 
-
+        self.loop_span = None
+        # Initialize token queue to ensure sequential processing
+        self.token_queue = deque()
+        self.loop_text = ""
+        self.loop_content = content
+        self.loop_guardrail = 0
+        self.loop_executed_responses = []
+        self.loop_callback = callback
 
-
+        while self.loop_guardrail < guardrail_max:
+            self.token_queue.append(f"\n----Loop [{self.loop_guardrail}] Start------\nFunctions: {list(self.funcs.keys())}\n")
 
             content_parse = ""
             for i, chunk in enumerate(content):
                 content_parse += f"\n - {i}) {chunk}"
-            content_parse += f"\n== End input content for loop [{
+            content_parse += f"\n== End input content for loop [{self.loop_guardrail}] =="
 
-            log.info(f"== Start input content for loop [{
-
+            log.info(f"== Start input content for loop [{self.loop_guardrail}]\n ## Content: {content_parse}")
+
+            # resets for this loop
+            self.loop_text = ""
             response = None
-
-
-
+            self.loop_executed_responses = []
+
+            self.loop_span = span.span(
+                name=f"loop_{self.loop_guardrail}",
                 model=self.model_name,
-                input = {'content':
+                input = {'content': self.loop_content},
             ) if span else None
 
-
-            token_queue.append("\n= Calling Agent =\n")
-
-            gen = loop_span.generation(
-                name=f"loop_{guardrail}",
-                model=self.model_name,
-                input = {'content': content},
-            ) if loop_span else None
-
-            response: GenerateContentResponse = chat.send_message(content, request_options=RequestOptions(
-                retry=retry.Retry(
-                    initial=1,
-                    multiplier=2,
-                    maximum=10,
-                    timeout=60
-                )
-            ))
-            except RetryError as err:
-                msg = f"Retry error - lets try again if its occured less than twice: {str(err)}"
-                log.warning(msg)
-                token_queue.append(msg)
-                this_text += msg
-
-            except Exception as e:
-                msg = f"Error sending {content} to model: {str(e)}"
-                if "finish_reason: 10" in str(e):
-                    msg = "The Gemini API does not work with this input - you need to try something else. Error is: finish_reason: 10"
-                log.error(msg + f"{traceback.format_exc()}")
-                token_queue.append(msg)
-                this_text += msg
-
-            if response:
-                loop_metadata = response.usage_metadata
-                if loop_metadata:
-                    usage_metadata = {
-                        "prompt_token_count": usage_metadata["prompt_token_count"] + (loop_metadata.prompt_token_count or 0),
-                        "candidates_token_count": usage_metadata["candidates_token_count"] + (loop_metadata.candidates_token_count or 0),
-                        "total_token_count": usage_metadata["total_token_count"] + (loop_metadata.total_token_count or 0),
-                    }
-                    token_queue.append((
-                        "\n-- Agent response -- "
-                        f"Loop tokens: [{loop_metadata.prompt_token_count}]/[{usage_metadata['prompt_token_count']}] "
-                        f"Session tokens: [{loop_metadata.total_token_count}]/[{usage_metadata['total_token_count']}] \n"
-                    ))
-                    loop_metadata = None
-                gen.end(output=response.to_dict()) if gen else None
-            else:
-                gen.end(output="No response received") if gen else None
-
-            if not response:
-                response = []
-            for chunk in response:
-                if not chunk:
-                    continue
-
-                log.debug(f"[{guardrail}] {chunk=}")
-                try:
-                    if hasattr(chunk, 'text') and isinstance(chunk.text, str):
-                        token = chunk.text
-                        token_queue.append(token)
-                        this_text += token
-                    else:
-                        log.info("skipping chunk with no text")
-
-                except ValueError as err:
-                    token_queue.append(f"{str(err)} for {chunk=}")
-            try:
-                executed_responses = self.process_funcs(response, loop_span=loop_span)
-            except Exception as err:
-                log.error(f"Error in executions: {str(err)}")
-                token_queue.append(f"{str(err)} for {response=}")
-
-            log.info(f"[{guardrail}] {executed_responses=}")
-
-            if executed_responses:
-                token_queue.append("\n-- Agent Actions:\n")
-                fn_exec = loop_span.span(name="function_actions", input=executed_responses) if loop_span else None
-                for executed_response in executed_responses:
-                    token = ""
-                    fn = executed_response.function_response.name
-                    fn_args = executed_response.function_response.response.get("args")
-                    fn_result = executed_response.function_response.response["result"]
-                    fn_log = f"{fn}({fn_args})"
-                    log.info(fn_log)
-                    functions_called.append(fn_log)
-                    function_results.append(fn_result)
-                    token_queue.append(f"\n-- {fn_log} ...executing...\n") if fn != "decide_to_go_on" else ""
-                    while token_queue:
-                        token = token_queue.popleft()
-                        callback.on_llm_new_token(token=token)
-
-                    log.info(f"{fn_log} created a result={type(fn_result)=}")
-                    fn_exec_one = fn_exec.span(name=fn, input=fn_args) if fn_exec else None
-
-                    fn_result_json = None
-                    # Convert MapComposite to a standard Python dictionary
-                    if isinstance(fn_result, proto.marshal.collections.maps.MapComposite):
-                        fn_result_json = self.convert_composite_to_native(fn_result)
-                    elif isinstance(fn_result, proto.marshal.collections.repeated.RepeatedComposite):
-                        fn_result = self.convert_composite_to_native(fn_result)
-                    elif isinstance(fn_result, dict):
-                        fn_result_json = fn_result
-                    elif isinstance(fn_result, str):
-                        try:
-                            if isinstance(fn_result_json, str):
-                                fn_result_json = json.loads(fn_result_json)
-                        except json.JSONDecodeError:
-                            log.warning(f"{fn_result} was not JSON decoded")
-                        except Exception as err:
-                            log.warning(f"{fn_result} was not json decoded due to unknown exception: {str(err)} {traceback.format_exc()}")
-                    else:
-                        log.warning(f"Unrecognised type for {fn_log}: {type(fn_result)}")
-
-                    # should be a string or a dict by now
-                    log.info(f"Processed {fn_log} to {fn_result_json=} type: {type(fn_result_json)}")
-
-                    if fn == "decide_to_go_on":
-                        log.info(f"{fn_result_json=} {type(fn_result)}")
-                        if fn_result_json:
-                            token = f"\n{'STOPPING' if not fn_result_json.get('go_on') else 'CONTINUE'}: {fn_result_json.get('chat_summary')}\n"
-                        else:
-                            log.warning(f"{fn_result_json} did not work for decide_to_go_on")
-                            token = f"Error calling decide_to_go_on with {fn_result=}\n"
-                    else:
+            response, gen = self._loop_call_agent(chat)
 
-
-                        # if json dict we look for keys to extract
-                        if fn_result_json:
-                            log.info(f"{fn_result_json} dict parsing")
-                            if fn_result_json.get('stdout'):
-                                text = fn_result_json.get('stdout')
-                                token += self.remove_invisible_characters(text)
-                            if fn_result_json.get('stderr'):
-                                text = fn_result_json.get('stdout')
-                                token += self.remove_invisible_characters(text)
-                            # If neither 'stdout' nor 'stderr' is present, dump the entire JSON
-                            if 'stdout' not in fn_result_json and 'stderr' not in fn_result_json:
-                                log.info(f"No recognised keys ('stdout' or 'stderr') in dict: {fn_result_json=} - dumping it all")
-                                token += f"{json.dumps(fn_result_json, indent=2)}\n" # Added `indent=2` for readability
-                        else:
-                            # probably a string, just return it
-                            log.info(f"{fn_result_json} non-dict (String?) parsing")
-                            token += f"{self.remove_invisible_characters(fn_result)}\n--- end ---\n"
-
-                    this_text += token
-                    token_queue.append(token)
-                    fn_exec_one.end(output=token) if fn_exec_one else None
-                fn_exec.end(output=this_text) if fn_exec else None
+            loop_metadata = self._loop_metadata(response, gen)
 
-
-
-
-
-
-            if this_text:
-                # update content relying on gemini chat history instead, and the parsed function result objects
-                if executed_responses:
-                    content = executed_responses
-                else:
-                    content = [f"[{guardrail}] Agent: No function responses where found: {this_text}"]
-                # if text includes gs:// try to download it
-                image_uploads = extract_gs_images_and_genai_upload(this_text)
-                if image_uploads:
-                    for img in image_uploads:
-                        log.info(f"Adding {img=}")
-                        content.append(img)
-                        content.append(f"{img.name} was created by agent and added")
-                log.info(f"[{guardrail}] Updated content:\n{this_text}")
-                big_result.append(this_text)
-            else:
-                log.warning(f"[{guardrail}] No content created this loop")
-                content = [f"[{guardrail}] Agent: ERROR - No response was found for loop [{guardrail}]"]
+            self._loop_output_text(response)
+
+            self._loop_handle_executed_responses(response)
+
+            self._loop_update_content()
 
-            token_queue.append(f"\n----Loop [{
-            loop_span.end(output=
+            self.token_queue.append(f"\n----Loop [{self.loop_guardrail}] End------\n{self.usage_metadata}\n----------------------")
+            self.loop_span.end(output=self.loop_content, metadata=loop_metadata) if self.loop_span else None
 
             go_on_check = self.check_function_result("decide_to_go_on", {"go_on": False})
             if go_on_check:
                 log.info("Breaking agent loop")
                 break
 
-            while token_queue:
-                token = token_queue.popleft()
-
+            while self.token_queue:
+                token = self.token_queue.popleft()
+                self.loop_callback.on_llm_new_token(token=token)
 
-
-            if
+            self.loop_guardrail += 1
+            if self.loop_guardrail > guardrail_max:
                 log.warning(f"Guardrail kicked in, more than {guardrail_max} loops")
                 break
 
-        while token_queue:
-            token = token_queue.popleft()
-
+        while self.token_queue:
+            token = self.token_queue.popleft()
+            self.loop_callback.on_llm_new_token(token=token)
 
-        usage_metadata["functions_called"] = functions_called
+        self.usage_metadata["functions_called"] = self.functions_called
 
-        big_text = "\n".join(big_result[-loop_return:])
-        span.end(output=big_text, metadata=
+        big_text = "\n".join(self.big_result[-loop_return:])
+        span.end(output=big_text, metadata=self.sage_metadata) if span else None
 
-        return big_text, usage_metadata
+        return big_text, self.usage_metadata
 
 class IOCallback:
     """
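Note: the bulk of this file's diff moves the body of run_agent_loop into per-loop helper methods, with loop state held on self instead of local variables. A condensed sketch of the resulting control flow (method names are taken from the diff; bodies are elided, so this is illustrative rather than the actual implementation):

    def run_agent_loop(self, chat, content, callback=None, guardrail_max=10, loop_return=3):
        self.loop_content = content
        self.loop_guardrail = 0
        while self.loop_guardrail < guardrail_max:
            response, gen = self._loop_call_agent(chat)         # send content, with retry/backoff
            loop_metadata = self._loop_metadata(response, gen)  # accumulate token usage
            self._loop_output_text(response)                    # queue streamed text tokens
            self._loop_handle_executed_responses(response)      # execute requested functions
            self._loop_update_content()                         # build next loop's input
            if self.check_function_result("decide_to_go_on", {"go_on": False}):
                break                                           # the model asked to stop
            self.loop_guardrail += 1
        return "\n".join(self.big_result[-loop_return:]), self.usage_metadata

Holding the queue, text buffer and guardrail counter on self is what lets each helper read and mutate loop state without threading a dozen arguments through every call.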
sunholo/streaming/content_buffer.py
CHANGED
@@ -12,8 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from typing import Any, Dict, List, Union
-
-
+
+try:
+    from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+    from langchain.schema import LLMResult
+except ImportError:
+    StreamingStdOutCallbackHandler = None
+    LLMResult = None
 
 import threading
 import asyncio
sunholo/summarise/summarise.py
CHANGED
@@ -18,14 +18,24 @@ from ..custom_logging import log
 from ..components import get_llm
 from ..chunker.splitter import chunk_doc_to_docs
 
-
-from langchain.
-from langchain.chat_models import
-from
-from
-from langchain.llms import
-from langchain.
-from langchain.
+try:
+    from langchain.prompts import PromptTemplate
+    from langchain.chat_models import ChatVertexAI
+    from langchain.chat_models import ChatOpenAI
+    from langchain_google_genai import ChatGoogleGenerativeAI
+    from langchain.llms import OpenAI
+    from langchain.llms import VertexAI
+    from langchain.chains.summarize import load_summarize_chain
+    from langchain.schema import Document
+except ImportError:
+    PromptTemplate=None
+    ChatVertexAI=None
+    ChatOpenAI=None
+    ChatGoogleGenerativeAI=None
+    OpenAI=None
+    VertexAI=None
+    load_summarize_chain=None
+    Document=None
 
 prompt_template = """Write a summary for below, including key concepts, people and distinct information but do not add anything that is not in the original text:
 
{sunholo-0.113.3.dist-info → sunholo-0.114.2.dist-info}/METADATA
CHANGED
@@ -1,9 +1,9 @@
 Metadata-Version: 2.1
 Name: sunholo
-Version: 0.113.3
+Version: 0.114.2
 Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
 Home-page: https://github.com/sunholo-data/sunholo-py
-Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.113.3.tar.gz
+Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.114.2.tar.gz
 Author: Holosun ApS
 Author-email: multivac@sunholo.com
 License: Apache License, Version 2.0
@@ -81,6 +81,11 @@ Requires-Dist: tenacity; extra == "all"
 Requires-Dist: tiktoken; extra == "all"
 Requires-Dist: unstructured[all-docs,local-inference]; extra == "all"
 Requires-Dist: xlwings; extra == "all"
+Provides-Extra: langchain
+Requires-Dist: langchain==0.2.16; extra == "langchain"
+Requires-Dist: langchain_experimental==0.0.65; extra == "langchain"
+Requires-Dist: langchain-community==0.2.17; extra == "langchain"
+Requires-Dist: langsmith==0.1.143; extra == "langchain"
 Provides-Extra: azure
 Requires-Dist: azure-identity; extra == "azure"
 Requires-Dist: azure-storage-blob; extra == "azure"
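Note: this new `langchain` extra is what makes the try/except import fallbacks in the source diffs above optional in practice. Installing with `pip install "sunholo[langchain]"` pulls in the pinned versions (langchain 0.2.16, langchain_experimental 0.0.65, langchain-community 0.2.17, langsmith 0.1.143); a base install leaves the fallbacks as None and the langchain-backed features raise ImportError when called.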
{sunholo-0.113.3.dist-info → sunholo-0.114.2.dist-info}/RECORD
CHANGED
@@ -31,16 +31,16 @@ sunholo/bots/github_webhook.py,sha256=5pQPRLM_wxxcILVaIzUDV8Kt7Arcm2dL1r1kMMHA52
 sunholo/bots/webapp.py,sha256=EIMxdAJ_xtufwJmvnn7N_Fb_1hZ9DjhJ0Kf_hp02vEU,1926
 sunholo/chunker/__init__.py,sha256=A5canS0XPgisHu0OZ7sVdILgEHGzgH9kpkDi4oBwLZk,135
 sunholo/chunker/azure.py,sha256=MVF9_-QdKUoJqlpEJ49pv2sdjMDxEiMNxzmO7w5nWDQ,3270
-sunholo/chunker/doc_handling.py,sha256=
+sunholo/chunker/doc_handling.py,sha256=t_lDazHfJbs4Q2Ruq2MvBBeJRfsjjQkzMxKuX8qQKBI,9087
 sunholo/chunker/encode_metadata.py,sha256=hxxd9KU35Xi0Z_EL8kt_oD66pKfBLhEjBImC16ew-Eo,1919
 sunholo/chunker/images.py,sha256=id2PBu6XyGEOtgafq2v0c9_O6kxaC_pYFMnbsIitkSg,1868
-sunholo/chunker/loaders.py,sha256=
-sunholo/chunker/message_data.py,sha256
+sunholo/chunker/loaders.py,sha256=5NXrMxV-WdbFpxeLhFzccw0_zhf1UQ7yKFFeaMkc9Bc,11105
+sunholo/chunker/message_data.py,sha256=-gnF9cKIuo-aA6jhCXrAm6U2K9tL5GbeCQ3nMJfaxUQ,10884
 sunholo/chunker/pdfs.py,sha256=njDPop751GMHi3cOwIKd2Yct-_lWR2gqcB7WykfHphs,2480
 sunholo/chunker/process_chunker_data.py,sha256=uO-YOEHIjAOy0ZMJ0vea9OMNsQBISHfhbtgoyuHiP6s,3598
-sunholo/chunker/publish.py,sha256=
+sunholo/chunker/publish.py,sha256=8TrvmX51aQPvht11myxm0G4tuI3iqAM2wL2x8MWkfX4,2990
 sunholo/chunker/pubsub.py,sha256=48bhuAcszN7LGe3-ksPSLHHhq0uKxiXOrizck5qpcP0,1012
-sunholo/chunker/splitter.py,sha256=
+sunholo/chunker/splitter.py,sha256=WltIA6LYELwG0FEtiDKclgRtKuw3rXI2myFOQM4LkIs,6826
 sunholo/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sunholo/cli/chat_vac.py,sha256=sYPzUDwwwebJvIobv3GRW_xbQQ4BTy9G-WHdarGCHB0,23705
 sunholo/cli/cli.py,sha256=Bhyrs8GEtJTbsvPYufEY184ra13eusATXAnJClJ_LGY,4474
@@ -55,7 +55,7 @@ sunholo/cli/swagger.py,sha256=absYKAU-7Yd2eiVNUY-g_WLl2zJfeRUNdWQ0oH8M_HM,1564
 sunholo/cli/vertex.py,sha256=8130YCarxHL1UC3aqblNmUwGZTXbkdL4Y_FOnZJsWiI,2056
 sunholo/components/__init__.py,sha256=IDoylb74zFKo6NIS3RQqUl0PDFBGVxM1dfUmO7OJ44U,176
 sunholo/components/llm.py,sha256=8iyY6K1ZiiJx9MGL1fY5CHh8CD3YYhwEA6O8B44tkxE,13004
-sunholo/components/retriever.py,sha256=
+sunholo/components/retriever.py,sha256=Wmchv3huAM4w7DIS-a5Lp9Hi7M8pE6vZdxgseiT9SXk,7820
 sunholo/components/vectorstore.py,sha256=xKk7micTRwZckaI7U6PxvFz_ZSjCH48xPTDYiDcv2tc,5913
 sunholo/database/__init__.py,sha256=bpB5Nk21kwqYj-qdVnvNgXjLsbflnH4g-San7OHMqR4,283
 sunholo/database/alloydb.py,sha256=x1zUMB-EVWbE2Zvp4nAs2Z-tB_kOZmS45H2lwVHdYnk,11678
@@ -76,7 +76,7 @@ sunholo/discovery_engine/create_new.py,sha256=jWg5LW-QpFE8zq50ShaQJB3Wu8loiWB0P4
 sunholo/discovery_engine/discovery_engine_client.py,sha256=jfIayVUOPM4svGF1S5Kk60rIG-xSo_e3zOHtBRg0nZA,22002
 sunholo/discovery_engine/get_ai_search_chunks.py,sha256=hsFGOQugSeTMPEaQ16XTs_D45F8NABBm2IsAEdTk7kQ,4316
 sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
-sunholo/embedder/embed_chunk.py,sha256=
+sunholo/embedder/embed_chunk.py,sha256=sy--Gtf8x0N2KcGnPaDyS2GIVrZjxml4HZ9smlY-B5A,7037
 sunholo/excel/__init__.py,sha256=AqTMN9K4qJYi4maEgoORc5oxDVGO_eqmwzDaVP37JgY,56
 sunholo/excel/plugin.py,sha256=rl3FoECZ6Ts8KKExPrbPwr3u3CegZfsevmcjgUXAlhE,4033
 sunholo/gcs/__init__.py,sha256=SZvbsMFDko40sIRHTHppA37IijvJTae54vrhooEF5-4,90
@@ -89,7 +89,7 @@ sunholo/genai/__init__.py,sha256=6SWK7uV5F625J-P3xQoD6WKL59a9RSaidj-Guslyt8Q,192
 sunholo/genai/file_handling.py,sha256=Z3E7TR1DnP9WnneeEGC8LcT6k-9GFxwXDPaVZWw8HLE,8366
 sunholo/genai/images.py,sha256=EyjsDqt6XQw99pZUQamomCpMOoIah9bp3XY94WPU7Ms,1678
 sunholo/genai/init.py,sha256=yG8E67TduFCTQPELo83OJuWfjwTnGZsyACospahyEaY,687
-sunholo/genai/process_funcs_cls.py,sha256=
+sunholo/genai/process_funcs_cls.py,sha256=D6eNrc3vtTZzwdkacZNOSfit499N_o0C5AHspyUJiYE,33690
 sunholo/genai/safety.py,sha256=mkFDO_BeEgiKjQd9o2I4UxB6XI7a9U-oOFjZ8LGRUC4,1238
 sunholo/invoke/__init__.py,sha256=o1RhwBGOtVK0MIdD55fAIMCkJsxTksi8GD5uoqVKI-8,184
 sunholo/invoke/async_class.py,sha256=G8vD2H94fpBc37mSJSQODEKJ67P2mPQEHabtDaLOvxE,8033
@@ -119,12 +119,12 @@ sunholo/qna/retry.py,sha256=yMw7RTkw-RXCzfENPJOt8c32mXlpvOR589EGkvK-6yI,2028
 sunholo/senses/__init__.py,sha256=fbWqVwwzkV5uRSb8lQzo4pn0ja_VYVWbUYapurSowBs,39
 sunholo/senses/stream_voice.py,sha256=VpCmooEKghBT1jPJe9mX7gKggGqY6qt-bpO7hwY4sPE,18122
 sunholo/streaming/__init__.py,sha256=MpbydI2UYo_adttPQFkxNM33b-QRyNEbrKJx0C2AGPc,241
-sunholo/streaming/content_buffer.py,sha256=
+sunholo/streaming/content_buffer.py,sha256=pSAoajCIDm8KM4TWkR-XptI925m_hSPQuEy0ea9Uq-c,12863
 sunholo/streaming/langserve.py,sha256=hi7q8WY8DPKrALl9m_dOMxWOdE-iEuk7YW05SVDFIX8,6514
 sunholo/streaming/stream_lookup.py,sha256=hYg1DbdSE_QNJ8ZB-ynXJlWgvFjrGvwoUsGJu_E0pRQ,360
 sunholo/streaming/streaming.py,sha256=gSxLuwK-5-t5D1AjcHf838BY-L4jvdkdn_xePl-DK3o,16635
 sunholo/summarise/__init__.py,sha256=MZk3dblUMODcPb1crq4v-Z508NrFIpkSWNf9FIO8BcU,38
-sunholo/summarise/summarise.py,sha256=
+sunholo/summarise/summarise.py,sha256=XYOdBrTYRSinEOvbwfGKayk-5ELdQFucNuzZ7XSmXeQ,4028
 sunholo/terraform/__init__.py,sha256=yixxEltc3n9UpZaVi05GlgS-YRq_DVGjUc37I9ajeP4,76
 sunholo/terraform/tfvars_editor.py,sha256=-TBBWbALYb5HLFYwD2s70Kp27ys6fzIyreBFOT5kqqY,13142
 sunholo/tools/__init__.py,sha256=5NuYpwwTX81qGUWvgwfItoSLXteNnp7KjgD7IPZUFjI,53
@@ -150,9 +150,9 @@ sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
 sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
 sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
 sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
-sunholo-0.
-sunholo-0.
-sunholo-0.
-sunholo-0.
-sunholo-0.
-sunholo-0.
+sunholo-0.114.2.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+sunholo-0.114.2.dist-info/METADATA,sha256=GEofig2vXtd-UPdm1fph5sd3JEGOART5NHqLkwDRRSA,9056
+sunholo-0.114.2.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+sunholo-0.114.2.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+sunholo-0.114.2.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+sunholo-0.114.2.dist-info/RECORD,,
{sunholo-0.113.3.dist-info → sunholo-0.114.2.dist-info}/LICENSE.txt
File without changes
{sunholo-0.113.3.dist-info → sunholo-0.114.2.dist-info}/WHEEL
File without changes
{sunholo-0.113.3.dist-info → sunholo-0.114.2.dist-info}/entry_points.txt
File without changes
{sunholo-0.113.3.dist-info → sunholo-0.114.2.dist-info}/top_level.txt
File without changes