sunholo 0.59.2__tar.gz → 0.59.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sunholo-0.59.2 → sunholo-0.59.4}/PKG-INFO +2 -2
- {sunholo-0.59.2 → sunholo-0.59.4}/setup.py +1 -1
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/dispatch_to_qa.py +2 -2
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/route.py +4 -4
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/special_commands.py +2 -3
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/auth/run.py +1 -1
- sunholo-0.59.4/sunholo/bots/github_webhook.py +264 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/doc_handling.py +2 -2
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/splitter.py +1 -1
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/components/retriever.py +2 -2
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/alloydb.py +1 -1
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/database.py +3 -3
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/llamaindex/import_files.py +4 -4
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/streaming/streaming.py +4 -4
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/utils/config.py +2 -6
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/utils/config_schema.py +3 -2
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo.egg-info/PKG-INFO +2 -2
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo.egg-info/SOURCES.txt +1 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/LICENSE.txt +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/MANIFEST.in +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/README.md +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/setup.cfg +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/chat_history.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/fastapi/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/fastapi/base.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/fastapi/qna_routes.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/flask/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/flask/base.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/flask/qna_routes.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/langserve.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/pubsub.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/test_chat_history.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/archive/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/archive/archive.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/auth/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/bots/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/bots/discord.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/bots/webapp.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/data_to_embed_pubsub.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/images.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/loaders.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/message_data.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/pdfs.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/publish.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/cli/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/cli/cli.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/cli/cli_init.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/cli/configs.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/cli/deploy.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/components/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/components/llm.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/components/prompt.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/components/vectorstore.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/lancedb.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/sql/sb/create_function.sql +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/sql/sb/create_function_time.sql +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/sql/sb/create_table.sql +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/sql/sb/delete_source_row.sql +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/sql/sb/return_sources.sql +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/sql/sb/setup.sql +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/static_dbs.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/uuid.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/embedder/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/embedder/embed_chunk.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/gcs/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/gcs/add_file.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/gcs/download_url.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/gcs/metadata.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/langfuse/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/langfuse/callback.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/langfuse/prompts.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/llamaindex/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/llamaindex/generate.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/logging.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/lookup/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/lookup/model_lookup.yaml +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/patches/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/patches/langchain/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/patches/langchain/lancedb.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/patches/langchain/vertexai.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/pubsub/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/pubsub/process_pubsub.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/pubsub/pubsub_manager.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/qna/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/qna/parsers.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/qna/retry.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/streaming/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/streaming/content_buffer.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/streaming/langserve.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/summarise/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/summarise/summarise.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/utils/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/utils/big_context.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/utils/gcp.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/utils/parsers.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/vertex/__init__.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/vertex/init_vertex.py +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo.egg-info/dependency_links.txt +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo.egg-info/entry_points.txt +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo.egg-info/requires.txt +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/sunholo.egg-info/top_level.txt +0 -0
- {sunholo-0.59.2 → sunholo-0.59.4}/test/test_dispatch_to_qa.py +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sunholo
|
|
3
|
-
Version: 0.59.2
|
|
3
|
+
Version: 0.59.4
|
|
4
4
|
Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
|
|
5
5
|
Home-page: https://github.com/sunholo-data/sunholo-py
|
|
6
|
-
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.59.2.tar.gz
|
|
6
|
+
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.59.4.tar.gz
|
|
7
7
|
Author: Holosun ApS
|
|
8
8
|
Author-email: multivac@sunholo.com
|
|
9
9
|
License: Apache License, Version 2.0
|
|
@@ -49,8 +49,8 @@ def prep_request_payload(user_input, chat_history, vector_name, stream, **kwargs
|
|
|
49
49
|
# Add chat_history/vector_name to kwargs so langserve can use them too
|
|
50
50
|
kwargs['chat_history'] = chat_history
|
|
51
51
|
|
|
52
|
-
agent = load_config_key("agent", vector_name=vector_name,
|
|
53
|
-
agent_type = load_config_key("agent_type", vector_name=vector_name,
|
|
52
|
+
agent = load_config_key("agent", vector_name=vector_name, kind="vacConfig")
|
|
53
|
+
agent_type = load_config_key("agent_type", vector_name=vector_name, kind="vacConfig")
|
|
54
54
|
|
|
55
55
|
# {'stream': '', 'invoke': ''}
|
|
56
56
|
endpoints = route_endpoint(vector_name)
|
|
@@ -16,12 +16,12 @@ from ..utils import load_config_key, load_config
|
|
|
16
16
|
|
|
17
17
|
def route_qna(vector_name):
|
|
18
18
|
|
|
19
|
-
agent_url = load_config_key('agent_url', vector_name=vector_name,
|
|
19
|
+
agent_url = load_config_key('agent_url', vector_name=vector_name, kind="vacConfig")
|
|
20
20
|
if agent_url:
|
|
21
21
|
log.info('agent_url found in llm_config.yaml')
|
|
22
22
|
return agent_url
|
|
23
23
|
|
|
24
|
-
agent = load_config_key('agent', vector_name,
|
|
24
|
+
agent = load_config_key('agent', vector_name, kind="vacConfig")
|
|
25
25
|
log.info(f'agent_type: {agent}')
|
|
26
26
|
|
|
27
27
|
agent_route, _ = load_config('config/cloud_run_urls.json')
|
|
@@ -37,9 +37,9 @@ def route_qna(vector_name):
|
|
|
37
37
|
|
|
38
38
|
def route_endpoint(vector_name):
|
|
39
39
|
|
|
40
|
-
agent_type = load_config_key('agent_type', vector_name,
|
|
40
|
+
agent_type = load_config_key('agent_type', vector_name, kind="vacConfig")
|
|
41
41
|
if not agent_type:
|
|
42
|
-
agent_type = load_config_key('agent', vector_name,
|
|
42
|
+
agent_type = load_config_key('agent', vector_name, kind="vacConfig")
|
|
43
43
|
|
|
44
44
|
stem = route_qna(vector_name)
|
|
45
45
|
|
|
@@ -41,14 +41,13 @@ def handle_special_commands(user_input,
|
|
|
41
41
|
vector_name,
|
|
42
42
|
chat_history,
|
|
43
43
|
bucket=None,
|
|
44
|
-
cmds=None
|
|
45
|
-
config_file="config/llm_config.yaml"):
|
|
44
|
+
cmds=None):
|
|
46
45
|
now = datetime.datetime.now()
|
|
47
46
|
hourmin = now.strftime("%H%M%S")
|
|
48
47
|
the_datetime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
|
49
48
|
|
|
50
49
|
if not cmds:
|
|
51
|
-
cmds = load_config_key("user_special_cmds", vector_name=vector_name,
|
|
50
|
+
cmds = load_config_key("user_special_cmds", vector_name=vector_name, kind="vacConfig")
|
|
52
51
|
if not cmds:
|
|
53
52
|
return None
|
|
54
53
|
|
|
@@ -15,7 +15,7 @@ def get_run_url(vector_name=None):
|
|
|
15
15
|
cloud_urls = route_qna(vector_name)
|
|
16
16
|
|
|
17
17
|
cloud_urls, _ = load_config('config/cloud_run_urls.json')
|
|
18
|
-
agent = load_config_key("agent", vector_name=vector_name,
|
|
18
|
+
agent = load_config_key("agent", vector_name=vector_name, kind="vacConfig")
|
|
19
19
|
|
|
20
20
|
try:
|
|
21
21
|
log.info(f'Looking up URL for {agent}')
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
# from https://github.com/ray-project/docu-mentor
|
|
2
|
+
import base64
|
|
3
|
+
import httpx
|
|
4
|
+
from dotenv import load_dotenv
|
|
5
|
+
import jwt
|
|
6
|
+
import os
|
|
7
|
+
import time
|
|
8
|
+
|
|
9
|
+
load_dotenv()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
APP_ID = os.environ.get("APP_ID")
|
|
14
|
+
PRIVATE_KEY = os.environ.get("PRIVATE_KEY", "")
|
|
15
|
+
|
|
16
|
+
# with open('private-key.pem', 'r') as f:
|
|
17
|
+
# PRIVATE_KEY = f.read()
|
|
18
|
+
|
|
19
|
+
def generate_jwt():
    """Create a short-lived JWT that authenticates this process as the GitHub App.

    The token is signed with RS256 using the module-level ``PRIVATE_KEY`` and
    carries the module-level ``APP_ID`` as its issuer (``iss``).

    Returns:
        The encoded token as returned by ``jwt.encode``.

    Raises:
        ValueError: If ``PRIVATE_KEY`` is empty.
    """
    if not PRIVATE_KEY:
        raise ValueError("PRIVATE_KEY not found.")

    # Read the clock once so `iat` and `exp` are derived from the same instant.
    now = int(time.time())
    payload = {
        # Backdated by 60 seconds: GitHub recommends this to tolerate clock
        # drift between this host and its servers, which otherwise can make
        # the token look like it was issued in the future.
        "iat": now - 60,
        # GitHub accepts an expiry of at most 10 minutes in the future.
        "exp": now + (10 * 60),
        "iss": APP_ID,
    }
    return jwt.encode(payload, PRIVATE_KEY, algorithm="RS256")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
async def get_installation_access_token(jwt, installation_id):
    """Exchange an app JWT for an installation access token.

    Args:
        jwt: The encoded app JWT, sent as a Bearer token. Note that inside
            this function the parameter shadows the ``jwt`` module.
        installation_id: ID of the GitHub App installation to get a token for.

    Returns:
        The value of the ``token`` field in GitHub's JSON response.

    Raises:
        httpx.HTTPStatusError: If GitHub answers with a 4xx/5xx status.
    """
    url = f"https://api.github.com/app/installations/{installation_id}/access_tokens"
    headers = {
        "Authorization": f"Bearer {jwt}",
        "Accept": "application/vnd.github.v3+json",
    }
    async with httpx.AsyncClient() as client:
        response = await client.post(url, headers=headers)

    # Surface the real HTTP failure (bad JWT, unknown installation, ...)
    # instead of an opaque KeyError on the missing "token" field.
    response.raise_for_status()
    return response.json()["token"]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_diff_url(pr):
    """Build the raw ``.diff`` URL for a pull request.

    GitHub 302s to this URL.

    Args:
        pr: Mapping with a ``url`` entry whose path ends in
            ``.../{owner}/{repo}/<segment>/{pr_number}``.

    Returns:
        The ``patch-diff.githubusercontent.com`` URL of the pull request diff.

    Raises:
        ValueError: If ``url`` is missing/empty or has too few path segments.
    """
    original_url = pr.get("url")
    if not original_url:
        raise ValueError("Pull request payload has no 'url' to derive the diff URL from.")

    # Drop a trailing slash so the last segment is the PR number, not "".
    parts = original_url.rstrip("/").split("/")
    if len(parts) < 4:
        raise ValueError(f"Unexpected pull request URL: {original_url}")

    owner, repo, pr_number = parts[-4], parts[-3], parts[-1]
    return f"https://patch-diff.githubusercontent.com/raw/{owner}/{repo}/pull/{pr_number}.diff"
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
async def get_branch_files(pr, branch, headers):
    """Download the text of every blob in a branch's git tree.

    Args:
        pr: Mapping with a ``url`` entry; owner and repo are taken from its
            fourth- and third-from-last path segments.
        branch: Branch (tree-ish) name to list recursively.
        headers: HTTP headers sent with every request.

    Returns:
        Dict mapping each blob's ``path`` to its decoded content. Bytes that
        are not valid UTF-8 are replaced with U+FFFD rather than raising.
    """
    original_url = pr.get("url")
    parts = original_url.split("/")
    owner, repo = parts[-4], parts[-3]
    url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{branch}?recursive=1"

    files = {}
    async with httpx.AsyncClient() as client:
        response = await client.get(url, headers=headers)
        tree = response.json().get('tree', [])
        # NOTE(review): one request per blob, sequentially - this scales with
        # the size of the repository, not with the size of the PR.
        for item in tree:
            if item.get('type') != 'blob':
                continue
            file_response = await client.get(item['url'], headers=headers)
            content = file_response.json().get('content', '')
            # Decode the base64 content. errors='replace' keeps binary blobs
            # (images, archives, ...) from aborting the whole listing with a
            # UnicodeDecodeError while still keeping every path in the result.
            decoded_content = base64.b64decode(content).decode('utf-8', errors='replace')
            files[item['path']] = decoded_content
    return files
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
async def get_pr_head_branch(pr, headers):
    """Look up the name of a pull request's head ref.

    Args:
        pr: Mapping with a ``url`` entry; owner, repo and PR number are taken
            from its path segments.
        headers: HTTP headers sent with the request.

    Returns:
        The ``head.ref`` value from the API response, or ``''`` when the
        request does not return 200 or the field is absent.
    """
    segments = pr.get("url").split("/")
    owner = segments[-4]
    repo = segments[-3]
    pr_number = segments[-1]
    url = f"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}"

    async with httpx.AsyncClient() as client:
        response = await client.get(url, headers=headers)

        if response.status_code == 200:
            # Missing 'head' or 'ref' falls back to an empty string.
            return response.json().get('head', {}).get('ref', '')

        # Anything other than 200: report it and signal failure with ''.
        print(f"Error: Received status code {response.status_code}")
        print("Response body:", response.text)
        return ''
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def files_to_diff_dict(diff):
    """Collect the added lines of a unified diff, grouped by file.

    Args:
        diff: Unified diff text containing ``diff --git`` headers.

    Returns:
        Dict mapping each file path to ``{"text": [...]}``, where the list
        holds every added line of that file without its leading ``+``.
    """
    files_with_diff = {}
    current_file = None
    in_hunk = False
    for line in diff.split("\n"):
        if line.startswith("diff --git"):
            # Prefer the new-side ("b/") path: it is the one the added lines
            # belong to, and it survives renames and paths containing spaces.
            header = line[len("diff --git "):]
            _, sep, new_path = header.rpartition(" b/")
            current_file = new_path if sep else line.split(" ")[2][2:]
            files_with_diff[current_file] = {"text": []}
            in_hunk = False
        elif line.startswith("@@"):
            # Ignore hunks that appear before any file header.
            in_hunk = current_file is not None
        elif in_hunk and line.startswith("+"):
            # Inside a hunk every "+" line is added content, even when the
            # content itself starts with "++" (e.g. "++i;"). The "+++ b/file"
            # header only occurs before the first hunk and is skipped there.
            files_with_diff[current_file]["text"].append(line[1:])
    return files_with_diff
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def parse_diff_to_line_numbers(diff):
    """Map each file in a unified diff to the positions of its added lines.

    Args:
        diff: Unified diff text containing ``diff --git`` headers and
            ``@@ -a,b +c,d @@`` hunk headers.

    Returns:
        Dict mapping each file path to a list of 0-based line indexes (in the
        new version of the file) of the lines the diff adds.
    """
    files_with_line_numbers = {}
    current_file = None
    line_number = 0
    in_hunk = False
    for line in diff.split("\n"):
        if line.startswith("diff --git"):
            # Use the new-side ("b/") path: the line numbers refer to the new
            # file, and this also handles renames and paths with spaces.
            header = line[len("diff --git "):]
            _, sep, new_path = header.rpartition(" b/")
            current_file = new_path if sep else line.split(" ")[2][2:]
            files_with_line_numbers[current_file] = []
            line_number = 0
            in_hunk = False
        elif line.startswith("@@"):
            # "+c,d" (or "+c") is the new-file start line; make it 0-based.
            line_number = int(line.split(" ")[2].split(",")[0][1:]) - 1
            in_hunk = current_file is not None
        elif not in_hunk:
            # File-header lines (index, ---, +++, rename ...) or stray text
            # before any header: nothing to count.
            continue
        elif line.startswith("+"):
            # Inside a hunk every "+" line is an addition, including content
            # that itself begins with "++".
            files_with_line_numbers[current_file].append(line_number)
            line_number += 1
        elif line.startswith("-") or line.startswith("\\"):
            # Removed lines and "\ No newline at end of file" markers do not
            # exist in the new file, so they must not advance the counter.
            continue
        else:
            line_number += 1
    return files_with_line_numbers
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def get_context_from_files(files, files_with_line_numbers, context_lines=2):
    """Extract a window of surrounding lines for every changed line.

    Args:
        files: Dict mapping file path to the file's full text.
        files_with_line_numbers: Dict mapping file path to a list of 0-based
            line indexes.
        context_lines: Number of lines to include before and after each index.

    Returns:
        Dict mapping each path in ``files_with_line_numbers`` to a list of
        snippets, one per index, each joined with newlines. Paths that are not
        present in ``files`` get an empty list.
    """
    context_data = {}
    for file, lines in files_with_line_numbers.items():
        context_data[file] = []
        file_text = files.get(file)
        if file_text is None:
            # The diff can mention files that are not in `files` (e.g. files
            # deleted by the change); there is no content to quote for them.
            continue
        file_content = file_text.split("\n")
        for line in lines:
            # Clamp the window to the bounds of the file.
            start = max(line - context_lines, 0)
            end = min(line + context_lines + 1, len(file_content))
            context_data[file].append('\n'.join(file_content[start:end]))
    return context_data
|
|
134
|
+
|
|
135
|
+
app = FastAPI()
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
async def handle_webhook(request: Request):
    """Process a GitHub App webhook delivery.

    Two kinds of payload are acted on:

    * a pull request that was opened or reopened - a greeting comment is
      posted on it;
    * a created/edited comment on a pull request that contains
      ``@docu-mentor run`` and was not written by ``docu-mentor[bot]`` - the
      PR diff is fetched, enriched with context from the head branch, sent to
      the mentor function, and the result is posted back as a comment.

    Args:
        request: Incoming request whose JSON body is the webhook payload.

    Returns:
        An empty JSON response for the greeting branch; ``None`` otherwise.

    Raises:
        ValueError: If the payload carries no app installation id.
    """
    # NOTE(review): `logger`, `GREETING`, `JSONResponse`, `ray`, `ray_mentor`
    # and `mentor` are used below but are not defined or imported anywhere in
    # this module - they need to be brought into scope at module level.
    # NOTE(review): the payload is trusted as-is; the webhook signature
    # (X-Hub-Signature-256) is not verified - confirm this is done upstream.

    # `string` is not imported at module level, so import it here.
    import string

    data = await request.json()

    installation = data.get("installation")
    if installation and installation.get("id"):
        installation_id = installation.get("id")
        logger.info(f"Installation ID: {installation_id}")

        JWT_TOKEN = generate_jwt()

        installation_access_token = await get_installation_access_token(
            JWT_TOKEN, installation_id
        )

        headers = {
            "Authorization": f"token {installation_access_token}",
            "User-Agent": "docu-mentor-bot",
            "Accept": "application/vnd.github.VERSION.diff",
        }
    else:
        raise ValueError("No app installation found.")

    # If PR exists and is opened.
    # `.get` so a payload without "action" is ignored instead of raising.
    if "pull_request" in data.keys() and (
        data.get("action") in ["opened", "reopened"]
    ):  # use "synchronize" for tracking new commits
        pr = data.get("pull_request")

        # Greet the user and show instructions.
        async with httpx.AsyncClient() as client:
            await client.post(
                f"{pr['issue_url']}/comments",
                json={"body": GREETING},
                headers=headers,
            )
        return JSONResponse(content={}, status_code=200)

    # Check if the event is a new or modified issue comment
    if "issue" in data.keys() and data.get("action") in ["created", "edited"]:
        issue = data["issue"]

        # Check if the issue is a pull request
        if "/pull/" in issue["html_url"]:
            pr = issue.get("pull_request")

            # Get the comment body. Tolerate a payload without a comment (or
            # with an empty body): it simply will not match the trigger below.
            comment = data.get("comment") or {}
            comment_body = comment.get("body") or ""
            # Remove all whitespace characters except for regular spaces
            comment_body = comment_body.translate(
                str.maketrans("", "", string.whitespace.replace(" ", ""))
            )

            # Skip if the bot talks about itself
            author_handle = (comment.get("user") or {}).get("login")

            # Check if the bot is mentioned in the comment
            if (
                author_handle != "docu-mentor[bot]"
                and "@docu-mentor run" in comment_body
            ):
                async with httpx.AsyncClient() as client:
                    # Anything after the trigger phrase is treated as a list
                    # of file-name substrings to restrict the review to.
                    files_to_keep = comment_body.replace(
                        "@docu-mentor run", ""
                    ).split(" ")
                    files_to_keep = [item for item in files_to_keep if item]

                    logger.info(files_to_keep)

                    # Fetch diff from GitHub
                    url = get_diff_url(pr)
                    diff_response = await client.get(url, headers=headers)
                    diff = diff_response.text

                    files_with_lines = parse_diff_to_line_numbers(diff)

                    # Get head branch of the PR
                    headers["Accept"] = "application/vnd.github.full+json"
                    head_branch = await get_pr_head_branch(pr, headers)

                    # Get files from head branch
                    head_branch_files = await get_branch_files(pr, head_branch, headers)
                    print("HEAD FILES", head_branch_files)

                    # Enrich diff data with context from the head branch.
                    context_files = get_context_from_files(head_branch_files, files_with_lines)

                    # Filter the dictionary
                    if files_to_keep:
                        context_files = {
                            k: context_files[k]
                            for k in context_files
                            if any(sub in k for sub in files_to_keep)
                        }

                    # Get suggestions from Docu Mentor
                    content, model, prompt_tokens, completion_tokens = \
                        ray_mentor(context_files) if ray.is_initialized() else mentor(context_files)

                    # Let's comment on the PR
                    await client.post(
                        f"{comment['issue_url']}/comments",
                        json={
                            "body": f":rocket: Docu Mentor finished "
                            + "analysing your PR! :rocket:\n\n"
                            + "Take a look at your results:\n"
                            + f"{content}\n\n"
                            + "This bot is powered by "
                            + "[Sunholo Multivac](https://www.sunholo.com/).\n"
                            + f"It used the model {model}, used {prompt_tokens} prompt tokens, "
                            + f"and {completion_tokens} completion tokens in total."
                        },
                        headers=headers,
                    )
|
|
253
|
+
|
|
254
|
+
@serve.deployment(route_prefix="/")
@serve.ingress(app)
class ServeBot:
    """Deployment class that binds the ``app`` routes below.

    NOTE(review): ``serve`` is presumably Ray Serve, but it is not imported
    in this module - confirm and add the import.
    """

    @app.get("/")
    async def root(self):
        """Return a fixed status message."""
        status = {"message": "Docu Mentor reporting for duty!"}
        return status

    @app.post("/webhook/")
    async def handle_webhook_route(self, request: Request):
        """Delegate the incoming webhook request to ``handle_webhook``."""
        outcome = await handle_webhook(request)
        return outcome
|
|
264
|
+
|
|
@@ -21,7 +21,7 @@ def send_doc_to_docstore(docs, vector_name):
|
|
|
21
21
|
|
|
22
22
|
# docs all come from the same file but got split into a list of document objects
|
|
23
23
|
|
|
24
|
-
docstore_config = load_config_key("docstore", vector_name=vector_name,
|
|
24
|
+
docstore_config = load_config_key("docstore", vector_name=vector_name, kind="vacConfig")
|
|
25
25
|
if docstore_config is None:
|
|
26
26
|
log.info(f"No docstore config found for {vector_name} ")
|
|
27
27
|
|
|
@@ -110,7 +110,7 @@ def summarise_docs(docs, vector_name, summary_threshold_default=10000, model_lim
|
|
|
110
110
|
if not docs:
|
|
111
111
|
return None
|
|
112
112
|
|
|
113
|
-
chunker_config = load_config_key("chunker", vector_name=vector_name,
|
|
113
|
+
chunker_config = load_config_key("chunker", vector_name=vector_name, kind="vacConfig")
|
|
114
114
|
summarise_chunking_config = chunker_config.get("summarise") if chunker_config else None
|
|
115
115
|
|
|
116
116
|
if not summarise_chunking_config:
|
|
@@ -114,7 +114,7 @@ def choose_splitter(extension: str, chunk_size: int=1024, chunk_overlap:int=200,
|
|
|
114
114
|
if vector_name:
|
|
115
115
|
# check if there is a chunking configuration
|
|
116
116
|
from ..utils import load_config_key
|
|
117
|
-
chunk_config = load_config_key("chunker", vector_name=vector_name,
|
|
117
|
+
chunk_config = load_config_key("chunker", vector_name=vector_name, kind="vacConfig")
|
|
118
118
|
if chunk_config:
|
|
119
119
|
if chunk_config.get("type") == "semantic":
|
|
120
120
|
embedding_str = chunk_config.get("llm")
|
|
@@ -27,7 +27,7 @@ from langchain.retrievers import ContextualCompressionRetriever
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
def load_memories(vector_name):
|
|
30
|
-
memories = load_config_key("memory", vector_name,
|
|
30
|
+
memories = load_config_key("memory", vector_name, kind="vacConfig")
|
|
31
31
|
log.info(f"Found memory settings for {vector_name}: {memories}")
|
|
32
32
|
if len(memories) == 0:
|
|
33
33
|
log.info(f"No memory settings found for {vector_name}")
|
|
@@ -70,7 +70,7 @@ def pick_retriever(vector_name, embeddings=None):
|
|
|
70
70
|
log.info(f"No retrievers were created for {memories}")
|
|
71
71
|
return None
|
|
72
72
|
|
|
73
|
-
k_override = load_config_key("memory_k", vector_name,
|
|
73
|
+
k_override = load_config_key("memory_k", vector_name, kind="vacConfig")
|
|
74
74
|
if not k_override:
|
|
75
75
|
k_override = 3
|
|
76
76
|
|
|
@@ -48,16 +48,16 @@ def lookup_connection_env(vs_str):
|
|
|
48
48
|
raise ValueError("Could not find vectorstore for {vs_str}")
|
|
49
49
|
|
|
50
50
|
|
|
51
|
-
def get_vector_size(vector_name: str
|
|
51
|
+
def get_vector_size(vector_name: str):
|
|
52
52
|
|
|
53
53
|
llm_str = None
|
|
54
|
-
embed_dict = load_config_key("embedder", vector_name,
|
|
54
|
+
embed_dict = load_config_key("embedder", vector_name, kind="vacConfig")
|
|
55
55
|
|
|
56
56
|
if embed_dict:
|
|
57
57
|
llm_str = embed_dict.get('llm')
|
|
58
58
|
|
|
59
59
|
if llm_str is None:
|
|
60
|
-
llm_str = load_config_key("llm", vector_name,
|
|
60
|
+
llm_str = load_config_key("llm", vector_name, kind="vacConfig")
|
|
61
61
|
|
|
62
62
|
if not isinstance(llm_str, str):
|
|
63
63
|
raise ValueError(f"get_vector_size() did not return a value string for {vector_name} - got {llm_str} instead")
|
|
@@ -102,7 +102,7 @@ def do_llamaindex(message_data, metadata, vector_name):
|
|
|
102
102
|
if not rag:
|
|
103
103
|
raise ValueError("Need to install vertexai module via `pip install sunholo[gcp]`")
|
|
104
104
|
|
|
105
|
-
gcp_config = load_config_key("gcp_config", vector_name=vector_name,
|
|
105
|
+
gcp_config = load_config_key("gcp_config", vector_name=vector_name, kind="vacConfig")
|
|
106
106
|
if not gcp_config:
|
|
107
107
|
raise ValueError(f"Need config.{vector_name}.gcp_config to configure llamaindex on VertexAI")
|
|
108
108
|
|
|
@@ -120,7 +120,7 @@ def do_llamaindex(message_data, metadata, vector_name):
|
|
|
120
120
|
log.info(f"Found llamaindex corpus: {corpus}")
|
|
121
121
|
|
|
122
122
|
# native support for cloud storage and drive links
|
|
123
|
-
chunker_config = load_config_key("chunker", vector_name=vector_name,
|
|
123
|
+
chunker_config = load_config_key("chunker", vector_name=vector_name, kind="vacConfig")
|
|
124
124
|
|
|
125
125
|
if message_data.startswith("gs://") or message_data.startswith("https://drive.google.com"):
|
|
126
126
|
log.info(f"rag.import_files for {message_data}")
|
|
@@ -160,7 +160,7 @@ def do_llamaindex(message_data, metadata, vector_name):
|
|
|
160
160
|
#)
|
|
161
161
|
|
|
162
162
|
def check_llamaindex_in_memory(vector_name):
|
|
163
|
-
memories = load_config_key("memory", vector_name=vector_name,
|
|
163
|
+
memories = load_config_key("memory", vector_name=vector_name, kind="vacConfig")
|
|
164
164
|
for memory in memories: # Iterate over the list
|
|
165
165
|
for key, value in memory.items(): # Now iterate over the dictionary
|
|
166
166
|
log.info(f"Found memory {key}")
|
|
@@ -175,7 +175,7 @@ def check_llamaindex_in_memory(vector_name):
|
|
|
175
175
|
|
|
176
176
|
def llamaindex_chunker_check(message_data, metadata, vector_name):
|
|
177
177
|
# llamaindex handles its own chunking/embedding
|
|
178
|
-
memories = load_config_key("memory", vector_name=vector_name,
|
|
178
|
+
memories = load_config_key("memory", vector_name=vector_name, kind="vacConfig")
|
|
179
179
|
total_memories = len(memories)
|
|
180
180
|
llama = None
|
|
181
181
|
if check_llamaindex_in_memory(vector_name):
|
|
@@ -248,8 +248,8 @@ def generate_proxy_stream(stream_to_f, user_input, vector_name, chat_history, ge
|
|
|
248
248
|
):
|
|
249
249
|
print(output) # Process each streaming output chunk
|
|
250
250
|
"""
|
|
251
|
-
agent = load_config_key("agent", vector_name=vector_name,
|
|
252
|
-
agent_type = load_config_key("agent_type", vector_name=vector_name,
|
|
251
|
+
agent = load_config_key("agent", vector_name=vector_name, kind="vacConfig")
|
|
252
|
+
agent_type = load_config_key("agent_type", vector_name=vector_name, kind="vacConfig")
|
|
253
253
|
|
|
254
254
|
def generate():
|
|
255
255
|
json_buffer = ""
|
|
@@ -306,8 +306,8 @@ async def generate_proxy_stream_async(stream_to_f, user_input, vector_name, chat
|
|
|
306
306
|
):
|
|
307
307
|
print(output) # Process each streaming output chunk
|
|
308
308
|
"""
|
|
309
|
-
agent = load_config_key("agent", vector_name=vector_name,
|
|
310
|
-
agent_type = load_config_key("agent_type", vector_name=vector_name,
|
|
309
|
+
agent = load_config_key("agent", vector_name=vector_name, kind="vacConfig")
|
|
310
|
+
agent_type = load_config_key("agent_type", vector_name=vector_name, kind="vacConfig")
|
|
311
311
|
|
|
312
312
|
async def generate():
|
|
313
313
|
json_buffer = ""
|
|
@@ -167,14 +167,13 @@ def load_config(filename: str=None) -> tuple[dict, str]:
|
|
|
167
167
|
|
|
168
168
|
return config, filename
|
|
169
169
|
|
|
170
|
-
def load_config_key(key: str, vector_name: str,
|
|
170
|
+
def load_config_key(key: str, vector_name: str, kind: str=None):
|
|
171
171
|
"""
|
|
172
172
|
Load a specific key from a configuration file.
|
|
173
173
|
|
|
174
174
|
Args:
|
|
175
175
|
key (str): The key to fetch from the configuration.
|
|
176
176
|
vector_name (str): The name of the vector in the configuration file.
|
|
177
|
-
filename (str, optional): The configuration file name. Defaults to the `_CONFIG_FILE` environment variable. Deprecated - use 'kind' instead
|
|
178
177
|
kind: (str, optional): Specify the type of configuration to retrieve e.g. 'vacConfig' which will pick from files within `_CONFIG_FOLDER`
|
|
179
178
|
|
|
180
179
|
Returns:
|
|
@@ -182,7 +181,7 @@ def load_config_key(key: str, vector_name: str, filename: str=None, kind: str=No
|
|
|
182
181
|
|
|
183
182
|
Example:
|
|
184
183
|
```python
|
|
185
|
-
api_url = load_config_key('apiUrl', 'myVector',
|
|
184
|
+
api_url = load_config_key('apiUrl', 'myVector', kind="vacConfig")
|
|
186
185
|
print(f'API URL: {api_url}')
|
|
187
186
|
```
|
|
188
187
|
"""
|
|
@@ -197,9 +196,6 @@ def load_config_key(key: str, vector_name: str, filename: str=None, kind: str=No
|
|
|
197
196
|
if kind:
|
|
198
197
|
log.info(f"Got kind: {kind} - applying to configs")
|
|
199
198
|
|
|
200
|
-
if filename:
|
|
201
|
-
log.warning(f"Got filename argument: {filename} for config - deprecated - use `kind='vacConfig'` instead")
|
|
202
|
-
|
|
203
199
|
if not configs_by_kind:
|
|
204
200
|
log.warning("Did not load configs via folder")
|
|
205
201
|
|
|
@@ -52,7 +52,8 @@ VAC_SUBCONFIG_SCHEMA = {
|
|
|
52
52
|
"cluster": {"type": "string"},
|
|
53
53
|
"instance": {"type": "string"},
|
|
54
54
|
"database": {"type": "string"}
|
|
55
|
-
}
|
|
55
|
+
},
|
|
56
|
+
"required": ["project_id", "region", "cluster", "instance", "database"]
|
|
56
57
|
},
|
|
57
58
|
"secrets": {
|
|
58
59
|
"type": "array",
|
|
@@ -87,7 +88,7 @@ VAC_CONFIG_SCHEMA = {
|
|
|
87
88
|
}
|
|
88
89
|
}
|
|
89
90
|
},
|
|
90
|
-
"required": ["kind", "apiVersion", "
|
|
91
|
+
"required": ["kind", "apiVersion", "vac"]
|
|
91
92
|
}
|
|
92
93
|
|
|
93
94
|
PROMPT_CONFIG_SCHEMA = {
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sunholo
|
|
3
|
-
Version: 0.59.2
|
|
3
|
+
Version: 0.59.4
|
|
4
4
|
Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
|
|
5
5
|
Home-page: https://github.com/sunholo-data/sunholo-py
|
|
6
|
-
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.59.2.tar.gz
|
|
6
|
+
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.59.4.tar.gz
|
|
7
7
|
Author: Holosun ApS
|
|
8
8
|
Author-email: multivac@sunholo.com
|
|
9
9
|
License: Apache License, Version 2.0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|