sunholo 0.59.2__tar.gz → 0.59.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. {sunholo-0.59.2 → sunholo-0.59.4}/PKG-INFO +2 -2
  2. {sunholo-0.59.2 → sunholo-0.59.4}/setup.py +1 -1
  3. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/dispatch_to_qa.py +2 -2
  4. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/route.py +4 -4
  5. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/special_commands.py +2 -3
  6. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/auth/run.py +1 -1
  7. sunholo-0.59.4/sunholo/bots/github_webhook.py +264 -0
  8. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/doc_handling.py +2 -2
  9. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/splitter.py +1 -1
  10. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/components/retriever.py +2 -2
  11. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/alloydb.py +1 -1
  12. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/database.py +3 -3
  13. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/llamaindex/import_files.py +4 -4
  14. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/streaming/streaming.py +4 -4
  15. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/utils/config.py +2 -6
  16. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/utils/config_schema.py +3 -2
  17. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo.egg-info/PKG-INFO +2 -2
  18. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo.egg-info/SOURCES.txt +1 -0
  19. {sunholo-0.59.2 → sunholo-0.59.4}/LICENSE.txt +0 -0
  20. {sunholo-0.59.2 → sunholo-0.59.4}/MANIFEST.in +0 -0
  21. {sunholo-0.59.2 → sunholo-0.59.4}/README.md +0 -0
  22. {sunholo-0.59.2 → sunholo-0.59.4}/setup.cfg +0 -0
  23. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/__init__.py +0 -0
  24. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/__init__.py +0 -0
  25. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/chat_history.py +0 -0
  26. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/fastapi/__init__.py +0 -0
  27. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/fastapi/base.py +0 -0
  28. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/fastapi/qna_routes.py +0 -0
  29. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/flask/__init__.py +0 -0
  30. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/flask/base.py +0 -0
  31. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/flask/qna_routes.py +0 -0
  32. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/langserve.py +0 -0
  33. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/pubsub.py +0 -0
  34. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/agents/test_chat_history.py +0 -0
  35. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/archive/__init__.py +0 -0
  36. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/archive/archive.py +0 -0
  37. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/auth/__init__.py +0 -0
  38. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/bots/__init__.py +0 -0
  39. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/bots/discord.py +0 -0
  40. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/bots/webapp.py +0 -0
  41. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/__init__.py +0 -0
  42. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/data_to_embed_pubsub.py +0 -0
  43. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/images.py +0 -0
  44. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/loaders.py +0 -0
  45. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/message_data.py +0 -0
  46. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/pdfs.py +0 -0
  47. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/chunker/publish.py +0 -0
  48. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/cli/__init__.py +0 -0
  49. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/cli/cli.py +0 -0
  50. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/cli/cli_init.py +0 -0
  51. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/cli/configs.py +0 -0
  52. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/cli/deploy.py +0 -0
  53. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/components/__init__.py +0 -0
  54. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/components/llm.py +0 -0
  55. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/components/prompt.py +0 -0
  56. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/components/vectorstore.py +0 -0
  57. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/__init__.py +0 -0
  58. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/lancedb.py +0 -0
  59. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/sql/sb/create_function.sql +0 -0
  60. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/sql/sb/create_function_time.sql +0 -0
  61. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/sql/sb/create_table.sql +0 -0
  62. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/sql/sb/delete_source_row.sql +0 -0
  63. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/sql/sb/return_sources.sql +0 -0
  64. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/sql/sb/setup.sql +0 -0
  65. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/static_dbs.py +0 -0
  66. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/database/uuid.py +0 -0
  67. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/embedder/__init__.py +0 -0
  68. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/embedder/embed_chunk.py +0 -0
  69. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/gcs/__init__.py +0 -0
  70. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/gcs/add_file.py +0 -0
  71. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/gcs/download_url.py +0 -0
  72. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/gcs/metadata.py +0 -0
  73. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/langfuse/__init__.py +0 -0
  74. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/langfuse/callback.py +0 -0
  75. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/langfuse/prompts.py +0 -0
  76. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/llamaindex/__init__.py +0 -0
  77. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/llamaindex/generate.py +0 -0
  78. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/logging.py +0 -0
  79. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/lookup/__init__.py +0 -0
  80. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/lookup/model_lookup.yaml +0 -0
  81. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/patches/__init__.py +0 -0
  82. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/patches/langchain/__init__.py +0 -0
  83. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/patches/langchain/lancedb.py +0 -0
  84. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/patches/langchain/vertexai.py +0 -0
  85. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/pubsub/__init__.py +0 -0
  86. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/pubsub/process_pubsub.py +0 -0
  87. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/pubsub/pubsub_manager.py +0 -0
  88. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/qna/__init__.py +0 -0
  89. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/qna/parsers.py +0 -0
  90. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/qna/retry.py +0 -0
  91. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/streaming/__init__.py +0 -0
  92. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/streaming/content_buffer.py +0 -0
  93. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/streaming/langserve.py +0 -0
  94. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/summarise/__init__.py +0 -0
  95. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/summarise/summarise.py +0 -0
  96. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/utils/__init__.py +0 -0
  97. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/utils/big_context.py +0 -0
  98. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/utils/gcp.py +0 -0
  99. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/utils/parsers.py +0 -0
  100. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/vertex/__init__.py +0 -0
  101. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo/vertex/init_vertex.py +0 -0
  102. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo.egg-info/dependency_links.txt +0 -0
  103. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo.egg-info/entry_points.txt +0 -0
  104. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo.egg-info/requires.txt +0 -0
  105. {sunholo-0.59.2 → sunholo-0.59.4}/sunholo.egg-info/top_level.txt +0 -0
  106. {sunholo-0.59.2 → sunholo-0.59.4}/test/test_dispatch_to_qa.py +0 -0
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.59.2
3
+ Version: 0.59.4
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.59.2.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.59.4.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -1,7 +1,7 @@
1
1
  from setuptools import setup, find_packages
2
2
 
3
3
  # Define your base version
4
- version = '0.59.2'
4
+ version = '0.59.4'
5
5
 
6
6
  setup(
7
7
  name='sunholo',
@@ -49,8 +49,8 @@ def prep_request_payload(user_input, chat_history, vector_name, stream, **kwargs
49
49
  # Add chat_history/vector_name to kwargs so langserve can use them too
50
50
  kwargs['chat_history'] = chat_history
51
51
 
52
- agent = load_config_key("agent", vector_name=vector_name, filename="config/llm_config.yaml")
53
- agent_type = load_config_key("agent_type", vector_name=vector_name, filename="config/llm_config.yaml")
52
+ agent = load_config_key("agent", vector_name=vector_name, kind="vacConfig")
53
+ agent_type = load_config_key("agent_type", vector_name=vector_name, kind="vacConfig")
54
54
 
55
55
  # {'stream': '', 'invoke': ''}
56
56
  endpoints = route_endpoint(vector_name)
@@ -16,12 +16,12 @@ from ..utils import load_config_key, load_config
16
16
 
17
17
  def route_qna(vector_name):
18
18
 
19
- agent_url = load_config_key('agent_url', vector_name=vector_name, filename='config/llm_config.yaml')
19
+ agent_url = load_config_key('agent_url', vector_name=vector_name, kind="vacConfig")
20
20
  if agent_url:
21
21
  log.info('agent_url found in llm_config.yaml')
22
22
  return agent_url
23
23
 
24
- agent = load_config_key('agent', vector_name, filename='config/llm_config.yaml')
24
+ agent = load_config_key('agent', vector_name, kind="vacConfig")
25
25
  log.info(f'agent_type: {agent}')
26
26
 
27
27
  agent_route, _ = load_config('config/cloud_run_urls.json')
@@ -37,9 +37,9 @@ def route_qna(vector_name):
37
37
 
38
38
  def route_endpoint(vector_name):
39
39
 
40
- agent_type = load_config_key('agent_type', vector_name, filename='config/llm_config.yaml')
40
+ agent_type = load_config_key('agent_type', vector_name, kind="vacConfig")
41
41
  if not agent_type:
42
- agent_type = load_config_key('agent', vector_name, filename='config/llm_config.yaml')
42
+ agent_type = load_config_key('agent', vector_name, kind="vacConfig")
43
43
 
44
44
  stem = route_qna(vector_name)
45
45
 
@@ -41,14 +41,13 @@ def handle_special_commands(user_input,
41
41
  vector_name,
42
42
  chat_history,
43
43
  bucket=None,
44
- cmds=None,
45
- config_file="config/llm_config.yaml"):
44
+ cmds=None):
46
45
  now = datetime.datetime.now()
47
46
  hourmin = now.strftime("%H%M%S")
48
47
  the_datetime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
49
48
 
50
49
  if not cmds:
51
- cmds = load_config_key("user_special_cmds", vector_name=vector_name, filename=config_file)
50
+ cmds = load_config_key("user_special_cmds", vector_name=vector_name, kind="vacConfig")
52
51
  if not cmds:
53
52
  return None
54
53
 
@@ -15,7 +15,7 @@ def get_run_url(vector_name=None):
15
15
  cloud_urls = route_qna(vector_name)
16
16
 
17
17
  cloud_urls, _ = load_config('config/cloud_run_urls.json')
18
- agent = load_config_key("agent", vector_name=vector_name, filename="config/llm_config.yaml")
18
+ agent = load_config_key("agent", vector_name=vector_name, kind="vacConfig")
19
19
 
20
20
  try:
21
21
  log.info(f'Looking up URL for {agent}')
@@ -0,0 +1,264 @@
1
+ # from https://github.com/ray-project/docu-mentor
2
+ import base64
3
+ import httpx
4
+ from dotenv import load_dotenv
5
+ import jwt
6
+ import os
7
+ import time
8
+
9
+ load_dotenv()
10
+
11
+
12
+
13
+ APP_ID = os.environ.get("APP_ID")
14
+ PRIVATE_KEY = os.environ.get("PRIVATE_KEY", "")
15
+
16
+ # with open('private-key.pem', 'r') as f:
17
+ # PRIVATE_KEY = f.read()
18
+
19
+ def generate_jwt():
20
+ payload = {
21
+ "iat": int(time.time()),
22
+ "exp": int(time.time()) + (10 * 60),
23
+ "iss": APP_ID,
24
+ }
25
+ if PRIVATE_KEY:
26
+ jwt_token = jwt.encode(payload, PRIVATE_KEY, algorithm="RS256")
27
+ return jwt_token
28
+ raise ValueError("PRIVATE_KEY not found.")
29
+
30
+
31
+ async def get_installation_access_token(jwt, installation_id):
32
+ url = f"https://api.github.com/app/installations/{installation_id}/access_tokens"
33
+ headers = {
34
+ "Authorization": f"Bearer {jwt}",
35
+ "Accept": "application/vnd.github.v3+json",
36
+ }
37
+ async with httpx.AsyncClient() as client:
38
+ response = await client.post(url, headers=headers)
39
+ return response.json()["token"]
40
+
41
+
42
+ def get_diff_url(pr):
43
+ """GitHub 302s to this URL."""
44
+ original_url = pr.get("url")
45
+ parts = original_url.split("/")
46
+ owner, repo, pr_number = parts[-4], parts[-3], parts[-1]
47
+ return f"https://patch-diff.githubusercontent.com/raw/{owner}/{repo}/pull/{pr_number}.diff"
48
+
49
+
50
+ async def get_branch_files(pr, branch, headers):
51
+ original_url = pr.get("url")
52
+ parts = original_url.split("/")
53
+ owner, repo = parts[-4], parts[-3]
54
+ url = f"https://api.github.com/repos/{owner}/{repo}/git/trees/{branch}?recursive=1"
55
+ async with httpx.AsyncClient() as client:
56
+ response = await client.get(url, headers=headers)
57
+ tree = response.json().get('tree', [])
58
+ files = {}
59
+ for item in tree:
60
+ if item['type'] == 'blob':
61
+ file_url = item['url']
62
+ print(file_url)
63
+ file_response = await client.get(file_url, headers=headers)
64
+ content = file_response.json().get('content', '')
65
+ # Decode the base64 content
66
+ decoded_content = base64.b64decode(content).decode('utf-8')
67
+ files[item['path']] = decoded_content
68
+ return files
69
+
70
+
71
+ async def get_pr_head_branch(pr, headers):
72
+ original_url = pr.get("url")
73
+ parts = original_url.split("/")
74
+ owner, repo, pr_number = parts[-4], parts[-3], parts[-1]
75
+ url = f"https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}"
76
+
77
+ async with httpx.AsyncClient() as client:
78
+ response = await client.get(url, headers=headers)
79
+
80
+ # Check if the response is successful
81
+ if response.status_code != 200:
82
+ print(f"Error: Received status code {response.status_code}")
83
+ print("Response body:", response.text)
84
+ return ''
85
+
86
+ # Safely get the 'ref'
87
+ data = response.json()
88
+ head_data = data.get('head', {})
89
+ ref = head_data.get('ref', '')
90
+ return ref
91
+
92
+
93
+ def files_to_diff_dict(diff):
94
+ files_with_diff = {}
95
+ current_file = None
96
+ for line in diff.split("\n"):
97
+ if line.startswith("diff --git"):
98
+ current_file = line.split(" ")[2][2:]
99
+ files_with_diff[current_file] = {"text": []}
100
+ elif line.startswith("+") and not line.startswith("+++"):
101
+ files_with_diff[current_file]["text"].append(line[1:])
102
+ return files_with_diff
103
+
104
+
105
+ def parse_diff_to_line_numbers(diff):
106
+ files_with_line_numbers = {}
107
+ current_file = None
108
+ line_number = 0
109
+ for line in diff.split("\n"):
110
+ if line.startswith("diff --git"):
111
+ current_file = line.split(" ")[2][2:]
112
+ files_with_line_numbers[current_file] = []
113
+ line_number = 0
114
+ elif line.startswith("@@"):
115
+ line_number = int(line.split(" ")[2].split(",")[0][1:]) - 1
116
+ elif line.startswith("+") and not line.startswith("+++"):
117
+ files_with_line_numbers[current_file].append(line_number)
118
+ line_number += 1
119
+ elif not line.startswith("-"):
120
+ line_number += 1
121
+ return files_with_line_numbers
122
+
123
+
124
+ def get_context_from_files(files, files_with_line_numbers, context_lines=2):
125
+ context_data = {}
126
+ for file, lines in files_with_line_numbers.items():
127
+ file_content = files[file].split("\n")
128
+ context_data[file] = []
129
+ for line in lines:
130
+ start = max(line - context_lines, 0)
131
+ end = min(line + context_lines + 1, len(file_content))
132
+ context_data[file].append('\n'.join(file_content[start:end]))
133
+ return context_data
134
+
135
+ app = FastAPI()
136
+
137
+
138
+ async def handle_webhook(request: Request):
139
+ data = await request.json()
140
+
141
+ installation = data.get("installation")
142
+ if installation and installation.get("id"):
143
+ installation_id = installation.get("id")
144
+ logger.info(f"Installation ID: {installation_id}")
145
+
146
+ JWT_TOKEN = generate_jwt()
147
+
148
+ installation_access_token = await get_installation_access_token(
149
+ JWT_TOKEN, installation_id
150
+ )
151
+
152
+ headers = {
153
+ "Authorization": f"token {installation_access_token}",
154
+ "User-Agent": "docu-mentor-bot",
155
+ "Accept": "application/vnd.github.VERSION.diff",
156
+ }
157
+ else:
158
+ raise ValueError("No app installation found.")
159
+
160
+ # If PR exists and is opened
161
+ if "pull_request" in data.keys() and (
162
+ data["action"] in ["opened", "reopened"]
163
+ ): # use "synchronize" for tracking new commits
164
+ pr = data.get("pull_request")
165
+
166
+ # Greet the user and show instructions.
167
+ async with httpx.AsyncClient() as client:
168
+ await client.post(
169
+ f"{pr['issue_url']}/comments",
170
+ json={"body": GREETING},
171
+ headers=headers,
172
+ )
173
+ return JSONResponse(content={}, status_code=200)
174
+
175
+ # Check if the event is a new or modified issue comment
176
+ if "issue" in data.keys() and data.get("action") in ["created", "edited"]:
177
+ issue = data["issue"]
178
+
179
+ # Check if the issue is a pull request
180
+ if "/pull/" in issue["html_url"]:
181
+ pr = issue.get("pull_request")
182
+
183
+ # Get the comment body
184
+ comment = data.get("comment")
185
+ comment_body = comment.get("body")
186
+ # Remove all whitespace characters except for regular spaces
187
+ comment_body = comment_body.translate(
188
+ str.maketrans("", "", string.whitespace.replace(" ", ""))
189
+ )
190
+
191
+ # Skip if the bot talks about itself
192
+ author_handle = comment["user"]["login"]
193
+
194
+ # Check if the bot is mentioned in the comment
195
+ if (
196
+ author_handle != "docu-mentor[bot]"
197
+ and "@docu-mentor run" in comment_body
198
+ ):
199
+ async with httpx.AsyncClient() as client:
200
+ # Fetch diff from GitHub
201
+ files_to_keep = comment_body.replace(
202
+ "@docu-mentor run", ""
203
+ ).split(" ")
204
+ files_to_keep = [item for item in files_to_keep if item]
205
+
206
+ logger.info(files_to_keep)
207
+
208
+ url = get_diff_url(pr)
209
+ diff_response = await client.get(url, headers=headers)
210
+ diff = diff_response.text
211
+
212
+ files_with_lines = parse_diff_to_line_numbers(diff)
213
+
214
+ # Get head branch of the PR
215
+ headers["Accept"] = "application/vnd.github.full+json"
216
+ head_branch = await get_pr_head_branch(pr, headers)
217
+
218
+ # Get files from head branch
219
+ head_branch_files = await get_branch_files(pr, head_branch, headers)
220
+ print("HEAD FILES", head_branch_files)
221
+
222
+ # Enrich diff data with context from the head branch.
223
+ context_files = get_context_from_files(head_branch_files, files_with_lines)
224
+
225
+ # Filter the dictionary
226
+ if files_to_keep:
227
+ context_files = {
228
+ k: context_files[k]
229
+ for k in context_files
230
+ if any(sub in k for sub in files_to_keep)
231
+ }
232
+
233
+ # Get suggestions from Docu Mentor
234
+ content, model, prompt_tokens, completion_tokens = \
235
+ ray_mentor(context_files) if ray.is_initialized() else mentor(context_files)
236
+
237
+
238
+ # Let's comment on the PR
239
+ await client.post(
240
+ f"{comment['issue_url']}/comments",
241
+ json={
242
+ "body": f":rocket: Docu Mentor finished "
243
+ + "analysing your PR! :rocket:\n\n"
244
+ + "Take a look at your results:\n"
245
+ + f"{content}\n\n"
246
+ + "This bot is powered by "
247
+ + "[Sunholo Multivac](https://www.sunholo.com/).\n"
248
+ + f"It used the model {model}, used {prompt_tokens} prompt tokens, "
249
+ + f"and {completion_tokens} completion tokens in total."
250
+ },
251
+ headers=headers,
252
+ )
253
+
254
+ @serve.deployment(route_prefix="/")
255
+ @serve.ingress(app)
256
+ class ServeBot:
257
+ @app.get("/")
258
+ async def root(self):
259
+ return {"message": "Docu Mentor reporting for duty!"}
260
+
261
+ @app.post("/webhook/")
262
+ async def handle_webhook_route(self, request: Request):
263
+ return await handle_webhook(request)
264
+
@@ -21,7 +21,7 @@ def send_doc_to_docstore(docs, vector_name):
21
21
 
22
22
  # docs all come from the same file but got split into a list of document objects
23
23
 
24
- docstore_config = load_config_key("docstore", vector_name=vector_name, filename="config/llm_config.yaml")
24
+ docstore_config = load_config_key("docstore", vector_name=vector_name, kind="vacConfig")
25
25
  if docstore_config is None:
26
26
  log.info(f"No docstore config found for {vector_name} ")
27
27
 
@@ -110,7 +110,7 @@ def summarise_docs(docs, vector_name, summary_threshold_default=10000, model_lim
110
110
  if not docs:
111
111
  return None
112
112
 
113
- chunker_config = load_config_key("chunker", vector_name=vector_name, filename="config/llm_config.yaml")
113
+ chunker_config = load_config_key("chunker", vector_name=vector_name, kind="vacConfig")
114
114
  summarise_chunking_config = chunker_config.get("summarise") if chunker_config else None
115
115
 
116
116
  if not summarise_chunking_config:
@@ -114,7 +114,7 @@ def choose_splitter(extension: str, chunk_size: int=1024, chunk_overlap:int=200,
114
114
  if vector_name:
115
115
  # check if there is a chunking configuration
116
116
  from ..utils import load_config_key
117
- chunk_config = load_config_key("chunker", vector_name=vector_name, filename="config/llm_config.yaml")
117
+ chunk_config = load_config_key("chunker", vector_name=vector_name, kind="vacConfig")
118
118
  if chunk_config:
119
119
  if chunk_config.get("type") == "semantic":
120
120
  embedding_str = chunk_config.get("llm")
@@ -27,7 +27,7 @@ from langchain.retrievers import ContextualCompressionRetriever
27
27
 
28
28
 
29
29
  def load_memories(vector_name):
30
- memories = load_config_key("memory", vector_name, type="vacConfig")
30
+ memories = load_config_key("memory", vector_name, kind="vacConfig")
31
31
  log.info(f"Found memory settings for {vector_name}: {memories}")
32
32
  if len(memories) == 0:
33
33
  log.info(f"No memory settings found for {vector_name}")
@@ -70,7 +70,7 @@ def pick_retriever(vector_name, embeddings=None):
70
70
  log.info(f"No retrievers were created for {memories}")
71
71
  return None
72
72
 
73
- k_override = load_config_key("memory_k", vector_name, type="vacConfig")
73
+ k_override = load_config_key("memory_k", vector_name, kind="vacConfig")
74
74
  if not k_override:
75
75
  k_override = 3
76
76
 
@@ -19,7 +19,7 @@ def create_alloydb_engine(vector_name):
19
19
  alloydb_config = load_config_key(
20
20
  'alloydb_config',
21
21
  vector_name=vector_name,
22
- filename = "config/llm_config.yaml"
22
+ kind="vacConfig"
23
23
  )
24
24
 
25
25
  if alloydb_config is None:
@@ -48,16 +48,16 @@ def lookup_connection_env(vs_str):
48
48
  raise ValueError("Could not find vectorstore for {vs_str}")
49
49
 
50
50
 
51
- def get_vector_size(vector_name: str, config_file:str="config/llm_config.yaml"):
51
+ def get_vector_size(vector_name: str):
52
52
 
53
53
  llm_str = None
54
- embed_dict = load_config_key("embedder", vector_name, filename=config_file)
54
+ embed_dict = load_config_key("embedder", vector_name, kind="vacConfig")
55
55
 
56
56
  if embed_dict:
57
57
  llm_str = embed_dict.get('llm')
58
58
 
59
59
  if llm_str is None:
60
- llm_str = load_config_key("llm", vector_name, filename=config_file)
60
+ llm_str = load_config_key("llm", vector_name, kind="vacConfig")
61
61
 
62
62
  if not isinstance(llm_str, str):
63
63
  raise ValueError(f"get_vector_size() did not return a value string for {vector_name} - got {llm_str} instead")
@@ -102,7 +102,7 @@ def do_llamaindex(message_data, metadata, vector_name):
102
102
  if not rag:
103
103
  raise ValueError("Need to install vertexai module via `pip install sunholo[gcp]`")
104
104
 
105
- gcp_config = load_config_key("gcp_config", vector_name=vector_name, type="vacConfig")
105
+ gcp_config = load_config_key("gcp_config", vector_name=vector_name, kind="vacConfig")
106
106
  if not gcp_config:
107
107
  raise ValueError(f"Need config.{vector_name}.gcp_config to configure llamaindex on VertexAI")
108
108
 
@@ -120,7 +120,7 @@ def do_llamaindex(message_data, metadata, vector_name):
120
120
  log.info(f"Found llamaindex corpus: {corpus}")
121
121
 
122
122
  # native support for cloud storage and drive links
123
- chunker_config = load_config_key("chunker", vector_name=vector_name, type="vacConfig")
123
+ chunker_config = load_config_key("chunker", vector_name=vector_name, kind="vacConfig")
124
124
 
125
125
  if message_data.startswith("gs://") or message_data.startswith("https://drive.google.com"):
126
126
  log.info(f"rag.import_files for {message_data}")
@@ -160,7 +160,7 @@ def do_llamaindex(message_data, metadata, vector_name):
160
160
  #)
161
161
 
162
162
  def check_llamaindex_in_memory(vector_name):
163
- memories = load_config_key("memory", vector_name=vector_name, type="vacConfig")
163
+ memories = load_config_key("memory", vector_name=vector_name, kind="vacConfig")
164
164
  for memory in memories: # Iterate over the list
165
165
  for key, value in memory.items(): # Now iterate over the dictionary
166
166
  log.info(f"Found memory {key}")
@@ -175,7 +175,7 @@ def check_llamaindex_in_memory(vector_name):
175
175
 
176
176
  def llamaindex_chunker_check(message_data, metadata, vector_name):
177
177
  # llamaindex handles its own chunking/embedding
178
- memories = load_config_key("memory", vector_name=vector_name, type="vacConfig")
178
+ memories = load_config_key("memory", vector_name=vector_name, kind="vacConfig")
179
179
  total_memories = len(memories)
180
180
  llama = None
181
181
  if check_llamaindex_in_memory(vector_name):
@@ -248,8 +248,8 @@ def generate_proxy_stream(stream_to_f, user_input, vector_name, chat_history, ge
248
248
  ):
249
249
  print(output) # Process each streaming output chunk
250
250
  """
251
- agent = load_config_key("agent", vector_name=vector_name, filename="config/llm_config.yaml")
252
- agent_type = load_config_key("agent_type", vector_name=vector_name, filename="config/llm_config.yaml")
251
+ agent = load_config_key("agent", vector_name=vector_name, kind="vacConfig")
252
+ agent_type = load_config_key("agent_type", vector_name=vector_name, kind="vacConfig")
253
253
 
254
254
  def generate():
255
255
  json_buffer = ""
@@ -306,8 +306,8 @@ async def generate_proxy_stream_async(stream_to_f, user_input, vector_name, chat
306
306
  ):
307
307
  print(output) # Process each streaming output chunk
308
308
  """
309
- agent = load_config_key("agent", vector_name=vector_name, type = "vacConfig")
310
- agent_type = load_config_key("agent_type", vector_name=vector_name, type = "vacConfig")
309
+ agent = load_config_key("agent", vector_name=vector_name, kind="vacConfig")
310
+ agent_type = load_config_key("agent_type", vector_name=vector_name, kind="vacConfig")
311
311
 
312
312
  async def generate():
313
313
  json_buffer = ""
@@ -167,14 +167,13 @@ def load_config(filename: str=None) -> tuple[dict, str]:
167
167
 
168
168
  return config, filename
169
169
 
170
- def load_config_key(key: str, vector_name: str, filename: str=None, kind: str=None):
170
+ def load_config_key(key: str, vector_name: str, kind: str=None):
171
171
  """
172
172
  Load a specific key from a configuration file.
173
173
 
174
174
  Args:
175
175
  key (str): The key to fetch from the configuration.
176
176
  vector_name (str): The name of the vector in the configuration file.
177
- filename (str, optional): The configuration file name. Defaults to the `_CONFIG_FILE` environment variable. Deprecated - use 'kind' instead
178
177
  kind: (str, optional): Specify the type of configuration to retrieve e.g. 'vacConfig' which will pick from files within `_CONFIG_FOLDER`
179
178
 
180
179
  Returns:
@@ -182,7 +181,7 @@ def load_config_key(key: str, vector_name: str, filename: str=None, kind: str=No
182
181
 
183
182
  Example:
184
183
  ```python
185
- api_url = load_config_key('apiUrl', 'myVector', 'config.yaml')
184
+ api_url = load_config_key('apiUrl', 'myVector', kind="vacConfig")
186
185
  print(f'API URL: {api_url}')
187
186
  ```
188
187
  """
@@ -197,9 +196,6 @@ def load_config_key(key: str, vector_name: str, filename: str=None, kind: str=No
197
196
  if kind:
198
197
  log.info(f"Got kind: {kind} - applying to configs")
199
198
 
200
- if filename:
201
- log.warning(f"Got filename argument: {filename} for config - deprecated - use `kind='vacConfig'` instead")
202
-
203
199
  if not configs_by_kind:
204
200
  log.warning("Did not load configs via folder")
205
201
 
@@ -52,7 +52,8 @@ VAC_SUBCONFIG_SCHEMA = {
52
52
  "cluster": {"type": "string"},
53
53
  "instance": {"type": "string"},
54
54
  "database": {"type": "string"}
55
- }
55
+ },
56
+ "required": ["project_id", "region", "cluster", "instance", "database"]
56
57
  },
57
58
  "secrets": {
58
59
  "type": "array",
@@ -87,7 +88,7 @@ VAC_CONFIG_SCHEMA = {
87
88
  }
88
89
  }
89
90
  },
90
- "required": ["kind", "apiVersion", "gcp_config", "vac"]
91
+ "required": ["kind", "apiVersion", "vac"]
91
92
  }
92
93
 
93
94
  PROMPT_CONFIG_SCHEMA = {
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.59.2
3
+ Version: 0.59.4
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.59.2.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.59.4.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -31,6 +31,7 @@ sunholo/auth/__init__.py
31
31
  sunholo/auth/run.py
32
32
  sunholo/bots/__init__.py
33
33
  sunholo/bots/discord.py
34
+ sunholo/bots/github_webhook.py
34
35
  sunholo/bots/webapp.py
35
36
  sunholo/chunker/__init__.py
36
37
  sunholo/chunker/data_to_embed_pubsub.py
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes