sunholo 0.61.5__py3-none-any.whl → 0.61.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/chunker/data_to_embed_pubsub.py +4 -0
- sunholo/chunker/message_data.py +1 -1
- sunholo/cli/chat_vac.py +22 -11
- sunholo/cli/cli.py +4 -0
- sunholo/cli/embedder.py +148 -0
- {sunholo-0.61.5.dist-info → sunholo-0.61.6.dist-info}/METADATA +2 -2
- {sunholo-0.61.5.dist-info → sunholo-0.61.6.dist-info}/RECORD +11 -10
- {sunholo-0.61.5.dist-info → sunholo-0.61.6.dist-info}/LICENSE.txt +0 -0
- {sunholo-0.61.5.dist-info → sunholo-0.61.6.dist-info}/WHEEL +0 -0
- {sunholo-0.61.5.dist-info → sunholo-0.61.6.dist-info}/entry_points.txt +0 -0
- {sunholo-0.61.5.dist-info → sunholo-0.61.6.dist-info}/top_level.txt +0 -0
|
@@ -25,6 +25,10 @@ def data_to_embed_pubsub(data: dict):
|
|
|
25
25
|
|
|
26
26
|
message_data, metadata, vector_name = process_pubsub_message(data)
|
|
27
27
|
|
|
28
|
+
return process_chunker_data(message_data, metadata, vector_name)
|
|
29
|
+
|
|
30
|
+
def process_chunker_data(message_data, metadata, vector_name):
|
|
31
|
+
|
|
28
32
|
if metadata:
|
|
29
33
|
metadata["vector_name"] = vector_name
|
|
30
34
|
|
sunholo/chunker/message_data.py
CHANGED
|
@@ -186,7 +186,7 @@ def handle_json_content_message(message_data: str, metadata: dict, vector_name:
|
|
|
186
186
|
|
|
187
187
|
if the_content is None:
|
|
188
188
|
log.info("No content found")
|
|
189
|
-
return {"metadata": "No content found"}
|
|
189
|
+
return {"metadata": "No content found in 'page_content' JSON field"}
|
|
190
190
|
|
|
191
191
|
docs = [Document(page_content=the_content, metadata=metadata)]
|
|
192
192
|
|
sunholo/cli/chat_vac.py
CHANGED
|
@@ -20,14 +20,19 @@ from rich.text import Text
|
|
|
20
20
|
from rich.table import Table
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
def get_service_url(vac_name, project, region):
|
|
24
|
-
|
|
23
|
+
def get_service_url(vac_name, project, region, no_config=False):
|
|
24
|
+
|
|
25
|
+
if no_config:
|
|
26
|
+
agent_name = vac_name
|
|
27
|
+
else:
|
|
28
|
+
agent_name = load_config_key("agent", vac_name, kind="vacConfig")
|
|
29
|
+
|
|
25
30
|
proxies = clean_proxy_list()
|
|
26
31
|
if agent_name in proxies:
|
|
27
32
|
port = proxies[agent_name]['port']
|
|
28
33
|
url = f"http://127.0.0.1:{port}"
|
|
29
34
|
else:
|
|
30
|
-
print(f"No proxy found running for service: {agent_name} required for {vac_name} - attempting to connect")
|
|
35
|
+
console.print(f"No proxy found running for service: [bold orange]'{agent_name}[/bold orange] required for [bold orange]{vac_name}[/bold orange] - attempting to connect")
|
|
31
36
|
url = start_proxy(agent_name, region, project)
|
|
32
37
|
|
|
33
38
|
return url
|
|
@@ -144,15 +149,17 @@ def headless_mode(service_url, service_name, user_input, chat_history=None):
|
|
|
144
149
|
|
|
145
150
|
return chat_history
|
|
146
151
|
|
|
147
|
-
def resolve_service_url(args):
|
|
148
|
-
|
|
152
|
+
def resolve_service_url(args, no_config=False):
|
|
153
|
+
"""
|
|
154
|
+
no_config: some VACs do not have an entry in the config file e.g. chunker, embedder etc.
|
|
155
|
+
"""
|
|
149
156
|
if args.url_override:
|
|
150
157
|
|
|
151
158
|
return args.url_override
|
|
152
159
|
|
|
153
160
|
if not args.no_proxy:
|
|
154
161
|
try:
|
|
155
|
-
service_url = get_service_url(args.vac_name, args.project, args.region)
|
|
162
|
+
service_url = get_service_url(args.vac_name, args.project, args.region, no_config=no_config)
|
|
156
163
|
except ValueError as e:
|
|
157
164
|
console.print(f"[bold red]ERROR: Could not start {args.vac_name} proxy URL: {str(e)}[/bold red]")
|
|
158
165
|
sys.exit(1)
|
|
@@ -169,8 +176,6 @@ def resolve_service_url(args):
|
|
|
169
176
|
|
|
170
177
|
def vac_command(args):
|
|
171
178
|
|
|
172
|
-
service_url = resolve_service_url(args)
|
|
173
|
-
|
|
174
179
|
if args.action == 'list':
|
|
175
180
|
|
|
176
181
|
list_cloud_run_services(args.project, args.region)
|
|
@@ -178,13 +183,13 @@ def vac_command(args):
|
|
|
178
183
|
return
|
|
179
184
|
|
|
180
185
|
elif args.action == 'get-url':
|
|
181
|
-
|
|
186
|
+
service_url = resolve_service_url(args)
|
|
182
187
|
console.print(service_url)
|
|
183
188
|
|
|
184
189
|
return
|
|
185
190
|
|
|
186
191
|
elif args.action == 'chat':
|
|
187
|
-
|
|
192
|
+
service_url = resolve_service_url(args)
|
|
188
193
|
agent_name = load_config_key("agent", args.vac_name, kind="vacConfig")
|
|
189
194
|
|
|
190
195
|
if args.headless:
|
|
@@ -209,6 +214,7 @@ def vac_command(args):
|
|
|
209
214
|
stop_proxy(agent_name, stop_local=False)
|
|
210
215
|
|
|
211
216
|
elif args.action == 'invoke':
|
|
217
|
+
service_url = resolve_service_url(args, no_config=True)
|
|
212
218
|
try:
|
|
213
219
|
json_data = json.loads(args.data)
|
|
214
220
|
except json.JSONDecodeError as err:
|
|
@@ -222,7 +228,12 @@ def invoke_vac(service_url, data):
|
|
|
222
228
|
headers = {"Content-Type": "application/json"}
|
|
223
229
|
response = requests.post(service_url, headers=headers, data=json.dumps(data))
|
|
224
230
|
response.raise_for_status()
|
|
225
|
-
|
|
231
|
+
|
|
232
|
+
the_data = response.json()
|
|
233
|
+
console.print(the_data)
|
|
234
|
+
|
|
235
|
+
return the_data
|
|
236
|
+
|
|
226
237
|
except requests.exceptions.RequestException as e:
|
|
227
238
|
console.print(f"[bold red]ERROR: Failed to invoke VAC: {e}[/bold red]")
|
|
228
239
|
|
sunholo/cli/cli.py
CHANGED
|
@@ -7,6 +7,8 @@ from .cli_init import setup_init_subparser
|
|
|
7
7
|
from .merge_texts import setup_merge_text_subparser
|
|
8
8
|
from .run_proxy import setup_proxy_subparser
|
|
9
9
|
from .chat_vac import setup_vac_subparser
|
|
10
|
+
from .embedder import setup_embedder_subparser
|
|
11
|
+
|
|
10
12
|
from ..utils.config import load_config_key
|
|
11
13
|
|
|
12
14
|
from ..logging import log
|
|
@@ -64,6 +66,8 @@ def main(args=None):
|
|
|
64
66
|
setup_proxy_subparser(subparsers)
|
|
65
67
|
# vac command
|
|
66
68
|
setup_vac_subparser(subparsers)
|
|
69
|
+
# embed command
|
|
70
|
+
setup_embedder_subparser(subparsers)
|
|
67
71
|
|
|
68
72
|
args = parser.parse_args(args)
|
|
69
73
|
|
sunholo/cli/embedder.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import uuid
|
|
3
|
+
import base64
|
|
4
|
+
from datetime import datetime, timezone
|
|
5
|
+
from argparse import Namespace
|
|
6
|
+
|
|
7
|
+
from .sun_rich import console
|
|
8
|
+
from rich.progress import Progress
|
|
9
|
+
|
|
10
|
+
from .chat_vac import resolve_service_url, invoke_vac
|
|
11
|
+
|
|
12
|
+
def encode_data(vac, content, metadata=None, local_chunks=False):
|
|
13
|
+
# Current time in UTC
|
|
14
|
+
now_utc = datetime.now(timezone.utc)
|
|
15
|
+
formatted_time = now_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
16
|
+
|
|
17
|
+
# Default metadata if none provided
|
|
18
|
+
default_metadata = {"vector_name": vac, "source": "sunholo-cli", "eventTime": formatted_time}
|
|
19
|
+
|
|
20
|
+
# Merge default metadata with provided metadata
|
|
21
|
+
if metadata:
|
|
22
|
+
if not isinstance(metadata, dict):
|
|
23
|
+
metadata = json.loads(metadata)
|
|
24
|
+
else:
|
|
25
|
+
metadata = {}
|
|
26
|
+
|
|
27
|
+
# Update metadata with default values if not present
|
|
28
|
+
metadata.update(default_metadata)
|
|
29
|
+
|
|
30
|
+
# Encode the content (URL)
|
|
31
|
+
if isinstance(content, str):
|
|
32
|
+
message_data = base64.b64encode(content.encode('utf-8')).decode('utf-8')
|
|
33
|
+
else:
|
|
34
|
+
raise ValueError(f"Unsupported content type: {type(content)}")
|
|
35
|
+
|
|
36
|
+
# Construct the message dictionary
|
|
37
|
+
messageId = str(uuid.uuid4())
|
|
38
|
+
message = {
|
|
39
|
+
"message": {
|
|
40
|
+
"data": message_data,
|
|
41
|
+
"messageId": messageId,
|
|
42
|
+
"publishTime": formatted_time,
|
|
43
|
+
"attributes": {
|
|
44
|
+
"namespace": vac,
|
|
45
|
+
"return_chunks": str(local_chunks).lower()
|
|
46
|
+
},
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
# Merge metadata with attributes
|
|
51
|
+
message["message"]["attributes"].update(metadata)
|
|
52
|
+
|
|
53
|
+
#console.print()
|
|
54
|
+
#console.print(f"Sending message: {messageId} with metadata:")
|
|
55
|
+
#console.print(f"{message['message']['attributes']}")
|
|
56
|
+
|
|
57
|
+
return message
|
|
58
|
+
|
|
59
|
+
def embed_command(args):
|
|
60
|
+
chunk_args = vars(args).copy()
|
|
61
|
+
embed_args = vars(args).copy()
|
|
62
|
+
|
|
63
|
+
console.rule("Sending data for chunking")
|
|
64
|
+
|
|
65
|
+
if args.chunk_override:
|
|
66
|
+
chunk_args["url_override"] = args.chunk_override
|
|
67
|
+
else:
|
|
68
|
+
chunk_args["vac_name"] = "chunker"
|
|
69
|
+
chunk_args["url_override"] = ""
|
|
70
|
+
chunk_args = Namespace(**chunk_args)
|
|
71
|
+
chunk_url = resolve_service_url(chunk_args, no_config=True)
|
|
72
|
+
|
|
73
|
+
json_data = encode_data(args.vac_name, args.data, args.metadata, args.local_chunks)
|
|
74
|
+
|
|
75
|
+
with console.status(f"[bold orange]Sending {args.data} to chunk via {chunk_url}[/bold orange]", spinner="star"):
|
|
76
|
+
chunk_res = invoke_vac(f"{chunk_url}/pubsub_to_store", json_data)
|
|
77
|
+
|
|
78
|
+
if not args.local_chunks:
|
|
79
|
+
console.rule(f"Chunks sent for processing in cloud: {chunk_res}")
|
|
80
|
+
|
|
81
|
+
return
|
|
82
|
+
|
|
83
|
+
console.rule("Processing chunks locally")
|
|
84
|
+
|
|
85
|
+
if args.embed_override:
|
|
86
|
+
embed_args["url_override"] = args.embed_override
|
|
87
|
+
else:
|
|
88
|
+
embed_args["vac_name"] = "embedder"
|
|
89
|
+
embed_args["url_override"] = ""
|
|
90
|
+
embed_args = Namespace(**embed_args)
|
|
91
|
+
embed_url = resolve_service_url(embed_args, no_config=True)
|
|
92
|
+
|
|
93
|
+
if not chunk_res:
|
|
94
|
+
console.print(f"[bold red]ERROR: Did not get any chunks from {chunk_url} for {json_data}")
|
|
95
|
+
|
|
96
|
+
return
|
|
97
|
+
|
|
98
|
+
chunks = chunk_res.get('chunks')
|
|
99
|
+
if not chunks:
|
|
100
|
+
console.print(f"[bold red]ERROR: No chunks found within json data: {str(chunk_res)} [/bold red]")
|
|
101
|
+
|
|
102
|
+
return
|
|
103
|
+
|
|
104
|
+
embeds = []
|
|
105
|
+
with Progress() as progress:
|
|
106
|
+
task = progress.add_task(f"Embedding [{len(chunks)}] chunks via {embed_url}", total=len(chunks))
|
|
107
|
+
for chunk in chunks:
|
|
108
|
+
progress.console.print(f"Working on chunk {chunk['metadata']}")
|
|
109
|
+
|
|
110
|
+
# do this async?
|
|
111
|
+
content = chunk.get("page_content")
|
|
112
|
+
now_utc = datetime.now(timezone.utc)
|
|
113
|
+
formatted_time = now_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
114
|
+
chunk["metadata"]["eventTime"] = formatted_time
|
|
115
|
+
if not content:
|
|
116
|
+
progress.console.print("[bold red]No content chunk found, skipping.[/bold red]")
|
|
117
|
+
progress.advance(task)
|
|
118
|
+
continue
|
|
119
|
+
progress.console.print(f"Sending chunk length {len(content)} to embedder")
|
|
120
|
+
processed_chunk = encode_data(vac = args.vac_name,
|
|
121
|
+
content = json.dumps(chunk))
|
|
122
|
+
|
|
123
|
+
embed_res = invoke_vac(f"{embed_url}/embed_chunk", processed_chunk)
|
|
124
|
+
embeds.append(embed_res)
|
|
125
|
+
progress.advance(task)
|
|
126
|
+
|
|
127
|
+
console.rule("Embedding pipeline finished")
|
|
128
|
+
|
|
129
|
+
return embed_res
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def setup_embedder_subparser(subparsers):
|
|
133
|
+
"""
|
|
134
|
+
Sets up an argparse subparser for the 'embed' command.
|
|
135
|
+
|
|
136
|
+
Args:
|
|
137
|
+
subparsers: The subparsers object from argparse.ArgumentParser().
|
|
138
|
+
"""
|
|
139
|
+
embed_parser = subparsers.add_parser('embed', help='Send data for embedding to a VAC vector store')
|
|
140
|
+
embed_parser.add_argument('--embed-override', help='Override the embed VAC service URL.')
|
|
141
|
+
embed_parser.add_argument('--chunk-override', help='Override the chunk VAC service URL.')
|
|
142
|
+
embed_parser.add_argument('--no-proxy', action='store_true', help='Do not use the proxy and connect directly to the VAC service.')
|
|
143
|
+
embed_parser.add_argument('-m', '--metadata', default=None, help='Metadata to send with the embedding (as JSON string).')
|
|
144
|
+
embed_parser.add_argument('--local-chunks', action='store_true', help='Whether to process chunks to embed locally, or via the cloud.')
|
|
145
|
+
embed_parser.add_argument('vac_name', help='VAC service to embed the data for')
|
|
146
|
+
embed_parser.add_argument('data', help='String content to send for embedding')
|
|
147
|
+
|
|
148
|
+
embed_parser.set_defaults(func=embed_command)
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sunholo
|
|
3
|
-
Version: 0.61.5
|
|
3
|
+
Version: 0.61.6
|
|
4
4
|
Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
|
|
5
5
|
Home-page: https://github.com/sunholo-data/sunholo-py
|
|
6
|
-
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.61.5.tar.gz
|
|
6
|
+
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.61.6.tar.gz
|
|
7
7
|
Author: Holosun ApS
|
|
8
8
|
Author-email: multivac@sunholo.com
|
|
9
9
|
License: Apache License, Version 2.0
|
|
@@ -22,20 +22,21 @@ sunholo/bots/discord.py,sha256=cCFae5K1BCa6JVkWGLh_iZ9qFO1JpXb6K4eJrlDfEro,2442
|
|
|
22
22
|
sunholo/bots/github_webhook.py,sha256=5pQPRLM_wxxcILVaIzUDV8Kt7Arcm2dL1r1kMMHA524,9629
|
|
23
23
|
sunholo/bots/webapp.py,sha256=EIMxdAJ_xtufwJmvnn7N_Fb_1hZ9DjhJ0Kf_hp02vEU,1926
|
|
24
24
|
sunholo/chunker/__init__.py,sha256=UhQBZTKwDfBXm0TPv4LvsGc5pdUGCbYzi3qPTOkU4gw,55
|
|
25
|
-
sunholo/chunker/data_to_embed_pubsub.py,sha256=
|
|
25
|
+
sunholo/chunker/data_to_embed_pubsub.py,sha256=IY9SBRA7IO77QJBEgQuO1FiSCd6Dfm-TMEf1Ey-pLoo,3065
|
|
26
26
|
sunholo/chunker/doc_handling.py,sha256=rIyknpzDyj5A0u_DqSQVD_CXLRNZPOU6TCL4bhCdjOI,8563
|
|
27
27
|
sunholo/chunker/images.py,sha256=Xmh1vwHrVhoXm5iH2dhCc52O8YgdzE8KrDSdL-pGnp8,1861
|
|
28
28
|
sunholo/chunker/loaders.py,sha256=xiToUVgPz2ZzcqpUAq7aNP3PTenb_rBUAFzu0JPycIg,10268
|
|
29
|
-
sunholo/chunker/message_data.py,sha256=
|
|
29
|
+
sunholo/chunker/message_data.py,sha256=X6aA4yX5aGN_mEvsDPWvdYRqqn5GO1BU9QhT9w5A0ec,6789
|
|
30
30
|
sunholo/chunker/pdfs.py,sha256=daCZ1xjn1YvxlifIyxskWNpLJLe-Q9D_Jq12MWx3tZo,2473
|
|
31
31
|
sunholo/chunker/publish.py,sha256=PoT8q3XJeFCg10WrLkYhuaaXIrGVkvUD3-R9IfoWoH4,2703
|
|
32
32
|
sunholo/chunker/splitter.py,sha256=FLkDhkePkg_zGQpFBK13Cznw575D-Rf9pcaCpc1HUxY,6726
|
|
33
33
|
sunholo/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
|
-
sunholo/cli/chat_vac.py,sha256=
|
|
35
|
-
sunholo/cli/cli.py,sha256=
|
|
34
|
+
sunholo/cli/chat_vac.py,sha256=FEtaKQVYWenV1WKh4mfW7lTAaxOTvzmbG9MucG-UZoU,13179
|
|
35
|
+
sunholo/cli/cli.py,sha256=HEuCRCxzwMPO0JtT3X1liGVyvEq20Mb1-SlKn7HPx6E,2775
|
|
36
36
|
sunholo/cli/cli_init.py,sha256=JMZ9AX2cPDZ-_mv3adiv2ToFVNyRPtjk9Biszl1kiR0,2358
|
|
37
37
|
sunholo/cli/configs.py,sha256=QUM9DvKOdZmEQRM5uI3Nh887T0YDiSMr7O240zTLqws,4546
|
|
38
38
|
sunholo/cli/deploy.py,sha256=zxdwUsRTRMC8U5vyRv0JiKBLFn84Ug_Tc88-_h9hJSs,1609
|
|
39
|
+
sunholo/cli/embedder.py,sha256=hqIfqGCeV5UI_0dllNFsjdyjVWgC0Kmnw8kAKhN4jCI,5482
|
|
39
40
|
sunholo/cli/merge_texts.py,sha256=U9vdMwKmcPoc6iPOWX5MKSxn49dNGbNzVLw8ui5PhEU,1823
|
|
40
41
|
sunholo/cli/run_proxy.py,sha256=9ILCxSVHPzS-cSBvjdHhfZFlwsJ4Ttmu0vLtNoPCRgo,11469
|
|
41
42
|
sunholo/cli/sun_rich.py,sha256=UpMqeJ0C8i0pkue1AHnnyyX0bFJ9zZeJ7HBR6yhuA8A,54
|
|
@@ -95,9 +96,9 @@ sunholo/utils/parsers.py,sha256=OrHmASqIbI45atVOhiGodgLvnfrzkvVzyHnSvAXD89I,3841
|
|
|
95
96
|
sunholo/utils/user_ids.py,sha256=SQd5_H7FE7vcTZp9AQuQDWBXd4FEEd7TeVMQe1H4Ny8,292
|
|
96
97
|
sunholo/vertex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
97
98
|
sunholo/vertex/init_vertex.py,sha256=JDMUaBRdednzbKF-5p33qqLit2LMsvgvWW-NRz0AqO0,1801
|
|
98
|
-
sunholo-0.61.
|
|
99
|
-
sunholo-0.61.
|
|
100
|
-
sunholo-0.61.
|
|
101
|
-
sunholo-0.61.
|
|
102
|
-
sunholo-0.61.
|
|
103
|
-
sunholo-0.61.
|
|
99
|
+
sunholo-0.61.6.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
|
100
|
+
sunholo-0.61.6.dist-info/METADATA,sha256=Hoo-nKG0luMJ4XKms0VT9vm6LhE7ZGskG3nv3tfkpjg,8057
|
|
101
|
+
sunholo-0.61.6.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
102
|
+
sunholo-0.61.6.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
|
103
|
+
sunholo-0.61.6.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
|
104
|
+
sunholo-0.61.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|