sunholo 0.61.4__py3-none-any.whl → 0.61.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,6 +25,10 @@ def data_to_embed_pubsub(data: dict):
25
25
 
26
26
  message_data, metadata, vector_name = process_pubsub_message(data)
27
27
 
28
+ return process_chunker_data(message_data, metadata, vector_name)
29
+
30
+ def process_chunker_data(message_data, metadata, vector_name):
31
+
28
32
  if metadata:
29
33
  metadata["vector_name"] = vector_name
30
34
 
@@ -186,7 +186,7 @@ def handle_json_content_message(message_data: str, metadata: dict, vector_name:
186
186
 
187
187
  if the_content is None:
188
188
  log.info("No content found")
189
- return {"metadata": "No content found"}
189
+ return {"metadata": "No content found in 'page_content' JSON field"}
190
190
 
191
191
  docs = [Document(page_content=the_content, metadata=metadata)]
192
192
 
sunholo/cli/chat_vac.py CHANGED
@@ -9,6 +9,7 @@ import uuid
9
9
  import sys
10
10
  import subprocess
11
11
  import json
12
+ import requests
12
13
 
13
14
  from rich import print
14
15
  from .sun_rich import console
@@ -19,14 +20,19 @@ from rich.text import Text
19
20
  from rich.table import Table
20
21
 
21
22
 
22
- def get_service_url(vac_name, project, region):
23
- agent_name = load_config_key("agent", vac_name, kind="vacConfig")
23
+ def get_service_url(vac_name, project, region, no_config=False):
24
+
25
+ if no_config:
26
+ agent_name = vac_name
27
+ else:
28
+ agent_name = load_config_key("agent", vac_name, kind="vacConfig")
29
+
24
30
  proxies = clean_proxy_list()
25
31
  if agent_name in proxies:
26
32
  port = proxies[agent_name]['port']
27
33
  url = f"http://127.0.0.1:{port}"
28
34
  else:
29
- print(f"No proxy found running for service: {agent_name} required for {vac_name} - attempting to connect")
35
+ console.print(f"No proxy found running for service: [bold orange]'{agent_name}[/bold orange] required for [bold orange]{vac_name}[/bold orange] - attempting to connect")
30
36
  url = start_proxy(agent_name, region, project)
31
37
 
32
38
  return url
@@ -143,28 +149,47 @@ def headless_mode(service_url, service_name, user_input, chat_history=None):
143
149
 
144
150
  return chat_history
145
151
 
152
def resolve_service_url(args, no_config=False):
    """
    Work out which URL should be used to reach a VAC service.

    Resolution order:
      1. ``args.url_override`` when it is set - returned untouched.
      2. Unless ``args.no_proxy`` is set, whatever ``get_service_url`` returns.
      3. With ``args.no_proxy``, the ``agent_url`` entry of the vacConfig if
         there is one, otherwise the result of ``get_cloud_run_service_url``.

    Args:
        args: Parsed CLI arguments. Reads ``url_override``, ``no_proxy``,
            ``vac_name``, ``project`` and ``region``.
        no_config: some VACs do not have an entry in the config file
            e.g. chunker, embedder etc. Forwarded to ``get_service_url``.

    Returns:
        The resolved service URL.

    Exits the process with status 1 if ``get_service_url`` raises ValueError.
    """
    if args.url_override:
        return args.url_override

    if not args.no_proxy:
        try:
            return get_service_url(args.vac_name, args.project, args.region, no_config=no_config)
        except ValueError as e:
            console.print(f"[bold red]ERROR: Could not start {args.vac_name} proxy URL: {str(e)}[/bold red]")
            sys.exit(1)

    # Direct connection: an explicit agent_url in the config wins over the
    # looked-up Cloud Run URL.
    agent_url = load_config_key("agent_url", args.vac_name, "vacConfig")
    if agent_url:
        console.print(f"Found agent_url within vacConfig: {agent_url}")

    service_url = agent_url or get_cloud_run_service_url(args.project, args.region, args.vac_name)

    # Report only once the URL is known; printing it before the assignment
    # raised UnboundLocalError.
    console.print(f"Not using a proxy, connecting directly to {service_url}")

    return service_url
146
176
 
147
177
  def vac_command(args):
178
+
148
179
  if args.action == 'list':
180
+
149
181
  list_cloud_run_services(args.project, args.region)
182
+
150
183
  return
184
+
151
185
  elif args.action == 'get-url':
152
- service_url = get_cloud_run_service_url(args.project, args.region, args.vac_name)
153
- if service_url:
154
- console.print(service_url)
155
- return
156
- elif args.action == 'chat':
186
+ service_url = resolve_service_url(args)
187
+ console.print(service_url)
157
188
 
158
- if not args.no_proxy:
159
- try:
160
- service_url = get_service_url(args.vac_name, args.project, args.region)
161
- except ValueError as e:
162
- console.print(f"[bold red]ERROR: Could not start {args.vac_name} proxy URL: {str(e)}[/bold red]")
163
- sys.exit(1)
164
- else:
165
- service_url = get_cloud_run_service_url(args.project, args.region, args.vac_name)
166
- console.print(f"Not using a proxy, connecting directly to {service_url}")
189
+ return
167
190
 
191
+ elif args.action == 'chat':
192
+ service_url = resolve_service_url(args)
168
193
  agent_name = load_config_key("agent", args.vac_name, kind="vacConfig")
169
194
 
170
195
  if args.headless:
@@ -188,6 +213,30 @@ def vac_command(args):
188
213
 
189
214
  stop_proxy(agent_name, stop_local=False)
190
215
 
216
+ elif args.action == 'invoke':
217
+ service_url = resolve_service_url(args, no_config=True)
218
+ try:
219
+ json_data = json.loads(args.data)
220
+ except json.JSONDecodeError as err:
221
+ console.print(f"[bold red]ERROR: invalid JSON: {str(err)} [/bold red]")
222
+ sys.exit(1)
223
+
224
+ invoke_vac(service_url, json_data)
225
+
226
def invoke_vac(service_url, data):
    """
    POST ``data`` as a JSON body to ``service_url`` and print the JSON reply.

    Args:
        service_url: Full URL to send the request to.
        data: JSON-serialisable object used as the request body.

    Returns:
        The decoded JSON response, or None when the request fails, the server
        answers with an error status, or the response body is not valid JSON.
        Failures are printed to the console rather than raised.
    """
    headers = {"Content-Type": "application/json"}

    try:
        response = requests.post(service_url, headers=headers, data=json.dumps(data))
        response.raise_for_status()
        the_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests releases where the
        # decode error is not a RequestException subclass.
        console.print(f"[bold red]ERROR: Failed to invoke VAC: {e}[/bold red]")
        return None

    console.print(the_data)

    return the_data
239
+
191
240
 
192
241
  def list_cloud_run_services(project, region):
193
242
  """
@@ -281,6 +330,8 @@ def setup_vac_subparser(subparsers):
281
330
  subparsers: The subparsers object from argparse.ArgumentParser().
282
331
  """
283
332
  vac_parser = subparsers.add_parser('vac', help='Interact with deployed VAC services.')
333
+ vac_parser.add_argument('--url_override', help='Override the VAC service URL.')
334
+ vac_parser.add_argument('--no-proxy', action='store_true', help='Do not use the proxy and connect directly to the VAC service.')
284
335
  vac_subparsers = vac_parser.add_subparsers(dest='action', help='VAC subcommands')
285
336
 
286
337
  # Subcommand for listing VAC services
@@ -296,6 +347,10 @@ def setup_vac_subparser(subparsers):
296
347
  chat_parser.add_argument('user_input', help='User input for the VAC service when in headless mode.', nargs='?', default=None)
297
348
  chat_parser.add_argument('--headless', action='store_true', help='Run in headless mode.')
298
349
  chat_parser.add_argument('--chat_history', help='Chat history for headless mode (as JSON string).', default=None)
299
- chat_parser.add_argument('--no-proxy', action='store_true', help='Do not use the proxy and connect directly to the VAC service.')
350
+
351
+ # Subcommand for invoking a VAC service directly
352
+ invoke_parser = vac_subparsers.add_parser('invoke', help='Invoke a VAC service directly with custom data.')
353
+ invoke_parser.add_argument('vac_name', help='Name of the VAC service.')
354
+ invoke_parser.add_argument('data', help='Data to send to the VAC service (as JSON string).')
300
355
 
301
356
  vac_parser.set_defaults(func=vac_command)
sunholo/cli/cli.py CHANGED
@@ -7,6 +7,8 @@ from .cli_init import setup_init_subparser
7
7
  from .merge_texts import setup_merge_text_subparser
8
8
  from .run_proxy import setup_proxy_subparser
9
9
  from .chat_vac import setup_vac_subparser
10
+ from .embedder import setup_embedder_subparser
11
+
10
12
  from ..utils.config import load_config_key
11
13
 
12
14
  from ..logging import log
@@ -64,6 +66,8 @@ def main(args=None):
64
66
  setup_proxy_subparser(subparsers)
65
67
  # vac command
66
68
  setup_vac_subparser(subparsers)
69
+ # embed command
70
+ setup_embedder_subparser(subparsers)
67
71
 
68
72
  args = parser.parse_args(args)
69
73
 
@@ -0,0 +1,148 @@
1
+ import json
2
+ import uuid
3
+ import base64
4
+ from datetime import datetime, timezone
5
+ from argparse import Namespace
6
+
7
+ from .sun_rich import console
8
+ from rich.progress import Progress
9
+
10
+ from .chat_vac import resolve_service_url, invoke_vac
11
+
12
def encode_data(vac, content, metadata=None, local_chunks=False):
    """
    Wrap string content in a Pub/Sub-style message dictionary.

    Args:
        vac: VAC name; written to the ``namespace`` attribute and to the
            ``vector_name`` metadata key.
        content: String payload. It is UTF-8 encoded and then base64 encoded
            into ``message.data``.
        metadata: Optional dict, or JSON string holding an object, of extra
            attributes. The caller's dict is not modified.
        local_chunks: Written to the ``return_chunks`` attribute as the
            string ``"true"`` or ``"false"``.

    Returns:
        ``{"message": {"data", "messageId", "publishTime", "attributes"}}``.

    Raises:
        ValueError: If ``content`` is not a string, or ``metadata`` is a
            string that is not valid JSON or does not decode to an object.
    """
    # Current time in UTC
    formatted_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    if not metadata:
        merged_metadata = {}
    elif isinstance(metadata, dict):
        # Copy so the caller's dict is left untouched.
        merged_metadata = dict(metadata)
    else:
        merged_metadata = json.loads(metadata)
        if not isinstance(merged_metadata, dict):
            raise ValueError(f"Metadata must be a JSON object, got: {type(merged_metadata)}")

    # These three keys are always set by the CLI and replace any values the
    # caller supplied under the same names.
    merged_metadata.update(
        {"vector_name": vac, "source": "sunholo-cli", "eventTime": formatted_time}
    )

    if not isinstance(content, str):
        raise ValueError(f"Unsupported content type: {type(content)}")
    message_data = base64.b64encode(content.encode('utf-8')).decode('utf-8')

    attributes = {
        "namespace": vac,
        "return_chunks": str(local_chunks).lower(),
    }
    # Metadata is applied last, so it can override the two attributes above.
    attributes.update(merged_metadata)

    return {
        "message": {
            "data": message_data,
            "messageId": str(uuid.uuid4()),
            "publishTime": formatted_time,
            "attributes": attributes,
        }
    }
58
+
59
def embed_command(args):
    """
    Run the 'embed' CLI command: send data to the chunker and, optionally,
    push the returned chunks to the embedder from this machine.

    Args:
        args: Parsed CLI arguments. Reads ``vac_name``, ``data``, ``metadata``,
            ``local_chunks``, ``chunk_override`` and ``embed_override``; a copy
            of all arguments is also handed to ``resolve_service_url``.

    Returns:
        The response for the last chunk sent to the embedder. None when
        chunks are left to be processed in the cloud, when the chunker
        returned nothing usable, or when every chunk was skipped.
    """
    chunk_args = vars(args).copy()
    embed_args = vars(args).copy()

    console.rule("Sending data for chunking")

    if args.chunk_override:
        chunk_args["url_override"] = args.chunk_override
    else:
        chunk_args["vac_name"] = "chunker"
        chunk_args["url_override"] = ""
    chunk_args = Namespace(**chunk_args)
    chunk_url = resolve_service_url(chunk_args, no_config=True)

    json_data = encode_data(args.vac_name, args.data, args.metadata, args.local_chunks)

    with console.status(f"[bold orange]Sending {args.data} to chunk via {chunk_url}[/bold orange]", spinner="star"):
        chunk_res = invoke_vac(f"{chunk_url}/pubsub_to_store", json_data)

    if not args.local_chunks:
        console.rule(f"Chunks sent for processing in cloud: {chunk_res}")

        return

    console.rule("Processing chunks locally")

    if args.embed_override:
        embed_args["url_override"] = args.embed_override
    else:
        embed_args["vac_name"] = "embedder"
        embed_args["url_override"] = ""
    embed_args = Namespace(**embed_args)
    embed_url = resolve_service_url(embed_args, no_config=True)

    if not chunk_res:
        console.print(f"[bold red]ERROR: Did not get any chunks from {chunk_url} for {json_data}")

        return

    chunks = chunk_res.get('chunks')
    if not chunks:
        console.print(f"[bold red]ERROR: No chunks found within json data: {str(chunk_res)} [/bold red]")

        return

    # Stays None if every chunk is skipped, so the final return cannot hit an
    # unbound name.
    embed_res = None
    with Progress() as progress:
        task = progress.add_task(f"Embedding [{len(chunks)}] chunks via {embed_url}", total=len(chunks))
        for chunk in chunks:
            progress.console.print(f"Working on chunk {chunk['metadata']}")

            # do this async?
            content = chunk.get("page_content")
            now_utc = datetime.now(timezone.utc)
            formatted_time = now_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
            chunk["metadata"]["eventTime"] = formatted_time
            if not content:
                progress.console.print("[bold red]No content chunk found, skipping.[/bold red]")
                progress.advance(task)
                continue
            progress.console.print(f"Sending chunk length {len(content)} to embedder")
            # The whole chunk (content and metadata) travels as the payload.
            processed_chunk = encode_data(vac=args.vac_name,
                                          content=json.dumps(chunk))

            embed_res = invoke_vac(f"{embed_url}/embed_chunk", processed_chunk)
            progress.advance(task)

    console.rule("Embedding pipeline finished")

    return embed_res
130
+
131
+
132
def setup_embedder_subparser(subparsers):
    """
    Register the 'embed' command and its arguments on an argparse
    subparsers object, with ``embed_command`` as the handler.

    Args:
        subparsers: The subparsers object from argparse.ArgumentParser().
    """
    parser = subparsers.add_parser('embed', help='Send data for embedding to a VAC vector store')

    # (names, options) pairs, registered in this exact order: options first,
    # then the two positionals vac_name and data.
    argument_specs = (
        (('--embed-override',), {'help': 'Override the embed VAC service URL.'}),
        (('--chunk-override',), {'help': 'Override the chunk VAC service URL.'}),
        (('--no-proxy',), {'action': 'store_true', 'help': 'Do not use the proxy and connect directly to the VAC service.'}),
        (('-m', '--metadata'), {'default': None, 'help': 'Metadata to send with the embedding (as JSON string).'}),
        (('--local-chunks',), {'action': 'store_true', 'help': 'Whether to process chunks to embed locally, or via the cloud.'}),
        (('vac_name',), {'help': 'VAC service to embed the data for'}),
        (('data',), {'help': 'String content to send for embedding'}),
    )
    for names, options in argument_specs:
        parser.add_argument(*names, **options)

    parser.set_defaults(func=embed_command)
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.61.4
3
+ Version: 0.61.6
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.61.4.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.61.6.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -22,20 +22,21 @@ sunholo/bots/discord.py,sha256=cCFae5K1BCa6JVkWGLh_iZ9qFO1JpXb6K4eJrlDfEro,2442
22
22
  sunholo/bots/github_webhook.py,sha256=5pQPRLM_wxxcILVaIzUDV8Kt7Arcm2dL1r1kMMHA524,9629
23
23
  sunholo/bots/webapp.py,sha256=EIMxdAJ_xtufwJmvnn7N_Fb_1hZ9DjhJ0Kf_hp02vEU,1926
24
24
  sunholo/chunker/__init__.py,sha256=UhQBZTKwDfBXm0TPv4LvsGc5pdUGCbYzi3qPTOkU4gw,55
25
- sunholo/chunker/data_to_embed_pubsub.py,sha256=t-pWNYv2mnwVAkMcIOK2CrIb3yr2aS9iAdtryk7hT8o,2931
25
+ sunholo/chunker/data_to_embed_pubsub.py,sha256=IY9SBRA7IO77QJBEgQuO1FiSCd6Dfm-TMEf1Ey-pLoo,3065
26
26
  sunholo/chunker/doc_handling.py,sha256=rIyknpzDyj5A0u_DqSQVD_CXLRNZPOU6TCL4bhCdjOI,8563
27
27
  sunholo/chunker/images.py,sha256=Xmh1vwHrVhoXm5iH2dhCc52O8YgdzE8KrDSdL-pGnp8,1861
28
28
  sunholo/chunker/loaders.py,sha256=xiToUVgPz2ZzcqpUAq7aNP3PTenb_rBUAFzu0JPycIg,10268
29
- sunholo/chunker/message_data.py,sha256=iDP94dySU3Xct-gWGnB4NNRSh2luQmgJeCfQb7ktt3U,6760
29
+ sunholo/chunker/message_data.py,sha256=X6aA4yX5aGN_mEvsDPWvdYRqqn5GO1BU9QhT9w5A0ec,6789
30
30
  sunholo/chunker/pdfs.py,sha256=daCZ1xjn1YvxlifIyxskWNpLJLe-Q9D_Jq12MWx3tZo,2473
31
31
  sunholo/chunker/publish.py,sha256=PoT8q3XJeFCg10WrLkYhuaaXIrGVkvUD3-R9IfoWoH4,2703
32
32
  sunholo/chunker/splitter.py,sha256=FLkDhkePkg_zGQpFBK13Cznw575D-Rf9pcaCpc1HUxY,6726
33
33
  sunholo/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
- sunholo/cli/chat_vac.py,sha256=_NUjwATKvzwfnBJRedP2GqPFNeaCF5F8OvdygXRH1LY,11379
35
- sunholo/cli/cli.py,sha256=cogY1F5rcIGFYpZVFtbDNlAIElpfyPSCvSLC1ZIpHXg,2666
34
+ sunholo/cli/chat_vac.py,sha256=FEtaKQVYWenV1WKh4mfW7lTAaxOTvzmbG9MucG-UZoU,13179
35
+ sunholo/cli/cli.py,sha256=HEuCRCxzwMPO0JtT3X1liGVyvEq20Mb1-SlKn7HPx6E,2775
36
36
  sunholo/cli/cli_init.py,sha256=JMZ9AX2cPDZ-_mv3adiv2ToFVNyRPtjk9Biszl1kiR0,2358
37
37
  sunholo/cli/configs.py,sha256=QUM9DvKOdZmEQRM5uI3Nh887T0YDiSMr7O240zTLqws,4546
38
38
  sunholo/cli/deploy.py,sha256=zxdwUsRTRMC8U5vyRv0JiKBLFn84Ug_Tc88-_h9hJSs,1609
39
+ sunholo/cli/embedder.py,sha256=hqIfqGCeV5UI_0dllNFsjdyjVWgC0Kmnw8kAKhN4jCI,5482
39
40
  sunholo/cli/merge_texts.py,sha256=U9vdMwKmcPoc6iPOWX5MKSxn49dNGbNzVLw8ui5PhEU,1823
40
41
  sunholo/cli/run_proxy.py,sha256=9ILCxSVHPzS-cSBvjdHhfZFlwsJ4Ttmu0vLtNoPCRgo,11469
41
42
  sunholo/cli/sun_rich.py,sha256=UpMqeJ0C8i0pkue1AHnnyyX0bFJ9zZeJ7HBR6yhuA8A,54
@@ -95,9 +96,9 @@ sunholo/utils/parsers.py,sha256=OrHmASqIbI45atVOhiGodgLvnfrzkvVzyHnSvAXD89I,3841
95
96
  sunholo/utils/user_ids.py,sha256=SQd5_H7FE7vcTZp9AQuQDWBXd4FEEd7TeVMQe1H4Ny8,292
96
97
  sunholo/vertex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
97
98
  sunholo/vertex/init_vertex.py,sha256=JDMUaBRdednzbKF-5p33qqLit2LMsvgvWW-NRz0AqO0,1801
98
- sunholo-0.61.4.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
99
- sunholo-0.61.4.dist-info/METADATA,sha256=fNJFynKc9QQ-n26UaiizHapEY9hWrt-imjk3WBxlJqs,8057
100
- sunholo-0.61.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
101
- sunholo-0.61.4.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
102
- sunholo-0.61.4.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
103
- sunholo-0.61.4.dist-info/RECORD,,
99
+ sunholo-0.61.6.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
100
+ sunholo-0.61.6.dist-info/METADATA,sha256=Hoo-nKG0luMJ4XKms0VT9vm6LhE7ZGskG3nv3tfkpjg,8057
101
+ sunholo-0.61.6.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
102
+ sunholo-0.61.6.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
103
+ sunholo-0.61.6.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
104
+ sunholo-0.61.6.dist-info/RECORD,,