sunholo 0.61.4__tar.gz → 0.61.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. {sunholo-0.61.4 → sunholo-0.61.6}/PKG-INFO +2 -2
  2. {sunholo-0.61.4 → sunholo-0.61.6}/setup.py +1 -1
  3. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/chunker/data_to_embed_pubsub.py +4 -0
  4. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/chunker/message_data.py +1 -1
  5. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/cli/chat_vac.py +73 -18
  6. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/cli/cli.py +4 -0
  7. sunholo-0.61.6/sunholo/cli/embedder.py +148 -0
  8. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo.egg-info/PKG-INFO +2 -2
  9. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo.egg-info/SOURCES.txt +1 -0
  10. {sunholo-0.61.4 → sunholo-0.61.6}/tests/test_config.py +0 -4
  11. {sunholo-0.61.4 → sunholo-0.61.6}/LICENSE.txt +0 -0
  12. {sunholo-0.61.4 → sunholo-0.61.6}/MANIFEST.in +0 -0
  13. {sunholo-0.61.4 → sunholo-0.61.6}/README.md +0 -0
  14. {sunholo-0.61.4 → sunholo-0.61.6}/setup.cfg +0 -0
  15. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/__init__.py +0 -0
  16. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/agents/__init__.py +0 -0
  17. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/agents/chat_history.py +0 -0
  18. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/agents/dispatch_to_qa.py +0 -0
  19. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/agents/fastapi/__init__.py +0 -0
  20. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/agents/fastapi/base.py +0 -0
  21. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/agents/fastapi/qna_routes.py +0 -0
  22. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/agents/flask/__init__.py +0 -0
  23. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/agents/flask/base.py +0 -0
  24. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/agents/flask/qna_routes.py +0 -0
  25. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/agents/langserve.py +0 -0
  26. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/agents/pubsub.py +0 -0
  27. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/agents/route.py +0 -0
  28. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/agents/special_commands.py +0 -0
  29. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/archive/__init__.py +0 -0
  30. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/archive/archive.py +0 -0
  31. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/auth/__init__.py +0 -0
  32. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/auth/run.py +0 -0
  33. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/bots/__init__.py +0 -0
  34. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/bots/discord.py +0 -0
  35. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/bots/github_webhook.py +0 -0
  36. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/bots/webapp.py +0 -0
  37. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/chunker/__init__.py +0 -0
  38. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/chunker/doc_handling.py +0 -0
  39. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/chunker/images.py +0 -0
  40. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/chunker/loaders.py +0 -0
  41. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/chunker/pdfs.py +0 -0
  42. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/chunker/publish.py +0 -0
  43. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/chunker/splitter.py +0 -0
  44. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/cli/__init__.py +0 -0
  45. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/cli/cli_init.py +0 -0
  46. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/cli/configs.py +0 -0
  47. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/cli/deploy.py +0 -0
  48. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/cli/merge_texts.py +0 -0
  49. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/cli/run_proxy.py +0 -0
  50. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/cli/sun_rich.py +0 -0
  51. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/components/__init__.py +0 -0
  52. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/components/llm.py +0 -0
  53. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/components/prompt.py +0 -0
  54. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/components/retriever.py +0 -0
  55. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/components/vectorstore.py +0 -0
  56. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/database/__init__.py +0 -0
  57. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/database/alloydb.py +0 -0
  58. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/database/database.py +0 -0
  59. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/database/lancedb.py +0 -0
  60. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/database/sql/sb/create_function.sql +0 -0
  61. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/database/sql/sb/create_function_time.sql +0 -0
  62. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/database/sql/sb/create_table.sql +0 -0
  63. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/database/sql/sb/delete_source_row.sql +0 -0
  64. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/database/sql/sb/return_sources.sql +0 -0
  65. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/database/sql/sb/setup.sql +0 -0
  66. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/database/static_dbs.py +0 -0
  67. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/database/uuid.py +0 -0
  68. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/embedder/__init__.py +0 -0
  69. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/embedder/embed_chunk.py +0 -0
  70. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/gcs/__init__.py +0 -0
  71. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/gcs/add_file.py +0 -0
  72. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/gcs/download_url.py +0 -0
  73. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/gcs/metadata.py +0 -0
  74. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/langfuse/__init__.py +0 -0
  75. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/langfuse/callback.py +0 -0
  76. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/langfuse/prompts.py +0 -0
  77. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/llamaindex/__init__.py +0 -0
  78. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/llamaindex/generate.py +0 -0
  79. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/llamaindex/import_files.py +0 -0
  80. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/logging.py +0 -0
  81. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/lookup/__init__.py +0 -0
  82. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/lookup/model_lookup.yaml +0 -0
  83. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/patches/__init__.py +0 -0
  84. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/patches/langchain/__init__.py +0 -0
  85. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/patches/langchain/lancedb.py +0 -0
  86. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/patches/langchain/vertexai.py +0 -0
  87. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/pubsub/__init__.py +0 -0
  88. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/pubsub/process_pubsub.py +0 -0
  89. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/pubsub/pubsub_manager.py +0 -0
  90. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/qna/__init__.py +0 -0
  91. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/qna/parsers.py +0 -0
  92. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/qna/retry.py +0 -0
  93. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/streaming/__init__.py +0 -0
  94. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/streaming/content_buffer.py +0 -0
  95. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/streaming/langserve.py +0 -0
  96. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/streaming/streaming.py +0 -0
  97. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/summarise/__init__.py +0 -0
  98. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/summarise/summarise.py +0 -0
  99. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/utils/__init__.py +0 -0
  100. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/utils/big_context.py +0 -0
  101. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/utils/config.py +0 -0
  102. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/utils/config_schema.py +0 -0
  103. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/utils/gcp.py +0 -0
  104. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/utils/parsers.py +0 -0
  105. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/utils/user_ids.py +0 -0
  106. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/vertex/__init__.py +0 -0
  107. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo/vertex/init_vertex.py +0 -0
  108. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo.egg-info/dependency_links.txt +0 -0
  109. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo.egg-info/entry_points.txt +0 -0
  110. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo.egg-info/requires.txt +0 -0
  111. {sunholo-0.61.4 → sunholo-0.61.6}/sunholo.egg-info/top_level.txt +0 -0
  112. {sunholo-0.61.4 → sunholo-0.61.6}/tests/test_chat_history.py +0 -0
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.61.4
3
+ Version: 0.61.6
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.61.4.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.61.6.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -1,7 +1,7 @@
1
1
  from setuptools import setup, find_packages
2
2
 
3
3
  # Define your base version
4
- version = '0.61.4'
4
+ version = '0.61.6'
5
5
 
6
6
  setup(
7
7
  name='sunholo',
@@ -25,6 +25,10 @@ def data_to_embed_pubsub(data: dict):
25
25
 
26
26
  message_data, metadata, vector_name = process_pubsub_message(data)
27
27
 
28
+ return process_chunker_data(message_data, metadata, vector_name)
29
+
30
+ def process_chunker_data(message_data, metadata, vector_name):
31
+
28
32
  if metadata:
29
33
  metadata["vector_name"] = vector_name
30
34
 
@@ -186,7 +186,7 @@ def handle_json_content_message(message_data: str, metadata: dict, vector_name:
186
186
 
187
187
  if the_content is None:
188
188
  log.info("No content found")
189
- return {"metadata": "No content found"}
189
+ return {"metadata": "No content found in 'page_content' JSON field"}
190
190
 
191
191
  docs = [Document(page_content=the_content, metadata=metadata)]
192
192
 
@@ -9,6 +9,7 @@ import uuid
9
9
  import sys
10
10
  import subprocess
11
11
  import json
12
+ import requests
12
13
 
13
14
  from rich import print
14
15
  from .sun_rich import console
@@ -19,14 +20,19 @@ from rich.text import Text
19
20
  from rich.table import Table
20
21
 
21
22
 
22
def get_service_url(vac_name, project, region, no_config=False):
    """
    Return the URL of a local proxy for the service backing a VAC.

    Args:
        vac_name: Name of the VAC to connect to.
        project: Forwarded to ``start_proxy`` when a new proxy must be started.
        region: Forwarded to ``start_proxy`` when a new proxy must be started.
        no_config: When True, ``vac_name`` is used directly as the service
            name instead of looking up the ``agent`` key in vacConfig.

    Returns:
        ``http://127.0.0.1:<port>`` when the service is already listed by
        ``clean_proxy_list()``, otherwise whatever ``start_proxy`` returns.
    """
    if no_config:
        agent_name = vac_name
    else:
        agent_name = load_config_key("agent", vac_name, kind="vacConfig")

    proxies = clean_proxy_list()
    if agent_name in proxies:
        port = proxies[agent_name]['port']
        return f"http://127.0.0.1:{port}"

    # The service name is quoted on both sides (the closing quote was missing).
    console.print(f"No proxy found running for service: [bold orange]'{agent_name}'[/bold orange] required for [bold orange]{vac_name}[/bold orange] - attempting to connect")
    return start_proxy(agent_name, region, project)
@@ -143,28 +149,47 @@ def headless_mode(service_url, service_name, user_input, chat_history=None):
143
149
 
144
150
  return chat_history
145
151
 
152
def resolve_service_url(args, no_config=False):
    """
    Work out which URL should be used to reach a VAC service.

    Resolution order:
      1. ``args.url_override`` when set.
      2. A local proxy URL from ``get_service_url`` unless ``args.no_proxy``.
      3. With ``args.no_proxy``: the ``agent_url`` key from vacConfig, falling
         back to ``get_cloud_run_service_url``.

    Args:
        args: Parsed CLI namespace; reads ``url_override``, ``no_proxy``,
            ``vac_name``, ``project`` and ``region``.
        no_config: some VACs do not have an entry in the config file
            e.g. chunker, embedder etc. Forwarded to ``get_service_url``.

    Returns:
        The resolved service URL.

    Exits the process with status 1 when the proxy lookup raises ValueError.
    """
    if args.url_override:
        return args.url_override

    if not args.no_proxy:
        try:
            return get_service_url(args.vac_name, args.project, args.region, no_config=no_config)
        except ValueError as e:
            console.print(f"[bold red]ERROR: Could not start {args.vac_name} proxy URL: {str(e)}[/bold red]")
            sys.exit(1)

    agent_url = load_config_key("agent_url", args.vac_name, "vacConfig")
    if agent_url:
        # f-prefix added: previously the literal text "{agent_url}" was printed.
        console.print(f"Found agent_url within vacConfig: {agent_url}")

    service_url = agent_url or get_cloud_run_service_url(args.project, args.region, args.vac_name)

    # Report the URL only once it is known; printing it before the assignment
    # raised UnboundLocalError on every --no-proxy run.
    console.print(f"Not using a proxy, connecting directly to {service_url}")

    return service_url
146
176
 
147
177
  def vac_command(args):
178
+
148
179
  if args.action == 'list':
180
+
149
181
  list_cloud_run_services(args.project, args.region)
182
+
150
183
  return
184
+
151
185
  elif args.action == 'get-url':
152
- service_url = get_cloud_run_service_url(args.project, args.region, args.vac_name)
153
- if service_url:
154
- console.print(service_url)
155
- return
156
- elif args.action == 'chat':
186
+ service_url = resolve_service_url(args)
187
+ console.print(service_url)
157
188
 
158
- if not args.no_proxy:
159
- try:
160
- service_url = get_service_url(args.vac_name, args.project, args.region)
161
- except ValueError as e:
162
- console.print(f"[bold red]ERROR: Could not start {args.vac_name} proxy URL: {str(e)}[/bold red]")
163
- sys.exit(1)
164
- else:
165
- service_url = get_cloud_run_service_url(args.project, args.region, args.vac_name)
166
- console.print(f"Not using a proxy, connecting directly to {service_url}")
189
+ return
167
190
 
191
+ elif args.action == 'chat':
192
+ service_url = resolve_service_url(args)
168
193
  agent_name = load_config_key("agent", args.vac_name, kind="vacConfig")
169
194
 
170
195
  if args.headless:
@@ -188,6 +213,30 @@ def vac_command(args):
188
213
 
189
214
  stop_proxy(agent_name, stop_local=False)
190
215
 
216
+ elif args.action == 'invoke':
217
+ service_url = resolve_service_url(args, no_config=True)
218
+ try:
219
+ json_data = json.loads(args.data)
220
+ except json.JSONDecodeError as err:
221
+ console.print(f"[bold red]ERROR: invalid JSON: {str(err)} [/bold red]")
222
+ sys.exit(1)
223
+
224
+ invoke_vac(service_url, json_data)
225
+
226
def invoke_vac(service_url, data):
    """
    POST ``data`` as JSON to a VAC endpoint and print the JSON reply.

    Args:
        service_url: Full URL of the endpoint to call.
        data: JSON-serialisable payload.

    Returns:
        The decoded JSON response, or None when the request failed or the
        response body was not valid JSON (an error is printed in both cases).
    """
    headers = {"Content-Type": "application/json"}
    try:
        response = requests.post(service_url, headers=headers, data=json.dumps(data))
        response.raise_for_status()
        the_data = response.json()
    except requests.exceptions.RequestException as e:
        console.print(f"[bold red]ERROR: Failed to invoke VAC: {e}[/bold red]")
        return None
    except ValueError as e:
        # Older requests versions raise a plain ValueError subclass for a
        # non-JSON body, which is not a RequestException.
        console.print(f"[bold red]ERROR: VAC response was not valid JSON: {e}[/bold red]")
        return None

    console.print(the_data)
    return the_data
239
+
191
240
 
192
241
  def list_cloud_run_services(project, region):
193
242
  """
@@ -281,6 +330,8 @@ def setup_vac_subparser(subparsers):
281
330
  subparsers: The subparsers object from argparse.ArgumentParser().
282
331
  """
283
332
  vac_parser = subparsers.add_parser('vac', help='Interact with deployed VAC services.')
333
+ vac_parser.add_argument('--url_override', help='Override the VAC service URL.')
334
+ vac_parser.add_argument('--no-proxy', action='store_true', help='Do not use the proxy and connect directly to the VAC service.')
284
335
  vac_subparsers = vac_parser.add_subparsers(dest='action', help='VAC subcommands')
285
336
 
286
337
  # Subcommand for listing VAC services
@@ -296,6 +347,10 @@ def setup_vac_subparser(subparsers):
296
347
  chat_parser.add_argument('user_input', help='User input for the VAC service when in headless mode.', nargs='?', default=None)
297
348
  chat_parser.add_argument('--headless', action='store_true', help='Run in headless mode.')
298
349
  chat_parser.add_argument('--chat_history', help='Chat history for headless mode (as JSON string).', default=None)
299
- chat_parser.add_argument('--no-proxy', action='store_true', help='Do not use the proxy and connect directly to the VAC service.')
350
+
351
+ # Subcommand for invoking a VAC service directly
352
+ invoke_parser = vac_subparsers.add_parser('invoke', help='Invoke a VAC service directly with custom data.')
353
+ invoke_parser.add_argument('vac_name', help='Name of the VAC service.')
354
+ invoke_parser.add_argument('data', help='Data to send to the VAC service (as JSON string).')
300
355
 
301
356
  vac_parser.set_defaults(func=vac_command)
@@ -7,6 +7,8 @@ from .cli_init import setup_init_subparser
7
7
  from .merge_texts import setup_merge_text_subparser
8
8
  from .run_proxy import setup_proxy_subparser
9
9
  from .chat_vac import setup_vac_subparser
10
+ from .embedder import setup_embedder_subparser
11
+
10
12
  from ..utils.config import load_config_key
11
13
 
12
14
  from ..logging import log
@@ -64,6 +66,8 @@ def main(args=None):
64
66
  setup_proxy_subparser(subparsers)
65
67
  # vac command
66
68
  setup_vac_subparser(subparsers)
69
+ # embed command
70
+ setup_embedder_subparser(subparsers)
67
71
 
68
72
  args = parser.parse_args(args)
69
73
 
@@ -0,0 +1,148 @@
1
+ import json
2
+ import uuid
3
+ import base64
4
+ from datetime import datetime, timezone
5
+ from argparse import Namespace
6
+
7
+ from .sun_rich import console
8
+ from rich.progress import Progress
9
+
10
+ from .chat_vac import resolve_service_url, invoke_vac
11
+
12
def encode_data(vac, content, metadata=None, local_chunks=False):
    """
    Wrap string content in a Pub/Sub-style message envelope.

    Args:
        vac: VAC name; used for the ``namespace`` attribute and the
            ``vector_name`` metadata field.
        content: String payload; it is UTF-8 encoded and then base64 encoded.
        metadata: Optional extra attributes, as a dict or a JSON object string.
            The caller's dict is not modified.
        local_chunks: Sent as the ``return_chunks`` attribute ("true"/"false").

    Returns:
        ``{"message": {"data", "messageId", "publishTime", "attributes"}}``.

    Raises:
        ValueError: if ``content`` is not a string, or ``metadata`` is a JSON
            string that is invalid or does not decode to an object.
    """
    if not isinstance(content, str):
        raise ValueError(f"Unsupported content type: {type(content)}")

    # Current time in UTC
    formatted_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Normalise the provided metadata into a fresh dict so the caller's
    # object is never mutated.
    if not metadata:
        provided = {}
    elif isinstance(metadata, dict):
        provided = dict(metadata)
    else:
        provided = json.loads(metadata)
        if not isinstance(provided, dict):
            raise ValueError(f"Metadata must be a JSON object, got: {type(provided)}")

    # CLI-generated fields take precedence over same-named provided fields.
    default_metadata = {"vector_name": vac, "source": "sunholo-cli", "eventTime": formatted_time}
    merged_metadata = {**provided, **default_metadata}

    message_data = base64.b64encode(content.encode('utf-8')).decode('utf-8')

    # Metadata is flattened into the attributes and may override the two
    # base attributes, as before.
    attributes = {
        "namespace": vac,
        "return_chunks": str(local_chunks).lower(),
        **merged_metadata,
    }

    return {
        "message": {
            "data": message_data,
            "messageId": str(uuid.uuid4()),
            "publishTime": formatted_time,
            "attributes": attributes,
        }
    }
58
+
59
def _resolve_stage_url(args, override, default_vac_name):
    """Resolve the URL for one pipeline stage, honouring its override flag."""
    stage_args = vars(args).copy()
    if override:
        stage_args["url_override"] = override
    else:
        stage_args["vac_name"] = default_vac_name
        stage_args["url_override"] = ""
    return resolve_service_url(Namespace(**stage_args), no_config=True)


def embed_command(args):
    """
    Run the 'embed' CLI command.

    Sends ``args.data`` to the chunker's ``/pubsub_to_store`` endpoint. With
    ``--local-chunks`` the returned chunks are then posted one at a time to
    the embedder's ``/embed_chunk`` endpoint.

    Args:
        args: Parsed CLI namespace; reads ``vac_name``, ``data``, ``metadata``,
            ``local_chunks``, ``chunk_override`` and ``embed_override``, plus
            whatever ``resolve_service_url`` reads.

    Returns:
        None when chunks are processed in the cloud or nothing could be
        embedded; otherwise the response for the last chunk that was embedded.
    """
    console.rule("Sending data for chunking")

    chunk_url = _resolve_stage_url(args, args.chunk_override, "chunker")

    json_data = encode_data(args.vac_name, args.data, args.metadata, args.local_chunks)

    with console.status(f"[bold orange]Sending {args.data} to chunk via {chunk_url}[/bold orange]", spinner="star"):
        chunk_res = invoke_vac(f"{chunk_url}/pubsub_to_store", json_data)

    if not args.local_chunks:
        console.rule(f"Chunks sent for processing in cloud: {chunk_res}")
        return

    console.rule("Processing chunks locally")

    # Validate the chunker output before resolving the embedder URL, so no
    # embedder proxy is started when there is nothing to embed.
    if not chunk_res:
        console.print(f"[bold red]ERROR: Did not get any chunks from {chunk_url} for {json_data}")
        return

    chunks = chunk_res.get('chunks')
    if not chunks:
        console.print(f"[bold red]ERROR: No chunks found within json data: {str(chunk_res)} [/bold red]")
        return

    embed_url = _resolve_stage_url(args, args.embed_override, "embedder")

    embeds = []
    with Progress() as progress:
        task = progress.add_task(f"Embedding [{len(chunks)}] chunks via {embed_url}", total=len(chunks))
        for chunk in chunks:
            # Tolerate chunks that arrive without a metadata dict.
            chunk_metadata = chunk.setdefault("metadata", {})
            progress.console.print(f"Working on chunk {chunk_metadata}")

            # do this async?
            content = chunk.get("page_content")
            chunk_metadata["eventTime"] = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
            if not content:
                progress.console.print("[bold red]No content chunk found, skipping.[/bold red]")
                progress.advance(task)
                continue

            progress.console.print(f"Sending chunk length {len(content)} to embedder")
            processed_chunk = encode_data(vac=args.vac_name, content=json.dumps(chunk))

            embeds.append(invoke_vac(f"{embed_url}/embed_chunk", processed_chunk))
            progress.advance(task)

    console.rule("Embedding pipeline finished")

    # Previously an unbound local was returned when every chunk was skipped.
    return embeds[-1] if embeds else None
130
+
131
+
132
def setup_embedder_subparser(subparsers):
    """
    Register the 'embed' command on an argparse subparsers object.

    Args:
        subparsers: The subparsers object from argparse.ArgumentParser().
    """
    parser = subparsers.add_parser('embed', help='Send data for embedding to a VAC vector store')

    # (flags, keyword options) in registration order; the two positionals
    # must stay last and in this order.
    argument_specs = (
        (('--embed-override',), {'help': 'Override the embed VAC service URL.'}),
        (('--chunk-override',), {'help': 'Override the chunk VAC service URL.'}),
        (('--no-proxy',), {'action': 'store_true', 'help': 'Do not use the proxy and connect directly to the VAC service.'}),
        (('-m', '--metadata'), {'default': None, 'help': 'Metadata to send with the embedding (as JSON string).'}),
        (('--local-chunks',), {'action': 'store_true', 'help': 'Whether to process chunks to embed locally, or via the cloud.'}),
        (('vac_name',), {'help': 'VAC service to embed the data for'}),
        (('data',), {'help': 'String content to send for embedding'}),
    )
    for flags, options in argument_specs:
        parser.add_argument(*flags, **options)

    parser.set_defaults(func=embed_command)
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.61.4
3
+ Version: 0.61.6
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.61.4.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.61.6.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -47,6 +47,7 @@ sunholo/cli/cli.py
47
47
  sunholo/cli/cli_init.py
48
48
  sunholo/cli/configs.py
49
49
  sunholo/cli/deploy.py
50
+ sunholo/cli/embedder.py
50
51
  sunholo/cli/merge_texts.py
51
52
  sunholo/cli/run_proxy.py
52
53
  sunholo/cli/sun_rich.py
@@ -9,7 +9,3 @@ def test_load_config():
9
9
  with patch("builtins.open", mock_open(read_data='{"key": "value"}'), create=True):
10
10
  result, _ = config.load_config("mock_file.json")
11
11
  assert result == expected_config
12
-
13
- def test_load_config_key():
14
- with pytest.raises(KeyError):
15
- config.load_config_key("non_existent_key", "mock_vector_name")
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes