sunholo 0.75.1__tar.gz → 0.76.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. {sunholo-0.75.1 → sunholo-0.76.2}/PKG-INFO +5 -2
  2. {sunholo-0.75.1 → sunholo-0.76.2}/setup.py +5 -1
  3. sunholo-0.76.2/sunholo/azure/__init__.py +1 -0
  4. sunholo-0.76.2/sunholo/azure/event_grid.py +69 -0
  5. sunholo-0.76.2/sunholo/chunker/__init__.py +3 -0
  6. sunholo-0.76.2/sunholo/chunker/azure.py +41 -0
  7. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/chunker/message_data.py +8 -3
  8. sunholo-0.75.1/sunholo/chunker/data_to_embed_pubsub.py → sunholo-0.76.2/sunholo/chunker/process_chunker_data.py +26 -49
  9. sunholo-0.76.2/sunholo/chunker/pubsub.py +31 -0
  10. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/cli/cli.py +3 -0
  11. sunholo-0.76.2/sunholo/cli/vertex.py +46 -0
  12. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/discovery_engine/chunker_handler.py +3 -0
  13. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/llamaindex/import_files.py +4 -1
  14. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/vertex/extensions_class.py +26 -12
  15. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo.egg-info/PKG-INFO +5 -2
  16. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo.egg-info/SOURCES.txt +6 -2
  17. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo.egg-info/requires.txt +4 -0
  18. sunholo-0.75.1/sunholo/chunker/__init__.py +0 -1
  19. sunholo-0.75.1/tests/test_chunker.py +0 -23
  20. {sunholo-0.75.1 → sunholo-0.76.2}/LICENSE.txt +0 -0
  21. {sunholo-0.75.1 → sunholo-0.76.2}/MANIFEST.in +0 -0
  22. {sunholo-0.75.1 → sunholo-0.76.2}/README.md +0 -0
  23. {sunholo-0.75.1 → sunholo-0.76.2}/setup.cfg +0 -0
  24. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/__init__.py +0 -0
  25. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/agents/__init__.py +0 -0
  26. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/agents/chat_history.py +0 -0
  27. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/agents/dispatch_to_qa.py +0 -0
  28. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/agents/fastapi/__init__.py +0 -0
  29. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/agents/fastapi/base.py +0 -0
  30. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/agents/fastapi/qna_routes.py +0 -0
  31. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/agents/flask/__init__.py +0 -0
  32. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/agents/flask/base.py +0 -0
  33. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/agents/flask/qna_routes.py +0 -0
  34. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/agents/flask/vac_routes.py +0 -0
  35. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/agents/langserve.py +0 -0
  36. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/agents/pubsub.py +0 -0
  37. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/agents/route.py +0 -0
  38. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/agents/special_commands.py +0 -0
  39. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/agents/swagger.py +0 -0
  40. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/archive/__init__.py +0 -0
  41. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/archive/archive.py +0 -0
  42. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/auth/__init__.py +0 -0
  43. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/auth/gcloud.py +0 -0
  44. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/auth/run.py +0 -0
  45. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/bots/__init__.py +0 -0
  46. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/bots/discord.py +0 -0
  47. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/bots/github_webhook.py +0 -0
  48. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/bots/webapp.py +0 -0
  49. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/chunker/doc_handling.py +0 -0
  50. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/chunker/images.py +0 -0
  51. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/chunker/loaders.py +0 -0
  52. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/chunker/pdfs.py +0 -0
  53. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/chunker/publish.py +0 -0
  54. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/chunker/splitter.py +0 -0
  55. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/cli/__init__.py +0 -0
  56. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/cli/chat_vac.py +0 -0
  57. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/cli/cli_init.py +0 -0
  58. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/cli/configs.py +0 -0
  59. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/cli/deploy.py +0 -0
  60. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/cli/embedder.py +0 -0
  61. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/cli/merge_texts.py +0 -0
  62. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/cli/run_proxy.py +0 -0
  63. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/cli/sun_rich.py +0 -0
  64. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/cli/swagger.py +0 -0
  65. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/components/__init__.py +0 -0
  66. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/components/llm.py +0 -0
  67. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/components/retriever.py +0 -0
  68. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/components/vectorstore.py +0 -0
  69. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/database/__init__.py +0 -0
  70. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/database/alloydb.py +0 -0
  71. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/database/alloydb_client.py +0 -0
  72. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/database/database.py +0 -0
  73. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/database/lancedb.py +0 -0
  74. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/database/sql/sb/create_function.sql +0 -0
  75. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/database/sql/sb/create_function_time.sql +0 -0
  76. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/database/sql/sb/create_table.sql +0 -0
  77. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/database/sql/sb/delete_source_row.sql +0 -0
  78. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/database/sql/sb/return_sources.sql +0 -0
  79. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/database/sql/sb/setup.sql +0 -0
  80. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/database/static_dbs.py +0 -0
  81. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/database/uuid.py +0 -0
  82. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/discovery_engine/__init__.py +0 -0
  83. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/discovery_engine/create_new.py +0 -0
  84. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/discovery_engine/discovery_engine_client.py +0 -0
  85. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/embedder/__init__.py +0 -0
  86. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/embedder/embed_chunk.py +0 -0
  87. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/gcs/__init__.py +0 -0
  88. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/gcs/add_file.py +0 -0
  89. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/gcs/download_url.py +0 -0
  90. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/gcs/metadata.py +0 -0
  91. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/invoke/__init__.py +0 -0
  92. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/invoke/invoke_vac_utils.py +0 -0
  93. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/langfuse/__init__.py +0 -0
  94. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/langfuse/callback.py +0 -0
  95. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/langfuse/prompts.py +0 -0
  96. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/llamaindex/__init__.py +0 -0
  97. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/llamaindex/generate.py +0 -0
  98. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/llamaindex/get_files.py +0 -0
  99. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/logging.py +0 -0
  100. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/lookup/__init__.py +0 -0
  101. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/lookup/model_lookup.yaml +0 -0
  102. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/patches/__init__.py +0 -0
  103. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/patches/langchain/__init__.py +0 -0
  104. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/patches/langchain/lancedb.py +0 -0
  105. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/patches/langchain/vertexai.py +0 -0
  106. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/pubsub/__init__.py +0 -0
  107. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/pubsub/process_pubsub.py +0 -0
  108. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/pubsub/pubsub_manager.py +0 -0
  109. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/qna/__init__.py +0 -0
  110. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/qna/parsers.py +0 -0
  111. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/qna/retry.py +0 -0
  112. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/streaming/__init__.py +0 -0
  113. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/streaming/content_buffer.py +0 -0
  114. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/streaming/langserve.py +0 -0
  115. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/streaming/stream_lookup.py +0 -0
  116. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/streaming/streaming.py +0 -0
  117. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/summarise/__init__.py +0 -0
  118. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/summarise/summarise.py +0 -0
  119. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/tools/__init__.py +0 -0
  120. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/tools/web_browser.py +0 -0
  121. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/utils/__init__.py +0 -0
  122. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/utils/api_key.py +0 -0
  123. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/utils/big_context.py +0 -0
  124. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/utils/config.py +0 -0
  125. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/utils/config_class.py +0 -0
  126. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/utils/config_schema.py +0 -0
  127. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/utils/gcp.py +0 -0
  128. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/utils/gcp_project.py +0 -0
  129. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/utils/parsers.py +0 -0
  130. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/utils/timedelta.py +0 -0
  131. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/utils/user_ids.py +0 -0
  132. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/utils/version.py +0 -0
  133. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/vertex/__init__.py +0 -0
  134. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/vertex/init.py +0 -0
  135. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/vertex/memory_tools.py +0 -0
  136. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo/vertex/safety.py +0 -0
  137. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo.egg-info/dependency_links.txt +0 -0
  138. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo.egg-info/entry_points.txt +0 -0
  139. {sunholo-0.75.1 → sunholo-0.76.2}/sunholo.egg-info/top_level.txt +0 -0
  140. {sunholo-0.75.1 → sunholo-0.76.2}/tests/test_chat_history.py +0 -0
  141. {sunholo-0.75.1 → sunholo-0.76.2}/tests/test_config.py +0 -0
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.75.1
3
+ Version: 0.76.2
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.75.1.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.76.2.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -25,6 +25,7 @@ Requires-Dist: langchain_experimental>0.0.60
25
25
  Requires-Dist: langchain-community
26
26
  Provides-Extra: all
27
27
  Requires-Dist: asyncpg; extra == "all"
28
+ Requires-Dist: azure-storage-blob; extra == "all"
28
29
  Requires-Dist: fastapi; extra == "all"
29
30
  Requires-Dist: flask; extra == "all"
30
31
  Requires-Dist: google-auth; extra == "all"
@@ -69,6 +70,8 @@ Requires-Dist: tabulate; extra == "all"
69
70
  Requires-Dist: tantivy; extra == "all"
70
71
  Requires-Dist: tiktoken; extra == "all"
71
72
  Requires-Dist: unstructured[local-inference]==0.14.9; extra == "all"
73
+ Provides-Extra: azure
74
+ Requires-Dist: azure-storage-blob; extra == "azure"
72
75
  Provides-Extra: cli
73
76
  Requires-Dist: jsonschema>=4.21.1; extra == "cli"
74
77
  Requires-Dist: rich; extra == "cli"
@@ -1,7 +1,7 @@
1
1
  from setuptools import setup, find_packages
2
2
 
3
3
  # Define your base version
4
- version = '0.75.1'
4
+ version = '0.76.2'
5
5
 
6
6
  setup(
7
7
  name='sunholo',
@@ -40,6 +40,7 @@ setup(
40
40
  # Define optional dependencies with feature names
41
41
  'all': [
42
42
  "asyncpg",
43
+ "azure-storage-blob",
43
44
  "fastapi",
44
45
  "flask",
45
46
  "google-auth",
@@ -86,6 +87,9 @@ setup(
86
87
  "unstructured[local-inference]==0.14.9",
87
88
 
88
89
  ],
90
+ 'azure': [
91
+ "azure-storage-blob"
92
+ ],
89
93
  'cli': [
90
94
  "jsonschema>=4.21.1",
91
95
  "rich"
@@ -0,0 +1 @@
1
+ from .event_grid import process_azure_blob_event
@@ -0,0 +1,69 @@
1
+ # process_azure_blob_event.py
2
+ from ..logging import log
3
+
4
+ def process_azure_blob_event(events: list) -> tuple:
5
+ """
6
+ Extracts message data and metadata from an Azure Blob Storage event.
7
+
8
+ Args:
9
+ events (list): The list of Azure Event Grid event data.
10
+
11
+ Returns:
12
+ tuple: A tuple containing the blob URL, attributes as metadata, and the vector name.
13
+
14
+ Example of Event Grid schema:
15
+ {
16
+ "topic": "/subscriptions/{subscription-id}/resourceGroups/{resource-group}/providers/Microsoft.Storage/storageAccounts/{storage-account}",
17
+ "subject": "/blobServices/default/containers/{container}/blobs/{blob}",
18
+ "eventType": "Microsoft.Storage.BlobCreated",
19
+ "eventTime": "2021-01-01T12:34:56.789Z",
20
+ "id": "event-id",
21
+ "data": {
22
+ "api": "PutBlob",
23
+ "clientRequestId": "client-request-id",
24
+ "requestId": "request-id",
25
+ "eTag": "etag",
26
+ "contentType": "application/octet-stream",
27
+ "contentLength": 524288,
28
+ "blobType": "BlockBlob",
29
+ "url": "https://{storage-account}.blob.core.windows.net/{container}/{blob}",
30
+ "sequencer": "0000000000000000000000000000000000000000000000000000000000000000",
31
+ "storageDiagnostics": {
32
+ "batchId": "batch-id"
33
+ }
34
+ },
35
+ "dataVersion": "",
36
+ "metadataVersion": "1"
37
+ }
38
+ """
39
+ storage_blob_created_event = "Microsoft.Storage.BlobCreated"
40
+
41
+ for event in events:
42
+ event_type = event['eventType']
43
+ data = event['data']
44
+
45
+ if event_type == storage_blob_created_event:
46
+ blob_url = data['url']
47
+ event_time = event['eventTime']
48
+ event_id = event['id']
49
+ subject = event['subject']
50
+ attributes = {
51
+ 'event_type': event_type,
52
+ 'event_time': event_time,
53
+ 'event_id': event_id,
54
+ 'subject': subject,
55
+ 'url': blob_url
56
+ }
57
+
58
+ vector_name = subject.split('/')[4] # Extracting the container name
59
+
60
+ log.info(f"Process Azure Blob Event was triggered by eventId {event_id} at {event_time}")
61
+ log.debug(f"Process Azure Blob Event data: {blob_url}")
62
+
63
+ # Check for a valid Azure Blob Storage event type
64
+ if event_type == "Microsoft.Storage.BlobCreated":
65
+ log.info(f"Got valid event from Azure Blob Storage: {blob_url}")
66
+
67
+ return blob_url, attributes, vector_name
68
+
69
+ return None, None, None
@@ -0,0 +1,3 @@
1
+ from .pubsub import data_to_embed_pubsub
2
+ from .azure import data_to_embed_azure
3
+ from .process_chunker_data import direct_file_to_embed
@@ -0,0 +1,41 @@
1
+ # Copyright [2024] [Holosun ApS]
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ..logging import log
16
+ from ..azure import process_azure_blob_event
17
+ from .process_chunker_data import process_chunker_data
18
+
19
+ def data_to_embed_azure(events: list):
20
+ """Triggered from a message on an Azure Data Grid event.
21
+ Args:
22
+ data JSON
23
+ """
24
+ validation_event_type = "Microsoft.EventGrid.SubscriptionValidationEvent"
25
+ storage_blob_created_event = "Microsoft.Storage.BlobCreated"
26
+
27
+ for event in events:
28
+ event_type = event['eventType']
29
+ data = event['data']
30
+
31
+ if event_type == validation_event_type:
32
+ validation_code = data['validationCode']
33
+ log.info(f"Got SubscriptionValidation event data, validation code: {validation_code}, topic: {event['topic']}")
34
+
35
+ # Return the validation response
36
+ return {"ValidationResponse": validation_code}
37
+ elif event_type == storage_blob_created_event:
38
+
39
+ message_data, metadata, vector_name = process_azure_blob_event(events)
40
+
41
+ return process_chunker_data(message_data, metadata, vector_name)
@@ -173,10 +173,15 @@ def handle_http_message(message_data: str, metadata: dict, vector_name:str):
173
173
 
174
174
  return chunks, metadata
175
175
 
176
- def handle_json_content_message(message_data: str, metadata: dict, vector_name: str):
176
+ def handle_json_content_message(message_data: dict, metadata: dict, vector_name: str):
177
177
  log.info("No tailored message_data detected, processing message json")
178
178
  # Process message containing direct JSON content
179
- the_json = json.loads(message_data)
179
+ try:
180
+ the_json = json.loads(message_data)
181
+ except Exception as e:
182
+ log.error(f"Could not load message {message_data} as JSON - {str(e)}")
183
+ return None, {"metadata": f"Could not load message as JSON - {str(e)}"}
184
+
180
185
  the_metadata = the_json.get("metadata", {})
181
186
  metadata.update(the_metadata)
182
187
  the_content = the_json.get("page_content", None)
@@ -186,7 +191,7 @@ def handle_json_content_message(message_data: str, metadata: dict, vector_name:
186
191
 
187
192
  if the_content is None:
188
193
  log.info("No content found")
189
- return {"metadata": "No content found in 'page_content' JSON field"}
194
+ return None, {"metadata": "No content found in 'page_content' JSON field"}
190
195
 
191
196
  docs = [Document(page_content=the_content, metadata=metadata)]
192
197
 
@@ -1,58 +1,21 @@
1
- # Copyright [2024] [Holosun ApS]
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
1
  import pathlib
15
2
 
16
- from ..logging import log
17
- from ..pubsub import process_pubsub_message
18
- from .message_data import handle_gcs_message, handle_google_drive_message, handle_github_message, handle_http_message, handle_json_content_message
19
- from .publish import process_docs_chunks_vector_name
20
- from .splitter import chunk_doc_to_docs
3
+ from .message_data import (
4
+ handle_gcs_message,
5
+ handle_google_drive_message,
6
+ handle_github_message,
7
+ handle_http_message,
8
+ handle_json_content_message
9
+ )
21
10
 
11
+ from . import loaders
22
12
  from ..llamaindex.import_files import llamaindex_chunker_check
23
13
  from ..discovery_engine.chunker_handler import discovery_engine_chunker_check
14
+ from .publish import process_docs_chunks_vector_name
15
+ from .splitter import chunk_doc_to_docs
24
16
 
25
- from . import loaders
26
-
27
- def direct_file_to_embed(file_name: pathlib.Path, metadata: dict, vector_name: str):
28
- """
29
- Send direct files to chunking embed pipeline
30
-
31
-
32
-
33
- """
34
- log.info(f"Sending direct file upload {file_name} to loaders.read_file_to_documents {metadata}")
35
- docs = loaders.read_file_to_documents(file_name, metadata=metadata)
36
- if docs is None:
37
- log.warning(f"loaders.read_file_to_documents docs2 failed to load file {metadata}")
38
-
39
- return None
40
-
41
- chunks = chunk_doc_to_docs(docs, file_name.suffix, vector_name=vector_name)
42
-
43
- return format_chunk_return(chunks, metadata, vector_name)
44
-
45
-
46
-
47
- def data_to_embed_pubsub(data: dict):
48
- """Triggered from a message on a Cloud Pub/Sub topic.
49
- Args:
50
- data JSON
51
- """
52
-
53
- message_data, metadata, vector_name = process_pubsub_message(data)
54
17
 
55
- return process_chunker_data(message_data, metadata, vector_name)
18
+ from ..logging import log
56
19
 
57
20
  def process_chunker_data(message_data, metadata, vector_name):
58
21
 
@@ -95,7 +58,6 @@ def process_chunker_data(message_data, metadata, vector_name):
95
58
 
96
59
  return format_chunk_return(chunks, metadata, vector_name)
97
60
 
98
-
99
61
  def format_chunk_return(chunks, metadata, vector_name):
100
62
  # to be really sure
101
63
  if metadata:
@@ -110,8 +72,23 @@ def format_chunk_return(chunks, metadata, vector_name):
110
72
 
111
73
  return output_list
112
74
 
75
+ # returns None when not on GCP
113
76
  process_docs_chunks_vector_name(chunks, vector_name, metadata)
114
77
 
115
78
  return metadata
116
79
 
117
80
 
81
+ def direct_file_to_embed(file_name: pathlib.Path, metadata: dict, vector_name: str):
82
+ """
83
+ Send direct files to chunking embed pipeline
84
+ """
85
+ log.info(f"Sending direct file upload {file_name} to loaders.read_file_to_documents {metadata}")
86
+ docs = loaders.read_file_to_documents(file_name, metadata=metadata)
87
+ if docs is None:
88
+ log.warning(f"loaders.read_file_to_documents docs2 failed to load file {metadata}")
89
+
90
+ return None
91
+
92
+ chunks = chunk_doc_to_docs(docs, file_name.suffix, vector_name=vector_name)
93
+
94
+ return format_chunk_return(chunks, metadata, vector_name)
@@ -0,0 +1,31 @@
1
+ # Copyright [2024] [Holosun ApS]
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ..logging import log
16
+ from ..pubsub import process_pubsub_message
17
+ from .process_chunker_data import process_chunker_data
18
+
19
+ def data_to_embed_pubsub(data: dict):
20
+ """Triggered from a message on a Cloud Pub/Sub topic.
21
+ Args:
22
+ data JSON
23
+ """
24
+
25
+ message_data, metadata, vector_name = process_pubsub_message(data)
26
+
27
+ return process_chunker_data(message_data, metadata, vector_name)
28
+
29
+
30
+
31
+
@@ -9,6 +9,7 @@ from .run_proxy import setup_proxy_subparser
9
9
  from .chat_vac import setup_vac_subparser
10
10
  from .embedder import setup_embedder_subparser
11
11
  from .swagger import setup_swagger_subparser
12
+ from .vertex import setup_vertex_subparser
12
13
 
13
14
  from ..utils import ConfigManager
14
15
 
@@ -84,6 +85,8 @@ def main(args=None):
84
85
  setup_embedder_subparser(subparsers)
85
86
  # swagger generation
86
87
  setup_swagger_subparser(subparsers)
88
+ # vertex
89
+ setup_vertex_subparser(subparsers)
87
90
 
88
91
  #TODO: add database setup commands: alloydb and supabase
89
92
 
@@ -0,0 +1,46 @@
1
+ from ..vertex import VertexAIExtensions
2
+
3
+ from .sun_rich import console
4
+
5
+ def deploy_extension(args):
6
+ vex = VertexAIExtensions()
7
+ console.rule(f"Creating Vertex extension '{args.display_name}'")
8
+
9
+ vex.create_extension(
10
+ args.display_name,
11
+ description=args.description,
12
+ tool_example_file=args.tool_example_file,
13
+ open_api_file=args.open_api_file,
14
+ service_account=args.service_account,
15
+ project_id=args.project,
16
+ bucket_name=args.bucket_name
17
+ )
18
+ extensions = vex.list_extensions(args.project)
19
+ console.print(extensions)
20
+
21
+ def list_extensions(args):
22
+ vex = VertexAIExtensions()
23
+ extensions = vex.list_extensions(args.project)
24
+ console.print(extensions)
25
+
26
+ def setup_vertex_subparser(subparsers):
27
+ """
28
+ Sets up an argparse subparser for the 'vertex' command.
29
+
30
+ Args:
31
+ subparsers: The subparsers object to add the 'vertex' subcommand to.
32
+ """
33
+ vertex_parser = subparsers.add_parser('vertex', help='Work with Google Vertex AI')
34
+ vertex_subparsers = vertex_parser.add_subparsers(dest='subcommand', help='Vertex AI subcommands')
35
+
36
+ create_parser = vertex_subparsers.add_parser('create-extension', help='Create a Vertex AI extension')
37
+ create_parser.add_argument('--display_name', required=True, help='Display name of the extension')
38
+ create_parser.add_argument('--description', required=True, help='Description of the extension')
39
+ create_parser.add_argument('--tool_example_file', required=True, help='Tool example file path')
40
+ create_parser.add_argument('--open_api_file', required=True, help='OpenAPI file path')
41
+ create_parser.add_argument('--service_account', required=True, help='Service account email')
42
+ create_parser.add_argument('--bucket_name', help='Bucket name to upload files to. Uses EXTENSION_BUCKET env var if not specified')
43
+ create_parser.set_defaults(func=deploy_extension)
44
+
45
+ list_parser = vertex_subparsers.add_parser('list-extensions', help='List all Vertex AI extensions')
46
+ list_parser.set_defaults(func=list_extensions)
@@ -102,6 +102,9 @@ def check_discovery_engine_in_memory(vector_name):
102
102
  def discovery_engine_chunker_check(message_data, metadata, vector_name):
103
103
  # discovery engine handles its own chunking/embedding
104
104
  memories = load_config_key("memory", vector_name=vector_name, kind="vacConfig")
105
+ if not memories:
106
+ return None
107
+
105
108
  total_memories = len(memories)
106
109
  llama = None
107
110
  if check_discovery_engine_in_memory(vector_name):
@@ -131,7 +131,10 @@ def check_llamaindex_in_memory(vector_name):
131
131
 
132
132
  def llamaindex_chunker_check(message_data, metadata, vector_name):
133
133
  # llamaindex handles its own chunking/embedding
134
- memories = ConfigManager(vector_name).vacConfig("memory")
134
+ memories = load_memories(vector_name)
135
+ if not memories:
136
+ return None
137
+
135
138
  total_memories = len(memories)
136
139
  llama = None
137
140
  if check_llamaindex_in_memory(vector_name):
@@ -69,9 +69,12 @@ class VertexAIExtensions:
69
69
  self.tool_use_examples = None
70
70
  self.manifest = {}
71
71
  self.created_extensions = []
72
+ self.bucket_name = os.getenv('EXTENSIONS_BUCKET')
72
73
 
73
- def list_extensions(self):
74
- the_list = extensions.Extension.list()
74
+ def list_extensions(self, project_id:str=None):
75
+ project_id = project_id or get_gcp_project()
76
+ log.info(f"Creating extension within {project_id=}")
77
+ the_list = extensions.Extension.list(project=project_id)
75
78
 
76
79
  extensions_list = []
77
80
  for ext in the_list:
@@ -94,20 +97,23 @@ class VertexAIExtensions:
94
97
  validate(spec_dict)
95
98
 
96
99
  def upload_to_gcs(self, filename):
97
- if not os.getenv('EXTENSIONS_BUCKET'):
98
- raise ValueError('Please specify env var EXTENSIONS_BUCKET for location to upload openapi spec')
100
+ if not self.bucket_name:
101
+ raise ValueError('Please specify bucket_name or env var EXTENSIONS_BUCKET for location to upload openapi spec')
99
102
 
100
103
  from ..gcs.add_file import add_file_to_gcs
101
104
  file_base = os.path.basename(filename)
102
105
 
103
- self_uri = add_file_to_gcs(file_base, bucket_filepath=file_base)
106
+ self_uri = add_file_to_gcs(file_base, bucket_filepath=file_base, bucket_name=self.bucket_name)
104
107
 
105
108
  return self_uri
106
109
 
107
110
  def upload_openapi_file(self, filename: str):
108
111
  self.validate_openapi(filename)
112
+ if not self.bucket_name:
113
+ raise ValueError('Please specify env var EXTENSIONS_BUCKET for location to upload openapi spec')
114
+
109
115
 
110
- self.openapi_file_gcs = self.upload_to_gcs(filename)
116
+ self.openapi_file_gcs = self.upload_to_gcs(filename, bucket_name=self.bucket_name)
111
117
 
112
118
  def load_tool_use_examples(self, filename: str):
113
119
  import yaml
@@ -193,15 +199,23 @@ class VertexAIExtensions:
193
199
  open_api_file: str = None,
194
200
  tool_example_file: str = None,
195
201
  runtime_config: dict = None,
196
- service_account: str = None):
202
+ service_account: str = None,
203
+ project_id: str = None,
204
+ bucket_name: str = None):
197
205
 
198
- project_id = get_gcp_project()
206
+ project_id = project_id or get_gcp_project()
207
+ log.info(f"Creating extension within {project_id=}")
199
208
  extension_name = f"projects/{project_id}/locations/us-central1/extensions/{validate_extension_id(display_name)}"
200
209
 
210
+ if bucket_name:
211
+ log.info(f"Setting extension bucket name to {bucket_name}")
212
+ self.bucket_name = bucket_name
213
+
201
214
  listed_extensions = self.list_extensions()
215
+ log.info(f"Listing extensions:\n {listed_extensions}")
202
216
  for ext in listed_extensions:
203
- if ext.get('resource_name') == extension_name:
204
- raise NameError(f"resouce_name {extension_name} already exists. Delete it or rename your new extension")
217
+ if ext.get('display_name') == display_name:
218
+ raise NameError(f"display_name {display_name} already exists. Delete it or rename your new extension")
205
219
 
206
220
  if open_api_file:
207
221
  self.upload_openapi_file(open_api_file)
@@ -233,7 +247,7 @@ class VertexAIExtensions:
233
247
 
234
248
  return extension.resource_name
235
249
 
236
- def execute_extension(self, operation_id: str, operation_params: dict, extension_id: str=None):
250
+ def execute_extension(self, operation_id: str, operation_params: dict, extension_id: str=None, project_id: str=None):
237
251
  init_vertex(location=self.location)
238
252
 
239
253
  if not extension_id:
@@ -243,7 +257,7 @@ class VertexAIExtensions:
243
257
  else:
244
258
  extension_id = str(extension_id)
245
259
  if not extension_id.startswith("projects/"):
246
- project_id = get_gcp_project()
260
+ project_id = project_id or get_gcp_project()
247
261
  extension_name = f"projects/{project_id}/locations/{self.location}/extensions/{extension_id}"
248
262
  else:
249
263
  extension_name = extension_id
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.75.1
3
+ Version: 0.76.2
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.75.1.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.76.2.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -25,6 +25,7 @@ Requires-Dist: langchain_experimental>0.0.60
25
25
  Requires-Dist: langchain-community
26
26
  Provides-Extra: all
27
27
  Requires-Dist: asyncpg; extra == "all"
28
+ Requires-Dist: azure-storage-blob; extra == "all"
28
29
  Requires-Dist: fastapi; extra == "all"
29
30
  Requires-Dist: flask; extra == "all"
30
31
  Requires-Dist: google-auth; extra == "all"
@@ -69,6 +70,8 @@ Requires-Dist: tabulate; extra == "all"
69
70
  Requires-Dist: tantivy; extra == "all"
70
71
  Requires-Dist: tiktoken; extra == "all"
71
72
  Requires-Dist: unstructured[local-inference]==0.14.9; extra == "all"
73
+ Provides-Extra: azure
74
+ Requires-Dist: azure-storage-blob; extra == "azure"
72
75
  Provides-Extra: cli
73
76
  Requires-Dist: jsonschema>=4.21.1; extra == "cli"
74
77
  Requires-Dist: rich; extra == "cli"
@@ -31,18 +31,22 @@ sunholo/archive/archive.py
31
31
  sunholo/auth/__init__.py
32
32
  sunholo/auth/gcloud.py
33
33
  sunholo/auth/run.py
34
+ sunholo/azure/__init__.py
35
+ sunholo/azure/event_grid.py
34
36
  sunholo/bots/__init__.py
35
37
  sunholo/bots/discord.py
36
38
  sunholo/bots/github_webhook.py
37
39
  sunholo/bots/webapp.py
38
40
  sunholo/chunker/__init__.py
39
- sunholo/chunker/data_to_embed_pubsub.py
41
+ sunholo/chunker/azure.py
40
42
  sunholo/chunker/doc_handling.py
41
43
  sunholo/chunker/images.py
42
44
  sunholo/chunker/loaders.py
43
45
  sunholo/chunker/message_data.py
44
46
  sunholo/chunker/pdfs.py
47
+ sunholo/chunker/process_chunker_data.py
45
48
  sunholo/chunker/publish.py
49
+ sunholo/chunker/pubsub.py
46
50
  sunholo/chunker/splitter.py
47
51
  sunholo/cli/__init__.py
48
52
  sunholo/cli/chat_vac.py
@@ -55,6 +59,7 @@ sunholo/cli/merge_texts.py
55
59
  sunholo/cli/run_proxy.py
56
60
  sunholo/cli/sun_rich.py
57
61
  sunholo/cli/swagger.py
62
+ sunholo/cli/vertex.py
58
63
  sunholo/components/__init__.py
59
64
  sunholo/components/llm.py
60
65
  sunholo/components/retriever.py
@@ -130,5 +135,4 @@ sunholo/vertex/init.py
130
135
  sunholo/vertex/memory_tools.py
131
136
  sunholo/vertex/safety.py
132
137
  tests/test_chat_history.py
133
- tests/test_chunker.py
134
138
  tests/test_config.py
@@ -6,6 +6,7 @@ langchain-community
6
6
 
7
7
  [all]
8
8
  asyncpg
9
+ azure-storage-blob
9
10
  fastapi
10
11
  flask
11
12
  google-auth
@@ -54,6 +55,9 @@ unstructured[local-inference]==0.14.9
54
55
  [anthropic]
55
56
  langchain-anthropic>=0.1.13
56
57
 
58
+ [azure]
59
+ azure-storage-blob
60
+
57
61
  [cli]
58
62
  jsonschema>=4.21.1
59
63
  rich
@@ -1 +0,0 @@
1
- from .data_to_embed_pubsub import data_to_embed_pubsub, direct_file_to_embed
@@ -1,23 +0,0 @@
1
- import pytest
2
- from unittest.mock import patch, MagicMock
3
- from sunholo.chunker.data_to_embed_pubsub import data_to_embed_pubsub
4
-
5
- # Mock external calls within the function
6
- @patch('sunholo.chunker.data_to_embed_pubsub.process_pubsub_message', return_value=({}, {}, 'test_vector'))
7
- @patch('sunholo.chunker.data_to_embed_pubsub.process_chunker_data', return_value='processed_data')
8
- def test_data_to_embed_pubsub(mock_process_chunker_data, mock_process_pubsub_message):
9
- # Test the function with various inputs including edge cases
10
- assert data_to_embed_pubsub({}) == 'processed_data'
11
- assert data_to_embed_pubsub({'key': 'value'}) == 'processed_data'
12
- mock_process_pubsub_message.assert_called()
13
- mock_process_chunker_data.assert_called()
14
-
15
- # Ensure tests are self-contained and do not require external dependencies
16
- mock_process_pubsub_message = MagicMock(return_value=({}, {}, 'test_vector'))
17
- mock_process_chunker_data = MagicMock(return_value='processed_data')
18
- assert data_to_embed_pubsub({'key': 'value'}) == 'processed_data'
19
-
20
- # Validate the function's output against expected results
21
- expected_output = 'processed_data'
22
- actual_output = data_to_embed_pubsub({'key': 'value'})
23
- assert actual_output == expected_output, f"Expected {expected_output}, got {actual_output}"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes