sunholo 0.75.1__py3-none-any.whl → 0.76.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
sunholo/auth/refresh.py ADDED
@@ -0,0 +1,55 @@
+ # needs to be in minimal to check gcp
+ import os
+
+ import google.auth
+ from google.auth.transport import requests
+ from ..utils.gcp import is_running_on_gcp
+
+
+ from ..logging import log
+
+ def get_default_email():
+     if not refresh_credentials():
+         log.error("Could not refresh the credentials properly.")
+         return None
+     # https://stackoverflow.com/questions/64234214/how-to-generate-a-blob-signed-url-in-google-cloud-run
+
+     gcs_credentials, project_id = get_default_creds()
+
+     service_account_email = getattr(gcs_credentials, 'service_account_email', None)
+     # If you use a service account credential, you can use the embedded email
+     if not service_account_email:
+         service_account_email = os.getenv('GCS_MAIL_USER')
+         if not service_account_email:
+             log.error("Could not create the credentials for signed requests - no credentials.service_account_email or GCS_MAIL_USER with roles/iam.serviceAccountTokenCreator")
+
+             return None
+
+     log.info(f"Found default email: {service_account_email=} for {project_id=}")
+     return service_account_email
+
+ def get_default_creds():
+     gcs_credentials = None
+     project_id = None
+     gcs_credentials, project_id = google.auth.default()
+
+     return gcs_credentials, project_id
+
+ def refresh_credentials():
+     if not is_running_on_gcp():
+         log.debug("Not running on Google Cloud so no credentials available for GCS.")
+         return False
+
+     gcs_credentials, project_id = get_default_creds()
+
+     if not gcs_credentials.token or gcs_credentials.expired or not gcs_credentials.valid:
+         try:
+             gcs_credentials.refresh(requests.Request())
+
+             return True
+
+         except Exception as e:
+             log.error(f"Failed to refresh gcs credentials: {e}")
+
+     return False
+
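This new module centralises the credential refresh that previously lived inline in `sunholo/gcs/download_url.py` (see that file's hunks below). A minimal usage sketch, assuming code running on GCP with a service account, or a local `GCS_MAIL_USER` env var pointing at an account with `roles/iam.serviceAccountTokenCreator`:

```python
# Illustrative sketch, not from the package source.
from sunholo.auth.refresh import get_default_email

# Returns None off-GCP or when no signing identity can be resolved;
# falls back to the GCS_MAIL_USER env var if the default credentials
# carry no embedded service_account_email.
email = get_default_email()
if email:
    print(f"Will sign GCS URLs as: {email}")
```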

sunholo/azure/__init__.py ADDED
@@ -0,0 +1 @@
+ from .event_grid import process_azure_blob_event

sunholo/azure/event_grid.py ADDED
@@ -0,0 +1,69 @@
+ # process_azure_blob_event.py
+ from ..logging import log
+
+ def process_azure_blob_event(events: list) -> tuple:
+     """
+     Extracts message data and metadata from an Azure Blob Storage event.
+
+     Args:
+         events (list): The list of Azure Event Grid event data.
+
+     Returns:
+         tuple: A tuple containing the blob URL, attributes as metadata, and the vector name.
+
+     Example of Event Grid schema:
+     {
+       "topic": "/subscriptions/{subscription-id}/resourceGroups/{resource-group}/providers/Microsoft.Storage/storageAccounts/{storage-account}",
+       "subject": "/blobServices/default/containers/{container}/blobs/{blob}",
+       "eventType": "Microsoft.Storage.BlobCreated",
+       "eventTime": "2021-01-01T12:34:56.789Z",
+       "id": "event-id",
+       "data": {
+         "api": "PutBlob",
+         "clientRequestId": "client-request-id",
+         "requestId": "request-id",
+         "eTag": "etag",
+         "contentType": "application/octet-stream",
+         "contentLength": 524288,
+         "blobType": "BlockBlob",
+         "url": "https://{storage-account}.blob.core.windows.net/{container}/{blob}",
+         "sequencer": "0000000000000000000000000000000000000000000000000000000000000000",
+         "storageDiagnostics": {
+           "batchId": "batch-id"
+         }
+       },
+       "dataVersion": "",
+       "metadataVersion": "1"
+     }
+     """
+     storage_blob_created_event = "Microsoft.Storage.BlobCreated"
+
+     for event in events:
+         event_type = event['eventType']
+         data = event['data']
+
+         if event_type == storage_blob_created_event:
+             blob_url = data['url']
+             event_time = event['eventTime']
+             event_id = event['id']
+             subject = event['subject']
+             attributes = {
+                 'event_type': event_type,
+                 'event_time': event_time,
+                 'event_id': event_id,
+                 'subject': subject,
+                 'url': blob_url
+             }
+
+             vector_name = subject.split('/')[4] # Extracting the container name
+
+             log.info(f"Process Azure Blob Event was triggered by eventId {event_id} at {event_time}")
+             log.debug(f"Process Azure Blob Event data: {blob_url}")
+
+             # Check for a valid Azure Blob Storage event type
+             if event_type == "Microsoft.Storage.BlobCreated":
+                 log.info(f"Got valid event from Azure Blob Storage: {blob_url}")
+
+             return blob_url, attributes, vector_name
+
+     return None, None, None
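A hedged sketch of the parser in action, using placeholder values shaped like the docstring's schema; only the keys the function actually reads are included:

```python
from sunholo.azure import process_azure_blob_event

# Placeholder event; all values are invented for illustration.
events = [{
    "eventType": "Microsoft.Storage.BlobCreated",
    "eventTime": "2021-01-01T12:34:56.789Z",
    "id": "event-id",
    "subject": "/blobServices/default/containers/my-vac/blobs/report.pdf",
    "data": {"url": "https://myaccount.blob.core.windows.net/my-vac/report.pdf"},
}]

blob_url, attributes, vector_name = process_azure_blob_event(events)
# subject.split('/')[4] picks out the container name:
assert vector_name == "my-vac"
```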

sunholo/chunker/__init__.py CHANGED
@@ -1 +1,3 @@
- from .data_to_embed_pubsub import data_to_embed_pubsub, direct_file_to_embed
+ from .pubsub import data_to_embed_pubsub
+ from .azure import data_to_embed_azure
+ from .process_chunker_data import direct_file_to_embed

sunholo/chunker/azure.py ADDED
@@ -0,0 +1,41 @@
+ # Copyright [2024] [Holosun ApS]
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from ..logging import log
+ from ..azure import process_azure_blob_event
+ from .process_chunker_data import process_chunker_data
+
+ def data_to_embed_azure(events: list):
+     """Triggered from a message on an Azure Data Grid event.
+     Args:
+         data JSON
+     """
+     validation_event_type = "Microsoft.EventGrid.SubscriptionValidationEvent"
+     storage_blob_created_event = "Microsoft.Storage.BlobCreated"
+
+     for event in events:
+         event_type = event['eventType']
+         data = event['data']
+
+         if event_type == validation_event_type:
+             validation_code = data['validationCode']
+             log.info(f"Got SubscriptionValidation event data, validation code: {validation_code}, topic: {event['topic']}")
+
+             # Return the validation response
+             return {"ValidationResponse": validation_code}
+         elif event_type == storage_blob_created_event:
+
+             message_data, metadata, vector_name = process_azure_blob_event(events)
+
+             return process_chunker_data(message_data, metadata, vector_name)
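Event Grid subscriptions must answer a one-off validation handshake before blob events are delivered, and `data_to_embed_azure` handles that first branch. An illustrative sketch, with the payload shape following Microsoft's Event Grid documentation and placeholder values:

```python
from sunholo.chunker import data_to_embed_azure

# Placeholder SubscriptionValidation event, as Event Grid sends when a
# new subscription is created; the validationCode here is invented.
validation_events = [{
    "eventType": "Microsoft.EventGrid.SubscriptionValidationEvent",
    "topic": "/subscriptions/{subscription-id}/resourceGroups/{rg}/providers/Microsoft.Storage/storageAccounts/{account}",
    "data": {"validationCode": "712d4506-53ff-4268-9e2c-42331a10d991"},
}]

response = data_to_embed_azure(validation_events)
assert response == {"ValidationResponse": "712d4506-53ff-4268-9e2c-42331a10d991"}
```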

sunholo/chunker/message_data.py CHANGED
@@ -173,10 +173,15 @@ def handle_http_message(message_data: str, metadata: dict, vector_name:str):

      return chunks, metadata

- def handle_json_content_message(message_data: str, metadata: dict, vector_name: str):
+ def handle_json_content_message(message_data: dict, metadata: dict, vector_name: str):
      log.info("No tailored message_data detected, processing message json")
      # Process message containing direct JSON content
-     the_json = json.loads(message_data)
+     try:
+         the_json = json.loads(message_data)
+     except Exception as e:
+         log.error(f"Could not load message {message_data} as JSON - {str(e)}")
+         return None, {"metadata": f"Could not load message as JSON - {str(e)}"}
+
      the_metadata = the_json.get("metadata", {})
      metadata.update(the_metadata)
      the_content = the_json.get("page_content", None)
@@ -186,7 +191,7 @@ def handle_json_content_message(message_data: str, metadata: dict, vector_name:

      if the_content is None:
          log.info("No content found")
-         return {"metadata": "No content found in 'page_content' JSON field"}
+         return None, {"metadata": "No content found in 'page_content' JSON field"}

      docs = [Document(page_content=the_content, metadata=metadata)]
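Both fixed error paths now return a `(None, metadata)` pair instead of a bare dict, so callers can unpack uniformly. A hypothetical illustration of the new contract:

```python
from sunholo.chunker.message_data import handle_json_content_message

# Malformed JSON now yields (None, error_metadata) rather than raising.
chunks, meta = handle_json_content_message("not-valid-json", {}, "my_vac")
if chunks is None:
    print(meta["metadata"])  # "Could not load message as JSON - ..."
```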
 

sunholo/chunker/data_to_embed_pubsub.py → sunholo/chunker/process_chunker_data.py RENAMED
@@ -1,58 +1,21 @@
- # Copyright [2024] [Holosun ApS]
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
  import pathlib

- from ..logging import log
- from ..pubsub import process_pubsub_message
- from .message_data import handle_gcs_message, handle_google_drive_message, handle_github_message, handle_http_message, handle_json_content_message
- from .publish import process_docs_chunks_vector_name
- from .splitter import chunk_doc_to_docs
+ from .message_data import (
+     handle_gcs_message,
+     handle_google_drive_message,
+     handle_github_message,
+     handle_http_message,
+     handle_json_content_message
+ )

+ from . import loaders
  from ..llamaindex.import_files import llamaindex_chunker_check
  from ..discovery_engine.chunker_handler import discovery_engine_chunker_check
+ from .publish import process_docs_chunks_vector_name
+ from .splitter import chunk_doc_to_docs

- from . import loaders
-
- def direct_file_to_embed(file_name: pathlib.Path, metadata: dict, vector_name: str):
-     """
-     Send direct files to chunking embed pipeline
-
-
-
-     """
-     log.info(f"Sending direct file upload {file_name} to loaders.read_file_to_documents {metadata}")
-     docs = loaders.read_file_to_documents(file_name, metadata=metadata)
-     if docs is None:
-         log.warning(f"loaders.read_file_to_documents docs2 failed to load file {metadata}")
-
-         return None
-
-     chunks = chunk_doc_to_docs(docs, file_name.suffix, vector_name=vector_name)
-
-     return format_chunk_return(chunks, metadata, vector_name)
-
-
-
- def data_to_embed_pubsub(data: dict):
-     """Triggered from a message on a Cloud Pub/Sub topic.
-     Args:
-         data JSON
-     """
-
-     message_data, metadata, vector_name = process_pubsub_message(data)

-     return process_chunker_data(message_data, metadata, vector_name)
+ from ..logging import log

  def process_chunker_data(message_data, metadata, vector_name):

@@ -95,7 +58,6 @@ def process_chunker_data(message_data, metadata, vector_name):

      return format_chunk_return(chunks, metadata, vector_name)

-
  def format_chunk_return(chunks, metadata, vector_name):
      # to be really sure
      if metadata:
@@ -110,8 +72,23 @@ def format_chunk_return(chunks, metadata, vector_name):

          return output_list

+     # returns None when not on GCP
      process_docs_chunks_vector_name(chunks, vector_name, metadata)

      return metadata


+ def direct_file_to_embed(file_name: pathlib.Path, metadata: dict, vector_name: str):
+     """
+     Send direct files to chunking embed pipeline
+     """
+     log.info(f"Sending direct file upload {file_name} to loaders.read_file_to_documents {metadata}")
+     docs = loaders.read_file_to_documents(file_name, metadata=metadata)
+     if docs is None:
+         log.warning(f"loaders.read_file_to_documents docs2 failed to load file {metadata}")
+
+         return None
+
+     chunks = chunk_doc_to_docs(docs, file_name.suffix, vector_name=vector_name)
+
+     return format_chunk_return(chunks, metadata, vector_name)
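With the Pub/Sub entry point split out, this file keeps the shared pipeline plus `direct_file_to_embed`. A sketch of the direct-file path, assuming the re-export from `sunholo/chunker/__init__.py` above and invented file and vector names:

```python
import pathlib
from sunholo.chunker import direct_file_to_embed

# The file suffix (here .pdf) selects the splitter via chunk_doc_to_docs;
# returns None if the loaders cannot read the file.
result = direct_file_to_embed(
    pathlib.Path("report.pdf"),
    metadata={"source": "local-upload"},
    vector_name="my_vac",
)
```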

sunholo/chunker/pubsub.py ADDED
@@ -0,0 +1,31 @@
+ # Copyright [2024] [Holosun ApS]
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from ..logging import log
+ from ..pubsub import process_pubsub_message
+ from .process_chunker_data import process_chunker_data
+
+ def data_to_embed_pubsub(data: dict):
+     """Triggered from a message on a Cloud Pub/Sub topic.
+     Args:
+         data JSON
+     """
+
+     message_data, metadata, vector_name = process_pubsub_message(data)
+
+     return process_chunker_data(message_data, metadata, vector_name)
+
+
+
+
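A hedged sketch of invoking the relocated Pub/Sub entry point. The standard Cloud Pub/Sub push envelope wraps base64-encoded `data`; the exact attribute keys `process_pubsub_message` expects are not shown in this diff, so the ones below are assumptions:

```python
import base64
from sunholo.chunker import data_to_embed_pubsub

# Hypothetical push envelope; "namespace" as the vector-name attribute
# is an assumption, not confirmed by this diff.
envelope = {
    "message": {
        "data": base64.b64encode(b"gs://my-bucket/docs/report.pdf").decode("utf-8"),
        "attributes": {"namespace": "my_vac"},
    }
}

metadata = data_to_embed_pubsub(envelope)
```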
sunholo/cli/cli.py CHANGED
@@ -9,6 +9,7 @@ from .run_proxy import setup_proxy_subparser
  from .chat_vac import setup_vac_subparser
  from .embedder import setup_embedder_subparser
  from .swagger import setup_swagger_subparser
+ from .vertex import setup_vertex_subparser

  from ..utils import ConfigManager

@@ -84,6 +85,8 @@ def main(args=None):
      setup_embedder_subparser(subparsers)
      # swagger generation
      setup_swagger_subparser(subparsers)
+     # vertex
+     setup_vertex_subparser(subparsers)

      #TODO: add database setup commands: alloydb and supabase

sunholo/cli/vertex.py ADDED
@@ -0,0 +1,46 @@
+ from ..vertex import VertexAIExtensions
+
+ from .sun_rich import console
+
+ def deploy_extension(args):
+     vex = VertexAIExtensions(args.project)
+     console.rule(f"Creating Vertex extension '{args.display_name}' within '{args.project}'")
+
+     vex.create_extension(
+         args.display_name,
+         description=args.description,
+         tool_example_file=args.tool_example_file,
+         open_api_file=args.open_api_file,
+         service_account=args.service_account,
+         project_id=args.project,
+         bucket_name=args.bucket_name
+     )
+     extensions = vex.list_extensions(args.project)
+     console.print(extensions)
+
+ def list_extensions(args):
+     vex = VertexAIExtensions()
+     extensions = vex.list_extensions(args.project)
+     console.print(extensions)
+
+ def setup_vertex_subparser(subparsers):
+     """
+     Sets up an argparse subparser for the 'vertex' command.
+
+     Args:
+         subparsers: The subparsers object to add the 'vertex' subcommand to.
+     """
+     vertex_parser = subparsers.add_parser('vertex', help='Work with Google Vertex AI')
+     vertex_subparsers = vertex_parser.add_subparsers(dest='subcommand', help='Vertex AI subcommands')
+
+     create_parser = vertex_subparsers.add_parser('create-extension', help='Create a Vertex AI extension')
+     create_parser.add_argument('--display_name', required=True, help='Display name of the extension')
+     create_parser.add_argument('--description', required=True, help='Description of the extension')
+     create_parser.add_argument('--tool_example_file', required=True, help='Tool example file path')
+     create_parser.add_argument('--open_api_file', required=True, help='OpenAPI file path')
+     create_parser.add_argument('--service_account', required=True, help='Service account email')
+     create_parser.add_argument('--bucket_name', help='Bucket name to upload files to. Uses EXTENSION_BUCKET env var if not specified')
+     create_parser.set_defaults(func=deploy_extension)
+
+     list_parser = vertex_subparsers.add_parser('list-extensions', help='List all Vertex AI extensions')
+     list_parser.set_defaults(func=list_extensions)
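Once wired into `main()` via `setup_vertex_subparser`, this enables e.g. `sunholo vertex create-extension --display_name ... --description ...`. A hedged sketch of the dispatch flow; note the handlers read `args.project`, which is not defined in this subparser, so it is presumably a global flag defined elsewhere in `cli.py`:

```python
import argparse
from sunholo.cli.vertex import setup_vertex_subparser

parser = argparse.ArgumentParser(prog='sunholo')
subparsers = parser.add_subparsers()
setup_vertex_subparser(subparsers)

args = parser.parse_args(['vertex', 'list-extensions'])
# args.func is now list_extensions; the real CLI supplies args.project
# (assumed global flag) before calling args.func(args).
```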

sunholo/discovery_engine/chunker_handler.py CHANGED
@@ -102,6 +102,9 @@ def check_discovery_engine_in_memory(vector_name):
  def discovery_engine_chunker_check(message_data, metadata, vector_name):
      # discovery engine handles its own chunking/embedding
      memories = load_config_key("memory", vector_name=vector_name, kind="vacConfig")
+     if not memories:
+         return None
+
      total_memories = len(memories)
      llama = None
      if check_discovery_engine_in_memory(vector_name):

sunholo/gcs/download_url.py CHANGED
@@ -3,8 +3,6 @@ from urllib.parse import quote
  from datetime import datetime, timedelta

  # needs to be in minimal to check gcp
- import google.auth
- from google.auth.transport import requests
  from google.auth.exceptions import RefreshError

  try:
@@ -14,6 +12,7 @@ except ImportError:

  from ..logging import log
  from ..utils.gcp import is_running_on_gcp
+ from ..auth.refresh import refresh_credentials, get_default_creds, get_default_email
  from io import BytesIO
  try:
      from PIL import Image
@@ -77,47 +76,22 @@ def get_bytes_from_gcs(gs_uri):

  if is_running_on_gcp():
      # Perform a refresh request to get the access token of the current credentials (Else, it's None)
-     gcs_credentials, project_id = google.auth.default()
+     gcs_credentials, project_id = get_default_creds()
      # Prepare global variables for client reuse
      if storage:
          gcs_client = storage.Client()

- def refresh_credentials():
-     if not is_running_on_gcp():
-         log.debug("Not running on Google Cloud so no credentials available for GCS.")
-         return False
-     if not gcs_credentials.token or gcs_credentials.expired or not gcs_credentials.valid:
-         try:
-             gcs_credentials.refresh(requests.Request())
-         except Exception as e:
-             log.error(f"Failed to refresh gcs credentials: {e}")
-             return False
-     return True
-
- refresh_credentials()
-
  def get_bucket(bucket_name):
      if bucket_name not in gcs_bucket_cache:
          gcs_bucket_cache[bucket_name] = gcs_client.get_bucket(bucket_name)
      return gcs_bucket_cache[bucket_name]

  def sign_gcs_url(bucket_name:str, object_name:str, expiry_secs = 86400):
-     if not refresh_credentials():
-         log.error("Could not refresh the credentials properly.")
-         return None
-     # https://stackoverflow.com/questions/64234214/how-to-generate-a-blob-signed-url-in-google-cloud-run
+
+     service_account_email = get_default_email()

      expires = datetime.now() + timedelta(seconds=expiry_secs)

-     service_account_email = getattr(gcs_credentials, 'service_account_email', None)
-     # If you use a service account credential, you can use the embedded email
-     if not service_account_email:
-         service_account_email = os.getenv('GCS_MAIL_USER')
-         if service_account_email is None:
-             log.error("For local testing must set a GCS_MAIL_USER to sign GCS URLs")
-             log.error("Could not create the credentials for signed requests - no credentials.service_account_email or GCS_MAIL_USER with roles/iam.serviceAccountTokenCreator")
-             return None
-
      try:
          bucket = get_bucket(bucket_name)
          blob = bucket.blob(object_name)
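With the refresh helpers extracted to `sunholo/auth/refresh.py`, `sign_gcs_url` reduces to resolving the signing email and delegating to the blob API. An illustrative call with placeholder bucket and object names:

```python
from sunholo.gcs.download_url import sign_gcs_url

# Placeholder names; returns None if no signing identity can be resolved.
url = sign_gcs_url("my-bucket", "folder/report.pdf", expiry_secs=3600)
if url:
    print(url)  # signed URL, valid for one hour
```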

sunholo/llamaindex/import_files.py CHANGED
@@ -131,7 +131,10 @@ def check_llamaindex_in_memory(vector_name):

  def llamaindex_chunker_check(message_data, metadata, vector_name):
      # llamaindex handles its own chunking/embedding
-     memories = ConfigManager(vector_name).vacConfig("memory")
+     memories = load_memories(vector_name)
+     if not memories:
+         return None
+
      total_memories = len(memories)
      llama = None
      if check_llamaindex_in_memory(vector_name):

sunholo/vertex/extensions_class.py CHANGED
@@ -50,7 +50,7 @@ class VertexAIExtensions:
              operation_params = operation_params)
      ```
      """
-     def __init__(self):
+     def __init__(self, project_id=None):
          if extensions is None:
              raise ImportError("VertexAIExtensions needs vertexai.previewextensions to be installed. Install via `pip install sunholo[gcp]`")

@@ -69,9 +69,13 @@ class VertexAIExtensions:
          self.tool_use_examples = None
          self.manifest = {}
          self.created_extensions = []
+         self.bucket_name = os.getenv('EXTENSIONS_BUCKET')
+         init_vertex(location=self.location, project_id=project_id)

-     def list_extensions(self):
-         the_list = extensions.Extension.list()
+     def list_extensions(self, project_id:str=None):
+         project_id = project_id or get_gcp_project()
+         log.info(f"Creating extension within {project_id=}")
+         the_list = extensions.Extension.list(project=project_id)

          extensions_list = []
          for ext in the_list:
@@ -94,18 +98,21 @@ class VertexAIExtensions:
          validate(spec_dict)

      def upload_to_gcs(self, filename):
-         if not os.getenv('EXTENSIONS_BUCKET'):
-             raise ValueError('Please specify env var EXTENSIONS_BUCKET for location to upload openapi spec')
+         if not self.bucket_name:
+             raise ValueError('Please specify bucket_name or env var EXTENSIONS_BUCKET for location to upload openapi spec')

          from ..gcs.add_file import add_file_to_gcs
          file_base = os.path.basename(filename)

-         self_uri = add_file_to_gcs(file_base, bucket_filepath=file_base)
+         self_uri = add_file_to_gcs(file_base, bucket_filepath=file_base, bucket_name=self.bucket_name)

          return self_uri

      def upload_openapi_file(self, filename: str):
          self.validate_openapi(filename)
+         if not self.bucket_name:
+             raise ValueError('Please specify env var EXTENSIONS_BUCKET for location to upload openapi spec')
+

          self.openapi_file_gcs = self.upload_to_gcs(filename)

@@ -193,15 +200,23 @@ class VertexAIExtensions:
                           open_api_file: str = None,
                           tool_example_file: str = None,
                           runtime_config: dict = None,
-                          service_account: str = None):
+                          service_account: str = None,
+                          project_id: str = None,
+                          bucket_name: str = None):

-         project_id = get_gcp_project()
+         project_id = project_id or get_gcp_project()
+         log.info(f"Creating extension within {project_id=}")
          extension_name = f"projects/{project_id}/locations/us-central1/extensions/{validate_extension_id(display_name)}"

-         listed_extensions = self.list_extensions()
+         if bucket_name:
+             log.info(f"Setting extension bucket name to {bucket_name}")
+             self.bucket_name = bucket_name
+
+         listed_extensions = self.list_extensions(project_id)
+         log.info(f"Listing extensions:\n {listed_extensions}")
          for ext in listed_extensions:
-             if ext.get('resource_name') == extension_name:
-                 raise NameError(f"resouce_name {extension_name} already exists. Delete it or rename your new extension")
+             if ext.get('display_name') == display_name:
+                 raise NameError(f"display_name {display_name} already exists. Delete it or rename your new extension")

          if open_api_file:
              self.upload_openapi_file(open_api_file)
@@ -233,8 +248,8 @@ class VertexAIExtensions:

          return extension.resource_name

-     def execute_extension(self, operation_id: str, operation_params: dict, extension_id: str=None):
-         init_vertex(location=self.location)
+     def execute_extension(self, operation_id: str, operation_params: dict, extension_id: str=None, project_id: str=None):
+         init_vertex(location=self.location, project_id=project_id)

          if not extension_id:
              extension_name = self.created_extension.resource_name
@@ -243,7 +258,7 @@
          else:
              extension_id = str(extension_id)
              if not extension_id.startswith("projects/"):
-                 project_id = get_gcp_project()
+                 project_id = project_id or get_gcp_project()
                  extension_name = f"projects/{project_id}/locations/{self.location}/extensions/{extension_id}"
              else:
                  extension_name = extension_id
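A sketch of the resulting multi-project flow, mirroring what the new CLI handler calls; project, bucket, and file names are placeholders:

```python
from sunholo.vertex import VertexAIExtensions

# Placeholder values throughout; __init__ now also runs init_vertex.
vex = VertexAIExtensions(project_id="my-gcp-project")

vex.create_extension(
    "my-extension",                       # display_name must now be unique
    description="Example extension",
    open_api_file="openapi.yaml",
    tool_example_file="tool_examples.yaml",
    service_account="sa@my-gcp-project.iam.gserviceaccount.com",
    project_id="my-gcp-project",
    bucket_name="my-extensions-bucket",   # overrides EXTENSIONS_BUCKET
)
print(vex.list_extensions("my-gcp-project"))
```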
sunholo/vertex/init.py CHANGED
@@ -1,5 +1,6 @@
  from ..logging import log
  from ..utils.gcp_project import get_gcp_project
+ from ..auth.refresh import get_default_email

  import os

  def init_genai():
@@ -19,7 +20,7 @@ def init_genai():

      genai.configure(api_key=GOOGLE_API_KEY)

- def init_vertex(gcp_config=None, location="eu"):
+ def init_vertex(gcp_config=None, location="eu", project_id=None):
      """
      Initializes the Vertex AI environment using the provided Google Cloud Platform configuration.

@@ -62,6 +63,10 @@ def init_vertex(gcp_config=None, location="eu"):
          project_id = gcp_config.get('project_id')
          location = gcp_config.get('location') or location
      else:
-         project_id = get_gcp_project()
+         project_id = project_id or get_gcp_project()
+
+     log.info(f"Auth with email: {get_default_email()} in {project_id}")

      vertexai.init(project=project_id, location=location)
+
+

sunholo-0.76.3.dist-info/METADATA CHANGED
@@ -1,9 +1,9 @@
  Metadata-Version: 2.1
  Name: sunholo
- Version: 0.75.1
+ Version: 0.76.3
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
  Home-page: https://github.com/sunholo-data/sunholo-py
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.75.1.tar.gz
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.76.3.tar.gz
  Author: Holosun ApS
  Author-email: multivac@sunholo.com
  License: Apache License, Version 2.0
@@ -25,6 +25,7 @@ Requires-Dist: langchain-experimental >0.0.60
  Requires-Dist: langchain-community
  Provides-Extra: all
  Requires-Dist: asyncpg ; extra == 'all'
+ Requires-Dist: azure-storage-blob ; extra == 'all'
  Requires-Dist: fastapi ; extra == 'all'
  Requires-Dist: flask ; extra == 'all'
  Requires-Dist: google-auth ; extra == 'all'
@@ -71,6 +72,8 @@ Requires-Dist: tiktoken ; extra == 'all'
  Requires-Dist: unstructured[local-inference] ==0.14.9 ; extra == 'all'
  Provides-Extra: anthropic
  Requires-Dist: langchain-anthropic >=0.1.13 ; extra == 'anthropic'
+ Provides-Extra: azure
+ Requires-Dist: azure-storage-blob ; extra == 'azure'
  Provides-Extra: cli
  Requires-Dist: jsonschema >=4.21.1 ; extra == 'cli'
  Requires-Dist: rich ; extra == 'cli'

sunholo-0.76.3.dist-info/RECORD CHANGED
@@ -19,23 +19,28 @@ sunholo/archive/__init__.py,sha256=qNHWm5rGPVOlxZBZCpA1wTYPbalizRT7f8X4rs2t290,3
  sunholo/archive/archive.py,sha256=C-UhG5x-XtZ8VheQp92IYJqgD0V3NFQjniqlit94t18,1197
  sunholo/auth/__init__.py,sha256=Y4Wpd6m0d3R7U7Ser51drO0Eg7VrfSS2VphZxRgtih8,70
  sunholo/auth/gcloud.py,sha256=PdbwkuTdRi4RKBmgG9uwsReegqC4VG15_tw5uzmA7Fs,298
+ sunholo/auth/refresh.py,sha256=uOdT7oQRVl0YsUP__NXj6PdUdLyXFSv2ylwF283esuw,1831
  sunholo/auth/run.py,sha256=SG53ToQJ8hyjdN4634osfvDEUv5gJU6dlHe4nGwMMYU,2612
+ sunholo/azure/__init__.py,sha256=S1WQ5jndzNgzhSBh9UpX_yw7hRVm3hCzkAWNxUdK4dA,48
+ sunholo/azure/event_grid.py,sha256=Gky7D5a-xxMzbcst_wOFfcI8AH5qOzWbKbt5iqOTr6U,2606
  sunholo/bots/__init__.py,sha256=EMFd7e2z68l6pzYOnkzHbLd2xJRvxTKFRNCTuhZ8hIw,130
  sunholo/bots/discord.py,sha256=cCFae5K1BCa6JVkWGLh_iZ9qFO1JpXb6K4eJrlDfEro,2442
  sunholo/bots/github_webhook.py,sha256=5pQPRLM_wxxcILVaIzUDV8Kt7Arcm2dL1r1kMMHA524,9629
  sunholo/bots/webapp.py,sha256=EIMxdAJ_xtufwJmvnn7N_Fb_1hZ9DjhJ0Kf_hp02vEU,1926
- sunholo/chunker/__init__.py,sha256=yWYwpejyYxDpZv1joTrFMsh2SWAkd0z7a1VKtmOfMhA,77
- sunholo/chunker/data_to_embed_pubsub.py,sha256=-bXtm3Tn6YczUWL9kSAc6OWdD6dDY2rKhgpD-90H6ms,4203
+ sunholo/chunker/__init__.py,sha256=A5canS0XPgisHu0OZ7sVdILgEHGzgH9kpkDi4oBwLZk,135
+ sunholo/chunker/azure.py,sha256=mfY1-g_J21d1Ubx0Pt89KGk5PBGGjpbhRUALfP2jIHw,1666
  sunholo/chunker/doc_handling.py,sha256=rIyknpzDyj5A0u_DqSQVD_CXLRNZPOU6TCL4bhCdjOI,8563
  sunholo/chunker/images.py,sha256=Xmh1vwHrVhoXm5iH2dhCc52O8YgdzE8KrDSdL-pGnp8,1861
  sunholo/chunker/loaders.py,sha256=xiToUVgPz2ZzcqpUAq7aNP3PTenb_rBUAFzu0JPycIg,10268
- sunholo/chunker/message_data.py,sha256=X6aA4yX5aGN_mEvsDPWvdYRqqn5GO1BU9QhT9w5A0ec,6789
+ sunholo/chunker/message_data.py,sha256=fDiwO_0HmuIq_1tZFK98ZOff6NAYCydWF8mjJ4Rx3Kk,7000
  sunholo/chunker/pdfs.py,sha256=daCZ1xjn1YvxlifIyxskWNpLJLe-Q9D_Jq12MWx3tZo,2473
+ sunholo/chunker/process_chunker_data.py,sha256=lomCmWPzFf_-zHr2-iRMhZ_cj1b2q04UsG6ynoQCJ08,3333
  sunholo/chunker/publish.py,sha256=tiO615A2uo_ZjzdFDzNH1PL_1kJeLMUQwLJ4w67rNIc,2932
+ sunholo/chunker/pubsub.py,sha256=XgLAuOFNDSqKEBvzRa0TSylZdPecRVHMp0nmmQ_OVco,1005
  sunholo/chunker/splitter.py,sha256=jtGfi_ZdhVdyFhfw0e4ynEpmwIyrxQtV63OituYWy6o,6729
  sunholo/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sunholo/cli/chat_vac.py,sha256=MjwGJQUJOkHV4vLAlhyYVQ02JoI5pE7zaLSSaBfcTco,23019
- sunholo/cli/cli.py,sha256=u70fcSQzQx2iPvE23SVCVYRFabmZ-XtgEd6vHcrABi0,3725
+ sunholo/cli/cli.py,sha256=3ZMcsR1VLCdrsfm0zGBQ9TKqO5qkOrtZ6-iVNmr6f_8,3820
  sunholo/cli/cli_init.py,sha256=JMZ9AX2cPDZ-_mv3adiv2ToFVNyRPtjk9Biszl1kiR0,2358
  sunholo/cli/configs.py,sha256=QUM9DvKOdZmEQRM5uI3Nh887T0YDiSMr7O240zTLqws,4546
  sunholo/cli/deploy.py,sha256=zxdwUsRTRMC8U5vyRv0JiKBLFn84Ug_Tc88-_h9hJSs,1609
@@ -44,6 +49,7 @@ sunholo/cli/merge_texts.py,sha256=U9vdMwKmcPoc6iPOWX5MKSxn49dNGbNzVLw8ui5PhEU,18
  sunholo/cli/run_proxy.py,sha256=OeR12ZfnasbJ-smBZQznmGufoDa4iNjUN9FCFo5JxSc,11520
  sunholo/cli/sun_rich.py,sha256=UpMqeJ0C8i0pkue1AHnnyyX0bFJ9zZeJ7HBR6yhuA8A,54
  sunholo/cli/swagger.py,sha256=absYKAU-7Yd2eiVNUY-g_WLl2zJfeRUNdWQ0oH8M_HM,1564
+ sunholo/cli/vertex.py,sha256=yN1ezeiweV1UIBZYvDYgtyMS0dXH374rYMxVVHid9UY,2101
  sunholo/components/__init__.py,sha256=IDoylb74zFKo6NIS3RQqUl0PDFBGVxM1dfUmO7OJ44U,176
  sunholo/components/llm.py,sha256=T4we3tGmqUj4tPwxQr9M6AXv_BALqZV_dRSvINan-oU,10374
  sunholo/components/retriever.py,sha256=BFUw_6turT3CQJZWv_uXylmH5fHdb0gKfKJrQ_j6MGY,6533
@@ -62,14 +68,14 @@ sunholo/database/sql/sb/delete_source_row.sql,sha256=r6fEuUKdbiLHCDGKSbKINDCpJjs
  sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUtYL5cCxbC2mj_c,255
  sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
  sunholo/discovery_engine/__init__.py,sha256=qUKWzuHApDRJIUoynukVdGRBEq8eC9T7l9a3bWckgI0,59
- sunholo/discovery_engine/chunker_handler.py,sha256=H1HHDqWMCkchJER1_oU9TOLxqf2PygiMO6CL3uKZP64,4563
+ sunholo/discovery_engine/chunker_handler.py,sha256=fDqvXeXr58s6TB75MMIGKKEg42T21ZIeDGuDfJN-vAw,4609
  sunholo/discovery_engine/create_new.py,sha256=7oZG78T6lW0EspRzlo7-qRyXFSuFxDn2dfSAVEaqlqY,978
  sunholo/discovery_engine/discovery_engine_client.py,sha256=YYsFeaW41l8jmWCruQnYxJGKEYBZ7dduTBDhdxI63hQ,17719
  sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
  sunholo/embedder/embed_chunk.py,sha256=d_dIzeNF630Q0Ar-u1hxos60s0tLIImJccAvuo_LTIw,6814
  sunholo/gcs/__init__.py,sha256=SZvbsMFDko40sIRHTHppA37IijvJTae54vrhooEF5-4,90
  sunholo/gcs/add_file.py,sha256=vWRjxuHBQkrPNrr9tRSFGT0N_nVIw120mqDEHiaHwuQ,7115
- sunholo/gcs/download_url.py,sha256=Kg9EdPnc---YSUTAZEdzJeITjDtQSLMYwb4uiU9LhIQ,6440
+ sunholo/gcs/download_url.py,sha256=iCIPESi2viQ-TcCINpbJXxUt7XJFFpF0KiVgSA6zFis,5228
  sunholo/gcs/metadata.py,sha256=C9sMPsHsq1ETetdQCqB3EBs3Kws8b8QHS9L7ei_v5aw,891
  sunholo/invoke/__init__.py,sha256=Dxivd9cU92X4v2JAZet4f7L2RJ5l_30rt9t2NiD-iLA,55
  sunholo/invoke/invoke_vac_utils.py,sha256=0JkCZDBEkRImzuB-nf70dF75t0WKtgA9G4TdaQJUB08,5240
@@ -79,7 +85,7 @@ sunholo/langfuse/prompts.py,sha256=EkbzSw9Jr05ULMsRDoGOp-frbtCZpnvdYSJEYNpzfX8,1
  sunholo/llamaindex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sunholo/llamaindex/generate.py,sha256=l1Picr-hVwkmAUD7XmTCa63qY9ERliFHQXwyX3BqB2Q,686
  sunholo/llamaindex/get_files.py,sha256=6rhXCDqQ_lrIapISQ_OYQDjiSATXvS_9m3qq53-oIl0,781
- sunholo/llamaindex/import_files.py,sha256=Iy_wkZCUSyrue_tAEHgnYaKDgg3-5GVygokHn3kd134,5747
+ sunholo/llamaindex/import_files.py,sha256=tzwOZdoJ6vHDQ-Rc2lSr3XNfm-ddmOxOITNzFLNzmoY,5773
  sunholo/lookup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  sunholo/lookup/model_lookup.yaml,sha256=O7o-jP53MLA06C8pI-ILwERShO-xf6z_258wtpZBv6A,739
  sunholo/patches/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -114,13 +120,13 @@ sunholo/utils/timedelta.py,sha256=BbLabEx7_rbErj_YbNM0MBcaFN76DC4PTe4zD2ucezg,49
  sunholo/utils/user_ids.py,sha256=SQd5_H7FE7vcTZp9AQuQDWBXd4FEEd7TeVMQe1H4Ny8,292
  sunholo/utils/version.py,sha256=P1QAJQdZfT2cMqdTSmXmcxrD2PssMPEGM-WI6083Fck,237
  sunholo/vertex/__init__.py,sha256=XH7FUKxdIgN9H2iDcWxL3sRnVHC3297G24RqEn4Ob0Y,240
- sunholo/vertex/extensions_class.py,sha256=vjYOaqU-KQsqkDsJSWBmydzMA-kY-PqZ0j_GUb8ZlNY,16884
- sunholo/vertex/init.py,sha256=-w7b9GKsyJnAJpYHYz6_zBUtmeJeLXlEkgOfwoe4DEI,2715
+ sunholo/vertex/extensions_class.py,sha256=cVpr0AbbBQV9WWZ9_X7S52aXclXNxvHywHDoBjDTxl8,17802
+ sunholo/vertex/init.py,sha256=aLdNjrX3bUPfnWRhKUg5KUxSu0Qnq2YvuFNsgml4QEY,2866
  sunholo/vertex/memory_tools.py,sha256=pomHrDKqvY8MZxfUqoEwhdlpCvSGP6KmFJMVKOimXjs,6842
  sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
- sunholo-0.75.1.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
- sunholo-0.75.1.dist-info/METADATA,sha256=GDyIzrBHNdyewLXAJ0_PcLkgMlp6scIbI5sEL8izPrY,7010
- sunholo-0.75.1.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
- sunholo-0.75.1.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
- sunholo-0.75.1.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
- sunholo-0.75.1.dist-info/RECORD,,
+ sunholo-0.76.3.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+ sunholo-0.76.3.dist-info/METADATA,sha256=SHLQ84yDXYXlm3Mb1sCqMUZtwXxzsd2UzgEwrd-fU_4,7136
+ sunholo-0.76.3.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
+ sunholo-0.76.3.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+ sunholo-0.76.3.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+ sunholo-0.76.3.dist-info/RECORD,,