trustgraph-vertexai 1.3.19__tar.gz → 1.8.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/PKG-INFO +2 -2
- {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/pyproject.toml +1 -1
- trustgraph_vertexai-1.8.4/trustgraph/model/text_completion/vertexai/llm.py +396 -0
- trustgraph_vertexai-1.8.4/trustgraph/vertexai_version.py +1 -0
- {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph_vertexai.egg-info/PKG-INFO +2 -2
- {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph_vertexai.egg-info/requires.txt +1 -1
- trustgraph_vertexai-1.3.19/trustgraph/model/text_completion/vertexai/llm.py +0 -241
- trustgraph_vertexai-1.3.19/trustgraph/vertexai_version.py +0 -1
- {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/README.md +0 -0
- {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/setup.cfg +0 -0
- {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph/model/text_completion/vertexai/__init__.py +0 -0
- {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph/model/text_completion/vertexai/__main__.py +0 -0
- {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph_vertexai.egg-info/SOURCES.txt +0 -0
- {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph_vertexai.egg-info/dependency_links.txt +0 -0
- {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph_vertexai.egg-info/entry_points.txt +0 -0
- {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph_vertexai.egg-info/top_level.txt +0 -0
{trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: trustgraph-vertexai
-Version: 1.3.19
+Version: 1.8.4
 Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
 Author-email: "trustgraph.ai" <security@trustgraph.ai>
 Project-URL: Homepage, https://github.com/trustgraph-ai/trustgraph
@@ -8,7 +8,7 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
-Requires-Dist: trustgraph-base<1.
+Requires-Dist: trustgraph-base<1.9,>=1.8
 Requires-Dist: pulsar-client
 Requires-Dist: google-cloud-aiplatform
 Requires-Dist: prometheus-client

{trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/pyproject.toml
RENAMED

@@ -10,7 +10,7 @@ description = "TrustGraph provides a means to run a pipeline of flexible AI proc
 readme = "README.md"
 requires-python = ">=3.8"
 dependencies = [
-    "trustgraph-base>=1.
+    "trustgraph-base>=1.8,<1.9",
     "pulsar-client",
     "google-cloud-aiplatform",
     "prometheus-client",

trustgraph_vertexai-1.8.4/trustgraph/model/text_completion/vertexai/llm.py

@@ -0,0 +1,396 @@
+"""
+Simple LLM service, performs text prompt completion using VertexAI on
+Google Cloud. Input is prompt, output is response.
+Supports both Google's Gemini models and Anthropic's Claude models.
+"""
+
+#
+# Somewhat perplexed by the Google Cloud SDK choices. We're going off this
+# one, which uses the google-cloud-aiplatform library:
+# https://cloud.google.com/python/docs/reference/vertexai/1.94.0
+# It seems it is possible to invoke VertexAI from the google-genai
+# SDK too:
+# https://googleapis.github.io/python-genai/genai.html#module-genai.client
+# That would make this code look very much like the GoogleAIStudio
+# code. And maybe not reliant on the google-cloud-aiplatform library?
+#
+# This module's imports bring in a lot of libraries.
+
+from google.oauth2 import service_account
+import google.auth
+import google.api_core.exceptions
+import vertexai
+import logging
+
+# Why is preview here?
+from vertexai.generative_models import (
+    Content, FunctionDeclaration, GenerativeModel, GenerationConfig,
+    HarmCategory, HarmBlockThreshold, Part, Tool, SafetySetting,
+)
+
+# Added for Anthropic model support
+from anthropic import AnthropicVertex, RateLimitError
+
+from .... exceptions import TooManyRequests
+from .... base import LlmService, LlmResult, LlmChunk
+
+# Module logger
+logger = logging.getLogger(__name__)
+
+default_ident = "text-completion"
+
+default_model = 'gemini-1.5-flash-001'
+default_region = 'us-central1'
+default_temperature = 0.0
+default_max_output = 8192
+default_private_key = "private.json"
+
+class Processor(LlmService):
+
+    def __init__(self, **params):
+
+        region = params.get("region", default_region)
+        model = params.get("model", default_model)
+        private_key = params.get("private_key", default_private_key)
+        temperature = params.get("temperature", default_temperature)
+        max_output = params.get("max_output", default_max_output)
+
+        if private_key is None:
+            logger.warning("Private key file not specified, using Application Default Credentials")
+
+        super(Processor, self).__init__(**params)
+
+        # Store default model and configuration parameters
+        self.default_model = model
+        self.region = region
+        self.temperature = temperature
+        self.max_output = max_output
+        self.private_key = private_key
+
+        # Model client caches
+        self.model_clients = {}  # Cache for model instances
+        self.generation_configs = {}  # Cache for generation configs (Gemini only)
+        self.anthropic_client = None  # Single Anthropic client (handles multiple models)
+
+        # Shared parameters for both model types
+        self.api_params = {
+            "temperature": temperature,
+            "top_p": 1.0,
+            "top_k": 32,
+            "max_output_tokens": max_output,
+        }
+
+        logger.info("Initializing VertexAI...")
+
+        # Unified credential and project ID loading
+        if private_key:
+            credentials = (
+                service_account.Credentials.from_service_account_file(
+                    private_key
+                )
+            )
+            project_id = credentials.project_id
+        else:
+            credentials, project_id = google.auth.default()
+
+        if not project_id:
+            raise RuntimeError(
+                "Could not determine Google Cloud project ID. "
+                "Ensure it's set in your environment or service account."
+            )
+
+        # Store credentials and project info for later use
+        self.credentials = credentials
+        self.project_id = project_id
+
+        # Initialize Vertex AI SDK for Gemini models
+        init_kwargs = {'location': region, 'project': project_id}
+        if credentials and private_key:  # Pass credentials only if from a file
+            init_kwargs['credentials'] = credentials
+
+        vertexai.init(**init_kwargs)
+
+        # Pre-initialize Anthropic client if needed (single client handles all Claude models)
+        if 'claude' in self.default_model.lower():
+            self._get_anthropic_client()
+
+        # Safety settings for Gemini models
+        block_level = HarmBlockThreshold.BLOCK_ONLY_HIGH
+        self.safety_settings = [
+            SafetySetting(
+                category = HarmCategory.HARM_CATEGORY_HARASSMENT,
+                threshold = block_level,
+            ),
+            SafetySetting(
+                category = HarmCategory.HARM_CATEGORY_HATE_SPEECH,
+                threshold = block_level,
+            ),
+            SafetySetting(
+                category = HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
+                threshold = block_level,
+            ),
+            SafetySetting(
+                category = HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
+                threshold = block_level,
+            ),
+        ]
+
+        logger.info("VertexAI initialization complete")
+
+    def _get_anthropic_client(self):
+        """Get or create the Anthropic client (single client for all Claude models)"""
+        if self.anthropic_client is None:
+            logger.info(f"Initializing AnthropicVertex client")
+            anthropic_kwargs = {'region': self.region, 'project_id': self.project_id}
+            if self.credentials and self.private_key:  # Pass credentials only if from a file
+                anthropic_kwargs['credentials'] = self.credentials
+                logger.debug(f"Using service account credentials for Anthropic models")
+            else:
+                logger.debug(f"Using Application Default Credentials for Anthropic models")
+
+            self.anthropic_client = AnthropicVertex(**anthropic_kwargs)
+
+        return self.anthropic_client
+
+    def _get_gemini_model(self, model_name, temperature=None):
+        """Get or create a Gemini model instance"""
+        if model_name not in self.model_clients:
+            logger.info(f"Creating GenerativeModel instance for '{model_name}'")
+            self.model_clients[model_name] = GenerativeModel(model_name)
+
+        # Use provided temperature or fall back to default
+        effective_temperature = temperature if temperature is not None else self.temperature
+
+        # Create generation config with the effective temperature
+        generation_config = GenerationConfig(
+            temperature=effective_temperature,
+            top_p=1.0,
+            top_k=10,
+            candidate_count=1,
+            max_output_tokens=self.max_output,
+        )
+
+        return self.model_clients[model_name], generation_config
+
+    async def generate_content(self, system, prompt, model=None, temperature=None):
+
+        # Use provided model or fall back to default
+        model_name = model or self.default_model
+        # Use provided temperature or fall back to default
+        effective_temperature = temperature if temperature is not None else self.temperature
+
+        logger.debug(f"Using model: {model_name}")
+        logger.debug(f"Using temperature: {effective_temperature}")
+
+        try:
+            if 'claude' in model_name.lower():
+                # Anthropic API uses a dedicated system prompt
+                logger.debug(f"Sending request to Anthropic model '{model_name}'...")
+                client = self._get_anthropic_client()
+
+                response = client.messages.create(
+                    model=model_name,
+                    system=system,
+                    messages=[{"role": "user", "content": prompt}],
+                    max_tokens=self.api_params['max_output_tokens'],
+                    temperature=effective_temperature,
+                    top_p=self.api_params['top_p'],
+                    top_k=self.api_params['top_k'],
+                )
+
+                resp = LlmResult(
+                    text=response.content[0].text,
+                    in_token=response.usage.input_tokens,
+                    out_token=response.usage.output_tokens,
+                    model=model_name
+                )
+            else:
+                # Gemini API combines system and user prompts
+                logger.debug(f"Sending request to Gemini model '{model_name}'...")
+                full_prompt = system + "\n\n" + prompt
+
+                llm, generation_config = self._get_gemini_model(model_name, effective_temperature)
+
+                response = llm.generate_content(
+                    full_prompt, generation_config = generation_config,
+                    safety_settings = self.safety_settings,
+                )
+
+                resp = LlmResult(
+                    text = response.text,
+                    in_token = response.usage_metadata.prompt_token_count,
+                    out_token = response.usage_metadata.candidates_token_count,
+                    model = model_name
+                )
+
+            logger.info(f"Input Tokens: {resp.in_token}")
+            logger.info(f"Output Tokens: {resp.out_token}")
+            logger.debug("Send response...")
+
+            return resp
+
+        except (google.api_core.exceptions.ResourceExhausted, RateLimitError) as e:
+            logger.warning(f"Hit rate limit: {e}")
+            # Leave rate limit retries to the base handler
+            raise TooManyRequests()
+
+        except Exception as e:
+            # Apart from rate limits, treat all exceptions as unrecoverable
+            logger.error(f"VertexAI LLM exception: {e}", exc_info=True)
+            raise e
+
+    def supports_streaming(self):
+        """VertexAI supports streaming for both Gemini and Claude models"""
+        return True
+
+    async def generate_content_stream(self, system, prompt, model=None, temperature=None):
+        """
+        Stream content generation from VertexAI (Gemini or Claude).
+        Yields LlmChunk objects with is_final=True on the last chunk.
+        """
+        # Use provided model or fall back to default
+        model_name = model or self.default_model
+        # Use provided temperature or fall back to default
+        effective_temperature = temperature if temperature is not None else self.temperature
+
+        logger.debug(f"Using model (streaming): {model_name}")
+        logger.debug(f"Using temperature: {effective_temperature}")
+
+        try:
+            if 'claude' in model_name.lower():
+                # Claude/Anthropic streaming
+                logger.debug(f"Streaming request to Anthropic model '{model_name}'...")
+                client = self._get_anthropic_client()
+
+                total_in_tokens = 0
+                total_out_tokens = 0
+
+                with client.messages.stream(
+                    model=model_name,
+                    system=system,
+                    messages=[{"role": "user", "content": prompt}],
+                    max_tokens=self.api_params['max_output_tokens'],
+                    temperature=effective_temperature,
+                    top_p=self.api_params['top_p'],
+                    top_k=self.api_params['top_k'],
+                ) as stream:
+                    # Stream text chunks
+                    for text in stream.text_stream:
+                        yield LlmChunk(
+                            text=text,
+                            in_token=None,
+                            out_token=None,
+                            model=model_name,
+                            is_final=False
+                        )
+
+                    # Get final message with token counts
+                    final_message = stream.get_final_message()
+                    total_in_tokens = final_message.usage.input_tokens
+                    total_out_tokens = final_message.usage.output_tokens
+
+                # Send final chunk with token counts
+                yield LlmChunk(
+                    text="",
+                    in_token=total_in_tokens,
+                    out_token=total_out_tokens,
+                    model=model_name,
+                    is_final=True
+                )
+
+                logger.info(f"Input Tokens: {total_in_tokens}")
+                logger.info(f"Output Tokens: {total_out_tokens}")
+
+            else:
+                # Gemini streaming
+                logger.debug(f"Streaming request to Gemini model '{model_name}'...")
+                full_prompt = system + "\n\n" + prompt
+
+                llm, generation_config = self._get_gemini_model(model_name, effective_temperature)
+
+                response = llm.generate_content(
+                    full_prompt,
+                    generation_config=generation_config,
+                    safety_settings=self.safety_settings,
+                    stream=True  # Enable streaming
+                )
+
+                total_in_tokens = 0
+                total_out_tokens = 0
+
+                # Stream chunks
+                for chunk in response:
+                    if chunk.text:
+                        yield LlmChunk(
+                            text=chunk.text,
+                            in_token=None,
+                            out_token=None,
+                            model=model_name,
+                            is_final=False
+                        )
+
+                    # Accumulate token counts if available
+                    if hasattr(chunk, 'usage_metadata') and chunk.usage_metadata:
+                        if hasattr(chunk.usage_metadata, 'prompt_token_count'):
+                            total_in_tokens = chunk.usage_metadata.prompt_token_count
+                        if hasattr(chunk.usage_metadata, 'candidates_token_count'):
+                            total_out_tokens = chunk.usage_metadata.candidates_token_count
+
+                # Send final chunk with token counts
+                yield LlmChunk(
+                    text="",
+                    in_token=total_in_tokens,
+                    out_token=total_out_tokens,
+                    model=model_name,
+                    is_final=True
+                )
+
+                logger.info(f"Input Tokens: {total_in_tokens}")
+                logger.info(f"Output Tokens: {total_out_tokens}")
+
+        except (google.api_core.exceptions.ResourceExhausted, RateLimitError) as e:
+            logger.warning(f"Hit rate limit during streaming: {e}")
+            raise TooManyRequests()
+
+        except Exception as e:
+            logger.error(f"VertexAI streaming exception: {e}", exc_info=True)
+            raise e
+
+    @staticmethod
+    def add_args(parser):
+
+        LlmService.add_args(parser)
+
+        parser.add_argument(
+            '-m', '--model',
+            default=default_model,
+            help=f'LLM model (e.g., gemini-1.5-flash-001, claude-3-sonnet@20240229) (default: {default_model})'
+        )
+
+        parser.add_argument(
+            '-k', '--private-key',
+            help=f'Google Cloud private JSON file (optional, uses ADC if not provided)'
+        )

+        parser.add_argument(
+            '-r', '--region',
+            default=default_region,
+            help=f'Google Cloud region (default: {default_region})',
+        )
+
+        parser.add_argument(
+            '-t', '--temperature',
+            type=float,
+            default=default_temperature,
+            help=f'LLM temperature parameter (default: {default_temperature})'
+        )
+
+        parser.add_argument(
+            '-x', '--max-output',
+            type=int,
+            default=default_max_output,
+            help=f'LLM max output tokens (default: {default_max_output})'
+        )
+
+def run():
+    Processor.launch(default_ident, __doc__)

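To illustrate what the rewritten module means for callers, here is a minimal usage sketch. It is not part of the package diff; the setup values shown are assumptions. It supposes a working Google Cloud environment and that constructing Processor directly with only these keyword arguments is sufficient; in practice the LlmService base class may require further settings (for example Pulsar connection details) that this diff does not show.

# Hypothetical usage sketch for the 1.8.4 VertexAI service module.
import asyncio

from trustgraph.model.text_completion.vertexai.llm import Processor

async def main():
    # Assumed constructor arguments; the base LlmService may need more.
    proc = Processor(
        model="gemini-1.5-flash-001",   # default model for requests
        region="us-central1",
        private_key=None,               # None -> Application Default Credentials
        temperature=0.0,
        max_output=8192,
    )

    # Non-streaming call; model and temperature can now be overridden per request.
    result = await proc.generate_content(
        system="You are a terse assistant.",
        prompt="Say hello.",
        model="claude-3-sonnet@20240229",   # routed to the AnthropicVertex client
        temperature=0.2,
    )
    print(result.text, result.in_token, result.out_token)

    # Streaming call; LlmChunk objects are yielded, the final one carrying token counts.
    async for chunk in proc.generate_content_stream(
        system="You are a terse assistant.",
        prompt="Count to three.",
    ):
        if chunk.is_final:
            print("\ntokens:", chunk.in_token, chunk.out_token)
        else:
            print(chunk.text, end="")

asyncio.run(main())
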
trustgraph_vertexai-1.8.4/trustgraph/vertexai_version.py

@@ -0,0 +1 @@
+__version__ = "1.8.4"

{trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph_vertexai.egg-info/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: trustgraph-vertexai
-Version: 1.3.19
+Version: 1.8.4
 Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
 Author-email: "trustgraph.ai" <security@trustgraph.ai>
 Project-URL: Homepage, https://github.com/trustgraph-ai/trustgraph
@@ -8,7 +8,7 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
-Requires-Dist: trustgraph-base<1.
+Requires-Dist: trustgraph-base<1.9,>=1.8
 Requires-Dist: pulsar-client
 Requires-Dist: google-cloud-aiplatform
 Requires-Dist: prometheus-client

trustgraph_vertexai-1.3.19/trustgraph/model/text_completion/vertexai/llm.py

@@ -1,241 +0,0 @@
-"""
-Simple LLM service, performs text prompt completion using VertexAI on
-Google Cloud. Input is prompt, output is response.
-Supports both Google's Gemini models and Anthropic's Claude models.
-"""
-
-#
-# Somewhat perplexed by the Google Cloud SDK choices. We're going off this
-# one, which uses the google-cloud-aiplatform library:
-# https://cloud.google.com/python/docs/reference/vertexai/1.94.0
-# It seems it is possible to invoke VertexAI from the google-genai
-# SDK too:
-# https://googleapis.github.io/python-genai/genai.html#module-genai.client
-# That would make this code look very much like the GoogleAIStudio
-# code. And maybe not reliant on the google-cloud-aiplatform library?
-#
-# This module's imports bring in a lot of libraries.
-
-from google.oauth2 import service_account
-import google.auth
-import vertexai
-import logging
-
-# Why is preview here?
-from vertexai.generative_models import (
-    Content, FunctionDeclaration, GenerativeModel, GenerationConfig,
-    HarmCategory, HarmBlockThreshold, Part, Tool, SafetySetting,
-)
-
-# Added for Anthropic model support
-from anthropic import AnthropicVertex, RateLimitError
-
-from .... exceptions import TooManyRequests
-from .... base import LlmService, LlmResult
-
-# Module logger
-logger = logging.getLogger(__name__)
-
-default_ident = "text-completion"
-
-default_model = 'gemini-1.5-flash-001'
-default_region = 'us-central1'
-default_temperature = 0.0
-default_max_output = 8192
-default_private_key = "private.json"
-
-class Processor(LlmService):
-
-    def __init__(self, **params):
-
-        region = params.get("region", default_region)
-        model = params.get("model", default_model)
-        private_key = params.get("private_key", default_private_key)
-        temperature = params.get("temperature", default_temperature)
-        max_output = params.get("max_output", default_max_output)
-
-        if private_key is None:
-            logger.warning("Private key file not specified, using Application Default Credentials")
-
-        super(Processor, self).__init__(**params)
-
-        self.model = model
-        self.is_anthropic = 'claude' in self.model.lower()
-
-        # Shared parameters for both model types
-        self.api_params = {
-            "temperature": temperature,
-            "top_p": 1.0,
-            "top_k": 32,
-            "max_output_tokens": max_output,
-        }
-
-        logger.info("Initializing VertexAI...")
-
-        # Unified credential and project ID loading
-        if private_key:
-            credentials = (
-                service_account.Credentials.from_service_account_file(
-                    private_key
-                )
-            )
-            project_id = credentials.project_id
-        else:
-            credentials, project_id = google.auth.default()
-
-        if not project_id:
-            raise RuntimeError(
-                "Could not determine Google Cloud project ID. "
-                "Ensure it's set in your environment or service account."
-            )
-
-        # Initialize the appropriate client based on the model type
-        if self.is_anthropic:
-            logger.info(f"Initializing Anthropic model '{model}' via AnthropicVertex SDK")
-            # Initialize AnthropicVertex with credentials if provided, otherwise use ADC
-            anthropic_kwargs = {'region': region, 'project_id': project_id}
-            if credentials and private_key:  # Pass credentials only if from a file
-                anthropic_kwargs['credentials'] = credentials
-                logger.debug(f"Using service account credentials for Anthropic model")
-            else:
-                logger.debug(f"Using Application Default Credentials for Anthropic model")
-
-            self.llm = AnthropicVertex(**anthropic_kwargs)
-        else:
-            # For Gemini models, initialize the Vertex AI SDK
-            logger.info(f"Initializing Google model '{model}' via Vertex AI SDK")
-            init_kwargs = {'location': region, 'project': project_id}
-            if credentials and private_key:  # Pass credentials only if from a file
-                init_kwargs['credentials'] = credentials
-
-            vertexai.init(**init_kwargs)
-
-            self.llm = GenerativeModel(model)
-
-            self.generation_config = GenerationConfig(
-                temperature=temperature,
-                top_p=1.0,
-                top_k=10,
-                candidate_count=1,
-                max_output_tokens=max_output,
-            )
-
-        # Block none doesn't seem to work
-        block_level = HarmBlockThreshold.BLOCK_ONLY_HIGH
-        # block_level = HarmBlockThreshold.BLOCK_NONE
-
-        self.safety_settings = [
-            SafetySetting(
-                category = HarmCategory.HARM_CATEGORY_HARASSMENT,
-                threshold = block_level,
-            ),
-            SafetySetting(
-                category = HarmCategory.HARM_CATEGORY_HATE_SPEECH,
-                threshold = block_level,
-            ),
-            SafetySetting(
-                category = HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
-                threshold = block_level,
-            ),
-            SafetySetting(
-                category = HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
-                threshold = block_level,
-            ),
-        ]
-
-
-        logger.info("VertexAI initialization complete")
-
-    async def generate_content(self, system, prompt):
-
-        try:
-            if self.is_anthropic:
-                # Anthropic API uses a dedicated system prompt
-                logger.debug("Sending request to Anthropic model...")
-                response = self.llm.messages.create(
-                    model=self.model,
-                    system=system,
-                    messages=[{"role": "user", "content": prompt}],
-                    max_tokens=self.api_params['max_output_tokens'],
-                    temperature=self.api_params['temperature'],
-                    top_p=self.api_params['top_p'],
-                    top_k=self.api_params['top_k'],
-                )
-
-                resp = LlmResult(
-                    text=response.content[0].text,
-                    in_token=response.usage.input_tokens,
-                    out_token=response.usage.output_tokens,
-                    model=self.model
-                )
-            else:
-                # Gemini API combines system and user prompts
-                logger.debug("Sending request to Gemini model...")
-                full_prompt = system + "\n\n" + prompt
-
-                response = self.llm.generate_content(
-                    full_prompt, generation_config = self.generation_config,
-                    safety_settings = self.safety_settings,
-                )
-
-                resp = LlmResult(
-                    text = response.text,
-                    in_token = response.usage_metadata.prompt_token_count,
-                    out_token = response.usage_metadata.candidates_token_count,
-                    model = self.model
-                )
-
-            logger.info(f"Input Tokens: {resp.in_token}")
-            logger.info(f"Output Tokens: {resp.out_token}")
-            logger.debug("Send response...")
-
-            return resp
-
-        except (google.api_core.exceptions.ResourceExhausted, RateLimitError) as e:
-            logger.warning(f"Hit rate limit: {e}")
-            # Leave rate limit retries to the base handler
-            raise TooManyRequests()
-
-        except Exception as e:
-            # Apart from rate limits, treat all exceptions as unrecoverable
-            logger.error(f"VertexAI LLM exception: {e}", exc_info=True)
-            raise e
-
-    @staticmethod
-    def add_args(parser):
-
-        LlmService.add_args(parser)
-
-        parser.add_argument(
-            '-m', '--model',
-            default=default_model,
-            help=f'LLM model (e.g., gemini-1.5-flash-001, claude-3-sonnet@20240229) (default: {default_model})'
-        )
-
-        parser.add_argument(
-            '-k', '--private-key',
-            help=f'Google Cloud private JSON file (optional, uses ADC if not provided)'
-        )
-
-        parser.add_argument(
-            '-r', '--region',
-            default=default_region,
-            help=f'Google Cloud region (default: {default_region})',
-        )
-
-        parser.add_argument(
-            '-t', '--temperature',
-            type=float,
-            default=default_temperature,
-            help=f'LLM temperature parameter (default: {default_temperature})'
-        )
-
-        parser.add_argument(
-            '-x', '--max-output',
-            type=int,
-            default=default_max_output,
-            help=f'LLM max output tokens (default: {default_max_output})'
-        )
-
-def run():
-    Processor.launch(default_ident, __doc__)

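For contrast, a sketch of the call shape the removed 1.3.19 module supported (same hypothetical Processor setup as in the earlier sketch): the model was fixed at construction time, generate_content took only the system and user prompts, and there was no streaming entry point.

# Hypothetical pre-1.8.4 call shape; per-request model/temperature overrides
# and generate_content_stream did not exist in this version.
async def old_style_call(proc):
    result = await proc.generate_content(
        system="You are a terse assistant.",
        prompt="Say hello.",
    )
    return result.text
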
trustgraph_vertexai-1.3.19/trustgraph/vertexai_version.py

@@ -1 +0,0 @@
-__version__ = "1.3.19"

{trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/README.md
File without changes

{trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/setup.cfg
File without changes

{trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph/model/text_completion/vertexai/__init__.py
File without changes

{trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph/model/text_completion/vertexai/__main__.py
File without changes

{trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph_vertexai.egg-info/SOURCES.txt
File without changes

{trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph_vertexai.egg-info/dependency_links.txt
File without changes

{trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph_vertexai.egg-info/entry_points.txt
File without changes

{trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph_vertexai.egg-info/top_level.txt
File without changes