trustgraph-vertexai 1.3.19__tar.gz → 1.8.4__tar.gz

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (16)
  1. {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/PKG-INFO +2 -2
  2. {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/pyproject.toml +1 -1
  3. trustgraph_vertexai-1.8.4/trustgraph/model/text_completion/vertexai/llm.py +396 -0
  4. trustgraph_vertexai-1.8.4/trustgraph/vertexai_version.py +1 -0
  5. {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph_vertexai.egg-info/PKG-INFO +2 -2
  6. {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph_vertexai.egg-info/requires.txt +1 -1
  7. trustgraph_vertexai-1.3.19/trustgraph/model/text_completion/vertexai/llm.py +0 -241
  8. trustgraph_vertexai-1.3.19/trustgraph/vertexai_version.py +0 -1
  9. {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/README.md +0 -0
  10. {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/setup.cfg +0 -0
  11. {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph/model/text_completion/vertexai/__init__.py +0 -0
  12. {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph/model/text_completion/vertexai/__main__.py +0 -0
  13. {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph_vertexai.egg-info/SOURCES.txt +0 -0
  14. {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph_vertexai.egg-info/dependency_links.txt +0 -0
  15. {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph_vertexai.egg-info/entry_points.txt +0 -0
  16. {trustgraph_vertexai-1.3.19 → trustgraph_vertexai-1.8.4}/trustgraph_vertexai.egg-info/top_level.txt +0 -0
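
At a glance, this release bumps the trustgraph-base dependency from the 1.3 series to the 1.8 series, moves the package version to 1.8.4, and rewrites trustgraph/model/text_completion/vertexai/llm.py (241 lines removed, 396 added). The rewrite makes the model and temperature optional per-request parameters, caches Gemini GenerativeModel instances per model name, shares a single AnthropicVertex client across Claude models, and adds streaming support via a new generate_content_stream method that yields LlmChunk objects.

A minimal sketch of the new per-request overrides, assuming an already-initialized Processor instance (the service wiring performed by Processor.launch is omitted, and the name "processor" is hypothetical; the signature itself comes from the diff below):

    # Hypothetical call site; generate_content is a coroutine in 1.8.4,
    # and both model and temperature are optional per-request overrides.
    result = await processor.generate_content(
        system="You are a helpful assistant.",
        prompt="Say hello.",
        model="claude-3-sonnet@20240229",  # overrides the default model
        temperature=0.5,                   # overrides the default temperature
    )
    print(result.text, result.in_token, result.out_token)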
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: trustgraph-vertexai
- Version: 1.3.19
+ Version: 1.8.4
  Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
  Author-email: "trustgraph.ai" <security@trustgraph.ai>
  Project-URL: Homepage, https://github.com/trustgraph-ai/trustgraph
@@ -8,7 +8,7 @@ Classifier: Programming Language :: Python :: 3
  Classifier: Operating System :: OS Independent
  Requires-Python: >=3.8
  Description-Content-Type: text/markdown
- Requires-Dist: trustgraph-base<1.4,>=1.3
+ Requires-Dist: trustgraph-base<1.9,>=1.8
  Requires-Dist: pulsar-client
  Requires-Dist: google-cloud-aiplatform
  Requires-Dist: prometheus-client
@@ -10,7 +10,7 @@ description = "TrustGraph provides a means to run a pipeline of flexible AI proc
  readme = "README.md"
  requires-python = ">=3.8"
  dependencies = [
- "trustgraph-base>=1.3,<1.4",
+ "trustgraph-base>=1.8,<1.9",
  "pulsar-client",
  "google-cloud-aiplatform",
  "prometheus-client",
@@ -0,0 +1,396 @@
+ """
+ Simple LLM service, performs text prompt completion using VertexAI on
+ Google Cloud. Input is prompt, output is response.
+ Supports both Google's Gemini models and Anthropic's Claude models.
+ """
+
+ #
+ # Somewhat perplexed by the Google Cloud SDK choices. We're going off this
+ # one, which uses the google-cloud-aiplatform library:
+ # https://cloud.google.com/python/docs/reference/vertexai/1.94.0
+ # It seems it is possible to invoke VertexAI from the google-genai
+ # SDK too:
+ # https://googleapis.github.io/python-genai/genai.html#module-genai.client
+ # That would make this code look very much like the GoogleAIStudio
+ # code. And maybe not reliant on the google-cloud-aiplatform library?
+ #
+ # This module's imports bring in a lot of libraries.
+
+ from google.oauth2 import service_account
+ import google.auth
+ import google.api_core.exceptions
+ import vertexai
+ import logging
+
+ # Why is preview here?
+ from vertexai.generative_models import (
+     Content, FunctionDeclaration, GenerativeModel, GenerationConfig,
+     HarmCategory, HarmBlockThreshold, Part, Tool, SafetySetting,
+ )
+
+ # Added for Anthropic model support
+ from anthropic import AnthropicVertex, RateLimitError
+
+ from .... exceptions import TooManyRequests
+ from .... base import LlmService, LlmResult, LlmChunk
+
+ # Module logger
+ logger = logging.getLogger(__name__)
+
+ default_ident = "text-completion"
+
+ default_model = 'gemini-1.5-flash-001'
+ default_region = 'us-central1'
+ default_temperature = 0.0
+ default_max_output = 8192
+ default_private_key = "private.json"
+
+ class Processor(LlmService):
+
+     def __init__(self, **params):
+
+         region = params.get("region", default_region)
+         model = params.get("model", default_model)
+         private_key = params.get("private_key", default_private_key)
+         temperature = params.get("temperature", default_temperature)
+         max_output = params.get("max_output", default_max_output)
+
+         if private_key is None:
+             logger.warning("Private key file not specified, using Application Default Credentials")
+
+         super(Processor, self).__init__(**params)
+
+         # Store default model and configuration parameters
+         self.default_model = model
+         self.region = region
+         self.temperature = temperature
+         self.max_output = max_output
+         self.private_key = private_key
+
+         # Model client caches
+         self.model_clients = {} # Cache for model instances
+         self.generation_configs = {} # Cache for generation configs (Gemini only)
+         self.anthropic_client = None # Single Anthropic client (handles multiple models)
+
+         # Shared parameters for both model types
+         self.api_params = {
+             "temperature": temperature,
+             "top_p": 1.0,
+             "top_k": 32,
+             "max_output_tokens": max_output,
+         }
+
+         logger.info("Initializing VertexAI...")
+
+         # Unified credential and project ID loading
+         if private_key:
+             credentials = (
+                 service_account.Credentials.from_service_account_file(
+                     private_key
+                 )
+             )
+             project_id = credentials.project_id
+         else:
+             credentials, project_id = google.auth.default()
+
+         if not project_id:
+             raise RuntimeError(
+                 "Could not determine Google Cloud project ID. "
+                 "Ensure it's set in your environment or service account."
+             )
+
+         # Store credentials and project info for later use
+         self.credentials = credentials
+         self.project_id = project_id
+
+         # Initialize Vertex AI SDK for Gemini models
+         init_kwargs = {'location': region, 'project': project_id}
+         if credentials and private_key: # Pass credentials only if from a file
+             init_kwargs['credentials'] = credentials
+
+         vertexai.init(**init_kwargs)
+
+         # Pre-initialize Anthropic client if needed (single client handles all Claude models)
+         if 'claude' in self.default_model.lower():
+             self._get_anthropic_client()
+
+         # Safety settings for Gemini models
+         block_level = HarmBlockThreshold.BLOCK_ONLY_HIGH
+         self.safety_settings = [
+             SafetySetting(
+                 category = HarmCategory.HARM_CATEGORY_HARASSMENT,
+                 threshold = block_level,
+             ),
+             SafetySetting(
+                 category = HarmCategory.HARM_CATEGORY_HATE_SPEECH,
+                 threshold = block_level,
+             ),
+             SafetySetting(
+                 category = HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
+                 threshold = block_level,
+             ),
+             SafetySetting(
+                 category = HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
+                 threshold = block_level,
+             ),
+         ]
+
+         logger.info("VertexAI initialization complete")
+
+     def _get_anthropic_client(self):
+         """Get or create the Anthropic client (single client for all Claude models)"""
+         if self.anthropic_client is None:
+             logger.info(f"Initializing AnthropicVertex client")
+             anthropic_kwargs = {'region': self.region, 'project_id': self.project_id}
+             if self.credentials and self.private_key: # Pass credentials only if from a file
+                 anthropic_kwargs['credentials'] = self.credentials
+                 logger.debug(f"Using service account credentials for Anthropic models")
+             else:
+                 logger.debug(f"Using Application Default Credentials for Anthropic models")
+
+             self.anthropic_client = AnthropicVertex(**anthropic_kwargs)
+
+         return self.anthropic_client
+
+     def _get_gemini_model(self, model_name, temperature=None):
+         """Get or create a Gemini model instance"""
+         if model_name not in self.model_clients:
+             logger.info(f"Creating GenerativeModel instance for '{model_name}'")
+             self.model_clients[model_name] = GenerativeModel(model_name)
+
+         # Use provided temperature or fall back to default
+         effective_temperature = temperature if temperature is not None else self.temperature
+
+         # Create generation config with the effective temperature
+         generation_config = GenerationConfig(
+             temperature=effective_temperature,
+             top_p=1.0,
+             top_k=10,
+             candidate_count=1,
+             max_output_tokens=self.max_output,
+         )
+
+         return self.model_clients[model_name], generation_config
+
+     async def generate_content(self, system, prompt, model=None, temperature=None):
+
+         # Use provided model or fall back to default
+         model_name = model or self.default_model
+         # Use provided temperature or fall back to default
+         effective_temperature = temperature if temperature is not None else self.temperature
+
+         logger.debug(f"Using model: {model_name}")
+         logger.debug(f"Using temperature: {effective_temperature}")
+
+         try:
+             if 'claude' in model_name.lower():
+                 # Anthropic API uses a dedicated system prompt
+                 logger.debug(f"Sending request to Anthropic model '{model_name}'...")
+                 client = self._get_anthropic_client()
+
+                 response = client.messages.create(
+                     model=model_name,
+                     system=system,
+                     messages=[{"role": "user", "content": prompt}],
+                     max_tokens=self.api_params['max_output_tokens'],
+                     temperature=effective_temperature,
+                     top_p=self.api_params['top_p'],
+                     top_k=self.api_params['top_k'],
+                 )
+
+                 resp = LlmResult(
+                     text=response.content[0].text,
+                     in_token=response.usage.input_tokens,
+                     out_token=response.usage.output_tokens,
+                     model=model_name
+                 )
+             else:
+                 # Gemini API combines system and user prompts
+                 logger.debug(f"Sending request to Gemini model '{model_name}'...")
+                 full_prompt = system + "\n\n" + prompt
+
+                 llm, generation_config = self._get_gemini_model(model_name, effective_temperature)
+
+                 response = llm.generate_content(
+                     full_prompt, generation_config = generation_config,
+                     safety_settings = self.safety_settings,
+                 )
+
+                 resp = LlmResult(
+                     text = response.text,
+                     in_token = response.usage_metadata.prompt_token_count,
+                     out_token = response.usage_metadata.candidates_token_count,
+                     model = model_name
+                 )
+
+             logger.info(f"Input Tokens: {resp.in_token}")
+             logger.info(f"Output Tokens: {resp.out_token}")
+             logger.debug("Send response...")
+
+             return resp
+
+         except (google.api_core.exceptions.ResourceExhausted, RateLimitError) as e:
+             logger.warning(f"Hit rate limit: {e}")
+             # Leave rate limit retries to the base handler
+             raise TooManyRequests()
+
+         except Exception as e:
+             # Apart from rate limits, treat all exceptions as unrecoverable
+             logger.error(f"VertexAI LLM exception: {e}", exc_info=True)
+             raise e
+
+     def supports_streaming(self):
+         """VertexAI supports streaming for both Gemini and Claude models"""
+         return True
+
+     async def generate_content_stream(self, system, prompt, model=None, temperature=None):
+         """
+         Stream content generation from VertexAI (Gemini or Claude).
+         Yields LlmChunk objects with is_final=True on the last chunk.
+         """
+         # Use provided model or fall back to default
+         model_name = model or self.default_model
+         # Use provided temperature or fall back to default
+         effective_temperature = temperature if temperature is not None else self.temperature
+
+         logger.debug(f"Using model (streaming): {model_name}")
+         logger.debug(f"Using temperature: {effective_temperature}")
+
+         try:
+             if 'claude' in model_name.lower():
+                 # Claude/Anthropic streaming
+                 logger.debug(f"Streaming request to Anthropic model '{model_name}'...")
+                 client = self._get_anthropic_client()
+
+                 total_in_tokens = 0
+                 total_out_tokens = 0
+
+                 with client.messages.stream(
+                     model=model_name,
+                     system=system,
+                     messages=[{"role": "user", "content": prompt}],
+                     max_tokens=self.api_params['max_output_tokens'],
+                     temperature=effective_temperature,
+                     top_p=self.api_params['top_p'],
+                     top_k=self.api_params['top_k'],
+                 ) as stream:
+                     # Stream text chunks
+                     for text in stream.text_stream:
+                         yield LlmChunk(
+                             text=text,
+                             in_token=None,
+                             out_token=None,
+                             model=model_name,
+                             is_final=False
+                         )
+
+                     # Get final message with token counts
+                     final_message = stream.get_final_message()
+                     total_in_tokens = final_message.usage.input_tokens
+                     total_out_tokens = final_message.usage.output_tokens
+
+                 # Send final chunk with token counts
+                 yield LlmChunk(
+                     text="",
+                     in_token=total_in_tokens,
+                     out_token=total_out_tokens,
+                     model=model_name,
+                     is_final=True
+                 )
+
+                 logger.info(f"Input Tokens: {total_in_tokens}")
+                 logger.info(f"Output Tokens: {total_out_tokens}")
+
+             else:
+                 # Gemini streaming
+                 logger.debug(f"Streaming request to Gemini model '{model_name}'...")
+                 full_prompt = system + "\n\n" + prompt
+
+                 llm, generation_config = self._get_gemini_model(model_name, effective_temperature)
+
+                 response = llm.generate_content(
+                     full_prompt,
+                     generation_config=generation_config,
+                     safety_settings=self.safety_settings,
+                     stream=True # Enable streaming
+                 )
+
+                 total_in_tokens = 0
+                 total_out_tokens = 0
+
+                 # Stream chunks
+                 for chunk in response:
+                     if chunk.text:
+                         yield LlmChunk(
+                             text=chunk.text,
+                             in_token=None,
+                             out_token=None,
+                             model=model_name,
+                             is_final=False
+                         )
+
+                     # Accumulate token counts if available
+                     if hasattr(chunk, 'usage_metadata') and chunk.usage_metadata:
+                         if hasattr(chunk.usage_metadata, 'prompt_token_count'):
+                             total_in_tokens = chunk.usage_metadata.prompt_token_count
+                         if hasattr(chunk.usage_metadata, 'candidates_token_count'):
+                             total_out_tokens = chunk.usage_metadata.candidates_token_count
+
+                 # Send final chunk with token counts
+                 yield LlmChunk(
+                     text="",
+                     in_token=total_in_tokens,
+                     out_token=total_out_tokens,
+                     model=model_name,
+                     is_final=True
+                 )
+
+                 logger.info(f"Input Tokens: {total_in_tokens}")
+                 logger.info(f"Output Tokens: {total_out_tokens}")
+
+         except (google.api_core.exceptions.ResourceExhausted, RateLimitError) as e:
+             logger.warning(f"Hit rate limit during streaming: {e}")
+             raise TooManyRequests()
+
+         except Exception as e:
+             logger.error(f"VertexAI streaming exception: {e}", exc_info=True)
+             raise e
+
+     @staticmethod
+     def add_args(parser):
+
+         LlmService.add_args(parser)
+
+         parser.add_argument(
+             '-m', '--model',
+             default=default_model,
+             help=f'LLM model (e.g., gemini-1.5-flash-001, claude-3-sonnet@20240229) (default: {default_model})'
+         )
+
+         parser.add_argument(
+             '-k', '--private-key',
+             help=f'Google Cloud private JSON file (optional, uses ADC if not provided)'
+         )
+
+         parser.add_argument(
+             '-r', '--region',
+             default=default_region,
+             help=f'Google Cloud region (default: {default_region})',
+         )
+
+         parser.add_argument(
+             '-t', '--temperature',
+             type=float,
+             default=default_temperature,
+             help=f'LLM temperature parameter (default: {default_temperature})'
+         )
+
+         parser.add_argument(
+             '-x', '--max-output',
+             type=int,
+             default=default_max_output,
+             help=f'LLM max output tokens (default: {default_max_output})'
+         )
+
+ def run():
+     Processor.launch(default_ident, __doc__)
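
For orientation, here is a sketch of how the streaming API above might be consumed. generate_content_stream is an async generator, so it is driven with "async for"; the LlmChunk fields are taken from the code above, while "processor" is an assumed, already-initialized Processor instance (the service wiring is omitted):

    # Non-final chunks carry text; the final chunk carries the token counts.
    async for chunk in processor.generate_content_stream(
        system="You are a helpful assistant.",
        prompt="Summarize VertexAI in one sentence.",
    ):
        if chunk.is_final:
            print(f"\n[in={chunk.in_token}, out={chunk.out_token}]")
        else:
            print(chunk.text, end="", flush=True)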
@@ -0,0 +1 @@
+ __version__ = "1.8.4"
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: trustgraph-vertexai
- Version: 1.3.19
+ Version: 1.8.4
  Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
  Author-email: "trustgraph.ai" <security@trustgraph.ai>
  Project-URL: Homepage, https://github.com/trustgraph-ai/trustgraph
@@ -8,7 +8,7 @@ Classifier: Programming Language :: Python :: 3
  Classifier: Operating System :: OS Independent
  Requires-Python: >=3.8
  Description-Content-Type: text/markdown
- Requires-Dist: trustgraph-base<1.4,>=1.3
+ Requires-Dist: trustgraph-base<1.9,>=1.8
  Requires-Dist: pulsar-client
  Requires-Dist: google-cloud-aiplatform
  Requires-Dist: prometheus-client
@@ -1,4 +1,4 @@
- trustgraph-base<1.4,>=1.3
+ trustgraph-base<1.9,>=1.8
  pulsar-client
  google-cloud-aiplatform
  prometheus-client
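
For reference, the new range pins trustgraph-base to the 1.8 series only. A quick illustration of the specifier's semantics, using the third-party packaging library (not a dependency of this package; used here purely for illustration):

    from packaging.specifiers import SpecifierSet

    spec = SpecifierSet(">=1.8,<1.9")
    assert "1.8.4" in spec       # any 1.8.x release satisfies the range
    assert "1.9.0" not in spec   # the next minor series does not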
@@ -1,241 +0,0 @@
- """
- Simple LLM service, performs text prompt completion using VertexAI on
- Google Cloud. Input is prompt, output is response.
- Supports both Google's Gemini models and Anthropic's Claude models.
- """
-
- #
- # Somewhat perplexed by the Google Cloud SDK choices. We're going off this
- # one, which uses the google-cloud-aiplatform library:
- # https://cloud.google.com/python/docs/reference/vertexai/1.94.0
- # It seems it is possible to invoke VertexAI from the google-genai
- # SDK too:
- # https://googleapis.github.io/python-genai/genai.html#module-genai.client
- # That would make this code look very much like the GoogleAIStudio
- # code. And maybe not reliant on the google-cloud-aiplatform library?
- #
- # This module's imports bring in a lot of libraries.
-
- from google.oauth2 import service_account
- import google.auth
- import vertexai
- import logging
-
- # Why is preview here?
- from vertexai.generative_models import (
-     Content, FunctionDeclaration, GenerativeModel, GenerationConfig,
-     HarmCategory, HarmBlockThreshold, Part, Tool, SafetySetting,
- )
-
- # Added for Anthropic model support
- from anthropic import AnthropicVertex, RateLimitError
-
- from .... exceptions import TooManyRequests
- from .... base import LlmService, LlmResult
-
- # Module logger
- logger = logging.getLogger(__name__)
-
- default_ident = "text-completion"
-
- default_model = 'gemini-1.5-flash-001'
- default_region = 'us-central1'
- default_temperature = 0.0
- default_max_output = 8192
- default_private_key = "private.json"
-
- class Processor(LlmService):
-
-     def __init__(self, **params):
-
-         region = params.get("region", default_region)
-         model = params.get("model", default_model)
-         private_key = params.get("private_key", default_private_key)
-         temperature = params.get("temperature", default_temperature)
-         max_output = params.get("max_output", default_max_output)
-
-         if private_key is None:
-             logger.warning("Private key file not specified, using Application Default Credentials")
-
-         super(Processor, self).__init__(**params)
-
-         self.model = model
-         self.is_anthropic = 'claude' in self.model.lower()
-
-         # Shared parameters for both model types
-         self.api_params = {
-             "temperature": temperature,
-             "top_p": 1.0,
-             "top_k": 32,
-             "max_output_tokens": max_output,
-         }
-
-         logger.info("Initializing VertexAI...")
-
-         # Unified credential and project ID loading
-         if private_key:
-             credentials = (
-                 service_account.Credentials.from_service_account_file(
-                     private_key
-                 )
-             )
-             project_id = credentials.project_id
-         else:
-             credentials, project_id = google.auth.default()
-
-         if not project_id:
-             raise RuntimeError(
-                 "Could not determine Google Cloud project ID. "
-                 "Ensure it's set in your environment or service account."
-             )
-
-         # Initialize the appropriate client based on the model type
-         if self.is_anthropic:
-             logger.info(f"Initializing Anthropic model '{model}' via AnthropicVertex SDK")
-             # Initialize AnthropicVertex with credentials if provided, otherwise use ADC
-             anthropic_kwargs = {'region': region, 'project_id': project_id}
-             if credentials and private_key: # Pass credentials only if from a file
-                 anthropic_kwargs['credentials'] = credentials
-                 logger.debug(f"Using service account credentials for Anthropic model")
-             else:
-                 logger.debug(f"Using Application Default Credentials for Anthropic model")
-
-             self.llm = AnthropicVertex(**anthropic_kwargs)
-         else:
-             # For Gemini models, initialize the Vertex AI SDK
-             logger.info(f"Initializing Google model '{model}' via Vertex AI SDK")
-             init_kwargs = {'location': region, 'project': project_id}
-             if credentials and private_key: # Pass credentials only if from a file
-                 init_kwargs['credentials'] = credentials
-
-             vertexai.init(**init_kwargs)
-
-             self.llm = GenerativeModel(model)
-
-             self.generation_config = GenerationConfig(
-                 temperature=temperature,
-                 top_p=1.0,
-                 top_k=10,
-                 candidate_count=1,
-                 max_output_tokens=max_output,
-             )
-
-             # Block none doesn't seem to work
-             block_level = HarmBlockThreshold.BLOCK_ONLY_HIGH
-             # block_level = HarmBlockThreshold.BLOCK_NONE
-
-             self.safety_settings = [
-                 SafetySetting(
-                     category = HarmCategory.HARM_CATEGORY_HARASSMENT,
-                     threshold = block_level,
-                 ),
-                 SafetySetting(
-                     category = HarmCategory.HARM_CATEGORY_HATE_SPEECH,
-                     threshold = block_level,
-                 ),
-                 SafetySetting(
-                     category = HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
-                     threshold = block_level,
-                 ),
-                 SafetySetting(
-                     category = HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
-                     threshold = block_level,
-                 ),
-             ]
-
-
-         logger.info("VertexAI initialization complete")
-
-     async def generate_content(self, system, prompt):
-
-         try:
-             if self.is_anthropic:
-                 # Anthropic API uses a dedicated system prompt
-                 logger.debug("Sending request to Anthropic model...")
-                 response = self.llm.messages.create(
-                     model=self.model,
-                     system=system,
-                     messages=[{"role": "user", "content": prompt}],
-                     max_tokens=self.api_params['max_output_tokens'],
-                     temperature=self.api_params['temperature'],
-                     top_p=self.api_params['top_p'],
-                     top_k=self.api_params['top_k'],
-                 )
-
-                 resp = LlmResult(
-                     text=response.content[0].text,
-                     in_token=response.usage.input_tokens,
-                     out_token=response.usage.output_tokens,
-                     model=self.model
-                 )
-             else:
-                 # Gemini API combines system and user prompts
-                 logger.debug("Sending request to Gemini model...")
-                 full_prompt = system + "\n\n" + prompt
-
-                 response = self.llm.generate_content(
-                     full_prompt, generation_config = self.generation_config,
-                     safety_settings = self.safety_settings,
-                 )
-
-                 resp = LlmResult(
-                     text = response.text,
-                     in_token = response.usage_metadata.prompt_token_count,
-                     out_token = response.usage_metadata.candidates_token_count,
-                     model = self.model
-                 )
-
-             logger.info(f"Input Tokens: {resp.in_token}")
-             logger.info(f"Output Tokens: {resp.out_token}")
-             logger.debug("Send response...")
-
-             return resp
-
-         except (google.api_core.exceptions.ResourceExhausted, RateLimitError) as e:
-             logger.warning(f"Hit rate limit: {e}")
-             # Leave rate limit retries to the base handler
-             raise TooManyRequests()
-
-         except Exception as e:
-             # Apart from rate limits, treat all exceptions as unrecoverable
-             logger.error(f"VertexAI LLM exception: {e}", exc_info=True)
-             raise e
-
-     @staticmethod
-     def add_args(parser):
-
-         LlmService.add_args(parser)
-
-         parser.add_argument(
-             '-m', '--model',
-             default=default_model,
-             help=f'LLM model (e.g., gemini-1.5-flash-001, claude-3-sonnet@20240229) (default: {default_model})'
-         )
-
-         parser.add_argument(
-             '-k', '--private-key',
-             help=f'Google Cloud private JSON file (optional, uses ADC if not provided)'
-         )
-
-         parser.add_argument(
-             '-r', '--region',
-             default=default_region,
-             help=f'Google Cloud region (default: {default_region})',
-         )
-
-         parser.add_argument(
-             '-t', '--temperature',
-             type=float,
-             default=default_temperature,
-             help=f'LLM temperature parameter (default: {default_temperature})'
-         )
-
-         parser.add_argument(
-             '-x', '--max-output',
-             type=int,
-             default=default_max_output,
-             help=f'LLM max output tokens (default: {default_max_output})'
-         )
-
- def run():
-     Processor.launch(default_ident, __doc__)
@@ -1 +0,0 @@
- __version__ = "1.3.19"