trustgraph-vertexai 0.22.9__tar.gz → 0.22.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

@@ -1,9 +1,9 @@
 Metadata-Version: 2.1
 Name: trustgraph-vertexai
-Version: 0.22.9
+Version: 0.22.10
 Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
 Home-page: https://github.com/trustgraph-ai/trustgraph
-Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.22.9.tar.gz
+Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.22.10.tar.gz
 Author: trustgraph.ai
 Author-email: security@trustgraph.ai
 Classifier: Programming Language :: Python :: 3
@@ -4,30 +4,50 @@ Simple LLM service, performs text prompt completion using VertexAI on
 Google Cloud. Input is prompt, output is response.
 """
 
+import vertexai
+import time
+from prometheus_client import Histogram
+import os
+
 from google.oauth2 import service_account
 import google
-import vertexai
 
 from vertexai.preview.generative_models import (
-    Content, FunctionDeclaration, GenerativeModel, GenerationConfig,
-    HarmCategory, HarmBlockThreshold, Part, Tool,
+    Content,
+    FunctionDeclaration,
+    GenerativeModel,
+    GenerationConfig,
+    HarmCategory,
+    HarmBlockThreshold,
+    Part,
+    Tool,
 )
 
+from .... schema import TextCompletionRequest, TextCompletionResponse, Error
+from .... schema import text_completion_request_queue
+from .... schema import text_completion_response_queue
+from .... log_level import LogLevel
+from .... base import ConsumerProducer
 from .... exceptions import TooManyRequests
-from .... base import LlmService, LlmResult
 
-default_ident = "text-completion"
+module = ".".join(__name__.split(".")[1:-1])
 
+default_input_queue = text_completion_request_queue
+default_output_queue = text_completion_response_queue
+default_subscriber = module
 default_model = 'gemini-1.0-pro-001'
 default_region = 'us-central1'
 default_temperature = 0.0
 default_max_output = 8192
 default_private_key = "private.json"
 
-class Processor(LlmService):
+class Processor(ConsumerProducer):
 
     def __init__(self, **params):
 
+        input_queue = params.get("input_queue", default_input_queue)
+        output_queue = params.get("output_queue", default_output_queue)
+        subscriber = params.get("subscriber", default_subscriber)
         region = params.get("region", default_region)
         model = params.get("model", default_model)
         private_key = params.get("private_key", default_private_key)
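The `module` expression above derives the default subscriber name from the module's dotted path by dropping the top-level package and the trailing module name. A quick illustration, using a hypothetical dotted name (the real package layout is not shown in this diff):

# Hypothetical module path, used only for illustration.
name = "trustgraph.model.text_completion.vertexai.llm"

# Drop the first and last components, keep the middle.
module = ".".join(name.split(".")[1:-1])

print(module)  # -> model.text_completion.vertexai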
@@ -37,7 +57,28 @@ class Processor(LlmService):
         if private_key is None:
             raise RuntimeError("Private key file not specified")
 
-        super(Processor, self).__init__(**params)
+        super(Processor, self).__init__(
+            **params | {
+                "input_queue": input_queue,
+                "output_queue": output_queue,
+                "subscriber": subscriber,
+                "input_schema": TextCompletionRequest,
+                "output_schema": TextCompletionResponse,
+            }
+        )
+
+        if not hasattr(__class__, "text_completion_metric"):
+            __class__.text_completion_metric = Histogram(
+                'text_completion_duration',
+                'Text completion duration (seconds)',
+                buckets=[
+                    0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
+                    8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+                    17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0,
+                    30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 80.0, 100.0,
+                    120.0
+                ]
+            )
 
         self.parameters = {
             "temperature": temperature,
@@ -69,11 +110,7 @@ class Processor(LlmService):
         print("Initialise VertexAI...", flush=True)
 
         if private_key:
-            credentials = (
-                service_account.Credentials.from_service_account_file(
-                    private_key
-                )
-            )
+            credentials = service_account.Credentials.from_service_account_file(private_key)
         else:
             credentials = None
 
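The credentials object built here is what the Vertex AI SDK's initialiser accepts. A hedged sketch of that initialisation (the project ID is a placeholder; how the processor supplies the project is not visible in this hunk):

import vertexai
from google.oauth2 import service_account

# Load the service-account key named by --private-key;
# "private.json" matches default_private_key in the diff.
credentials = service_account.Credentials.from_service_account_file(
    "private.json"
)

# vertexai.init() binds project, region and credentials for later
# GenerativeModel calls; "my-project" is a placeholder, and
# us-central1 matches default_region.
vertexai.init(
    project="my-project",
    location="us-central1",
    credentials=credentials,
)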
@@ -94,29 +131,50 @@ class Processor(LlmService):
 
         print("Initialisation complete", flush=True)
 
-    async def generate_content(self, system, prompt):
+    async def handle(self, msg):
 
         try:
 
-            prompt = system + "\n\n" + prompt
+            v = msg.value()
 
-            response = self.llm.generate_content(
-                prompt, generation_config=self.generation_config,
-                safety_settings=self.safety_settings
-            )
+            # Sender-produced ID
+
+            id = msg.properties()["id"]
 
-            resp = LlmResult()
-            resp.text = response.text
-            resp.in_token = response.usage_metadata.prompt_token_count
-            resp.out_token = response.usage_metadata.candidates_token_count
-            resp.model = self.model
+            print(f"Handling prompt {id}...", flush=True)
 
-            print(f"Input Tokens: {resp.in_token}", flush=True)
-            print(f"Output Tokens: {resp.out_token}", flush=True)
+            prompt = v.system + "\n\n" + v.prompt
+
+            with __class__.text_completion_metric.time():
+
+                response = self.llm.generate_content(
+                    prompt, generation_config=self.generation_config,
+                    safety_settings=self.safety_settings
+                )
+
+            resp = response.text
+            inputtokens = int(response.usage_metadata.prompt_token_count)
+            outputtokens = int(response.usage_metadata.candidates_token_count)
+            print(resp, flush=True)
+            print(f"Input Tokens: {inputtokens}", flush=True)
+            print(f"Output Tokens: {outputtokens}", flush=True)
 
             print("Send response...", flush=True)
 
-            return resp
+            r = TextCompletionResponse(
+                error=None,
+                response=resp,
+                in_token=inputtokens,
+                out_token=outputtokens,
+                model=self.model
+            )
+
+            await self.send(r, properties={"id": id})
+
+            print("Done.", flush=True)
+
+            # Acknowledge successful processing of the message
+            self.consumer.acknowledge(msg)
 
         except google.api_core.exceptions.ResourceExhausted as e:
 
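The new `handle` method reads `system` and `prompt` from the consumed message, times the completion, and replies with a `TextCompletionResponse` correlated by the sender-produced `id` property. The real `.... schema` definitions are not part of this diff; a dataclass approximation that matches the constructor calls visible here:

from dataclasses import dataclass
from typing import Optional

# Approximations of the schema types; field names are taken from the
# constructor calls in this diff, and the real definitions may differ.
@dataclass
class Error:
    type: str
    message: str

@dataclass
class TextCompletionRequest:
    system: str
    prompt: str

@dataclass
class TextCompletionResponse:
    error: Optional[Error]
    response: Optional[str]
    in_token: Optional[int]
    out_token: Optional[int]
    model: Optional[str]

# Success path: model text plus token counts, no error.
ok = TextCompletionResponse(
    error=None, response="Hello.", in_token=12, out_token=3,
    model="gemini-1.0-pro-001",
)

# Failure path: only the error field is populated.
bad = TextCompletionResponse(
    error=Error(type="llm-error", message="quota exceeded"),
    response=None, in_token=None, out_token=None, model=None,
)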
@@ -128,19 +186,40 @@ class Processor(LlmService):
         except Exception as e:
 
             # Apart from rate limits, treat all exceptions as unrecoverable
+
             print(f"Exception: {e}")
-            raise e
+
+            print("Send error response...", flush=True)
+
+            r = TextCompletionResponse(
+                error=Error(
+                    type = "llm-error",
+                    message = str(e),
+                ),
+                response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
+            )
+
+            await self.send(r, properties={"id": id})
+
+            self.consumer.acknowledge(msg)
 
     @staticmethod
     def add_args(parser):
 
-        LlmService.add_args(parser)
+        ConsumerProducer.add_args(
+            parser, default_input_queue, default_subscriber,
+            default_output_queue,
+        )
 
         parser.add_argument(
             '-m', '--model',
             default=default_model,
             help=f'LLM model (default: {default_model})'
         )
+        # Also: text-bison-32k
 
         parser.add_argument(
             '-k', '--private-key',
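`add_args` layers the processor's own flags on top of the queue options registered by `ConsumerProducer.add_args`. A self-contained argparse sketch of that layering (the base helper below is a stand-in whose signature is only inferred from the call in the diff, and the queue names are placeholders):

import argparse

def base_add_args(parser, default_input, default_subscriber, default_output):
    # Stand-in for ConsumerProducer.add_args: registers queue wiring flags.
    parser.add_argument('-i', '--input-queue', default=default_input)
    parser.add_argument('-s', '--subscriber', default=default_subscriber)
    parser.add_argument('-o', '--output-queue', default=default_output)

default_model = 'gemini-1.0-pro-001'

parser = argparse.ArgumentParser()
base_add_args(
    parser, 'text-completion-request', 'vertexai-subscriber',
    'text-completion-response',
)

# Processor-specific flag, mirroring the hunk above.
parser.add_argument(
    '-m', '--model',
    default=default_model,
    help=f'LLM model (default: {default_model})'
)

args = parser.parse_args([])
print(args.model)  # -> gemini-1.0-pro-001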
@@ -168,5 +247,6 @@ class Processor(LlmService):
         )
 
 def run():
-    Processor.launch(default_ident, __doc__)
+
+    Processor.launch(module, __doc__)
 
@@ -0,0 +1 @@
+__version__ = "0.22.10"
@@ -1,9 +1,9 @@
 Metadata-Version: 2.1
 Name: trustgraph-vertexai
-Version: 0.22.9
+Version: 0.22.10
 Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
 Home-page: https://github.com/trustgraph-ai/trustgraph
-Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.22.9.tar.gz
+Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.22.10.tar.gz
 Author: trustgraph.ai
 Author-email: security@trustgraph.ai
 Classifier: Programming Language :: Python :: 3
@@ -1 +0,0 @@
-__version__ = "0.22.9"