trustgraph-bedrock-0.0.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
+ Metadata-Version: 2.1
+ Name: trustgraph-bedrock
+ Version: 0.0.0
+ Summary: TrustGraph provides a means to run a flexible pipeline of AI processing components.
+ Home-page: https://github.com/trustgraph-ai/trustgraph
+ Author: trustgraph.ai
+ Author-email: security@trustgraph.ai
+ License: UNKNOWN
+ Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.0.0.tar.gz
+ Platform: UNKNOWN
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+
+ See https://trustgraph.ai/
+
+
@@ -0,0 +1 @@
+ See https://trustgraph.ai/
@@ -0,0 +1,6 @@
+ #!/usr/bin/env python3
+
+ from trustgraph.model.text_completion.bedrock import run
+
+ run()
+
@@ -0,0 +1,4 @@
+ [egg_info]
+ tag_build =
+ tag_date = 0
+
@@ -0,0 +1,45 @@
+ import setuptools
+ import os
+ import importlib.util
+
+ with open("README.md", "r") as fh:
+     long_description = fh.read()
+
+ # Load a version number module
+ spec = importlib.util.spec_from_file_location(
+     'version', 'trustgraph/bedrock_version.py'
+ )
+ version_module = importlib.util.module_from_spec(spec)
+ spec.loader.exec_module(version_module)
+
+ version = version_module.__version__
+
+ setuptools.setup(
+     name="trustgraph-bedrock",
+     version=version,
+     author="trustgraph.ai",
+     author_email="security@trustgraph.ai",
+     description="TrustGraph provides a means to run a flexible pipeline of AI processing components.",
+     long_description=long_description,
+     long_description_content_type="text/markdown",
+     url="https://github.com/trustgraph-ai/trustgraph",
+     packages=setuptools.find_namespace_packages(
+         where='./',
+     ),
+     classifiers=[
+         "Programming Language :: Python :: 3",
+         "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
+         "Operating System :: OS Independent",
+     ],
+     python_requires='>=3.8',
+     download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz",
+     install_requires=[
+         "trustgraph-base>=0.19,<0.20",
+         "pulsar-client",
+         "prometheus-client",
+         "boto3",
+     ],
+     scripts=[
+         "scripts/text-completion-bedrock",
+     ]
+ )
@@ -0,0 +1 @@
+ __version__ = "0.0.0"
@@ -0,0 +1,7 @@
+ #!/usr/bin/env python3
+
+ from . llm import run
+
+ if __name__ == '__main__':
+     run()
+
@@ -0,0 +1,338 @@
+
+ """
+ Simple LLM service, performs text prompt completion using AWS Bedrock.
+ Input is prompt, output is response. Mistral is default.
+ """
+
+ import boto3
+ import json
+ from prometheus_client import Histogram
+ import os
+
+ from .... schema import TextCompletionRequest, TextCompletionResponse, Error
+ from .... schema import text_completion_request_queue
+ from .... schema import text_completion_response_queue
+ from .... log_level import LogLevel
+ from .... base import ConsumerProducer
+ from .... exceptions import TooManyRequests
+
+ module = ".".join(__name__.split(".")[1:-1])
+
+ default_input_queue = text_completion_request_queue
+ default_output_queue = text_completion_response_queue
+ default_subscriber = module
+ default_model = 'mistral.mistral-large-2407-v1:0'
+ default_temperature = 0.0
+ default_max_output = 2048
+ default_aws_id_key = os.getenv("AWS_ID_KEY", None)
+ default_aws_secret = os.getenv("AWS_SECRET", None)
+ default_aws_region = os.getenv("AWS_REGION", 'us-west-2')
+
+ class Processor(ConsumerProducer):
+
+     def __init__(self, **params):
+
+         input_queue = params.get("input_queue", default_input_queue)
+         output_queue = params.get("output_queue", default_output_queue)
+         subscriber = params.get("subscriber", default_subscriber)
+         model = params.get("model", default_model)
+         aws_id_key = params.get("aws_id_key", default_aws_id_key)
+         aws_secret = params.get("aws_secret", default_aws_secret)
+         aws_region = params.get("aws_region", default_aws_region)
+         temperature = params.get("temperature", default_temperature)
+         max_output = params.get("max_output", default_max_output)
+
+         if aws_id_key is None:
+             raise RuntimeError("AWS ID not specified")
+
+         if aws_secret is None:
+             raise RuntimeError("AWS secret not specified")
+
+         if aws_region is None:
+             raise RuntimeError("AWS region not specified")
+
+         super(Processor, self).__init__(
+             **params | {
+                 "input_queue": input_queue,
+                 "output_queue": output_queue,
+                 "subscriber": subscriber,
+                 "input_schema": TextCompletionRequest,
+                 "output_schema": TextCompletionResponse,
+                 "model": model,
+                 "temperature": temperature,
+                 "max_output": max_output,
+             }
+         )
+
+         if not hasattr(__class__, "text_completion_metric"):
+             __class__.text_completion_metric = Histogram(
+                 'text_completion_duration',
+                 'Text completion duration (seconds)',
+                 buckets=[
+                     0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
+                     8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
+                     17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0,
+                     30.0, 35.0, 40.0, 45.0, 50.0, 60.0, 80.0, 100.0,
+                     120.0
+                 ]
+             )
+
+         self.model = model
+         self.temperature = temperature
+         self.max_output = max_output
+
+         self.session = boto3.Session(
+             aws_access_key_id=aws_id_key,
+             aws_secret_access_key=aws_secret,
+             region_name=aws_region
+         )
+
+         self.bedrock = self.session.client(service_name='bedrock-runtime')
+
+         print("Initialised", flush=True)
+
+     def handle(self, msg):
+
+         v = msg.value()
+
+         # Sender-produced ID
+
+         id = msg.properties()["id"]
+
+         print(f"Handling prompt {id}...", flush=True)
+
+         prompt = v.system + "\n\n" + v.prompt
+
+         try:
+
+             # Mistral Input Format
+             if self.model.startswith("mistral"):
+                 promptbody = json.dumps({
+                     "prompt": prompt,
+                     "max_tokens": self.max_output,
+                     "temperature": self.temperature,
+                     "top_p": 0.99,
+                     "top_k": 40
+                 })
+
+             # Llama 3.1 Input Format
+             elif self.model.startswith("meta"):
+                 promptbody = json.dumps({
+                     "prompt": prompt,
+                     "max_gen_len": self.max_output,
+                     "temperature": self.temperature,
+                     "top_p": 0.95,
+                 })
+
+             # Anthropic Input Format
+             elif self.model.startswith("anthropic"):
+                 promptbody = json.dumps({
+                     "anthropic_version": "bedrock-2023-05-31",
+                     "max_tokens": self.max_output,
+                     "temperature": self.temperature,
+                     "top_p": 0.999,
+                     "messages": [
+                         {
+                             "role": "user",
+                             "content": [
+                                 {
+                                     "type": "text",
+                                     "text": prompt
+                                 }
+                             ]
+                         }
+                     ]
+                 })
+
+             # Jamba Input Format
+             elif self.model.startswith("ai21"):
+                 promptbody = json.dumps({
+                     "max_tokens": self.max_output,
+                     "temperature": self.temperature,
+                     "top_p": 0.9,
+                     "messages": [
+                         {
+                             "role": "user",
+                             "content": prompt
+                         }
+                     ]
+                 })
+
+             # Cohere Input Format
+             elif self.model.startswith("cohere"):
+                 promptbody = json.dumps({
+                     "max_tokens": self.max_output,
+                     "temperature": self.temperature,
+                     "message": prompt
+                 })
+
+             # Use Mistral format as default
+             else:
+                 promptbody = json.dumps({
+                     "prompt": prompt,
+                     "max_tokens": self.max_output,
+                     "temperature": self.temperature,
+                     "top_p": 0.99,
+                     "top_k": 40
+                 })
+
+             accept = 'application/json'
+             contentType = 'application/json'
+
+             # FIXME: Consider catching request limits and raise TooManyRequests
+             # See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html
+
+             with __class__.text_completion_metric.time():
+                 response = self.bedrock.invoke_model(
+                     body=promptbody, modelId=self.model, accept=accept,
+                     contentType=contentType
+                 )
+
+             # Mistral Response Structure
+             if self.model.startswith("mistral"):
+                 response_body = json.loads(response.get("body").read())
+                 outputtext = response_body['outputs'][0]['text']
+
+             # Claude Response Structure
+             elif self.model.startswith("anthropic"):
+                 model_response = json.loads(response["body"].read())
+                 outputtext = model_response['content'][0]['text']
+
+             # Llama 3.1 Response Structure
+             elif self.model.startswith("meta"):
+                 model_response = json.loads(response["body"].read())
+                 outputtext = model_response["generation"]
+
+             # Jamba Response Structure
+             elif self.model.startswith("ai21"):
+                 content = response['body'].read()
+                 content_str = content.decode('utf-8')
+                 content_json = json.loads(content_str)
+                 outputtext = content_json['choices'][0]['message']['content']
+
+             # Cohere Response Structure
+             elif self.model.startswith("cohere"):
+                 content = response['body'].read()
+                 content_str = content.decode('utf-8')
+                 content_json = json.loads(content_str)
+                 outputtext = content_json['text']
+
+             # Use Mistral as default
+             else:
+                 response_body = json.loads(response.get("body").read())
+                 outputtext = response_body['outputs'][0]['text']
+
+             metadata = response['ResponseMetadata']['HTTPHeaders']
+             inputtokens = int(metadata['x-amzn-bedrock-input-token-count'])
+             outputtokens = int(metadata['x-amzn-bedrock-output-token-count'])
+
+             print(outputtext, flush=True)
+             print(f"Input Tokens: {inputtokens}", flush=True)
+             print(f"Output Tokens: {outputtokens}", flush=True)
+
+             print("Send response...", flush=True)
+             r = TextCompletionResponse(
+                 error=None,
+                 response=outputtext,
+                 in_token=inputtokens,
+                 out_token=outputtokens,
+                 model=str(self.model),
+             )
+
+             self.send(r, properties={"id": id})
+
+             print("Done.", flush=True)
+
+
+         # FIXME: Wrong exception, don't know what Bedrock throws
+         # for a rate limit
+         except TooManyRequests as e:
+
+             print("Send rate limit response...", flush=True)
+
+             r = TextCompletionResponse(
+                 error=Error(
+                     type = "rate-limit",
+                     message = str(e),
+                 ),
+                 response=None,
+                 in_token=None,
+                 out_token=None,
+                 model=None,
+             )
+
+             self.producer.send(r, properties={"id": id})
+
+             self.consumer.acknowledge(msg)
+
+         except Exception as e:
+
+             print(f"Exception: {e}")
+
+             print("Send error response...", flush=True)
+
+             r = TextCompletionResponse(
+                 error=Error(
+                     type = "llm-error",
+                     message = str(e),
+                 ),
+                 response=None,
+                 in_token=None,
+                 out_token=None,
+                 model=None,
+             )
+
+             self.producer.send(r, properties={"id": id})
+
+             self.consumer.acknowledge(msg)
+
+     @staticmethod
+     def add_args(parser):
+
+         ConsumerProducer.add_args(
+             parser, default_input_queue, default_subscriber,
+             default_output_queue,
+         )
+
+         parser.add_argument(
+             '-m', '--model',
+             default="mistral.mistral-large-2407-v1:0",
+             help=f'Bedrock model (default: Mistral-Large-2407)'
+         )
+
+         parser.add_argument(
+             '-z', '--aws-id-key',
+             default=default_aws_id_key,
+             help=f'AWS ID Key'
+         )
+
+         parser.add_argument(
+             '-k', '--aws-secret',
+             default=default_aws_secret,
+             help=f'AWS Secret Key'
+         )
+
+         parser.add_argument(
+             '-r', '--aws-region',
+             default=default_aws_region,
+             help=f'AWS Region'
+         )
+
+         parser.add_argument(
+             '-t', '--temperature',
+             type=float,
+             default=default_temperature,
+             help=f'LLM temperature parameter (default: {default_temperature})'
+         )
+
+         parser.add_argument(
+             '-x', '--max-output',
+             type=int,
+             default=default_max_output,
+             help=f'LLM max output tokens (default: {default_max_output})'
+         )
+
+ def run():
+
+     Processor.start(module, __doc__)
+
@@ -0,0 +1,19 @@
+ Metadata-Version: 2.1
+ Name: trustgraph-bedrock
+ Version: 0.0.0
+ Summary: TrustGraph provides a means to run a flexible pipeline of AI processing components.
+ Home-page: https://github.com/trustgraph-ai/trustgraph
+ Author: trustgraph.ai
+ Author-email: security@trustgraph.ai
+ License: UNKNOWN
+ Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.0.0.tar.gz
+ Platform: UNKNOWN
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+
+ See https://trustgraph.ai/
+
+
@@ -0,0 +1,12 @@
+ README.md
+ setup.py
+ scripts/text-completion-bedrock
+ trustgraph/bedrock_version.py
+ trustgraph/model/text_completion/bedrock/__init__.py
+ trustgraph/model/text_completion/bedrock/__main__.py
+ trustgraph/model/text_completion/bedrock/llm.py
+ trustgraph_bedrock.egg-info/PKG-INFO
+ trustgraph_bedrock.egg-info/SOURCES.txt
+ trustgraph_bedrock.egg-info/dependency_links.txt
+ trustgraph_bedrock.egg-info/requires.txt
+ trustgraph_bedrock.egg-info/top_level.txt
@@ -0,0 +1,4 @@
+ boto3
+ prometheus-client
+ pulsar-client
+ trustgraph-base<0.20,>=0.19
@@ -0,0 +1,2 @@
+ scripts
+ trustgraph