trustgraph-base 0.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- trustgraph-base-0.0.0/PKG-INFO +19 -0
- trustgraph-base-0.0.0/README.md +1 -0
- trustgraph-base-0.0.0/setup.cfg +4 -0
- trustgraph-base-0.0.0/setup.py +42 -0
- trustgraph-base-0.0.0/trustgraph/api/__init__.py +3 -0
- trustgraph-base-0.0.0/trustgraph/api/api.py +368 -0
- trustgraph-base-0.0.0/trustgraph/base/__init__.py +6 -0
- trustgraph-base-0.0.0/trustgraph/base/base_processor.py +120 -0
- trustgraph-base-0.0.0/trustgraph/base/consumer.py +107 -0
- trustgraph-base-0.0.0/trustgraph/base/consumer_producer.py +140 -0
- trustgraph-base-0.0.0/trustgraph/base/producer.py +56 -0
- trustgraph-base-0.0.0/trustgraph/base_version.py +1 -0
- trustgraph-base-0.0.0/trustgraph/clients/__init__.py +0 -0
- trustgraph-base-0.0.0/trustgraph/clients/agent_client.py +64 -0
- trustgraph-base-0.0.0/trustgraph/clients/base.py +133 -0
- trustgraph-base-0.0.0/trustgraph/clients/document_embeddings_client.py +49 -0
- trustgraph-base-0.0.0/trustgraph/clients/document_rag_client.py +46 -0
- trustgraph-base-0.0.0/trustgraph/clients/embeddings_client.py +44 -0
- trustgraph-base-0.0.0/trustgraph/clients/graph_embeddings_client.py +49 -0
- trustgraph-base-0.0.0/trustgraph/clients/graph_rag_client.py +49 -0
- trustgraph-base-0.0.0/trustgraph/clients/llm_client.py +42 -0
- trustgraph-base-0.0.0/trustgraph/clients/prompt_client.py +172 -0
- trustgraph-base-0.0.0/trustgraph/clients/triples_query_client.py +66 -0
- trustgraph-base-0.0.0/trustgraph/exceptions.py +14 -0
- trustgraph-base-0.0.0/trustgraph/knowledge/__init__.py +7 -0
- trustgraph-base-0.0.0/trustgraph/knowledge/defs.py +33 -0
- trustgraph-base-0.0.0/trustgraph/knowledge/document.py +130 -0
- trustgraph-base-0.0.0/trustgraph/knowledge/identifier.py +23 -0
- trustgraph-base-0.0.0/trustgraph/knowledge/organization.py +50 -0
- trustgraph-base-0.0.0/trustgraph/knowledge/publication.py +79 -0
- trustgraph-base-0.0.0/trustgraph/log_level.py +20 -0
- trustgraph-base-0.0.0/trustgraph/objects/__init__.py +0 -0
- trustgraph-base-0.0.0/trustgraph/objects/field.py +72 -0
- trustgraph-base-0.0.0/trustgraph/objects/object.py +8 -0
- trustgraph-base-0.0.0/trustgraph/rdf.py +7 -0
- trustgraph-base-0.0.0/trustgraph/schema/__init__.py +14 -0
- trustgraph-base-0.0.0/trustgraph/schema/agent.py +37 -0
- trustgraph-base-0.0.0/trustgraph/schema/documents.py +71 -0
- trustgraph-base-0.0.0/trustgraph/schema/graph.py +90 -0
- trustgraph-base-0.0.0/trustgraph/schema/lookup.py +42 -0
- trustgraph-base-0.0.0/trustgraph/schema/metadata.py +16 -0
- trustgraph-base-0.0.0/trustgraph/schema/models.py +45 -0
- trustgraph-base-0.0.0/trustgraph/schema/object.py +33 -0
- trustgraph-base-0.0.0/trustgraph/schema/prompt.py +66 -0
- trustgraph-base-0.0.0/trustgraph/schema/retrieval.py +44 -0
- trustgraph-base-0.0.0/trustgraph/schema/topic.py +4 -0
- trustgraph-base-0.0.0/trustgraph/schema/types.py +30 -0
- trustgraph-base-0.0.0/trustgraph_base.egg-info/PKG-INFO +19 -0
- trustgraph-base-0.0.0/trustgraph_base.egg-info/SOURCES.txt +50 -0
- trustgraph-base-0.0.0/trustgraph_base.egg-info/dependency_links.txt +1 -0
- trustgraph-base-0.0.0/trustgraph_base.egg-info/requires.txt +2 -0
- trustgraph-base-0.0.0/trustgraph_base.egg-info/top_level.txt +1 -0
@@ -0,0 +1,19 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: trustgraph-base
|
3
|
+
Version: 0.0.0
|
4
|
+
Summary: TrustGraph provides a means to run a flexible pipeline of AI processing components.
|
5
|
+
Home-page: https://github.com/trustgraph-ai/trustgraph
|
6
|
+
Author: trustgraph.ai
|
7
|
+
Author-email: security@trustgraph.ai
|
8
|
+
License: UNKNOWN
|
9
|
+
Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.0.0.tar.gz
|
10
|
+
Platform: UNKNOWN
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
12
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
13
|
+
Classifier: Operating System :: OS Independent
|
14
|
+
Requires-Python: >=3.8
|
15
|
+
Description-Content-Type: text/markdown
|
16
|
+
|
17
|
+
See https://trustgraph.ai/
|
18
|
+
|
19
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
See https://trustgraph.ai/
|
@@ -0,0 +1,42 @@
|
|
1
|
+
import setuptools
import os
# BUG FIX: `import importlib` alone does not guarantee that the
# `importlib.util` submodule is loaded; it must be imported explicitly.
import importlib.util

# Read the long description from the README so the registry page shows it.
# Encoding pinned so the build does not depend on the platform default.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# Load the version number module directly from its file, so the version can
# be read without importing the (possibly not-yet-installed) package.
spec = importlib.util.spec_from_file_location(
    'version', 'trustgraph/base_version.py'
)
version_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(version_module)

version = version_module.__version__

setuptools.setup(
    name="trustgraph-base",
    version=version,
    author="trustgraph.ai",
    author_email="security@trustgraph.ai",
    description="TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/trustgraph-ai/trustgraph",
    packages=setuptools.find_namespace_packages(
        where='./',
    ),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.8',
    download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz",
    install_requires=[
        "pulsar-client",
        "prometheus-client",
    ],
    scripts=[
    ]
)
|
@@ -0,0 +1,368 @@
|
|
1
|
+
|
2
|
+
import requests
|
3
|
+
import json
|
4
|
+
import dataclasses
|
5
|
+
import base64
|
6
|
+
|
7
|
+
from trustgraph.knowledge import hash, Uri, Literal
|
8
|
+
|
9
|
+
class ProtocolException(Exception):
    """Raised when the API transport or response does not follow the protocol
    (non-200 status, non-JSON body, or a malformed response object)."""
|
11
|
+
|
12
|
+
class ApplicationException(Exception):
    """Raised when the service itself reports an error in its response."""
|
14
|
+
|
15
|
+
@dataclasses.dataclass
class Triple:
    """One RDF-style statement: subject, predicate, object.

    NOTE(review): the annotations say str, but triples_query() populates the
    fields with Uri/Literal values — presumably str subclasses; confirm.
    """
    # Subject of the statement
    s : str
    # Predicate of the statement
    p : str
    # Object of the statement
    o : str
|
20
|
+
|
21
|
+
class Api:
    """Thin HTTP client for the TrustGraph REST API.

    Each method POSTs a JSON payload to an endpoint under <url>/api/v1/ and
    decodes the JSON reply.  Transport-level problems (bad status code,
    non-JSON body, malformed reply) raise ProtocolException; errors reported
    by the service raise ApplicationException.
    """

    def __init__(self, url="http://localhost:8088/"):
        """url: base address of the TrustGraph API gateway."""

        self.url = url

        if not url.endswith("/"):
            self.url += "/"

        self.url += "api/v1/"

    def check_error(self, response):
        """Raise ApplicationException if the decoded JSON carries an error.

        response: the decoded JSON object (a dict), not the HTTP response.
        """

        if "error" in response:

            try:
                msg = response["error"]["message"]
                # BUG FIX: previously read "message" twice; the error type
                # lives under its own key.
                tp = response["error"]["type"]
            except Exception:
                raise ApplicationException(
                    "Error, but the error object is broken"
                )

            raise ApplicationException(f"{tp}: {msg}")

    def _request(self, path, payload):
        """POST payload as JSON to the named endpoint; return decoded JSON.

        Raises ProtocolException for a non-200 status or a non-JSON body,
        ApplicationException if the service reports an error.
        """

        url = f"{self.url}{path}"

        # Invoke the API, input is passed as JSON
        resp = requests.post(url, json=payload)

        # Should be a 200 status code
        if resp.status_code != 200:
            raise ProtocolException(f"Status code {resp.status_code}")

        try:
            # Parse the response as JSON
            obj = resp.json()
        except Exception:
            raise ProtocolException("Expected JSON response")

        # BUG FIX: check_error was previously handed the raw Response
        # object, so service-reported errors were never detected.  It must
        # inspect the decoded JSON.
        self.check_error(obj)

        return obj

    def text_completion(self, system, prompt):
        """Run an LLM completion with the given system and user prompts."""

        obj = self._request("text-completion", {
            "system": system,
            "prompt": prompt
        })

        try:
            return obj["response"]
        except Exception:
            raise ProtocolException("Response not formatted correctly")

    def agent(self, question):
        """Ask the agent service a question; return its answer string."""

        obj = self._request("agent", {
            "question": question
        })

        try:
            return obj["answer"]
        except Exception:
            raise ProtocolException("Response not formatted correctly")

    def graph_rag(self, question):
        """Run a Graph-RAG query; return the generated response string."""

        obj = self._request("graph-rag", {
            "query": question
        })

        try:
            return obj["response"]
        except Exception:
            raise ProtocolException("Response not formatted correctly")

    def document_rag(self, question):
        """Run a Document-RAG query; return the generated response string."""

        obj = self._request("document-rag", {
            "query": question
        })

        try:
            return obj["response"]
        except Exception:
            raise ProtocolException("Response not formatted correctly")

    def embeddings(self, text):
        """Compute embeddings for a text block; return the vectors."""

        obj = self._request("embeddings", {
            "text": text
        })

        try:
            return obj["vectors"]
        except Exception:
            raise ProtocolException("Response not formatted correctly")

    def prompt(self, id, variables):
        """Invoke a stored prompt template by id with a variables mapping.

        Returns either plain text, or a decoded JSON object when the service
        replies with an "object" field.
        """

        obj = self._request("prompt", {
            "id": id,
            "variables": variables
        })

        if "text" in obj:
            return obj["text"]

        if "object" in obj:
            try:
                return json.loads(obj["object"])
            except Exception:
                raise ProtocolException(
                    "Returned object not well-formed JSON"
                )

        raise ProtocolException("Response not formatted correctly")

    def triples_query(self, s=None, p=None, o=None, limit=10000):
        """Query the triple store; s/p must be Uri, o may be Uri or Literal.

        Returns a list of Triple, with Uri/Literal values reconstructed from
        the wire encoding ({"v": value, "e": is-uri}).
        """

        payload = {
            "limit": limit
        }

        if s:
            if not isinstance(s, Uri):
                raise RuntimeError("s must be Uri")
            payload["s"] = { "v": str(s), "e": isinstance(s, Uri), }

        if p:
            if not isinstance(p, Uri):
                raise RuntimeError("p must be Uri")
            payload["p"] = { "v": str(p), "e": isinstance(p, Uri), }

        if o:
            if not isinstance(o, Uri) and not isinstance(o, Literal):
                raise RuntimeError("o must be Uri or Literal")
            payload["o"] = { "v": str(o), "e": isinstance(o, Uri), }

        obj = self._request("triples-query", payload)

        if "response" not in obj:
            raise ProtocolException("Response not formatted correctly")

        def to_value(x):
            # "e" flags a URI; anything else is a literal value.
            if x["e"]: return Uri(x["v"])
            return Literal(x["v"])

        # BUG FIX: a second, unreachable `return object["response"]`
        # followed this list return; it has been removed.
        return [
            Triple(
                s=to_value(t["s"]),
                p=to_value(t["p"]),
                o=to_value(t["o"])
            )
            for t in obj["response"]
        ]

    def _metadata_triples(self, metadata):
        """Collect metadata triples in wire form via the object's emit()."""

        triples = []

        if metadata:
            metadata.emit(
                lambda t: triples.append({
                    "s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
                    "p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
                    "o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
                })
            )

        return triples

    def load_document(self, document, id=None, metadata=None):
        """Submit a binary document (bytes) for loading.

        If id is omitted it is derived by hashing the document; metadata
        without an explicit id is rejected.
        """

        if id is None:

            if metadata is not None:

                # Situation makes no sense. What can the metadata possibly
                # mean if the caller doesn't know the document ID.
                # Metadata should relate to the document by ID
                raise RuntimeError("Can't specify metadata without id")

            id = hash(document)

        payload = {
            "id": id,
            "metadata": self._metadata_triples(metadata),
            # document is expected to be bytes; b64encode requires it
            "data": base64.b64encode(document).decode("utf-8"),
        }

        url = f"{self.url}load/document"

        # Invoke the API, input is passed as JSON.  The load endpoints only
        # signal success through the status code, so no body is decoded.
        resp = requests.post(url, json=payload)

        # Should be a 200 status code
        if resp.status_code != 200:
            raise ProtocolException(f"Status code {resp.status_code}")

    def load_text(self, text, id=None, metadata=None, charset="utf-8"):
        """Submit an encoded text document (bytes) for loading.

        charset names the encoding of the supplied bytes.  If id is omitted
        it is derived by hashing the text; metadata without an explicit id
        is rejected.
        """

        if id is None:

            if metadata is not None:

                # Situation makes no sense. What can the metadata possibly
                # mean if the caller doesn't know the document ID.
                # Metadata should relate to the document by ID
                raise RuntimeError("Can't specify metadata without id")

            id = hash(text)

        payload = {
            "id": id,
            "metadata": self._metadata_triples(metadata),
            "charset": charset,
            # text is expected to be bytes in the given charset
            "text": base64.b64encode(text).decode("utf-8"),
        }

        url = f"{self.url}load/text"

        # Invoke the API, input is passed as JSON.  The load endpoints only
        # signal success through the status code, so no body is decoded.
        resp = requests.post(url, json=payload)

        # Should be a 200 status code
        if resp.status_code != 200:
            raise ProtocolException(f"Status code {resp.status_code}")
|
368
|
+
|
@@ -0,0 +1,120 @@
|
|
1
|
+
|
2
|
+
import os
|
3
|
+
import argparse
|
4
|
+
import pulsar
|
5
|
+
import _pulsar
|
6
|
+
import time
|
7
|
+
from prometheus_client import start_http_server, Info
|
8
|
+
|
9
|
+
from .. log_level import LogLevel
|
10
|
+
|
11
|
+
class BaseProcessor:
    """Common scaffolding for pipeline processors.

    Provides the Pulsar client connection, the shared command-line
    arguments, Prometheus metrics start-up, and a supervised run loop that
    restarts the processor after unexpected failures.
    """

    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')

    def __init__(self, **params):

        # Set before anything that can fail, so __del__ is always safe
        self.client = None

        # Class-level metric, created once on first instantiation
        if not hasattr(__class__, "params_metric"):
            __class__.params_metric = Info(
                'params', 'Parameters configuration'
            )

        # FIXME: Maybe outputs information it should not
        __class__.params_metric.info({
            k: str(params[k])
            for k in params
        })

        pulsar_host = params.get("pulsar_host", self.default_pulsar_host)
        log_level = params.get("log_level", LogLevel.INFO)

        self.pulsar_host = pulsar_host

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
        )

    def __del__(self):

        # hasattr guard: __init__ may have raised before self.client existed
        if hasattr(self, "client"):
            if self.client:
                self.client.close()

    @staticmethod
    def add_args(parser):
        """Add the command-line arguments shared by all processors."""

        parser.add_argument(
            '-p', '--pulsar-host',
            default=__class__.default_pulsar_host,
            help=f'Pulsar host (default: {__class__.default_pulsar_host})',
        )

        parser.add_argument(
            '-l', '--log-level',
            type=LogLevel,
            default=LogLevel.INFO,
            choices=list(LogLevel),
            # BUG FIX: help text previously said 'Output queue'
            help='Log level (default: info)'
        )

        parser.add_argument(
            '--metrics',
            action=argparse.BooleanOptionalAction,
            default=True,
            help='Metrics enabled (default: true)',
        )

        parser.add_argument(
            '-P', '--metrics-port',
            type=int,
            default=8000,
            # BUG FIX: help text previously said 'Pulsar host'
            help='Metrics port (default: 8000)',
        )

    def run(self):
        """Subclasses must override this with the processor main loop."""
        raise RuntimeError("Something should have implemented the run method")

    @classmethod
    def start(cls, prog, doc):
        """Parse arguments, optionally start the metrics server, then run
        the processor forever, restarting after unexpected exceptions."""

        parser = argparse.ArgumentParser(
            prog=prog,
            description=doc
        )

        cls.add_args(parser)

        args = parser.parse_args()
        args = vars(args)

        print(args)

        if args["metrics"]:
            start_http_server(args["metrics_port"])

        while True:

            try:

                p = cls(**args)
                p.run()

            except KeyboardInterrupt:
                print("Keyboard interrupt.")
                return

            except _pulsar.Interrupted:
                print("Pulsar Interrupted.")
                return

            except Exception as e:

                print(type(e))

                print("Exception:", e, flush=True)
                print("Will retry...", flush=True)

                # Back off briefly before reconstructing the processor
                time.sleep(4)
|
@@ -0,0 +1,107 @@
|
|
1
|
+
|
2
|
+
from pulsar.schema import JsonSchema
|
3
|
+
from prometheus_client import Histogram, Info, Counter, Enum
|
4
|
+
import time
|
5
|
+
|
6
|
+
from . base_processor import BaseProcessor
|
7
|
+
from .. exceptions import TooManyRequests
|
8
|
+
|
9
|
+
class Consumer(BaseProcessor):
    """Processor that consumes messages from one Pulsar queue, dispatching
    each message to self.handle() (provided by a subclass)."""

    def __init__(self, **params):

        # Class-level metrics are created once, on first instantiation
        if not hasattr(__class__, "state_metric"):
            __class__.state_metric = Enum(
                'processor_state', 'Processor state',
                states=['starting', 'running', 'stopped']
            )

        # Mark (re-)construction; start() may rebuild us after a failure.
        # BUG FIX: this was previously also called redundantly inside the
        # creation guard above.
        __class__.state_metric.state('starting')

        super(Consumer, self).__init__(**params)

        input_queue = params.get("input_queue")
        subscriber = params.get("subscriber")
        input_schema = params.get("input_schema")

        # BUG FIX: was `== None`; identity comparison is correct for None
        if input_schema is None:
            raise RuntimeError("input_schema must be specified")

        if not hasattr(__class__, "request_metric"):
            __class__.request_metric = Histogram(
                'request_latency', 'Request latency (seconds)'
            )

        if not hasattr(__class__, "pubsub_metric"):
            __class__.pubsub_metric = Info(
                'pubsub', 'Pub/sub configuration'
            )

        if not hasattr(__class__, "processing_metric"):
            __class__.processing_metric = Counter(
                'processing_count', 'Processing count', ["status"]
            )

        __class__.pubsub_metric.info({
            "input_queue": input_queue,
            "subscriber": subscriber,
            "input_schema": input_schema.__name__,
        })

        self.consumer = self.client.subscribe(
            input_queue, subscriber,
            schema=JsonSchema(input_schema),
        )

    def run(self):
        """Receive and handle messages forever.

        Successful messages are acknowledged; rate-limited ones are
        negatively acknowledged and retried after a pause; failures are
        negatively acknowledged for redelivery.
        """

        __class__.state_metric.state('running')

        while True:

            msg = self.consumer.receive()

            try:

                with __class__.request_metric.time():
                    self.handle(msg)

                # Acknowledge successful processing of the message
                self.consumer.acknowledge(msg)

                __class__.processing_metric.labels(status="success").inc()

            except TooManyRequests:
                self.consumer.negative_acknowledge(msg)
                print("TooManyRequests: will retry")
                __class__.processing_metric.labels(status="rate-limit").inc()
                # Back off before pulling the next message
                time.sleep(5)
                continue

            except Exception as e:

                print("Exception:", e, flush=True)

                # Message failed to be processed
                self.consumer.negative_acknowledge(msg)

                __class__.processing_metric.labels(status="error").inc()

    @staticmethod
    def add_args(parser, default_input_queue, default_subscriber):
        """Add consumer-specific arguments on top of the base set."""

        BaseProcessor.add_args(parser)

        parser.add_argument(
            '-i', '--input-queue',
            default=default_input_queue,
            help=f'Input queue (default: {default_input_queue})'
        )

        parser.add_argument(
            '-s', '--subscriber',
            default=default_subscriber,
            help=f'Queue subscriber name (default: {default_subscriber})'
        )
|
107
|
+
|