trustgraph 0.4.2.tar.gz → 0.5.2.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of trustgraph might be problematic.
- {trustgraph-0.4.2 → trustgraph-0.5.2}/PKG-INFO +11 -8
- {trustgraph-0.4.2 → trustgraph-0.5.2}/README.md +8 -6
- trustgraph-0.5.2/scripts/chunker-recursive +6 -0
- trustgraph-0.5.2/scripts/dump-parquet +12 -0
- trustgraph-0.5.2/scripts/ge-write-milvus +6 -0
- trustgraph-0.5.2/scripts/graph-rag +6 -0
- trustgraph-0.5.2/scripts/pdf-decoder +6 -0
- trustgraph-0.5.2/scripts/text-completion-azure +6 -0
- trustgraph-0.5.2/scripts/text-completion-claude +6 -0
- trustgraph-0.5.2/scripts/text-completion-ollama +6 -0
- trustgraph-0.5.2/scripts/text-completion-vertexai +6 -0
- trustgraph-0.5.2/scripts/triples-dump-parquet +6 -0
- trustgraph-0.5.2/scripts/triples-write-cassandra +6 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/setup.py +10 -7
- trustgraph-0.5.2/trustgraph/base/__init__.py +6 -0
- trustgraph-0.5.2/trustgraph/base/base_processor.py +117 -0
- trustgraph-0.5.2/trustgraph/base/consumer.py +87 -0
- trustgraph-0.5.2/trustgraph/base/consumer_producer.py +168 -0
- trustgraph-0.5.2/trustgraph/base/producer.py +55 -0
- {trustgraph-0.4.2/trustgraph/chunker → trustgraph-0.5.2/trustgraph/chunking}/recursive/chunker.py +7 -4
- {trustgraph-0.4.2/trustgraph/decoder → trustgraph-0.5.2/trustgraph/decoding}/pdf/pdf_decoder.py +7 -4
- trustgraph-0.5.2/trustgraph/dump/triples/parquet/processor.py +87 -0
- trustgraph-0.5.2/trustgraph/dump/triples/parquet/writer.py +96 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/embeddings/hf/hf.py +7 -4
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/embeddings/ollama/processor.py +7 -4
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/embeddings/vectorize/vectorize.py +14 -8
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/embeddings_client.py +16 -8
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/graph_rag.py +9 -2
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/graph_rag_client.py +16 -6
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/kg/extract_definitions/extract.py +14 -8
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/kg/extract_relationships/extract.py +18 -12
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/llm_client.py +17 -8
- {trustgraph-0.4.2/trustgraph/llm/azure_text → trustgraph-0.5.2/trustgraph/model/text_completion/azure}/llm.py +11 -7
- {trustgraph-0.4.2/trustgraph/llm/claude_text → trustgraph-0.5.2/trustgraph/model/text_completion/claude}/llm.py +11 -7
- {trustgraph-0.4.2/trustgraph/llm/ollama_text → trustgraph-0.5.2/trustgraph/model/text_completion/ollama}/llm.py +11 -7
- {trustgraph-0.4.2/trustgraph/llm/vertexai_text → trustgraph-0.5.2/trustgraph/model/text_completion/vertexai}/llm.py +11 -7
- trustgraph-0.5.2/trustgraph/retrieval/__init__.py +0 -0
- {trustgraph-0.4.2/trustgraph/rag/graph → trustgraph-0.5.2/trustgraph/retrieval/graph_rag}/rag.py +7 -4
- trustgraph-0.5.2/trustgraph/schema.py +125 -0
- trustgraph-0.5.2/trustgraph/storage/__init__.py +0 -0
- trustgraph-0.5.2/trustgraph/storage/graph_embeddings/__init__.py +0 -0
- {trustgraph-0.4.2/trustgraph/vector/milvus_write → trustgraph-0.5.2/trustgraph/storage/graph_embeddings/milvus}/write.py +11 -9
- trustgraph-0.5.2/trustgraph/storage/triples/__init__.py +0 -0
- trustgraph-0.5.2/trustgraph/storage/triples/cassandra/__main__.py +7 -0
- {trustgraph-0.4.2/trustgraph/graph/cassandra_write → trustgraph-0.5.2/trustgraph/storage/triples/cassandra}/write.py +10 -7
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph.egg-info/PKG-INFO +11 -8
- trustgraph-0.5.2/trustgraph.egg-info/SOURCES.txt +107 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph.egg-info/requires.txt +1 -0
- trustgraph-0.4.2/scripts/chunker-recursive +0 -6
- trustgraph-0.4.2/scripts/graph-rag +0 -6
- trustgraph-0.4.2/scripts/graph-write-cassandra +0 -6
- trustgraph-0.4.2/scripts/llm-azure-text +0 -6
- trustgraph-0.4.2/scripts/llm-claude-text +0 -6
- trustgraph-0.4.2/scripts/llm-ollama-text +0 -6
- trustgraph-0.4.2/scripts/llm-vertexai-text +0 -6
- trustgraph-0.4.2/scripts/pdf-decoder +0 -6
- trustgraph-0.4.2/scripts/vector-write-milvus +0 -6
- trustgraph-0.4.2/trustgraph/base/processor.py +0 -360
- trustgraph-0.4.2/trustgraph/schema.py +0 -67
- trustgraph-0.4.2/trustgraph.egg-info/SOURCES.txt +0 -94
- {trustgraph-0.4.2 → trustgraph-0.5.2}/LICENSE +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/scripts/embeddings-hf +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/scripts/embeddings-ollama +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/scripts/embeddings-vectorize +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/scripts/graph-show +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/scripts/graph-to-turtle +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/scripts/init-pulsar-manager +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/scripts/kg-extract-definitions +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/scripts/kg-extract-relationships +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/scripts/loader +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/scripts/query +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/scripts/run-processing +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/setup.cfg +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/__init__.py +0 -0
- {trustgraph-0.4.2/trustgraph/chunker → trustgraph-0.5.2/trustgraph/chunking}/__init__.py +0 -0
- {trustgraph-0.4.2/trustgraph/chunker → trustgraph-0.5.2/trustgraph/chunking}/recursive/__init__.py +0 -0
- {trustgraph-0.4.2/trustgraph/chunker → trustgraph-0.5.2/trustgraph/chunking}/recursive/__main__.py +0 -0
- {trustgraph-0.4.2/trustgraph/decoder → trustgraph-0.5.2/trustgraph/decoding}/__init__.py +0 -0
- {trustgraph-0.4.2/trustgraph/decoder → trustgraph-0.5.2/trustgraph/decoding}/pdf/__init__.py +0 -0
- {trustgraph-0.4.2/trustgraph/decoder → trustgraph-0.5.2/trustgraph/decoding}/pdf/__main__.py +0 -0
- {trustgraph-0.4.2/trustgraph/embeddings → trustgraph-0.5.2/trustgraph/dump}/__init__.py +0 -0
- {trustgraph-0.4.2/trustgraph/graph → trustgraph-0.5.2/trustgraph/dump/triples}/__init__.py +0 -0
- {trustgraph-0.4.2/trustgraph/base → trustgraph-0.5.2/trustgraph/dump/triples/parquet}/__init__.py +0 -0
- {trustgraph-0.4.2/trustgraph/graph/cassandra_write → trustgraph-0.5.2/trustgraph/dump/triples/parquet}/__main__.py +0 -0
- {trustgraph-0.4.2/trustgraph/kg → trustgraph-0.5.2/trustgraph/embeddings}/__init__.py +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/embeddings/hf/__init__.py +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/embeddings/hf/__main__.py +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/embeddings/ollama/__init__.py +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/embeddings/ollama/__main__.py +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/embeddings/vectorize/__init__.py +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/embeddings/vectorize/__main__.py +0 -0
- {trustgraph-0.4.2/trustgraph/llm → trustgraph-0.5.2/trustgraph/kg}/__init__.py +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/kg/extract_definitions/__init__.py +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/kg/extract_definitions/__main__.py +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/kg/extract_relationships/__init__.py +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/kg/extract_relationships/__main__.py +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/log_level.py +0 -0
- {trustgraph-0.4.2/trustgraph/rag → trustgraph-0.5.2/trustgraph/model}/__init__.py +0 -0
- {trustgraph-0.4.2/trustgraph/vector → trustgraph-0.5.2/trustgraph/model/text_completion}/__init__.py +0 -0
- {trustgraph-0.4.2/trustgraph/llm/azure_text → trustgraph-0.5.2/trustgraph/model/text_completion/azure}/__init__.py +0 -0
- {trustgraph-0.4.2/trustgraph/llm/azure_text → trustgraph-0.5.2/trustgraph/model/text_completion/azure}/__main__.py +0 -0
- {trustgraph-0.4.2/trustgraph/llm/claude_text → trustgraph-0.5.2/trustgraph/model/text_completion/claude}/__init__.py +0 -0
- {trustgraph-0.4.2/trustgraph/llm/claude_text → trustgraph-0.5.2/trustgraph/model/text_completion/claude}/__main__.py +0 -0
- {trustgraph-0.4.2/trustgraph/llm/ollama_text → trustgraph-0.5.2/trustgraph/model/text_completion/ollama}/__init__.py +0 -0
- {trustgraph-0.4.2/trustgraph/llm/ollama_text → trustgraph-0.5.2/trustgraph/model/text_completion/ollama}/__main__.py +0 -0
- {trustgraph-0.4.2/trustgraph/llm/vertexai_text → trustgraph-0.5.2/trustgraph/model/text_completion/vertexai}/__init__.py +0 -0
- {trustgraph-0.4.2/trustgraph/llm/vertexai_text → trustgraph-0.5.2/trustgraph/model/text_completion/vertexai}/__main__.py +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/processing/__init__.py +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/processing/__main__.py +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/processing/processing.py +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/prompts.py +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/rdf.py +0 -0
- {trustgraph-0.4.2/trustgraph/rag/graph → trustgraph-0.5.2/trustgraph/retrieval/graph_rag}/__init__.py +0 -0
- {trustgraph-0.4.2/trustgraph/rag/graph → trustgraph-0.5.2/trustgraph/retrieval/graph_rag}/__main__.py +0 -0
- {trustgraph-0.4.2/trustgraph/graph/cassandra_write → trustgraph-0.5.2/trustgraph/storage/graph_embeddings/milvus}/__init__.py +0 -0
- {trustgraph-0.4.2/trustgraph/vector/milvus_write → trustgraph-0.5.2/trustgraph/storage/graph_embeddings/milvus}/__main__.py +0 -0
- {trustgraph-0.4.2/trustgraph/vector/milvus_write → trustgraph-0.5.2/trustgraph/storage/triples/cassandra}/__init__.py +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/triple_vectors.py +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph/trustgraph.py +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph.egg-info/dependency_links.txt +0 -0
- {trustgraph-0.4.2 → trustgraph-0.5.2}/trustgraph.egg-info/top_level.txt +0 -0
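The file moves above amount to a package-wide module reorganisation: chunker → chunking, decoder → decoding, llm → model.text_completion, rag.graph → retrieval.graph_rag, and the Cassandra/Milvus writers → storage. A hedged sketch of the corresponding import-path changes; the module names (chunker, pdf_decoder, llm, rag) match the moved files, but the diff does not show how downstream code imports them:

# 0.4.2 import paths
# from trustgraph.chunker.recursive import chunker
# from trustgraph.decoder.pdf import pdf_decoder
# from trustgraph.llm.claude_text import llm
# from trustgraph.rag.graph import rag

# Equivalent 0.5.2 import paths
from trustgraph.chunking.recursive import chunker
from trustgraph.decoding.pdf import pdf_decoder
from trustgraph.model.text_completion.claude import llm
from trustgraph.retrieval.graph_rag import rag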
{trustgraph-0.4.2 → trustgraph-0.5.2}/PKG-INFO

@@ -1,9 +1,9 @@
 Metadata-Version: 2.1
 Name: trustgraph
-Version: 0.4.2
+Version: 0.5.2
 Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
 Home-page: https://github.com/trustgraph-ai/trustgraph
-Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.4.2.tar.gz
+Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.5.2.tar.gz
 Author: trustgraph.ai
 Author-email: security@trustgraph.ai
 Classifier: Programming Language :: Python :: 3
@@ -32,6 +32,7 @@ Requires-Dist: anthropic
 Requires-Dist: google-cloud-aiplatform
 Requires-Dist: pyyaml
 Requires-Dist: prometheus-client
+Requires-Dist: pyarrow


 # TrustGraph
@@ -95,11 +96,13 @@ package installed can also run the entire architecture.
   chunking algorithm to produce smaller text chunks.
 - `embeddings-hf` - A service which analyses text and returns a vector
   embedding using one of the HuggingFace embeddings models.
+- `embeddings-ollama` - A service which analyses text and returns a vector
+  embedding using an Ollama embeddings model.
 - `embeddings-vectorize` - Uses an embeddings service to get a vector
   embedding which is added to the processor payload.
 - `graph-rag` - A query service which applies a Graph RAG algorithm to
   provide a response to a text prompt.
-- `graph-write-cassandra` - Takes knowledge graph edges and writes them to
+- `triples-write-cassandra` - Takes knowledge graph edges and writes them to
   a Cassandra store.
 - `kg-extract-definitions` - knowledge extractor - examines text and
   produces graph edges.
@@ -115,15 +118,15 @@ package installed can also run the entire architecture.
   format. For instance, the wrapping of text between lines in a PDF document
   is not semantically encoded, so the decoder will see wrapped lines as
   space-separated.
-- `vector-write-milvus` - Takes graph embeddings mappings and records them
+- `ge-write-milvus` - Takes graph embeddings mappings and records them
   in the vector embeddings store.

 ## LM Specific Modules

-- `llm-azure-text` - …
-- `llm-claude-text` - …
-- `llm-ollama-text` - …
-- `llm-vertexai-text` - …
+- `text-completion-azure` - Sends request to AzureAI serverless endpoint
+- `text-completion-claude` - Sends request to Anthropic's API
+- `text-completion-ollama` - Sends request to LM running using Ollama
+- `text-completion-vertexai` - Sends request to model available through VertexAI API

 ## Quickstart Guide

{trustgraph-0.4.2 → trustgraph-0.5.2}/README.md

@@ -60,11 +60,13 @@ package installed can also run the entire architecture.
   chunking algorithm to produce smaller text chunks.
 - `embeddings-hf` - A service which analyses text and returns a vector
   embedding using one of the HuggingFace embeddings models.
+- `embeddings-ollama` - A service which analyses text and returns a vector
+  embedding using an Ollama embeddings model.
 - `embeddings-vectorize` - Uses an embeddings service to get a vector
   embedding which is added to the processor payload.
 - `graph-rag` - A query service which applies a Graph RAG algorithm to
   provide a response to a text prompt.
-- `graph-write-cassandra` - Takes knowledge graph edges and writes them to
+- `triples-write-cassandra` - Takes knowledge graph edges and writes them to
   a Cassandra store.
 - `kg-extract-definitions` - knowledge extractor - examines text and
   produces graph edges.
@@ -80,15 +82,15 @@ package installed can also run the entire architecture.
   format. For instance, the wrapping of text between lines in a PDF document
   is not semantically encoded, so the decoder will see wrapped lines as
   space-separated.
-- `vector-write-milvus` - Takes graph embeddings mappings and records them
+- `ge-write-milvus` - Takes graph embeddings mappings and records them
   in the vector embeddings store.

 ## LM Specific Modules

-- `llm-azure-text` - …
-- `llm-claude-text` - …
-- `llm-ollama-text` - …
-- `llm-vertexai-text` - …
+- `text-completion-azure` - Sends request to AzureAI serverless endpoint
+- `text-completion-claude` - Sends request to Anthropic's API
+- `text-completion-ollama` - Sends request to LM running using Ollama
+- `text-completion-vertexai` - Sends request to model available through VertexAI API

 ## Quickstart Guide

{trustgraph-0.4.2 → trustgraph-0.5.2}/setup.py

@@ -4,7 +4,7 @@ import os
 with open("README.md", "r") as fh:
     long_description = fh.read()

-version = "0.4.2"
+version = "0.5.2"

 setuptools.setup(
     name="trustgraph",
@@ -44,27 +44,30 @@ setuptools.setup(
         "google-cloud-aiplatform",
         "pyyaml",
         "prometheus-client",
+        "pyarrow",
     ],
     scripts=[
         "scripts/chunker-recursive",
         "scripts/embeddings-hf",
         "scripts/embeddings-ollama",
         "scripts/embeddings-vectorize",
+        "scripts/ge-write-milvus",
         "scripts/graph-rag",
         "scripts/graph-show",
         "scripts/graph-to-turtle",
-        "scripts/graph-write-cassandra",
         "scripts/init-pulsar-manager",
         "scripts/kg-extract-definitions",
         "scripts/kg-extract-relationships",
-        "scripts/llm-azure-text",
-        "scripts/llm-claude-text",
-        "scripts/llm-ollama-text",
-        "scripts/llm-vertexai-text",
         "scripts/loader",
         "scripts/pdf-decoder",
         "scripts/query",
         "scripts/run-processing",
-        "scripts/vector-write-milvus",
+        "scripts/text-completion-azure",
+        "scripts/text-completion-claude",
+        "scripts/text-completion-ollama",
+        "scripts/text-completion-vertexai",
+        "scripts/triples-write-cassandra",
+        "scripts/dump-parquet",
+        "scripts/triples-dump-parquet",
     ]
 )
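The new pyarrow dependency backs the Parquet dump modules added in this release (trustgraph/dump/triples/parquet/). A generic sketch of writing graph triples to a Parquet file with pyarrow; the column names and layout are illustrative, not taken from the package's writer:

import pyarrow as pa
import pyarrow.parquet as pq

# One row per graph edge, three string columns (illustrative layout)
table = pa.table({
    "source": ["http://example.org/cat"],
    "predicate": ["http://www.w3.org/2000/01/rdf-schema#label"],
    "object": ["cat"],
})

pq.write_table(table, "triples.parquet")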
trustgraph-0.5.2/trustgraph/base/base_processor.py

@@ -0,0 +1,117 @@
+
+import os
+import argparse
+import pulsar
+import _pulsar
+import time
+from prometheus_client import start_http_server, Info
+
+from .. log_level import LogLevel
+
+class BaseProcessor:
+
+    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
+
+    def __init__(self, **params):
+
+        self.client = None
+
+        if not hasattr(__class__, "params_metric"):
+            __class__.params_metric = Info(
+                'params', 'Parameters configuration'
+            )
+
+        # FIXME: Maybe outputs information it should not
+        __class__.params_metric.info({
+            k: str(params[k])
+            for k in params
+        })
+
+        pulsar_host = params.get("pulsar_host", self.default_pulsar_host)
+        log_level = params.get("log_level", LogLevel.INFO)
+
+        self.pulsar_host = pulsar_host
+
+        self.client = pulsar.Client(
+            pulsar_host,
+            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
+        )
+
+    def __del__(self):
+
+        if self.client:
+            self.client.close()
+
+    @staticmethod
+    def add_args(parser):
+
+        parser.add_argument(
+            '-p', '--pulsar-host',
+            default=__class__.default_pulsar_host,
+            help=f'Pulsar host (default: {__class__.default_pulsar_host})',
+        )
+
+        parser.add_argument(
+            '-l', '--log-level',
+            type=LogLevel,
+            default=LogLevel.INFO,
+            choices=list(LogLevel),
+            help=f'Output queue (default: info)'
+        )
+
+        parser.add_argument(
+            '-M', '--metrics-enabled',
+            type=bool,
+            default=True,
+            help=f'Pulsar host (default: true)',
+        )
+
+        parser.add_argument(
+            '-P', '--metrics-port',
+            type=int,
+            default=8000,
+            help=f'Pulsar host (default: 8000)',
+        )
+
+    def run(self):
+        raise RuntimeError("Something should have implemented the run method")
+
+    @classmethod
+    def start(cls, prog, doc):
+
+        while True:
+
+            parser = argparse.ArgumentParser(
+                prog=prog,
+                description=doc
+            )
+
+            cls.add_args(parser)
+
+            args = parser.parse_args()
+            args = vars(args)
+
+            if args["metrics_enabled"]:
+                start_http_server(args["metrics_port"])
+
+            try:
+
+                p = cls(**args)
+                p.run()
+
+            except KeyboardInterrupt:
+                print("Keyboard interrupt.")
+                return
+
+            except _pulsar.Interrupted:
+                print("Pulsar Interrupted.")
+                return
+
+            except Exception as e:
+
+                print(type(e))
+
+                print("Exception:", e, flush=True)
+                print("Will retry...", flush=True)
+
+                time.sleep(10)
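BaseProcessor centralises Pulsar connection setup, CLI argument parsing, Prometheus metrics and a retry loop, replacing the 360-line trustgraph/base/processor.py removed from 0.4.2. A minimal sketch of a service built on it, assuming trustgraph.base re-exports the class (plausible given the six-line base/__init__.py, but not shown in this diff); ExampleProcessor and its behaviour are hypothetical:

from trustgraph.base import BaseProcessor  # assumed re-export

class ExampleProcessor(BaseProcessor):

    def run(self):
        # start() constructs the processor and calls run(); a real service
        # loops here. Returning hands control back to the retry loop.
        print("Connected to", self.pulsar_host, flush=True)

if __name__ == '__main__':
    # Parses -p/-l/-M/-P, starts the metrics server, then runs with retries
    ExampleProcessor.start("example-processor", "Example processor")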
trustgraph-0.5.2/trustgraph/base/consumer.py

@@ -0,0 +1,87 @@
+
+from pulsar.schema import JsonSchema
+from prometheus_client import start_http_server, Histogram, Info, Counter
+
+from . base_processor import BaseProcessor
+
+class Consumer(BaseProcessor):
+
+    def __init__(self, **params):
+
+        super(Consumer, self).__init__(**params)
+
+        input_queue = params.get("input_queue")
+        subscriber = params.get("subscriber")
+        input_schema = params.get("input_schema")
+
+        if input_schema == None:
+            raise RuntimeError("input_schema must be specified")
+
+        if not hasattr(__class__, "request_metric"):
+            __class__.request_metric = Histogram(
+                'request_latency', 'Request latency (seconds)'
+            )
+
+        if not hasattr(__class__, "pubsub_metric"):
+            __class__.pubsub_metric = Info(
+                'pubsub', 'Pub/sub configuration'
+            )
+
+        if not hasattr(__class__, "processing_metric"):
+            __class__.processing_metric = Counter(
+                'processing_count', 'Processing count', ["status"]
+            )
+
+        __class__.pubsub_metric.info({
+            "input_queue": input_queue,
+            "subscriber": subscriber,
+            "input_schema": input_schema.__name__,
+        })
+
+        self.consumer = self.client.subscribe(
+            input_queue, subscriber,
+            schema=JsonSchema(input_schema),
+        )
+
+    def run(self):
+
+        while True:
+
+            msg = self.consumer.receive()
+
+            try:
+
+                with __class__.request_metric.time():
+                    self.handle(msg)
+
+                # Acknowledge successful processing of the message
+                self.consumer.acknowledge(msg)
+
+                __class__.processing_metric.labels(status="success").inc()
+
+            except Exception as e:
+
+                print("Exception:", e, flush=True)
+
+                # Message failed to be processed
+                self.consumer.negative_acknowledge(msg)
+
+                __class__.processing_metric.labels(status="error").inc()
+
+    @staticmethod
+    def add_args(parser, default_input_queue, default_subscriber):
+
+        BaseProcessor.add_args(parser)
+
+        parser.add_argument(
+            '-i', '--input-queue',
+            default=default_input_queue,
+            help=f'Input queue (default: {default_input_queue})'
+        )
+
+        parser.add_argument(
+            '-s', '--subscriber',
+            default=default_subscriber,
+            help=f'Queue subscriber name (default: {default_subscriber})'
+        )
+
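Consumer wires a subscription, per-message latency histogram, and ack/nack handling around a handle() callback that subclasses supply. A hedged sketch of the pattern; the Fragment schema, queue and subscriber names are illustrative, and the trustgraph.base re-export is assumed:

from pulsar.schema import Record, String

from trustgraph.base import Consumer  # assumed re-export

class Fragment(Record):
    # Illustrative message schema; the real ones live in trustgraph/schema.py
    text = String()

class TextPrinter(Consumer):

    def __init__(self, **params):
        params["input_schema"] = Fragment
        super(TextPrinter, self).__init__(**params)

    def handle(self, msg):
        # Raising here makes run() negative-acknowledge the message
        print(msg.value().text, flush=True)

    @staticmethod
    def add_args(parser):
        # Bake in defaults so BaseProcessor.start() can call add_args(parser)
        Consumer.add_args(parser, "fragments", "text-printer")

if __name__ == '__main__':
    TextPrinter.start("text-printer", "Prints incoming text fragments")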
trustgraph-0.5.2/trustgraph/base/consumer_producer.py

@@ -0,0 +1,168 @@
+
+from pulsar.schema import JsonSchema
+from prometheus_client import Histogram, Info, Counter
+
+from . base_processor import BaseProcessor
+
+# FIXME: Derive from consumer? And producer?
+
+class ConsumerProducer(BaseProcessor):
+
+    def __init__(self, **params):
+
+        input_queue = params.get("input_queue")
+        output_queue = params.get("output_queue")
+        subscriber = params.get("subscriber")
+        input_schema = params.get("input_schema")
+        output_schema = params.get("output_schema")
+
+        if not hasattr(__class__, "request_metric"):
+            __class__.request_metric = Histogram(
+                'request_latency', 'Request latency (seconds)'
+            )
+
+        if not hasattr(__class__, "output_metric"):
+            __class__.output_metric = Counter(
+                'output_count', 'Output items created'
+            )
+
+        if not hasattr(__class__, "pubsub_metric"):
+            __class__.pubsub_metric = Info(
+                'pubsub', 'Pub/sub configuration'
+            )
+
+        if not hasattr(__class__, "processing_metric"):
+            __class__.processing_metric = Counter(
+                'processing_count', 'Processing count', ["status"]
+            )
+
+        __class__.pubsub_metric.info({
+            "input_queue": input_queue,
+            "output_queue": output_queue,
+            "subscriber": subscriber,
+            "input_schema": input_schema.__name__,
+            "output_schema": output_schema.__name__,
+        })
+
+        super(ConsumerProducer, self).__init__(**params)
+
+        if input_schema == None:
+            raise RuntimeError("input_schema must be specified")
+
+        if output_schema == None:
+            raise RuntimeError("output_schema must be specified")
+
+        self.consumer = self.client.subscribe(
+            input_queue, subscriber,
+            schema=JsonSchema(input_schema),
+        )
+
+        self.producer = self.client.create_producer(
+            topic=output_queue,
+            schema=JsonSchema(output_schema),
+        )
+
+    def run(self):
+
+        while True:
+
+            msg = self.consumer.receive()
+
+            try:
+
+                with __class__.request_metric.time():
+                    resp = self.handle(msg)
+
+                # Acknowledge successful processing of the message
+                self.consumer.acknowledge(msg)
+
+                __class__.processing_metric.labels(status="success").inc()
+
+            except Exception as e:
+
+                print("Exception:", e, flush=True)
+
+                # Message failed to be processed
+                self.consumer.negative_acknowledge(msg)
+
+                __class__.processing_metric.labels(status="error").inc()
+
+    def send(self, msg, properties={}):
+        self.producer.send(msg, properties)
+        __class__.output_metric.inc()
+
+    @staticmethod
+    def add_args(
+            parser, default_input_queue, default_subscriber,
+            default_output_queue,
+    ):
+
+        BaseProcessor.add_args(parser)
+
+        parser.add_argument(
+            '-i', '--input-queue',
+            default=default_input_queue,
+            help=f'Input queue (default: {default_input_queue})'
+        )
+
+        parser.add_argument(
+            '-s', '--subscriber',
+            default=default_subscriber,
+            help=f'Queue subscriber name (default: {default_subscriber})'
+        )
+
+        parser.add_argument(
+            '-o', '--output-queue',
+            default=default_output_queue,
+            help=f'Output queue (default: {default_output_queue})'
+        )
+
+class Producer(BaseProcessor):
+
+    def __init__(self, **params):
+
+        output_queue = params.get("output_queue")
+        output_schema = params.get("output_schema")
+
+        if not hasattr(__class__, "output_metric"):
+            __class__.output_metric = Counter(
+                'output_count', 'Output items created'
+            )
+
+        if not hasattr(__class__, "pubsub_metric"):
+            __class__.pubsub_metric = Info(
+                'pubsub', 'Pub/sub configuration'
+            )
+
+        __class__.pubsub_metric.info({
+            "output_queue": output_queue,
+            "output_schema": output_schema.__name__,
+        })
+
+        super(Producer, self).__init__(**params)
+
+        if output_schema == None:
+            raise RuntimeError("output_schema must be specified")
+
+        self.producer = self.client.create_producer(
+            topic=output_queue,
+            schema=JsonSchema(output_schema),
+        )
+
+    def send(self, msg, properties={}):
+        self.producer.send(msg, properties)
+        __class__.output_metric.inc()
+
+    @staticmethod
+    def add_args(
+            parser, default_input_queue, default_subscriber,
+            default_output_queue,
+    ):
+
+        BaseProcessor.add_args(parser)
+
+        parser.add_argument(
+            '-o', '--output-queue',
+            default=default_output_queue,
+            help=f'Output queue (default: {default_output_queue})'
+        )
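Note that run() above discards handle()'s return value, so a ConsumerProducer subclass is expected to publish its own output via send(). A hedged sketch with illustrative schemas and queue names, again assuming the trustgraph.base re-export:

from pulsar.schema import Record, String

from trustgraph.base import ConsumerProducer  # assumed re-export

class TextIn(Record):      # illustrative input schema
    text = String()

class TextOut(Record):     # illustrative output schema
    text = String()

class UpperCaser(ConsumerProducer):

    def __init__(self, **params):
        params["input_schema"] = TextIn
        params["output_schema"] = TextOut
        super(UpperCaser, self).__init__(**params)

    def handle(self, msg):
        # Timed by request_metric; send() publishes and bumps output_count
        self.send(TextOut(text=msg.value().text.upper()))

    @staticmethod
    def add_args(parser):
        ConsumerProducer.add_args(parser, "text-in", "upper-caser", "text-out")

if __name__ == '__main__':
    UpperCaser.start("upper-caser", "Upper-cases text between two queues")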
trustgraph-0.5.2/trustgraph/base/producer.py

@@ -0,0 +1,55 @@
+
+from pulsar.schema import JsonSchema
+from prometheus_client import Info, Counter
+
+from . base_processor import BaseProcessor
+
+class Producer(BaseProcessor):
+
+    def __init__(self, **params):
+
+        output_queue = params.get("output_queue")
+        output_schema = params.get("output_schema")
+
+        if not hasattr(__class__, "output_metric"):
+            __class__.output_metric = Counter(
+                'output_count', 'Output items created'
+            )
+
+        if not hasattr(__class__, "pubsub_metric"):
+            __class__.pubsub_metric = Info(
+                'pubsub', 'Pub/sub configuration'
+            )
+
+        __class__.pubsub_metric.info({
+            "output_queue": output_queue,
+            "output_schema": output_schema.__name__,
+        })
+
+        super(Producer, self).__init__(**params)
+
+        if output_schema == None:
+            raise RuntimeError("output_schema must be specified")
+
+        self.producer = self.client.create_producer(
+            topic=output_queue,
+            schema=JsonSchema(output_schema),
+        )
+
+    def send(self, msg, properties={}):
+        self.producer.send(msg, properties)
+        __class__.output_metric.inc()
+
+    @staticmethod
+    def add_args(
+            parser, default_input_queue, default_subscriber,
+            default_output_queue,
+    ):
+
+        BaseProcessor.add_args(parser)
+
+        parser.add_argument(
+            '-o', '--output-queue',
+            default=default_output_queue,
+            help=f'Output queue (default: {default_output_queue})'
+        )
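producer.py duplicates the Producer class embedded in consumer_producer.py and defines no run(), which suggests programmatic use rather than a standalone service. A hedged usage sketch; the schema and queue name are illustrative, and which of the two Producer classes trustgraph.base exports is not shown in this diff:

from pulsar.schema import Record, String

from trustgraph.base import Producer  # assumed re-export

class Item(Record):    # illustrative schema
    text = String()

p = Producer(
    output_queue="items",                    # illustrative queue name
    output_schema=Item,
    pulsar_host="pulsar://localhost:6650",
)

p.send(Item(text="hello, world"))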