trustgraph 0.3.1__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Potentially problematic release.
This version of trustgraph might be problematic.
- {trustgraph-0.3.1 → trustgraph-0.4.2}/PKG-INFO +3 -2
- {trustgraph-0.3.1 → trustgraph-0.4.2}/setup.py +2 -1
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/base/processor.py +140 -48
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/chunker/recursive/chunker.py +15 -18
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/decoder/pdf/pdf_decoder.py +12 -15
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/embeddings/hf/hf.py +13 -16
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/embeddings/ollama/processor.py +12 -17
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/embeddings/vectorize/vectorize.py +13 -16
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/graph/cassandra_write/write.py +10 -20
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/graph_rag.py +3 -3
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/kg/extract_definitions/extract.py +13 -16
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/kg/extract_relationships/extract.py +25 -17
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/llm/azure_text/llm.py +15 -17
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/llm/claude_text/llm.py +17 -19
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/llm/ollama_text/llm.py +27 -17
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/llm/vertexai_text/llm.py +15 -18
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/rag/graph/rag.py +24 -25
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/vector/milvus_write/write.py +12 -13
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph.egg-info/PKG-INFO +3 -2
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph.egg-info/requires.txt +1 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/LICENSE +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/README.md +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/chunker-recursive +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/embeddings-hf +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/embeddings-ollama +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/embeddings-vectorize +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/graph-rag +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/graph-show +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/graph-to-turtle +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/graph-write-cassandra +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/init-pulsar-manager +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/kg-extract-definitions +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/kg-extract-relationships +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/llm-azure-text +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/llm-claude-text +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/llm-ollama-text +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/llm-vertexai-text +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/loader +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/pdf-decoder +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/query +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/run-processing +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/scripts/vector-write-milvus +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/setup.cfg +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/base/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/chunker/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/chunker/recursive/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/chunker/recursive/__main__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/decoder/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/decoder/pdf/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/decoder/pdf/__main__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/embeddings/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/embeddings/hf/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/embeddings/hf/__main__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/embeddings/ollama/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/embeddings/ollama/__main__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/embeddings/vectorize/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/embeddings/vectorize/__main__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/embeddings_client.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/graph/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/graph/cassandra_write/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/graph/cassandra_write/__main__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/graph_rag_client.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/kg/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/kg/extract_definitions/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/kg/extract_definitions/__main__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/kg/extract_relationships/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/kg/extract_relationships/__main__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/llm/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/llm/azure_text/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/llm/azure_text/__main__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/llm/claude_text/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/llm/claude_text/__main__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/llm/ollama_text/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/llm/ollama_text/__main__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/llm/vertexai_text/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/llm/vertexai_text/__main__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/llm_client.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/log_level.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/processing/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/processing/__main__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/processing/processing.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/prompts.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/rag/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/rag/graph/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/rag/graph/__main__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/rdf.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/schema.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/triple_vectors.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/trustgraph.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/vector/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/vector/milvus_write/__init__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph/vector/milvus_write/__main__.py +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph.egg-info/SOURCES.txt +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph.egg-info/dependency_links.txt +0 -0
- {trustgraph-0.3.1 → trustgraph-0.4.2}/trustgraph.egg-info/top_level.txt +0 -0
--- trustgraph-0.3.1/PKG-INFO
+++ trustgraph-0.4.2/PKG-INFO
@@ -1,9 +1,9 @@
 Metadata-Version: 2.1
 Name: trustgraph
-Version: 0.3.1
+Version: 0.4.2
 Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
 Home-page: https://github.com/trustgraph-ai/trustgraph
-Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.3.1.tar.gz
+Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.4.2.tar.gz
 Author: trustgraph.ai
 Author-email: security@trustgraph.ai
 Classifier: Programming Language :: Python :: 3
@@ -31,6 +31,7 @@ Requires-Dist: pypdf
 Requires-Dist: anthropic
 Requires-Dist: google-cloud-aiplatform
 Requires-Dist: pyyaml
+Requires-Dist: prometheus-client
 
 
 # TrustGraph
--- trustgraph-0.3.1/setup.py
+++ trustgraph-0.4.2/setup.py
@@ -4,7 +4,7 @@ import os
 with open("README.md", "r") as fh:
     long_description = fh.read()
 
-version = "0.3.1"
+version = "0.4.2"
 
 setuptools.setup(
     name="trustgraph",
@@ -43,6 +43,7 @@ setuptools.setup(
         "anthropic",
         "google-cloud-aiplatform",
         "pyyaml",
+        "prometheus-client",
     ],
     scripts=[
         "scripts/chunker-recursive",
--- trustgraph-0.3.1/trustgraph/base/processor.py
+++ trustgraph-0.4.2/trustgraph/base/processor.py
@@ -2,8 +2,10 @@
 import os
 import argparse
 import pulsar
+import _pulsar
 import time
 from pulsar.schema import JsonSchema
+from prometheus_client import start_http_server, Histogram, Info, Counter
 
 from .. log_level import LogLevel
 
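The two new imports are the heart of this release: _pulsar is the low-level extension module behind the pulsar package, used below for its Interrupted exception, and prometheus-client provides the metric types wired into the processors. For orientation, a minimal standalone sketch of the prometheus_client pieces (metric names here are illustrative, not the package's):

    import random
    import time
    from prometheus_client import start_http_server, Counter, Histogram

    # A Counter only increases; it is exposed as items_processed_total.
    items = Counter('items_processed', 'Items processed')

    # A Histogram records observations, here durations in seconds.
    latency = Histogram('handle_latency', 'Handler latency (seconds)')

    start_http_server(8000)    # metrics served at http://localhost:8000/

    while True:
        with latency.time():   # records the elapsed time on exit
            time.sleep(random.random())
        items.inc()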
@@ -11,16 +13,23 @@ class BaseProcessor:
 
     default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
 
-    def __init__(
-            self,
-            pulsar_host=default_pulsar_host,
-            log_level=LogLevel.INFO,
-    ):
+    def __init__(self, **params):
 
         self.client = None
 
-        if
-
+        if not hasattr(__class__, "params_metric"):
+            __class__.params_metric = Info(
+                'params', 'Parameters configuration'
+            )
+
+        # FIXME: Maybe outputs information it should not
+        __class__.params_metric.info({
+            k: str(params[k])
+            for k in params
+        })
+
+        pulsar_host = params.get("pulsar_host", self.default_pulsar_host)
+        log_level = params.get("log_level", LogLevel.INFO)
 
         self.pulsar_host = pulsar_host
 
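The hasattr guard on __class__ is load-bearing: prometheus_client registers every metric in a process-wide default registry and raises if the same name is registered twice, so the collector is created once and kept as a class attribute, while .info() can safely be called on every construction. The failure mode the guard avoids, in isolation:

    from prometheus_client import Counter

    Counter('output_count', 'Output items created')
    try:
        Counter('output_count', 'Output items created')   # same name again
    except ValueError as e:
        print(e)   # Duplicated timeseries in CollectorRegistry: ...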
@@ -51,6 +60,20 @@ class BaseProcessor:
             help=f'Output queue (default: info)'
         )
 
+        parser.add_argument(
+            '-M', '--metrics-enabled',
+            type=bool,
+            default=True,
+            help=f'Pulsar host (default: true)',
+        )
+
+        parser.add_argument(
+            '-P', '--metrics-port',
+            type=int,
+            default=8000,
+            help=f'Pulsar host (default: 8000)',
+        )
+
     def run(self):
         raise RuntimeError("Something should have implemented the run method")
 
@@ -69,13 +92,26 @@ class BaseProcessor:
         args = parser.parse_args()
         args = vars(args)
 
+        if args["metrics_enabled"]:
+            start_http_server(args["metrics_port"])
+
         try:
 
             p = cls(**args)
             p.run()
 
+        except KeyboardInterrupt:
+            print("Keyboard interrupt.")
+            return
+
+        except _pulsar.Interrupted:
+            print("Pulsar Interrupted.")
+            return
+
         except Exception as e:
 
+            print(type(e))
+
             print("Exception:", e, flush=True)
             print("Will retry...", flush=True)
 
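With metrics enabled, which the new -M default makes the normal case, any processor launched through this entry point serves Prometheus text-format metrics on the port given by -P (8000 by default). A quick smoke test from Python, assuming a processor is running locally:

    import urllib.request

    # prometheus_client's HTTP server answers with the text exposition format.
    body = urllib.request.urlopen("http://localhost:8000/metrics").read()
    for line in body.decode().splitlines():
        if not line.startswith("#"):
            print(line)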
@@ -83,23 +119,38 @@ class BaseProcessor:
 
 class Consumer(BaseProcessor):
 
-    def __init__(
-            self,
-            pulsar_host=None,
-            log_level=LogLevel.INFO,
-            input_queue="input",
-            subscriber="subscriber",
-            input_schema=None,
-    ):
+    def __init__(self, **params):
 
-        super(Consumer, self).__init__(
-
-
-        )
+        super(Consumer, self).__init__(**params)
+
+        input_queue = params.get("input_queue")
+        subscriber = params.get("subscriber")
+        input_schema = params.get("input_schema")
 
         if input_schema == None:
             raise RuntimeError("input_schema must be specified")
 
+        if not hasattr(__class__, "request_metric"):
+            __class__.request_metric = Histogram(
+                'request_latency', 'Request latency (seconds)'
+            )
+
+        if not hasattr(__class__, "pubsub_metric"):
+            __class__.pubsub_metric = Info(
+                'pubsub', 'Pub/sub configuration'
+            )
+
+        if not hasattr(__class__, "processing_metric"):
+            __class__.processing_metric = Counter(
+                'processing_count', 'Processing count', ["status"]
+            )
+
+        __class__.pubsub_metric.info({
+            "input_queue": input_queue,
+            "subscriber": subscriber,
+            "input_schema": input_schema.__name__,
+        })
+
         self.consumer = self.client.subscribe(
             input_queue, subscriber,
             schema=JsonSchema(input_schema),
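The Info metric used for the pub/sub settings attaches static configuration to the scrape output rather than a numeric time series: prometheus_client exposes it as a single sample with the configured values as labels. A self-contained illustration (names are demo placeholders, not the package's):

    from prometheus_client import Info, generate_latest

    ps = Info('demo_pubsub', 'Pub/sub configuration')
    ps.info({"input_queue": "input", "subscriber": "subscriber"})

    # The scrape output now contains a line like:
    # demo_pubsub_info{input_queue="input",subscriber="subscriber"} 1.0
    print(generate_latest().decode())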
@@ -113,11 +164,14 @@ class Consumer(BaseProcessor):
 
         try:
 
-            self.handle(msg)
+            with __class__.request_metric.time():
+                self.handle(msg)
 
             # Acknowledge successful processing of the message
             self.consumer.acknowledge(msg)
 
+            __class__.processing_metric.labels(status="success").inc()
+
         except Exception as e:
 
             print("Exception:", e, flush=True)
@@ -125,6 +179,8 @@ class Consumer(BaseProcessor):
             # Message failed to be processed
             self.consumer.negative_acknowledge(msg)
 
+            __class__.processing_metric.labels(status="error").inc()
+
     @staticmethod
     def add_args(parser, default_input_queue, default_subscriber):
 
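request_metric.time() is both a decorator and a context manager in prometheus_client; used as above, it records the wall-clock duration of each handle() call as one Histogram observation. A minimal sketch:

    import time
    from prometheus_client import Histogram

    h = Histogram('demo_latency', 'Demo latency (seconds)')

    with h.time():        # one observation per with-block
        time.sleep(0.1)

    # Scrapes then include demo_latency_count, demo_latency_sum and
    # cumulative demo_latency_bucket{le="..."} series.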
@@ -144,21 +200,43 @@ class Consumer(BaseProcessor):
 
 class ConsumerProducer(BaseProcessor):
 
-    def __init__(
-            self,
-            pulsar_host=None,
-            log_level=LogLevel.INFO,
-            input_queue="input",
-            output_queue="output",
-            subscriber="subscriber",
-            input_schema=None,
-            output_schema=None,
-    ):
+    def __init__(self, **params):
 
-
-
-
-        )
+        input_queue = params.get("input_queue")
+        output_queue = params.get("output_queue")
+        subscriber = params.get("subscriber")
+        input_schema = params.get("input_schema")
+        output_schema = params.get("output_schema")
+
+        if not hasattr(__class__, "request_metric"):
+            __class__.request_metric = Histogram(
+                'request_latency', 'Request latency (seconds)'
+            )
+
+        if not hasattr(__class__, "output_metric"):
+            __class__.output_metric = Counter(
+                'output_count', 'Output items created'
+            )
+
+        if not hasattr(__class__, "pubsub_metric"):
+            __class__.pubsub_metric = Info(
+                'pubsub', 'Pub/sub configuration'
+            )
+
+        if not hasattr(__class__, "processing_metric"):
+            __class__.processing_metric = Counter(
+                'processing_count', 'Processing count', ["status"]
+            )
+
+        __class__.pubsub_metric.info({
+            "input_queue": input_queue,
+            "output_queue": output_queue,
+            "subscriber": subscriber,
+            "input_schema": input_schema.__name__,
+            "output_schema": output_schema.__name__,
+        })
+
+        super(ConsumerProducer, self).__init__(**params)
 
         if input_schema == None:
             raise RuntimeError("input_schema must be specified")
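Declaring the processing counter with a status label keeps successes and errors in one metric family, so they can be graphed or alerted on separately without needing two metric names. Illustrative behaviour:

    from prometheus_client import Counter

    c = Counter('demo_processing_count', 'Processing count', ['status'])
    c.labels(status='success').inc()
    c.labels(status='error').inc()

    # Exposed as:
    # demo_processing_count_total{status="success"} 1.0
    # demo_processing_count_total{status="error"} 1.0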
@@ -184,11 +262,14 @@ class ConsumerProducer(BaseProcessor):
 
         try:
 
-            resp = self.handle(msg)
+            with __class__.request_metric.time():
+                resp = self.handle(msg)
 
             # Acknowledge successful processing of the message
             self.consumer.acknowledge(msg)
 
+            __class__.processing_metric.labels(status="success").inc()
+
         except Exception as e:
 
             print("Exception:", e, flush=True)
@@ -196,9 +277,11 @@ class ConsumerProducer(BaseProcessor):
             # Message failed to be processed
             self.consumer.negative_acknowledge(msg)
 
-
+            __class__.processing_metric.labels(status="error").inc()
 
+    def send(self, msg, properties={}):
         self.producer.send(msg, properties)
+        __class__.output_metric.inc()
 
     @staticmethod
     def add_args(
@@ -228,18 +311,27 @@ class ConsumerProducer(BaseProcessor):
 
 class Producer(BaseProcessor):
 
-    def __init__(
-            self,
-            pulsar_host=None,
-            log_level=LogLevel.INFO,
-            output_queue="output",
-            output_schema=None,
-    ):
+    def __init__(self, **params):
 
-
-
-
-        )
+        output_queue = params.get("output_queue")
+        output_schema = params.get("output_schema")
+
+        if not hasattr(__class__, "output_metric"):
+            __class__.output_metric = Counter(
+                'output_count', 'Output items created'
+            )
+
+        if not hasattr(__class__, "pubsub_metric"):
+            __class__.pubsub_metric = Info(
+                'pubsub', 'Pub/sub configuration'
+            )
+
+        __class__.pubsub_metric.info({
+            "output_queue": output_queue,
+            "output_schema": output_schema.__name__,
+        })
+
+        super(Producer, self).__init__(**params)
 
         if output_schema == None:
             raise RuntimeError("output_schema must be specified")
@@ -250,8 +342,8 @@ class Producer(BaseProcessor):
         )
 
     def send(self, msg, properties={}):
-
         self.producer.send(msg, properties)
+        __class__.output_metric.inc()
 
     @staticmethod
     def add_args(
--- trustgraph-0.3.1/trustgraph/chunker/recursive/chunker.py
+++ trustgraph-0.4.2/trustgraph/chunker/recursive/chunker.py
@@ -17,25 +17,22 @@ default_subscriber = 'chunker-recursive'
 
 class Processor(ConsumerProducer):
 
-    def __init__(
-
-
-
-
-
-
-
-            chunk_overlap=100,
-    ):
-
+    def __init__(self, **params):
+
+        input_queue = params.get("input_queue", default_input_queue)
+        output_queue = params.get("output_queue", default_output_queue)
+        subscriber = params.get("subscriber", default_subscriber)
+        chunk_size = params.get("chunk_size", 2000)
+        chunk_overlap = params.get("chunk_overlap", 100)
+
         super(Processor, self).__init__(
-
-
-
-
-
-
+            **params | {
+                "input_queue": input_queue,
+                "output_queue": output_queue,
+                "subscriber": subscriber,
+                "input_schema": TextDocument,
+                "output_schema": Chunk,
+            }
         )
 
         self.text_splitter = RecursiveCharacterTextSplitter(
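The rewritten constructors all use the same idiom: **params | {...} unpacks the union of the caller's params with the processor-specific settings, and since the dict union operator lets the right-hand side win, the queue names and schema classes fixed here take precedence. Note that this idiom requires Python 3.9 or later. A small demonstration with made-up values:

    params = {"pulsar_host": "pulsar://pulsar:6650", "input_queue": "stale"}
    merged = params | {"input_queue": "text-doc-load",
                       "subscriber": "chunker-recursive"}
    print(merged)
    # {'pulsar_host': 'pulsar://pulsar:6650',
    #  'input_queue': 'text-doc-load', 'subscriber': 'chunker-recursive'}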
--- trustgraph-0.3.1/trustgraph/decoder/pdf/pdf_decoder.py
+++ trustgraph-0.4.2/trustgraph/decoder/pdf/pdf_decoder.py
@@ -18,23 +18,20 @@ default_subscriber = 'pdf-decoder'
 
 class Processor(ConsumerProducer):
 
-    def __init__(
-
-
-
-
-            subscriber=default_subscriber,
-            log_level=LogLevel.INFO,
-    ):
+    def __init__(self, **params):
+
+        input_queue = params.get("input_queue", default_input_queue)
+        output_queue = params.get("output_queue", default_output_queue)
+        subscriber = params.get("subscriber", default_subscriber)
 
         super(Processor, self).__init__(
-
-
-
-
-
-
-
+            **params | {
+                "input_queue": input_queue,
+                "output_queue": output_queue,
+                "subscriber": subscriber,
+                "input_schema": Document,
+                "output_schema": TextDocument,
+            }
         )
 
         print("PDF inited")
--- trustgraph-0.3.1/trustgraph/embeddings/hf/hf.py
+++ trustgraph-0.4.2/trustgraph/embeddings/hf/hf.py
@@ -17,24 +17,21 @@ default_model="all-MiniLM-L6-v2"
 
 class Processor(ConsumerProducer):
 
-    def __init__(
-
-
-
-
-
-            log_level=LogLevel.INFO,
-            model=default_model,
-    ):
+    def __init__(self, **params):
+
+        input_queue = params.get("input_queue", default_input_queue)
+        output_queue = params.get("output_queue", default_output_queue)
+        subscriber = params.get("subscriber", default_subscriber)
+        model = params.get("model", default_model)
 
         super(Processor, self).__init__(
-
-
-
-
-
-
-
+            **params | {
+                "input_queue": input_queue,
+                "output_queue": output_queue,
+                "subscriber": subscriber,
+                "input_schema": EmbeddingsRequest,
+                "output_schema": EmbeddingsResponse,
+            }
         )
 
         self.embeddings = HuggingFaceEmbeddings(model_name=model)
--- trustgraph-0.3.1/trustgraph/embeddings/ollama/processor.py
+++ trustgraph-0.4.2/trustgraph/embeddings/ollama/processor.py
@@ -17,25 +17,20 @@ default_ollama = 'http://localhost:11434'
 
 class Processor(ConsumerProducer):
 
-    def __init__(
-
-
-
-
-            subscriber=default_subscriber,
-            log_level=LogLevel.INFO,
-            model=default_model,
-            ollama=default_ollama,
-    ):
+    def __init__(self, **params):
+
+        input_queue = params.get("input_queue", default_input_queue)
+        output_queue = params.get("output_queue", default_output_queue)
+        subscriber = params.get("subscriber", default_subscriber)
 
         super(Processor, self).__init__(
-
-
-
-
-
-
-
+            **params | {
+                "input_queue": input_queue,
+                "output_queue": output_queue,
+                "subscriber": subscriber,
+                "input_schema": EmbeddingsRequest,
+                "output_schema": EmbeddingsResponse,
+            }
         )
 
         self.embeddings = OllamaEmbeddings(base_url=ollama, model=model)
--- trustgraph-0.3.1/trustgraph/embeddings/vectorize/vectorize.py
+++ trustgraph-0.4.2/trustgraph/embeddings/vectorize/vectorize.py
@@ -15,26 +15,23 @@ default_subscriber = 'embeddings-vectorizer'
 
 class Processor(ConsumerProducer):
 
-    def __init__(
-
-
-
-
-            subscriber=default_subscriber,
-            log_level=LogLevel.INFO,
-    ):
+    def __init__(self, **params):
+
+        input_queue = params.get("input_queue", default_input_queue)
+        output_queue = params.get("output_queue", default_output_queue)
+        subscriber = params.get("subscriber", default_subscriber)
 
         super(Processor, self).__init__(
-
-
-
-
-
-
-
+            **params | {
+                "input_queue": input_queue,
+                "output_queue": output_queue,
+                "subscriber": subscriber,
+                "input_schema": Chunk,
+                "output_schema": VectorsChunk,
+            }
         )
 
-        self.embeddings = EmbeddingsClient(pulsar_host=pulsar_host)
+        self.embeddings = EmbeddingsClient(pulsar_host=self.pulsar_host)
 
     def emit(self, source, chunk, vectors):
 
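The EmbeddingsClient change is the companion fix the **params refactor needs: pulsar_host is no longer bound as a local name inside __init__, so the stored self.pulsar_host, set by BaseProcessor, is used instead; the same one-line change shows up below for LlmClient in the definitions extractor. A toy sketch of the pattern, with simplified stand-ins rather than the package's classes:

    class Base:
        def __init__(self, **params):
            # The base class resolves and stores the connection parameter.
            self.pulsar_host = params.get("pulsar_host", "pulsar://pulsar:6650")

    class Vectorizer(Base):
        def __init__(self, **params):
            super().__init__(**params)
            # No local 'pulsar_host' exists here any more, so the
            # attribute set by Base is the only correct source.
            print("client would connect to", self.pulsar_host)

    Vectorizer(pulsar_host="pulsar://localhost:6650")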
--- trustgraph-0.3.1/trustgraph/graph/cassandra_write/write.py
+++ trustgraph-0.4.2/trustgraph/graph/cassandra_write/write.py
@@ -20,27 +20,22 @@ default_graph_host='localhost'
 
 class Processor(Consumer):
 
-    def __init__(
-
-
-
-
-            graph_host=default_graph_host,
-            log_level=LogLevel.INFO,
-    ):
+    def __init__(self, **params):
+
+        input_queue = params.get("input_queue", default_input_queue)
+        subscriber = params.get("subscriber", default_subscriber)
+        graph_host = params.get("graph_host", default_graph_host)
 
         super(Processor, self).__init__(
-
-
-
-
-
+            **params | {
+                "input_queue": input_queue,
+                "subscriber": subscriber,
+                "input_schema": Triple,
+            }
         )
 
         self.tg = TrustGraph([graph_host])
 
-        self.count = 0
-
     def handle(self, msg):
 
         v = msg.value()
@@ -51,11 +46,6 @@ class Processor(Consumer):
             v.o.value
         )
 
-        self.count += 1
-
-        if (self.count % 1000) == 0:
-            print(self.count, "...", flush=True)
-
     @staticmethod
     def add_args(parser):
 
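The hand-rolled progress counter that printed every thousandth triple goes away here, presumably because the Consumer base class now tracks the same thing in the labelled processing_count metric. In-process, the count can be read back from the default registry:

    from prometheus_client import REGISTRY

    ok = REGISTRY.get_sample_value(
        'processing_count_total', {'status': 'success'})
    print(ok)   # successfully handled messages, or None before first sample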
--- trustgraph-0.3.1/trustgraph/graph_rag.py
+++ trustgraph-0.4.2/trustgraph/graph_rag.py
@@ -18,7 +18,7 @@ class GraphRag:
         verbose=False,
         entity_limit=50,
         triple_limit=30,
-
+        max_subgraph_size=3000,
     ):
 
         self.verbose=verbose
@@ -37,7 +37,7 @@ class GraphRag:
 
         self.entity_limit=entity_limit
         self.query_limit=triple_limit
-        self.
+        self.max_subgraph_size=max_subgraph_size
 
         self.label_cache = {}
 
@@ -149,7 +149,7 @@ class GraphRag:
 
         subgraph = list(subgraph)
 
-        subgraph = subgraph[0:self.
+        subgraph = subgraph[0:self.max_subgraph_size]
 
         if self.verbose:
             print("Subgraph:", flush=True)
--- trustgraph-0.3.1/trustgraph/kg/extract_definitions/extract.py
+++ trustgraph-0.4.2/trustgraph/kg/extract_definitions/extract.py
@@ -22,26 +22,23 @@ default_subscriber = 'kg-extract-definitions'
 
 class Processor(ConsumerProducer):
 
-    def __init__(
-
-
-
-
-            subscriber=default_subscriber,
-            log_level=LogLevel.INFO,
-    ):
+    def __init__(self, **params):
+
+        input_queue = params.get("input_queue", default_input_queue)
+        output_queue = params.get("output_queue", default_output_queue)
+        subscriber = params.get("subscriber", default_subscriber)
 
         super(Processor, self).__init__(
-
-
-
-
-
-
-
+            **params | {
+                "input_queue": input_queue,
+                "output_queue": output_queue,
+                "subscriber": subscriber,
+                "input_schema": VectorsChunk,
+                "output_schema": Triple,
+            }
         )
 
-        self.llm = LlmClient(pulsar_host=pulsar_host)
+        self.llm = LlmClient(pulsar_host=self.pulsar_host)
 
     def to_uri(self, text):
 