trustgraph 0.5.3__tar.gz → 0.11.20__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of trustgraph might be problematic; more details are available via the link on the original diff page.
- trustgraph-0.11.20/PKG-INFO +22 -0
- trustgraph-0.11.20/README.md +1 -0
- trustgraph-0.11.20/setup.py +47 -0
- {trustgraph-0.5.3 → trustgraph-0.11.20}/trustgraph/base/base_processor.py +7 -5
- {trustgraph-0.5.3 → trustgraph-0.11.20}/trustgraph/base/consumer.py +21 -1
- {trustgraph-0.5.3 → trustgraph-0.11.20}/trustgraph/base/consumer_producer.py +26 -55
- trustgraph-0.11.20/trustgraph/base_version.py +1 -0
- trustgraph-0.11.20/trustgraph/clients/base.py +125 -0
- trustgraph-0.11.20/trustgraph/clients/document_embeddings_client.py +45 -0
- trustgraph-0.11.20/trustgraph/clients/document_rag_client.py +46 -0
- trustgraph-0.11.20/trustgraph/clients/embeddings_client.py +44 -0
- trustgraph-0.11.20/trustgraph/clients/graph_embeddings_client.py +45 -0
- trustgraph-0.11.20/trustgraph/clients/graph_rag_client.py +46 -0
- trustgraph-0.11.20/trustgraph/clients/llm_client.py +40 -0
- trustgraph-0.11.20/trustgraph/clients/prompt_client.py +100 -0
- trustgraph-0.11.20/trustgraph/clients/triples_query_client.py +59 -0
- trustgraph-0.11.20/trustgraph/exceptions.py +14 -0
- trustgraph-0.11.20/trustgraph/objects/field.py +72 -0
- trustgraph-0.11.20/trustgraph/objects/object.py +8 -0
- trustgraph-0.11.20/trustgraph/schema/__init__.py +12 -0
- trustgraph-0.11.20/trustgraph/schema/documents.py +68 -0
- trustgraph-0.11.20/trustgraph/schema/graph.py +69 -0
- trustgraph-0.11.20/trustgraph/schema/models.py +44 -0
- trustgraph-0.11.20/trustgraph/schema/object.py +33 -0
- trustgraph-0.11.20/trustgraph/schema/prompt.py +65 -0
- trustgraph-0.11.20/trustgraph/schema/retrieval.py +40 -0
- trustgraph-0.11.20/trustgraph/schema/topic.py +4 -0
- trustgraph-0.11.20/trustgraph/schema/types.py +25 -0
- trustgraph-0.11.20/trustgraph/trustgraph_version.py +1 -0
- trustgraph-0.11.20/trustgraph.egg-info/PKG-INFO +22 -0
- trustgraph-0.11.20/trustgraph.egg-info/SOURCES.txt +39 -0
- trustgraph-0.11.20/trustgraph.egg-info/requires.txt +7 -0
- trustgraph-0.5.3/LICENSE +0 -202
- trustgraph-0.5.3/PKG-INFO +0 -140
- trustgraph-0.5.3/README.md +0 -104
- trustgraph-0.5.3/scripts/chunker-recursive +0 -6
- trustgraph-0.5.3/scripts/concat-parquet +0 -45
- trustgraph-0.5.3/scripts/dump-parquet +0 -24
- trustgraph-0.5.3/scripts/embeddings-hf +0 -6
- trustgraph-0.5.3/scripts/embeddings-ollama +0 -6
- trustgraph-0.5.3/scripts/embeddings-vectorize +0 -6
- trustgraph-0.5.3/scripts/ge-dump-parquet +0 -6
- trustgraph-0.5.3/scripts/ge-write-milvus +0 -6
- trustgraph-0.5.3/scripts/graph-rag +0 -6
- trustgraph-0.5.3/scripts/graph-show +0 -45
- trustgraph-0.5.3/scripts/graph-to-turtle +0 -37
- trustgraph-0.5.3/scripts/init-pulsar-manager +0 -11
- trustgraph-0.5.3/scripts/kg-extract-definitions +0 -6
- trustgraph-0.5.3/scripts/kg-extract-relationships +0 -6
- trustgraph-0.5.3/scripts/load-graph-embeddings +0 -145
- trustgraph-0.5.3/scripts/load-triples +0 -144
- trustgraph-0.5.3/scripts/loader +0 -128
- trustgraph-0.5.3/scripts/pdf-decoder +0 -6
- trustgraph-0.5.3/scripts/query +0 -21
- trustgraph-0.5.3/scripts/run-processing +0 -6
- trustgraph-0.5.3/scripts/text-completion-azure +0 -6
- trustgraph-0.5.3/scripts/text-completion-claude +0 -6
- trustgraph-0.5.3/scripts/text-completion-ollama +0 -6
- trustgraph-0.5.3/scripts/text-completion-vertexai +0 -6
- trustgraph-0.5.3/scripts/triples-dump-parquet +0 -6
- trustgraph-0.5.3/scripts/triples-write-cassandra +0 -6
- trustgraph-0.5.3/setup.py +0 -77
- trustgraph-0.5.3/trustgraph/chunking/recursive/__init__.py +0 -3
- trustgraph-0.5.3/trustgraph/chunking/recursive/__main__.py +0 -7
- trustgraph-0.5.3/trustgraph/chunking/recursive/chunker.py +0 -99
- trustgraph-0.5.3/trustgraph/decoding/__init__.py +0 -0
- trustgraph-0.5.3/trustgraph/decoding/pdf/__init__.py +0 -3
- trustgraph-0.5.3/trustgraph/decoding/pdf/__main__.py +0 -7
- trustgraph-0.5.3/trustgraph/decoding/pdf/pdf_decoder.py +0 -87
- trustgraph-0.5.3/trustgraph/dump/__init__.py +0 -0
- trustgraph-0.5.3/trustgraph/dump/graph_embeddings/__init__.py +0 -0
- trustgraph-0.5.3/trustgraph/dump/graph_embeddings/parquet/__init__.py +0 -3
- trustgraph-0.5.3/trustgraph/dump/graph_embeddings/parquet/__main__.py +0 -7
- trustgraph-0.5.3/trustgraph/dump/graph_embeddings/parquet/processor.py +0 -87
- trustgraph-0.5.3/trustgraph/dump/graph_embeddings/parquet/writer.py +0 -94
- trustgraph-0.5.3/trustgraph/dump/triples/__init__.py +0 -0
- trustgraph-0.5.3/trustgraph/dump/triples/parquet/__init__.py +0 -3
- trustgraph-0.5.3/trustgraph/dump/triples/parquet/__main__.py +0 -7
- trustgraph-0.5.3/trustgraph/dump/triples/parquet/processor.py +0 -87
- trustgraph-0.5.3/trustgraph/dump/triples/parquet/writer.py +0 -96
- trustgraph-0.5.3/trustgraph/embeddings/__init__.py +0 -0
- trustgraph-0.5.3/trustgraph/embeddings/hf/__init__.py +0 -3
- trustgraph-0.5.3/trustgraph/embeddings/hf/__main__.py +0 -7
- trustgraph-0.5.3/trustgraph/embeddings/hf/hf.py +0 -77
- trustgraph-0.5.3/trustgraph/embeddings/ollama/__init__.py +0 -3
- trustgraph-0.5.3/trustgraph/embeddings/ollama/__main__.py +0 -7
- trustgraph-0.5.3/trustgraph/embeddings/ollama/processor.py +0 -84
- trustgraph-0.5.3/trustgraph/embeddings/vectorize/__init__.py +0 -3
- trustgraph-0.5.3/trustgraph/embeddings/vectorize/__main__.py +0 -6
- trustgraph-0.5.3/trustgraph/embeddings/vectorize/vectorize.py +0 -80
- trustgraph-0.5.3/trustgraph/embeddings_client.py +0 -79
- trustgraph-0.5.3/trustgraph/graph_rag.py +0 -239
- trustgraph-0.5.3/trustgraph/graph_rag_client.py +0 -78
- trustgraph-0.5.3/trustgraph/kg/__init__.py +0 -0
- trustgraph-0.5.3/trustgraph/kg/extract_definitions/__init__.py +0 -3
- trustgraph-0.5.3/trustgraph/kg/extract_definitions/__main__.py +0 -7
- trustgraph-0.5.3/trustgraph/kg/extract_definitions/extract.py +0 -114
- trustgraph-0.5.3/trustgraph/kg/extract_relationships/__init__.py +0 -3
- trustgraph-0.5.3/trustgraph/kg/extract_relationships/__main__.py +0 -7
- trustgraph-0.5.3/trustgraph/kg/extract_relationships/extract.py +0 -185
- trustgraph-0.5.3/trustgraph/llm_client.py +0 -80
- trustgraph-0.5.3/trustgraph/model/__init__.py +0 -0
- trustgraph-0.5.3/trustgraph/model/text_completion/__init__.py +0 -0
- trustgraph-0.5.3/trustgraph/model/text_completion/azure/__init__.py +0 -3
- trustgraph-0.5.3/trustgraph/model/text_completion/azure/__main__.py +0 -7
- trustgraph-0.5.3/trustgraph/model/text_completion/azure/llm.py +0 -128
- trustgraph-0.5.3/trustgraph/model/text_completion/claude/__init__.py +0 -3
- trustgraph-0.5.3/trustgraph/model/text_completion/claude/__main__.py +0 -7
- trustgraph-0.5.3/trustgraph/model/text_completion/claude/llm.py +0 -110
- trustgraph-0.5.3/trustgraph/model/text_completion/ollama/__init__.py +0 -3
- trustgraph-0.5.3/trustgraph/model/text_completion/ollama/__main__.py +0 -7
- trustgraph-0.5.3/trustgraph/model/text_completion/ollama/llm.py +0 -102
- trustgraph-0.5.3/trustgraph/model/text_completion/vertexai/__init__.py +0 -3
- trustgraph-0.5.3/trustgraph/model/text_completion/vertexai/__main__.py +0 -7
- trustgraph-0.5.3/trustgraph/model/text_completion/vertexai/llm.py +0 -177
- trustgraph-0.5.3/trustgraph/processing/__init__.py +0 -3
- trustgraph-0.5.3/trustgraph/processing/__main__.py +0 -7
- trustgraph-0.5.3/trustgraph/processing/processing.py +0 -171
- trustgraph-0.5.3/trustgraph/prompts.py +0 -138
- trustgraph-0.5.3/trustgraph/retrieval/__init__.py +0 -0
- trustgraph-0.5.3/trustgraph/retrieval/graph_rag/__init__.py +0 -3
- trustgraph-0.5.3/trustgraph/retrieval/graph_rag/__main__.py +0 -7
- trustgraph-0.5.3/trustgraph/retrieval/graph_rag/rag.py +0 -119
- trustgraph-0.5.3/trustgraph/schema.py +0 -125
- trustgraph-0.5.3/trustgraph/storage/__init__.py +0 -0
- trustgraph-0.5.3/trustgraph/storage/graph_embeddings/__init__.py +0 -0
- trustgraph-0.5.3/trustgraph/storage/graph_embeddings/milvus/__init__.py +0 -3
- trustgraph-0.5.3/trustgraph/storage/graph_embeddings/milvus/__main__.py +0 -7
- trustgraph-0.5.3/trustgraph/storage/graph_embeddings/milvus/write.py +0 -61
- trustgraph-0.5.3/trustgraph/storage/triples/__init__.py +0 -0
- trustgraph-0.5.3/trustgraph/storage/triples/cassandra/__init__.py +0 -3
- trustgraph-0.5.3/trustgraph/storage/triples/cassandra/__main__.py +0 -7
- trustgraph-0.5.3/trustgraph/storage/triples/cassandra/write.py +0 -68
- trustgraph-0.5.3/trustgraph/triple_vectors.py +0 -136
- trustgraph-0.5.3/trustgraph/trustgraph.py +0 -108
- trustgraph-0.5.3/trustgraph.egg-info/PKG-INFO +0 -140
- trustgraph-0.5.3/trustgraph.egg-info/SOURCES.txt +0 -116
- trustgraph-0.5.3/trustgraph.egg-info/requires.txt +0 -21
- {trustgraph-0.5.3 → trustgraph-0.11.20}/setup.cfg +0 -0
- {trustgraph-0.5.3 → trustgraph-0.11.20}/trustgraph/base/__init__.py +0 -0
- {trustgraph-0.5.3 → trustgraph-0.11.20}/trustgraph/base/producer.py +0 -0
- {trustgraph-0.5.3/trustgraph → trustgraph-0.11.20/trustgraph/clients}/__init__.py +0 -0
- {trustgraph-0.5.3 → trustgraph-0.11.20}/trustgraph/log_level.py +0 -0
- {trustgraph-0.5.3/trustgraph/chunking → trustgraph-0.11.20/trustgraph/objects}/__init__.py +0 -0
- {trustgraph-0.5.3 → trustgraph-0.11.20}/trustgraph/rdf.py +0 -0
- {trustgraph-0.5.3 → trustgraph-0.11.20}/trustgraph.egg-info/dependency_links.txt +0 -0
- {trustgraph-0.5.3 → trustgraph-0.11.20}/trustgraph.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: trustgraph
|
|
3
|
+
Version: 0.11.20
|
|
4
|
+
Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
|
|
5
|
+
Home-page: https://github.com/trustgraph-ai/trustgraph
|
|
6
|
+
Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.11.20.tar.gz
|
|
7
|
+
Author: trustgraph.ai
|
|
8
|
+
Author-email: security@trustgraph.ai
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Requires-Python: >=3.8
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
Requires-Dist: trustgraph-base<0.12
|
|
15
|
+
Requires-Dist: trustgraph-bedrock<0.12
|
|
16
|
+
Requires-Dist: trustgraph-cli<0.12
|
|
17
|
+
Requires-Dist: trustgraph-embeddings-hf<0.12
|
|
18
|
+
Requires-Dist: trustgraph-flow<0.12
|
|
19
|
+
Requires-Dist: trustgraph-parquet<0.12
|
|
20
|
+
Requires-Dist: trustgraph-vertexai<0.12
|
|
21
|
+
|
|
22
|
+
See https://trustgraph.ai/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
See https://trustgraph.ai/
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import setuptools
|
|
2
|
+
import os
|
|
3
|
+
import importlib
|
|
4
|
+
|
|
5
|
+
with open("README.md", "r") as fh:
|
|
6
|
+
long_description = fh.read()
|
|
7
|
+
|
|
8
|
+
# Load a version number module
|
|
9
|
+
spec = importlib.util.spec_from_file_location(
|
|
10
|
+
'version', 'trustgraph/trustgraph_version.py'
|
|
11
|
+
)
|
|
12
|
+
version_module = importlib.util.module_from_spec(spec)
|
|
13
|
+
spec.loader.exec_module(version_module)
|
|
14
|
+
|
|
15
|
+
version = version_module.__version__
|
|
16
|
+
|
|
17
|
+
setuptools.setup(
|
|
18
|
+
name="trustgraph",
|
|
19
|
+
version=version,
|
|
20
|
+
author="trustgraph.ai",
|
|
21
|
+
author_email="security@trustgraph.ai",
|
|
22
|
+
description="TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.",
|
|
23
|
+
long_description=long_description,
|
|
24
|
+
long_description_content_type="text/markdown",
|
|
25
|
+
url="https://github.com/trustgraph-ai/trustgraph",
|
|
26
|
+
packages=setuptools.find_namespace_packages(
|
|
27
|
+
where='./',
|
|
28
|
+
),
|
|
29
|
+
classifiers=[
|
|
30
|
+
"Programming Language :: Python :: 3",
|
|
31
|
+
"License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
|
|
32
|
+
"Operating System :: OS Independent",
|
|
33
|
+
],
|
|
34
|
+
python_requires='>=3.8',
|
|
35
|
+
download_url = "https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v" + version + ".tar.gz",
|
|
36
|
+
install_requires=[
|
|
37
|
+
"trustgraph-base<0.12",
|
|
38
|
+
"trustgraph-bedrock<0.12",
|
|
39
|
+
"trustgraph-cli<0.12",
|
|
40
|
+
"trustgraph-embeddings-hf<0.12",
|
|
41
|
+
"trustgraph-flow<0.12",
|
|
42
|
+
"trustgraph-parquet<0.12",
|
|
43
|
+
"trustgraph-vertexai<0.12",
|
|
44
|
+
],
|
|
45
|
+
scripts=[
|
|
46
|
+
]
|
|
47
|
+
)
|
|
@@ -60,10 +60,10 @@ class BaseProcessor:
|
|
|
60
60
|
)
|
|
61
61
|
|
|
62
62
|
parser.add_argument(
|
|
63
|
-
'
|
|
64
|
-
|
|
63
|
+
'--metrics',
|
|
64
|
+
action=argparse.BooleanOptionalAction,
|
|
65
65
|
default=True,
|
|
66
|
-
help=f'
|
|
66
|
+
help=f'Metrics enabled (default: true)',
|
|
67
67
|
)
|
|
68
68
|
|
|
69
69
|
parser.add_argument(
|
|
@@ -89,7 +89,9 @@ class BaseProcessor:
|
|
|
89
89
|
args = parser.parse_args()
|
|
90
90
|
args = vars(args)
|
|
91
91
|
|
|
92
|
-
|
|
92
|
+
print(args)
|
|
93
|
+
|
|
94
|
+
if args["metrics"]:
|
|
93
95
|
start_http_server(args["metrics_port"])
|
|
94
96
|
|
|
95
97
|
while True:
|
|
@@ -114,4 +116,4 @@ class BaseProcessor:
|
|
|
114
116
|
print("Exception:", e, flush=True)
|
|
115
117
|
print("Will retry...", flush=True)
|
|
116
118
|
|
|
117
|
-
time.sleep(
|
|
119
|
+
time.sleep(4)
|
|
@@ -1,13 +1,24 @@
|
|
|
1
1
|
|
|
2
2
|
from pulsar.schema import JsonSchema
|
|
3
|
-
from prometheus_client import
|
|
3
|
+
from prometheus_client import Histogram, Info, Counter, Enum
|
|
4
|
+
import time
|
|
4
5
|
|
|
5
6
|
from . base_processor import BaseProcessor
|
|
7
|
+
from .. exceptions import TooManyRequests
|
|
6
8
|
|
|
7
9
|
class Consumer(BaseProcessor):
|
|
8
10
|
|
|
9
11
|
def __init__(self, **params):
|
|
10
12
|
|
|
13
|
+
if not hasattr(__class__, "state_metric"):
|
|
14
|
+
__class__.state_metric = Enum(
|
|
15
|
+
'processor_state', 'Processor state',
|
|
16
|
+
states=['starting', 'running', 'stopped']
|
|
17
|
+
)
|
|
18
|
+
__class__.state_metric.state('starting')
|
|
19
|
+
|
|
20
|
+
__class__.state_metric.state('starting')
|
|
21
|
+
|
|
11
22
|
super(Consumer, self).__init__(**params)
|
|
12
23
|
|
|
13
24
|
input_queue = params.get("input_queue")
|
|
@@ -45,6 +56,8 @@ class Consumer(BaseProcessor):
|
|
|
45
56
|
|
|
46
57
|
def run(self):
|
|
47
58
|
|
|
59
|
+
__class__.state_metric.state('running')
|
|
60
|
+
|
|
48
61
|
while True:
|
|
49
62
|
|
|
50
63
|
msg = self.consumer.receive()
|
|
@@ -59,6 +72,13 @@ class Consumer(BaseProcessor):
|
|
|
59
72
|
|
|
60
73
|
__class__.processing_metric.labels(status="success").inc()
|
|
61
74
|
|
|
75
|
+
except TooManyRequests:
|
|
76
|
+
self.consumer.negative_acknowledge(msg)
|
|
77
|
+
print("TooManyRequests: will retry")
|
|
78
|
+
__class__.processing_metric.labels(status="rate-limit").inc()
|
|
79
|
+
time.sleep(5)
|
|
80
|
+
continue
|
|
81
|
+
|
|
62
82
|
except Exception as e:
|
|
63
83
|
|
|
64
84
|
print("Exception:", e, flush=True)
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
|
|
2
2
|
from pulsar.schema import JsonSchema
|
|
3
|
-
from prometheus_client import Histogram, Info, Counter
|
|
3
|
+
from prometheus_client import Histogram, Info, Counter, Enum
|
|
4
|
+
import time
|
|
4
5
|
|
|
5
6
|
from . base_processor import BaseProcessor
|
|
7
|
+
from .. exceptions import TooManyRequests
|
|
6
8
|
|
|
7
9
|
# FIXME: Derive from consumer? And producer?
|
|
8
10
|
|
|
@@ -10,6 +12,15 @@ class ConsumerProducer(BaseProcessor):
|
|
|
10
12
|
|
|
11
13
|
def __init__(self, **params):
|
|
12
14
|
|
|
15
|
+
if not hasattr(__class__, "state_metric"):
|
|
16
|
+
__class__.state_metric = Enum(
|
|
17
|
+
'processor_state', 'Processor state',
|
|
18
|
+
states=['starting', 'running', 'stopped']
|
|
19
|
+
)
|
|
20
|
+
__class__.state_metric.state('starting')
|
|
21
|
+
|
|
22
|
+
__class__.state_metric.state('starting')
|
|
23
|
+
|
|
13
24
|
input_queue = params.get("input_queue")
|
|
14
25
|
output_queue = params.get("output_queue")
|
|
15
26
|
subscriber = params.get("subscriber")
|
|
@@ -52,18 +63,20 @@ class ConsumerProducer(BaseProcessor):
|
|
|
52
63
|
if output_schema == None:
|
|
53
64
|
raise RuntimeError("output_schema must be specified")
|
|
54
65
|
|
|
55
|
-
self.consumer = self.client.subscribe(
|
|
56
|
-
input_queue, subscriber,
|
|
57
|
-
schema=JsonSchema(input_schema),
|
|
58
|
-
)
|
|
59
|
-
|
|
60
66
|
self.producer = self.client.create_producer(
|
|
61
67
|
topic=output_queue,
|
|
62
68
|
schema=JsonSchema(output_schema),
|
|
63
69
|
)
|
|
64
70
|
|
|
71
|
+
self.consumer = self.client.subscribe(
|
|
72
|
+
input_queue, subscriber,
|
|
73
|
+
schema=JsonSchema(input_schema),
|
|
74
|
+
)
|
|
75
|
+
|
|
65
76
|
def run(self):
|
|
66
77
|
|
|
78
|
+
__class__.state_metric.state('running')
|
|
79
|
+
|
|
67
80
|
while True:
|
|
68
81
|
|
|
69
82
|
msg = self.consumer.receive()
|
|
@@ -78,6 +91,13 @@ class ConsumerProducer(BaseProcessor):
|
|
|
78
91
|
|
|
79
92
|
__class__.processing_metric.labels(status="success").inc()
|
|
80
93
|
|
|
94
|
+
except TooManyRequests:
|
|
95
|
+
self.consumer.negative_acknowledge(msg)
|
|
96
|
+
print("TooManyRequests: will retry")
|
|
97
|
+
__class__.processing_metric.labels(status="rate-limit").inc()
|
|
98
|
+
time.sleep(5)
|
|
99
|
+
continue
|
|
100
|
+
|
|
81
101
|
except Exception as e:
|
|
82
102
|
|
|
83
103
|
print("Exception:", e, flush=True)
|
|
@@ -117,52 +137,3 @@ class ConsumerProducer(BaseProcessor):
|
|
|
117
137
|
help=f'Output queue (default: {default_output_queue})'
|
|
118
138
|
)
|
|
119
139
|
|
|
120
|
-
class Producer(BaseProcessor):
|
|
121
|
-
|
|
122
|
-
def __init__(self, **params):
|
|
123
|
-
|
|
124
|
-
output_queue = params.get("output_queue")
|
|
125
|
-
output_schema = params.get("output_schema")
|
|
126
|
-
|
|
127
|
-
if not hasattr(__class__, "output_metric"):
|
|
128
|
-
__class__.output_metric = Counter(
|
|
129
|
-
'output_count', 'Output items created'
|
|
130
|
-
)
|
|
131
|
-
|
|
132
|
-
if not hasattr(__class__, "pubsub_metric"):
|
|
133
|
-
__class__.pubsub_metric = Info(
|
|
134
|
-
'pubsub', 'Pub/sub configuration'
|
|
135
|
-
)
|
|
136
|
-
|
|
137
|
-
__class__.pubsub_metric.info({
|
|
138
|
-
"output_queue": output_queue,
|
|
139
|
-
"output_schema": output_schema.__name__,
|
|
140
|
-
})
|
|
141
|
-
|
|
142
|
-
super(Producer, self).__init__(**params)
|
|
143
|
-
|
|
144
|
-
if output_schema == None:
|
|
145
|
-
raise RuntimeError("output_schema must be specified")
|
|
146
|
-
|
|
147
|
-
self.producer = self.client.create_producer(
|
|
148
|
-
topic=output_queue,
|
|
149
|
-
schema=JsonSchema(output_schema),
|
|
150
|
-
)
|
|
151
|
-
|
|
152
|
-
def send(self, msg, properties={}):
|
|
153
|
-
self.producer.send(msg, properties)
|
|
154
|
-
__class__.output_metric.inc()
|
|
155
|
-
|
|
156
|
-
@staticmethod
|
|
157
|
-
def add_args(
|
|
158
|
-
parser, default_input_queue, default_subscriber,
|
|
159
|
-
default_output_queue,
|
|
160
|
-
):
|
|
161
|
-
|
|
162
|
-
BaseProcessor.add_args(parser)
|
|
163
|
-
|
|
164
|
-
parser.add_argument(
|
|
165
|
-
'-o', '--output-queue',
|
|
166
|
-
default=default_output_queue,
|
|
167
|
-
help=f'Output queue (default: {default_output_queue})'
|
|
168
|
-
)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.12.0"
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
|
|
2
|
+
import pulsar
|
|
3
|
+
import _pulsar
|
|
4
|
+
import hashlib
|
|
5
|
+
import uuid
|
|
6
|
+
import time
|
|
7
|
+
from pulsar.schema import JsonSchema
|
|
8
|
+
|
|
9
|
+
from .. exceptions import *
|
|
10
|
+
|
|
11
|
+
# Default timeout for a request/response. In seconds.
|
|
12
|
+
DEFAULT_TIMEOUT=300
|
|
13
|
+
|
|
14
|
+
# Ugly
|
|
15
|
+
ERROR=_pulsar.LoggerLevel.Error
|
|
16
|
+
WARN=_pulsar.LoggerLevel.Warn
|
|
17
|
+
INFO=_pulsar.LoggerLevel.Info
|
|
18
|
+
DEBUG=_pulsar.LoggerLevel.Debug
|
|
19
|
+
|
|
20
|
+
class BaseClient:
|
|
21
|
+
|
|
22
|
+
def __init__(
|
|
23
|
+
self, log_level=ERROR,
|
|
24
|
+
subscriber=None,
|
|
25
|
+
input_queue=None,
|
|
26
|
+
output_queue=None,
|
|
27
|
+
input_schema=None,
|
|
28
|
+
output_schema=None,
|
|
29
|
+
pulsar_host="pulsar://pulsar:6650",
|
|
30
|
+
):
|
|
31
|
+
|
|
32
|
+
if input_queue == None: raise RuntimeError("Need input_queue")
|
|
33
|
+
if output_queue == None: raise RuntimeError("Need output_queue")
|
|
34
|
+
if input_schema == None: raise RuntimeError("Need input_schema")
|
|
35
|
+
if output_schema == None: raise RuntimeError("Need output_schema")
|
|
36
|
+
|
|
37
|
+
if subscriber == None:
|
|
38
|
+
subscriber = str(uuid.uuid4())
|
|
39
|
+
|
|
40
|
+
self.client = pulsar.Client(
|
|
41
|
+
pulsar_host,
|
|
42
|
+
logger=pulsar.ConsoleLogger(log_level),
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
self.producer = self.client.create_producer(
|
|
46
|
+
topic=input_queue,
|
|
47
|
+
schema=JsonSchema(input_schema),
|
|
48
|
+
chunking_enabled=True,
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
self.consumer = self.client.subscribe(
|
|
52
|
+
output_queue, subscriber,
|
|
53
|
+
schema=JsonSchema(output_schema),
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
self.input_schema = input_schema
|
|
57
|
+
self.output_schema = output_schema
|
|
58
|
+
|
|
59
|
+
def call(self, **args):
|
|
60
|
+
|
|
61
|
+
timeout = args.get("timeout", DEFAULT_TIMEOUT)
|
|
62
|
+
|
|
63
|
+
if "timeout" in args:
|
|
64
|
+
del args["timeout"]
|
|
65
|
+
|
|
66
|
+
id = str(uuid.uuid4())
|
|
67
|
+
|
|
68
|
+
r = self.input_schema(**args)
|
|
69
|
+
|
|
70
|
+
end_time = time.time() + timeout
|
|
71
|
+
|
|
72
|
+
self.producer.send(r, properties={ "id": id })
|
|
73
|
+
|
|
74
|
+
while time.time() < end_time:
|
|
75
|
+
|
|
76
|
+
try:
|
|
77
|
+
msg = self.consumer.receive(timeout_millis=2500)
|
|
78
|
+
except pulsar.exceptions.Timeout:
|
|
79
|
+
continue
|
|
80
|
+
|
|
81
|
+
mid = msg.properties()["id"]
|
|
82
|
+
|
|
83
|
+
if mid == id:
|
|
84
|
+
|
|
85
|
+
value = msg.value()
|
|
86
|
+
|
|
87
|
+
if value.error:
|
|
88
|
+
|
|
89
|
+
self.consumer.acknowledge(msg)
|
|
90
|
+
|
|
91
|
+
if value.error.type == "llm-error":
|
|
92
|
+
raise LlmError(value.error.message)
|
|
93
|
+
|
|
94
|
+
elif value.error.type == "too-many-requests":
|
|
95
|
+
raise TooManyRequests(value.error.message)
|
|
96
|
+
|
|
97
|
+
elif value.error.type == "ParseError":
|
|
98
|
+
raise ParseError(value.error.message)
|
|
99
|
+
|
|
100
|
+
else:
|
|
101
|
+
|
|
102
|
+
raise RuntimeError(
|
|
103
|
+
f"{value.error.type}: {value.error.message}"
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
resp = msg.value()
|
|
107
|
+
self.consumer.acknowledge(msg)
|
|
108
|
+
return resp
|
|
109
|
+
|
|
110
|
+
# Ignore messages with wrong ID
|
|
111
|
+
self.consumer.acknowledge(msg)
|
|
112
|
+
|
|
113
|
+
raise TimeoutError("Timed out waiting for response")
|
|
114
|
+
|
|
115
|
+
def __del__(self):
|
|
116
|
+
|
|
117
|
+
if hasattr(self, "consumer"):
|
|
118
|
+
self.consumer.close()
|
|
119
|
+
|
|
120
|
+
if hasattr(self, "producer"):
|
|
121
|
+
self.producer.flush()
|
|
122
|
+
self.producer.close()
|
|
123
|
+
|
|
124
|
+
self.client.close()
|
|
125
|
+
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
|
|
2
|
+
import _pulsar
|
|
3
|
+
|
|
4
|
+
from .. schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse
|
|
5
|
+
from .. schema import document_embeddings_request_queue
|
|
6
|
+
from .. schema import document_embeddings_response_queue
|
|
7
|
+
from . base import BaseClient
|
|
8
|
+
|
|
9
|
+
# Ugly
|
|
10
|
+
ERROR=_pulsar.LoggerLevel.Error
|
|
11
|
+
WARN=_pulsar.LoggerLevel.Warn
|
|
12
|
+
INFO=_pulsar.LoggerLevel.Info
|
|
13
|
+
DEBUG=_pulsar.LoggerLevel.Debug
|
|
14
|
+
|
|
15
|
+
class DocumentEmbeddingsClient(BaseClient):
|
|
16
|
+
|
|
17
|
+
def __init__(
|
|
18
|
+
self, log_level=ERROR,
|
|
19
|
+
subscriber=None,
|
|
20
|
+
input_queue=None,
|
|
21
|
+
output_queue=None,
|
|
22
|
+
pulsar_host="pulsar://pulsar:6650",
|
|
23
|
+
):
|
|
24
|
+
|
|
25
|
+
if input_queue == None:
|
|
26
|
+
input_queue = document_embeddings_request_queue
|
|
27
|
+
|
|
28
|
+
if output_queue == None:
|
|
29
|
+
output_queue = document_embeddings_response_queue
|
|
30
|
+
|
|
31
|
+
super(DocumentEmbeddingsClient, self).__init__(
|
|
32
|
+
log_level=log_level,
|
|
33
|
+
subscriber=subscriber,
|
|
34
|
+
input_queue=input_queue,
|
|
35
|
+
output_queue=output_queue,
|
|
36
|
+
pulsar_host=pulsar_host,
|
|
37
|
+
input_schema=DocumentEmbeddingsRequest,
|
|
38
|
+
output_schema=DocumentEmbeddingsResponse,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
def request(self, vectors, limit=10, timeout=300):
|
|
42
|
+
return self.call(
|
|
43
|
+
vectors=vectors, limit=limit, timeout=timeout
|
|
44
|
+
).documents
|
|
45
|
+
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
|
|
2
|
+
import _pulsar
|
|
3
|
+
|
|
4
|
+
from .. schema import DocumentRagQuery, DocumentRagResponse
|
|
5
|
+
from .. schema import document_rag_request_queue, document_rag_response_queue
|
|
6
|
+
from . base import BaseClient
|
|
7
|
+
|
|
8
|
+
# Ugly
|
|
9
|
+
ERROR=_pulsar.LoggerLevel.Error
|
|
10
|
+
WARN=_pulsar.LoggerLevel.Warn
|
|
11
|
+
INFO=_pulsar.LoggerLevel.Info
|
|
12
|
+
DEBUG=_pulsar.LoggerLevel.Debug
|
|
13
|
+
|
|
14
|
+
class DocumentRagClient(BaseClient):
|
|
15
|
+
|
|
16
|
+
def __init__(
|
|
17
|
+
self,
|
|
18
|
+
log_level=ERROR,
|
|
19
|
+
subscriber=None,
|
|
20
|
+
input_queue=None,
|
|
21
|
+
output_queue=None,
|
|
22
|
+
pulsar_host="pulsar://pulsar:6650",
|
|
23
|
+
):
|
|
24
|
+
|
|
25
|
+
if input_queue == None:
|
|
26
|
+
input_queue = document_rag_request_queue
|
|
27
|
+
|
|
28
|
+
if output_queue == None:
|
|
29
|
+
output_queue = document_rag_response_queue
|
|
30
|
+
|
|
31
|
+
super(DocumentRagClient, self).__init__(
|
|
32
|
+
log_level=log_level,
|
|
33
|
+
subscriber=subscriber,
|
|
34
|
+
input_queue=input_queue,
|
|
35
|
+
output_queue=output_queue,
|
|
36
|
+
pulsar_host=pulsar_host,
|
|
37
|
+
input_schema=DocumentRagQuery,
|
|
38
|
+
output_schema=DocumentRagResponse,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
def request(self, query, timeout=500):
|
|
42
|
+
|
|
43
|
+
return self.call(
|
|
44
|
+
query=query, timeout=timeout
|
|
45
|
+
).response
|
|
46
|
+
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
|
|
2
|
+
from pulsar.schema import JsonSchema
|
|
3
|
+
from .. schema import EmbeddingsRequest, EmbeddingsResponse
|
|
4
|
+
from .. schema import embeddings_request_queue, embeddings_response_queue
|
|
5
|
+
from . base import BaseClient
|
|
6
|
+
|
|
7
|
+
import _pulsar
|
|
8
|
+
|
|
9
|
+
# Ugly
|
|
10
|
+
ERROR=_pulsar.LoggerLevel.Error
|
|
11
|
+
WARN=_pulsar.LoggerLevel.Warn
|
|
12
|
+
INFO=_pulsar.LoggerLevel.Info
|
|
13
|
+
DEBUG=_pulsar.LoggerLevel.Debug
|
|
14
|
+
|
|
15
|
+
class EmbeddingsClient(BaseClient):
|
|
16
|
+
|
|
17
|
+
def __init__(
|
|
18
|
+
self, log_level=ERROR,
|
|
19
|
+
input_queue=None,
|
|
20
|
+
output_queue=None,
|
|
21
|
+
subscriber=None,
|
|
22
|
+
pulsar_host="pulsar://pulsar:6650",
|
|
23
|
+
):
|
|
24
|
+
|
|
25
|
+
if input_queue == None:
|
|
26
|
+
input_queue=embeddings_request_queue
|
|
27
|
+
|
|
28
|
+
if output_queue == None:
|
|
29
|
+
output_queue=embeddings_response_queue
|
|
30
|
+
|
|
31
|
+
super(EmbeddingsClient, self).__init__(
|
|
32
|
+
log_level=log_level,
|
|
33
|
+
subscriber=subscriber,
|
|
34
|
+
input_queue=input_queue,
|
|
35
|
+
output_queue=output_queue,
|
|
36
|
+
pulsar_host=pulsar_host,
|
|
37
|
+
input_schema=EmbeddingsRequest,
|
|
38
|
+
output_schema=EmbeddingsResponse,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
def request(self, text, timeout=300):
|
|
42
|
+
return self.call(text=text, timeout=timeout).vectors
|
|
43
|
+
|
|
44
|
+
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
|
|
2
|
+
import _pulsar
|
|
3
|
+
|
|
4
|
+
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
|
|
5
|
+
from .. schema import graph_embeddings_request_queue
|
|
6
|
+
from .. schema import graph_embeddings_response_queue
|
|
7
|
+
from . base import BaseClient
|
|
8
|
+
|
|
9
|
+
# Ugly
|
|
10
|
+
ERROR=_pulsar.LoggerLevel.Error
|
|
11
|
+
WARN=_pulsar.LoggerLevel.Warn
|
|
12
|
+
INFO=_pulsar.LoggerLevel.Info
|
|
13
|
+
DEBUG=_pulsar.LoggerLevel.Debug
|
|
14
|
+
|
|
15
|
+
class GraphEmbeddingsClient(BaseClient):
|
|
16
|
+
|
|
17
|
+
def __init__(
|
|
18
|
+
self, log_level=ERROR,
|
|
19
|
+
subscriber=None,
|
|
20
|
+
input_queue=None,
|
|
21
|
+
output_queue=None,
|
|
22
|
+
pulsar_host="pulsar://pulsar:6650",
|
|
23
|
+
):
|
|
24
|
+
|
|
25
|
+
if input_queue == None:
|
|
26
|
+
input_queue = graph_embeddings_request_queue
|
|
27
|
+
|
|
28
|
+
if output_queue == None:
|
|
29
|
+
output_queue = graph_embeddings_response_queue
|
|
30
|
+
|
|
31
|
+
super(GraphEmbeddingsClient, self).__init__(
|
|
32
|
+
log_level=log_level,
|
|
33
|
+
subscriber=subscriber,
|
|
34
|
+
input_queue=input_queue,
|
|
35
|
+
output_queue=output_queue,
|
|
36
|
+
pulsar_host=pulsar_host,
|
|
37
|
+
input_schema=GraphEmbeddingsRequest,
|
|
38
|
+
output_schema=GraphEmbeddingsResponse,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
def request(self, vectors, limit=10, timeout=300):
|
|
42
|
+
return self.call(
|
|
43
|
+
vectors=vectors, limit=limit, timeout=timeout
|
|
44
|
+
).entities
|
|
45
|
+
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
|
|
2
|
+
import _pulsar
|
|
3
|
+
|
|
4
|
+
from .. schema import GraphRagQuery, GraphRagResponse
|
|
5
|
+
from .. schema import graph_rag_request_queue, graph_rag_response_queue
|
|
6
|
+
from . base import BaseClient
|
|
7
|
+
|
|
8
|
+
# Ugly
|
|
9
|
+
ERROR=_pulsar.LoggerLevel.Error
|
|
10
|
+
WARN=_pulsar.LoggerLevel.Warn
|
|
11
|
+
INFO=_pulsar.LoggerLevel.Info
|
|
12
|
+
DEBUG=_pulsar.LoggerLevel.Debug
|
|
13
|
+
|
|
14
|
+
class GraphRagClient(BaseClient):
|
|
15
|
+
|
|
16
|
+
def __init__(
|
|
17
|
+
self,
|
|
18
|
+
log_level=ERROR,
|
|
19
|
+
subscriber=None,
|
|
20
|
+
input_queue=None,
|
|
21
|
+
output_queue=None,
|
|
22
|
+
pulsar_host="pulsar://pulsar:6650",
|
|
23
|
+
):
|
|
24
|
+
|
|
25
|
+
if input_queue == None:
|
|
26
|
+
input_queue = graph_rag_request_queue
|
|
27
|
+
|
|
28
|
+
if output_queue == None:
|
|
29
|
+
output_queue = graph_rag_response_queue
|
|
30
|
+
|
|
31
|
+
super(GraphRagClient, self).__init__(
|
|
32
|
+
log_level=log_level,
|
|
33
|
+
subscriber=subscriber,
|
|
34
|
+
input_queue=input_queue,
|
|
35
|
+
output_queue=output_queue,
|
|
36
|
+
pulsar_host=pulsar_host,
|
|
37
|
+
input_schema=GraphRagQuery,
|
|
38
|
+
output_schema=GraphRagResponse,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
def request(self, query, timeout=500):
|
|
42
|
+
|
|
43
|
+
return self.call(
|
|
44
|
+
query=query, timeout=timeout
|
|
45
|
+
).response
|
|
46
|
+
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
|
|
2
|
+
import _pulsar
|
|
3
|
+
|
|
4
|
+
from .. schema import TextCompletionRequest, TextCompletionResponse
|
|
5
|
+
from .. schema import text_completion_request_queue
|
|
6
|
+
from .. schema import text_completion_response_queue
|
|
7
|
+
from . base import BaseClient
|
|
8
|
+
|
|
9
|
+
# Ugly
|
|
10
|
+
ERROR=_pulsar.LoggerLevel.Error
|
|
11
|
+
WARN=_pulsar.LoggerLevel.Warn
|
|
12
|
+
INFO=_pulsar.LoggerLevel.Info
|
|
13
|
+
DEBUG=_pulsar.LoggerLevel.Debug
|
|
14
|
+
|
|
15
|
+
class LlmClient(BaseClient):
|
|
16
|
+
|
|
17
|
+
def __init__(
|
|
18
|
+
self, log_level=ERROR,
|
|
19
|
+
subscriber=None,
|
|
20
|
+
input_queue=None,
|
|
21
|
+
output_queue=None,
|
|
22
|
+
pulsar_host="pulsar://pulsar:6650",
|
|
23
|
+
):
|
|
24
|
+
|
|
25
|
+
if input_queue is None: input_queue = text_completion_request_queue
|
|
26
|
+
if output_queue is None: output_queue = text_completion_response_queue
|
|
27
|
+
|
|
28
|
+
super(LlmClient, self).__init__(
|
|
29
|
+
log_level=log_level,
|
|
30
|
+
subscriber=subscriber,
|
|
31
|
+
input_queue=input_queue,
|
|
32
|
+
output_queue=output_queue,
|
|
33
|
+
pulsar_host=pulsar_host,
|
|
34
|
+
input_schema=TextCompletionRequest,
|
|
35
|
+
output_schema=TextCompletionResponse,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
def request(self, prompt, timeout=300):
|
|
39
|
+
return self.call(prompt=prompt, timeout=timeout).response
|
|
40
|
+
|