trustgraph-base 0.22.4__tar.gz → 0.22.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/PKG-INFO +2 -2
- trustgraph-base-0.22.6/trustgraph/base/__init__.py +31 -0
- trustgraph-base-0.22.6/trustgraph/base/agent_client.py +39 -0
- trustgraph-base-0.22.6/trustgraph/base/agent_service.py +100 -0
- trustgraph-base-0.22.6/trustgraph/base/async_processor.py +254 -0
- trustgraph-base-0.22.6/trustgraph/base/consumer.py +197 -0
- trustgraph-base-0.22.6/trustgraph/base/consumer_spec.py +36 -0
- trustgraph-base-0.22.6/trustgraph/base/document_embeddings_client.py +38 -0
- trustgraph-base-0.22.6/trustgraph/base/document_embeddings_query_service.py +84 -0
- trustgraph-base-0.22.6/trustgraph/base/document_embeddings_store_service.py +50 -0
- trustgraph-base-0.22.6/trustgraph/base/embeddings_client.py +31 -0
- trustgraph-base-0.22.6/trustgraph/base/embeddings_service.py +90 -0
- trustgraph-base-0.22.6/trustgraph/base/flow.py +32 -0
- trustgraph-base-0.22.6/trustgraph/base/flow_processor.py +115 -0
- trustgraph-base-0.22.6/trustgraph/base/graph_embeddings_client.py +45 -0
- trustgraph-base-0.22.6/trustgraph/base/graph_embeddings_query_service.py +84 -0
- trustgraph-base-0.22.6/trustgraph/base/graph_embeddings_store_service.py +50 -0
- trustgraph-base-0.22.6/trustgraph/base/graph_rag_client.py +33 -0
- trustgraph-base-0.22.6/trustgraph/base/llm_service.py +114 -0
- trustgraph-base-0.22.6/trustgraph/base/metrics.py +82 -0
- trustgraph-base-0.22.6/trustgraph/base/producer.py +69 -0
- trustgraph-base-0.22.6/trustgraph/base/producer_spec.py +25 -0
- trustgraph-base-0.22.6/trustgraph/base/prompt_client.py +93 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/base/publisher.py +23 -19
- trustgraph-base-0.22.6/trustgraph/base/pubsub.py +80 -0
- trustgraph-base-0.22.6/trustgraph/base/request_response_spec.py +136 -0
- trustgraph-base-0.22.6/trustgraph/base/setting_spec.py +19 -0
- trustgraph-base-0.22.6/trustgraph/base/spec.py +4 -0
- trustgraph-base-0.22.6/trustgraph/base/subscriber.py +140 -0
- trustgraph-base-0.22.6/trustgraph/base/subscriber_spec.py +30 -0
- trustgraph-base-0.22.6/trustgraph/base/text_completion_client.py +30 -0
- trustgraph-base-0.22.6/trustgraph/base/triples_client.py +61 -0
- trustgraph-base-0.22.6/trustgraph/base/triples_query_service.py +82 -0
- trustgraph-base-0.22.6/trustgraph/base/triples_store_service.py +47 -0
- trustgraph-base-0.22.6/trustgraph/base_version.py +1 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/schema/agent.py +0 -7
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/schema/config.py +1 -1
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/schema/documents.py +0 -15
- trustgraph-base-0.22.6/trustgraph/schema/flows.py +66 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph_base.egg-info/PKG-INFO +2 -2
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph_base.egg-info/SOURCES.txt +29 -2
- trustgraph-base-0.22.4/trustgraph/base/__init__.py +0 -8
- trustgraph-base-0.22.4/trustgraph/base/base_processor.py +0 -210
- trustgraph-base-0.22.4/trustgraph/base/consumer.py +0 -173
- trustgraph-base-0.22.4/trustgraph/base/consumer_producer.py +0 -62
- trustgraph-base-0.22.4/trustgraph/base/producer.py +0 -56
- trustgraph-base-0.22.4/trustgraph/base/subscriber.py +0 -114
- trustgraph-base-0.22.4/trustgraph/base_version.py +0 -1
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/README.md +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/setup.cfg +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/setup.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/api/__init__.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/api/api.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/clients/__init__.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/clients/agent_client.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/clients/base.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/clients/config_client.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/clients/document_embeddings_client.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/clients/document_rag_client.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/clients/embeddings_client.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/clients/graph_embeddings_client.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/clients/graph_rag_client.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/clients/llm_client.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/clients/prompt_client.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/clients/triples_query_client.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/exceptions.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/knowledge/__init__.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/knowledge/defs.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/knowledge/document.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/knowledge/identifier.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/knowledge/organization.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/knowledge/publication.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/log_level.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/objects/__init__.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/objects/field.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/objects/object.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/rdf.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/schema/__init__.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/schema/graph.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/schema/library.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/schema/lookup.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/schema/metadata.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/schema/models.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/schema/object.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/schema/prompt.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/schema/retrieval.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/schema/topic.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph/schema/types.py +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph_base.egg-info/dependency_links.txt +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph_base.egg-info/requires.txt +0 -0
- {trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/trustgraph_base.egg-info/top_level.txt +0 -0
{trustgraph-base-0.22.4 → trustgraph-base-0.22.6}/PKG-INFO

@@ -1,9 +1,9 @@
 Metadata-Version: 2.1
 Name: trustgraph-base
-Version: 0.22.4
+Version: 0.22.6
 Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
 Home-page: https://github.com/trustgraph-ai/trustgraph
-Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.22.4.tar.gz
+Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.22.6.tar.gz
 Author: trustgraph.ai
 Author-email: security@trustgraph.ai
 Classifier: Programming Language :: Python :: 3
trustgraph-base-0.22.6/trustgraph/base/__init__.py

@@ -0,0 +1,31 @@
+
+from . pubsub import PulsarClient
+from . async_processor import AsyncProcessor
+from . consumer import Consumer
+from . producer import Producer
+from . publisher import Publisher
+from . subscriber import Subscriber
+from . metrics import ProcessorMetrics, ConsumerMetrics, ProducerMetrics
+from . flow_processor import FlowProcessor
+from . consumer_spec import ConsumerSpec
+from . setting_spec import SettingSpec
+from . producer_spec import ProducerSpec
+from . subscriber_spec import SubscriberSpec
+from . request_response_spec import RequestResponseSpec
+from . llm_service import LlmService, LlmResult
+from . embeddings_service import EmbeddingsService
+from . embeddings_client import EmbeddingsClientSpec
+from . text_completion_client import TextCompletionClientSpec
+from . prompt_client import PromptClientSpec
+from . triples_store_service import TriplesStoreService
+from . graph_embeddings_store_service import GraphEmbeddingsStoreService
+from . document_embeddings_store_service import DocumentEmbeddingsStoreService
+from . triples_query_service import TriplesQueryService
+from . graph_embeddings_query_service import GraphEmbeddingsQueryService
+from . document_embeddings_query_service import DocumentEmbeddingsQueryService
+from . graph_embeddings_client import GraphEmbeddingsClientSpec
+from . triples_client import TriplesClientSpec
+from . document_embeddings_client import DocumentEmbeddingsClientSpec
+from . agent_service import AgentService
+from . graph_rag_client import GraphRagClientSpec
+
trustgraph-base-0.22.6/trustgraph/base/agent_client.py

@@ -0,0 +1,39 @@
+
+from . request_response_spec import RequestResponse, RequestResponseSpec
+from .. schema import AgentRequest, AgentResponse
+from .. knowledge import Uri, Literal
+
+class AgentClient(RequestResponse):
+    async def request(self, recipient, question, plan=None, state=None,
+                      history=[], timeout=300):
+
+        resp = await self.request(
+            AgentRequest(
+                question = question,
+                plan = plan,
+                state = state,
+                history = history,
+            ),
+            recipient=recipient,
+            timeout=timeout,
+        )
+
+        print(resp, flush=True)
+
+        if resp.error:
+            raise RuntimeError(resp.error.message)
+
+        return resp
+
+class GraphEmbeddingsClientSpec(RequestResponseSpec):
+    def __init__(
+            self, request_name, response_name,
+    ):
+        super(GraphEmbeddingsClientSpec, self).__init__(
+            request_name = request_name,
+            request_schema = GraphEmbeddingsRequest,
+            response_name = response_name,
+            response_schema = GraphEmbeddingsResponse,
+            impl = GraphEmbeddingsClient,
+        )
+
trustgraph-base-0.22.6/trustgraph/base/agent_service.py

@@ -0,0 +1,100 @@
+
+"""
+Agent manager service completion base class
+"""
+
+import time
+from prometheus_client import Histogram
+
+from .. schema import AgentRequest, AgentResponse, Error
+from .. exceptions import TooManyRequests
+from .. base import FlowProcessor, ConsumerSpec, ProducerSpec
+
+default_ident = "agent-manager"
+
+class AgentService(FlowProcessor):
+
+    def __init__(self, **params):
+
+        id = params.get("id")
+
+        super(AgentService, self).__init__(**params | { "id": id })
+
+        self.register_specification(
+            ConsumerSpec(
+                name = "request",
+                schema = AgentRequest,
+                handler = self.on_request
+            )
+        )
+
+        self.register_specification(
+            ProducerSpec(
+                name = "next",
+                schema = AgentRequest
+            )
+        )
+
+        self.register_specification(
+            ProducerSpec(
+                name = "response",
+                schema = AgentResponse
+            )
+        )
+
+    async def on_request(self, msg, consumer, flow):
+
+        try:
+
+            request = msg.value()
+
+            # Sender-produced ID
+            id = msg.properties()["id"]
+
+            async def respond(resp):
+
+                await flow("response").send(
+                    resp,
+                    properties={"id": id}
+                )
+
+            async def next(resp):
+
+                await flow("next").send(
+                    resp,
+                    properties={"id": id}
+                )
+
+            await self.agent_request(
+                request = request, respond = respond, next = next,
+                flow = flow
+            )
+
+        except TooManyRequests as e:
+            raise e
+
+        except Exception as e:
+
+            # Apart from rate limits, treat all exceptions as unrecoverable
+            print(f"on_request Exception: {e}")
+
+            print("Send error response...", flush=True)
+
+            await flow.producer["response"].send(
+                AgentResponse(
+                    error=Error(
+                        type = "agent-error",
+                        message = str(e),
+                    ),
+                    thought = None,
+                    observation = None,
+                    answer = None,
+                ),
+                properties={"id": id}
+            )
+
+    @staticmethod
+    def add_args(parser):
+
+        FlowProcessor.add_args(parser)
+
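The AgentService diff above illustrates the FlowProcessor pattern introduced in 0.22.6: a processor registers ConsumerSpec/ProducerSpec entries in its constructor, handlers receive `(msg, consumer, flow)`, and replies go out on named producers via `flow("name").send(...)`. The following is a minimal sketch of that pattern and is not part of the diff; `MyRequest`, `MyResponse`, `MyService` and the handler body are hypothetical, with only the base-class calls taken from the code above.

```python
# Illustrative sketch only (not part of this diff); hypothetical names:
# MyRequest, MyResponse, MyService.

from pulsar.schema import Record, String

from trustgraph.base import FlowProcessor, ConsumerSpec, ProducerSpec

class MyRequest(Record):          # hypothetical request schema
    text = String()

class MyResponse(Record):         # hypothetical response schema
    answer = String()

class MyService(FlowProcessor):

    def __init__(self, **params):

        super(MyService, self).__init__(**params | { "id": params.get("id") })

        # One named input queue, handled by on_request...
        self.register_specification(
            ConsumerSpec(
                name = "request",
                schema = MyRequest,
                handler = self.on_request
            )
        )

        # ...and one named output queue for replies
        self.register_specification(
            ProducerSpec(
                name = "response",
                schema = MyResponse
            )
        )

    async def on_request(self, msg, consumer, flow):

        request = msg.value()

        # Sender-produced correlation ID, echoed back on the response
        id = msg.properties()["id"]

        await flow("response").send(
            MyResponse(answer = request.text.upper()),
            properties = {"id": id}
        )

    @staticmethod
    def add_args(parser):
        FlowProcessor.add_args(parser)
```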
trustgraph-base-0.22.6/trustgraph/base/async_processor.py

@@ -0,0 +1,254 @@
+
+# Base class for processors. Implements:
+# - Pulsar client, subscribe and consume basic
+# - the async startup logic
+# - Initialising metrics
+
+import asyncio
+import argparse
+import _pulsar
+import time
+import uuid
+from prometheus_client import start_http_server, Info
+
+from .. schema import ConfigPush, config_push_queue
+from .. log_level import LogLevel
+from .. exceptions import TooManyRequests
+from . pubsub import PulsarClient
+from . producer import Producer
+from . consumer import Consumer
+from . metrics import ProcessorMetrics
+
+default_config_queue = config_push_queue
+
+# Async processor
+class AsyncProcessor:
+
+    def __init__(self, **params):
+
+        # Store the identity
+        self.id = params.get("id")
+
+        # Register a pulsar client
+        self.pulsar_client = PulsarClient(**params)
+
+        # Initialise metrics, records the parameters
+        ProcessorMetrics(id=self.id).info({
+            k: str(params[k])
+            for k in params
+            if k != "id"
+        })
+
+        # The processor runs all activity in a taskgroup, it's mandatory
+        # that this is provded
+        self.taskgroup = params.get("taskgroup")
+        if self.taskgroup is None:
+            raise RuntimeError("Essential taskgroup missing")
+
+        # Get the configuration topic
+        self.config_push_queue = params.get(
+            "config_push_queue", default_config_queue
+        )
+
+        # This records registered configuration handlers
+        self.config_handlers = []
+
+        # Create a random ID for this subscription to the configuration
+        # service
+        config_subscriber_id = str(uuid.uuid4())
+
+        # Subscribe to config queue
+        self.config_sub_task = Consumer(
+
+            taskgroup = self.taskgroup,
+            client = self.client,
+            subscriber = config_subscriber_id,
+            flow = None,
+
+            topic = self.config_push_queue,
+            schema = ConfigPush,
+
+            handler = self.on_config_change,
+
+            # This causes new subscriptions to view the entire history of
+            # configuration
+            start_of_messages = True
+        )
+
+        self.running = True
+
+    # This is called to start dynamic behaviour. An over-ride point for
+    # extra functionality
+    async def start(self):
+        await self.config_sub_task.start()
+
+    # This is called to stop all threads. An over-ride point for extra
+    # functionality
+    def stop(self):
+        self.client.close()
+        self.running = False
+
+    # Returns the pulsar host
+    @property
+    def pulsar_host(self): return self.client.pulsar_host
+
+    # Returns the pulsar client
+    @property
+    def client(self): return self.pulsar_client.client
+
+    # Register a new event handler for configuration change
+    def register_config_handler(self, handler):
+        self.config_handlers.append(handler)
+
+    # Called when a new configuration message push occurs
+    async def on_config_change(self, message, consumer):
+
+        # Get configuration data and version number
+        config = message.value().config
+        version = message.value().version
+
+        # Acknowledge the message
+        consumer.acknowledge(message)
+
+        # Invoke message handlers
+        print("Config change event", config, version, flush=True)
+        for ch in self.config_handlers:
+            await ch(config, version)
+
+    # This is the 'main' body of the handler. It is a point to override
+    # if needed. By default does nothing. Processors are implemented
+    # by adding consumer/producer functionality so maybe nothing is needed
+    # in the run() body
+    async def run(self):
+        while self.running:
+            await asyncio.sleep(2)
+
+    # Startup fabric. This runs in 'async' mode, creates a taskgroup and
+    # runs the producer.
+    @classmethod
+    async def launch_async(cls, args):
+
+        try:
+
+            # Create a taskgroup. This seems complicated, when an exception
+            # occurs, unhandled it looks like it cancels all threads in the
+            # taskgroup. Needs the exception to be caught in the right
+            # place.
+            async with asyncio.TaskGroup() as tg:
+
+
+                # Create a processor instance, and include the taskgroup
+                # as a paramter. A processor identity ident is used as
+                # - subscriber name
+                # - an identifier for flow configuration
+                p = cls(**args | { "taskgroup": tg })
+
+                # Start the processor
+                await p.start()
+
+                # Run the processor
+                task = tg.create_task(p.run())
+
+                # The taskgroup causes everything to wait until
+                # all threads have stopped
+
+        # This is here to output a debug message, shouldn't be needed.
+        except Exception as e:
+            print("Exception, closing taskgroup", flush=True)
+            raise e
+
+    # Startup fabric. launch calls launch_async in async mode.
+    @classmethod
+    def launch(cls, ident, doc):
+
+        # Start assembling CLI arguments
+        parser = argparse.ArgumentParser(
+            prog=ident,
+            description=doc
+        )
+
+        parser.add_argument(
+            '--id',
+            default=ident,
+            help=f'Configuration identity (default: {ident})',
+        )
+
+        # Invoke the class-specific add_args, which manages adding all the
+        # command-line arguments
+        cls.add_args(parser)
+
+        # Parse arguments
+        args = parser.parse_args()
+        args = vars(args)
+
+        # Debug
+        print(args, flush=True)
+
+        # Start the Prometheus metrics service if needed
+        if args["metrics"]:
+            start_http_server(args["metrics_port"])
+
+        # Loop forever, exception handler
+        while True:
+
+            print("Starting...", flush=True)
+
+            try:
+
+                # Launch the processor in an asyncio handler
+                asyncio.run(cls.launch_async(
+                    args
+                ))
+
+            except KeyboardInterrupt:
+                print("Keyboard interrupt.", flush=True)
+                return
+
+            except _pulsar.Interrupted:
+                print("Pulsar Interrupted.", flush=True)
+                return
+
+            # Exceptions from a taskgroup come in as an exception group
+            except ExceptionGroup as e:
+
+                print("Exception group:", flush=True)
+
+                for se in e.exceptions:
+                    print(" Type:", type(se), flush=True)
+                    print(f" Exception: {se}", flush=True)
+
+            except Exception as e:
+                print("Type:", type(e), flush=True)
+                print("Exception:", e, flush=True)
+
+            # Retry occurs here
+            print("Will retry...", flush=True)
+            time.sleep(4)
+            print("Retrying...", flush=True)
+
+    # The command-line arguments are built using a stack of add_args
+    # invocations
+    @staticmethod
+    def add_args(parser):
+
+        PulsarClient.add_args(parser)
+
+        parser.add_argument(
+            '--config-push-queue',
+            default=default_config_queue,
+            help=f'Config push queue {default_config_queue}',
+        )
+
+        parser.add_argument(
+            '--metrics',
+            action=argparse.BooleanOptionalAction,
+            default=True,
+            help=f'Metrics enabled (default: true)',
+        )
+
+        parser.add_argument(
+            '-P', '--metrics-port',
+            type=int,
+            default=8000,
+            help=f'Pulsar host (default: 8000)',
+        )
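AsyncProcessor.launch() above supplies the whole process lifecycle: argument parsing, the optional Prometheus metrics server, asyncio.run() of launch_async(), and a retry loop around failures. A subclass therefore needs only a small entry point; the sketch below is illustrative, reusing the hypothetical MyService and module name from the previous example.

```python
# Illustrative sketch only: an entry point driving a processor subclass
# through the launch() fabric shown above.  MyService and the module name
# my_service are the hypothetical names from the previous sketch.

from my_service import MyService   # hypothetical module containing MyService

def run():
    # launch() assembles the CLI (--id, the Pulsar options added by
    # PulsarClient.add_args, --config-push-queue, --metrics, --metrics-port),
    # optionally starts the metrics HTTP server, then loops over
    # asyncio.run(launch_async(args)) with a short sleep between retries.
    MyService.launch("my-service", __doc__)

if __name__ == "__main__":
    run()
```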
trustgraph-base-0.22.6/trustgraph/base/consumer.py

@@ -0,0 +1,197 @@
+
+from pulsar.schema import JsonSchema
+import pulsar
+import _pulsar
+import asyncio
+import time
+
+from .. exceptions import TooManyRequests
+
+class Consumer:
+
+    def __init__(
+            self, taskgroup, flow, client, topic, subscriber, schema,
+            handler,
+            metrics = None,
+            start_of_messages=False,
+            rate_limit_retry_time = 10, rate_limit_timeout = 7200,
+            reconnect_time = 5,
+    ):
+
+        self.taskgroup = taskgroup
+        self.flow = flow
+        self.client = client
+        self.topic = topic
+        self.subscriber = subscriber
+        self.schema = schema
+        self.handler = handler
+
+        self.rate_limit_retry_time = rate_limit_retry_time
+        self.rate_limit_timeout = rate_limit_timeout
+
+        self.reconnect_time = 5
+
+        self.start_of_messages = start_of_messages
+
+        self.running = True
+        self.task = None
+
+        self.metrics = metrics
+
+        self.consumer = None
+
+    def __del__(self):
+        self.running = False
+
+        if hasattr(self, "consumer"):
+            if self.consumer:
+                self.consumer.close()
+
+    async def stop(self):
+
+        self.running = False
+        await self.task
+
+    async def start(self):
+
+        self.running = True
+
+        # Puts it in the stopped state, the run thread should set running
+        if self.metrics:
+            self.metrics.state("stopped")
+
+        self.task = self.taskgroup.create_task(self.run())
+
+    async def run(self):
+
+        while self.running:
+
+            if self.metrics:
+                self.metrics.state("stopped")
+
+            try:
+
+                print(self.topic, "subscribing...", flush=True)
+
+                if self.start_of_messages:
+                    pos = pulsar.InitialPosition.Earliest
+                else:
+                    pos = pulsar.InitialPosition.Latest
+
+                self.consumer = await asyncio.to_thread(
+                    self.client.subscribe,
+                    topic = self.topic,
+                    subscription_name = self.subscriber,
+                    schema = JsonSchema(self.schema),
+                    initial_position = pos,
+                    consumer_type = pulsar.ConsumerType.Shared,
+                )
+
+            except Exception as e:
+
+                print("consumer subs Exception:", e, flush=True)
+                await asyncio.sleep(self.reconnect_time)
+                continue
+
+            print(self.topic, "subscribed", flush=True)
+
+            if self.metrics:
+                self.metrics.state("running")
+
+            try:
+
+                await self.consume()
+
+                if self.metrics:
+                    self.metrics.state("stopped")
+
+            except Exception as e:
+
+                print("consumer loop exception:", e, flush=True)
+                self.consumer.close()
+                self.consumer = None
+                await asyncio.sleep(self.reconnect_time)
+                continue
+
+    async def consume(self):
+
+        while self.running:
+
+            try:
+                msg = await asyncio.to_thread(
+                    self.consumer.receive,
+                    timeout_millis=2000
+                )
+            except _pulsar.Timeout:
+                continue
+            except Exception as e:
+                raise e
+
+            expiry = time.time() + self.rate_limit_timeout
+
+            # This loop is for retry on rate-limit / resource limits
+            while self.running:
+
+                if time.time() > expiry:
+
+                    print("Gave up waiting for rate-limit retry", flush=True)
+
+                    # Message failed to be processed, this causes it to
+                    # be retried
+                    self.consumer.negative_acknowledge(msg)
+
+                    if self.metrics:
+                        self.metrics.process("error")
+
+                    # Break out of retry loop, processes next message
+                    break
+
+                try:
+
+                    print("Handle...", flush=True)
+
+                    if self.metrics:
+
+                        with self.metrics.record_time():
+                            await self.handler(msg, self, self.flow)
+
+                    else:
+                        await self.handler(msg, self.consumer)
+
+                    print("Handled.", flush=True)
+
+                    # Acknowledge successful processing of the message
+                    self.consumer.acknowledge(msg)
+
+                    if self.metrics:
+                        self.metrics.process("success")
+
+                    # Break out of retry loop
+                    break
+
+                except TooManyRequests:
+
+                    print("TooManyRequests: will retry...", flush=True)
+
+                    if self.metrics:
+                        self.metrics.rate_limit()
+
+                    # Sleep
+                    await asyncio.sleep(self.rate_limit_retry_time)
+
+                    # Contine from retry loop, just causes a reprocessing
+                    continue
+
+                except Exception as e:
+
+                    print("consume exception:", e, flush=True)
+
+                    # Message failed to be processed, this causes it to
+                    # be retried
+                    self.consumer.negative_acknowledge(msg)
+
+                    if self.metrics:
+                        self.metrics.process("error")
+
+                    # Break out of retry loop, processes next message
+                    break
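The Consumer class above wraps a Pulsar subscription in a reconnect loop plus a per-message rate-limit retry loop. It is normally built by ConsumerSpec (next file), but it can also be driven directly; the sketch below shows that under assumed names, reusing the ConfigPush schema and config_push_queue topic that appear elsewhere in this diff. Note that when no metrics object is passed, the handler is invoked as handler(msg, consumer) and acknowledgement is performed by the consume() loop itself.

```python
# Illustrative sketch only: driving Consumer directly, outside the
# FlowProcessor/ConsumerSpec machinery.  The broker URL and subscription
# name are assumptions.

import asyncio
import pulsar

from trustgraph.base import Consumer
from trustgraph.schema import ConfigPush, config_push_queue

async def on_message(msg, consumer):
    # Without a metrics object, Consumer.consume() calls handler(msg, consumer)
    # and acknowledges (or negatively acknowledges) the message itself.
    print("config push, version:", msg.value().version, flush=True)

async def main():

    client = pulsar.Client("pulsar://localhost:6650")    # assumed broker URL

    async with asyncio.TaskGroup() as tg:

        consumer = Consumer(
            taskgroup = tg,
            flow = None,
            client = client,
            topic = config_push_queue,
            subscriber = "example-subscriber",            # assumed name
            schema = ConfigPush,
            handler = on_message,
            start_of_messages = True,
        )

        await consumer.start()

        # The task group keeps running until the process is interrupted,
        # since the Consumer run loop never clears self.running on its own.

if __name__ == "__main__":
    asyncio.run(main())
```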
trustgraph-base-0.22.6/trustgraph/base/consumer_spec.py

@@ -0,0 +1,36 @@
+
+from . metrics import ConsumerMetrics
+from . consumer import Consumer
+from . spec import Spec
+
+class ConsumerSpec(Spec):
+    def __init__(self, name, schema, handler):
+        self.name = name
+        self.schema = schema
+        self.handler = handler
+
+    def add(self, flow, processor, definition):
+
+        consumer_metrics = ConsumerMetrics(
+            flow.id, f"{flow.name}-{self.name}"
+        )
+
+        consumer = Consumer(
+            taskgroup = processor.taskgroup,
+            flow = flow,
+            client = processor.client,
+            topic = definition[self.name],
+            subscriber = processor.id + "--" + self.name,
+            schema = self.schema,
+            handler = self.handler,
+            metrics = consumer_metrics,
+        )
+
+        # Consumer handle gets access to producers and other
+        # metadata
+        consumer.id = flow.id
+        consumer.name = self.name
+        consumer.flow = flow
+
+        flow.consumer[self.name] = consumer
+