trustgraph 0.4.1__tar.gz → 0.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of trustgraph might be problematic. See the registry's advisory page for more details.

Files changed (114)
  1. {trustgraph-0.4.1 → trustgraph-0.5.1}/PKG-INFO +2 -2
  2. trustgraph-0.5.1/scripts/chunker-recursive +6 -0
  3. trustgraph-0.5.1/scripts/ge-write-milvus +6 -0
  4. trustgraph-0.5.1/scripts/graph-rag +6 -0
  5. trustgraph-0.5.1/scripts/pdf-decoder +6 -0
  6. trustgraph-0.5.1/scripts/text-completion-azure +6 -0
  7. trustgraph-0.5.1/scripts/text-completion-claude +6 -0
  8. trustgraph-0.5.1/scripts/text-completion-ollama +6 -0
  9. trustgraph-0.5.1/scripts/text-completion-vertexai +6 -0
  10. trustgraph-0.5.1/scripts/triples-write-cassandra +6 -0
  11. {trustgraph-0.4.1 → trustgraph-0.5.1}/setup.py +7 -7
  12. trustgraph-0.5.1/trustgraph/base/__init__.py +6 -0
  13. trustgraph-0.5.1/trustgraph/base/base_processor.py +117 -0
  14. trustgraph-0.5.1/trustgraph/base/consumer.py +87 -0
  15. trustgraph-0.5.1/trustgraph/base/consumer_producer.py +168 -0
  16. trustgraph-0.5.1/trustgraph/base/producer.py +55 -0
  17. {trustgraph-0.4.1/trustgraph/chunker → trustgraph-0.5.1/trustgraph/chunking}/recursive/chunker.py +7 -4
  18. {trustgraph-0.4.1/trustgraph/decoder → trustgraph-0.5.1/trustgraph/decoding}/pdf/pdf_decoder.py +7 -4
  19. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/embeddings/hf/hf.py +7 -4
  20. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/embeddings/ollama/processor.py +7 -4
  21. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/embeddings/vectorize/vectorize.py +10 -7
  22. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/embeddings_client.py +4 -3
  23. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/graph_rag_client.py +5 -3
  24. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/kg/extract_definitions/extract.py +11 -8
  25. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/kg/extract_relationships/extract.py +14 -11
  26. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/llm_client.py +6 -3
  27. {trustgraph-0.4.1/trustgraph/llm/azure_text → trustgraph-0.5.1/trustgraph/model/text_completion/azure}/llm.py +11 -7
  28. {trustgraph-0.4.1/trustgraph/llm/claude_text → trustgraph-0.5.1/trustgraph/model/text_completion/claude}/llm.py +11 -7
  29. {trustgraph-0.4.1/trustgraph/llm/ollama_text → trustgraph-0.5.1/trustgraph/model/text_completion/ollama}/llm.py +11 -7
  30. {trustgraph-0.4.1/trustgraph/llm/vertexai_text → trustgraph-0.5.1/trustgraph/model/text_completion/vertexai}/llm.py +11 -7
  31. {trustgraph-0.4.1/trustgraph/rag/graph → trustgraph-0.5.1/trustgraph/retrieval/graph_rag}/rag.py +7 -4
  32. trustgraph-0.5.1/trustgraph/schema.py +125 -0
  33. trustgraph-0.5.1/trustgraph/storage/graph_embeddings/__init__.py +0 -0
  34. {trustgraph-0.4.1/trustgraph/vector/milvus_write → trustgraph-0.5.1/trustgraph/storage/graph_embeddings/milvus}/write.py +11 -9
  35. trustgraph-0.5.1/trustgraph/storage/triples/__init__.py +0 -0
  36. {trustgraph-0.4.1/trustgraph/graph/cassandra_write → trustgraph-0.5.1/trustgraph/storage/triples/cassandra}/write.py +10 -7
  37. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph.egg-info/PKG-INFO +2 -2
  38. trustgraph-0.5.1/trustgraph.egg-info/SOURCES.txt +99 -0
  39. trustgraph-0.4.1/scripts/chunker-recursive +0 -6
  40. trustgraph-0.4.1/scripts/graph-rag +0 -6
  41. trustgraph-0.4.1/scripts/graph-write-cassandra +0 -6
  42. trustgraph-0.4.1/scripts/llm-azure-text +0 -6
  43. trustgraph-0.4.1/scripts/llm-claude-text +0 -6
  44. trustgraph-0.4.1/scripts/llm-ollama-text +0 -6
  45. trustgraph-0.4.1/scripts/llm-vertexai-text +0 -6
  46. trustgraph-0.4.1/scripts/pdf-decoder +0 -6
  47. trustgraph-0.4.1/scripts/vector-write-milvus +0 -6
  48. trustgraph-0.4.1/trustgraph/base/processor.py +0 -360
  49. trustgraph-0.4.1/trustgraph/embeddings/ollama/__init__.py +0 -3
  50. trustgraph-0.4.1/trustgraph/schema.py +0 -67
  51. trustgraph-0.4.1/trustgraph.egg-info/SOURCES.txt +0 -94
  52. {trustgraph-0.4.1 → trustgraph-0.5.1}/LICENSE +0 -0
  53. {trustgraph-0.4.1 → trustgraph-0.5.1}/README.md +0 -0
  54. {trustgraph-0.4.1 → trustgraph-0.5.1}/scripts/embeddings-hf +0 -0
  55. {trustgraph-0.4.1 → trustgraph-0.5.1}/scripts/embeddings-ollama +0 -0
  56. {trustgraph-0.4.1 → trustgraph-0.5.1}/scripts/embeddings-vectorize +0 -0
  57. {trustgraph-0.4.1 → trustgraph-0.5.1}/scripts/graph-show +0 -0
  58. {trustgraph-0.4.1 → trustgraph-0.5.1}/scripts/graph-to-turtle +0 -0
  59. {trustgraph-0.4.1 → trustgraph-0.5.1}/scripts/init-pulsar-manager +0 -0
  60. {trustgraph-0.4.1 → trustgraph-0.5.1}/scripts/kg-extract-definitions +0 -0
  61. {trustgraph-0.4.1 → trustgraph-0.5.1}/scripts/kg-extract-relationships +0 -0
  62. {trustgraph-0.4.1 → trustgraph-0.5.1}/scripts/loader +0 -0
  63. {trustgraph-0.4.1 → trustgraph-0.5.1}/scripts/query +0 -0
  64. {trustgraph-0.4.1 → trustgraph-0.5.1}/scripts/run-processing +0 -0
  65. {trustgraph-0.4.1 → trustgraph-0.5.1}/setup.cfg +0 -0
  66. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/__init__.py +0 -0
  67. {trustgraph-0.4.1/trustgraph/chunker → trustgraph-0.5.1/trustgraph/chunking}/__init__.py +0 -0
  68. {trustgraph-0.4.1/trustgraph/chunker → trustgraph-0.5.1/trustgraph/chunking}/recursive/__init__.py +0 -0
  69. {trustgraph-0.4.1/trustgraph/chunker → trustgraph-0.5.1/trustgraph/chunking}/recursive/__main__.py +0 -0
  70. {trustgraph-0.4.1/trustgraph/decoder → trustgraph-0.5.1/trustgraph/decoding}/__init__.py +0 -0
  71. {trustgraph-0.4.1/trustgraph/decoder → trustgraph-0.5.1/trustgraph/decoding}/pdf/__init__.py +0 -0
  72. {trustgraph-0.4.1/trustgraph/decoder → trustgraph-0.5.1/trustgraph/decoding}/pdf/__main__.py +0 -0
  73. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/embeddings/__init__.py +0 -0
  74. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/embeddings/hf/__init__.py +0 -0
  75. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/embeddings/hf/__main__.py +0 -0
  76. {trustgraph-0.4.1/trustgraph/base → trustgraph-0.5.1/trustgraph/embeddings/ollama}/__init__.py +0 -0
  77. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/embeddings/ollama/__main__.py +0 -0
  78. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/embeddings/vectorize/__init__.py +0 -0
  79. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/embeddings/vectorize/__main__.py +0 -0
  80. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/graph_rag.py +0 -0
  81. {trustgraph-0.4.1/trustgraph/graph → trustgraph-0.5.1/trustgraph/kg}/__init__.py +0 -0
  82. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/kg/extract_definitions/__init__.py +0 -0
  83. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/kg/extract_definitions/__main__.py +0 -0
  84. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/kg/extract_relationships/__init__.py +0 -0
  85. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/kg/extract_relationships/__main__.py +0 -0
  86. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/log_level.py +0 -0
  87. {trustgraph-0.4.1/trustgraph/kg → trustgraph-0.5.1/trustgraph/model}/__init__.py +0 -0
  88. {trustgraph-0.4.1/trustgraph/llm → trustgraph-0.5.1/trustgraph/model/text_completion}/__init__.py +0 -0
  89. {trustgraph-0.4.1/trustgraph/llm/azure_text → trustgraph-0.5.1/trustgraph/model/text_completion/azure}/__init__.py +0 -0
  90. {trustgraph-0.4.1/trustgraph/llm/azure_text → trustgraph-0.5.1/trustgraph/model/text_completion/azure}/__main__.py +0 -0
  91. {trustgraph-0.4.1/trustgraph/llm/claude_text → trustgraph-0.5.1/trustgraph/model/text_completion/claude}/__init__.py +0 -0
  92. {trustgraph-0.4.1/trustgraph/llm/claude_text → trustgraph-0.5.1/trustgraph/model/text_completion/claude}/__main__.py +0 -0
  93. {trustgraph-0.4.1/trustgraph/llm/ollama_text → trustgraph-0.5.1/trustgraph/model/text_completion/ollama}/__init__.py +0 -0
  94. {trustgraph-0.4.1/trustgraph/llm/ollama_text → trustgraph-0.5.1/trustgraph/model/text_completion/ollama}/__main__.py +0 -0
  95. {trustgraph-0.4.1/trustgraph/llm/vertexai_text → trustgraph-0.5.1/trustgraph/model/text_completion/vertexai}/__init__.py +0 -0
  96. {trustgraph-0.4.1/trustgraph/llm/vertexai_text → trustgraph-0.5.1/trustgraph/model/text_completion/vertexai}/__main__.py +0 -0
  97. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/processing/__init__.py +0 -0
  98. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/processing/__main__.py +0 -0
  99. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/processing/processing.py +0 -0
  100. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/prompts.py +0 -0
  101. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/rdf.py +0 -0
  102. {trustgraph-0.4.1/trustgraph/rag → trustgraph-0.5.1/trustgraph/retrieval}/__init__.py +0 -0
  103. {trustgraph-0.4.1/trustgraph/rag/graph → trustgraph-0.5.1/trustgraph/retrieval/graph_rag}/__init__.py +0 -0
  104. {trustgraph-0.4.1/trustgraph/rag/graph → trustgraph-0.5.1/trustgraph/retrieval/graph_rag}/__main__.py +0 -0
  105. {trustgraph-0.4.1/trustgraph/vector → trustgraph-0.5.1/trustgraph/storage}/__init__.py +0 -0
  106. {trustgraph-0.4.1/trustgraph/graph/cassandra_write → trustgraph-0.5.1/trustgraph/storage/graph_embeddings/milvus}/__init__.py +0 -0
  107. {trustgraph-0.4.1/trustgraph/graph/cassandra_write → trustgraph-0.5.1/trustgraph/storage/graph_embeddings/milvus}/__main__.py +0 -0
  108. {trustgraph-0.4.1/trustgraph/vector/milvus_write → trustgraph-0.5.1/trustgraph/storage/triples/cassandra}/__init__.py +0 -0
  109. {trustgraph-0.4.1/trustgraph/vector/milvus_write → trustgraph-0.5.1/trustgraph/storage/triples/cassandra}/__main__.py +0 -0
  110. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/triple_vectors.py +0 -0
  111. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph/trustgraph.py +0 -0
  112. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph.egg-info/dependency_links.txt +0 -0
  113. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph.egg-info/requires.txt +0 -0
  114. {trustgraph-0.4.1 → trustgraph-0.5.1}/trustgraph.egg-info/top_level.txt +0 -0
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: trustgraph
3
- Version: 0.4.1
3
+ Version: 0.5.1
4
4
  Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
5
5
  Home-page: https://github.com/trustgraph-ai/trustgraph
6
- Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.4.1.tar.gz
6
+ Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.5.1.tar.gz
7
7
  Author: trustgraph.ai
8
8
  Author-email: security@trustgraph.ai
9
9
  Classifier: Programming Language :: Python :: 3
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env python3
2
+
3
+ from trustgraph.chunking.recursive import run
4
+
5
+ run()
6
+
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env python3
2
+
3
+ from trustgraph.storage.graph_embeddings.milvus import run
4
+
5
+ run()
6
+
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env python3
2
+
3
+ from trustgraph.retrieval.graph_rag import run
4
+
5
+ run()
6
+
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env python3
2
+
3
+ from trustgraph.decoding.pdf import run
4
+
5
+ run()
6
+
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env python3
2
+
3
+ from trustgraph.model.text_completion.azure import run
4
+
5
+ run()
6
+
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env python3
2
+
3
+ from trustgraph.model.text_completion.claude import run
4
+
5
+ run()
6
+
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env python3
2
+
3
+ from trustgraph.model.text_completion.ollama import run
4
+
5
+ run()
6
+
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env python3
2
+
3
+ from trustgraph.model.text_completion.vertexai import run
4
+
5
+ run()
6
+
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env python3
2
+
3
+ from trustgraph.storage.triples.cassandra import run
4
+
5
+ run()
6
+
@@ -4,7 +4,7 @@ import os
4
4
  with open("README.md", "r") as fh:
5
5
  long_description = fh.read()
6
6
 
7
- version = "0.4.1"
7
+ version = "0.5.1"
8
8
 
9
9
  setuptools.setup(
10
10
  name="trustgraph",
@@ -50,21 +50,21 @@ setuptools.setup(
50
50
  "scripts/embeddings-hf",
51
51
  "scripts/embeddings-ollama",
52
52
  "scripts/embeddings-vectorize",
53
+ "scripts/ge-write-milvus",
53
54
  "scripts/graph-rag",
54
55
  "scripts/graph-show",
55
56
  "scripts/graph-to-turtle",
56
- "scripts/graph-write-cassandra",
57
57
  "scripts/init-pulsar-manager",
58
58
  "scripts/kg-extract-definitions",
59
59
  "scripts/kg-extract-relationships",
60
- "scripts/llm-azure-text",
61
- "scripts/llm-claude-text",
62
- "scripts/llm-ollama-text",
63
- "scripts/llm-vertexai-text",
64
60
  "scripts/loader",
65
61
  "scripts/pdf-decoder",
66
62
  "scripts/query",
67
63
  "scripts/run-processing",
68
- "scripts/vector-write-milvus",
64
+ "scripts/text-completion-azure",
65
+ "scripts/text-completion-claude",
66
+ "scripts/text-completion-ollama",
67
+ "scripts/text-completion-vertexai",
68
+ "scripts/triples-write-cassandra",
69
69
  ]
70
70
  )
@@ -0,0 +1,6 @@
1
+
2
+ from . base_processor import BaseProcessor
3
+ from . consumer import Consumer
4
+ from . producer import Producer
5
+ from . consumer_producer import ConsumerProducer
6
+
@@ -0,0 +1,117 @@
1
+
2
+ import os
3
+ import argparse
4
+ import pulsar
5
+ import _pulsar
6
+ import time
7
+ from prometheus_client import start_http_server, Info
8
+
9
+ from .. log_level import LogLevel
10
+
11
+ class BaseProcessor:
12
+
13
+ default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')
14
+
15
+ def __init__(self, **params):
16
+
17
+ self.client = None
18
+
19
+ if not hasattr(__class__, "params_metric"):
20
+ __class__.params_metric = Info(
21
+ 'params', 'Parameters configuration'
22
+ )
23
+
24
+ # FIXME: Maybe outputs information it should not
25
+ __class__.params_metric.info({
26
+ k: str(params[k])
27
+ for k in params
28
+ })
29
+
30
+ pulsar_host = params.get("pulsar_host", self.default_pulsar_host)
31
+ log_level = params.get("log_level", LogLevel.INFO)
32
+
33
+ self.pulsar_host = pulsar_host
34
+
35
+ self.client = pulsar.Client(
36
+ pulsar_host,
37
+ logger=pulsar.ConsoleLogger(log_level.to_pulsar())
38
+ )
39
+
40
+ def __del__(self):
41
+
42
+ if self.client:
43
+ self.client.close()
44
+
45
+ @staticmethod
46
+ def add_args(parser):
47
+
48
+ parser.add_argument(
49
+ '-p', '--pulsar-host',
50
+ default=__class__.default_pulsar_host,
51
+ help=f'Pulsar host (default: {__class__.default_pulsar_host})',
52
+ )
53
+
54
+ parser.add_argument(
55
+ '-l', '--log-level',
56
+ type=LogLevel,
57
+ default=LogLevel.INFO,
58
+ choices=list(LogLevel),
59
+ help=f'Output queue (default: info)'
60
+ )
61
+
62
+ parser.add_argument(
63
+ '-M', '--metrics-enabled',
64
+ type=bool,
65
+ default=True,
66
+ help=f'Pulsar host (default: true)',
67
+ )
68
+
69
+ parser.add_argument(
70
+ '-P', '--metrics-port',
71
+ type=int,
72
+ default=8000,
73
+ help=f'Pulsar host (default: 8000)',
74
+ )
75
+
76
+ def run(self):
77
+ raise RuntimeError("Something should have implemented the run method")
78
+
79
+ @classmethod
80
+ def start(cls, prog, doc):
81
+
82
+ while True:
83
+
84
+ parser = argparse.ArgumentParser(
85
+ prog=prog,
86
+ description=doc
87
+ )
88
+
89
+ cls.add_args(parser)
90
+
91
+ args = parser.parse_args()
92
+ args = vars(args)
93
+
94
+ if args["metrics_enabled"]:
95
+ start_http_server(args["metrics_port"])
96
+
97
+ try:
98
+
99
+ p = cls(**args)
100
+ p.run()
101
+
102
+ except KeyboardInterrupt:
103
+ print("Keyboard interrupt.")
104
+ return
105
+
106
+ except _pulsar.Interrupted:
107
+ print("Pulsar Interrupted.")
108
+ return
109
+
110
+ except Exception as e:
111
+
112
+ print(type(e))
113
+
114
+ print("Exception:", e, flush=True)
115
+ print("Will retry...", flush=True)
116
+
117
+ time.sleep(10)
@@ -0,0 +1,87 @@
1
+
2
+ from pulsar.schema import JsonSchema
3
+ from prometheus_client import start_http_server, Histogram, Info, Counter
4
+
5
+ from . base_processor import BaseProcessor
6
+
7
+ class Consumer(BaseProcessor):
8
+
9
+ def __init__(self, **params):
10
+
11
+ super(Consumer, self).__init__(**params)
12
+
13
+ input_queue = params.get("input_queue")
14
+ subscriber = params.get("subscriber")
15
+ input_schema = params.get("input_schema")
16
+
17
+ if input_schema == None:
18
+ raise RuntimeError("input_schema must be specified")
19
+
20
+ if not hasattr(__class__, "request_metric"):
21
+ __class__.request_metric = Histogram(
22
+ 'request_latency', 'Request latency (seconds)'
23
+ )
24
+
25
+ if not hasattr(__class__, "pubsub_metric"):
26
+ __class__.pubsub_metric = Info(
27
+ 'pubsub', 'Pub/sub configuration'
28
+ )
29
+
30
+ if not hasattr(__class__, "processing_metric"):
31
+ __class__.processing_metric = Counter(
32
+ 'processing_count', 'Processing count', ["status"]
33
+ )
34
+
35
+ __class__.pubsub_metric.info({
36
+ "input_queue": input_queue,
37
+ "subscriber": subscriber,
38
+ "input_schema": input_schema.__name__,
39
+ })
40
+
41
+ self.consumer = self.client.subscribe(
42
+ input_queue, subscriber,
43
+ schema=JsonSchema(input_schema),
44
+ )
45
+
46
+ def run(self):
47
+
48
+ while True:
49
+
50
+ msg = self.consumer.receive()
51
+
52
+ try:
53
+
54
+ with __class__.request_metric.time():
55
+ self.handle(msg)
56
+
57
+ # Acknowledge successful processing of the message
58
+ self.consumer.acknowledge(msg)
59
+
60
+ __class__.processing_metric.labels(status="success").inc()
61
+
62
+ except Exception as e:
63
+
64
+ print("Exception:", e, flush=True)
65
+
66
+ # Message failed to be processed
67
+ self.consumer.negative_acknowledge(msg)
68
+
69
+ __class__.processing_metric.labels(status="error").inc()
70
+
71
+ @staticmethod
72
+ def add_args(parser, default_input_queue, default_subscriber):
73
+
74
+ BaseProcessor.add_args(parser)
75
+
76
+ parser.add_argument(
77
+ '-i', '--input-queue',
78
+ default=default_input_queue,
79
+ help=f'Input queue (default: {default_input_queue})'
80
+ )
81
+
82
+ parser.add_argument(
83
+ '-s', '--subscriber',
84
+ default=default_subscriber,
85
+ help=f'Queue subscriber name (default: {default_subscriber})'
86
+ )
87
+
@@ -0,0 +1,168 @@
1
+
2
+ from pulsar.schema import JsonSchema
3
+ from prometheus_client import Histogram, Info, Counter
4
+
5
+ from . base_processor import BaseProcessor
6
+
7
+ # FIXME: Derive from consumer? And producer?
8
+
9
+ class ConsumerProducer(BaseProcessor):
10
+
11
+ def __init__(self, **params):
12
+
13
+ input_queue = params.get("input_queue")
14
+ output_queue = params.get("output_queue")
15
+ subscriber = params.get("subscriber")
16
+ input_schema = params.get("input_schema")
17
+ output_schema = params.get("output_schema")
18
+
19
+ if not hasattr(__class__, "request_metric"):
20
+ __class__.request_metric = Histogram(
21
+ 'request_latency', 'Request latency (seconds)'
22
+ )
23
+
24
+ if not hasattr(__class__, "output_metric"):
25
+ __class__.output_metric = Counter(
26
+ 'output_count', 'Output items created'
27
+ )
28
+
29
+ if not hasattr(__class__, "pubsub_metric"):
30
+ __class__.pubsub_metric = Info(
31
+ 'pubsub', 'Pub/sub configuration'
32
+ )
33
+
34
+ if not hasattr(__class__, "processing_metric"):
35
+ __class__.processing_metric = Counter(
36
+ 'processing_count', 'Processing count', ["status"]
37
+ )
38
+
39
+ __class__.pubsub_metric.info({
40
+ "input_queue": input_queue,
41
+ "output_queue": output_queue,
42
+ "subscriber": subscriber,
43
+ "input_schema": input_schema.__name__,
44
+ "output_schema": output_schema.__name__,
45
+ })
46
+
47
+ super(ConsumerProducer, self).__init__(**params)
48
+
49
+ if input_schema == None:
50
+ raise RuntimeError("input_schema must be specified")
51
+
52
+ if output_schema == None:
53
+ raise RuntimeError("output_schema must be specified")
54
+
55
+ self.consumer = self.client.subscribe(
56
+ input_queue, subscriber,
57
+ schema=JsonSchema(input_schema),
58
+ )
59
+
60
+ self.producer = self.client.create_producer(
61
+ topic=output_queue,
62
+ schema=JsonSchema(output_schema),
63
+ )
64
+
65
+ def run(self):
66
+
67
+ while True:
68
+
69
+ msg = self.consumer.receive()
70
+
71
+ try:
72
+
73
+ with __class__.request_metric.time():
74
+ resp = self.handle(msg)
75
+
76
+ # Acknowledge successful processing of the message
77
+ self.consumer.acknowledge(msg)
78
+
79
+ __class__.processing_metric.labels(status="success").inc()
80
+
81
+ except Exception as e:
82
+
83
+ print("Exception:", e, flush=True)
84
+
85
+ # Message failed to be processed
86
+ self.consumer.negative_acknowledge(msg)
87
+
88
+ __class__.processing_metric.labels(status="error").inc()
89
+
90
+ def send(self, msg, properties={}):
91
+ self.producer.send(msg, properties)
92
+ __class__.output_metric.inc()
93
+
94
+ @staticmethod
95
+ def add_args(
96
+ parser, default_input_queue, default_subscriber,
97
+ default_output_queue,
98
+ ):
99
+
100
+ BaseProcessor.add_args(parser)
101
+
102
+ parser.add_argument(
103
+ '-i', '--input-queue',
104
+ default=default_input_queue,
105
+ help=f'Input queue (default: {default_input_queue})'
106
+ )
107
+
108
+ parser.add_argument(
109
+ '-s', '--subscriber',
110
+ default=default_subscriber,
111
+ help=f'Queue subscriber name (default: {default_subscriber})'
112
+ )
113
+
114
+ parser.add_argument(
115
+ '-o', '--output-queue',
116
+ default=default_output_queue,
117
+ help=f'Output queue (default: {default_output_queue})'
118
+ )
119
+
120
+ class Producer(BaseProcessor):
121
+
122
+ def __init__(self, **params):
123
+
124
+ output_queue = params.get("output_queue")
125
+ output_schema = params.get("output_schema")
126
+
127
+ if not hasattr(__class__, "output_metric"):
128
+ __class__.output_metric = Counter(
129
+ 'output_count', 'Output items created'
130
+ )
131
+
132
+ if not hasattr(__class__, "pubsub_metric"):
133
+ __class__.pubsub_metric = Info(
134
+ 'pubsub', 'Pub/sub configuration'
135
+ )
136
+
137
+ __class__.pubsub_metric.info({
138
+ "output_queue": output_queue,
139
+ "output_schema": output_schema.__name__,
140
+ })
141
+
142
+ super(Producer, self).__init__(**params)
143
+
144
+ if output_schema == None:
145
+ raise RuntimeError("output_schema must be specified")
146
+
147
+ self.producer = self.client.create_producer(
148
+ topic=output_queue,
149
+ schema=JsonSchema(output_schema),
150
+ )
151
+
152
+ def send(self, msg, properties={}):
153
+ self.producer.send(msg, properties)
154
+ __class__.output_metric.inc()
155
+
156
+ @staticmethod
157
+ def add_args(
158
+ parser, default_input_queue, default_subscriber,
159
+ default_output_queue,
160
+ ):
161
+
162
+ BaseProcessor.add_args(parser)
163
+
164
+ parser.add_argument(
165
+ '-o', '--output-queue',
166
+ default=default_output_queue,
167
+ help=f'Output queue (default: {default_output_queue})'
168
+ )
@@ -0,0 +1,55 @@
1
+
2
+ from pulsar.schema import JsonSchema
3
+ from prometheus_client import Info, Counter
4
+
5
+ from . base_processor import BaseProcessor
6
+
7
+ class Producer(BaseProcessor):
8
+
9
+ def __init__(self, **params):
10
+
11
+ output_queue = params.get("output_queue")
12
+ output_schema = params.get("output_schema")
13
+
14
+ if not hasattr(__class__, "output_metric"):
15
+ __class__.output_metric = Counter(
16
+ 'output_count', 'Output items created'
17
+ )
18
+
19
+ if not hasattr(__class__, "pubsub_metric"):
20
+ __class__.pubsub_metric = Info(
21
+ 'pubsub', 'Pub/sub configuration'
22
+ )
23
+
24
+ __class__.pubsub_metric.info({
25
+ "output_queue": output_queue,
26
+ "output_schema": output_schema.__name__,
27
+ })
28
+
29
+ super(Producer, self).__init__(**params)
30
+
31
+ if output_schema == None:
32
+ raise RuntimeError("output_schema must be specified")
33
+
34
+ self.producer = self.client.create_producer(
35
+ topic=output_queue,
36
+ schema=JsonSchema(output_schema),
37
+ )
38
+
39
+ def send(self, msg, properties={}):
40
+ self.producer.send(msg, properties)
41
+ __class__.output_metric.inc()
42
+
43
+ @staticmethod
44
+ def add_args(
45
+ parser, default_input_queue, default_subscriber,
46
+ default_output_queue,
47
+ ):
48
+
49
+ BaseProcessor.add_args(parser)
50
+
51
+ parser.add_argument(
52
+ '-o', '--output-queue',
53
+ default=default_output_queue,
54
+ help=f'Output queue (default: {default_output_queue})'
55
+ )
@@ -8,12 +8,15 @@ from langchain_text_splitters import RecursiveCharacterTextSplitter
8
8
 
9
9
 
10
10
  from ... schema import TextDocument, Chunk, Source
11
+ from ... schema import text_ingest_queue, chunk_ingest_queue
11
12
  from ... log_level import LogLevel
12
13
  from ... base import ConsumerProducer
13
14
 
14
- default_input_queue = 'text-doc-load'
15
- default_output_queue = 'chunk-load'
16
- default_subscriber = 'chunker-recursive'
15
+ module = ".".join(__name__.split(".")[1:-1])
16
+
17
+ default_input_queue = text_ingest_queue
18
+ default_output_queue = chunk_ingest_queue
19
+ default_subscriber = module
17
20
 
18
21
  class Processor(ConsumerProducer):
19
22
 
@@ -92,5 +95,5 @@ class Processor(ConsumerProducer):
92
95
 
93
96
  def run():
94
97
 
95
- Processor.start('chunker', __doc__)
98
+ Processor.start(module, __doc__)
96
99
 
@@ -9,12 +9,15 @@ import base64
9
9
  from langchain_community.document_loaders import PyPDFLoader
10
10
 
11
11
  from ... schema import Document, TextDocument, Source
12
+ from ... schema import document_ingest_queue, text_ingest_queue
12
13
  from ... log_level import LogLevel
13
14
  from ... base import ConsumerProducer
14
15
 
15
- default_input_queue = 'document-load'
16
- default_output_queue = 'text-doc-load'
17
- default_subscriber = 'pdf-decoder'
16
+ module = ".".join(__name__.split(".")[1:-1])
17
+
18
+ default_input_queue = document_ingest_queue
19
+ default_output_queue = text_ingest_queue
20
+ default_subscriber = module
18
21
 
19
22
  class Processor(ConsumerProducer):
20
23
 
@@ -80,5 +83,5 @@ class Processor(ConsumerProducer):
80
83
 
81
84
  def run():
82
85
 
83
- Processor.start("pdf-decoder", __doc__)
86
+ Processor.start(module, __doc__)
84
87
 
@@ -7,12 +7,15 @@ Input is text, output is embeddings vector.
7
7
  from langchain_huggingface import HuggingFaceEmbeddings
8
8
 
9
9
  from ... schema import EmbeddingsRequest, EmbeddingsResponse
10
+ from ... schema import embeddings_request_queue, embeddings_response_queue
10
11
  from ... log_level import LogLevel
11
12
  from ... base import ConsumerProducer
12
13
 
13
- default_input_queue = 'embeddings'
14
- default_output_queue = 'embeddings-response'
15
- default_subscriber = 'embeddings-hf'
14
+ module = ".".join(__name__.split(".")[1:-1])
15
+
16
+ default_input_queue = embeddings_request_queue
17
+ default_output_queue = embeddings_response_queue
18
+ default_subscriber = module
16
19
  default_model="all-MiniLM-L6-v2"
17
20
 
18
21
  class Processor(ConsumerProducer):
@@ -70,5 +73,5 @@ class Processor(ConsumerProducer):
70
73
 
71
74
  def run():
72
75
 
73
- Processor.start("embeddings-hf", __doc__)
76
+ Processor.start(module, __doc__)
74
77
 
@@ -6,12 +6,15 @@ Input is text, output is embeddings vector.
6
6
  from langchain_community.embeddings import OllamaEmbeddings
7
7
 
8
8
  from ... schema import EmbeddingsRequest, EmbeddingsResponse
9
+ from ... schema import embeddings_request_queue, embeddings_response_queue
9
10
  from ... log_level import LogLevel
10
11
  from ... base import ConsumerProducer
11
12
 
12
- default_input_queue = 'embeddings'
13
- default_output_queue = 'embeddings-response'
14
- default_subscriber = 'embeddings-ollama'
13
+ module = ".".join(__name__.split(".")[1:-1])
14
+
15
+ default_input_queue = embeddings_request_queue
16
+ default_output_queue = embeddings_response_queue
17
+ default_subscriber = module
15
18
  default_model="mxbai-embed-large"
16
19
  default_ollama = 'http://localhost:11434'
17
20
 
@@ -77,5 +80,5 @@ class Processor(ConsumerProducer):
77
80
 
78
81
  def run():
79
82
 
80
- Processor.start('embeddings-ollama', __doc__)
83
+ Processor.start(module, __doc__)
81
84