trustgraph 0.5.1__tar.gz → 0.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of trustgraph might be problematic. Click here for more details.
- {trustgraph-0.5.1 → trustgraph-0.5.2}/PKG-INFO +11 -8
- {trustgraph-0.5.1 → trustgraph-0.5.2}/README.md +8 -6
- trustgraph-0.5.2/scripts/dump-parquet +12 -0
- trustgraph-0.5.2/scripts/triples-dump-parquet +6 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/setup.py +4 -1
- trustgraph-0.5.2/trustgraph/dump/triples/parquet/processor.py +87 -0
- trustgraph-0.5.2/trustgraph/dump/triples/parquet/writer.py +96 -0
- trustgraph-0.5.2/trustgraph/embeddings/ollama/__init__.py +3 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/embeddings/vectorize/vectorize.py +4 -1
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/embeddings_client.py +13 -6
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/graph_rag.py +9 -2
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/graph_rag_client.py +12 -4
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/kg/extract_definitions/extract.py +4 -1
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/kg/extract_relationships/extract.py +4 -1
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/llm_client.py +12 -6
- trustgraph-0.5.2/trustgraph/storage/graph_embeddings/__init__.py +0 -0
- trustgraph-0.5.2/trustgraph/storage/triples/__init__.py +0 -0
- trustgraph-0.5.2/trustgraph/storage/triples/cassandra/__main__.py +7 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph.egg-info/PKG-INFO +11 -8
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph.egg-info/SOURCES.txt +8 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph.egg-info/requires.txt +1 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/LICENSE +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/chunker-recursive +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/embeddings-hf +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/embeddings-ollama +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/embeddings-vectorize +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/ge-write-milvus +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/graph-rag +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/graph-show +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/graph-to-turtle +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/init-pulsar-manager +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/kg-extract-definitions +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/kg-extract-relationships +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/loader +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/pdf-decoder +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/query +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/run-processing +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/text-completion-azure +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/text-completion-claude +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/text-completion-ollama +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/text-completion-vertexai +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/triples-write-cassandra +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/setup.cfg +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/base/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/base/base_processor.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/base/consumer.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/base/consumer_producer.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/base/producer.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/chunking/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/chunking/recursive/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/chunking/recursive/__main__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/chunking/recursive/chunker.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/decoding/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/decoding/pdf/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/decoding/pdf/__main__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/decoding/pdf/pdf_decoder.py +0 -0
- {trustgraph-0.5.1/trustgraph/embeddings → trustgraph-0.5.2/trustgraph/dump}/__init__.py +0 -0
- {trustgraph-0.5.1/trustgraph/kg → trustgraph-0.5.2/trustgraph/dump/triples}/__init__.py +0 -0
- {trustgraph-0.5.1/trustgraph/embeddings/ollama → trustgraph-0.5.2/trustgraph/dump/triples/parquet}/__init__.py +0 -0
- {trustgraph-0.5.1/trustgraph/storage/graph_embeddings/milvus → trustgraph-0.5.2/trustgraph/dump/triples/parquet}/__main__.py +0 -0
- {trustgraph-0.5.1/trustgraph/model → trustgraph-0.5.2/trustgraph/embeddings}/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/embeddings/hf/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/embeddings/hf/__main__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/embeddings/hf/hf.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/embeddings/ollama/__main__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/embeddings/ollama/processor.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/embeddings/vectorize/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/embeddings/vectorize/__main__.py +0 -0
- {trustgraph-0.5.1/trustgraph/model/text_completion → trustgraph-0.5.2/trustgraph/kg}/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/kg/extract_definitions/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/kg/extract_definitions/__main__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/kg/extract_relationships/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/kg/extract_relationships/__main__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/log_level.py +0 -0
- {trustgraph-0.5.1/trustgraph/retrieval → trustgraph-0.5.2/trustgraph/model}/__init__.py +0 -0
- {trustgraph-0.5.1/trustgraph/storage → trustgraph-0.5.2/trustgraph/model/text_completion}/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/azure/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/azure/__main__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/azure/llm.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/claude/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/claude/__main__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/claude/llm.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/ollama/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/ollama/__main__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/ollama/llm.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/vertexai/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/vertexai/__main__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/vertexai/llm.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/processing/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/processing/__main__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/processing/processing.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/prompts.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/rdf.py +0 -0
- {trustgraph-0.5.1/trustgraph/storage/graph_embeddings → trustgraph-0.5.2/trustgraph/retrieval}/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/retrieval/graph_rag/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/retrieval/graph_rag/__main__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/retrieval/graph_rag/rag.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/schema.py +0 -0
- {trustgraph-0.5.1/trustgraph/storage/triples → trustgraph-0.5.2/trustgraph/storage}/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/storage/graph_embeddings/milvus/__init__.py +0 -0
- {trustgraph-0.5.1/trustgraph/storage/triples/cassandra → trustgraph-0.5.2/trustgraph/storage/graph_embeddings/milvus}/__main__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/storage/graph_embeddings/milvus/write.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/storage/triples/cassandra/__init__.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/storage/triples/cassandra/write.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/triple_vectors.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/trustgraph.py +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph.egg-info/dependency_links.txt +0 -0
- {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph.egg-info/top_level.txt +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: trustgraph
|
|
3
|
-
Version: 0.5.1
|
|
3
|
+
Version: 0.5.2
|
|
4
4
|
Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
|
|
5
5
|
Home-page: https://github.com/trustgraph-ai/trustgraph
|
|
6
|
-
Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.5.1.tar.gz
|
|
6
|
+
Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.5.2.tar.gz
|
|
7
7
|
Author: trustgraph.ai
|
|
8
8
|
Author-email: security@trustgraph.ai
|
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -32,6 +32,7 @@ Requires-Dist: anthropic
|
|
|
32
32
|
Requires-Dist: google-cloud-aiplatform
|
|
33
33
|
Requires-Dist: pyyaml
|
|
34
34
|
Requires-Dist: prometheus-client
|
|
35
|
+
Requires-Dist: pyarrow
|
|
35
36
|
|
|
36
37
|
|
|
37
38
|
# TrustGraph
|
|
@@ -95,11 +96,13 @@ package installed can also run the entire architecture.
|
|
|
95
96
|
chunking algorithm to produce smaller text chunks.
|
|
96
97
|
- `embeddings-hf` - A service which analyses text and returns a vector
|
|
97
98
|
embedding using one of the HuggingFace embeddings models.
|
|
99
|
+
- `embeddings-ollama` - A service which analyses text and returns a vector
|
|
100
|
+
embedding using an Ollama embeddings model.
|
|
98
101
|
- `embeddings-vectorize` - Uses an embeddings service to get a vector
|
|
99
102
|
embedding which is added to the processor payload.
|
|
100
103
|
- `graph-rag` - A query service which applies a Graph RAG algorithm to
|
|
101
104
|
provide a response to a text prompt.
|
|
102
|
-
- `
|
|
105
|
+
- `triples-write-cassandra` - Takes knowledge graph edges and writes them to
|
|
103
106
|
a Cassandra store.
|
|
104
107
|
- `kg-extract-definitions` - knowledge extractor - examines text and
|
|
105
108
|
produces graph edges.
|
|
@@ -115,15 +118,15 @@ package installed can also run the entire architecture.
|
|
|
115
118
|
format. For instance, the wrapping of text between lines in a PDF document
|
|
116
119
|
is not semantically encoded, so the decoder will see wrapped lines as
|
|
117
120
|
space-separated.
|
|
118
|
-
- `
|
|
121
|
+
- `ge-write-milvus` - Takes graph embeddings mappings and records them
|
|
119
122
|
in the vector embeddings store.
|
|
120
123
|
|
|
121
124
|
## LM Specific Modules
|
|
122
125
|
|
|
123
|
-
- `
|
|
124
|
-
- `
|
|
125
|
-
- `
|
|
126
|
-
- `
|
|
126
|
+
- `text-completion-azure` - Sends request to AzureAI serverless endpoint
|
|
127
|
+
- `text-completion-claude` - Sends request to Anthropic's API
|
|
128
|
+
- `text-completion-ollama` - Sends request to LM running using Ollama
|
|
129
|
+
- `text-completion-vertexai` - Sends request to model available through VertexAI API
|
|
127
130
|
|
|
128
131
|
## Quickstart Guide
|
|
129
132
|
|
|
@@ -60,11 +60,13 @@ package installed can also run the entire architecture.
|
|
|
60
60
|
chunking algorithm to produce smaller text chunks.
|
|
61
61
|
- `embeddings-hf` - A service which analyses text and returns a vector
|
|
62
62
|
embedding using one of the HuggingFace embeddings models.
|
|
63
|
+
- `embeddings-ollama` - A service which analyses text and returns a vector
|
|
64
|
+
embedding using an Ollama embeddings model.
|
|
63
65
|
- `embeddings-vectorize` - Uses an embeddings service to get a vector
|
|
64
66
|
embedding which is added to the processor payload.
|
|
65
67
|
- `graph-rag` - A query service which applies a Graph RAG algorithm to
|
|
66
68
|
provide a response to a text prompt.
|
|
67
|
-
- `
|
|
69
|
+
- `triples-write-cassandra` - Takes knowledge graph edges and writes them to
|
|
68
70
|
a Cassandra store.
|
|
69
71
|
- `kg-extract-definitions` - knowledge extractor - examines text and
|
|
70
72
|
produces graph edges.
|
|
@@ -80,15 +82,15 @@ package installed can also run the entire architecture.
|
|
|
80
82
|
format. For instance, the wrapping of text between lines in a PDF document
|
|
81
83
|
is not semantically encoded, so the decoder will see wrapped lines as
|
|
82
84
|
space-separated.
|
|
83
|
-
- `
|
|
85
|
+
- `ge-write-milvus` - Takes graph embeddings mappings and records them
|
|
84
86
|
in the vector embeddings store.
|
|
85
87
|
|
|
86
88
|
## LM Specific Modules
|
|
87
89
|
|
|
88
|
-
- `
|
|
89
|
-
- `
|
|
90
|
-
- `
|
|
91
|
-
- `
|
|
90
|
+
- `text-completion-azure` - Sends request to AzureAI serverless endpoint
|
|
91
|
+
- `text-completion-claude` - Sends request to Anthropic's API
|
|
92
|
+
- `text-completion-ollama` - Sends request to LM running using Ollama
|
|
93
|
+
- `text-completion-vertexai` - Sends request to model available through VertexAI API
|
|
92
94
|
|
|
93
95
|
## Quickstart Guide
|
|
94
96
|
|
|
@@ -4,7 +4,7 @@ import os
|
|
|
4
4
|
with open("README.md", "r") as fh:
|
|
5
5
|
long_description = fh.read()
|
|
6
6
|
|
|
7
|
-
version = "0.5.
|
|
7
|
+
version = "0.5.2"
|
|
8
8
|
|
|
9
9
|
setuptools.setup(
|
|
10
10
|
name="trustgraph",
|
|
@@ -44,6 +44,7 @@ setuptools.setup(
|
|
|
44
44
|
"google-cloud-aiplatform",
|
|
45
45
|
"pyyaml",
|
|
46
46
|
"prometheus-client",
|
|
47
|
+
"pyarrow",
|
|
47
48
|
],
|
|
48
49
|
scripts=[
|
|
49
50
|
"scripts/chunker-recursive",
|
|
@@ -66,5 +67,7 @@ setuptools.setup(
|
|
|
66
67
|
"scripts/text-completion-ollama",
|
|
67
68
|
"scripts/text-completion-vertexai",
|
|
68
69
|
"scripts/triples-write-cassandra",
|
|
70
|
+
"scripts/dump-parquet",
|
|
71
|
+
"scripts/triples-dump-parquet",
|
|
69
72
|
]
|
|
70
73
|
)
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
|
|
2
|
+
"""
|
|
3
|
+
Write graphs triples to parquet files in a directory.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import pulsar
|
|
7
|
+
import base64
|
|
8
|
+
import os
|
|
9
|
+
import argparse
|
|
10
|
+
import time
|
|
11
|
+
|
|
12
|
+
from .... trustgraph import TrustGraph
|
|
13
|
+
from .... schema import Triple
|
|
14
|
+
from .... schema import triples_store_queue
|
|
15
|
+
from .... log_level import LogLevel
|
|
16
|
+
from .... base import Consumer
|
|
17
|
+
|
|
18
|
+
from . writer import ParquetWriter
|
|
19
|
+
|
|
20
|
+
# Dotted module path with the first and last components of __name__ removed;
# used below as the default subscriber name and passed to Processor.start().
module = ".".join(__name__.split(".")[1:-1])

# Defaults used by Processor when the corresponding parameter is not given.
default_input_queue = triples_store_queue
default_subscriber = module
# NOTE(review): not referenced anywhere in the visible part of this module.
default_graph_host = 'localhost'
default_directory = "."
# "{id}" is a str.format placeholder filled in by the writer.
default_file_template = "triples-{id}.parquet"
# Rotation time, in seconds.
default_rotation_time = 60
|
|
28
|
+
|
|
29
|
+
class Processor(Consumer):
    """Consumer that forwards each received triple to a ParquetWriter.

    Keyword parameters read from ``params`` (all optional):

    - ``input_queue``: queue to consume from
      (default: ``default_input_queue``).
    - ``subscriber``: subscriber name (default: ``default_subscriber``).
    - ``directory``: output directory handed to the writer
      (default: ``default_directory``).
    - ``file_template``: output file name template handed to the writer
      (default: ``default_file_template``).
    - ``rotation_time``: rotation time in seconds handed to the writer
      (default: ``default_rotation_time``).

    All of ``params`` is also passed through to ``Consumer.__init__``.
    """

    def __init__(self, **params):

        input_queue = params.get("input_queue", default_input_queue)
        subscriber = params.get("subscriber", default_subscriber)
        directory = params.get("directory", default_directory)
        file_template = params.get("file_template", default_file_template)
        rotation_time = params.get("rotation_time", default_rotation_time)

        # Resolved defaults override whatever was (or was not) in params;
        # the input schema is always Triple.
        super(Processor, self).__init__(
            **params | {
                "input_queue": input_queue,
                "subscriber": subscriber,
                "input_schema": Triple,
            }
        )

        self.writer = ParquetWriter(directory, file_template, rotation_time)

    def __del__(self):
        # The writer attribute is missing if __init__ failed before it was
        # assigned, hence the guard.
        if hasattr(self, "writer"):
            del self.writer

    def handle(self, msg):
        """Hand the subject, predicate and object values of msg to the writer."""

        v = msg.value()
        self.writer.write(v.s.value, v.p.value, v.o.value)

    @staticmethod
    def add_args(parser):
        """Add the Consumer arguments plus the Parquet output options."""

        Consumer.add_args(
            parser, default_input_queue, default_subscriber,
        )

        parser.add_argument(
            '-d', '--directory',
            default=default_directory,
            help=f'Directory to write to (default: {default_directory})'
        )

        # Help text previously repeated the --directory description.
        parser.add_argument(
            '-f', '--file-template',
            default=default_file_template,
            help=f'File name template (default: {default_file_template})'
        )

        parser.add_argument(
            '-t', '--rotation-time',
            type=int,
            default=default_rotation_time,
            help=f'Rotation time / seconds (default: {default_rotation_time})'
        )
|
|
83
|
+
|
|
84
|
+
def run():
    """Entry point: start the Processor under this module's name.

    The module-level docstring (global ``__doc__``) is passed as the second
    argument to ``Processor.start``.
    """
    Processor.start(module, __doc__)
|
|
87
|
+
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
|
|
2
|
+
import threading
|
|
3
|
+
import queue
|
|
4
|
+
import time
|
|
5
|
+
import uuid
|
|
6
|
+
import pyarrow as pa
|
|
7
|
+
import pyarrow.parquet as pq
|
|
8
|
+
|
|
9
|
+
class ParquetWriter:
    """Buffer (s, p, o) triples and write them out as Parquet files.

    ``write()`` puts a triple on an internal queue.  A background thread
    collects queued triples and, once ``rotation_time`` seconds have passed
    since the first triple of the current batch arrived, writes the batch to
    a new file in ``directory``.  The file name is ``file_template`` formatted
    with ``id`` set to a fresh UUID.

    Call ``close()`` to stop the thread; triples still pending at that point
    are written to one final file instead of being dropped.

    NOTE(review): the worker thread's target is a bound method, so the thread
    keeps this object referenced while it runs and ``__del__`` may never be
    triggered by garbage collection alone.  Prefer calling ``close()``.
    """

    # Queue marker used by close() to wake the worker without waiting for
    # the one-second poll to time out.
    _WAKE = object()

    def __init__(self, directory, file_template, rotation_time):
        self.directory = directory
        self.file_template = file_template
        self.rotation_time = rotation_time

        self.q = queue.Queue()

        self.running = True

        self.thread = threading.Thread(target=self.writer_thread)
        self.thread.start()

    def writer_thread(self):
        """Worker loop: batch queued triples and rotate files on a timer."""

        triples = []

        # Wall-clock time at which the current batch is due to be written;
        # None while no batch is open.
        deadline = None

        while self.running:

            try:
                item = self.q.get(timeout=1)
            except queue.Empty:
                pass
            else:
                if item is self._WAKE:
                    # Woken by close(); the loop condition ends the loop.
                    continue

                if deadline is None:
                    deadline = time.time() + self.rotation_time

                triples.append(item)

            if deadline is not None and time.time() > deadline:
                self.write_file(triples)
                deadline = None
                triples = []

        # Shutting down: collect whatever is still queued so that nothing
        # handed to write() before close() is lost.
        while True:
            try:
                item = self.q.get_nowait()
            except queue.Empty:
                break
            if item is not self._WAKE:
                triples.append(item)

        if triples:
            self.write_file(triples)

    def write_file(self, triples):
        """Write triples (a list of (s, p, o) tuples) to a new Parquet file.

        Failures are reported on stdout and otherwise ignored, so a failed
        write does not terminate the worker thread.
        """

        try:

            schema = pa.schema([
                pa.field('s', pa.string()),
                pa.field('p', pa.string()),
                pa.field('o', pa.string()),
            ])

            fname = self.file_template.format(id=str(uuid.uuid4()))
            path = f"{self.directory}/{fname}"

            # Build the batch before opening the file so that a conversion
            # error does not leave an empty file behind.
            batch = pa.record_batch(
                [
                    [tpl[0] for tpl in triples],
                    [tpl[1] for tpl in triples],
                    [tpl[2] for tpl in triples],
                ],
                names=['s', 'p', 'o']
            )

            # Context manager closes the file even if write_batch raises.
            with pq.ParquetWriter(path, schema) as writer:
                writer.write_batch(batch)

            print(f"Wrote {path}.")

        except Exception as e:

            # Deliberate best-effort: report and carry on.
            print("Parquet write:", e)

    def write(self, s, p, o):
        """Queue one triple for writing."""
        self.q.put((s, p, o))

    def close(self):
        """Stop the worker thread, flushing pending triples first.

        Safe to call more than once, and safe on a partially constructed
        instance.
        """

        self.running = False

        q = getattr(self, "q", None)
        if q is not None:
            q.put(self._WAKE)

        thread = getattr(self, "thread", None)
        if (
            thread is not None
            and thread is not threading.current_thread()
            and thread.is_alive()
        ):
            thread.join()

    def __del__(self):
        self.close()
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
|
|
@@ -34,7 +34,10 @@ class Processor(ConsumerProducer):
|
|
|
34
34
|
}
|
|
35
35
|
)
|
|
36
36
|
|
|
37
|
-
self.embeddings = EmbeddingsClient(
|
|
37
|
+
self.embeddings = EmbeddingsClient(
|
|
38
|
+
pulsar_host=self.pulsar_host,
|
|
39
|
+
subscriber=module + "emb",
|
|
40
|
+
)
|
|
38
41
|
|
|
39
42
|
def emit(self, source, chunk, vectors):
|
|
40
43
|
|
|
@@ -17,14 +17,14 @@ DEBUG=_pulsar.LoggerLevel.Debug
|
|
|
17
17
|
class EmbeddingsClient:
|
|
18
18
|
|
|
19
19
|
def __init__(
|
|
20
|
-
self, log_level=ERROR,
|
|
20
|
+
self, log_level=ERROR, subscriber=None,
|
|
21
21
|
pulsar_host="pulsar://pulsar:6650",
|
|
22
22
|
):
|
|
23
23
|
|
|
24
24
|
self.client = None
|
|
25
25
|
|
|
26
|
-
if
|
|
27
|
-
|
|
26
|
+
if subscriber == None:
|
|
27
|
+
subscriber = str(uuid.uuid4())
|
|
28
28
|
|
|
29
29
|
self.client = pulsar.Client(
|
|
30
30
|
pulsar_host,
|
|
@@ -38,7 +38,7 @@ class EmbeddingsClient:
|
|
|
38
38
|
)
|
|
39
39
|
|
|
40
40
|
self.consumer = self.client.subscribe(
|
|
41
|
-
embeddings_response_queue,
|
|
41
|
+
embeddings_response_queue, subscriber,
|
|
42
42
|
schema=JsonSchema(EmbeddingsResponse),
|
|
43
43
|
)
|
|
44
44
|
|
|
@@ -67,6 +67,13 @@ class EmbeddingsClient:
|
|
|
67
67
|
|
|
68
68
|
def __del__(self):
|
|
69
69
|
|
|
70
|
-
if self
|
|
71
|
-
|
|
70
|
+
if hasattr(self, "consumer"):
|
|
71
|
+
# self.consumer.unsubscribe()
|
|
72
|
+
self.consumer.close()
|
|
73
|
+
|
|
74
|
+
if hasattr(self, "producer"):
|
|
75
|
+
self.producer.flush()
|
|
76
|
+
self.producer.close()
|
|
77
|
+
|
|
78
|
+
self.client.close()
|
|
72
79
|
|
|
@@ -19,6 +19,7 @@ class GraphRag:
|
|
|
19
19
|
entity_limit=50,
|
|
20
20
|
triple_limit=30,
|
|
21
21
|
max_subgraph_size=3000,
|
|
22
|
+
module="test",
|
|
22
23
|
):
|
|
23
24
|
|
|
24
25
|
self.verbose=verbose
|
|
@@ -31,7 +32,10 @@ class GraphRag:
|
|
|
31
32
|
|
|
32
33
|
self.graph = TrustGraph(graph_hosts)
|
|
33
34
|
|
|
34
|
-
self.embeddings = EmbeddingsClient(
|
|
35
|
+
self.embeddings = EmbeddingsClient(
|
|
36
|
+
pulsar_host=pulsar_host,
|
|
37
|
+
subscriber=module + "-emb",
|
|
38
|
+
)
|
|
35
39
|
|
|
36
40
|
self.vecstore = TripleVectors(vector_store)
|
|
37
41
|
|
|
@@ -41,7 +45,10 @@ class GraphRag:
|
|
|
41
45
|
|
|
42
46
|
self.label_cache = {}
|
|
43
47
|
|
|
44
|
-
self.llm = LlmClient(
|
|
48
|
+
self.llm = LlmClient(
|
|
49
|
+
pulsar_host=pulsar_host,
|
|
50
|
+
subscriber=module + "-llm",
|
|
51
|
+
)
|
|
45
52
|
|
|
46
53
|
if self.verbose:
|
|
47
54
|
print("Initialised", flush=True)
|
|
@@ -18,12 +18,12 @@ DEBUG=_pulsar.LoggerLevel.Debug
|
|
|
18
18
|
class GraphRagClient:
|
|
19
19
|
|
|
20
20
|
def __init__(
|
|
21
|
-
self, log_level=ERROR,
|
|
21
|
+
self, log_level=ERROR, subscriber=None,
|
|
22
22
|
pulsar_host="pulsar://pulsar:6650",
|
|
23
23
|
):
|
|
24
24
|
|
|
25
|
-
if
|
|
26
|
-
|
|
25
|
+
if subscriber == None:
|
|
26
|
+
subscriber = str(uuid.uuid4())
|
|
27
27
|
|
|
28
28
|
self.client = pulsar.Client(
|
|
29
29
|
pulsar_host,
|
|
@@ -37,7 +37,7 @@ class GraphRagClient:
|
|
|
37
37
|
)
|
|
38
38
|
|
|
39
39
|
self.consumer = self.client.subscribe(
|
|
40
|
-
graph_rag_response_queue,
|
|
40
|
+
graph_rag_response_queue, subscriber,
|
|
41
41
|
schema=JsonSchema(GraphRagResponse),
|
|
42
42
|
)
|
|
43
43
|
|
|
@@ -66,5 +66,13 @@ class GraphRagClient:
|
|
|
66
66
|
|
|
67
67
|
def __del__(self):
|
|
68
68
|
|
|
69
|
+
if hasattr(self, "consumer"):
|
|
70
|
+
# self.consumer.unsubscribe()
|
|
71
|
+
self.consumer.close()
|
|
72
|
+
|
|
73
|
+
if hasattr(self, "producer"):
|
|
74
|
+
self.producer.flush()
|
|
75
|
+
self.producer.close()
|
|
76
|
+
|
|
69
77
|
self.client.close()
|
|
70
78
|
|
|
@@ -61,7 +61,10 @@ class Processor(ConsumerProducer):
|
|
|
61
61
|
"vector_schema": GraphEmbeddings.__name__,
|
|
62
62
|
})
|
|
63
63
|
|
|
64
|
-
self.llm = LlmClient(
|
|
64
|
+
self.llm = LlmClient(
|
|
65
|
+
pulsar_host = self.pulsar_host,
|
|
66
|
+
subscriber = module + "-llm",
|
|
67
|
+
)
|
|
65
68
|
|
|
66
69
|
def to_uri(self, text):
|
|
67
70
|
|
|
@@ -19,12 +19,12 @@ DEBUG=_pulsar.LoggerLevel.Debug
|
|
|
19
19
|
class LlmClient:
|
|
20
20
|
|
|
21
21
|
def __init__(
|
|
22
|
-
self, log_level=ERROR,
|
|
22
|
+
self, log_level=ERROR, subscriber=None,
|
|
23
23
|
pulsar_host="pulsar://pulsar:6650",
|
|
24
24
|
):
|
|
25
25
|
|
|
26
|
-
if
|
|
27
|
-
|
|
26
|
+
if subscriber == None:
|
|
27
|
+
subscriber = str(uuid.uuid4())
|
|
28
28
|
|
|
29
29
|
self.client = pulsar.Client(
|
|
30
30
|
pulsar_host,
|
|
@@ -38,7 +38,7 @@ class LlmClient:
|
|
|
38
38
|
)
|
|
39
39
|
|
|
40
40
|
self.consumer = self.client.subscribe(
|
|
41
|
-
text_completion_response_queue,
|
|
41
|
+
text_completion_response_queue, subscriber,
|
|
42
42
|
schema=JsonSchema(TextCompletionResponse),
|
|
43
43
|
)
|
|
44
44
|
|
|
@@ -68,7 +68,13 @@ class LlmClient:
|
|
|
68
68
|
|
|
69
69
|
def __del__(self):
|
|
70
70
|
|
|
71
|
-
self
|
|
72
|
-
|
|
71
|
+
if hasattr(self, "consumer"):
|
|
72
|
+
# self.consumer.unsubscribe()
|
|
73
|
+
self.consumer.close()
|
|
74
|
+
|
|
75
|
+
if hasattr(self, "producer"):
|
|
76
|
+
self.producer.flush()
|
|
77
|
+
self.producer.close()
|
|
78
|
+
|
|
73
79
|
self.client.close()
|
|
74
80
|
|
|
File without changes
|
|
File without changes
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: trustgraph
|
|
3
|
-
Version: 0.5.1
|
|
3
|
+
Version: 0.5.2
|
|
4
4
|
Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
|
|
5
5
|
Home-page: https://github.com/trustgraph-ai/trustgraph
|
|
6
|
-
Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.5.1.tar.gz
|
|
6
|
+
Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.5.2.tar.gz
|
|
7
7
|
Author: trustgraph.ai
|
|
8
8
|
Author-email: security@trustgraph.ai
|
|
9
9
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -32,6 +32,7 @@ Requires-Dist: anthropic
|
|
|
32
32
|
Requires-Dist: google-cloud-aiplatform
|
|
33
33
|
Requires-Dist: pyyaml
|
|
34
34
|
Requires-Dist: prometheus-client
|
|
35
|
+
Requires-Dist: pyarrow
|
|
35
36
|
|
|
36
37
|
|
|
37
38
|
# TrustGraph
|
|
@@ -95,11 +96,13 @@ package installed can also run the entire architecture.
|
|
|
95
96
|
chunking algorithm to produce smaller text chunks.
|
|
96
97
|
- `embeddings-hf` - A service which analyses text and returns a vector
|
|
97
98
|
embedding using one of the HuggingFace embeddings models.
|
|
99
|
+
- `embeddings-ollama` - A service which analyses text and returns a vector
|
|
100
|
+
embedding using an Ollama embeddings model.
|
|
98
101
|
- `embeddings-vectorize` - Uses an embeddings service to get a vector
|
|
99
102
|
embedding which is added to the processor payload.
|
|
100
103
|
- `graph-rag` - A query service which applies a Graph RAG algorithm to
|
|
101
104
|
provide a response to a text prompt.
|
|
102
|
-
- `
|
|
105
|
+
- `triples-write-cassandra` - Takes knowledge graph edges and writes them to
|
|
103
106
|
a Cassandra store.
|
|
104
107
|
- `kg-extract-definitions` - knowledge extractor - examines text and
|
|
105
108
|
produces graph edges.
|
|
@@ -115,15 +118,15 @@ package installed can also run the entire architecture.
|
|
|
115
118
|
format. For instance, the wrapping of text between lines in a PDF document
|
|
116
119
|
is not semantically encoded, so the decoder will see wrapped lines as
|
|
117
120
|
space-separated.
|
|
118
|
-
- `
|
|
121
|
+
- `ge-write-milvus` - Takes graph embeddings mappings and records them
|
|
119
122
|
in the vector embeddings store.
|
|
120
123
|
|
|
121
124
|
## LM Specific Modules
|
|
122
125
|
|
|
123
|
-
- `
|
|
124
|
-
- `
|
|
125
|
-
- `
|
|
126
|
-
- `
|
|
126
|
+
- `text-completion-azure` - Sends request to AzureAI serverless endpoint
|
|
127
|
+
- `text-completion-claude` - Sends request to Anthropic's API
|
|
128
|
+
- `text-completion-ollama` - Sends request to LM running using Ollama
|
|
129
|
+
- `text-completion-vertexai` - Sends request to model available through VertexAI API
|
|
127
130
|
|
|
128
131
|
## Quickstart Guide
|
|
129
132
|
|
|
@@ -2,6 +2,7 @@ LICENSE
|
|
|
2
2
|
README.md
|
|
3
3
|
setup.py
|
|
4
4
|
scripts/chunker-recursive
|
|
5
|
+
scripts/dump-parquet
|
|
5
6
|
scripts/embeddings-hf
|
|
6
7
|
scripts/embeddings-ollama
|
|
7
8
|
scripts/embeddings-vectorize
|
|
@@ -20,6 +21,7 @@ scripts/text-completion-azure
|
|
|
20
21
|
scripts/text-completion-claude
|
|
21
22
|
scripts/text-completion-ollama
|
|
22
23
|
scripts/text-completion-vertexai
|
|
24
|
+
scripts/triples-dump-parquet
|
|
23
25
|
scripts/triples-write-cassandra
|
|
24
26
|
trustgraph/__init__.py
|
|
25
27
|
trustgraph/embeddings_client.py
|
|
@@ -50,6 +52,12 @@ trustgraph/decoding/__init__.py
|
|
|
50
52
|
trustgraph/decoding/pdf/__init__.py
|
|
51
53
|
trustgraph/decoding/pdf/__main__.py
|
|
52
54
|
trustgraph/decoding/pdf/pdf_decoder.py
|
|
55
|
+
trustgraph/dump/__init__.py
|
|
56
|
+
trustgraph/dump/triples/__init__.py
|
|
57
|
+
trustgraph/dump/triples/parquet/__init__.py
|
|
58
|
+
trustgraph/dump/triples/parquet/__main__.py
|
|
59
|
+
trustgraph/dump/triples/parquet/processor.py
|
|
60
|
+
trustgraph/dump/triples/parquet/writer.py
|
|
53
61
|
trustgraph/embeddings/__init__.py
|
|
54
62
|
trustgraph/embeddings/hf/__init__.py
|
|
55
63
|
trustgraph/embeddings/hf/__main__.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{trustgraph-0.5.1/trustgraph/model/text_completion → trustgraph-0.5.2/trustgraph/kg}/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{trustgraph-0.5.1/trustgraph/storage/triples → trustgraph-0.5.2/trustgraph/storage}/__init__.py
RENAMED
|
File without changes
|
{trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/storage/graph_embeddings/milvus/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|