trustgraph 0.5.1__tar.gz → 0.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of trustgraph might be problematic. More details are linked from the original diff page.

Files changed (109)
  1. {trustgraph-0.5.1 → trustgraph-0.5.2}/PKG-INFO +11 -8
  2. {trustgraph-0.5.1 → trustgraph-0.5.2}/README.md +8 -6
  3. trustgraph-0.5.2/scripts/dump-parquet +12 -0
  4. trustgraph-0.5.2/scripts/triples-dump-parquet +6 -0
  5. {trustgraph-0.5.1 → trustgraph-0.5.2}/setup.py +4 -1
  6. trustgraph-0.5.2/trustgraph/dump/triples/parquet/processor.py +87 -0
  7. trustgraph-0.5.2/trustgraph/dump/triples/parquet/writer.py +96 -0
  8. trustgraph-0.5.2/trustgraph/embeddings/ollama/__init__.py +3 -0
  9. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/embeddings/vectorize/vectorize.py +4 -1
  10. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/embeddings_client.py +13 -6
  11. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/graph_rag.py +9 -2
  12. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/graph_rag_client.py +12 -4
  13. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/kg/extract_definitions/extract.py +4 -1
  14. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/kg/extract_relationships/extract.py +4 -1
  15. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/llm_client.py +12 -6
  16. trustgraph-0.5.2/trustgraph/storage/graph_embeddings/__init__.py +0 -0
  17. trustgraph-0.5.2/trustgraph/storage/triples/__init__.py +0 -0
  18. trustgraph-0.5.2/trustgraph/storage/triples/cassandra/__main__.py +7 -0
  19. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph.egg-info/PKG-INFO +11 -8
  20. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph.egg-info/SOURCES.txt +8 -0
  21. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph.egg-info/requires.txt +1 -0
  22. {trustgraph-0.5.1 → trustgraph-0.5.2}/LICENSE +0 -0
  23. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/chunker-recursive +0 -0
  24. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/embeddings-hf +0 -0
  25. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/embeddings-ollama +0 -0
  26. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/embeddings-vectorize +0 -0
  27. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/ge-write-milvus +0 -0
  28. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/graph-rag +0 -0
  29. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/graph-show +0 -0
  30. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/graph-to-turtle +0 -0
  31. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/init-pulsar-manager +0 -0
  32. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/kg-extract-definitions +0 -0
  33. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/kg-extract-relationships +0 -0
  34. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/loader +0 -0
  35. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/pdf-decoder +0 -0
  36. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/query +0 -0
  37. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/run-processing +0 -0
  38. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/text-completion-azure +0 -0
  39. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/text-completion-claude +0 -0
  40. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/text-completion-ollama +0 -0
  41. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/text-completion-vertexai +0 -0
  42. {trustgraph-0.5.1 → trustgraph-0.5.2}/scripts/triples-write-cassandra +0 -0
  43. {trustgraph-0.5.1 → trustgraph-0.5.2}/setup.cfg +0 -0
  44. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/__init__.py +0 -0
  45. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/base/__init__.py +0 -0
  46. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/base/base_processor.py +0 -0
  47. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/base/consumer.py +0 -0
  48. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/base/consumer_producer.py +0 -0
  49. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/base/producer.py +0 -0
  50. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/chunking/__init__.py +0 -0
  51. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/chunking/recursive/__init__.py +0 -0
  52. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/chunking/recursive/__main__.py +0 -0
  53. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/chunking/recursive/chunker.py +0 -0
  54. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/decoding/__init__.py +0 -0
  55. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/decoding/pdf/__init__.py +0 -0
  56. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/decoding/pdf/__main__.py +0 -0
  57. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/decoding/pdf/pdf_decoder.py +0 -0
  58. {trustgraph-0.5.1/trustgraph/embeddings → trustgraph-0.5.2/trustgraph/dump}/__init__.py +0 -0
  59. {trustgraph-0.5.1/trustgraph/kg → trustgraph-0.5.2/trustgraph/dump/triples}/__init__.py +0 -0
  60. {trustgraph-0.5.1/trustgraph/embeddings/ollama → trustgraph-0.5.2/trustgraph/dump/triples/parquet}/__init__.py +0 -0
  61. {trustgraph-0.5.1/trustgraph/storage/graph_embeddings/milvus → trustgraph-0.5.2/trustgraph/dump/triples/parquet}/__main__.py +0 -0
  62. {trustgraph-0.5.1/trustgraph/model → trustgraph-0.5.2/trustgraph/embeddings}/__init__.py +0 -0
  63. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/embeddings/hf/__init__.py +0 -0
  64. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/embeddings/hf/__main__.py +0 -0
  65. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/embeddings/hf/hf.py +0 -0
  66. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/embeddings/ollama/__main__.py +0 -0
  67. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/embeddings/ollama/processor.py +0 -0
  68. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/embeddings/vectorize/__init__.py +0 -0
  69. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/embeddings/vectorize/__main__.py +0 -0
  70. {trustgraph-0.5.1/trustgraph/model/text_completion → trustgraph-0.5.2/trustgraph/kg}/__init__.py +0 -0
  71. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/kg/extract_definitions/__init__.py +0 -0
  72. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/kg/extract_definitions/__main__.py +0 -0
  73. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/kg/extract_relationships/__init__.py +0 -0
  74. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/kg/extract_relationships/__main__.py +0 -0
  75. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/log_level.py +0 -0
  76. {trustgraph-0.5.1/trustgraph/retrieval → trustgraph-0.5.2/trustgraph/model}/__init__.py +0 -0
  77. {trustgraph-0.5.1/trustgraph/storage → trustgraph-0.5.2/trustgraph/model/text_completion}/__init__.py +0 -0
  78. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/azure/__init__.py +0 -0
  79. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/azure/__main__.py +0 -0
  80. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/azure/llm.py +0 -0
  81. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/claude/__init__.py +0 -0
  82. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/claude/__main__.py +0 -0
  83. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/claude/llm.py +0 -0
  84. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/ollama/__init__.py +0 -0
  85. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/ollama/__main__.py +0 -0
  86. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/ollama/llm.py +0 -0
  87. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/vertexai/__init__.py +0 -0
  88. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/vertexai/__main__.py +0 -0
  89. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/model/text_completion/vertexai/llm.py +0 -0
  90. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/processing/__init__.py +0 -0
  91. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/processing/__main__.py +0 -0
  92. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/processing/processing.py +0 -0
  93. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/prompts.py +0 -0
  94. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/rdf.py +0 -0
  95. {trustgraph-0.5.1/trustgraph/storage/graph_embeddings → trustgraph-0.5.2/trustgraph/retrieval}/__init__.py +0 -0
  96. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/retrieval/graph_rag/__init__.py +0 -0
  97. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/retrieval/graph_rag/__main__.py +0 -0
  98. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/retrieval/graph_rag/rag.py +0 -0
  99. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/schema.py +0 -0
  100. {trustgraph-0.5.1/trustgraph/storage/triples → trustgraph-0.5.2/trustgraph/storage}/__init__.py +0 -0
  101. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/storage/graph_embeddings/milvus/__init__.py +0 -0
  102. {trustgraph-0.5.1/trustgraph/storage/triples/cassandra → trustgraph-0.5.2/trustgraph/storage/graph_embeddings/milvus}/__main__.py +0 -0
  103. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/storage/graph_embeddings/milvus/write.py +0 -0
  104. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/storage/triples/cassandra/__init__.py +0 -0
  105. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/storage/triples/cassandra/write.py +0 -0
  106. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/triple_vectors.py +0 -0
  107. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph/trustgraph.py +0 -0
  108. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph.egg-info/dependency_links.txt +0 -0
  109. {trustgraph-0.5.1 → trustgraph-0.5.2}/trustgraph.egg-info/top_level.txt +0 -0
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: trustgraph
3
- Version: 0.5.1
3
+ Version: 0.5.2
4
4
  Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
5
5
  Home-page: https://github.com/trustgraph-ai/trustgraph
6
- Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.5.1.tar.gz
6
+ Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.5.2.tar.gz
7
7
  Author: trustgraph.ai
8
8
  Author-email: security@trustgraph.ai
9
9
  Classifier: Programming Language :: Python :: 3
@@ -32,6 +32,7 @@ Requires-Dist: anthropic
32
32
  Requires-Dist: google-cloud-aiplatform
33
33
  Requires-Dist: pyyaml
34
34
  Requires-Dist: prometheus-client
35
+ Requires-Dist: pyarrow
35
36
 
36
37
 
37
38
  # TrustGraph
@@ -95,11 +96,13 @@ package installed can also run the entire architecture.
95
96
  chunking algorithm to produce smaller text chunks.
96
97
  - `embeddings-hf` - A service which analyses text and returns a vector
97
98
  embedding using one of the HuggingFace embeddings models.
99
+ - `embeddings-ollama` - A service which analyses text and returns a vector
100
+ embedding using an Ollama embeddings model.
98
101
  - `embeddings-vectorize` - Uses an embeddings service to get a vector
99
102
  embedding which is added to the processor payload.
100
103
  - `graph-rag` - A query service which applies a Graph RAG algorithm to
101
104
  provide a response to a text prompt.
102
- - `graph-write-cassandra` - Takes knowledge graph edges and writes them to
105
+ - `triples-write-cassandra` - Takes knowledge graph edges and writes them to
103
106
  a Cassandra store.
104
107
  - `kg-extract-definitions` - knowledge extractor - examines text and
105
108
  produces graph edges.
@@ -115,15 +118,15 @@ package installed can also run the entire architecture.
115
118
  format. For instance, the wrapping of text between lines in a PDF document
116
119
  is not semantically encoded, so the decoder will see wrapped lines as
117
120
  space-separated.
118
- - `vector-write-milvus` - Takes vector-entity mappings and records them
121
+ - `ge-write-milvus` - Takes graph embeddings mappings and records them
119
122
  in the vector embeddings store.
120
123
 
121
124
  ## LM Specific Modules
122
125
 
123
- - `llm-azure-text` - Sends request to AzureAI serverless endpoint
124
- - `llm-claude-text` - Sends request to Anthropic's API
125
- - `llm-ollama-text` - Sends request to LM running using Ollama
126
- - `llm-vertexai-text` - Sends request to model available through VertexAI API
126
+ - `text-completion-azure` - Sends request to AzureAI serverless endpoint
127
+ - `text-completion-claude` - Sends request to Anthropic's API
128
+ - `text-completion-ollama` - Sends request to LM running using Ollama
129
+ - `text-completion-vertexai` - Sends request to model available through VertexAI API
127
130
 
128
131
  ## Quickstart Guide
129
132
 
@@ -60,11 +60,13 @@ package installed can also run the entire architecture.
60
60
  chunking algorithm to produce smaller text chunks.
61
61
  - `embeddings-hf` - A service which analyses text and returns a vector
62
62
  embedding using one of the HuggingFace embeddings models.
63
+ - `embeddings-ollama` - A service which analyses text and returns a vector
64
+ embedding using an Ollama embeddings model.
63
65
  - `embeddings-vectorize` - Uses an embeddings service to get a vector
64
66
  embedding which is added to the processor payload.
65
67
  - `graph-rag` - A query service which applies a Graph RAG algorithm to
66
68
  provide a response to a text prompt.
67
- - `graph-write-cassandra` - Takes knowledge graph edges and writes them to
69
+ - `triples-write-cassandra` - Takes knowledge graph edges and writes them to
68
70
  a Cassandra store.
69
71
  - `kg-extract-definitions` - knowledge extractor - examines text and
70
72
  produces graph edges.
@@ -80,15 +82,15 @@ package installed can also run the entire architecture.
80
82
  format. For instance, the wrapping of text between lines in a PDF document
81
83
  is not semantically encoded, so the decoder will see wrapped lines as
82
84
  space-separated.
83
- - `vector-write-milvus` - Takes vector-entity mappings and records them
85
+ - `ge-write-milvus` - Takes graph embeddings mappings and records them
84
86
  in the vector embeddings store.
85
87
 
86
88
  ## LM Specific Modules
87
89
 
88
- - `llm-azure-text` - Sends request to AzureAI serverless endpoint
89
- - `llm-claude-text` - Sends request to Anthropic's API
90
- - `llm-ollama-text` - Sends request to LM running using Ollama
91
- - `llm-vertexai-text` - Sends request to model available through VertexAI API
90
+ - `text-completion-azure` - Sends request to AzureAI serverless endpoint
91
+ - `text-completion-claude` - Sends request to Anthropic's API
92
+ - `text-completion-ollama` - Sends request to LM running using Ollama
93
+ - `text-completion-vertexai` - Sends request to model available through VertexAI API
92
94
 
93
95
  ## Quickstart Guide
94
96
 
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import pyarrow.parquet as pq
4
+ import sys
5
+
6
+ for file in sys.argv[1:]:
7
+
8
+ table = pq.read_table(file).to_pandas()
9
+ print(table)
10
+
11
+
12
+
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env python3
2
+
3
+ from trustgraph.dump.triples.parquet import run
4
+
5
+ run()
6
+
@@ -4,7 +4,7 @@ import os
4
4
  with open("README.md", "r") as fh:
5
5
  long_description = fh.read()
6
6
 
7
- version = "0.5.1"
7
+ version = "0.5.2"
8
8
 
9
9
  setuptools.setup(
10
10
  name="trustgraph",
@@ -44,6 +44,7 @@ setuptools.setup(
44
44
  "google-cloud-aiplatform",
45
45
  "pyyaml",
46
46
  "prometheus-client",
47
+ "pyarrow",
47
48
  ],
48
49
  scripts=[
49
50
  "scripts/chunker-recursive",
@@ -66,5 +67,7 @@ setuptools.setup(
66
67
  "scripts/text-completion-ollama",
67
68
  "scripts/text-completion-vertexai",
68
69
  "scripts/triples-write-cassandra",
70
+ "scripts/dump-parquet",
71
+ "scripts/triples-dump-parquet",
69
72
  ]
70
73
  )
@@ -0,0 +1,87 @@
1
+
2
+ """
3
+ Write graphs triples to parquet files in a directory.
4
+ """
5
+
6
+ import pulsar
7
+ import base64
8
+ import os
9
+ import argparse
10
+ import time
11
+
12
+ from .... trustgraph import TrustGraph
13
+ from .... schema import Triple
14
+ from .... schema import triples_store_queue
15
+ from .... log_level import LogLevel
16
+ from .... base import Consumer
17
+
18
+ from . writer import ParquetWriter
19
+
20
+ module = ".".join(__name__.split(".")[1:-1])
21
+
22
+ default_input_queue = triples_store_queue
23
+ default_subscriber = module
24
+ default_graph_host='localhost'
25
+ default_directory = "."
26
+ default_file_template = "triples-{id}.parquet"
27
+ default_rotation_time = 60
28
+
29
+ class Processor(Consumer):
30
+
31
+ def __init__(self, **params):
32
+
33
+ input_queue = params.get("input_queue", default_input_queue)
34
+ subscriber = params.get("subscriber", default_subscriber)
35
+ directory = params.get("directory", default_directory)
36
+ file_template = params.get("file_template", default_file_template)
37
+ rotation_time = params.get("rotation_time", default_rotation_time)
38
+
39
+ super(Processor, self).__init__(
40
+ **params | {
41
+ "input_queue": input_queue,
42
+ "subscriber": subscriber,
43
+ "input_schema": Triple,
44
+ }
45
+ )
46
+
47
+ self.writer = ParquetWriter(directory, file_template, rotation_time)
48
+
49
+ def __del__(self):
50
+ if hasattr(self, "writer"):
51
+ del self.writer
52
+
53
+ def handle(self, msg):
54
+
55
+ v = msg.value()
56
+ self.writer.write(v.s.value, v.p.value, v.o.value)
57
+
58
+ @staticmethod
59
+ def add_args(parser):
60
+
61
+ Consumer.add_args(
62
+ parser, default_input_queue, default_subscriber,
63
+ )
64
+
65
+ parser.add_argument(
66
+ '-d', '--directory',
67
+ default=default_directory,
68
+ help=f'Directory to write to (default: {default_directory})'
69
+ )
70
+
71
+ parser.add_argument(
72
+ '-f', '--file-template',
73
+ default=default_file_template,
74
+ help=f'Directory to write to (default: {default_file_template})'
75
+ )
76
+
77
+ parser.add_argument(
78
+ '-t', '--rotation-time',
79
+ type=int,
80
+ default=default_rotation_time,
81
+ help=f'Rotation time / seconds (default: {default_rotation_time})'
82
+ )
83
+
84
+ def run():
85
+
86
+ Processor.start(module, __doc__)
87
+
@@ -0,0 +1,96 @@
1
+
2
+ import threading
3
+ import queue
4
+ import time
5
+ import uuid
6
+ import pyarrow as pa
7
+ import pyarrow.parquet as pq
8
+
9
+ class ParquetWriter:
10
+
11
+ def __init__(self, directory, file_template, rotation_time):
12
+ self.directory = directory
13
+ self.file_template = file_template
14
+ self.rotation_time = rotation_time
15
+
16
+ self.q = queue.Queue()
17
+
18
+ self.running = True
19
+
20
+ self.thread = threading.Thread(target=(self.writer_thread))
21
+ self.thread.start()
22
+
23
+ def writer_thread(self):
24
+
25
+ triples = []
26
+
27
+ timeout = None
28
+
29
+ while self.running:
30
+
31
+ try:
32
+
33
+ item = self.q.get(timeout=1)
34
+
35
+ if timeout == None:
36
+ timeout = time.time() + self.rotation_time
37
+
38
+ triples.append(item)
39
+
40
+ except queue.Empty:
41
+ pass
42
+
43
+ if timeout:
44
+ if time.time() > timeout:
45
+
46
+ self.write_file(triples)
47
+ timeout = None
48
+ triples = []
49
+
50
+ def write_file(self, triples):
51
+
52
+ try:
53
+
54
+ schema = pa.schema([
55
+ pa.field('s', pa.string()),
56
+ pa.field('p', pa.string()),
57
+ pa.field('o', pa.string()),
58
+ ])
59
+
60
+ fname = self.file_template.format(id=str(uuid.uuid4()))
61
+ path = f"{self.directory}/{fname}"
62
+
63
+ writer = pq.ParquetWriter(path, schema)
64
+
65
+ batch = pa.record_batch(
66
+ [
67
+ [tpl[0] for tpl in triples],
68
+ [tpl[1] for tpl in triples],
69
+ [tpl[2] for tpl in triples],
70
+ ],
71
+ names=['s', 'p', 'o']
72
+ )
73
+
74
+ writer.write_batch(batch)
75
+
76
+ writer.close()
77
+
78
+ print(f"Wrote {path}.")
79
+
80
+ except Exception as e:
81
+
82
+ print("Parquet write:", e)
83
+
84
+ def write(self, s, p, o):
85
+ self.q.put((s, p, o))
86
+
87
+ def __del__(self):
88
+
89
+ self.running = False
90
+
91
+ if hasattr(self, "q"):
92
+ self.thread.join()
93
+
94
+
95
+
96
+
@@ -0,0 +1,3 @@
1
+
2
+ from . processor import *
3
+
@@ -34,7 +34,10 @@ class Processor(ConsumerProducer):
34
34
  }
35
35
  )
36
36
 
37
- self.embeddings = EmbeddingsClient(pulsar_host=self.pulsar_host)
37
+ self.embeddings = EmbeddingsClient(
38
+ pulsar_host=self.pulsar_host,
39
+ subscriber=module + "emb",
40
+ )
38
41
 
39
42
  def emit(self, source, chunk, vectors):
40
43
 
@@ -17,14 +17,14 @@ DEBUG=_pulsar.LoggerLevel.Debug
17
17
  class EmbeddingsClient:
18
18
 
19
19
  def __init__(
20
- self, log_level=ERROR, client_id=None,
20
+ self, log_level=ERROR, subscriber=None,
21
21
  pulsar_host="pulsar://pulsar:6650",
22
22
  ):
23
23
 
24
24
  self.client = None
25
25
 
26
- if client_id == None:
27
- client_id = str(uuid.uuid4())
26
+ if subscriber == None:
27
+ subscriber = str(uuid.uuid4())
28
28
 
29
29
  self.client = pulsar.Client(
30
30
  pulsar_host,
@@ -38,7 +38,7 @@ class EmbeddingsClient:
38
38
  )
39
39
 
40
40
  self.consumer = self.client.subscribe(
41
- embeddings_response_queue, client_id,
41
+ embeddings_response_queue, subscriber,
42
42
  schema=JsonSchema(EmbeddingsResponse),
43
43
  )
44
44
 
@@ -67,6 +67,13 @@ class EmbeddingsClient:
67
67
 
68
68
  def __del__(self):
69
69
 
70
- if self.client:
71
- self.client.close()
70
+ if hasattr(self, "consumer"):
71
+ # self.consumer.unsubscribe()
72
+ self.consumer.close()
73
+
74
+ if hasattr(self, "producer"):
75
+ self.producer.flush()
76
+ self.producer.close()
77
+
78
+ self.client.close()
72
79
 
@@ -19,6 +19,7 @@ class GraphRag:
19
19
  entity_limit=50,
20
20
  triple_limit=30,
21
21
  max_subgraph_size=3000,
22
+ module="test",
22
23
  ):
23
24
 
24
25
  self.verbose=verbose
@@ -31,7 +32,10 @@ class GraphRag:
31
32
 
32
33
  self.graph = TrustGraph(graph_hosts)
33
34
 
34
- self.embeddings = EmbeddingsClient(pulsar_host=pulsar_host)
35
+ self.embeddings = EmbeddingsClient(
36
+ pulsar_host=pulsar_host,
37
+ subscriber=module + "-emb",
38
+ )
35
39
 
36
40
  self.vecstore = TripleVectors(vector_store)
37
41
 
@@ -41,7 +45,10 @@ class GraphRag:
41
45
 
42
46
  self.label_cache = {}
43
47
 
44
- self.llm = LlmClient(pulsar_host=pulsar_host)
48
+ self.llm = LlmClient(
49
+ pulsar_host=pulsar_host,
50
+ subscriber=module + "-llm",
51
+ )
45
52
 
46
53
  if self.verbose:
47
54
  print("Initialised", flush=True)
@@ -18,12 +18,12 @@ DEBUG=_pulsar.LoggerLevel.Debug
18
18
  class GraphRagClient:
19
19
 
20
20
  def __init__(
21
- self, log_level=ERROR, client_id=None,
21
+ self, log_level=ERROR, subscriber=None,
22
22
  pulsar_host="pulsar://pulsar:6650",
23
23
  ):
24
24
 
25
- if client_id == None:
26
- client_id = str(uuid.uuid4())
25
+ if subscriber == None:
26
+ subscriber = str(uuid.uuid4())
27
27
 
28
28
  self.client = pulsar.Client(
29
29
  pulsar_host,
@@ -37,7 +37,7 @@ class GraphRagClient:
37
37
  )
38
38
 
39
39
  self.consumer = self.client.subscribe(
40
- graph_rag_response_queue, client_id,
40
+ graph_rag_response_queue, subscriber,
41
41
  schema=JsonSchema(GraphRagResponse),
42
42
  )
43
43
 
@@ -66,5 +66,13 @@ class GraphRagClient:
66
66
 
67
67
  def __del__(self):
68
68
 
69
+ if hasattr(self, "consumer"):
70
+ # self.consumer.unsubscribe()
71
+ self.consumer.close()
72
+
73
+ if hasattr(self, "producer"):
74
+ self.producer.flush()
75
+ self.producer.close()
76
+
69
77
  self.client.close()
70
78
 
@@ -41,7 +41,10 @@ class Processor(ConsumerProducer):
41
41
  }
42
42
  )
43
43
 
44
- self.llm = LlmClient(pulsar_host=self.pulsar_host)
44
+ self.llm = LlmClient(
45
+ pulsar_host=self.pulsar_host,
46
+ subscriber = module + "-llm",
47
+ )
45
48
 
46
49
  def to_uri(self, text):
47
50
 
@@ -61,7 +61,10 @@ class Processor(ConsumerProducer):
61
61
  "vector_schema": GraphEmbeddings.__name__,
62
62
  })
63
63
 
64
- self.llm = LlmClient(pulsar_host=self.pulsar_host)
64
+ self.llm = LlmClient(
65
+ pulsar_host = self.pulsar_host,
66
+ subscriber = module + "-llm",
67
+ )
65
68
 
66
69
  def to_uri(self, text):
67
70
 
@@ -19,12 +19,12 @@ DEBUG=_pulsar.LoggerLevel.Debug
19
19
  class LlmClient:
20
20
 
21
21
  def __init__(
22
- self, log_level=ERROR, client_id=None,
22
+ self, log_level=ERROR, subscriber=None,
23
23
  pulsar_host="pulsar://pulsar:6650",
24
24
  ):
25
25
 
26
- if client_id == None:
27
- client_id = str(uuid.uuid4())
26
+ if subscriber == None:
27
+ subscriber = str(uuid.uuid4())
28
28
 
29
29
  self.client = pulsar.Client(
30
30
  pulsar_host,
@@ -38,7 +38,7 @@ class LlmClient:
38
38
  )
39
39
 
40
40
  self.consumer = self.client.subscribe(
41
- text_completion_response_queue, client_id,
41
+ text_completion_response_queue, subscriber,
42
42
  schema=JsonSchema(TextCompletionResponse),
43
43
  )
44
44
 
@@ -68,7 +68,13 @@ class LlmClient:
68
68
 
69
69
  def __del__(self):
70
70
 
71
- self.producer.close()
72
- self.consumer.close()
71
+ if hasattr(self, "consumer"):
72
+ # self.consumer.unsubscribe()
73
+ self.consumer.close()
74
+
75
+ if hasattr(self, "producer"):
76
+ self.producer.flush()
77
+ self.producer.close()
78
+
73
79
  self.client.close()
74
80
 
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env python3
2
+
3
+ from . write import run
4
+
5
+ if __name__ == '__main__':
6
+ run()
7
+
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: trustgraph
3
- Version: 0.5.1
3
+ Version: 0.5.2
4
4
  Summary: TrustGraph provides a means to run a pipeline of flexible AI processing components in a flexible means to achieve a processing pipeline.
5
5
  Home-page: https://github.com/trustgraph-ai/trustgraph
6
- Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.5.1.tar.gz
6
+ Download-URL: https://github.com/trustgraph-ai/trustgraph/archive/refs/tags/v0.5.2.tar.gz
7
7
  Author: trustgraph.ai
8
8
  Author-email: security@trustgraph.ai
9
9
  Classifier: Programming Language :: Python :: 3
@@ -32,6 +32,7 @@ Requires-Dist: anthropic
32
32
  Requires-Dist: google-cloud-aiplatform
33
33
  Requires-Dist: pyyaml
34
34
  Requires-Dist: prometheus-client
35
+ Requires-Dist: pyarrow
35
36
 
36
37
 
37
38
  # TrustGraph
@@ -95,11 +96,13 @@ package installed can also run the entire architecture.
95
96
  chunking algorithm to produce smaller text chunks.
96
97
  - `embeddings-hf` - A service which analyses text and returns a vector
97
98
  embedding using one of the HuggingFace embeddings models.
99
+ - `embeddings-ollama` - A service which analyses text and returns a vector
100
+ embedding using an Ollama embeddings model.
98
101
  - `embeddings-vectorize` - Uses an embeddings service to get a vector
99
102
  embedding which is added to the processor payload.
100
103
  - `graph-rag` - A query service which applies a Graph RAG algorithm to
101
104
  provide a response to a text prompt.
102
- - `graph-write-cassandra` - Takes knowledge graph edges and writes them to
105
+ - `triples-write-cassandra` - Takes knowledge graph edges and writes them to
103
106
  a Cassandra store.
104
107
  - `kg-extract-definitions` - knowledge extractor - examines text and
105
108
  produces graph edges.
@@ -115,15 +118,15 @@ package installed can also run the entire architecture.
115
118
  format. For instance, the wrapping of text between lines in a PDF document
116
119
  is not semantically encoded, so the decoder will see wrapped lines as
117
120
  space-separated.
118
- - `vector-write-milvus` - Takes vector-entity mappings and records them
121
+ - `ge-write-milvus` - Takes graph embeddings mappings and records them
119
122
  in the vector embeddings store.
120
123
 
121
124
  ## LM Specific Modules
122
125
 
123
- - `llm-azure-text` - Sends request to AzureAI serverless endpoint
124
- - `llm-claude-text` - Sends request to Anthropic's API
125
- - `llm-ollama-text` - Sends request to LM running using Ollama
126
- - `llm-vertexai-text` - Sends request to model available through VertexAI API
126
+ - `text-completion-azure` - Sends request to AzureAI serverless endpoint
127
+ - `text-completion-claude` - Sends request to Anthropic's API
128
+ - `text-completion-ollama` - Sends request to LM running using Ollama
129
+ - `text-completion-vertexai` - Sends request to model available through VertexAI API
127
130
 
128
131
  ## Quickstart Guide
129
132
 
@@ -2,6 +2,7 @@ LICENSE
2
2
  README.md
3
3
  setup.py
4
4
  scripts/chunker-recursive
5
+ scripts/dump-parquet
5
6
  scripts/embeddings-hf
6
7
  scripts/embeddings-ollama
7
8
  scripts/embeddings-vectorize
@@ -20,6 +21,7 @@ scripts/text-completion-azure
20
21
  scripts/text-completion-claude
21
22
  scripts/text-completion-ollama
22
23
  scripts/text-completion-vertexai
24
+ scripts/triples-dump-parquet
23
25
  scripts/triples-write-cassandra
24
26
  trustgraph/__init__.py
25
27
  trustgraph/embeddings_client.py
@@ -50,6 +52,12 @@ trustgraph/decoding/__init__.py
50
52
  trustgraph/decoding/pdf/__init__.py
51
53
  trustgraph/decoding/pdf/__main__.py
52
54
  trustgraph/decoding/pdf/pdf_decoder.py
55
+ trustgraph/dump/__init__.py
56
+ trustgraph/dump/triples/__init__.py
57
+ trustgraph/dump/triples/parquet/__init__.py
58
+ trustgraph/dump/triples/parquet/__main__.py
59
+ trustgraph/dump/triples/parquet/processor.py
60
+ trustgraph/dump/triples/parquet/writer.py
53
61
  trustgraph/embeddings/__init__.py
54
62
  trustgraph/embeddings/hf/__init__.py
55
63
  trustgraph/embeddings/hf/__main__.py
@@ -18,3 +18,4 @@ anthropic
18
18
  google-cloud-aiplatform
19
19
  pyyaml
20
20
  prometheus-client
21
+ pyarrow
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes