topologicpy 0.8.55__py3-none-any.whl → 0.8.58__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- topologicpy/Graph.py +568 -36
- topologicpy/Kuzu.py +950 -0
- topologicpy/Vertex.py +78 -48
- topologicpy/version.py +1 -1
- {topologicpy-0.8.55.dist-info → topologicpy-0.8.58.dist-info}/METADATA +1 -1
- {topologicpy-0.8.55.dist-info → topologicpy-0.8.58.dist-info}/RECORD +9 -8
- {topologicpy-0.8.55.dist-info → topologicpy-0.8.58.dist-info}/WHEEL +0 -0
- {topologicpy-0.8.55.dist-info → topologicpy-0.8.58.dist-info}/licenses/LICENSE +0 -0
- {topologicpy-0.8.55.dist-info → topologicpy-0.8.58.dist-info}/top_level.txt +0 -0
topologicpy/Kuzu.py
ADDED
@@ -0,0 +1,950 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
import threading, contextlib, time, json
|
3
|
+
from typing import Dict, Any, List, Optional
|
4
|
+
|
5
|
+
import os
|
6
|
+
import warnings
|
7
|
+
|
8
|
+
# Optional-dependency bootstrap: import kuzu, and if that fails try to install
# it with pip and import it again. When everything fails, `kuzu` is set to None
# so the rest of the module can detect that the library is unavailable.
try:
    import kuzu
except Exception:
    print("Kuzu - Installing required kuzu library.")
    # os.system() signals failure through its exit status and does not raise
    # when the command fails, so the status must be checked explicitly for the
    # per-user fallback to ever run.
    if os.system("pip install kuzu") != 0:
        os.system("pip install kuzu --user")
    try:
        import kuzu
    except Exception:
        warnings.warn("Kuzu - Error: Could not import Kuzu.")
        kuzu = None
|
21
|
+
|
22
|
+
|
23
|
+
class _DBCache:
    """
    One kuzu.Database per path. Thread-safe and process-local.

    Database objects are created on first request and stored in a dictionary
    keyed by path; every lookup/insert happens while holding a re-entrant lock.
    """
    def __init__(self):
        self._lock = threading.RLock()
        self._cache: Dict[str, "kuzu.Database"] = {}

    def get(self, path: str) -> "kuzu.Database":
        """
        Returns the cached kuzu.Database for `path`, creating it on first use.

        Parameters
        ----------
        path : str
            The path passed to kuzu.Database when the database is first opened.

        Returns
        -------
        kuzu.Database
            The database object associated with the path.

        Raises
        ------
        RuntimeError
            If the kuzu library is not available (module-level `kuzu` is None).
        """
        if kuzu is None:
            # Raising a plain string is itself a TypeError in Python 3, which
            # would hide the real message; raise a proper exception instead.
            raise RuntimeError("Kuzu - Error: Kuzu is not available")
        with self._lock:
            db = self._cache.get(path)
            if db is None:
                db = kuzu.Database(path)
                self._cache[path] = db
            return db
|
40
|
+
|
41
|
+
class _WriteGate:
    """
    Serialize writes to avoid IO lock contention.

    Wraps a single re-entrant lock; `hold()` keeps it acquired for the
    duration of a with-block.
    """
    def __init__(self):
        self._lock = threading.RLock()

    @contextlib.contextmanager
    def hold(self):
        """Context manager that holds the gate's lock while the with-body runs."""
        self._lock.acquire()
        try:
            yield
        finally:
            # Always released, whether the body finished or raised.
            self._lock.release()
|
52
|
+
|
53
|
+
# Module-level database cache; _Mgr and Kuzu.Database obtain kuzu.Database objects through it.
_db_cache = _DBCache()
|
54
|
+
# Module-level write gate; _ConnectionPool.connection holds it while a write connection is in use.
_write_gate = _WriteGate()
|
55
|
+
|
56
|
+
class _ConnectionPool:
    """
    Per-thread kuzu.Connection pool bound to a Database instance.

    Each thread lazily creates its own connection, which is kept in
    thread-local storage and reused by later calls from the same thread.
    """
    def __init__(self, db: "kuzu.Database"):
        self.db = db
        self._local = threading.local()

    def _ensure(self) -> "kuzu.Connection":
        """Returns the calling thread's connection, creating it on first use."""
        if not hasattr(self._local, "conn"):
            self._local.conn = kuzu.Connection(self.db)
        return self._local.conn

    @contextlib.contextmanager
    def connection(self, write: bool = False, retries: int = 5, backoff: float = 0.15):
        """
        Context manager yielding the calling thread's connection.

        Parameters
        ----------
        write : bool , optional
            If True, the module-level write gate is held for the whole
            with-block so that writes are serialized. Default is False.
        retries : int , optional
            Number of additional attempts made to obtain the connection in
            write mode before giving up. Default is 5.
        backoff : float , optional
            Base delay in seconds; the wait before retry number k is
            backoff * k. Default is 0.15.

        Raises
        ------
        RuntimeError
            If, in write mode, the connection cannot be obtained after
            `retries` retries. The last underlying error is chained as the cause.

        Notes
        -----
        Exceptions raised inside the caller's with-block propagate unchanged.
        A generator-based context manager must yield exactly once, so the
        caller's body cannot be re-run from here; only obtaining the
        connection is retried.
        """
        if not write:
            yield self._ensure()
            return
        # Serialize writes and retry transient failures while acquiring the connection
        with _write_gate.hold():
            attempt = 0
            while True:
                try:
                    conn = self._ensure()
                    break
                except Exception as e:
                    attempt += 1
                    if attempt > retries:
                        raise RuntimeError(f"Kuzu write failed after {retries} retries: {e}") from e
                    time.sleep(backoff * attempt)
            # Single yield: an error thrown in by the with-body is not caught here.
            yield conn
|
87
|
+
|
88
|
+
class _Mgr:
    """
    Lightweight facade (per-db-path) providing read/write execution and schema bootstrap.
    """
    def __init__(self, db_path: str):
        self.db_path = db_path
        self._db = _db_cache.get(db_path)
        self._pool = _ConnectionPool(self._db)

    @contextlib.contextmanager
    def read(self):
        """Yields a pooled connection requested with write=False."""
        with self._pool.connection(write=False) as conn:
            yield conn

    @contextlib.contextmanager
    def write(self):
        """Yields a pooled connection requested with write=True."""
        with self._pool.connection(write=True) as conn:
            yield conn

    def exec(self, query: str, params: Optional[dict] = None, write: bool = False):
        """
        Executes a query and returns its rows.

        Parameters
        ----------
        query : str
            The query text passed to the connection's execute method.
        params : dict , optional
            Query parameters. An empty dictionary is used when omitted.
        write : bool , optional
            If True, the query runs on a write connection. Default is False.

        Returns
        -------
        list or None
            The result of `rows_as_dict().get_all()` on the query result, or
            None if reading the rows raises an exception.
        """
        scope = self.write() if write else self.read()
        with scope as conn:
            with conn.execute(query, parameters=params or {}) as result:
                try:
                    rows = result.rows_as_dict().get_all()
                except Exception:
                    rows = None
                return rows

    def ensure_schema(self):
        """Creates the Graph and Vertex node tables and the Edge relationship table if they do not exist."""
        statements = (
            # Node tables
            """
            CREATE NODE TABLE IF NOT EXISTS Graph(
                id STRING,
                label STRING,
                num_nodes INT64,
                num_edges INT64,
                props STRING,
                PRIMARY KEY(id)
            );
            """,
            """
            CREATE NODE TABLE IF NOT EXISTS Vertex(
                id STRING,
                graph_id STRING,
                label STRING,
                x DOUBLE,
                y DOUBLE,
                z DOUBLE,
                props STRING,
                PRIMARY KEY(id)
            );
            """,
            # Relationship tables
            """
            CREATE REL TABLE IF NOT EXISTS Edge(FROM Vertex TO Vertex, label STRING, props STRING);
            """,
        )
        for statement in statements:
            self.exec(statement, write=True)

        # TODO: figure out later if Session and Step node tables (and a
        # SessionHasStep relationship table) are needed.
|
170
|
+
|
171
|
+
|
172
|
+
class Kuzu:
|
173
|
+
# ---------- Core (DB + Connection + Schema) ----------
|
174
|
+
@staticmethod
def EnsureSchema(manager, silent: bool = False) -> bool:
    """
    Ensures the required Kùzu schema exists in the database handled by the input manager.

    Parameters
    ----------
    manager : Kuzu.Manager
        The manager of the Kùzu database.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.

    Returns
    -------
    bool
        True if successful, False otherwise.
    """
    try:
        manager.ensure_schema()
    except Exception as e:
        if not silent:
            print(f"Kuzu.EnsureSchema - Error: {e}. Returning False.")
        return False
    else:
        return True
|
198
|
+
|
199
|
+
@staticmethod
def Database(path: str, silent: bool = False):
    """
    Returns the underlying `kuzu.Database` instance for `path`.

    Parameters
    ----------
    path : str
        Path to the Kùzu database. It will be created if it does not exist.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.

    Returns
    -------
    kuzu.Database
        The Kuzu database found at the path, or None on error.
    """
    database = None
    try:
        database = _db_cache.get(path)
    except Exception as e:
        if not silent:
            print(f"Kuzu.Database - Error: {e}. Returning None.")
    return database
|
222
|
+
|
223
|
+
@staticmethod
def Connection(manager, silent: bool = False):
    """
    Returns a `kuzu.Connection` obtained from the input manager.

    Parameters
    ----------
    manager : Kuzu.Manager
        The Manager to the Kùzu database.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.

    Returns
    -------
    kuzu.Connection
        The Kuzu live connection, or None on error. Do NOT use across threads.
    """
    live = None
    try:
        with manager.read() as handle:
            # Note: this is a live connection (do not use across threads)
            live = handle
    except Exception as e:
        if not silent:
            print(f"Kuzu.Connection - Error: {e}. Returning None.")
        return None
    return live
|
247
|
+
|
248
|
+
@staticmethod
def Manager(path: str, silent: bool = False):
    """
    Returns a lightweight manager bound to the database at `path`.

    Parameters
    ----------
    path : str
        Path to the Kùzu database. It will be created if it does not exist.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.

    Returns
    -------
    Kuzu.Manager
        The Kuzu Manager, or None on error.
    """
    try:
        mgr = _Mgr(path)
    except Exception as e:
        if not silent:
            print(f"Kuzu.Manager - Error: {e}. Returning None.")
        mgr = None
    return mgr
|
270
|
+
|
271
|
+
@staticmethod
def UpsertGraph(manager,
                graph,
                graphIDKey: str = None,
                vertexIDKey: str = None,
                vertexLabelKey: str = None,
                mantissa: int = 6,
                silent: bool = False) -> str:
    """
    Upserts (deletes prior + inserts new) a TopologicPy graph.

    Parameters
    ----------
    manager : Kuzu.Manager
        The Kuzu database manager.
    graph : topologicpy.Graph
        The input TopologicPy graph.
    graphIDKey : str , optional
        The graph dictionary key under which the graph ID is stored. If None, the value returned by Topology.UUID(graph) is used as the graph ID. Default is None.
    vertexIDKey : str , optional
        The vertex dictionary key under which the vertex ID is stored. If None, or if a vertex has no such key, the ID "<graph_id>:<vertex index>" is used. Default is None.
    vertexLabelKey : str , optional
        The vertex dictionary key under which the vertex label is stored. If None, or if a vertex has no such key, the vertex index (as a string) is used. Default is None.
    mantissa : int , optional
        The mantissa passed to Graph.MeshData. Default is 6.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.

    Returns
    -------
    str
        The graph_id used, or None if a database operation fails.
    """
    from topologicpy.Graph import Graph
    from topologicpy.Topology import Topology
    from topologicpy.Dictionary import Dictionary

    d = Topology.Dictionary(graph)
    if graphIDKey is None:
        gid = Topology.UUID(graph)
    else:
        gid = Dictionary.ValueAtKey(d, graphIDKey, Topology.UUID(graph))
    g_props = Dictionary.PythonDictionary(d)
    mesh_data = Graph.MeshData(graph, mantissa=mantissa)
    verts = mesh_data['vertices']
    v_props = mesh_data['vertexDictionaries']
    edges = mesh_data['edges']
    e_props = mesh_data['edgeDictionaries']
    num_nodes = len(verts)
    num_edges = len(edges)
    try:
        manager.ensure_schema()
        # Upsert Graph
        manager.exec("MATCH (g:Graph) WHERE g.id = $id DELETE g;", {"id": gid}, write=True)
        manager.exec("""
        CREATE (g:Graph {id:$id, num_nodes:$num_nodes, num_edges: $num_edges, props:$props});
        """, {"id": gid, "num_nodes": num_nodes, "num_edges": num_edges, "props": json.dumps(g_props)}, write=True)

        # Remove existing vertices/edges for this graph_id
        manager.exec("""
        MATCH (a:Vertex)-[r:Edge]->(b:Vertex)
        WHERE a.graph_id = $gid AND b.graph_id = $gid
        DELETE r;
        """, {"gid": gid}, write=True)
        manager.exec("MATCH (v:Vertex) WHERE v.graph_id = $gid DELETE v;", {"gid": gid}, write=True)

        # Insert vertices. The resolved IDs are remembered so that edge
        # endpoints below are guaranteed to use exactly the same values.
        vertex_ids = []
        for i, v in enumerate(verts):
            x, y, z = v
            default_id = f"{gid}:{i}"
            if vertexIDKey is None:
                vid = default_id
            else:
                vid = v_props[i].get(vertexIDKey, default_id)
            if vertexLabelKey is None:
                label = str(i)
            else:
                # The label is looked up under vertexLabelKey (not vertexIDKey).
                label = v_props[i].get(vertexLabelKey, str(i))
            vertex_ids.append(vid)
            manager.exec("""
            CREATE (v:Vertex {id:$id, graph_id:$gid, label:$label, props:$props, x:$x, y:$y, z:$z});
            """, {"id": vid, "gid": gid, "label": label, "x": x, "y": y, "z": z,
                  "props": json.dumps(v_props[i])}, write=True)

        # Insert edges (each edge is a pair of vertex indices)
        for i, e in enumerate(edges):
            manager.exec("""
            MATCH (a:Vertex {id:$a}), (b:Vertex {id:$b})
            CREATE (a)-[:Edge {label:$label, props:$props}]->(b);
            """, {"a": vertex_ids[e[0]], "b": vertex_ids[e[1]],
                  "label": e_props[i].get("label", str(i)),
                  "props": json.dumps(e_props[i])}, write=True)

        return gid
    except Exception as e:
        if not silent:
            print(f"Kuzu.UpsertGraph - Error: {e}. Returning None.")
        return None
|
367
|
+
|
368
|
+
@staticmethod
def GraphByID(manager, graphID: str, silent: bool = False):
    """
    Constructs a TopologicPy graph from Kùzu using the graphID input parameter.

    Parameters
    ----------
    manager : Kuzu.Manager
        The manager of the Kùzu database.
    graphID : str
        The graph ID to retrieve from Kùzu.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.

    Returns
    -------
    topologicpy.Graph
        A new TopologicPy Graph, or None if the graph is not found, has no vertices, or an error occurs.
    """
    import random
    from topologicpy.Graph import Graph
    from topologicpy.Dictionary import Dictionary
    from topologicpy.Vertex import Vertex
    from topologicpy.Edge import Edge
    from topologicpy.Topology import Topology

    try:
        manager.ensure_schema()
        # Read the Graph
        graph_rows = manager.exec("""
        MATCH (g:Graph) WHERE g.id = $id
        RETURN g.id AS id, g.num_nodes AS num_nodes, g.num_edges AS num_edges, g.props AS props
        ;
        """, {"id": graphID}, write=False) or None
        if graph_rows is None:
            return None
        graph_row = graph_rows[0]
        g_dict = dict(json.loads(graph_row.get("props") or "{}") or {})
        g_dict = Dictionary.ByPythonDictionary(g_dict)
        # Read vertices
        rows_v = manager.exec("""
        MATCH (v:Vertex) WHERE v.graph_id = $gid
        RETURN v.id AS id, v.label AS label, v.x AS x, v.y AS y, v.z AS z, v.props AS props
        ORDER BY id;
        """, {"gid": graphID}, write=False) or []

        id_to_vertex = {}
        vertices = []
        for row in rows_v:
            # Fall back to a random coordinate when a value is missing OR is
            # None: the query always returns the x/y/z columns, so a dict
            # default alone would never trigger for an empty value.
            coords = []
            for axis in ("x", "y", "z"):
                value = row.get(axis)
                if value is None:
                    value = random.uniform(0, 1000)
                coords.append(value)
            v = Vertex.ByCoordinates(*coords)
            try:
                props = json.loads(row.get("props") or "{}")
            except Exception:
                props = {}
            # Ensure 'label' key present
            props = dict(props or {})
            if "label" not in props:
                props["label"] = row.get("label") or ""
            d = Dictionary.ByKeysValues(list(props.keys()), list(props.values()))
            v = Topology.SetDictionary(v, d)
            id_to_vertex[row["id"]] = v
            vertices.append(v)

        # Read edges
        rows_e = manager.exec("""
        MATCH (a:Vertex)-[r:Edge]->(b:Vertex)
        WHERE a.graph_id = $gid AND b.graph_id = $gid
        RETURN a.id AS a_id, b.id AS b_id, r.label AS label, r.props AS props;
        """, {"gid": graphID}, write=False) or []
        edges = []
        for row in rows_e:
            va = id_to_vertex.get(row["a_id"])
            vb = id_to_vertex.get(row["b_id"])
            # Skip edges whose endpoints were not reconstructed above.
            if va is None or vb is None:
                continue
            e = Edge.ByStartVertexEndVertex(va, vb)
            try:
                props = json.loads(row.get("props") or "{}")
            except Exception:
                props = {}
            props = dict(props or {})
            if "label" not in props:
                props["label"] = row.get("label") or "connect"
            d = Dictionary.ByKeysValues(list(props.keys()), list(props.values()))
            e = Topology.SetDictionary(e, d)
            edges.append(e)
        if len(vertices) > 0:
            g = Graph.ByVerticesEdges(vertices, edges)
            g = Topology.SetDictionary(g, g_dict)
        else:
            g = None
        return g
    except Exception as e:
        if not silent:
            print(f"Kuzu.GraphByID - Error: {e}. Returning None.")
        return None
|
474
|
+
|
475
|
+
@staticmethod
def GraphsByQuery(
    manager,
    query: str,
    params: Optional[dict] = None,
    silent: bool = False,
):
    """
    Executes a Kùzu Cypher query and returns a list of TopologicPy Graphs.

    The method will:
    1) run the query,
    2) extract distinct graph IDs from the result set (from a `graph_id`
       column, or else from the part before the first ":" of an `a_id`,
       `b_id` or `id` column),
    3) reconstruct each graph via Kuzu.GraphByID(...).

    Parameters
    ----------
    manager : Kuzu.Manager
        The manager of the Kùzu database.
    query : str
        A valid Kùzu Cypher query.
    params : dict , optional
        Parameters to pass with the query.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.

    Returns
    -------
    list[topologic_core.Graph]
        A list of reconstructed TopologicPy graphs, or None on error.

    """

    try:
        manager.ensure_schema()
        rows = manager.exec(query, params or {}, write=False) or []

        # Collect distinct graph IDs (first-seen order is kept)
        gids = []
        for r in rows:
            gid = r.get('graph_id')

            # Fallback: try to infer from common id fields like "<graph_id>:<i>"
            if gid is None:
                for k in ("a_id", "b_id", "id"):
                    v = r.get(k)
                    if isinstance(v, str) and ":" in v:
                        gid = v.split(":", 1)[0]
                        break

            if gid and gid not in gids:
                gids.append(gid)

        # Reconstruct each graph. GraphByID takes the manager as its first
        # argument; graphs that cannot be rebuilt (None) are left out.
        graphs = []
        for gid in gids:
            g = Kuzu.GraphByID(manager, gid, silent=silent)
            if g is not None:
                graphs.append(g)
        return graphs

    except Exception as e:
        if not silent:
            print(f"Kuzu.GraphsByQuery - Error: {e}. Returning None.")
        return None
|
540
|
+
|
541
|
+
@staticmethod
def DeleteGraph(manager, graphID: str, silent: bool = False) -> bool:
    """
    Deletes a graph (its edges, its vertices, and its Graph record) by id.

    Parameters
    ----------
    manager : Kuzu.Manager
        The manager of the Kùzu database.
    graphID : str
        The id of the graph to be deleted.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.

    Returns
    -------
    bool
        True on success, False otherwise.
    """
    # Executed in this order: edges, then vertices, then the Graph record.
    statements = (
        """
        MATCH (a:Vertex)-[r:Edge]->(b:Vertex)
        WHERE a.graph_id = $gid AND b.graph_id = $gid
        DELETE r;
        """,
        "MATCH (v:Vertex) WHERE v.graph_id = $gid DELETE v;",
        "MATCH (g:Graph) WHERE g.id = $gid DELETE g;",
    )
    try:
        manager.ensure_schema()
        for statement in statements:
            manager.exec(statement, {"gid": graphID}, write=True)
    except Exception as e:
        if not silent:
            print(f"Kuzu.DeleteGraph - Error: {e}. Returning False.")
        return False
    return True
|
577
|
+
|
578
|
+
@staticmethod
def EmptyDatabase(manager, dropSchema: bool = False, recreateSchema: bool = True, silent: bool = False) -> bool:
    """
    Empties the Kùzu database handled by the input manager.

    Two modes:
    - Soft clear (default): delete ALL relationships, then ALL nodes across all tables.
    - Hard reset (dropSchema=True): drop known node/rel tables, optionally recreate schema.

    Parameters
    ----------
    manager : Kuzu Manager
        The manager of the Kùzu database.
    dropSchema : bool , optional
        If True, DROP the known tables instead of deleting rows. Default False.
    recreateSchema : bool , optional
        If True and dropSchema=True, re-create the minimal schema after dropping. Default True.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.

    Returns
    -------
    bool
        True on success, False otherwise.
    """
    # Relationship table goes FIRST (to release dependencies), then node tables.
    # IF EXISTS is convenient; if your Kùzu version doesn't support it, remove and ignore exceptions.
    drop_statements = (
        "DROP TABLE IF EXISTS Edge;",
        "DROP TABLE IF EXISTS Vertex;",
        "DROP TABLE IF EXISTS Graph;",
    )
    try:
        manager.ensure_schema()

        if not dropSchema:
            # Soft clear: remove all relationships, then all nodes (covers all labels/tables).
            manager.exec("MATCH (a)-[r]->(b) DELETE r;", write=True)
            manager.exec("MATCH (n) DELETE n;", write=True)
        else:
            for statement in drop_statements:
                # A failed drop is only reported; the remaining tables are still attempted.
                try:
                    manager.exec(statement, write=True)
                except Exception as _e:
                    if not silent:
                        print(f"Kuzu.EmptyDatabase - Warning dropping table: {_e}")

            if recreateSchema:
                manager.ensure_schema()
        return True

    except Exception as e:
        if not silent:
            print(f"Kuzu.EmptyDatabase - Error: {e}. Returning False.")
        return False
|
635
|
+
|
636
|
+
@staticmethod
def ListGraphs(manager, where: dict = None, limit: int = 100, offset: int = 0, silent: bool = False) -> list[dict]:
    """
    Lists Graph metadata with simple filtering and pagination.

    Parameters
    ----------
    manager : Kuzu.Manager
        The manager of the Kùzu database.
    where : dict , optional
        The filter python dictionary. Supported filters in `where` (all optional):
        - id (exact match)
        - label (substring match)
        - props_contains (substring match against JSON/text in `props`)
        - props_equals (exact string match against `props`)
        - min_nodes / max_nodes (integers)
        - min_edges / max_edges (integers)
    limit : int , optional
        The desired limit of returned Graphs. If None, 100 is used; negative values are treated as 0. Default is 100.
    offset : int , optional
        The desired offset of the returned Graphs (skips the first number of Graphs specified by the offset and returns the remaining Graphs up to the specified limit). The offset is useful if pagination is needed. Negative values are treated as 0. Default is 0.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.
        NOTE: this method currently does not catch errors, so this flag has no effect here.

    Returns
    -------
    list
        The list of found Graph python dictionaries.

    """

    manager.ensure_schema()
    where = where or {}

    conds: list[str] = []
    params: dict = {}

    if where.get("id"):
        conds.append("g.id = $id")
        params["id"] = str(where["id"])

    if where.get("label"):
        # Cypher-style infix CONTAINS
        conds.append("g.label CONTAINS $label_sub")
        params["label_sub"] = str(where["label"])

    if where.get("props_contains"):
        conds.append("g.props CONTAINS $props_sub")
        params["props_sub"] = str(where["props_contains"])

    if where.get("props_equals"):
        conds.append("g.props = $props_equals")
        params["props_equals"] = str(where["props_equals"])

    # Numeric range filters; 0 is a valid bound, so only None disables a filter.
    for key, column, op in (
        ("min_nodes", "num_nodes", ">="),
        ("max_nodes", "num_nodes", "<="),
        ("min_edges", "num_edges", ">="),
        ("max_edges", "num_edges", "<="),
    ):
        if where.get(key) is not None:
            conds.append(f"g.{column} {op} ${key}")
            params[key] = int(where[key])

    where_clause = ("WHERE " + " AND ".join(conds)) if conds else ""
    q = f"""
    MATCH (g:Graph)
    {where_clause}
    RETURN g.id AS id, g.label AS label,
           g.num_nodes AS num_nodes, g.num_edges AS num_edges,
           g.props AS props
    ORDER BY id
    SKIP $__offset LIMIT $__limit;
    """

    params["__offset"] = max(0, int(offset or 0))
    # Only a missing limit (None) falls back to 100; an explicit 0 is honored
    # instead of being silently replaced by the default.
    params["__limit"] = max(0, int(100 if limit is None else limit))

    return manager.exec(q, params, write=False) or []
|
721
|
+
|
722
|
+
|
723
|
+
@staticmethod
|
724
|
+
def ByCSVPath(
|
725
|
+
manager,
|
726
|
+
path: str,
|
727
|
+
graphIDPrefix: str = "g",
|
728
|
+
graphIDHeader="graph_id",
|
729
|
+
graphLabelHeader="label",
|
730
|
+
edgeSRCHeader="src_id",
|
731
|
+
edgeDSTHeader="dst_id",
|
732
|
+
edgeLabelHeader="label",
|
733
|
+
nodeIDHeader="node_id",
|
734
|
+
nodeLabelHeader="label",
|
735
|
+
nodeXHeader="X",
|
736
|
+
nodeYHeader="Y",
|
737
|
+
nodeZHeader="Z",
|
738
|
+
silent: bool = False,
|
739
|
+
) -> Dict[str, Any]:
|
740
|
+
"""
|
741
|
+
Load node/edge/graph CSVs from a folder (using its .yaml meta) and upsert them
|
742
|
+
directly into Kùzu using the schema defined in Kuzu.py:
|
743
|
+
|
744
|
+
- NODE TABLE Graph(id STRING PRIMARY KEY, label STRING, num_nodes INT64, num_edges INT64, props STRING)
|
745
|
+
- NODE TABLE Vertex(id STRING PRIMARY KEY, graph_id STRING, label STRING, x DOUBLE, y DOUBLE, z DOUBLE, props STRING)
|
746
|
+
- REL TABLE Edge(FROM Vertex TO Vertex, label STRING, props STRING)
|
747
|
+
|
748
|
+
Parameters
|
749
|
+
----------
|
750
|
+
manager : Kuzu.Manager
|
751
|
+
An initialized Kùzu manager; must provide ensure_schema() and exec(query, params, write=True/False).
|
752
|
+
path : str
|
753
|
+
Folder containing a dataset YAML (e.g., meta.yaml) that points to nodes/edges/graphs CSVs.
|
754
|
+
graphIDPrefix : str
|
755
|
+
Prefix for materialized graph IDs (default "g"); e.g., graph 0 -> "g0".
|
756
|
+
graphIDHeader : str , optional
|
757
|
+
The column header string used to specify the graph id. Default is "graph_id".
|
758
|
+
graphLabelHeader : str , optional
|
759
|
+
The column header string used to specify the graph label. Default is "label".
|
760
|
+
edgeSRCHeader : str , optional
|
761
|
+
The column header string used to specify the source vertex id of edges. Default is "src_id".
|
762
|
+
edgeDSTHeader : str , optional
|
763
|
+
The column header string used to specify the destination vertex id of edges. Default is "dst_id".
|
764
|
+
edgeLabelHeader : str , optional
|
765
|
+
The column header string used to specify the label of edges. Default is "label".
|
766
|
+
nodeIDHeader : str , optional
|
767
|
+
The column header string used to specify the id of nodes. Default is "node_id".
|
768
|
+
nodeLabelHeader : str , optional
|
769
|
+
The column header string used to specify the label of nodes. Default is "label".
|
770
|
+
nodeXHeader : str , optional
|
771
|
+
The column header string used to specify the X coordinate of nodes. Default is "X".
|
772
|
+
nodeYHeader : str , optional
|
773
|
+
The column header string used to specify the Y coordinate of nodes. Default is "Y".
|
774
|
+
nodeZHeader : str , optional
|
775
|
+
The column header string used to specify the Z coordinate of nodes. Default is "Z".
|
776
|
+
silent : bool
|
777
|
+
If True, suppress warnings.
|
778
|
+
|
779
|
+
Returns
|
780
|
+
-------
|
781
|
+
dict
|
782
|
+
{"graphs_upserted": int, "graph_ids": [str, ...]}
|
783
|
+
"""
|
784
|
+
import os
|
785
|
+
import glob
|
786
|
+
import json
|
787
|
+
import numbers
|
788
|
+
import pandas as pd
|
789
|
+
import yaml
|
790
|
+
import random
|
791
|
+
|
792
|
+
# ---------- Helpers (mirroring your CSV loader’s patterns) ----------
|
793
|
+
def _find_yaml_files(folder_path: str):
|
794
|
+
return glob.glob(os.path.join(folder_path, "*.yaml"))
|
795
|
+
|
796
|
+
def _read_yaml(file_path: str):
|
797
|
+
with open(file_path, "r", encoding="utf-8") as f:
|
798
|
+
data = yaml.safe_load(f) or {}
|
799
|
+
edge_data = data.get("edge_data", [])
|
800
|
+
node_data = data.get("node_data", [])
|
801
|
+
graph_data = data.get("graph_data", {})
|
802
|
+
edges_rel = edge_data[0].get("file_name") if edge_data else None
|
803
|
+
nodes_rel = node_data[0].get("file_name") if node_data else None
|
804
|
+
graphs_rel = graph_data.get("file_name")
|
805
|
+
return graphs_rel, edges_rel, nodes_rel
|
806
|
+
|
807
|
+
def _props_from_row(row: pd.Series, exclude: set) -> str:
    """
    Serializes the non-excluded entries of a row into a JSON object string.

    Parameters
    ----------
    row : pd.Series
        The row whose (key, value) pairs are to be serialized.
    exclude : set
        The keys to leave out of the result.

    Returns
    -------
    str
        The JSON text of the remaining entries. Float NaN values are written as null.
        If the values cannot be serialized as they are, every non-None value is
        converted to its string form before serializing.
    """
    # Keep only the wanted keys and turn float NaN into None so the JSON stays clean.
    payload = {
        key: (None if isinstance(value, float) and pd.isna(value) else value)
        for key, value in row.items()
        if key not in exclude
    }
    try:
        encoded = json.dumps(payload, ensure_ascii=False)
    except Exception:
        # Fallback: stringify everything except None.
        stringified = {}
        for key, value in payload.items():
            stringified[key] = value if value is None else str(value)
        encoded = json.dumps(stringified, ensure_ascii=False)
    return encoded
|
822
|
+
|
823
|
+
# ---------- Validate path and locate YAML/CSVs ----------
|
824
|
+
if not os.path.exists(path) or not os.path.isdir(path):
|
825
|
+
if not silent:
|
826
|
+
print("ByCSVPath - Error: path must be an existing folder. Returning None.")
|
827
|
+
return None
|
828
|
+
|
829
|
+
yaml_files = _find_yaml_files(path)
|
830
|
+
if len(yaml_files) < 1:
|
831
|
+
if not silent:
|
832
|
+
print("ByCSVPath - Error: no YAML file found in the folder. Returning None.")
|
833
|
+
return None
|
834
|
+
yaml_file = yaml_files[0]
|
835
|
+
graphs_rel, edges_rel, nodes_rel = _read_yaml(yaml_file)
|
836
|
+
|
837
|
+
# Resolve CSV paths
|
838
|
+
graphs_csv = os.path.join(path, graphs_rel) if graphs_rel else None
|
839
|
+
edges_csv = os.path.join(path, edges_rel) if edges_rel else None
|
840
|
+
nodes_csv = os.path.join(path, nodes_rel) if nodes_rel else None
|
841
|
+
|
842
|
+
if not edges_csv or not os.path.exists(edges_csv):
|
843
|
+
if not silent:
|
844
|
+
print("ByCSVPath - Error: edges CSV not found. Returning None.")
|
845
|
+
return None
|
846
|
+
if not nodes_csv or not os.path.exists(nodes_csv):
|
847
|
+
if not silent:
|
848
|
+
print("ByCSVPath - Error: nodes CSV not found. Returning None.")
|
849
|
+
return None
|
850
|
+
|
851
|
+
# ---------- Load CSVs ----------
|
852
|
+
nodes_df = pd.read_csv(nodes_csv)
|
853
|
+
edges_df = pd.read_csv(edges_csv)
|
854
|
+
graphs_df = pd.read_csv(graphs_csv) if graphs_csv and os.path.exists(graphs_csv) else pd.DataFrame()
|
855
|
+
|
856
|
+
# Required columns
|
857
|
+
for req_cols, df_name, df in [
|
858
|
+
({graphIDHeader, nodeIDHeader}, "nodes", nodes_df),
|
859
|
+
({graphIDHeader, edgeSRCHeader, edgeDSTHeader}, "edges", edges_df),
|
860
|
+
]:
|
861
|
+
missing = req_cols.difference(df.columns)
|
862
|
+
if missing:
|
863
|
+
raise ValueError(f"ByCSVPath - {df_name}.csv is missing required columns: {missing}")
|
864
|
+
|
865
|
+
# Graph IDs present in the data
|
866
|
+
gids = pd.Index([]).union(nodes_df[graphIDHeader].dropna().unique()).union(
|
867
|
+
edges_df[graphIDHeader].dropna().unique()
|
868
|
+
)
|
869
|
+
|
870
|
+
# Prepare graphs_df lookup if provided
|
871
|
+
graphs_by_gid = {}
|
872
|
+
if graphIDHeader in graphs_df.columns:
|
873
|
+
graphs_by_gid = {gid: g.iloc[0].to_dict() for gid, g in graphs_df.groupby(graphIDHeader, dropna=False)}
|
874
|
+
|
875
|
+
# ---------- Ensure schema ----------
|
876
|
+
manager.ensure_schema() # Graph, Vertex, Edge
|
877
|
+
|
878
|
+
# ---------- Upsert per graph ----------
|
879
|
+
materialized_graph_ids = []
|
880
|
+
for raw_gid in gids:
|
881
|
+
gid_str = f"{graphIDPrefix}{int(raw_gid) if str(raw_gid).isdigit() else str(raw_gid)}"
|
882
|
+
materialized_graph_ids.append(gid_str)
|
883
|
+
|
884
|
+
nsub = nodes_df[nodes_df[graphIDHeader] == raw_gid].copy()
|
885
|
+
esub = edges_df[edges_df[graphIDHeader] == raw_gid].copy()
|
886
|
+
|
887
|
+
# Graph info
|
888
|
+
gcard_src = graphs_by_gid.get(raw_gid, {})
|
889
|
+
g_label = str(gcard_src.get(graphLabelHeader, "")) if gcard_src else ""
|
890
|
+
g_props = _props_from_row(pd.Series(gcard_src), exclude={graphIDHeader, graphLabelHeader}) if gcard_src else "{}"
|
891
|
+
num_nodes = int(nsub.shape[0])
|
892
|
+
num_edges = int(esub.shape[0])
|
893
|
+
|
894
|
+
# Remove any existing data for this graph id, then re-insert
|
895
|
+
manager.exec("""
|
896
|
+
MATCH (a:Vertex)-[r:Edge]->(b:Vertex)
|
897
|
+
WHERE a.graph_id = $gid AND b.graph_id = $gid
|
898
|
+
DELETE r;
|
899
|
+
""", {"gid": gid_str}, write=True)
|
900
|
+
manager.exec("MATCH (v:Vertex) WHERE v.graph_id = $gid DELETE v;", {"gid": gid_str}, write=True)
|
901
|
+
manager.exec("MATCH (g:Graph) WHERE g.id = $gid DELETE g;", {"gid": gid_str}, write=True)
|
902
|
+
|
903
|
+
manager.exec("""
|
904
|
+
CREATE (g:Graph {id:$id, label:$label, num_nodes:$num_nodes, num_edges:$num_edges, props:$props});
|
905
|
+
""", {
|
906
|
+
"id": gid_str,
|
907
|
+
"label": g_label,
|
908
|
+
"num_nodes": num_nodes,
|
909
|
+
"num_edges": num_edges,
|
910
|
+
"props": g_props,
|
911
|
+
}, write=True)
|
912
|
+
|
913
|
+
# Insert vertices
|
914
|
+
for _, row in nsub.iterrows():
|
915
|
+
node_id = row[nodeIDHeader]
|
916
|
+
vid = f"{gid_str}:{node_id}"
|
917
|
+
v_label = str(row[nodeLabelHeader]) if "label" in row and pd.notna(row[nodeLabelHeader]) else str(node_id)
|
918
|
+
|
919
|
+
# X/Y/Z: if the column is absent, use a random coordinate in [0, 1000]; if the value cannot be converted to float, store None
|
920
|
+
def _num_or_none(val):
    """
    Converts a value to a float, or returns None if it does not hold a number.

    Parameters
    ----------
    val : any
        The value to convert (for example a CSV cell).

    Returns
    -------
    float or None
        The value as a float. None is returned if the value cannot be converted
        or if it is NaN (for example an empty CSV cell).
    """
    try:
        number = float(val)
    except (TypeError, ValueError, OverflowError):
        return None
    # NaN is the only float that is not equal to itself. Treat it as "no number"
    # so that empty cells are stored as None rather than as NaN.
    if number != number:
        return None
    return number
|
925
|
+
|
926
|
+
x = _num_or_none(row[nodeXHeader]) if nodeXHeader in row else random.uniform(0,1000)
|
927
|
+
y = _num_or_none(row[nodeYHeader]) if nodeYHeader in row else random.uniform(0,1000)
|
928
|
+
z = _num_or_none(row[nodeZHeader]) if nodeZHeader in row else random.uniform(0,1000)
|
929
|
+
|
930
|
+
props = _props_from_row(row, exclude={graphIDHeader, nodeIDHeader, nodeLabelHeader, nodeXHeader, nodeYHeader, nodeZHeader})
|
931
|
+
manager.exec("""
|
932
|
+
CREATE (v:Vertex {id:$id, graph_id:$gid, label:$label, x:$x, y:$y, z:$z, props:$props});
|
933
|
+
""", {"id": vid, "gid": gid_str, "label": v_label, "x": x, "y": y, "z": z, "props": props}, write=True)
|
934
|
+
|
935
|
+
# Insert edges (Edge)
|
936
|
+
for _, row in esub.iterrows():
|
937
|
+
a_id = f"{gid_str}:{row[edgeSRCHeader]}"
|
938
|
+
b_id = f"{gid_str}:{row[edgeDSTHeader]}"
|
939
|
+
e_label = str(row[edgeLabelHeader]) if edgeLabelHeader in row and pd.notna(row[edgeLabelHeader]) else "connect"
|
940
|
+
e_props = _props_from_row(row, exclude={graphIDHeader, edgeSRCHeader, edgeDSTHeader, edgeLabelHeader})
|
941
|
+
|
942
|
+
manager.exec("""
|
943
|
+
MATCH (a:Vertex {id:$a_id}), (b:Vertex {id:$b_id})
|
944
|
+
CREATE (a)-[:Edge {label:$label, props:$props}]->(b);
|
945
|
+
""", {"a_id": a_id, "b_id": b_id, "label": e_label, "props": e_props}, write=True)
|
946
|
+
|
947
|
+
return {"graphs_upserted": len(materialized_graph_ids), "graph_ids": materialized_graph_ids}
|
948
|
+
|
949
|
+
|
950
|
+
|