topologicpy 0.8.57__py3-none-any.whl → 0.8.59__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- topologicpy/Graph.py +1633 -659
- topologicpy/Kuzu.py +495 -134
- topologicpy/Plotly.py +9 -7
- topologicpy/Topology.py +1 -0
- topologicpy/version.py +1 -1
- {topologicpy-0.8.57.dist-info → topologicpy-0.8.59.dist-info}/METADATA +1 -1
- {topologicpy-0.8.57.dist-info → topologicpy-0.8.59.dist-info}/RECORD +10 -10
- {topologicpy-0.8.57.dist-info → topologicpy-0.8.59.dist-info}/WHEEL +0 -0
- {topologicpy-0.8.57.dist-info → topologicpy-0.8.59.dist-info}/licenses/LICENSE +0 -0
- {topologicpy-0.8.57.dist-info → topologicpy-0.8.59.dist-info}/top_level.txt +0 -0
topologicpy/Kuzu.py
CHANGED
@@ -2,14 +2,6 @@ from __future__ import annotations
|
|
2
2
|
import threading, contextlib, time, json
|
3
3
|
from typing import Dict, Any, List, Optional
|
4
4
|
|
5
|
-
|
6
|
-
# Optional TopologicPy imports (make this file safe to import without TopologicPy)
|
7
|
-
from topologicpy.Graph import Graph
|
8
|
-
from topologicpy.Vertex import Vertex
|
9
|
-
from topologicpy.Edge import Edge
|
10
|
-
from topologicpy.Dictionary import Dictionary
|
11
|
-
from topologicpy.Topology import Topology
|
12
|
-
|
13
5
|
import os
|
14
6
|
import warnings
|
15
7
|
|
@@ -123,7 +115,7 @@ class _Mgr:
|
|
123
115
|
def ensure_schema(self):
|
124
116
|
# Node tables
|
125
117
|
self.exec("""
|
126
|
-
CREATE NODE TABLE IF NOT EXISTS
|
118
|
+
CREATE NODE TABLE IF NOT EXISTS Graph(
|
127
119
|
id STRING,
|
128
120
|
label STRING,
|
129
121
|
num_nodes INT64,
|
@@ -147,7 +139,7 @@ class _Mgr:
|
|
147
139
|
|
148
140
|
# Relationship tables
|
149
141
|
self.exec("""
|
150
|
-
CREATE REL TABLE IF NOT EXISTS
|
142
|
+
CREATE REL TABLE IF NOT EXISTS Edge(FROM Vertex TO Vertex, label STRING, props STRING);
|
151
143
|
""", write=True)
|
152
144
|
|
153
145
|
# Figure out later if we need sessions and steps
|
@@ -178,31 +170,18 @@ class _Mgr:
|
|
178
170
|
|
179
171
|
|
180
172
|
class Kuzu:
|
181
|
-
"""
|
182
|
-
TopologicPy-style class of static methods for Kùzu integration.
|
183
|
-
|
184
|
-
Notes
|
185
|
-
-----
|
186
|
-
- All methods are *static* to match TopologicPy's style.
|
187
|
-
- Graph persistence:
|
188
|
-
* Vertices: stored in `Vertex` with (id, graph_id, label, props JSON)
|
189
|
-
* Edges: stored as `CONNECT` relations a->b with label + props JSON
|
190
|
-
* We assume undirected design intent; only one CONNECT is stored (a->b),
|
191
|
-
but TopologicPy Graph treats edges as undirected by default.
|
192
|
-
"""
|
193
|
-
|
194
173
|
# ---------- Core (DB + Connection + Schema) ----------
|
195
174
|
@staticmethod
|
196
|
-
def EnsureSchema(
|
175
|
+
def EnsureSchema(manager, silent: bool = False) -> bool:
|
197
176
|
"""
|
198
|
-
Ensures the required Kùzu schema exists in the database at `
|
177
|
+
Ensures the required Kùzu schema exists in the database bound to `manager`.
|
199
178
|
|
200
179
|
Parameters
|
201
180
|
----------
|
202
|
-
|
203
|
-
Path to the Kùzu database
|
181
|
+
manager : Kuzu.Manager
|
182
|
+
The manager of the Kùzu database.
|
204
183
|
silent : bool , optional
|
205
|
-
If True,
|
184
|
+
If set to True, error and warning messages are suppressed. Default is False.
|
206
185
|
|
207
186
|
Returns
|
208
187
|
-------
|
@@ -210,63 +189,120 @@ class Kuzu:
|
|
210
189
|
True if successful, False otherwise.
|
211
190
|
"""
|
212
191
|
try:
|
213
|
-
|
214
|
-
mgr.ensure_schema()
|
192
|
+
manager.ensure_schema()
|
215
193
|
return True
|
216
194
|
except Exception as e:
|
217
195
|
if not silent:
|
218
|
-
print(f"Kuzu.EnsureSchema - Error: {e}")
|
196
|
+
print(f"Kuzu.EnsureSchema - Error: {e}. Returning False.")
|
219
197
|
return False
|
220
198
|
|
221
199
|
@staticmethod
|
222
|
-
def Database(
|
200
|
+
def Database(path: str, silent: bool = False):
|
223
201
|
"""
|
224
|
-
Returns the underlying `kuzu.Database` instance for `
|
202
|
+
Returns the underlying `kuzu.Database` instance for `path`.
|
203
|
+
|
204
|
+
Parameters
|
205
|
+
----------
|
206
|
+
path : str
|
207
|
+
Path to the Kùzu database. It will be created if it does not exist.
|
208
|
+
silent : bool , optional
|
209
|
+
If set to True, error and warning messages are suppressed. Default is False.
|
210
|
+
|
211
|
+
Returns
|
212
|
+
-------
|
213
|
+
kuzu.Database
|
214
|
+
The Kuzu database found at the path.
|
225
215
|
"""
|
226
|
-
|
216
|
+
try:
|
217
|
+
return _db_cache.get(path)
|
218
|
+
except Exception as e:
|
219
|
+
if not silent:
|
220
|
+
print(f"Kuzu.Database - Error: {e}. Returning None.")
|
221
|
+
return None
|
227
222
|
|
228
223
|
@staticmethod
|
229
|
-
def Connection(
|
224
|
+
def Connection(manager, silent: bool = False):
|
225
|
+
"""
|
226
|
+
Returns a `kuzu.Connection` to the database bound to `manager`.
|
227
|
+
|
228
|
+
Parameters
|
229
|
+
----------
|
230
|
+
manager : Kuzu.Manager
|
231
|
+
The Manager to the Kùzu database.
|
232
|
+
silent : bool , optional
|
233
|
+
If set to True, error and warning messages are suppressed. Default is False.
|
234
|
+
|
235
|
+
Returns
|
236
|
+
-------
|
237
|
+
kuzu.Connection
|
238
|
+
The Kuzu live connection. Do NOT use across threads.
|
230
239
|
"""
|
231
|
-
|
240
|
+
try:
|
241
|
+
with manager.read() as c:
|
242
|
+
return c # Note: returns a live connection (do not use across threads)
|
243
|
+
except Exception as e:
|
244
|
+
if not silent:
|
245
|
+
print(f"Kuzu.Connection - Error: {e}. Returning None.")
|
246
|
+
return None
|
247
|
+
|
248
|
+
@staticmethod
|
249
|
+
def Manager(path: str, silent: bool = False):
|
232
250
|
"""
|
233
|
-
|
234
|
-
|
235
|
-
|
251
|
+
Returns a lightweight manager bound to the database at `path`.
|
252
|
+
Parameters
|
253
|
+
----------
|
254
|
+
path : str
|
255
|
+
Path to the Kùzu database. It will be created if it does not exist.
|
256
|
+
silent : bool , optional
|
257
|
+
If set to True, error and warning messages are suppressed. Default is False.
|
236
258
|
|
237
|
-
|
259
|
+
Returns
|
260
|
+
-------
|
261
|
+
Kuzu.Manager
|
262
|
+
The Kuzu Manager.
|
263
|
+
"""
|
264
|
+
try:
|
265
|
+
return _Mgr(path)
|
266
|
+
except Exception as e:
|
267
|
+
if not silent:
|
268
|
+
print(f"Kuzu.Manager - Error: {e}. Returning None.")
|
269
|
+
return None
|
238
270
|
|
239
271
|
@staticmethod
|
240
|
-
def UpsertGraph(
|
272
|
+
def UpsertGraph(manager,
|
241
273
|
graph,
|
242
|
-
graphIDKey:
|
243
|
-
vertexIDKey:
|
244
|
-
vertexLabelKey:
|
274
|
+
graphIDKey: str = None,
|
275
|
+
vertexIDKey: str = None,
|
276
|
+
vertexLabelKey: str = None,
|
245
277
|
mantissa: int = 6,
|
246
278
|
silent: bool = False) -> str:
|
247
279
|
"""
|
248
|
-
Upserts (deletes prior + inserts new) a TopologicPy graph
|
280
|
+
Upserts (deletes prior + inserts new) a TopologicPy graph.
|
249
281
|
|
250
282
|
Parameters
|
251
283
|
----------
|
252
|
-
|
253
|
-
|
284
|
+
manager : Kuzu.Manager
|
285
|
+
The Kuzu database manager.
|
254
286
|
graph : topologicpy.Graph
|
255
287
|
The input TopologicPy graph.
|
256
288
|
graphIDKey : str , optional
|
257
|
-
The graph dictionary key under which the graph ID is stored. If None, a UUID is generated.
|
258
|
-
|
259
|
-
|
289
|
+
The graph dictionary key under which the graph ID is stored. If None, a UUID is generated and stored under 'id'.
|
290
|
+
vertexIDKey : str , optional
|
291
|
+
The vertex dictionary key under which the vertex ID is stored. If None, a UUID is generated and stored under 'id'.
|
292
|
+
vertexLabelKey : str , optional
|
293
|
+
The vertex dictionary key under which the vertex label is expected to be stored.
|
260
294
|
silent : bool , optional
|
261
|
-
If True,
|
295
|
+
If set to True, error and warning messages are suppressed. Default is False.
|
262
296
|
|
263
297
|
Returns
|
264
298
|
-------
|
265
299
|
str
|
266
300
|
The graph_id used.
|
267
301
|
"""
|
302
|
+
from topologicpy.Graph import Graph
|
268
303
|
from topologicpy.Topology import Topology
|
269
304
|
from topologicpy.Dictionary import Dictionary
|
305
|
+
|
270
306
|
d = Topology.Dictionary(graph)
|
271
307
|
if graphIDKey is None:
|
272
308
|
gid = Topology.UUID(graph)
|
@@ -280,22 +316,21 @@ class Kuzu:
|
|
280
316
|
e_props = mesh_data['edgeDictionaries']
|
281
317
|
num_nodes = len(verts)
|
282
318
|
num_edges = len(edges)
|
283
|
-
mgr = _Mgr(db_path)
|
284
319
|
try:
|
285
|
-
|
286
|
-
# Upsert
|
287
|
-
|
288
|
-
|
289
|
-
CREATE (g:
|
320
|
+
manager.ensure_schema()
|
321
|
+
# Upsert Graph
|
322
|
+
manager.exec("MATCH (g:Graph) WHERE g.id = $id DELETE g;", {"id": gid}, write=True)
|
323
|
+
manager.exec("""
|
324
|
+
CREATE (g:Graph {id:$id, num_nodes:$num_nodes, num_edges: $num_edges, props:$props});
|
290
325
|
""", {"id": gid, "num_nodes": num_nodes, "num_edges": num_edges, "props": json.dumps(g_props)}, write=True)
|
291
326
|
|
292
327
|
# Remove existing vertices/edges for this graph_id
|
293
|
-
|
294
|
-
MATCH (a:Vertex)-[r:
|
328
|
+
manager.exec("""
|
329
|
+
MATCH (a:Vertex)-[r:Edge]->(b:Vertex)
|
295
330
|
WHERE a.graph_id = $gid AND b.graph_id = $gid
|
296
331
|
DELETE r;
|
297
332
|
""", {"gid": gid}, write=True)
|
298
|
-
|
333
|
+
manager.exec("MATCH (v:Vertex) WHERE v.graph_id = $gid DELETE v;", {"gid": gid}, write=True)
|
299
334
|
|
300
335
|
# Insert vertices
|
301
336
|
for i, v in enumerate(verts):
|
@@ -308,7 +343,7 @@ class Kuzu:
|
|
308
343
|
label = str(i)
|
309
344
|
else:
|
310
345
|
label = v_props[i].get(vertexIDKey, str(i))
|
311
|
-
|
346
|
+
manager.exec("""
|
312
347
|
CREATE (v:Vertex {id:$id, graph_id:$gid, label:$label, props:$props, x:$x, y:$y, z:$z});
|
313
348
|
""", {"id": vid, "gid": gid, "label": label, "x": x, "y": y, "z": z,
|
314
349
|
"props": json.dumps(v_props[i])}, write=True)
|
@@ -317,9 +352,9 @@ class Kuzu:
|
|
317
352
|
for i, e in enumerate(edges):
|
318
353
|
a_id = v_props[e[0]].get(vertexIDKey, f"{gid}:{e[0]}")
|
319
354
|
b_id = v_props[e[1]].get(vertexIDKey, f"{gid}:{e[1]}")
|
320
|
-
|
355
|
+
manager.exec("""
|
321
356
|
MATCH (a:Vertex {id:$a}), (b:Vertex {id:$b})
|
322
|
-
CREATE (a)-[:
|
357
|
+
CREATE (a)-[:Edge {label:$label, props:$props}]->(b);
|
323
358
|
""", {"a": a_id, "b": b_id,
|
324
359
|
"label": e_props[i].get("label", str(i)),
|
325
360
|
"props": json.dumps(e_props[i])}, write=True)
|
@@ -327,29 +362,40 @@ class Kuzu:
|
|
327
362
|
return gid
|
328
363
|
except Exception as e:
|
329
364
|
if not silent:
|
330
|
-
print(f"Kuzu.UpsertGraph - Error: {e}")
|
331
|
-
|
365
|
+
print(f"Kuzu.UpsertGraph - Error: {e}. Returning None.")
|
366
|
+
return None
|
332
367
|
|
333
368
|
@staticmethod
|
334
|
-
def GraphByID(
|
369
|
+
def GraphByID(manager, graphID: str, silent: bool = False):
|
335
370
|
"""
|
336
|
-
|
371
|
+
Constructs a TopologicPy graph from Kùzu using the graphID input parameter.
|
372
|
+
|
373
|
+
Parameters
|
374
|
+
----------
|
375
|
+
manager : Kuzu.Manager
|
376
|
+
The manager of the Kùzu database.
|
377
|
+
graphID : str
|
378
|
+
The graph ID to retrieve from Kùzu.
|
379
|
+
silent : bool , optional
|
380
|
+
If set to True, error and warning messages are suppressed. Default is False.
|
337
381
|
|
338
382
|
Returns
|
339
383
|
-------
|
340
384
|
topologicpy.Graph
|
341
385
|
A new TopologicPy Graph, or None on error.
|
342
386
|
"""
|
343
|
-
# if TGraph is None:
|
344
|
-
# raise _KuzuError("TopologicPy is required to use Kuzu.ReadTopologicGraph.")
|
345
387
|
import random
|
346
|
-
|
388
|
+
from topologicpy.Graph import Graph
|
389
|
+
from topologicpy.Dictionary import Dictionary
|
390
|
+
from topologicpy.Vertex import Vertex
|
391
|
+
from topologicpy.Edge import Edge
|
392
|
+
from topologicpy.Topology import Topology
|
347
393
|
|
348
394
|
try:
|
349
|
-
|
350
|
-
# Read the
|
351
|
-
g =
|
352
|
-
MATCH (g:
|
395
|
+
manager.ensure_schema()
|
396
|
+
# Read the Graph
|
397
|
+
g = manager.exec("""
|
398
|
+
MATCH (g:Graph) WHERE g.id = $id
|
353
399
|
RETURN g.id AS id, g.num_nodes AS num_nodes, g.num_edges AS num_edges, g.props AS props
|
354
400
|
;
|
355
401
|
""", {"id": graphID}, write=False) or None
|
@@ -359,7 +405,7 @@ class Kuzu:
|
|
359
405
|
g_dict = dict(json.loads(g.get("props") or "{}") or {})
|
360
406
|
g_dict = Dictionary.ByPythonDictionary(g_dict)
|
361
407
|
# Read vertices
|
362
|
-
rows_v =
|
408
|
+
rows_v = manager.exec("""
|
363
409
|
MATCH (v:Vertex) WHERE v.graph_id = $gid
|
364
410
|
RETURN v.id AS id, v.label AS label, v.x AS x, v.y AS y, v.z AS z, v.props AS props
|
365
411
|
ORDER BY id;
|
@@ -369,9 +415,9 @@ class Kuzu:
|
|
369
415
|
vertices = []
|
370
416
|
for row in rows_v:
|
371
417
|
try:
|
372
|
-
x = row.get("x"
|
373
|
-
y = row.get("y"
|
374
|
-
z = row.get("z"
|
418
|
+
x = row.get("x", random.uniform(0,1000))
|
419
|
+
y = row.get("y", random.uniform(0,1000))
|
420
|
+
z = row.get("z", random.uniform(0,1000))
|
375
421
|
except:
|
376
422
|
x = random.uniform(0,1000)
|
377
423
|
y = random.uniform(0,1000)
|
@@ -392,8 +438,8 @@ class Kuzu:
|
|
392
438
|
vertices.append(v)
|
393
439
|
|
394
440
|
# Read edges
|
395
|
-
rows_e =
|
396
|
-
MATCH (a:Vertex)-[r:
|
441
|
+
rows_e = manager.exec("""
|
442
|
+
MATCH (a:Vertex)-[r:Edge]->(b:Vertex)
|
397
443
|
WHERE a.graph_id = $gid AND b.graph_id = $gid
|
398
444
|
RETURN a.id AS a_id, b.id AS b_id, r.label AS label, r.props AS props;
|
399
445
|
""", {"gid": graphID}, write=False) or []
|
@@ -423,61 +469,50 @@ class Kuzu:
|
|
423
469
|
return g
|
424
470
|
except Exception as e:
|
425
471
|
if not silent:
|
426
|
-
print(f"Kuzu.GraphByID - Error: {e}")
|
472
|
+
print(f"Kuzu.GraphByID - Error: {e}. Returning None.")
|
427
473
|
return None
|
428
474
|
|
429
475
|
@staticmethod
|
430
476
|
def GraphsByQuery(
|
431
|
-
|
477
|
+
manager,
|
432
478
|
query: str,
|
433
479
|
params: dict | None = None,
|
434
|
-
graphIDKey: str = "graph_id",
|
435
480
|
silent: bool = False,
|
436
481
|
):
|
437
482
|
"""
|
438
483
|
Executes a Kùzu Cypher query and returns a list of TopologicPy Graphs.
|
439
|
-
The query should return at least one column identifying each graph.
|
440
|
-
By default this column is expected to be named 'graph_id', but you can
|
441
|
-
override that via `graph_id_field`.
|
442
484
|
|
443
485
|
The method will:
|
444
486
|
1) run the query,
|
445
|
-
2) extract distinct graph IDs from the result set
|
446
|
-
|
447
|
-
'a_id', 'b_id', or 'id' that look like '<graph_id>:<vertex_index>'),
|
448
|
-
3) reconstruct each graph via Kuzu.ReadTopologicGraph(...).
|
487
|
+
2) extract distinct graph IDs from the result set.
|
488
|
+
3) reconstruct each graph via Kuzu.GraphByID(...).
|
449
489
|
|
450
490
|
Parameters
|
451
491
|
----------
|
452
|
-
|
453
|
-
|
492
|
+
manager : Kuzu.Manager
|
493
|
+
The manager of the Kùzu database.
|
454
494
|
query : str
|
455
495
|
A valid Kùzu Cypher query.
|
456
496
|
params : dict , optional
|
457
497
|
Parameters to pass with the query.
|
458
|
-
graph_id_field : str , optional
|
459
|
-
The field name in the query result that contains the graph ID(s).
|
460
|
-
Default is "graph_id".
|
461
498
|
silent : bool , optional
|
462
|
-
If True,
|
499
|
+
If set to True, error and warning messages are suppressed. Default is False.
|
463
500
|
|
464
501
|
Returns
|
465
502
|
-------
|
466
|
-
list[
|
503
|
+
list[topologic_core.Graph]
|
467
504
|
A list of reconstructed TopologicPy graphs.
|
505
|
+
|
468
506
|
"""
|
469
|
-
# if TGraph is None:
|
470
|
-
# raise _KuzuError("TopologicPy is required to use Kuzu.GraphsFromQuery.")
|
471
507
|
|
472
508
|
try:
|
473
|
-
|
474
|
-
|
475
|
-
rows = mgr.exec(query, params or {}, write=False) or []
|
509
|
+
manager.ensure_schema()
|
510
|
+
rows = manager.exec(query, params or {}, write=False) or []
|
476
511
|
|
477
512
|
# Collect distinct graph IDs
|
478
513
|
gids = []
|
479
514
|
for r in rows:
|
480
|
-
gid = r.get(
|
515
|
+
gid = r.get('graph_id')
|
481
516
|
|
482
517
|
# Fallback: try to infer from common id fields like "<graph_id>:<i>"
|
483
518
|
if gid is None:
|
@@ -493,42 +528,55 @@ class Kuzu:
|
|
493
528
|
# Reconstruct each graph
|
494
529
|
graphs = []
|
495
530
|
for gid in gids:
|
496
|
-
g = Kuzu.GraphByID(
|
531
|
+
g = Kuzu.GraphByID(path, gid, silent=silent)
|
497
532
|
if g is not None:
|
498
533
|
graphs.append(g)
|
499
534
|
return graphs
|
500
535
|
|
501
536
|
except Exception as e:
|
502
537
|
if not silent:
|
503
|
-
print(f"Kuzu.GraphsByQuery - Error: {e}")
|
504
|
-
return
|
538
|
+
print(f"Kuzu.GraphsByQuery - Error: {e}. Returning None.")
|
539
|
+
return None
|
505
540
|
|
506
541
|
@staticmethod
|
507
|
-
def DeleteGraph(
|
542
|
+
def DeleteGraph(manager, graphID: str, silent: bool = False) -> bool:
|
508
543
|
"""
|
509
|
-
Deletes a graph (vertices
|
544
|
+
Deletes a graph (its vertices, edges, and Graph record) by id.
|
545
|
+
|
546
|
+
Parameters
|
547
|
+
----------
|
548
|
+
manager : Kuzu.Manager
|
549
|
+
The manager of the Kùzu database.
|
550
|
+
graphID : str
|
551
|
+
The id of the graph to be deleted.
|
552
|
+
silent : bool , optional
|
553
|
+
If set to True, error and warning messages are suppressed. Default is False.
|
554
|
+
|
555
|
+
Returns
|
556
|
+
-------
|
557
|
+
bool
|
558
|
+
True on success, False otherwise.
|
510
559
|
"""
|
511
560
|
try:
|
512
|
-
|
513
|
-
mgr.ensure_schema()
|
561
|
+
manager.ensure_schema()
|
514
562
|
# Delete edges
|
515
|
-
|
516
|
-
MATCH (a:Vertex)-[r:
|
563
|
+
manager.exec("""
|
564
|
+
MATCH (a:Vertex)-[r:Edge]->(b:Vertex)
|
517
565
|
WHERE a.graph_id = $gid AND b.graph_id = $gid
|
518
566
|
DELETE r;
|
519
|
-
""", {"gid":
|
567
|
+
""", {"gid": graphID}, write=True)
|
520
568
|
# Delete vertices
|
521
|
-
|
569
|
+
manager.exec("MATCH (v:Vertex) WHERE v.graph_id = $gid DELETE v;", {"gid": graphID}, write=True)
|
522
570
|
# Delete card
|
523
|
-
|
571
|
+
manager.exec("MATCH (g:Graph) WHERE g.id = $gid DELETE g;", {"gid": graphID}, write=True)
|
524
572
|
return True
|
525
573
|
except Exception as e:
|
526
574
|
if not silent:
|
527
|
-
print(f"Kuzu.DeleteGraph - Error: {e}")
|
575
|
+
print(f"Kuzu.DeleteGraph - Error: {e}. Returning False.")
|
528
576
|
return False
|
529
577
|
|
530
578
|
@staticmethod
|
531
|
-
def EmptyDatabase(
|
579
|
+
def EmptyDatabase(manager, dropSchema: bool = False, recreateSchema: bool = True, silent: bool = False) -> bool:
|
532
580
|
"""
|
533
581
|
Empties the Kùzu database bound to `manager`.
|
534
582
|
|
@@ -538,14 +586,14 @@ class Kuzu:
|
|
538
586
|
|
539
587
|
Parameters
|
540
588
|
----------
|
541
|
-
|
542
|
-
|
543
|
-
|
589
|
+
manager : Kuzu Manager
|
590
|
+
The manager of the Kùzu database.
|
591
|
+
dropSchema : bool , optional
|
544
592
|
If True, DROP the known tables instead of deleting rows. Default False.
|
545
|
-
|
593
|
+
recreateSchema : bool , optional
|
546
594
|
If True and dropSchema=True, re-create the minimal schema after dropping. Default True.
|
547
595
|
silent : bool , optional
|
548
|
-
|
596
|
+
If set to True, error and warning messages are suppressed. Default is False.
|
549
597
|
|
550
598
|
Returns
|
551
599
|
-------
|
@@ -553,37 +601,350 @@ class Kuzu:
|
|
553
601
|
True on success, False otherwise.
|
554
602
|
"""
|
555
603
|
try:
|
556
|
-
|
557
|
-
# Ensure DB exists (does not create tables unless needed)
|
558
|
-
mgr.ensure_schema()
|
604
|
+
manager.ensure_schema()
|
559
605
|
|
560
|
-
if
|
606
|
+
if dropSchema:
|
561
607
|
# Drop relationship tables FIRST (to release dependencies), then node tables.
|
562
608
|
# IF EXISTS is convenient; if your Kùzu version doesn't support it, remove and ignore exceptions.
|
563
609
|
for stmt in [
|
564
|
-
"DROP TABLE IF EXISTS
|
610
|
+
"DROP TABLE IF EXISTS Edge;",
|
565
611
|
"DROP TABLE IF EXISTS Vertex;",
|
566
|
-
"DROP TABLE IF EXISTS
|
612
|
+
"DROP TABLE IF EXISTS Graph;",
|
567
613
|
]:
|
568
614
|
try:
|
569
|
-
|
615
|
+
manager.exec(stmt, write=True)
|
570
616
|
except Exception as _e:
|
571
617
|
if not silent:
|
572
618
|
print(f"Kuzu.EmptyDatabase - Warning dropping table: {_e}")
|
573
619
|
|
574
|
-
if
|
575
|
-
|
620
|
+
if recreateSchema:
|
621
|
+
manager.ensure_schema()
|
576
622
|
return True
|
577
623
|
|
578
624
|
# Soft clear: remove all relationships, then all nodes (covers all labels/tables).
|
579
625
|
# Delete all edges (any direction)
|
580
|
-
|
626
|
+
manager.exec("MATCH (a)-[r]->(b) DELETE r;", write=True)
|
581
627
|
# Delete all nodes (from all node tables)
|
582
|
-
|
628
|
+
manager.exec("MATCH (n) DELETE n;", write=True)
|
583
629
|
return True
|
584
630
|
|
585
631
|
except Exception as e:
|
586
632
|
if not silent:
|
587
|
-
print(f"Kuzu.EmptyDatabase - Error: {e}")
|
633
|
+
print(f"Kuzu.EmptyDatabase - Error: {e}. Returning False.")
|
588
634
|
return False
|
589
635
|
|
636
|
+
@staticmethod
|
637
|
+
def ListGraphs(manager, where: dict = None, limit: int = 100, offset: int = 0, silent: bool = False) -> list[dict]:
|
638
|
+
"""
|
639
|
+
Lists Graph metadata with simple filtering and pagination.
|
640
|
+
|
641
|
+
Parameters
|
642
|
+
----------
|
643
|
+
manager : Kuzu.Manager
|
644
|
+
The manager of the Kùzu database.
|
645
|
+
where : dict , optional
|
646
|
+
A Python dictionary of filters. Supported filters in `where` (all optional):
|
647
|
+
- id (exact match)
|
648
|
+
- label (substring match)
|
649
|
+
- props_contains (substring match against JSON/text in `props`)
|
650
|
+
- props_equals (exact string match against `props`)
|
651
|
+
- min_nodes / max_nodes (integers)
|
652
|
+
- min_edges / max_edges (integers)
|
653
|
+
limit : int , optional
|
654
|
+
The desired limit of returned Graphs. Default is 100.
|
655
|
+
offset : int , optional
|
656
|
+
The desired offset of the returned Graphs (skips the first number of Graphs specified by the offset and returns the remaining Graphs up to the specified limit). The offset is useful if pagination is needed. Default is 0.
|
657
|
+
silent : bool , optional
|
658
|
+
If set to True, error and warning messages are suppressed. Default is False.
|
659
|
+
|
660
|
+
Returns
|
661
|
+
-------
|
662
|
+
list
|
663
|
+
The list of found Graph python dictionaries.
|
664
|
+
|
665
|
+
"""
|
666
|
+
|
667
|
+
manager.ensure_schema()
|
668
|
+
where = where or {}
|
669
|
+
|
670
|
+
conds: list[str] = []
|
671
|
+
params: dict = {}
|
672
|
+
|
673
|
+
if "id" in where and where["id"]:
|
674
|
+
conds.append("g.id = $id")
|
675
|
+
params["id"] = str(where["id"])
|
676
|
+
|
677
|
+
if "label" in where and where["label"]:
|
678
|
+
# Cypher-style infix CONTAINS
|
679
|
+
conds.append("g.label CONTAINS $label_sub")
|
680
|
+
params["label_sub"] = str(where["label"])
|
681
|
+
|
682
|
+
if "props_contains" in where and where["props_contains"]:
|
683
|
+
conds.append("g.props CONTAINS $props_sub")
|
684
|
+
params["props_sub"] = str(where["props_contains"])
|
685
|
+
|
686
|
+
if "props_equals" in where and where["props_equals"]:
|
687
|
+
conds.append("g.props = $props_equals")
|
688
|
+
params["props_equals"] = str(where["props_equals"])
|
689
|
+
|
690
|
+
if "min_nodes" in where and where["min_nodes"] is not None:
|
691
|
+
conds.append("g.num_nodes >= $min_nodes")
|
692
|
+
params["min_nodes"] = int(where["min_nodes"])
|
693
|
+
|
694
|
+
if "max_nodes" in where and where["max_nodes"] is not None:
|
695
|
+
conds.append("g.num_nodes <= $max_nodes")
|
696
|
+
params["max_nodes"] = int(where["max_nodes"])
|
697
|
+
|
698
|
+
if "min_edges" in where and where["min_edges"] is not None:
|
699
|
+
conds.append("g.num_edges >= $min_edges")
|
700
|
+
params["min_edges"] = int(where["min_edges"])
|
701
|
+
|
702
|
+
if "max_edges" in where and where["max_edges"] is not None:
|
703
|
+
conds.append("g.num_edges <= $max_edges")
|
704
|
+
params["max_edges"] = int(where["max_edges"])
|
705
|
+
|
706
|
+
where_clause = ("WHERE " + " AND ".join(conds)) if conds else ""
|
707
|
+
q = f"""
|
708
|
+
MATCH (g:Graph)
|
709
|
+
{where_clause}
|
710
|
+
RETURN g.id AS id, g.label AS label,
|
711
|
+
g.num_nodes AS num_nodes, g.num_edges AS num_edges,
|
712
|
+
g.props AS props
|
713
|
+
ORDER BY id
|
714
|
+
SKIP $__offset LIMIT $__limit;
|
715
|
+
"""
|
716
|
+
|
717
|
+
params["__offset"] = max(0, int(offset or 0))
|
718
|
+
params["__limit"] = max(0, int(limit or 100))
|
719
|
+
|
720
|
+
return manager.exec(q, params, write=False) or []
|
721
|
+
|
722
|
+
|
723
|
+
@staticmethod
|
724
|
+
def ByCSVPath(
|
725
|
+
manager,
|
726
|
+
path: str,
|
727
|
+
graphIDPrefix: str = "g",
|
728
|
+
graphIDHeader="graph_id",
|
729
|
+
graphLabelHeader="label",
|
730
|
+
edgeSRCHeader="src_id",
|
731
|
+
edgeDSTHeader="dst_id",
|
732
|
+
edgeLabelHeader="label",
|
733
|
+
nodeIDHeader="node_id",
|
734
|
+
nodeLabelHeader="label",
|
735
|
+
nodeXHeader="X",
|
736
|
+
nodeYHeader="Y",
|
737
|
+
nodeZHeader="Z",
|
738
|
+
silent: bool = False,
|
739
|
+
) -> Dict[str, Any]:
|
740
|
+
"""
|
741
|
+
Load node/edge/graph CSVs from a folder (using its .yaml meta) and upsert them
|
742
|
+
directly into Kùzu using the schema defined in Kuzu.py:
|
743
|
+
|
744
|
+
- NODE TABLE Graph(id STRING PRIMARY KEY, label STRING, num_nodes INT64, num_edges INT64, props STRING)
|
745
|
+
- NODE TABLE Vertex(id STRING PRIMARY KEY, graph_id STRING, label STRING, x DOUBLE, y DOUBLE, z DOUBLE, props STRING)
|
746
|
+
- REL TABLE Edge(FROM Vertex TO Vertex, label STRING, props STRING)
|
747
|
+
|
748
|
+
Parameters
|
749
|
+
----------
|
750
|
+
manager : Kuzu.Manager
|
751
|
+
An initialized Kùzu manager; must provide ensure_schema() and exec(query, params, write=True/False).
|
752
|
+
path : str
|
753
|
+
Folder containing a dataset YAML (e.g., meta.yaml) that points to nodes/edges/graphs CSVs.
|
754
|
+
graphIDPrefix : str
|
755
|
+
Prefix for materialized graph IDs (default "g"); e.g., graph 0 -> "g0".
|
756
|
+
graphIDHeader : str , optional
|
757
|
+
The column header string used to specify the graph id. Default is "graph_id".
|
758
|
+
graphLabelHeader : str , optional
|
759
|
+
The column header string used to specify the graph label. Default is "label".
|
760
|
+
edgeSRCHeader : str , optional
|
761
|
+
The column header string used to specify the source vertex id of edges. Default is "src_id".
|
762
|
+
edgeDSTHeader : str , optional
|
763
|
+
The column header string used to specify the destination vertex id of edges. Default is "dst_id".
|
764
|
+
edgeLabelHeader : str , optional
|
765
|
+
The column header string used to specify the label of edges. Default is "label".
|
766
|
+
nodeIDHeader : str , optional
|
767
|
+
The column header string used to specify the id of nodes. Default is "node_id".
|
768
|
+
nodeLabelHeader : str , optional
|
769
|
+
The column header string used to specify the label of nodes. Default is "label".
|
770
|
+
nodeXHeader : str , optional
|
771
|
+
The column header string used to specify the X coordinate of nodes. Default is "X".
|
772
|
+
nodeYHeader : str , optional
|
773
|
+
The column header string used to specify the Y coordinate of nodes. Default is "Y".
|
774
|
+
nodeZHeader : str , optional
|
775
|
+
The column header string used to specify the Z coordinate of nodes. Default is "Z".
|
776
|
+
silent : bool
|
777
|
+
If True, suppress warnings.
|
778
|
+
|
779
|
+
Returns
|
780
|
+
-------
|
781
|
+
dict
|
782
|
+
{"graphs_upserted": int, "graph_ids": [str, ...]}
|
783
|
+
"""
|
784
|
+
import os
|
785
|
+
import glob
|
786
|
+
import json
|
787
|
+
import numbers
|
788
|
+
import pandas as pd
|
789
|
+
import yaml
|
790
|
+
import random
|
791
|
+
|
792
|
+
# ---------- Helpers (mirroring your CSV loader’s patterns) ----------
|
793
|
+
def _find_yaml_files(folder_path: str):
|
794
|
+
return glob.glob(os.path.join(folder_path, "*.yaml"))
|
795
|
+
|
796
|
+
def _read_yaml(file_path: str):
|
797
|
+
with open(file_path, "r", encoding="utf-8") as f:
|
798
|
+
data = yaml.safe_load(f) or {}
|
799
|
+
edge_data = data.get("edge_data", [])
|
800
|
+
node_data = data.get("node_data", [])
|
801
|
+
graph_data = data.get("graph_data", {})
|
802
|
+
edges_rel = edge_data[0].get("file_name") if edge_data else None
|
803
|
+
nodes_rel = node_data[0].get("file_name") if node_data else None
|
804
|
+
graphs_rel = graph_data.get("file_name")
|
805
|
+
return graphs_rel, edges_rel, nodes_rel
|
806
|
+
|
807
|
+
def _props_from_row(row: pd.Series, exclude: set) -> str:
|
808
|
+
d = {}
|
809
|
+
for k, v in row.items():
|
810
|
+
if k in exclude:
|
811
|
+
continue
|
812
|
+
# normalize NaN -> None for clean JSON
|
813
|
+
if isinstance(v, float) and pd.isna(v):
|
814
|
+
d[k] = None
|
815
|
+
else:
|
816
|
+
d[k] = v
|
817
|
+
try:
|
818
|
+
return json.dumps(d, ensure_ascii=False)
|
819
|
+
except Exception:
|
820
|
+
# Fallback: stringify everything
|
821
|
+
return json.dumps({k: (None if v is None else str(v)) for k, v in d.items()}, ensure_ascii=False)
|
822
|
+
|
823
|
+
# ---------- Validate path and locate YAML/CSVs ----------
|
824
|
+
if not os.path.exists(path) or not os.path.isdir(path):
|
825
|
+
if not silent:
|
826
|
+
print("ByCSVPath - Error: path must be an existing folder. Returning None.")
|
827
|
+
return None
|
828
|
+
|
829
|
+
yaml_files = _find_yaml_files(path)
|
830
|
+
if len(yaml_files) < 1:
|
831
|
+
if not silent:
|
832
|
+
print("ByCSVPath - Error: no YAML file found in the folder. Returning None.")
|
833
|
+
return None
|
834
|
+
yaml_file = yaml_files[0]
|
835
|
+
graphs_rel, edges_rel, nodes_rel = _read_yaml(yaml_file)
|
836
|
+
|
837
|
+
# Resolve CSV paths
|
838
|
+
graphs_csv = os.path.join(path, graphs_rel) if graphs_rel else None
|
839
|
+
edges_csv = os.path.join(path, edges_rel) if edges_rel else None
|
840
|
+
nodes_csv = os.path.join(path, nodes_rel) if nodes_rel else None
|
841
|
+
|
842
|
+
if not edges_csv or not os.path.exists(edges_csv):
|
843
|
+
if not silent:
|
844
|
+
print("ByCSVPath - Error: edges CSV not found. Returning None.")
|
845
|
+
return None
|
846
|
+
if not nodes_csv or not os.path.exists(nodes_csv):
|
847
|
+
if not silent:
|
848
|
+
print("ByCSVPath - Error: nodes CSV not found. Returning None.")
|
849
|
+
return None
|
850
|
+
|
851
|
+
# ---------- Load CSVs ----------
|
852
|
+
nodes_df = pd.read_csv(nodes_csv)
|
853
|
+
edges_df = pd.read_csv(edges_csv)
|
854
|
+
graphs_df = pd.read_csv(graphs_csv) if graphs_csv and os.path.exists(graphs_csv) else pd.DataFrame()
|
855
|
+
|
856
|
+
# Required columns
|
857
|
+
for req_cols, df_name, df in [
|
858
|
+
({graphIDHeader, nodeIDHeader}, "nodes", nodes_df),
|
859
|
+
({graphIDHeader, edgeSRCHeader, edgeDSTHeader}, "edges", edges_df),
|
860
|
+
]:
|
861
|
+
missing = req_cols.difference(df.columns)
|
862
|
+
if missing:
|
863
|
+
raise ValueError(f"ByCSVPath - {df_name}.csv is missing required columns: {missing}")
|
864
|
+
|
865
|
+
# Graph IDs present in the data
|
866
|
+
gids = pd.Index([]).union(nodes_df[graphIDHeader].dropna().unique()).union(
|
867
|
+
edges_df[graphIDHeader].dropna().unique()
|
868
|
+
)
|
869
|
+
|
870
|
+
# Prepare graphs_df lookup if provided
|
871
|
+
graphs_by_gid = {}
|
872
|
+
if graphIDHeader in graphs_df.columns:
|
873
|
+
graphs_by_gid = {gid: g.iloc[0].to_dict() for gid, g in graphs_df.groupby(graphIDHeader, dropna=False)}
|
874
|
+
|
875
|
+
# ---------- Ensure schema ----------
|
876
|
+
manager.ensure_schema() # Graph, Vertex, Edge
|
877
|
+
|
878
|
+
# ---------- Upsert per graph ----------
|
879
|
+
materialized_graph_ids = []
|
880
|
+
for raw_gid in gids:
|
881
|
+
gid_str = f"{graphIDPrefix}{int(raw_gid) if str(raw_gid).isdigit() else str(raw_gid)}"
|
882
|
+
materialized_graph_ids.append(gid_str)
|
883
|
+
|
884
|
+
nsub = nodes_df[nodes_df[graphIDHeader] == raw_gid].copy()
|
885
|
+
esub = edges_df[edges_df[graphIDHeader] == raw_gid].copy()
|
886
|
+
|
887
|
+
# Graph info
|
888
|
+
gcard_src = graphs_by_gid.get(raw_gid, {})
|
889
|
+
g_label = str(gcard_src.get(graphLabelHeader, "")) if gcard_src else ""
|
890
|
+
g_props = _props_from_row(pd.Series(gcard_src), exclude={graphIDHeader, graphLabelHeader}) if gcard_src else "{}"
|
891
|
+
num_nodes = int(nsub.shape[0])
|
892
|
+
num_edges = int(esub.shape[0])
|
893
|
+
|
894
|
+
# Remove any existing data for this graph id, then re-insert
|
895
|
+
manager.exec("""
|
896
|
+
MATCH (a:Vertex)-[r:Edge]->(b:Vertex)
|
897
|
+
WHERE a.graph_id = $gid AND b.graph_id = $gid
|
898
|
+
DELETE r;
|
899
|
+
""", {"gid": gid_str}, write=True)
|
900
|
+
manager.exec("MATCH (v:Vertex) WHERE v.graph_id = $gid DELETE v;", {"gid": gid_str}, write=True)
|
901
|
+
manager.exec("MATCH (g:Graph) WHERE g.id = $gid DELETE g;", {"gid": gid_str}, write=True)
|
902
|
+
|
903
|
+
manager.exec("""
|
904
|
+
CREATE (g:Graph {id:$id, label:$label, num_nodes:$num_nodes, num_edges:$num_edges, props:$props});
|
905
|
+
""", {
|
906
|
+
"id": gid_str,
|
907
|
+
"label": g_label,
|
908
|
+
"num_nodes": num_nodes,
|
909
|
+
"num_edges": num_edges,
|
910
|
+
"props": g_props,
|
911
|
+
}, write=True)
|
912
|
+
|
913
|
+
# Insert vertices
|
914
|
+
for _, row in nsub.iterrows():
|
915
|
+
node_id = row[nodeIDHeader]
|
916
|
+
vid = f"{gid_str}:{node_id}"
|
917
|
+
v_label = str(row[nodeLabelHeader]) if "label" in row and pd.notna(row[nodeLabelHeader]) else str(node_id)
|
918
|
+
|
919
|
+
# X/Y/Z: a missing column gets a random value; a value that float() rejects is stored as None
|
920
|
+
def _num_or_none(val):
|
921
|
+
try:
|
922
|
+
return float(val)
|
923
|
+
except Exception:
|
924
|
+
return None
|
925
|
+
|
926
|
+
x = _num_or_none(row[nodeXHeader]) if nodeXHeader in row else random.uniform(0,1000)
|
927
|
+
y = _num_or_none(row[nodeYHeader]) if nodeYHeader in row else random.uniform(0,1000)
|
928
|
+
z = _num_or_none(row[nodeZHeader]) if nodeZHeader in row else random.uniform(0,1000)
|
929
|
+
|
930
|
+
props = _props_from_row(row, exclude={graphIDHeader, nodeIDHeader, nodeLabelHeader, nodeXHeader, nodeYHeader, nodeZHeader})
|
931
|
+
manager.exec("""
|
932
|
+
CREATE (v:Vertex {id:$id, graph_id:$gid, label:$label, x:$x, y:$y, z:$z, props:$props});
|
933
|
+
""", {"id": vid, "gid": gid_str, "label": v_label, "x": x, "y": y, "z": z, "props": props}, write=True)
|
934
|
+
|
935
|
+
# Insert edges (Edge)
|
936
|
+
for _, row in esub.iterrows():
|
937
|
+
a_id = f"{gid_str}:{row[edgeSRCHeader]}"
|
938
|
+
b_id = f"{gid_str}:{row[edgeDSTHeader]}"
|
939
|
+
e_label = str(row[edgeLabelHeader]) if edgeLabelHeader in row and pd.notna(row[edgeLabelHeader]) else "connect"
|
940
|
+
e_props = _props_from_row(row, exclude={graphIDHeader, edgeSRCHeader, edgeDSTHeader, edgeLabelHeader})
|
941
|
+
|
942
|
+
manager.exec("""
|
943
|
+
MATCH (a:Vertex {id:$a_id}), (b:Vertex {id:$b_id})
|
944
|
+
CREATE (a)-[:Edge {label:$label, props:$props}]->(b);
|
945
|
+
""", {"a_id": a_id, "b_id": b_id, "label": e_label, "props": e_props}, write=True)
|
946
|
+
|
947
|
+
return {"graphs_upserted": len(materialized_graph_ids), "graph_ids": materialized_graph_ids}
|
948
|
+
|
949
|
+
|
950
|
+
|