topologicpy 0.8.55__py3-none-any.whl → 0.8.58__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
topologicpy/Kuzu.py ADDED
@@ -0,0 +1,950 @@
1
+ from __future__ import annotations
2
+ import threading, contextlib, time, json
3
+ from typing import Dict, Any, List, Optional
4
+
5
+ import os
6
+ import warnings
7
+
8
# Import kuzu, attempting a one-off pip installation when the first import fails.
try:
    import kuzu
except Exception:
    print("Kuzu - Installing required kuzu library.")
    # os.system() does not raise when the command fails; it reports failure through
    # its return value. Drive the --user fallback from that value, otherwise the
    # fallback can never run.
    if os.system("pip install kuzu") != 0:
        os.system("pip install kuzu --user")
    try:
        import kuzu
    except Exception:
        warnings.warn("Kuzu - Error: Could not import Kuzu.")
        # The rest of the module checks for None before touching the library.
        kuzu = None
21
+
22
+
23
+ class _DBCache:
24
+ """
25
+ One kuzu.Database per path. Thread-safe and process-local.
26
+ """
27
+ def __init__(self):
28
+ self._lock = threading.RLock()
29
+ self._cache: Dict[str, "kuzu.Database"] = {}
30
+
31
+ def get(self, path: str) -> "kuzu.Database":
32
+ if kuzu is None:
33
+ raise "Kuzu - Error: Kuzu is not available"
34
+ with self._lock:
35
+ db = self._cache.get(path)
36
+ if db is None:
37
+ db = kuzu.Database(path)
38
+ self._cache[path] = db
39
+ return db
40
+
41
+ class _WriteGate:
42
+ """
43
+ Serialize writes to avoid IO lock contention.
44
+ """
45
+ def __init__(self):
46
+ self._lock = threading.RLock()
47
+
48
+ @contextlib.contextmanager
49
+ def hold(self):
50
+ with self._lock:
51
+ yield
52
+
53
# Module-level singletons created at import time:
# _db_cache hands out kuzu.Database objects (used by _Mgr and Kuzu.Database),
# _write_gate is the lock that _ConnectionPool holds around write connections.
_db_cache = _DBCache()
_write_gate = _WriteGate()
55
+
56
+ class _ConnectionPool:
57
+ """
58
+ Per-thread kuzu.Connection pool bound to a Database instance.
59
+ """
60
+ def __init__(self, db: "kuzu.Database"):
61
+ self.db = db
62
+ self._local = threading.local()
63
+
64
+ def _ensure(self) -> "kuzu.Connection":
65
+ if not hasattr(self._local, "conn"):
66
+ self._local.conn = kuzu.Connection(self.db)
67
+ return self._local.conn
68
+
69
+ @contextlib.contextmanager
70
+ def connection(self, write: bool = False, retries: int = 5, backoff: float = 0.15):
71
+ conn = self._ensure()
72
+ if not write:
73
+ yield conn
74
+ return
75
+ # Serialize writes and retry transient failures
76
+ with _write_gate.hold():
77
+ attempt = 0
78
+ while True:
79
+ try:
80
+ yield conn
81
+ break
82
+ except Exception as e:
83
+ attempt += 1
84
+ if attempt > retries:
85
+ raise f"Kuzu write failed after {retries} retries: {e}"
86
+ time.sleep(backoff * attempt)
87
+
88
+ class _Mgr:
89
+ """
90
+ Lightweight facade (per-db-path) providing read/write execution and schema bootstrap.
91
+ """
92
+ def __init__(self, db_path: str):
93
+ self.db_path = db_path
94
+ self._db = _db_cache.get(db_path)
95
+ self._pool = _ConnectionPool(self._db)
96
+
97
+ @contextlib.contextmanager
98
+ def read(self):
99
+ with self._pool.connection(write=False) as c:
100
+ yield c
101
+
102
+ @contextlib.contextmanager
103
+ def write(self):
104
+ with self._pool.connection(write=True) as c:
105
+ yield c
106
+
107
+ def exec(self, query: str, params: Optional[dict] = None, write: bool = False):
108
+ with (self.write() if write else self.read()) as c:
109
+ with c.execute(query, parameters=params or {}) as res:
110
+ try:
111
+ return res.rows_as_dict().get_all()
112
+ except Exception:
113
+ return None
114
+
115
+ def ensure_schema(self):
116
+ # Node tables
117
+ self.exec("""
118
+ CREATE NODE TABLE IF NOT EXISTS Graph(
119
+ id STRING,
120
+ label STRING,
121
+ num_nodes INT64,
122
+ num_edges INT64,
123
+ props STRING,
124
+ PRIMARY KEY(id)
125
+ );
126
+ """, write=True)
127
+ self.exec("""
128
+ CREATE NODE TABLE IF NOT EXISTS Vertex(
129
+ id STRING,
130
+ graph_id STRING,
131
+ label STRING,
132
+ x DOUBLE,
133
+ y DOUBLE,
134
+ z DOUBLE,
135
+ props STRING,
136
+ PRIMARY KEY(id)
137
+ );
138
+ """, write=True)
139
+
140
+ # Relationship tables
141
+ self.exec("""
142
+ CREATE REL TABLE IF NOT EXISTS Edge(FROM Vertex TO Vertex, label STRING, props STRING);
143
+ """, write=True)
144
+
145
+ # Figure out later if we need sessions and steps
146
+ # self.exec("""
147
+ # CREATE NODE TABLE IF NOT EXISTS Session(
148
+ # id STRING,
149
+ # title STRING,
150
+ # created_at STRING,
151
+ # PRIMARY KEY(id)
152
+ # );
153
+ # """, write=True)
154
+ # self.exec("""
155
+ # CREATE NODE TABLE IF NOT EXISTS Step(
156
+ # id STRING,
157
+ # session_id STRING,
158
+ # idx INT64,
159
+ # action STRING,
160
+ # ok BOOL,
161
+ # message STRING,
162
+ # snapshot_before STRING,
163
+ # snapshot_after STRING,
164
+ # evidence STRING,
165
+ # created_at STRING,
166
+ # PRIMARY KEY(id)
167
+ # );
168
+ # """, write=True)
169
+ # self.exec("CREATE REL TABLE IF NOT EXISTS SessionHasStep(FROM Session TO Step);", write=True)
170
+
171
+
172
+ class Kuzu:
173
+ # ---------- Core (DB + Connection + Schema) ----------
174
@staticmethod
def EnsureSchema(manager, silent: bool = False) -> bool:
    """
    Asks the input manager to create the Kùzu schema if it is missing.

    Parameters
    ----------
    manager : Kuzu.Manager
        The manager of the Kùzu database. Its ensure_schema() method is called.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.

    Returns
    -------
    bool
        True if ensure_schema() completed, False if it raised an exception.
    """
    succeeded = True
    try:
        manager.ensure_schema()
    except Exception as e:
        succeeded = False
        if not silent:
            print(f"Kuzu.EnsureSchema - Error: {e}. Returning False.")
    return succeeded
198
+
199
@staticmethod
def Database(path: str, silent: bool = False):
    """
    Looks up the `kuzu.Database` object for `path` in the module-level database cache.

    Parameters
    ----------
    path : str
        The path of the Kùzu database, passed to the cache lookup.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.

    Returns
    -------
    kuzu.Database
        The database object returned by the cache, or None if the lookup raised.
    """
    database = None
    try:
        database = _db_cache.get(path)
    except Exception as e:
        if not silent:
            print(f"Kuzu.Database - Error: {e}. Returning None.")
    return database
222
+
223
@staticmethod
def Connection(manager, silent: bool = False):
    """
    Fetches the connection that the manager's read() context yields.

    Parameters
    ----------
    manager : Kuzu.Manager
        The manager of the Kùzu database.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.

    Returns
    -------
    kuzu.Connection
        The live connection, or None on error. Do NOT use it across threads.
    """
    try:
        # The connection object is handed back after the read() context has been left.
        with manager.read() as live:
            conn = live
    except Exception as e:
        if not silent:
            print(f"Kuzu.Connection - Error: {e}. Returning None.")
        return None
    return conn
247
+
248
@staticmethod
def Manager(path: str, silent: bool = False):
    """
    Builds a lightweight manager for the database at `path`.

    Parameters
    ----------
    path : str
        The path of the Kùzu database, passed to the manager constructor.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.

    Returns
    -------
    Kuzu.Manager
        The created manager, or None if its construction raised.
    """
    created = None
    try:
        created = _Mgr(path)
    except Exception as e:
        if not silent:
            print(f"Kuzu.Manager - Error: {e}. Returning None.")
    return created
270
+
271
@staticmethod
def UpsertGraph(manager,
                graph,
                graphIDKey: str = None,
                vertexIDKey: str = None,
                vertexLabelKey: str = None,
                mantissa: int = 6,
                silent: bool = False) -> str:
    """
    Upserts (deletes prior + inserts new) a TopologicPy graph.

    Parameters
    ----------
    manager : Kuzu.Manager
        The Kuzu database manager.
    graph : topologicpy.Graph
        The input TopologicPy graph.
    graphIDKey : str , optional
        The graph dictionary key under which the graph ID is stored. If None, Topology.UUID(graph) is used as the graph ID. Default is None.
    vertexIDKey : str , optional
        The vertex dictionary key under which the vertex ID is stored. If None, or if a vertex lacks the key, the ID "<graph id>:<vertex index>" is used. Default is None.
    vertexLabelKey : str , optional
        The vertex dictionary key under which the vertex label is stored. If None, or if a vertex lacks the key, the vertex index (as a string) is used. Default is None.
    mantissa : int , optional
        The mantissa passed to Graph.MeshData. Default is 6.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.

    Returns
    -------
    str
        The graph_id used, or None if a database operation failed.
    """
    from topologicpy.Graph import Graph
    from topologicpy.Topology import Topology
    from topologicpy.Dictionary import Dictionary

    d = Topology.Dictionary(graph)
    if graphIDKey is None:
        gid = Topology.UUID(graph)
    else:
        gid = Dictionary.ValueAtKey(d, graphIDKey, Topology.UUID(graph))
    g_props = Dictionary.PythonDictionary(d)
    mesh_data = Graph.MeshData(graph, mantissa=mantissa)
    verts = mesh_data['vertices']
    v_props = mesh_data['vertexDictionaries']
    edges = mesh_data['edges']
    e_props = mesh_data['edgeDictionaries']
    num_nodes = len(verts)
    num_edges = len(edges)

    def _vertex_id(index):
        # Shared by the vertex and edge inserts so both sides derive the same id.
        fallback = f"{gid}:{index}"
        if vertexIDKey is None:
            return fallback
        return v_props[index].get(vertexIDKey, fallback)

    try:
        manager.ensure_schema()
        # Upsert Graph
        manager.exec("MATCH (g:Graph) WHERE g.id = $id DELETE g;", {"id": gid}, write=True)
        manager.exec("""
            CREATE (g:Graph {id:$id, num_nodes:$num_nodes, num_edges: $num_edges, props:$props});
        """, {"id": gid, "num_nodes": num_nodes, "num_edges": num_edges, "props": json.dumps(g_props)}, write=True)

        # Remove existing vertices/edges for this graph_id
        manager.exec("""
            MATCH (a:Vertex)-[r:Edge]->(b:Vertex)
            WHERE a.graph_id = $gid AND b.graph_id = $gid
            DELETE r;
        """, {"gid": gid}, write=True)
        manager.exec("MATCH (v:Vertex) WHERE v.graph_id = $gid DELETE v;", {"gid": gid}, write=True)

        # Insert vertices
        for i, v in enumerate(verts):
            x, y, z = v
            vid = _vertex_id(i)
            if vertexLabelKey is None:
                label = str(i)
            else:
                # The label must be read with the label key (it was previously read
                # with vertexIDKey, which stored the vertex id as the label).
                label = v_props[i].get(vertexLabelKey, str(i))
            manager.exec("""
                CREATE (v:Vertex {id:$id, graph_id:$gid, label:$label, props:$props, x:$x, y:$y, z:$z});
            """, {"id": vid, "gid": gid, "label": label, "x": x, "y": y, "z": z,
                  "props": json.dumps(v_props[i])}, write=True)

        # Insert edges
        for i, e in enumerate(edges):
            a_id = _vertex_id(e[0])
            b_id = _vertex_id(e[1])
            manager.exec("""
                MATCH (a:Vertex {id:$a}), (b:Vertex {id:$b})
                CREATE (a)-[:Edge {label:$label, props:$props}]->(b);
            """, {"a": a_id, "b": b_id,
                  "label": e_props[i].get("label", str(i)),
                  "props": json.dumps(e_props[i])}, write=True)

        return gid
    except Exception as e:
        if not silent:
            print(f"Kuzu.UpsertGraph - Error: {e}. Returning None.")
        return None
367
+
368
@staticmethod
def GraphByID(manager, graphID: str, silent: bool = False):
    """
    Constructs a TopologicPy graph from Kùzu using the graphID input parameter.

    Parameters
    ----------
    manager : Kuzu.Manager
        The manager of the Kùzu database.
    graphID : str
        The graph ID to retrieve from Kùzu.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.

    Returns
    -------
    topologicpy.Graph
        A new TopologicPy Graph, or None if the graph is not found, has no vertices, or an error occurs.
    """
    import random
    from topologicpy.Graph import Graph
    from topologicpy.Dictionary import Dictionary
    from topologicpy.Vertex import Vertex
    from topologicpy.Edge import Edge
    from topologicpy.Topology import Topology

    def _coordinate(row, key):
        # The query always returns the x/y/z columns, so a missing coordinate shows up
        # as a None value rather than a missing key; test the value, not the key.
        value = row.get(key)
        return random.uniform(0, 1000) if value is None else value

    def _dictionary(row, default_label):
        # Decode the JSON props column and make sure a 'label' key is present.
        try:
            props = json.loads(row.get("props") or "{}")
        except Exception:
            props = {}
        props = dict(props or {})
        if "label" not in props:
            props["label"] = row.get("label") or default_label
        return Dictionary.ByKeysValues(list(props.keys()), list(props.values()))

    try:
        manager.ensure_schema()
        # Read the Graph
        graph_rows = manager.exec("""
            MATCH (g:Graph) WHERE g.id = $id
            RETURN g.id AS id, g.num_nodes AS num_nodes, g.num_edges AS num_edges, g.props AS props
            ;
        """, {"id": graphID}, write=False)
        if not graph_rows:
            return None
        graph_row = graph_rows[0]
        g_dict = dict(json.loads(graph_row.get("props") or "{}") or {})
        g_dict = Dictionary.ByPythonDictionary(g_dict)

        # Read vertices
        rows_v = manager.exec("""
            MATCH (v:Vertex) WHERE v.graph_id = $gid
            RETURN v.id AS id, v.label AS label, v.x AS x, v.y AS y, v.z AS z, v.props AS props
            ORDER BY id;
        """, {"gid": graphID}, write=False) or []

        id_to_vertex = {}
        vertices = []
        for row in rows_v:
            v = Vertex.ByCoordinates(_coordinate(row, "x"), _coordinate(row, "y"), _coordinate(row, "z"))
            v = Topology.SetDictionary(v, _dictionary(row, ""))
            id_to_vertex[row["id"]] = v
            vertices.append(v)

        # Read edges
        rows_e = manager.exec("""
            MATCH (a:Vertex)-[r:Edge]->(b:Vertex)
            WHERE a.graph_id = $gid AND b.graph_id = $gid
            RETURN a.id AS a_id, b.id AS b_id, r.label AS label, r.props AS props;
        """, {"gid": graphID}, write=False) or []
        edges = []
        for row in rows_e:
            va = id_to_vertex.get(row["a_id"])
            vb = id_to_vertex.get(row["b_id"])
            # Compare against None explicitly instead of relying on object truthiness.
            if va is None or vb is None:
                continue
            e = Edge.ByStartVertexEndVertex(va, vb)
            e = Topology.SetDictionary(e, _dictionary(row, "connect"))
            edges.append(e)

        if len(vertices) == 0:
            return None
        graph = Graph.ByVerticesEdges(vertices, edges)
        graph = Topology.SetDictionary(graph, g_dict)
        return graph
    except Exception as e:
        if not silent:
            print(f"Kuzu.GraphByID - Error: {e}. Returning None.")
        return None
474
+
475
@staticmethod
def GraphsByQuery(
    manager,
    query: str,
    params: Optional[dict] = None,
    silent: bool = False,
):
    """
    Executes a Kùzu Cypher query and returns a list of TopologicPy Graphs.

    The method will:
    1) run the query,
    2) extract distinct graph IDs from the result set (from a 'graph_id' column, or
       inferred from an 'a_id', 'b_id' or 'id' value shaped like "<graph_id>:<i>"),
    3) reconstruct each graph via Kuzu.GraphByID(...).

    Parameters
    ----------
    manager : Kuzu.Manager
        The manager of the Kùzu database.
    query : str
        A valid Kùzu Cypher query.
    params : dict , optional
        Parameters to pass with the query. Default is None.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.

    Returns
    -------
    list[topologic_core.Graph]
        A list of reconstructed TopologicPy graphs (graphs that could not be rebuilt are skipped), or None on error.

    """

    try:
        manager.ensure_schema()
        rows = manager.exec(query, params or {}, write=False) or []

        # Collect distinct graph IDs, keeping first-seen order
        gids = []
        for r in rows:
            gid = r.get('graph_id')

            # Fallback: try to infer from common id fields like "<graph_id>:<i>"
            if gid is None:
                for k in ("a_id", "b_id", "id"):
                    v = r.get(k)
                    if isinstance(v, str) and ":" in v:
                        gid = v.split(":", 1)[0]
                        break

            if gid and gid not in gids:
                gids.append(gid)

        # Reconstruct each graph
        graphs = []
        for gid in gids:
            # GraphByID takes the manager; the undefined name `path` used to be passed
            # here, so every reconstruction failed with a swallowed NameError.
            g = Kuzu.GraphByID(manager, gid, silent=silent)
            if g is not None:
                graphs.append(g)
        return graphs

    except Exception as e:
        if not silent:
            print(f"Kuzu.GraphsByQuery - Error: {e}. Returning None.")
        return None
540
+
541
@staticmethod
def DeleteGraph(manager, graphID: str, silent: bool = False) -> bool:
    """
    Removes everything stored for one graph id: its edges, its vertices and its Graph node.

    Parameters
    ----------
    manager : Kuzu.Manager
        The manager of the Kùzu database.
    graphID : str
        The id of the graph to be deleted.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.

    Returns
    -------
    bool
        True on success, False otherwise.
    """
    # Executed in this order: edges, then vertices, then the Graph node.
    statements = (
        """
            MATCH (a:Vertex)-[r:Edge]->(b:Vertex)
            WHERE a.graph_id = $gid AND b.graph_id = $gid
            DELETE r;
        """,
        "MATCH (v:Vertex) WHERE v.graph_id = $gid DELETE v;",
        "MATCH (g:Graph) WHERE g.id = $gid DELETE g;",
    )
    try:
        manager.ensure_schema()
        for statement in statements:
            manager.exec(statement, {"gid": graphID}, write=True)
    except Exception as e:
        if not silent:
            print(f"Kuzu.DeleteGraph - Error: {e}. Returning False.")
        return False
    return True
577
+
578
@staticmethod
def EmptyDatabase(manager, dropSchema: bool = False, recreateSchema: bool = True, silent: bool = False) -> bool:
    """
    Empties the Kùzu database served by the input manager.

    Two modes:
    - Soft clear (default): delete ALL relationships, then ALL nodes.
    - Hard reset (dropSchema=True): drop the Edge, Vertex and Graph tables, optionally re-creating the schema.

    Parameters
    ----------
    manager : Kuzu.Manager
        The manager of the Kùzu database.
    dropSchema : bool , optional
        If True, DROP the known tables instead of deleting rows. Default is False.
    recreateSchema : bool , optional
        If True and dropSchema is True, re-create the schema after dropping. Default is True.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.

    Returns
    -------
    bool
        True on success, False otherwise. A failure to drop an individual table is
        reported as a warning and does not make the method fail.
    """
    try:
        manager.ensure_schema()

        if not dropSchema:
            # Soft clear: relationships first, then nodes.
            manager.exec("MATCH (a)-[r]->(b) DELETE r;", write=True)
            manager.exec("MATCH (n) DELETE n;", write=True)
            return True

        # Hard reset: the relationship table is dropped before the node tables.
        for table in ("Edge", "Vertex", "Graph"):
            try:
                manager.exec(f"DROP TABLE IF EXISTS {table};", write=True)
            except Exception as _e:
                if not silent:
                    print(f"Kuzu.EmptyDatabase - Warning dropping table: {_e}")

        if recreateSchema:
            manager.ensure_schema()
        return True

    except Exception as e:
        if not silent:
            print(f"Kuzu.EmptyDatabase - Error: {e}. Returning False.")
        return False
635
+
636
@staticmethod
def ListGraphs(manager, where: dict = None, limit: int = 100, offset: int = 0, silent: bool = False) -> list[dict]:
    """
    Returns the stored Graph records, optionally filtered and paginated.

    Parameters
    ----------
    manager : Kuzu.Manager
        The manager of the Kùzu database.
    where : dict , optional
        The filters to apply. Supported keys (all optional, combined with AND):
        - id (exact match)
        - label (substring match)
        - props_contains (substring match against the text in `props`)
        - props_equals (exact string match against `props`)
        - min_nodes / max_nodes (integers)
        - min_edges / max_edges (integers)
        Text filters with an empty value and numeric filters set to None are ignored.
    limit : int , optional
        The maximum number of records to return. A value of None or 0 is treated as 100. Default is 100.
    offset : int , optional
        The number of records to skip before returning results (useful for pagination). Negative values are treated as 0. Default is 0.
    silent : bool , optional
        If set to True, error and warning messages are suppressed. Default is False.

    Returns
    -------
    list
        The list of found Graph python dictionaries (id, label, num_nodes, num_edges, props), ordered by id.

    """

    manager.ensure_schema()
    where = where or {}

    # (filter key, Cypher condition, query parameter name); values are sent as strings.
    text_filters = (
        ("id", "g.id = $id", "id"),
        ("label", "g.label CONTAINS $label_sub", "label_sub"),
        ("props_contains", "g.props CONTAINS $props_sub", "props_sub"),
        ("props_equals", "g.props = $props_equals", "props_equals"),
    )
    # (filter key, Cypher condition); the parameter shares the key's name and is sent as int.
    range_filters = (
        ("min_nodes", "g.num_nodes >= $min_nodes"),
        ("max_nodes", "g.num_nodes <= $max_nodes"),
        ("min_edges", "g.num_edges >= $min_edges"),
        ("max_edges", "g.num_edges <= $max_edges"),
    )

    clauses: list[str] = []
    bindings: dict = {}

    for key, clause, binding in text_filters:
        if key in where and where[key]:
            clauses.append(clause)
            bindings[binding] = str(where[key])

    for key, clause in range_filters:
        if key in where and where[key] is not None:
            clauses.append(clause)
            bindings[key] = int(where[key])

    filter_clause = ""
    if clauses:
        filter_clause = "WHERE " + " AND ".join(clauses)

    statement = f"""
        MATCH (g:Graph)
        {filter_clause}
        RETURN g.id AS id, g.label AS label,
               g.num_nodes AS num_nodes, g.num_edges AS num_edges,
               g.props AS props
        ORDER BY id
        SKIP $__offset LIMIT $__limit;
    """

    bindings["__offset"] = max(0, int(offset or 0))
    bindings["__limit"] = max(0, int(limit or 100))

    found = manager.exec(statement, bindings, write=False)
    return found or []
721
+
722
+
723
@staticmethod
def ByCSVPath(
    manager,
    path: str,
    graphIDPrefix: str = "g",
    graphIDHeader="graph_id",
    graphLabelHeader="label",
    edgeSRCHeader="src_id",
    edgeDSTHeader="dst_id",
    edgeLabelHeader="label",
    nodeIDHeader="node_id",
    nodeLabelHeader="label",
    nodeXHeader="X",
    nodeYHeader="Y",
    nodeZHeader="Z",
    silent: bool = False,
) -> Dict[str, Any]:
    """
    Load node/edge/graph CSVs from a folder (using its .yaml meta) and upsert them
    directly into Kùzu using the schema defined in Kuzu.py:

    - NODE TABLE Graph(id STRING PRIMARY KEY, label STRING, num_nodes INT64, num_edges INT64, props STRING)
    - NODE TABLE Vertex(id STRING PRIMARY KEY, graph_id STRING, label STRING, x DOUBLE, y DOUBLE, z DOUBLE, props STRING)
    - REL TABLE Edge(FROM Vertex TO Vertex, label STRING, props STRING)

    Any data already stored under a materialized graph ID is deleted before the new
    rows are inserted. CSV columns that are not mapped to a table column are stored
    as JSON text in `props`.

    Parameters
    ----------
    manager : Kuzu.Manager
        An initialized Kùzu manager; must provide ensure_schema() and exec(query, params, write=True/False).
    path : str
        Folder containing a dataset YAML (e.g., meta.yaml) that points to nodes/edges/graphs CSVs. The first .yaml file found in the folder is used.
    graphIDPrefix : str
        Prefix for materialized graph IDs (default "g"); e.g., graph 0 -> "g0".
    graphIDHeader : str , optional
        The column header string used to specify the graph id. Default is "graph_id".
    graphLabelHeader : str , optional
        The column header string used to specify the graph label. Default is "label".
    edgeSRCHeader : str , optional
        The column header string used to specify the source vertex id of edges. Default is "src_id".
    edgeDSTHeader : str , optional
        The column header string used to specify the destination vertex id of edges. Default is "dst_id".
    edgeLabelHeader : str , optional
        The column header string used to specify the label of edges. Default is "label".
    nodeIDHeader : str , optional
        The column header string used to specify the id of nodes. Default is "node_id".
    nodeLabelHeader : str , optional
        The column header string used to specify the label of nodes. Default is "label".
    nodeXHeader : str , optional
        The column header string used to specify the X coordinate of nodes. Default is "X".
    nodeYHeader : str , optional
        The column header string used to specify the Y coordinate of nodes. Default is "Y".
    nodeZHeader : str , optional
        The column header string used to specify the Z coordinate of nodes. Default is "Z".
    silent : bool
        If True, suppress warnings.

    Returns
    -------
    dict
        {"graphs_upserted": int, "graph_ids": [str, ...]}, or None if the folder, its YAML file, or the nodes/edges CSV files cannot be found.

    Raises
    ------
    ValueError
        If the nodes or edges CSV is missing one of its required columns.
    """
    import os
    import glob
    import json
    import random
    import pandas as pd
    import yaml

    # ---------- Helpers ----------
    def _find_yaml_files(folder_path: str):
        return glob.glob(os.path.join(folder_path, "*.yaml"))

    def _read_yaml(file_path: str):
        # Returns the (graphs, edges, nodes) CSV file names listed in the YAML meta.
        with open(file_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}
        # `or` also covers keys that are present but empty (parsed as None).
        edge_data = data.get("edge_data") or []
        node_data = data.get("node_data") or []
        graph_data = data.get("graph_data") or {}
        edges_rel = edge_data[0].get("file_name") if edge_data else None
        nodes_rel = node_data[0].get("file_name") if node_data else None
        graphs_rel = graph_data.get("file_name")
        return graphs_rel, edges_rel, nodes_rel

    def _props_from_row(row: pd.Series, exclude: set) -> str:
        # Serializes every column not listed in `exclude` as a JSON object.
        d = {}
        for k, v in row.items():
            if k in exclude:
                continue
            # normalize NaN -> None for clean JSON
            if isinstance(v, float) and pd.isna(v):
                d[k] = None
            else:
                d[k] = v
        try:
            return json.dumps(d, ensure_ascii=False)
        except Exception:
            # Fallback: stringify everything
            return json.dumps({k: (None if v is None else str(v)) for k, v in d.items()}, ensure_ascii=False)

    def _coordinate(row, header):
        # X/Y/Z may be missing, blank (NaN) or non-numeric; use a random numeric value
        # in all of those cases so that every stored vertex has usable coordinates.
        if header in row:
            try:
                value = float(row[header])
            except (TypeError, ValueError, OverflowError):
                value = None
            if value is not None and not pd.isna(value):
                return value
        return random.uniform(0, 1000)

    # ---------- Validate path and locate YAML/CSVs ----------
    if not os.path.exists(path) or not os.path.isdir(path):
        if not silent:
            print("ByCSVPath - Error: path must be an existing folder. Returning None.")
        return None

    yaml_files = _find_yaml_files(path)
    if len(yaml_files) < 1:
        if not silent:
            print("ByCSVPath - Error: no YAML file found in the folder. Returning None.")
        return None
    yaml_file = yaml_files[0]
    graphs_rel, edges_rel, nodes_rel = _read_yaml(yaml_file)

    # Resolve CSV paths
    graphs_csv = os.path.join(path, graphs_rel) if graphs_rel else None
    edges_csv = os.path.join(path, edges_rel) if edges_rel else None
    nodes_csv = os.path.join(path, nodes_rel) if nodes_rel else None

    if not edges_csv or not os.path.exists(edges_csv):
        if not silent:
            print("ByCSVPath - Error: edges CSV not found. Returning None.")
        return None
    if not nodes_csv or not os.path.exists(nodes_csv):
        if not silent:
            print("ByCSVPath - Error: nodes CSV not found. Returning None.")
        return None

    # ---------- Load CSVs ----------
    nodes_df = pd.read_csv(nodes_csv)
    edges_df = pd.read_csv(edges_csv)
    graphs_df = pd.read_csv(graphs_csv) if graphs_csv and os.path.exists(graphs_csv) else pd.DataFrame()

    # Required columns
    for req_cols, df_name, df in [
        ({graphIDHeader, nodeIDHeader}, "nodes", nodes_df),
        ({graphIDHeader, edgeSRCHeader, edgeDSTHeader}, "edges", edges_df),
    ]:
        missing = req_cols.difference(df.columns)
        if missing:
            raise ValueError(f"ByCSVPath - {df_name}.csv is missing required columns: {missing}")

    # Graph IDs present in the data
    gids = pd.Index([]).union(nodes_df[graphIDHeader].dropna().unique()).union(
        edges_df[graphIDHeader].dropna().unique()
    )

    # Prepare graphs_df lookup if provided
    graphs_by_gid = {}
    if graphIDHeader in graphs_df.columns:
        graphs_by_gid = {gid: g.iloc[0].to_dict() for gid, g in graphs_df.groupby(graphIDHeader, dropna=False)}

    # ---------- Ensure schema ----------
    manager.ensure_schema()  # Graph, Vertex, Edge

    # ---------- Upsert per graph ----------
    materialized_graph_ids = []
    for raw_gid in gids:
        gid_str = f"{graphIDPrefix}{int(raw_gid) if str(raw_gid).isdigit() else str(raw_gid)}"
        materialized_graph_ids.append(gid_str)

        nsub = nodes_df[nodes_df[graphIDHeader] == raw_gid].copy()
        esub = edges_df[edges_df[graphIDHeader] == raw_gid].copy()

        # Graph info
        gcard_src = graphs_by_gid.get(raw_gid, {})
        g_label = str(gcard_src.get(graphLabelHeader, "")) if gcard_src else ""
        g_props = _props_from_row(pd.Series(gcard_src), exclude={graphIDHeader, graphLabelHeader}) if gcard_src else "{}"
        num_nodes = int(nsub.shape[0])
        num_edges = int(esub.shape[0])

        # Remove any existing data for this graph id, then re-insert
        manager.exec("""
            MATCH (a:Vertex)-[r:Edge]->(b:Vertex)
            WHERE a.graph_id = $gid AND b.graph_id = $gid
            DELETE r;
        """, {"gid": gid_str}, write=True)
        manager.exec("MATCH (v:Vertex) WHERE v.graph_id = $gid DELETE v;", {"gid": gid_str}, write=True)
        manager.exec("MATCH (g:Graph) WHERE g.id = $gid DELETE g;", {"gid": gid_str}, write=True)

        manager.exec("""
            CREATE (g:Graph {id:$id, label:$label, num_nodes:$num_nodes, num_edges:$num_edges, props:$props});
        """, {
            "id": gid_str,
            "label": g_label,
            "num_nodes": num_nodes,
            "num_edges": num_edges,
            "props": g_props,
        }, write=True)

        # Insert vertices
        for _, row in nsub.iterrows():
            node_id = row[nodeIDHeader]
            vid = f"{gid_str}:{node_id}"
            # Test for the configured label column (the literal "label" used to be
            # tested, which broke any custom nodeLabelHeader).
            if nodeLabelHeader in row and pd.notna(row[nodeLabelHeader]):
                v_label = str(row[nodeLabelHeader])
            else:
                v_label = str(node_id)

            x = _coordinate(row, nodeXHeader)
            y = _coordinate(row, nodeYHeader)
            z = _coordinate(row, nodeZHeader)

            props = _props_from_row(row, exclude={graphIDHeader, nodeIDHeader, nodeLabelHeader, nodeXHeader, nodeYHeader, nodeZHeader})
            manager.exec("""
                CREATE (v:Vertex {id:$id, graph_id:$gid, label:$label, x:$x, y:$y, z:$z, props:$props});
            """, {"id": vid, "gid": gid_str, "label": v_label, "x": x, "y": y, "z": z, "props": props}, write=True)

        # Insert edges (Edge)
        for _, row in esub.iterrows():
            a_id = f"{gid_str}:{row[edgeSRCHeader]}"
            b_id = f"{gid_str}:{row[edgeDSTHeader]}"
            e_label = str(row[edgeLabelHeader]) if edgeLabelHeader in row and pd.notna(row[edgeLabelHeader]) else "connect"
            e_props = _props_from_row(row, exclude={graphIDHeader, edgeSRCHeader, edgeDSTHeader, edgeLabelHeader})

            manager.exec("""
                MATCH (a:Vertex {id:$a_id}), (b:Vertex {id:$b_id})
                CREATE (a)-[:Edge {label:$label, props:$props}]->(b);
            """, {"a_id": a_id, "b_id": b_id, "label": e_label, "props": e_props}, write=True)

    return {"graphs_upserted": len(materialized_graph_ids), "graph_ids": materialized_graph_ids}
948
+
949
+
950
+