structured2graph 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. __init__.py +47 -0
  2. core/__init__.py +23 -0
  3. core/hygm/__init__.py +74 -0
  4. core/hygm/hygm.py +2351 -0
  5. core/hygm/models/__init__.py +82 -0
  6. core/hygm/models/graph_models.py +667 -0
  7. core/hygm/models/llm_models.py +229 -0
  8. core/hygm/models/operations.py +176 -0
  9. core/hygm/models/sources.py +68 -0
  10. core/hygm/models/user_operations.py +139 -0
  11. core/hygm/strategies/__init__.py +17 -0
  12. core/hygm/strategies/base.py +36 -0
  13. core/hygm/strategies/deterministic.py +262 -0
  14. core/hygm/strategies/llm.py +904 -0
  15. core/hygm/validation/__init__.py +38 -0
  16. core/hygm/validation/base.py +194 -0
  17. core/hygm/validation/graph_schema_validator.py +687 -0
  18. core/hygm/validation/memgraph_data_validator.py +991 -0
  19. core/migration_agent.py +1369 -0
  20. core/schema/spec.json +155 -0
  21. core/utils/meta_graph.py +108 -0
  22. database/__init__.py +36 -0
  23. database/adapters/__init__.py +11 -0
  24. database/adapters/memgraph.py +318 -0
  25. database/adapters/mysql.py +311 -0
  26. database/adapters/postgresql.py +335 -0
  27. database/analyzer.py +396 -0
  28. database/factory.py +219 -0
  29. database/models.py +209 -0
  30. main.py +518 -0
  31. query_generation/__init__.py +20 -0
  32. query_generation/cypher_generator.py +129 -0
  33. query_generation/schema_utilities.py +88 -0
  34. structured2graph-0.1.1.dist-info/METADATA +197 -0
  35. structured2graph-0.1.1.dist-info/RECORD +41 -0
  36. structured2graph-0.1.1.dist-info/WHEEL +4 -0
  37. structured2graph-0.1.1.dist-info/entry_points.txt +2 -0
  38. structured2graph-0.1.1.dist-info/licenses/LICENSE +21 -0
  39. utils/__init__.py +57 -0
  40. utils/config.py +235 -0
  41. utils/environment.py +404 -0
@@ -0,0 +1,687 @@
1
+ """
2
+ Graph Schema Validator for graph models.
3
+
4
+ This module provides comprehensive validation of GraphModel objects against
5
+ the original database structure to ensure complete coverage and correctness
6
+ before migration begins. This is Type 1 validation in the two-tier system.
7
+ """
8
+
9
+ import logging
10
+ from typing import Dict, Any, TYPE_CHECKING
11
+ from .base import (
12
+ BaseValidator,
13
+ ValidationResult,
14
+ ValidationSeverity,
15
+ ValidationCategory,
16
+ )
17
+
18
+ if TYPE_CHECKING:
19
+ from ..models.graph_models import GraphModel
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
class GraphSchemaValidator(BaseValidator):
    """
    Validates GraphModel against original database structure.

    This validator ensures that the GraphModel properly represents
    all tables, properties, relationships, indexes, and constraints
    from the source database before migration begins. This is Type 1
    validation in the two-tier system.

    The ``database_structure`` argument may be either the new structured
    ``DatabaseStructure`` object or the legacy ``dict`` format; the format
    is detected with ``hasattr`` checks via the ``_get_*`` helpers below.
    """

    def validate(
        self, graph_model: "GraphModel", database_structure: Dict[str, Any]
    ) -> ValidationResult:
        """
        Perform comprehensive graph schema validation.

        Args:
            graph_model: GraphModel to validate
            database_structure: Original database structure from
                data_interface (structured object or legacy dict)

        Returns:
            ValidationResult with detailed validation results
        """
        self.reset()

        logger.info("Starting graph schema validation...")

        try:
            # Basic presence checks: both inputs are required.
            if not graph_model:
                self.add_issue(
                    ValidationSeverity.CRITICAL,
                    ValidationCategory.STRUCTURE,
                    "Graph model is not provided",
                    recommendation="Ensure graph model is created",
                )

            if not database_structure:
                self.add_issue(
                    ValidationSeverity.CRITICAL,
                    ValidationCategory.STRUCTURE,
                    "Database structure is not provided",
                    recommendation="Ensure database structure is extracted",
                )

            # If basic checks fail, return early without running the
            # coverage/quality validations (they would just raise).
            critical_issues = [
                issue
                for issue in self.issues
                if issue.severity == ValidationSeverity.CRITICAL
            ]
            if critical_issues:
                return ValidationResult(
                    validation_type="graph_schema",
                    success=False,
                    summary="Validation failed: Missing required inputs",
                    issues=self.issues,
                    metrics=self.metrics,
                )

            # Core validations: coverage of the source schema by the model.
            self._validate_table_coverage(graph_model, database_structure)
            self._validate_property_coverage(graph_model, database_structure)
            self._validate_relationship_coverage(graph_model, database_structure)
            self._validate_index_coverage(graph_model, database_structure)
            self._validate_constraint_coverage(graph_model, database_structure)

            # Quality validations: model-internal consistency and hygiene.
            self._validate_schema_consistency(graph_model)
            self._validate_naming_conventions(graph_model)
            self._validate_performance_considerations(graph_model)

            # Calculate final coverage metrics from the counters set above.
            self.metrics.calculate_coverage()

            # Success means "no CRITICAL issues"; WARNING/INFO still pass.
            summary = self._generate_summary()
            success = not any(
                issue.severity == ValidationSeverity.CRITICAL
                for issue in self.issues
            )

            result = ValidationResult(
                validation_type="graph_schema",
                success=success,
                summary=summary,
                issues=self.issues,
                metrics=self.metrics,
                details={
                    "database_structure_summary": self._get_db_summary(
                        database_structure
                    ),
                    "graph_model_summary": self._get_model_summary(graph_model),
                },
            )

            logger.info("Graph schema validation completed: %s", summary)
            return result

        except Exception as e:
            # Convert unexpected failures into a CRITICAL issue so callers
            # always receive a ValidationResult instead of an exception.
            logger.error("Graph schema validation failed: %s", str(e))
            self.add_issue(
                ValidationSeverity.CRITICAL,
                ValidationCategory.STRUCTURE,
                f"Validation process failed: {str(e)}",
                recommendation=("Check graph model and database structure format"),
            )

            return ValidationResult(
                validation_type="graph_schema",
                success=False,
                summary=f"Validation failed: {str(e)}",
                issues=self.issues,
                metrics=self.metrics,
            )

    # ------------------------------------------------------------------
    # Format helpers: new structured DatabaseStructure vs. legacy dict
    # ------------------------------------------------------------------

    @staticmethod
    def _get_entity_tables(database_structure):
        """Return the table_name -> table_info mapping for either format."""
        if hasattr(database_structure, "entity_tables"):
            # New structured format - work directly with objects
            return database_structure.entity_tables
        # Legacy format fallback
        return database_structure.get("entity_tables", {})

    @staticmethod
    def _get_relationships(database_structure):
        """Return the list of database relationships for either format."""
        if hasattr(database_structure, "relationships"):
            # New structured format - work directly with objects
            return database_structure.relationships
        # Legacy format fallback
        return database_structure.get("relationships", [])

    @staticmethod
    def _get_table_column_names(table_info):
        """Return the set of column names of a table for either format."""
        if hasattr(table_info, "columns"):
            # New structured format: ColumnInfo objects
            return {col.name for col in table_info.columns}
        # Legacy format: list of {"field": ...} dicts under "schema"
        return {col["field"] for col in table_info.get("schema", [])}

    # ------------------------------------------------------------------
    # Core coverage validations
    # ------------------------------------------------------------------

    def _validate_table_coverage(self, graph_model, database_structure):
        """Validate that all entity tables are represented as nodes."""
        logger.debug("Validating table coverage...")

        entity_tables = self._get_entity_tables(database_structure)
        self.metrics.tables_total = len(entity_tables)

        if not entity_tables:
            self.add_issue(
                ValidationSeverity.WARNING,
                ValidationCategory.COVERAGE,
                "No entity tables found in database structure",
                recommendation="Verify database structure extraction",
            )
            return

        # A table is covered when some node declares it as its source.
        covered_tables = set()
        for node in graph_model.nodes:
            if node.source and hasattr(node.source, "name"):
                covered_tables.add(node.source.name)

        self.metrics.tables_covered = len(covered_tables)

        # Check for missing tables and describe each one briefly.
        missing_tables = set(entity_tables.keys()) - covered_tables
        if missing_tables:
            missing_table_details = []
            for table_name in sorted(missing_tables):
                table_info = entity_tables[table_name]
                if hasattr(table_info, "columns"):
                    column_count = len(table_info.columns)
                    pk_count = len(table_info.primary_keys)
                    fk_count = len(table_info.foreign_keys)
                else:
                    column_count = len(table_info.get("schema", []))
                    pk_count = len(table_info.get("primary_keys", []))
                    fk_count = len(table_info.get("foreign_keys", []))

                missing_table_details.append(
                    f"'{table_name}' ({column_count} cols, {pk_count} PKs, "
                    f"{fk_count} FKs)"
                )

            details_str = "; ".join(missing_table_details)

            self.add_issue(
                ValidationSeverity.CRITICAL,
                ValidationCategory.COVERAGE,
                (
                    f"Missing {len(missing_tables)} entity tables in graph "
                    f"model: {details_str}. These tables should be "
                    f"represented as nodes in the graph model."
                ),
                expected=list(entity_tables.keys()),
                actual=list(covered_tables),
                recommendation=(
                    "Create nodes for each missing table. For example:\n"
                    + "\n".join(
                        [
                            f" - Add '{table}' node with appropriate labels"
                            for table in sorted(missing_tables)
                        ]
                    )
                ),
                details={"missing_tables": list(missing_tables)},
            )

        logger.debug(
            "Table coverage: %d/%d tables covered",
            self.metrics.tables_covered,
            self.metrics.tables_total,
        )

    def _validate_property_coverage(self, graph_model, database_structure):
        """Validate that all table columns are represented as properties.

        Foreign key columns that were converted into relationships count
        as covered and are never reported as missing properties.
        """
        logger.debug("Validating property coverage...")

        entity_tables = self._get_entity_tables(database_structure)
        foreign_key_columns = self._get_foreign_key_columns(database_structure)

        total_properties = 0
        covered_properties = 0
        missing_by_table = {}

        for table_name, table_info in entity_tables.items():
            table_columns = self._get_table_column_names(table_info)
            total_properties += len(table_columns)

            # Find corresponding node; without one, all columns are missing.
            node = self._find_node_for_table(graph_model, table_name)
            if not node:
                missing_by_table[table_name] = list(table_columns)
                continue

            # Check property coverage for this node
            node_properties = {prop.key for prop in node.properties}
            missing_props = table_columns - node_properties

            # Separate foreign key columns from regular missing properties
            table_foreign_keys = foreign_key_columns.get(table_name, set())
            missing_foreign_keys = missing_props & table_foreign_keys
            missing_regular_props = missing_props - table_foreign_keys

            # Count coverage: covered + foreign keys that became relationships
            covered_properties += len(table_columns) - len(missing_regular_props)

            # Only report regular properties as missing (not foreign keys)
            if missing_regular_props:
                missing_by_table[table_name] = list(missing_regular_props)

            # Log foreign keys that became relationships (for debugging)
            if missing_foreign_keys:
                logger.debug(
                    "Table %s: %d foreign key columns became relationships: %s",
                    table_name,
                    len(missing_foreign_keys),
                    missing_foreign_keys,
                )

        self.metrics.properties_total = total_properties
        self.metrics.properties_covered = covered_properties

        if missing_by_table:
            total_missing = sum(len(props) for props in missing_by_table.values())

            # Create detailed message about missing properties
            missing_details = []
            for table_name, missing_props in missing_by_table.items():
                props_str = ", ".join(sorted(missing_props))
                missing_details.append(f"Table '{table_name}': {props_str}")

            details_message = "; ".join(missing_details)

            self.add_issue(
                ValidationSeverity.CRITICAL,
                ValidationCategory.COVERAGE,
                (
                    f"Missing {total_missing} non-foreign-key properties "
                    f"across {len(missing_by_table)} tables. Details: "
                    f"{details_message}. These properties may contain "
                    "important data that should be preserved in "
                    "the graph model."
                ),
                expected=f"{total_properties} properties",
                actual=f"{covered_properties} properties",
                recommendation=(
                    "Add missing properties to corresponding nodes:\n"
                    + "\n".join(
                        [
                            f" - Add to '{table}' node: {', '.join(props)}"
                            for table, props in missing_by_table.items()
                        ]
                    )
                    + "\nNote: Foreign key columns are correctly modeled "
                    "as relationships, not properties."
                ),
                details={"missing_by_table": missing_by_table},
            )

        logger.debug(
            "Property coverage: %d/%d properties covered (including foreign keys as relationships)",
            covered_properties,
            total_properties,
        )

    def _validate_relationship_coverage(self, graph_model, database_structure):
        """Validate that foreign keys are represented as relationships."""
        logger.debug("Validating relationship coverage...")

        # Bug fix: the previous implementation called
        # database_structure.get(...) unconditionally, which raised
        # AttributeError for the new structured (object) format.
        relationships = self._get_relationships(database_structure)
        self.metrics.relationships_total = len(relationships)

        if not relationships:
            self.add_issue(
                ValidationSeverity.INFO,
                ValidationCategory.COVERAGE,
                "No relationships found in database structure",
                recommendation="Verify foreign key extraction is correct",
            )
            return

        # Simple coverage check - count modeled vs database relationships
        modeled_count = len(graph_model.edges)
        self.metrics.relationships_covered = min(modeled_count, len(relationships))

        if modeled_count < len(relationships):
            # Describe a sample of database relationships for the message.
            db_relationship_details = []
            for rel in relationships[:5]:  # Show first 5 for brevity
                if isinstance(rel, dict):
                    from_table = rel.get("from_table", "unknown")
                    to_table = rel.get("to_table", "unknown")
                    column = rel.get("column", "unknown")
                    db_relationship_details.append(
                        f"{from_table}.{column} -> {to_table}"
                    )
                else:
                    # Handle object format if needed
                    db_relationship_details.append(str(rel))

            missing_count = len(relationships) - modeled_count
            details_str = "; ".join(db_relationship_details)
            if len(relationships) > 5:
                details_str += f" (and {len(relationships) - 5} more)"

            self.add_issue(
                ValidationSeverity.WARNING,
                ValidationCategory.COVERAGE,
                (
                    f"Fewer relationships modeled ({modeled_count}) than in "
                    f"database ({len(relationships)}). Missing "
                    f"{missing_count} potential relationships. Database "
                    f"relationships include: {details_str}. These foreign key "
                    "relationships may represent important "
                    "connections that should be modeled as graph "
                    "relationships."
                ),
                expected=f"{len(relationships)} relationships",
                actual=f"{modeled_count} relationships",
                recommendation=(
                    "Review database foreign keys and consider adding "
                    "missing relationships."
                ),
            )

        logger.debug(
            "Relationship coverage: %d/%d relationships covered",
            self.metrics.relationships_covered,
            len(relationships),
        )

    def _validate_index_coverage(self, graph_model, database_structure):
        """Validate that important indexes are planned."""
        logger.debug("Validating index coverage...")

        # Get planned indexes from model
        planned_indexes = len(graph_model.node_indexes) + len(graph_model.edge_indexes)
        self.metrics.indexes_covered = planned_indexes

        # Count indexes in the source database (either format).
        db_indexes_count = 0
        for table_info in self._get_entity_tables(database_structure).values():
            if hasattr(table_info, "indexes"):
                db_indexes_count += len(table_info.indexes)
            else:
                db_indexes_count += len(table_info.get("indexes", []))

        self.metrics.indexes_total = db_indexes_count

        if db_indexes_count > 0 and planned_indexes == 0:
            self.add_issue(
                ValidationSeverity.WARNING,
                ValidationCategory.PERFORMANCE,
                f"No indexes planned, but {db_indexes_count} exist in source database",
                expected="Indexes planned for performance",
                actual="No indexes planned",
                recommendation="Consider adding indexes for frequently queried properties",
            )

        logger.debug(
            "Index planning: %d indexes planned vs %d in source database",
            planned_indexes,
            db_indexes_count,
        )

    def _validate_constraint_coverage(self, graph_model, database_structure):
        """Validate that database constraints are represented."""
        logger.debug("Validating constraint coverage...")

        # Get planned constraints from model
        planned_constraints = len(graph_model.node_constraints) + len(
            graph_model.edge_constraints
        )
        self.metrics.constraints_covered = planned_constraints

        # Count source constraints: primary keys + foreign keys per table.
        db_constraints_count = 0
        for table_info in self._get_entity_tables(database_structure).values():
            if hasattr(table_info, "primary_keys") and not isinstance(
                table_info, dict
            ):
                db_constraints_count += len(table_info.primary_keys)
                db_constraints_count += len(table_info.foreign_keys)
            else:
                db_constraints_count += len(table_info.get("primary_keys", []))
                db_constraints_count += len(table_info.get("foreign_keys", []))

        self.metrics.constraints_total = db_constraints_count

        if db_constraints_count > 0 and planned_constraints == 0:
            self.add_issue(
                ValidationSeverity.WARNING,
                ValidationCategory.CONSISTENCY,
                f"No constraints planned, but {db_constraints_count} "
                "exist in source",
                expected="Constraints planned for data integrity",
                actual="No constraints planned",
                recommendation="Consider adding constraints for data " "integrity",
            )

        logger.debug(
            "Constraint planning: %d constraints planned vs %d in source",
            planned_constraints,
            db_constraints_count,
        )

    # ------------------------------------------------------------------
    # Quality validations (model-internal)
    # ------------------------------------------------------------------

    def _validate_schema_consistency(self, graph_model):
        """Validate internal consistency of the graph model."""
        logger.debug("Validating schema consistency...")

        # Check for duplicate node labels across nodes.
        seen_labels = set()
        for node in graph_model.nodes:
            for label in node.labels:
                if label in seen_labels:
                    self.add_issue(
                        ValidationSeverity.WARNING,
                        ValidationCategory.CONSISTENCY,
                        f"Duplicate node label '{label}' found",
                        recommendation="Ensure node labels are unique",
                    )
                seen_labels.add(label)

        # Check for orphaned relationships: every edge endpoint label
        # must belong to some node in the model.
        node_labels = set()
        for node in graph_model.nodes:
            node_labels.update(node.labels)

        for edge in graph_model.edges:
            # Check start node labels
            missing_start = set(edge.start_node_labels) - node_labels
            if missing_start:
                self.add_issue(
                    ValidationSeverity.CRITICAL,
                    ValidationCategory.CONSISTENCY,
                    f"Relationship '{edge.edge_type}' references missing "
                    f"start node labels: {missing_start}",
                    recommendation="Ensure all relationship endpoints "
                    "reference existing node labels",
                )

            # Check end node labels
            missing_end = set(edge.end_node_labels) - node_labels
            if missing_end:
                self.add_issue(
                    ValidationSeverity.CRITICAL,
                    ValidationCategory.CONSISTENCY,
                    f"Relationship '{edge.edge_type}' references missing "
                    f"end node labels: {missing_end}",
                    recommendation="Ensure all relationship endpoints "
                    "reference existing node labels",
                )

    def _validate_naming_conventions(self, graph_model):
        """Validate naming conventions and best practices."""
        logger.debug("Validating naming conventions...")

        # Node labels should be PascalCase (at minimum start uppercase).
        # Guard against empty labels, which previously raised IndexError.
        for node in graph_model.nodes:
            for label in node.labels:
                if not (label and label[0].isupper()):
                    self.add_issue(
                        ValidationSeverity.INFO,
                        ValidationCategory.CONSISTENCY,
                        f"Node label '{label}' should start with uppercase",
                        recommendation="Use PascalCase for node labels",
                    )

        # Relationship types should be UPPER_CASE.
        for edge in graph_model.edges:
            if not edge.edge_type.isupper():
                self.add_issue(
                    ValidationSeverity.INFO,
                    ValidationCategory.CONSISTENCY,
                    f"Relationship type '{edge.edge_type}' should be " "uppercase",
                    recommendation="Use UPPER_CASE for relationship types",
                )

    def _validate_performance_considerations(self, graph_model):
        """Validate performance-related aspects."""
        logger.debug("Validating performance considerations...")

        # Flag nodes whose id/key-like properties have no planned index.
        for node in graph_model.nodes:
            key_properties = [
                prop.key
                for prop in node.properties
                if "id" in prop.key.lower() or "key" in prop.key.lower()
            ]

            if key_properties:
                # Check if any of these have planned indexes
                has_index = any(
                    set(key_properties) & set(index.properties)
                    for index in graph_model.node_indexes
                    if index.labels
                    and any(label in node.labels for label in index.labels)
                )

                if not has_index:
                    node_label = "/".join(node.labels)
                    key_props = ", ".join(key_properties)
                    self.add_issue(
                        ValidationSeverity.INFO,
                        ValidationCategory.PERFORMANCE,
                        f"Node {node_label} has key properties without " "indexes",
                        recommendation=f"Consider adding indexes for: " f"{key_props}",
                    )

    # ------------------------------------------------------------------
    # Helper methods
    # ------------------------------------------------------------------

    def _get_foreign_key_columns(self, database_structure):
        """
        Extract foreign key columns from database structure.

        Returns a dict mapping table_name -> set of foreign key column names.
        """
        foreign_key_columns = {}

        # Check if we have the new DatabaseStructure model
        if hasattr(database_structure, "entity_tables"):
            # New structured format - work directly with objects
            for table_name, table_info in database_structure.entity_tables.items():
                fk_columns = {fk.column_name for fk in table_info.foreign_keys}
                if fk_columns:
                    foreign_key_columns[table_name] = fk_columns
        else:
            # Legacy format fallback
            entity_tables = database_structure.get("entity_tables", {})
            for table_name, table_info in entity_tables.items():
                foreign_keys = table_info.get("foreign_keys", [])
                fk_columns = set()

                for fk in foreign_keys:
                    if isinstance(fk, dict):
                        # Handle dict format
                        if "column" in fk:
                            fk_columns.add(fk["column"])
                        elif "column_name" in fk:
                            fk_columns.add(fk["column_name"])
                    else:
                        # Handle object format
                        if hasattr(fk, "column_name"):
                            fk_columns.add(fk.column_name)

                if fk_columns:
                    foreign_key_columns[table_name] = fk_columns

        return foreign_key_columns

    def _find_node_for_table(self, graph_model, table_name: str):
        """Find the node that represents a given table, or None."""
        for node in graph_model.nodes:
            if node.source and hasattr(node.source, "name"):
                if node.source.name == table_name:
                    return node
        return None

    def _get_model_summary(self, graph_model) -> Dict[str, Any]:
        """Get a summary of the graph model."""
        return {
            "nodes": len(graph_model.nodes),
            "relationships": len(graph_model.edges),
            "node_indexes": len(graph_model.node_indexes),
            "edge_indexes": len(graph_model.edge_indexes),
            "node_constraints": len(graph_model.node_constraints),
            "edge_constraints": len(graph_model.edge_constraints),
            "node_labels": [
                label for node in graph_model.nodes for label in node.labels
            ],
            "relationship_types": [edge.edge_type for edge in graph_model.edges],
        }

    def _get_db_summary(self, database_structure) -> Dict[str, Any]:
        """Get a summary of the database structure (either format)."""
        # Bug fix: previously used database_structure.get(...) directly,
        # which failed for the new structured (object) format.
        entity_tables = self._get_entity_tables(database_structure)
        relationships = self._get_relationships(database_structure)

        if hasattr(database_structure, "entity_tables"):
            database_type = getattr(database_structure, "database_type", "unknown")
            database_name = getattr(database_structure, "database_name", "unknown")
        else:
            database_type = database_structure.get("database_type", "unknown")
            database_name = database_structure.get("database_name", "unknown")

        return {
            "entity_tables": len(entity_tables),
            "relationships": len(relationships),
            "database_type": database_type,
            "database_name": database_name,
        }
+ }