structured2graph 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. __init__.py +47 -0
  2. core/__init__.py +23 -0
  3. core/hygm/__init__.py +74 -0
  4. core/hygm/hygm.py +2351 -0
  5. core/hygm/models/__init__.py +82 -0
  6. core/hygm/models/graph_models.py +667 -0
  7. core/hygm/models/llm_models.py +229 -0
  8. core/hygm/models/operations.py +176 -0
  9. core/hygm/models/sources.py +68 -0
  10. core/hygm/models/user_operations.py +139 -0
  11. core/hygm/strategies/__init__.py +17 -0
  12. core/hygm/strategies/base.py +36 -0
  13. core/hygm/strategies/deterministic.py +262 -0
  14. core/hygm/strategies/llm.py +904 -0
  15. core/hygm/validation/__init__.py +38 -0
  16. core/hygm/validation/base.py +194 -0
  17. core/hygm/validation/graph_schema_validator.py +687 -0
  18. core/hygm/validation/memgraph_data_validator.py +991 -0
  19. core/migration_agent.py +1369 -0
  20. core/schema/spec.json +155 -0
  21. core/utils/meta_graph.py +108 -0
  22. database/__init__.py +36 -0
  23. database/adapters/__init__.py +11 -0
  24. database/adapters/memgraph.py +318 -0
  25. database/adapters/mysql.py +311 -0
  26. database/adapters/postgresql.py +335 -0
  27. database/analyzer.py +396 -0
  28. database/factory.py +219 -0
  29. database/models.py +209 -0
  30. main.py +518 -0
  31. query_generation/__init__.py +20 -0
  32. query_generation/cypher_generator.py +129 -0
  33. query_generation/schema_utilities.py +88 -0
  34. structured2graph-0.1.1.dist-info/METADATA +197 -0
  35. structured2graph-0.1.1.dist-info/RECORD +41 -0
  36. structured2graph-0.1.1.dist-info/WHEEL +4 -0
  37. structured2graph-0.1.1.dist-info/entry_points.txt +2 -0
  38. structured2graph-0.1.1.dist-info/licenses/LICENSE +21 -0
  39. utils/__init__.py +57 -0
  40. utils/config.py +235 -0
  41. utils/environment.py +404 -0
core/hygm/hygm.py ADDED
@@ -0,0 +1,2351 @@
1
+ # flake8: noqa
2
+ """
3
+ Main HyGM (Hypothetical Graph Modeling) class.
4
+
5
+ This is the primary interface for the modular HyGM system.
6
+ """
7
+
8
+ import copy
9
+ import uuid
10
+ import logging
11
+ from enum import Enum
12
+ from typing import Dict, Any, Optional, List, Tuple, TYPE_CHECKING
13
+
14
+ if TYPE_CHECKING:
15
+ from .models.graph_models import GraphModel, GraphNode, GraphRelationship
16
+ from .models.operations import ModelModifications
17
+
18
+ try:
19
+ from .strategies import (
20
+ BaseModelingStrategy,
21
+ DeterministicStrategy,
22
+ LLMStrategy,
23
+ )
24
+ from .validation import GraphSchemaValidator
25
+ except ImportError:
26
+ from core.hygm.strategies import (
27
+ BaseModelingStrategy,
28
+ DeterministicStrategy,
29
+ LLMStrategy,
30
+ )
31
+ from core.hygm.validation import GraphSchemaValidator
32
+
33
+ try:
34
+ from ..utils.meta_graph import (
35
+ node_key as meta_node_key,
36
+ summarize_node as meta_summarize_node,
37
+ relationship_key as meta_relationship_key,
38
+ summarize_relationship as meta_summarize_relationship,
39
+ )
40
+ except ImportError:
41
+ from core.utils.meta_graph import ( # type: ignore
42
+ node_key as meta_node_key,
43
+ summarize_node as meta_summarize_node,
44
+ relationship_key as meta_relationship_key,
45
+ summarize_relationship as meta_summarize_relationship,
46
+ )
47
+
48
+ try:
49
+ from .models.user_operations import UserOperationHistory
50
+ except ImportError:
51
+ from core.hygm.models.user_operations import UserOperationHistory
52
+
53
+ logger = logging.getLogger(__name__)
54
+
55
+
56
class ModelingMode(Enum):
    """Modeling modes for HyGM.

    ``HyGM.create_graph_model`` checks this value to decide whether to
    build the model in a single pass or to hand over to the table-by-table
    incremental flow.
    """

    # Build the model in one pass with the selected strategy.
    AUTOMATIC = "automatic"
    # Build a draft model, then review it with the user table by table.
    INCREMENTAL = "incremental"
61
+
62
+
63
class GraphModelingStrategy(Enum):
    """Graph modeling strategies available.

    Used as the cache key and selector in ``HyGM._get_strategy_instance``,
    which maps each member to a concrete strategy object.
    """

    DETERMINISTIC = "deterministic"  # Rule-based graph creation
    LLM_POWERED = "llm_powered"  # LLM generates the graph model
68
+
69
+
70
+ class HyGM:
71
+ """
72
+ Main Hypothetical Graph Modeling class.
73
+
74
+ Uses different strategies to create intelligent graph models from
75
+ relational schemas. Supports automatic generation or incremental
76
+ refinement with user feedback.
77
+ """
78
+
79
def __init__(
    self,
    llm=None,
    mode: ModelingMode = ModelingMode.AUTOMATIC,
    strategy: GraphModelingStrategy = GraphModelingStrategy.DETERMINISTIC,
    existing_meta_graph: Optional[Dict[str, Any]] = None,
):
    """
    Set up a HyGM modeling session.

    Args:
        llm: Unified LLM client instance for AI-powered modeling
        mode: AUTOMATIC or INCREMENTAL modeling mode
        strategy: DETERMINISTIC or LLM_POWERED strategy
        existing_meta_graph: Existing meta graph for incremental modeling;
            a falsy value is stored as an empty dict
    """
    # Caller-supplied configuration.
    self.llm, self.mode, self.strategy = llm, mode, strategy
    self.existing_meta_graph = existing_meta_graph if existing_meta_graph else {}

    # Per-session state, filled in as models are created and refined.
    self.database_structure = None
    self.current_graph_model = None
    self.iteration_count = 0
    self._strategy_cache = {}

    # User operation tracking
    self.session_id = str(uuid.uuid4())
    self.user_operation_history: Optional["UserOperationHistory"] = None
106
+
107
def create_graph_model(
    self,
    database_structure: Dict[str, Any],
    domain_context: Optional[str] = None,
    strategy: Optional[GraphModelingStrategy] = None,
) -> "GraphModel":
    """
    Build a graph model for the given database structure.

    Args:
        database_structure: Database structure from data_interface
        domain_context: Optional domain context for better modeling
        strategy: Override the default strategy for this call

    Returns:
        GraphModel produced by the selected strategy (or by the
        incremental flow when the instance is in INCREMENTAL mode)
    """
    active_strategy = strategy if strategy else self.strategy
    self.database_structure = database_structure

    logger.info("Creating graph model using %s strategy...", active_strategy.value)

    # Incremental mode delegates the whole job to the table-by-table flow.
    if self.mode == ModelingMode.INCREMENTAL:
        return self._incremental_modeling(
            database_structure, domain_context, active_strategy
        )

    # Automatic mode: one pass with the selected strategy.
    builder = self._get_strategy_instance(active_strategy)
    result = builder.create_model(database_structure, domain_context)
    self.current_graph_model = result

    if active_strategy != GraphModelingStrategy.LLM_POWERED:
        return result

    # LLM output gets an extra validate-and-improve pass in automatic mode.
    result = self._validate_and_improve_automatic_llm_model(
        result, database_structure, domain_context
    )
    self.current_graph_model = result
    return result
150
+
151
def _get_strategy_instance(
    self, strategy: GraphModelingStrategy
) -> BaseModelingStrategy:
    """Return the strategy object for ``strategy``, creating it on first use.

    Instances are memoized in ``self._strategy_cache`` keyed by the enum
    member. Raises ValueError for a strategy that is neither LLM_POWERED
    nor DETERMINISTIC.
    """
    try:
        return self._strategy_cache[strategy]
    except KeyError:
        pass  # not built yet; fall through and construct it

    if strategy == GraphModelingStrategy.LLM_POWERED:
        instance = LLMStrategy(
            llm_client=self.llm, model_name="gpt-4", temperature=0.1
        )
    elif strategy == GraphModelingStrategy.DETERMINISTIC:
        instance = DeterministicStrategy()
    else:
        raise ValueError(f"Unknown strategy: {strategy}")

    self._strategy_cache[strategy] = instance
    return instance
167
+
168
def _interactive_refinement_loop(
    self,
    model: "GraphModel",
    strategy: GraphModelingStrategy,
) -> "GraphModel":
    """
    Run the accept / modify / validate loop on the combined model.

    The model is displayed, the user picks an action, and the loop repeats
    until the user accepts. After every modification or validation the
    resulting model is stored in ``self.current_graph_model``.

    Returns:
        The model as it stands when the user accepts it.
    """
    logger.info("Entering interactive refinement loop for incremental modeling")

    self.current_graph_model = model
    if self.iteration_count == 0:
        self.iteration_count = 1

    # Each non-terminal choice maps to (log message, handler taking model+strategy).
    handlers = {
        "modify": (
            "Modifying combined model interactively...",
            self._modify_model_interactively,
        ),
        "validate": (
            "Validating combined model on user request",
            self._perform_manual_validation,
        ),
    }

    while True:
        self._display_current_model(model)
        selected = self._get_refinement_choice()

        if selected == "accept":
            logger.info("Combined model accepted by user")
            return model

        action = handlers.get(selected)
        if action is None:
            # Unrecognised choice: show the model and ask again.
            continue

        log_message, handler = action
        logger.info(log_message)
        model = handler(model, strategy)
        self.current_graph_model = model
205
+
206
+ def _table_review_decision(
207
+ self,
208
+ table_name: str,
209
+ table_model: "GraphModel",
210
+ ) -> Tuple[bool, List[str], str]:
211
+ """Determine whether a table needs user review based on metadata."""
212
+ if not self.existing_meta_graph:
213
+ return True, ["No stored migration metadata"], ""
214
+
215
+ node_summaries = self.existing_meta_graph.get("node_summaries", {}) or {}
216
+ rel_summaries = self.existing_meta_graph.get("relationship_summaries", {}) or {}
217
+ existing_counts = self.existing_meta_graph.get("table_counts", {}) or {}
218
+
219
+ current_counts: Dict[str, Any] = {}
220
+ if isinstance(self.database_structure, dict):
221
+ current_counts = self.database_structure.get("table_counts", {}) or {}
222
+
223
+ change_reasons: List[str] = []
224
+ produced_node_keys = set()
225
+ produced_rel_keys = set()
226
+
227
+ new_count = current_counts.get(table_name)
228
+ old_count = existing_counts.get(table_name)
229
+ if new_count is not None and old_count is not None and new_count != old_count:
230
+ if new_count > old_count:
231
+ change_reasons.append("Row count increased since last migration")
232
+ else:
233
+ change_reasons.append("Row count decreased since last migration")
234
+
235
+ for node_def in getattr(table_model, "nodes", []):
236
+ key = meta_node_key(node_def)
237
+ produced_node_keys.add(key)
238
+ summary = meta_summarize_node(node_def)
239
+ stored = node_summaries.get(key)
240
+ display_name = summary.get("source") or key.replace("source::", "")
241
+
242
+ if not stored:
243
+ change_reasons.append(f"New node definition for {display_name}")
244
+ continue
245
+
246
+ if summary.get("properties") != stored.get("properties", []):
247
+ change_reasons.append(f"Properties changed for {display_name}")
248
+ if summary.get("id_field") != stored.get("id_field"):
249
+ change_reasons.append(f"Identifier field changed for {display_name}")
250
+ if summary.get("mapping") != stored.get("mapping", {}):
251
+ change_reasons.append(f"Mapping changed for {display_name}")
252
+
253
+ stored_node_keys = {
254
+ key
255
+ for key, summary in node_summaries.items()
256
+ if summary.get("source") == table_name
257
+ }
258
+ missing_nodes = stored_node_keys - produced_node_keys
259
+ if missing_nodes:
260
+ change_reasons.append("Existing node definition removed from model")
261
+
262
+ for rel_def in getattr(table_model, "edges", []):
263
+ key = meta_relationship_key(rel_def)
264
+ produced_rel_keys.add(key)
265
+ summary = meta_summarize_relationship(rel_def)
266
+ if table_name not in {
267
+ summary.get("start_table"),
268
+ summary.get("end_table"),
269
+ summary.get("join_table"),
270
+ }:
271
+ continue
272
+
273
+ stored = rel_summaries.get(key)
274
+ rel_name = summary.get("edge_type") or key
275
+
276
+ if not stored:
277
+ change_reasons.append(f"New relationship {rel_name}")
278
+ continue
279
+
280
+ if summary.get("mapping") != stored.get("mapping", {}):
281
+ change_reasons.append(f"Relationship mapping changed for {rel_name}")
282
+ if summary.get("start") != stored.get("start", []):
283
+ change_reasons.append(
284
+ f"Relationship start labels changed for {rel_name}"
285
+ )
286
+ if summary.get("end") != stored.get("end", []):
287
+ change_reasons.append(f"Relationship end labels changed for {rel_name}")
288
+
289
+ stored_rel_keys = {
290
+ key
291
+ for key, summary in rel_summaries.items()
292
+ if table_name
293
+ in {
294
+ summary.get("start_table"),
295
+ summary.get("end_table"),
296
+ summary.get("join_table"),
297
+ }
298
+ }
299
+ missing_rels = stored_rel_keys - produced_rel_keys
300
+ if missing_rels:
301
+ change_reasons.append("Existing relationship removed from model")
302
+
303
+ if change_reasons:
304
+ return True, change_reasons, ""
305
+
306
+ if new_count is not None:
307
+ skip_message = f"Metadata unchanged (rows: {new_count})"
308
+ else:
309
+ skip_message = "Metadata unchanged"
310
+
311
+ return False, [], skip_message
312
+
313
def _incremental_modeling(
    self,
    database_structure: Dict[str, Any],
    domain_context: Optional[str] = None,
    strategy: GraphModelingStrategy = GraphModelingStrategy.DETERMINISTIC,
) -> "GraphModel":
    """
    Incremental modeling process with table-by-table confirmation.

    A complete draft model is generated once with the selected strategy and
    split into per-table sub-models. Tables whose metadata is unchanged are
    merged automatically; for every other table the proposal is shown and
    the user chooses to accept, skip, modify, or finish early. After the
    summary the user may enter the interactive refinement loop.

    Args:
        database_structure: Database structure; its ``"tables"`` mapping
            drives the per-table loop
        domain_context: Optional domain context passed to the strategy
        strategy: Strategy used to generate the draft model

    Returns:
        The incrementally assembled GraphModel, which is also stored in
        ``self.current_graph_model`` on every return path.
    """
    logger.info("Starting incremental modeling session...")

    # Get the strategy instance
    strategy_instance = self._get_strategy_instance(strategy)

    # Generate a complete draft model once using the selected strategy
    logger.info(
        "Generating full graph model before incremental review using %s strategy",
        strategy.value,
    )
    full_model = strategy_instance.create_model(database_structure, domain_context)
    self.current_graph_model = full_model

    # Initialize an empty graph model to build incrementally.
    # Same relative-then-absolute fallback the module-level imports use.
    try:
        from .models.graph_models import GraphModel
    except ImportError:
        from core.hygm.models.graph_models import GraphModel

    incremental_model = GraphModel(
        nodes=[], edges=[], node_indexes=[], node_constraints=[]
    )

    # Extract tables from database structure
    tables = database_structure.get("tables", {})
    if not tables:
        print("āŒ No tables found in database structure")
        # Keep the stored model in sync with what is returned.
        self.current_graph_model = incremental_model
        return incremental_model

    self._print_banner("INCREMENTAL MODELING SESSION")
    print("\n🧠 Generated a draft graph model for the entire database.")
    print(
        "We will now review tables with detected changes so you can "
        "approve or adjust them one by one.\n"
    )

    table_models = self._build_table_model_map(full_model, tables)

    processed_tables = []
    skipped_tables = []
    auto_accepted_tables: List[Tuple[str, str]] = []

    # Process each table individually
    for table_name, table_info in tables.items():
        print("=" * 60)
        print(f"PROCESSING TABLE: {table_name}")
        print("=" * 60)
        try:
            table_model = table_models.get(table_name)
            if table_model is None:
                table_model = GraphModel(
                    nodes=[],
                    edges=[],
                    node_indexes=[],
                    node_constraints=[],
                )

            (
                needs_review,
                change_reasons,
                skip_message,
            ) = self._table_review_decision(table_name, table_model)

            if not needs_review:
                # Nothing changed since the stored metadata: merge silently.
                self._merge_table_model_into_incremental(
                    incremental_model, table_model
                )
                processed_tables.append(table_name)
                message = skip_message or "No changes detected"
                print(f"šŸ¤– Auto-accepted {table_name}: {message}")
                auto_accepted_tables.append((table_name, message))
                continue

            if change_reasons:
                print("āš ļø Changes detected:")
                for reason in change_reasons:
                    print(f" - {reason}")

            self._display_table_details_and_proposals(
                table_name,
                table_info,
                table_model,
            )

            # Get user decision
            user_choice = self._get_incremental_choice(table_name)

            if user_choice == "accept":
                # Add this table's nodes to the incremental model
                self._merge_table_model_into_incremental(
                    incremental_model, table_model
                )
                processed_tables.append(table_name)
                print(f"āœ… Added {table_name} to the graph model")
                # Refinement is now offered after the full session summary.
            elif user_choice == "skip":
                skipped_tables.append(table_name)
                print(f"ā­ļø Skipped {table_name}")
            elif user_choice == "modify":
                # Allow user to modify the proposed node
                modified_model = self._modify_table_node_interactively(
                    table_model, table_name
                )
                self._merge_table_model_into_incremental(
                    incremental_model, modified_model
                )
                processed_tables.append(table_name)
                print(f"āœ… Added modified {table_name} to the graph model")
                # Refinement is now offered after the full session summary.
            elif user_choice == "finish":
                print("šŸ Finishing incremental modeling session...")
                break

        except Exception as e:
            # Best effort: a failure on one table must not abort the session.
            logger.error("Error processing table %s: %s", table_name, e)
            print(f"āŒ Error processing {table_name}: {e}")

    # Final summary
    self._merge_source_less_elements(incremental_model, full_model)
    self._print_banner("INCREMENTAL MODELING SUMMARY")
    print(f"āœ… Processed tables: {len(processed_tables)}")
    if processed_tables:
        print(f" {', '.join(processed_tables)}")

    if auto_accepted_tables:
        print(f"šŸ¤– Auto-accepted tables: {len(auto_accepted_tables)}")
        auto_details = [
            f"{name}{f' ({msg})' if msg else ''}"
            for name, msg in auto_accepted_tables
        ]
        print(f" {', '.join(auto_details)}")

    if skipped_tables:
        print(f"ā­ļø Skipped tables: {len(skipped_tables)}")
        print(f" {', '.join(skipped_tables)}")

    print("\nšŸ“Š Final model statistics:")
    print(f" Nodes: {len(incremental_model.nodes)}")
    print(f" Relationships: {len(incremental_model.edges)}")
    print(f" Indexes: {len(incremental_model.node_indexes)}")
    print(f" Constraints: {len(incremental_model.node_constraints)}")

    review_choice = self._get_user_input_choice(
        "\nWould you like to review and refine the combined model?\n"
        "1. Finish with the incremental result\n"
        "2. Enter the interactive refinement loop\n"
        "\nSelect option (1-2) or press Enter to finish: ",
        {"1": "finish", "2": "review", "": "finish"},
        "finish",
    )

    if review_choice == "review":
        logger.info(
            "Switching from incremental flow to interactive refinement",
        )
        incremental_model = self._interactive_refinement_loop(
            incremental_model,
            strategy,
        )

    self.current_graph_model = incremental_model
    return incremental_model
486
+
487
+ def _display_table_details_and_proposals(
488
+ self,
489
+ table_name: str,
490
+ table_info: Dict[str, Any],
491
+ table_model: "GraphModel",
492
+ ) -> None:
493
+ """Display table information and proposed graph elements."""
494
+ print(f"\nšŸ“‹ TABLE: {table_name}")
495
+
496
+ # Show table columns
497
+ columns = table_info.get("schema") or table_info.get("columns", [])
498
+ if columns:
499
+ if isinstance(columns, dict):
500
+ items = columns.items()
501
+ else:
502
+ items = [
503
+ (
504
+ col.get("field") or col.get("name"),
505
+ {
506
+ "type": col.get("type") or col.get("data_type"),
507
+ "null": col.get("null"),
508
+ },
509
+ )
510
+ for col in columns
511
+ ]
512
+
513
+ print(f" Columns ({len(columns)}):")
514
+ for col_name, col_info in items:
515
+ if not col_name:
516
+ continue
517
+ col_type = col_info.get("type", "unknown")
518
+ nullable_flag = col_info.get("null")
519
+ is_nullable = nullable_flag not in {"NO", False, "false", 0}
520
+ nullable = " (nullable)" if is_nullable else ""
521
+ print(f" - {col_name}: {col_type}{nullable}")
522
+
523
+ # Show primary keys
524
+ primary_keys = table_info.get("primary_keys", [])
525
+ if primary_keys:
526
+ print(f" Primary Keys: {', '.join(primary_keys)}")
527
+
528
+ # Show proposed node(s)
529
+ print("\nšŸŽÆ PROPOSED NODE(S):")
530
+ proposed_nodes = table_model.nodes
531
+ if proposed_nodes:
532
+ for i, node in enumerate(proposed_nodes, 1):
533
+ labels = " | ".join(node.labels)
534
+ properties = [p.key for p in node.properties]
535
+ print(f" {i}. Node Labels: {labels}")
536
+ print(f" Properties: {properties}")
537
+ else:
538
+ print(" āŒ No nodes proposed for this table")
539
+
540
+ # Show proposed relationships related to this table
541
+ relationships = table_model.edges
542
+ print("\nšŸ”— PROPOSED RELATIONSHIPS:")
543
+ if relationships:
544
+ for i, relationship in enumerate(relationships, 1):
545
+ start_labels = " | ".join(relationship.start_node_labels)
546
+ end_labels = " | ".join(relationship.end_node_labels)
547
+ props = [p.key for p in relationship.properties]
548
+ print(
549
+ f" {i}. ({start_labels})-[:{relationship.edge_type}]->({end_labels})"
550
+ )
551
+ if props:
552
+ print(f" Properties: {props}")
553
+ else:
554
+ print(" āŒ No relationships proposed for this table")
555
+
556
+ def _get_incremental_choice(self, table_name: str) -> str:
557
+ """Get user choice for incremental modeling."""
558
+ print(f"\nWhat would you like to do with table '{table_name}'?")
559
+ print("1. Accept - Add proposed changes to the graph model")
560
+ print("2. Skip - Skip this table for now")
561
+ print("3. Modify - Modify the proposed graph entities before adding")
562
+ print("4. Finish - Stop incremental modeling and return current model")
563
+
564
+ choices = {
565
+ "1": "accept",
566
+ "2": "skip",
567
+ "3": "modify",
568
+ "4": "finish",
569
+ }
570
+ return self._get_user_input_choice(
571
+ f"\nEnter your choice for {table_name} (1-4): ", choices, "accept"
572
+ )
573
+
574
+ def _merge_table_model_into_incremental(
575
+ self, incremental_model: "GraphModel", table_model: "GraphModel"
576
+ ) -> None:
577
+ """Merge a single table's model into the incremental model."""
578
+ # Add nodes (avoid duplicates based on labels)
579
+ existing_node_labels = {
580
+ tuple(sorted(node.labels)) for node in incremental_model.nodes
581
+ }
582
+
583
+ for node in table_model.nodes:
584
+ node_labels_tuple = tuple(sorted(node.labels))
585
+ if node_labels_tuple not in existing_node_labels:
586
+ incremental_model.nodes.append(node)
587
+ existing_node_labels.add(node_labels_tuple)
588
+
589
+ # Add edges (avoid duplicates)
590
+ existing_edges = {
591
+ (
592
+ edge.edge_type,
593
+ tuple(edge.start_node_labels),
594
+ tuple(edge.end_node_labels),
595
+ )
596
+ for edge in incremental_model.edges
597
+ }
598
+
599
+ for edge in table_model.edges:
600
+ edge_key = (
601
+ edge.edge_type,
602
+ tuple(edge.start_node_labels),
603
+ tuple(edge.end_node_labels),
604
+ )
605
+ if edge_key not in existing_edges:
606
+ incremental_model.edges.append(edge)
607
+ existing_edges.add(edge_key)
608
+
609
+ # Add indexes
610
+ incremental_model.node_indexes.extend(table_model.node_indexes)
611
+
612
+ # Add constraints
613
+ incremental_model.node_constraints.extend(table_model.node_constraints)
614
+
615
def _build_table_model_map(
    self,
    full_model: "GraphModel",
    tables: Dict[str, Any],
) -> Dict[str, "GraphModel"]:
    """Build lightweight sub-models per table from the complete model.

    For each table name the sub-model holds deep copies of:

    * the nodes accepted by ``_node_matches_table``,
    * the edges accepted by ``_relationship_matches_table``,
    * the indexes and constraints whose label set equals the label set of
      one of those nodes.

    Args:
        full_model: Complete draft model to slice.
        tables: Mapping whose keys are the table names to build for.

    Returns:
        Mapping of table name to its GraphModel slice.
    """
    # Same relative-then-absolute fallback the module-level imports use.
    try:
        from .models.graph_models import GraphModel
    except ImportError:
        from core.hygm.models.graph_models import GraphModel

    table_models: Dict[str, GraphModel] = {}
    for table_name in tables:
        table_nodes = [
            copy.deepcopy(node)
            for node in full_model.nodes
            if self._node_matches_table(node, table_name)
        ]

        table_edges = [
            copy.deepcopy(edge)
            for edge in full_model.edges
            if self._relationship_matches_table(edge, table_name)
        ]

        # Indexes/constraints are attached through their label set.
        node_label_keys = {tuple(sorted(node.labels)) for node in table_nodes}

        table_indexes = [
            copy.deepcopy(index)
            for index in full_model.node_indexes
            if index.labels and tuple(sorted(index.labels)) in node_label_keys
        ]

        table_constraints = [
            copy.deepcopy(constraint)
            for constraint in full_model.node_constraints
            if constraint.labels
            and tuple(sorted(constraint.labels)) in node_label_keys
        ]

        table_models[table_name] = GraphModel(
            nodes=table_nodes,
            edges=table_edges,
            node_indexes=table_indexes,
            node_constraints=table_constraints,
        )

    return table_models
660
+
661
+ def _node_matches_table(self, node: "GraphNode", table_name: str) -> bool:
662
+ """Return True if a node traces back to the specified table."""
663
+ source = getattr(node, "source", None)
664
+ if not source:
665
+ return False
666
+
667
+ source_name = getattr(source, "name", None)
668
+ if isinstance(source_name, str) and source_name.lower() == table_name.lower():
669
+ return True
670
+
671
+ mapping = getattr(source, "mapping", {}) or {}
672
+ for key in ("table", "source", "source_table", "primary_table"):
673
+ value = mapping.get(key)
674
+ if isinstance(value, str) and value.lower() == table_name.lower():
675
+ return True
676
+
677
+ for value in mapping.values():
678
+ if isinstance(value, str) and value.lower() == table_name.lower():
679
+ return True
680
+
681
+ labels = getattr(node, "labels", []) or []
682
+ for label in labels:
683
+ if isinstance(label, str) and table_name.lower() in label.lower():
684
+ return True
685
+
686
+ return False
687
+
688
+ def _relationship_matches_table(
689
+ self, relationship: "GraphRelationship", table_name: str
690
+ ) -> bool:
691
+ """Return True if a relationship is associated with the table."""
692
+ source = getattr(relationship, "source", None)
693
+ mapping = getattr(source, "mapping", {}) or {}
694
+
695
+ if source:
696
+ source_name = getattr(source, "name", None)
697
+ if (
698
+ isinstance(source_name, str)
699
+ and source_name.lower() == table_name.lower()
700
+ ):
701
+ return True
702
+
703
+ for key in (
704
+ "from_table",
705
+ "to_table",
706
+ "join_table",
707
+ "through_table",
708
+ "source",
709
+ ):
710
+ value = mapping.get(key)
711
+ if isinstance(value, str) and value.lower() == table_name.lower():
712
+ return True
713
+
714
+ for value in mapping.values():
715
+ if isinstance(value, str) and value.lower() == table_name.lower():
716
+ return True
717
+
718
+ # Fallback to matching on label names when mapping metadata is unavailable
719
+ combined_labels = list(relationship.start_node_labels) + list(
720
+ relationship.end_node_labels
721
+ )
722
+ for label in combined_labels:
723
+ if isinstance(label, str) and table_name.lower() in label.lower():
724
+ return True
725
+
726
+ return False
727
+
728
+ def _merge_source_less_elements(
729
+ self,
730
+ incremental_model: "GraphModel",
731
+ full_model: "GraphModel",
732
+ ) -> None:
733
+ """Add global elements that don't belong to a specific table."""
734
+ if not full_model:
735
+ return
736
+
737
+ from .models.graph_models import GraphModel
738
+
739
+ anonymous_nodes = [
740
+ copy.deepcopy(node)
741
+ for node in full_model.nodes
742
+ if getattr(node, "source", None) is None
743
+ ]
744
+ anonymous_edges = [
745
+ copy.deepcopy(edge)
746
+ for edge in full_model.edges
747
+ if getattr(edge, "source", None) is None
748
+ ]
749
+
750
+ if not anonymous_nodes and not anonymous_edges:
751
+ return
752
+
753
+ placeholder_model = GraphModel(
754
+ nodes=anonymous_nodes,
755
+ edges=anonymous_edges,
756
+ node_indexes=[],
757
+ node_constraints=[],
758
+ )
759
+
760
+ self._merge_table_model_into_incremental(incremental_model, placeholder_model)
761
+
762
+ def _modify_table_node_interactively(
763
+ self, table_model: "GraphModel", table_name: str
764
+ ) -> "GraphModel":
765
+ """Allow user to modify the proposed node for a table."""
766
+ print(f"\nšŸ”§ MODIFYING NODE FOR TABLE: {table_name}")
767
+ print("You can use natural language to modify the proposed node.")
768
+ print("Examples:")
769
+ print(" - Change the label from 'User' to 'Person'")
770
+ print(" - Remove the 'email' property")
771
+ print(" - Add a 'full_name' property")
772
+
773
+ if not self.llm:
774
+ print("āŒ LLM not available for natural language modifications.")
775
+ print("Returning original model.")
776
+ return table_model
777
+
778
+ while True:
779
+ try:
780
+ user_input = input(
781
+ f"\nDescribe changes for {table_name} " f"(or 'done' to finish): "
782
+ ).strip()
783
+
784
+ if user_input.lower() == "done":
785
+ break
786
+ elif not user_input:
787
+ print("Please describe the change you'd like to make.")
788
+ continue
789
+
790
+ # Use existing natural language parsing
791
+ operations = self._parse_natural_language_to_operations(
792
+ user_input, table_model
793
+ )
794
+
795
+ if operations:
796
+ print(f"āœ… Understood: {operations.reasoning}")
797
+ table_model = self._apply_operations_to_model(
798
+ table_model, operations
799
+ )
800
+
801
+ # Show updated model
802
+ print(f"\nšŸ“‹ UPDATED NODE FOR {table_name}:")
803
+ for node in table_model.nodes:
804
+ labels = " | ".join(node.labels)
805
+ properties = [p.key for p in node.properties]
806
+ print(f" Labels: {labels}")
807
+ print(f" Properties: {properties}")
808
+ else:
809
+ print("āŒ I didn't understand that command. " "Please try again.")
810
+
811
+ except (EOFError, KeyboardInterrupt):
812
+ print(f"\nFinished modifying {table_name}")
813
+ break
814
+
815
+ return table_model
816
+
817
+ def _print_banner(self, title: str, width: int = 60) -> None:
818
+ """Print a formatted banner with title."""
819
+ print("\n" + "=" * width)
820
+ print(title)
821
+ print("=" * width)
822
+
823
+ def _display_current_model(self, model: "GraphModel") -> None:
824
+ """Display the current graph model to the user."""
825
+ self._print_banner(f"GRAPH MODEL - ITERATION {self.iteration_count}")
826
+
827
+ print(f"\nNODES ({len(model.nodes)}):")
828
+ for i, node in enumerate(model.nodes, 1):
829
+ print(f" {i}. {' | '.join(node.labels)}")
830
+ print(f" Properties: {[p.key for p in node.properties]}")
831
+
832
+ print(f"\nRELATIONSHIPS ({len(model.edges)}):")
833
+ for i, edge in enumerate(model.edges, 1):
834
+ start_labels = " | ".join(edge.start_node_labels)
835
+ end_labels = " | ".join(edge.end_node_labels)
836
+ edge_display = f"({start_labels})-[:{edge.edge_type}]->({end_labels})"
837
+ print(f" {i}. {edge_display}")
838
+
839
+ print(f"\nINDEXES ({len(model.node_indexes)}):")
840
+ for i, index in enumerate(model.node_indexes, 1):
841
+ labels = " | ".join(index.labels or [])
842
+ props = ", ".join(index.properties)
843
+ print(f" {i}. {labels}.{props}")
844
+
845
+ print(f"\nCONSTRAINTS ({len(model.node_constraints)}):")
846
+ for i, constraint in enumerate(model.node_constraints, 1):
847
+ labels = " | ".join(constraint.labels or [])
848
+ props = ", ".join(constraint.properties)
849
+ print(f" {i}. {constraint.type.upper()}: {labels}.{props}")
850
+
851
def _get_user_input_choice(
    self,
    prompt: str,
    choices: Dict[str, str],
    default_action: str = "accept",
) -> str:
    """Keep prompting until the user enters one of the offered keys.

    Args:
        prompt: Text handed to ``input`` on every attempt.
        choices: Maps each accepted answer (after stripping whitespace)
            to the value that is returned for it.
        default_action: Value returned when input ends (EOF) or the user
            interrupts the prompt.

    Returns:
        The value mapped to the entered key, or ``default_action``.
    """
    while True:
        try:
            answer = input(prompt).strip()
            if answer not in choices:
                accepted = ", ".join(sorted(choices))
                print(f"Invalid choice. Please enter {accepted}.")
                continue
            return choices[answer]
        except (EOFError, KeyboardInterrupt):
            # Non-interactive or aborted sessions fall back to the default.
            print(f"\nDefaulting to {default_action}...")
            return default_action
+
879
def _get_user_choice(self) -> str:
    """Print the main action menu and return the action the user selects.

    Returns:
        One of "accept", "modify", "regenerate" or "switch_strategy".
        "accept" is passed to ``_get_user_input_choice`` as the fallback.
    """
    menu_lines = (
        "\nWhat would you like to do?",
        "1. Accept this model",
        "2. Modify the model",
        "3. Regenerate model (same strategy)",
        "4. Switch modeling strategy",
    )
    for line in menu_lines:
        print(line)

    actions = {
        "1": "accept",
        "2": "modify",
        "3": "regenerate",
        "4": "switch_strategy",
    }
    return self._get_user_input_choice(
        "\nEnter your choice (1-4): ", actions, "accept"
    )
+
897
def _get_refinement_choice(self) -> str:
    """Print the refinement menu for the combined model and return the pick.

    Returns:
        One of "accept", "modify" or "validate". "accept" is passed to
        ``_get_user_input_choice`` as the fallback.
    """
    menu_lines = (
        "\nWhat would you like to do with the combined graph model?",
        "1. Accept and continue",
        "2. Modify the model using natural language commands",
        "3. Run graph schema validation",
    )
    for line in menu_lines:
        print(line)

    actions = {
        "1": "accept",
        "2": "modify",
        "3": "validate",
    }
    return self._get_user_input_choice(
        "\nEnter your choice (1-3): ", actions, "accept"
    )
+
913
def _switch_strategy(
    self, current_strategy: GraphModelingStrategy
) -> GraphModelingStrategy:
    """Ask the user which modeling strategy to use next.

    Args:
        current_strategy: Strategy in effect before the prompt.

    Returns:
        The selected strategy. ``current_strategy`` is returned when input
        ends (EOF) or the user interrupts the prompt. The LLM-powered
        option is refused while ``self.llm`` is ``None``.
    """
    print("\nAvailable strategies:")
    print("1. Deterministic (rule-based)")
    print("2. LLM-powered (AI-generated)")

    is_deterministic = current_strategy == GraphModelingStrategy.DETERMINISTIC
    current_name = "Deterministic" if is_deterministic else "LLM-powered"
    print(f"\nCurrent strategy: {current_name}")

    while True:
        try:
            selection = input("Choose new strategy (1-2): ").strip()
            if selection == "1":
                return GraphModelingStrategy.DETERMINISTIC
            if selection != "2":
                print("Invalid choice. Please enter 1 or 2.")
            elif self.llm is None:
                # Option 2 needs an LLM client; ask again.
                print("LLM not available. Please choose option 1.")
            else:
                return GraphModelingStrategy.LLM_POWERED
        except (EOFError, KeyboardInterrupt):
            print(f"\nKeeping current strategy: {current_name}")
            return current_strategy
+
943
def _modify_model_interactively(
    self, model: "GraphModel", strategy: GraphModelingStrategy
) -> "GraphModel":
    """Let the user edit the model through natural-language commands.

    Each command is translated by the LLM into operations, recorded in
    ``self.user_operation_history``, applied, displayed and validated.

    Args:
        model: Graph model to start from.
        strategy: Current modeling strategy, forwarded to the
            post-operation validation step.

    Returns:
        The edited model when the user types 'done' (or the LLM is
        unavailable). The model exactly as passed in when the user types
        'cancel' or interrupts the prompt.
    """
    # Kept so that 'cancel' really returns the model unchanged, as the
    # prompt promises. `_apply_operations_to_model` works on a deep copy,
    # so edits never touch this object.
    original_model = model

    while True:
        try:
            print("\n" + "=" * 60)
            print("INTERACTIVE MODEL MODIFICATION")
            print("=" * 60)
            print("\nDescribe the changes you'd like to make to the graph model.")
            print("You can use natural language like:")
            print(" - Change the label...")
            print(" - Delete a relationship...")
            print(" - Add a new property...")
            print(" - Remove a property...")

            print("\nType 'done' when finished, 'cancel' to return unchanged.")
            user_input = input("Describe your change: ").strip()
            command = user_input.lower()

            if command == "done":
                print("Applying changes to model...")
                break
            if command == "cancel":
                print("Cancelling changes...")
                # NOTE(review): operations already recorded in
                # user_operation_history are not rolled back here.
                return original_model
            if not user_input:
                print("Please describe the change you'd like to make.")
                continue

            if not self.llm:
                print("❌ LLM not available for natural language processing.")
                print("Please use the basic modification menu instead.")
                break

            # Use the LLM to parse natural language into operations.
            print("🤖 Processing your request... (this may take a moment)")
            operations = self._parse_natural_language_to_operations(
                user_input, model
            )
            if not operations:
                print("❌ I didn't understand that command. Please try again.")
                continue

            print(f"✅ Understood: {operations.reasoning}")

            # Create the operation history lazily on first use.
            if not self.user_operation_history:
                self.user_operation_history = UserOperationHistory(
                    self.session_id
                )

            # Track user operations before applying them.
            for operation in operations.operations:
                self.user_operation_history.add_operation(operation)

            model = self._apply_operations_to_model(model, operations)
            print("Changes applied!")

            # Show the updated model, then validate it.
            self.iteration_count += 1
            self._display_current_model(model)
            model = self._perform_post_operation_validation(
                model, strategy, operations
            )

        except (EOFError, KeyboardInterrupt):
            print("\nCancelling changes...")
            return original_model

    return model
+
1019
def _parse_natural_language_to_operations(
    self, user_input: str, model: "GraphModel"
) -> Optional["ModelModifications"]:
    """Translate a natural-language request into structured operations.

    Args:
        user_input: The user's free-text description of the change.
        model: Current graph model; a text summary of it is embedded in
            the system prompt as context.

    Returns:
        The parsed ``ModelModifications``, or ``None`` when no LLM is
        configured or when importing, invoking or parsing fails (failures
        are logged, not raised).
    """
    if not self.llm:
        return None

    # Get current model structure for context
    model_context = self._get_model_context_for_llm(model)

    system_prompt = (
        "You are an expert at translating natural language instructions "
        "into structured graph model operations.\n\n"
        f"Current graph model structure:\n{model_context}\n\n"
        "Available operations:\n"
        "- change_node_label: Change a node's label\n"
        "- rename_property: Rename a property on a node\n"
        "- drop_property: Remove a property from a node\n"
        "- add_property: Add a new property to a node\n"
        "- change_relationship_name: Change a relationship name\n"
        "- drop_relationship: Remove a relationship\n"
        "- add_node: Add a new node type with specified properties\n"
        "- drop_node: Remove a node type (and related relationships)\n"
        "- add_relationship: Add a new relationship between nodes\n"
        "- add_index: Add an index on a property\n"
        "- drop_index: Remove an index\n"
        "- add_constraint: Add a constraint "
        "(unique, existence, data_type)\n"
        "- drop_constraint: Remove a constraint\n\n"
        "IMPORTANT: When user says 'all' (e.g., 'drop all unique constraints'), "
        "you must identify ALL matching items from the current model context "
        # Separator added: the two instructions previously ran together
        # as "...for each one.Parse the user's request".
        "and create operations for each one.\n\n"
        "Parse the user's request into appropriate operations. "
        "Return a ModelModifications object with the operations and "
        "reasoning."
    )

    try:
        from langchain_core.output_parsers import PydanticOutputParser

        # Import at runtime to avoid circular imports
        try:
            from .models.operations import ModelModifications
        except ImportError:
            from core.hygm.models.operations import ModelModifications

        parser = PydanticOutputParser(pydantic_object=ModelModifications)

        prompt = (
            f"\nUser request: {user_input}\n\n"
            f"{parser.get_format_instructions()}\n"
        )

        response = self.llm.invoke(
            [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt},
            ]
        )

        return parser.parse(response.content)

    except (ImportError, ValueError, AttributeError) as e:
        logger.error("Error parsing natural language: %s", e)
        return None
    except Exception as e:  # noqa: BLE001 - Catch-all for LLM errors
        logger.error("Unexpected error in natural language parsing: %s", e)
        return None
+
1091
def _get_model_context_for_llm(self, model: "GraphModel") -> str:
    """Render the model as plain text for use as LLM prompt context.

    The text always has NODES and RELATIONSHIPS sections. INDEXES and
    CONSTRAINTS sections appear only when the model has any.

    Args:
        model: Graph model to describe.

    Returns:
        The sections joined with newlines.
    """
    lines = ["NODES:"]
    for node in model.nodes:
        keys = [prop.key for prop in node.properties]
        lines.append(f" - {node.primary_label}: {keys}")

    lines.append("\nRELATIONSHIPS:")
    for edge in model.edges:
        source_side = " | ".join(edge.start_node_labels)
        target_side = " | ".join(edge.end_node_labels)
        lines.append(f" - ({source_side})-[:{edge.edge_type}]->({target_side})")

    if model.node_indexes:
        lines.append("\nINDEXES:")
        for index in model.node_indexes:
            label_text = " | ".join(index.labels or [])
            property_text = ", ".join(index.properties)
            lines.append(f" - {label_text}.{property_text}")

    if model.node_constraints:
        lines.append("\nCONSTRAINTS:")
        for constraint in model.node_constraints:
            label_text = " | ".join(constraint.labels or [])
            property_text = ", ".join(constraint.properties)
            summary = f"{constraint.type.upper()}: {label_text}.{property_text}"
            lines.append(f" - {summary}")

    return "\n".join(lines)
+
1127
def _apply_operations_to_model(
    self, model: "GraphModel", operations: "ModelModifications"
) -> "GraphModel":
    """Apply structured operations to a copy of the graph model.

    Args:
        model: Model to start from. It is deep-copied first, so the
            caller's object is left untouched.
        operations: Container whose ``operations`` list is applied in
            order. Each item is dispatched on its ``operation_type``.

    Returns:
        The modified copy. Operations without a recognised
        ``operation_type`` are reported and skipped.
    """
    modified_model = copy.deepcopy(model)

    print(f"\nApplying {len(operations.operations)} operations:")

    for op in operations.operations:
        # Items lacking the attribute fall through to the "skipped" branch.
        op_type = getattr(op, "operation_type", None)

        if op_type == "change_node_label":
            print(f" - Change node label: {op.old_label} → {op.new_label}")
            modified_model = self._apply_change_node_label(
                modified_model, op.old_label, op.new_label
            )
        elif op_type == "rename_property":
            print(
                f" - Rename property on {op.node_label}: "
                f"{op.old_property} → {op.new_property}"
            )
            modified_model = self._apply_rename_property(
                modified_model,
                op.node_label,
                op.old_property,
                op.new_property,
            )
        elif op_type == "drop_property":
            print(f" - Drop property: {op.node_label}.{op.property_name}")
            modified_model = self._apply_drop_property(
                modified_model, op.node_label, op.property_name
            )
        elif op_type == "add_property":
            print(f" - Add property: {op.node_label}.{op.property_name}")
            modified_model = self._apply_add_property(
                modified_model, op.node_label, op.property_name
            )
        elif op_type == "change_relationship_name":
            print(f" - Change relationship: {op.old_name} → {op.new_name}")
            modified_model = self._apply_change_relationship_name(
                modified_model, op.old_name, op.new_name
            )
        elif op_type == "drop_relationship":
            print(f" - Drop relationship: {op.relationship_name}")
            modified_model = self._apply_drop_relationship(
                modified_model, op.relationship_name
            )
        elif op_type == "add_index":
            print(f" - Add index: {op.node_label}.{op.property_name}")
            modified_model = self._apply_add_index(
                modified_model, op.node_label, op.property_name
            )
        elif op_type == "drop_index":
            print(f" - Drop index: {op.node_label}.{op.property_name}")
            modified_model = self._apply_drop_index(
                modified_model, op.node_label, op.property_name
            )
        elif op_type == "add_constraint":
            constraint_desc = f"{op.constraint_type.upper()}"
            if op.constraint_type == "data_type" and op.data_type:
                constraint_desc += f" ({op.data_type})"
            print(
                f" - Add constraint: {constraint_desc} on "
                f"{op.node_label}.{op.property_name}"
            )
            modified_model = self._apply_add_constraint(
                modified_model,
                op.node_label,
                op.property_name,
                op.constraint_type,
                op.data_type,
            )
        elif op_type == "drop_constraint":
            constraint_desc = f"{op.constraint_type.upper()}"
            print(
                f" - Drop constraint: {constraint_desc} on "
                f"{op.node_label}.{op.property_name}"
            )
            modified_model = self._apply_drop_constraint(
                modified_model,
                op.node_label,
                op.property_name,
                op.constraint_type,
            )
        elif op_type == "add_node":
            print(f" - Add node: {op.node_label}")
            modified_model = self._apply_add_node(
                modified_model,
                op.node_label,
                op.properties,
                op.source_table,
            )
        elif op_type == "drop_node":
            print(f" - Drop node: {op.node_label}")
            modified_model = self._apply_drop_node(modified_model, op.node_label)
        elif op_type == "add_relationship":
            print(
                f" - Add relationship: ({op.start_node_label})"
                f"-[:{op.relationship_name}]->({op.end_node_label})"
            )
            modified_model = self._apply_add_relationship(
                modified_model,
                op.relationship_name,
                op.start_node_label,
                op.end_node_label,
                op.properties,
            )
        else:
            # Previously ignored without any trace; tell the user instead.
            print(f" - Skipping unsupported operation: {op_type!r}")

    return modified_model
+
1239
def _perform_manual_validation(
    self,
    model: "GraphModel",
    strategy: GraphModelingStrategy,
) -> "GraphModel":
    """Validate the model on request, with no user operations attached.

    An empty ``ModelModifications`` is passed to
    ``_validate_and_improve_model`` in "interactive" mode.

    Args:
        model: Graph model to validate.
        strategy: Current modeling strategy.

    Returns:
        Whatever ``_validate_and_improve_model`` returns.
    """
    try:
        from .models.operations import ModelModifications
    except ImportError:
        from core.hygm.models.operations import ModelModifications

    no_op_changes = ModelModifications(
        operations=[],
        reasoning="Interactive refinement validation request",
    )
    # An unset database structure is forwarded as an empty dict.
    structure = self.database_structure or {}

    return self._validate_and_improve_model(
        model=model,
        strategy=strategy,
        operations=no_op_changes,
        database_structure=structure,
        mode="interactive",
    )
+
1263
def _apply_change_node_label(
    self, model: "GraphModel", old_label: str, new_label: str
) -> "GraphModel":
    """Rename a node label everywhere the model refers to it.

    Nodes carrying the label are updated, including a "labels" entry in
    their source mapping when present. The rename is also applied to
    relationship endpoints, indexes and constraints.

    Args:
        model: Model to modify in place.
        old_label: Label to replace.
        new_label: Replacement label.

    Returns:
        The same ``model`` object.
    """

    def relabel(labels):
        return [new_label if name == old_label else name for name in labels]

    for node in model.nodes:
        if old_label not in node.labels:
            continue
        node.labels = relabel(node.labels)
        # Keep the source mapping in step with the node, if it has one.
        if node.source and "labels" in node.source.mapping:
            node.source.mapping["labels"] = relabel(node.source.mapping["labels"])

    # Relationship endpoints that reference the label.
    for edge in model.edges:
        edge.start_node_labels = relabel(edge.start_node_labels)
        edge.end_node_labels = relabel(edge.end_node_labels)

    for index in model.node_indexes:
        if index.labels:
            index.labels = relabel(index.labels)

    for constraint in model.node_constraints:
        if constraint.labels:
            constraint.labels = relabel(constraint.labels)

    return model
+
1308
def _apply_rename_property(
    self,
    model: "GraphModel",
    node_label: str,
    old_property: str,
    new_property: str,
) -> "GraphModel":
    """Rename a property on every node carrying ``node_label``.

    The property's source field is rewritten to
    ``<table>.<new_property>``, where ``<table>`` is the part of the old
    field before the first dot. When at least one property was renamed,
    indexes and constraints on that label are updated as well, so they do
    not keep pointing at the old name.

    Args:
        model: Model to modify in place.
        node_label: Label of the nodes whose property is renamed.
        old_property: Current property key.
        new_property: New property key.

    Returns:
        The same ``model`` object.
    """
    renamed = False
    for node in model.nodes:
        if node_label not in node.labels:
            continue
        for prop in node.properties:
            if prop.key != old_property:
                continue
            prop.key = new_property
            renamed = True
            # Update source field reference
            if prop.source:
                table_name = prop.source.field.split(".")[0]
                prop.source.field = f"{table_name}.{new_property}"

    if not renamed:
        return model

    # Keep indexes and constraints consistent with the renamed property.
    for entry in [*model.node_indexes, *model.node_constraints]:
        if entry.labels and node_label in entry.labels:
            entry.properties = [
                new_property if name == old_property else name
                for name in entry.properties
            ]
    return model
+
1328
def _apply_drop_property(
    self, model: "GraphModel", node_label: str, property_name: str
) -> "GraphModel":
    """Remove a property from every node carrying ``node_label``.

    When a property was actually removed, indexes and constraints on that
    label which include the property are removed too, so none is left
    referring to a property that no longer exists. The match rule is the
    one ``_apply_drop_index`` uses.

    Args:
        model: Model to modify in place.
        node_label: Label of the nodes to update.
        property_name: Key of the property to remove.

    Returns:
        The same ``model`` object.
    """
    dropped = False
    for node in model.nodes:
        if node_label not in node.labels:
            continue
        remaining = [prop for prop in node.properties if prop.key != property_name]
        if len(remaining) != len(node.properties):
            dropped = True
        node.properties = remaining

    if not dropped:
        return model

    def references_property(entry) -> bool:
        return bool(
            entry.labels
            and node_label in entry.labels
            and property_name in entry.properties
        )

    model.node_indexes = [
        index for index in model.node_indexes if not references_property(index)
    ]
    model.node_constraints = [
        constraint
        for constraint in model.node_constraints
        if not references_property(constraint)
    ]
    return model
+
1339
def _apply_add_property(
    self, model: "GraphModel", node_label: str, property_name: str
) -> "GraphModel":
    """Add a property to every node carrying ``node_label``.

    Nodes that already have a property with that key are left alone. The
    new property's source field is ``<node source name>.<property_name>``,
    with "unknown" standing in when the node has no source.

    Args:
        model: Model to modify in place.
        node_label: Label of the nodes to extend.
        property_name: Key of the property to add.

    Returns:
        The same ``model`` object.
    """
    from .models.graph_models import GraphProperty
    from .models.sources import PropertySource

    for node in model.nodes:
        if node_label not in node.labels:
            continue
        if any(existing.key == property_name for existing in node.properties):
            continue
        origin = node.source.name if node.source else "unknown"
        field_source = PropertySource(field=f"{origin}.{property_name}")
        node.properties.append(GraphProperty(key=property_name, source=field_source))
    return model
+
1358
def _apply_change_relationship_name(
    self, model: "GraphModel", old_name: str, new_name: str
) -> "GraphModel":
    """Rename every relationship whose type equals ``old_name``.

    An "edge_type" entry in the relationship's source mapping, when
    present, is updated to match.

    Args:
        model: Model to modify in place.
        old_name: Current relationship type.
        new_name: New relationship type.

    Returns:
        The same ``model`` object.
    """
    for edge in model.edges:
        if edge.edge_type != old_name:
            continue
        edge.edge_type = new_name
        origin = edge.source
        if origin and "edge_type" in origin.mapping:
            origin.mapping["edge_type"] = new_name
    return model
+
1370
def _apply_drop_relationship(
    self, model: "GraphModel", relationship_name: str
) -> "GraphModel":
    """Remove every relationship whose type equals ``relationship_name``.

    Args:
        model: Model to modify; its ``edges`` list is replaced.
        relationship_name: Relationship type to remove.

    Returns:
        The same ``model`` object.
    """
    model.edges = list(
        filter(lambda edge: edge.edge_type != relationship_name, model.edges)
    )
    return model
+
1379
def _apply_add_index(
    self, model: "GraphModel", node_label: str, property_name: str
) -> "GraphModel":
    """Add a label+property index unless a matching one already exists.

    An existing index matches when its labels contain ``node_label`` and
    its properties contain ``property_name``.

    Args:
        model: Model to modify in place.
        node_label: Label the index applies to.
        property_name: Property the index applies to.

    Returns:
        The same ``model`` object.
    """
    from .models.graph_models import GraphIndex
    from .models.sources import IndexSource

    already_indexed = any(
        existing.labels
        and node_label in existing.labels
        and property_name in existing.properties
        for existing in model.node_indexes
    )
    if already_indexed:
        return model

    # Record that the index came from an interactive user request.
    provenance = IndexSource(
        origin="user_request",
        reason="performance_optimization",
        created_by="interactive_modification",
    )
    model.node_indexes.append(
        GraphIndex(
            labels=[node_label],
            properties=[property_name],
            type="label+property",
            source=provenance,
        )
    )
    return model
+
1410
def _apply_drop_index(
    self, model: "GraphModel", node_label: str, property_name: str
) -> "GraphModel":
    """Remove indexes covering ``property_name`` on ``node_label``.

    An index is removed when its labels contain ``node_label`` and its
    properties contain ``property_name``. Indexes without labels are kept.

    Args:
        model: Model to modify; its ``node_indexes`` list is replaced.
        node_label: Label of the index to remove.
        property_name: Property of the index to remove.

    Returns:
        The same ``model`` object.
    """

    def is_target(index) -> bool:
        return bool(
            index.labels
            and node_label in index.labels
            and property_name in index.properties
        )

    model.node_indexes = [
        index for index in model.node_indexes if not is_target(index)
    ]
    return model
+
1425
def _apply_add_constraint(
    self,
    model: "GraphModel",
    node_label: str,
    property_name: str,
    constraint_type: str,
    data_type: str = "",
) -> "GraphModel":
    """Add a constraint unless an equivalent one already exists.

    An existing constraint is equivalent when it has the same type, its
    labels contain ``node_label`` and its properties contain
    ``property_name``.

    Args:
        model: Model to modify in place.
        node_label: Label the constraint applies to.
        property_name: Property the constraint applies to.
        constraint_type: Constraint kind, stored as the constraint's type.
        data_type: Stored on the constraint only when ``constraint_type``
            is "data_type"; otherwise ``None`` is stored.

    Returns:
        The same ``model`` object.
    """
    from .models.graph_models import GraphConstraint
    from .models.sources import ConstraintSource

    duplicate = any(
        existing.labels
        and node_label in existing.labels
        and property_name in existing.properties
        and existing.type == constraint_type
        for existing in model.node_constraints
    )
    if duplicate:
        return model

    # Record that the constraint came from an interactive user request.
    provenance = ConstraintSource(
        origin="user_request",
        reason="data_integrity",
        created_by="interactive_modification",
    )
    stored_data_type = data_type if constraint_type == "data_type" else None
    model.node_constraints.append(
        GraphConstraint(
            type=constraint_type,
            labels=[node_label],
            properties=[property_name],
            data_type=stored_data_type,
            source=provenance,
        )
    )
    return model
+
1463
def _apply_drop_constraint(
    self,
    model: "GraphModel",
    node_label: str,
    property_name: str,
    constraint_type: str,
) -> "GraphModel":
    """Remove constraints matching label, property and type.

    A constraint is removed when its type equals ``constraint_type``, its
    labels contain ``node_label`` and its properties contain
    ``property_name``. Constraints without labels are kept.

    Args:
        model: Model to modify; its ``node_constraints`` list is replaced.
        node_label: Label of the constraint to remove.
        property_name: Property of the constraint to remove.
        constraint_type: Type of the constraint to remove.

    Returns:
        The same ``model`` object.
    """

    def is_target(constraint) -> bool:
        return bool(
            constraint.labels
            and node_label in constraint.labels
            and property_name in constraint.properties
            and constraint.type == constraint_type
        )

    model.node_constraints = [
        constraint
        for constraint in model.node_constraints
        if not is_target(constraint)
    ]
    return model
+
1483
def _apply_add_node(
    self,
    model: "GraphModel",
    node_label: str,
    properties: List[str],
    source_table: str = "",
) -> "GraphModel":
    """Add a new node type unless one with that label already exists.

    Args:
        model: Model to modify in place.
        node_label: Label of the new node.
        properties: Property keys for the new node. ``None`` is treated
            as no properties.
        source_table: Source name recorded for the node and used as the
            prefix of each property's source field. Defaults to the
            lower-cased label when empty.

    Returns:
        The same ``model`` object. It is unchanged when a node carrying
        ``node_label`` already exists.
    """
    if any(node_label in node.labels for node in model.nodes):
        print(f" ⚠️ Node {node_label} already exists, skipping")
        return model

    # Imported only once a node is actually going to be built.
    from .models.graph_models import GraphNode, GraphProperty
    from .models.sources import PropertySource, NodeSource

    # Copy so the model never shares a list with the caller's operation.
    property_names = list(properties or [])
    source_name = source_table or node_label.lower()

    node_source = NodeSource(
        type="manual",
        name=source_name,
        location=source_name,
        mapping={
            "labels": [node_label],
            "properties": property_names,
            "created_by": "interactive_modification",
        },
    )
    node_properties = [
        GraphProperty(
            key=prop_name,
            source=PropertySource(field=f"{source_name}.{prop_name}"),
        )
        for prop_name in property_names
    ]

    model.nodes.append(
        GraphNode(
            labels=[node_label],
            properties=node_properties,
            source=node_source,
        )
    )
    return model
+
1530
def _apply_drop_node(self, model: "GraphModel", node_label: str) -> "GraphModel":
    """Remove a node type and everything in the model that refers to it.

    Removes nodes carrying ``node_label``, relationships with the label
    at either end, and indexes and constraints whose labels include it.

    Args:
        model: Model to modify; the four affected lists are replaced.
        node_label: Label of the node type to remove.

    Returns:
        The same ``model`` object.
    """
    surviving_nodes = []
    for node in model.nodes:
        if node_label not in node.labels:
            surviving_nodes.append(node)
    model.nodes = surviving_nodes

    # A relationship goes if the label appears at either end.
    model.edges = [
        edge
        for edge in model.edges
        if not (
            node_label in edge.start_node_labels
            or node_label in edge.end_node_labels
        )
    ]

    # Entries without labels cannot belong to this node and are kept.
    model.node_indexes = [
        index
        for index in model.node_indexes
        if not index.labels or node_label not in index.labels
    ]
    model.node_constraints = [
        constraint
        for constraint in model.node_constraints
        if not constraint.labels or node_label not in constraint.labels
    ]

    return model
+
1561
def _apply_add_relationship(
    self,
    model: "GraphModel",
    relationship_name: str,
    start_node_label: str,
    end_node_label: str,
    properties: List[str],
) -> "GraphModel":
    """Add a relationship unless an equivalent one already exists.

    An existing relationship is equivalent when it has the same type, its
    start labels contain ``start_node_label`` and its end labels contain
    ``end_node_label``.

    Args:
        model: Model to modify in place.
        relationship_name: Type of the new relationship.
        start_node_label: Label of the start node.
        end_node_label: Label of the end node.
        properties: Property keys for the relationship. ``None`` is
            treated as no properties.

    Returns:
        The same ``model`` object. It is unchanged when an equivalent
        relationship already exists.
    """
    already_present = any(
        edge.edge_type == relationship_name
        and start_node_label in edge.start_node_labels
        and end_node_label in edge.end_node_labels
        for edge in model.edges
    )
    if already_present:
        print(
            f" ⚠️ Relationship {relationship_name} already exists "
            f"between {start_node_label} and {end_node_label}, "
            "skipping"
        )
        return model

    # Imported only once a relationship is actually going to be built.
    from .models.graph_models import GraphRelationship, GraphProperty
    from .models.sources import PropertySource, RelationshipSource

    # Copy so the model never shares a list with the caller's operation.
    property_names = list(properties or [])
    location = relationship_name.lower()

    edge_source = RelationshipSource(
        type="manual",
        name=relationship_name,
        location=location,
        mapping={
            "start_node": start_node_label,
            "end_node": end_node_label,
            "properties": property_names,
            "created_by": "interactive_modification",
        },
    )
    edge_properties = [
        GraphProperty(
            key=prop_name,
            source=PropertySource(field=f"{location}.{prop_name}"),
        )
        for prop_name in property_names
    ]

    model.edges.append(
        GraphRelationship(
            edge_type=relationship_name,
            start_node_labels=[start_node_label],
            end_node_labels=[end_node_label],
            properties=edge_properties,
            source=edge_source,
        )
    )
    return model
+
1619
def _validate_and_improve_automatic_llm_model(
    self,
    graph_model: "GraphModel",
    database_structure: Dict[str, Any],
    domain_context: Optional[str] = None,
) -> "GraphModel":
    """Validate an LLM-generated model in automatic mode.

    Delegates to ``_validate_and_improve_model`` with the LLM-powered
    strategy, mode "automatic" and an empty set of operations, so no user
    interaction is involved.

    Args:
        graph_model: Initial LLM-generated graph model.
        database_structure: Original database structure.
        domain_context: Optional domain context; not used by this method.

    Returns:
        The model returned by ``_validate_and_improve_model``.
    """
    from .models.operations import ModelModifications

    # Automatic mode has no user edits, so pass an empty placeholder.
    placeholder = ModelModifications(
        operations=[],
        reasoning="Automatic validation improvement iteration",
    )
    return self._validate_and_improve_model(
        model=graph_model,
        strategy=GraphModelingStrategy.LLM_POWERED,
        operations=placeholder,
        database_structure=database_structure,
        mode="automatic",
    )
+
1655
def validate_graph_model(
    self, graph_model: "GraphModel", database_structure: Dict[str, Any]
) -> Dict[str, Any]:
    """Validate a graph model against the database structure.

    Runs ``GraphSchemaValidator`` and flattens its result into a plain
    dict for backward compatibility.

    Args:
        graph_model: The graph model to validate.
        database_structure: Original database structure.

    Returns:
        Dict with:
        - is_valid: the validator's ``success`` flag
        - issues: messages of the critical issues
        - warnings: messages of the warnings
        - suggestions: messages of the informational issues
        - summary: the validator's summary
        - metrics: coverage figures; every value is 0 when the validator
          reported no metrics
        - validation_result: the full validator result
    """
    logger.info("Performing comprehensive graph model validation...")

    result = GraphSchemaValidator().validate(graph_model, database_structure)

    # Other code in this class treats `metrics` as optional, so guard
    # against a missing metrics object instead of raising AttributeError.
    metrics = result.metrics
    metric_names = (
        "coverage_percentage",
        "tables_covered",
        "tables_total",
        "properties_covered",
        "properties_total",
        "relationships_covered",
        "relationships_total",
    )
    metrics_summary = {
        name: getattr(metrics, name) if metrics is not None else 0
        for name in metric_names
    }

    # Convert to the expected format for backward compatibility
    return {
        "is_valid": result.success,
        "issues": [issue.message for issue in result.critical_issues],
        "warnings": [issue.message for issue in result.warnings],
        "suggestions": [issue.message for issue in result.info_issues],
        "summary": result.summary,
        "metrics": metrics_summary,
        # Include full result for advanced usage
        "validation_result": result,
    }
+
1702
+ def _validate_and_improve_model(
1703
+ self,
1704
+ model: "GraphModel",
1705
+ strategy: GraphModelingStrategy,
1706
+ operations: "ModelModifications",
1707
+ database_structure: Dict[str, Any],
1708
+ mode: str = "interactive",
1709
+ ) -> "GraphModel":
1710
+ """
1711
+ Unified validation and improvement method for both automatic and interactive modes.
1712
+
1713
+ Args:
1714
+ model: Graph model to validate
1715
+ strategy: Current modeling strategy
1716
+ operations: Operations that were applied (or dummy for automatic mode)
1717
+ database_structure: Original database structure
1718
+ mode: "automatic" or "interactive" - affects UI and logging behavior
1719
+
1720
+ Returns:
1721
+ Final model (potentially improved)
1722
+ """
1723
+ # Mode-specific initialization
1724
+ if mode == "automatic":
1725
+ logger.info("Validating and improving LLM model in automatic mode...")
1726
+ print("šŸ” Performing automatic validation and improvement for LLM model...")
1727
+ else: # interactive mode
1728
+ print("\n" + "=" * 60)
1729
+ print("GRAPH SCHEMA VALIDATION")
1730
+ print("=" * 60)
1731
+
1732
+ if not database_structure:
1733
+ error_msg = "āŒ Cannot validate: Original database structure not available"
1734
+ print(error_msg)
1735
+ return model
1736
+
1737
+ current_model = model
1738
+ max_improvement_iterations = 3
1739
+ improvement_count = 0
1740
+
1741
+ while improvement_count < max_improvement_iterations:
1742
+ # Perform validation using the GraphSchemaValidator
1743
+ validator = GraphSchemaValidator()
1744
+ validation_result = validator.validate(current_model, database_structure)
1745
+
1746
+ # Mode-specific result display
1747
+ if mode == "automatic":
1748
+ # Compact display for automatic mode
1749
+ status_emoji = "āœ…" if validation_result.success else "āŒ"
1750
+ print(
1751
+ f"\n{status_emoji} Validation iteration {improvement_count + 1}: "
1752
+ f"{'PASSED' if validation_result.success else 'FAILED'}"
1753
+ )
1754
+
1755
+ if validation_result.metrics:
1756
+ coverage = validation_result.metrics.coverage_percentage
1757
+ print(f"šŸ“Š Coverage: {coverage:.1f}%")
1758
+
1759
+ logger.info(
1760
+ "Validation iteration %d: %s (Coverage: %.1f%%)",
1761
+ improvement_count + 1,
1762
+ "PASSED" if validation_result.success else "FAILED",
1763
+ (
1764
+ validation_result.metrics.coverage_percentage
1765
+ if validation_result.metrics
1766
+ else 0
1767
+ ),
1768
+ )
1769
+ else:
1770
+ # Detailed display for interactive mode
1771
+ self._display_validation_results(validation_result, strategy)
1772
+
1773
+ # Handle validation results based on strategy
1774
+ if strategy == GraphModelingStrategy.DETERMINISTIC:
1775
+ if mode == "interactive":
1776
+ self._handle_deterministic_validation(validation_result)
1777
+ break # No automatic improvement for deterministic
1778
+ elif strategy == GraphModelingStrategy.LLM_POWERED:
1779
+ if validation_result.success:
1780
+ success_msg = "āœ… Graph model validation passed successfully!"
1781
+ if mode == "automatic":
1782
+ logger.info(success_msg)
1783
+ print(success_msg)
1784
+ else:
1785
+ print("\nāœ… All validation checks passed!")
1786
+ break
1787
+ else:
1788
+ # Mode-specific improvement attempt messaging
1789
+ if mode == "automatic":
1790
+ critical_count = len(validation_result.critical_issues)
1791
+ warning_count = len(validation_result.warnings)
1792
+ print(
1793
+ f"šŸ”§ Found {critical_count} critical issues and "
1794
+ f"{warning_count} warnings"
1795
+ )
1796
+
1797
+ logger.info(
1798
+ "šŸ¤– Attempting automatic LLM improvement (iteration %d/%d)",
1799
+ improvement_count + 1,
1800
+ max_improvement_iterations,
1801
+ )
1802
+ print(
1803
+ f"šŸ¤– Attempting automatic LLM improvement "
1804
+ f"(iteration {improvement_count + 1}/{max_improvement_iterations})..."
1805
+ )
1806
+
1807
+ # Try LLM improvement
1808
+ improved_model = self._handle_llm_validation(
1809
+ validation_result, current_model, operations, mode
1810
+ )
1811
+
1812
+ if improved_model != current_model:
1813
+ # Model was improved, continue with the improved version
1814
+ current_model = improved_model
1815
+ improvement_count += 1
1816
+
1817
+ if mode == "automatic":
1818
+ print("šŸ”„ Model improved! Re-validating...")
1819
+ logger.info(
1820
+ "šŸ”„ Model improved, re-validating (iteration %d)",
1821
+ improvement_count,
1822
+ )
1823
+ else:
1824
+ # Interactive mode - show detailed iteration info
1825
+ self.iteration_count += 1
1826
+ print(
1827
+ f"\nšŸ”„ ITERATION {self.iteration_count} - IMPROVED MODEL"
1828
+ )
1829
+ self._display_current_model(current_model)
1830
+
1831
+ if improvement_count < max_improvement_iterations:
1832
+ print(
1833
+ f"\nšŸ” Re-validating improved model "
1834
+ f"(iteration {improvement_count + 1}/{max_improvement_iterations})..."
1835
+ )
1836
+ else:
1837
+ print(
1838
+ f"\nāš ļø Reached maximum improvement iterations "
1839
+ f"({max_improvement_iterations})"
1840
+ )
1841
+ break
1842
+ continue
1843
+ else:
1844
+ # No improvement was made, break the loop
1845
+ if mode == "automatic":
1846
+ logger.warning("āŒ LLM could not improve the model further")
1847
+ print("āŒ LLM could not improve the model further")
1848
+ else:
1849
+ # Interactive mode - no improvement accepted
1850
+ pass
1851
+ break
1852
+
1853
+ # Check if we should continue (only for automatic mode without improvement)
1854
+ if mode == "automatic" and not validation_result.success and not self.llm:
1855
+ reason = "LLM not available" if not self.llm else "Unknown issue"
1856
+ logger.warning("āš ļø Cannot improve model: %s", reason)
1857
+ print(f"āš ļø Cannot improve model: {reason}")
1858
+ break
1859
+
1860
+ # Final summary for automatic mode
1861
+ if mode == "automatic":
1862
+ if improvement_count > 0:
1863
+ logger.info(
1864
+ "✨ Automatic LLM improvement completed after %d iterations",
1865
+ improvement_count,
1866
+ )
1867
+ print(
1868
+ f"✨ Automatic LLM improvement completed after "
1869
+ f"{improvement_count} iterations"
1870
+ )
1871
+ else:
1872
+ logger.info("šŸ“Š Using original LLM model (no improvements needed)")
1873
+ print("šŸ“Š Using original LLM model")
1874
+
1875
+ return current_model
1876
+
1877
+ def _perform_post_operation_validation(
1878
+ self,
1879
+ model: "GraphModel",
1880
+ strategy: GraphModelingStrategy,
1881
+ operations: "ModelModifications",
1882
+ ) -> "GraphModel":
1883
+ """
1884
+ Perform Graph Schema Validation after applying operations.
1885
+
1886
+ This method validates that the modified graph model still properly
1887
+ represents the original database structure, accounting for the
1888
+ applied changes. For LLM strategy, it includes automatic model
1889
+ improvement loop.
1890
+
1891
+ Args:
1892
+ model: Modified graph model to validate
1893
+ strategy: Current modeling strategy (affects validation response)
1894
+ operations: Operations that were just applied
1895
+
1896
+ Returns:
1897
+ The final model (potentially improved by LLM)
1898
+ """
1899
+ return self._validate_and_improve_model(
1900
+ model=model,
1901
+ strategy=strategy,
1902
+ operations=operations,
1903
+ database_structure=self.database_structure or {},
1904
+ mode="interactive",
1905
+ )
1906
+
1907
+ def _display_validation_results(
1908
+ self, validation_result, strategy: GraphModelingStrategy
1909
+ ) -> None:
1910
+ """Display validation results in a user-friendly format."""
1911
+ status = "āœ… PASSED" if validation_result.success else "āŒ FAILED"
1912
+ print(f"\nValidation Status: {status}")
1913
+ print(f"Strategy: {strategy.value.upper()}")
1914
+ print(f"Summary: {validation_result.summary}")
1915
+
1916
+ # Display metrics
1917
+ metrics = validation_result.metrics
1918
+ print("\nCoverage Metrics:")
1919
+ print(f" Tables: {metrics.tables_covered}/{metrics.tables_total}")
1920
+ print(
1921
+ f" Properties: {metrics.properties_covered}/" f"{metrics.properties_total}"
1922
+ )
1923
+ print(
1924
+ f" Relationships: {metrics.relationships_covered}/"
1925
+ f"{metrics.relationships_total}"
1926
+ )
1927
+ print(f" Overall Coverage: {metrics.coverage_percentage:.1f}%")
1928
+
1929
+ # Display critical issues
1930
+ critical_issues = validation_result.critical_issues
1931
+ if critical_issues:
1932
+ print(f"\n🚨 Critical Issues ({len(critical_issues)}):")
1933
+ for i, issue in enumerate(critical_issues[:5], 1): # Show max 5
1934
+ print(f" {i}. {issue.message}")
1935
+ if len(critical_issues) > 5:
1936
+ print(f" ... and {len(critical_issues) - 5} more issues")
1937
+
1938
+ # Display warnings
1939
+ warnings = validation_result.warnings
1940
+ if warnings:
1941
+ print(f"\nāš ļø Warnings ({len(warnings)}):")
1942
+ for i, warning in enumerate(warnings[:3], 1): # Show max 3
1943
+ print(f" {i}. {warning.message}")
1944
+ if len(warnings) > 3:
1945
+ print(f" ... and {len(warnings) - 3} more warnings")
1946
+
1947
+ def _handle_deterministic_validation(self, validation_result) -> None:
1948
+ """Handle validation results for deterministic strategy."""
1949
+ if not validation_result.success:
1950
+ print("\nšŸ”§ DETERMINISTIC STRATEGY GUIDANCE:")
1951
+ print("The changes you made have introduced validation issues.")
1952
+ print("Consider the following:")
1953
+
1954
+ critical_issues = validation_result.critical_issues
1955
+ if critical_issues:
1956
+ print("\nšŸ“‹ Required Fixes:")
1957
+ for i, issue in enumerate(critical_issues[:3], 1):
1958
+ has_rec = hasattr(issue, "recommendation") and issue.recommendation
1959
+ if has_rec:
1960
+ print(f" {i}. {issue.recommendation}")
1961
+ else:
1962
+ print(f" {i}. {issue.message}")
1963
+
1964
+ print("\nYou can:")
1965
+ print(" - Make additional changes to fix these issues")
1966
+ print(" - Type 'done' to accept the model as-is")
1967
+ print(" - Type 'cancel' to revert your changes")
1968
+ else:
1969
+ print("\nāœ… All validation checks passed!")
1970
+ print("Your changes maintain proper database coverage.")
1971
+
1972
def _handle_llm_error(self, operation: str, error: Exception, fallback_result=None):
    """
    Report an LLM failure and hand back a fallback value.

    The error is written to the module logger and echoed to stdout.

    Args:
        operation: Text describing what was being done; it is inserted
            after the word "Error" in both messages.
        error: The exception that was raised.
        fallback_result: Value to return to the caller (default ``None``).

    Returns:
        ``fallback_result``, unchanged.
    """
    logger.error("Error %s: %s", operation, error)
    console_message = f"āŒ Error {operation}: {error}"
    print(console_message)
    return fallback_result
1977
+
1978
+ def _handle_llm_validation(
1979
+ self,
1980
+ validation_result,
1981
+ model: "GraphModel",
1982
+ operations: "ModelModifications",
1983
+ mode: str = "interactive",
1984
+ ) -> "GraphModel":
1985
+ """Handle validation results for LLM strategy with regeneration."""
1986
+ if not validation_result.success and self.llm:
1987
+ print("\nšŸ¤– LLM STRATEGY: AUTOMATIC MODEL IMPROVEMENT")
1988
+ print(
1989
+ "The LLM will analyze validation issues and regenerate " "the model..."
1990
+ )
1991
+
1992
+ # Preserve user operation history before LLM improvement
1993
+ saved_user_operations = None
1994
+ if self.user_operation_history and mode == "interactive":
1995
+ saved_user_operations = self.user_operation_history.copy()
1996
+ print("šŸ“‹ Preserving your operation history...")
1997
+
1998
+ # Prepare context for LLM
1999
+ validation_context = self._prepare_validation_context_for_llm(
2000
+ validation_result, model, operations
2001
+ )
2002
+
2003
+ try:
2004
+ # Use LLM to regenerate the improved model
2005
+ improved_model = self._regenerate_model_with_llm_fixes(
2006
+ model, validation_context, validation_result
2007
+ )
2008
+
2009
+ if improved_model:
2010
+ print("\nāœ… LLM has generated an improved model!")
2011
+
2012
+ # Offer the user to review the improved model
2013
+ should_apply = self._should_apply_llm_improvements(
2014
+ improved_model, model, mode
2015
+ )
2016
+ if should_apply:
2017
+ print("āœ… Applying LLM improvements...")
2018
+
2019
+ # Restore user operation history after improvements
2020
+ if saved_user_operations and mode == "interactive":
2021
+ self.user_operation_history = saved_user_operations
2022
+ print("šŸ“‹ Restored your operation history!")
2023
+
2024
+ return improved_model
2025
+ else:
2026
+ print("āŒ User rejected LLM improvements")
2027
+
2028
+ # Restore history if improvements were rejected
2029
+ if saved_user_operations and mode == "interactive":
2030
+ self.user_operation_history = saved_user_operations
2031
+ print("šŸ“‹ Restored your operation history!")
2032
+
2033
+ return model
2034
+ else:
2035
+ print("āŒ LLM could not generate improved model")
2036
+
2037
+ # Restore history if no improvement was generated
2038
+ if saved_user_operations and mode == "interactive":
2039
+ self.user_operation_history = saved_user_operations
2040
+ print("šŸ“‹ Restored your operation history!")
2041
+
2042
+ return model
2043
+ except Exception as e:
2044
+ logger.error("Error getting LLM model improvements: %s", e)
2045
+ print(f"āŒ Error getting LLM improvements: {e}")
2046
+
2047
+ # Restore user operation history if there was an error
2048
+ if saved_user_operations and mode == "interactive":
2049
+ self.user_operation_history = saved_user_operations
2050
+ print("šŸ“‹ Restored your operation history!")
2051
+
2052
+ return model
2053
+ else:
2054
+ print("\nāœ… All validation checks passed!")
2055
+ return model
2056
+
2057
+ def _prepare_validation_context_for_llm(
2058
+ self,
2059
+ validation_result,
2060
+ model: "GraphModel",
2061
+ operations: "ModelModifications",
2062
+ ) -> str:
2063
+ """Prepare validation context for LLM analysis."""
2064
+ context_parts = []
2065
+
2066
+ # Validation summary
2067
+ context_parts.append("VALIDATION RESULTS:")
2068
+ status = "PASSED" if validation_result.success else "FAILED"
2069
+ context_parts.append(f"Status: {status}")
2070
+ context_parts.append(f"Summary: {validation_result.summary}")
2071
+
2072
+ # Critical issues
2073
+ critical_issues = validation_result.critical_issues
2074
+ if critical_issues:
2075
+ context_parts.append(f"\nCRITICAL ISSUES ({len(critical_issues)}):")
2076
+ for i, issue in enumerate(critical_issues, 1):
2077
+ context_parts.append(f"{i}. {issue.message}")
2078
+ has_rec = hasattr(issue, "recommendation") and issue.recommendation
2079
+ if has_rec:
2080
+ context_parts.append(f" Recommendation: {issue.recommendation}")
2081
+
2082
+ # Recent operations
2083
+ context_parts.append(f"\nRECENT OPERATIONS ({len(operations.operations)}):")
2084
+ for i, op in enumerate(operations.operations, 1):
2085
+ context_parts.append(f"{i}. {op.operation_type}: {op.description}")
2086
+
2087
+ # Current model state
2088
+ context_parts.append("\nCURRENT MODEL STATE:")
2089
+ context_parts.append(f"Nodes: {len(model.nodes)}")
2090
+ context_parts.append(f"Relationships: {len(model.edges)}")
2091
+ context_parts.append(f"Indexes: {len(model.node_indexes)}")
2092
+
2093
+ return "\n".join(context_parts)
2094
+
2095
+ def _regenerate_model_with_llm_fixes(
2096
+ self,
2097
+ current_model: "GraphModel",
2098
+ validation_context: str,
2099
+ validation_result,
2100
+ ) -> Optional["GraphModel"]:
2101
+ """Use LLM to regenerate an improved model based on validation."""
2102
+ if not self.llm or not self.database_structure:
2103
+ return None
2104
+
2105
+ # Get the modeling strategy instance for regeneration
2106
+ strategy_instance = self._get_strategy_instance(
2107
+ GraphModelingStrategy.LLM_POWERED
2108
+ )
2109
+
2110
+ # Prepare enhanced context for the LLM
2111
+ improvement_context = self._prepare_improvement_context(
2112
+ current_model, validation_result, validation_context
2113
+ )
2114
+
2115
+ # Extract user operation context separately to preserve user changes
2116
+ user_context = None
2117
+ if self.user_operation_history and self.user_operation_history.operations:
2118
+ user_context = self.user_operation_history.to_llm_context()
2119
+
2120
+ try:
2121
+ print("šŸ”„ LLM is analyzing validation issues and regenerating model...")
2122
+
2123
+ # Use the LLM strategy but with enhanced context
2124
+ # Pass user operations as user_operation_context to ensure preservation
2125
+ if isinstance(strategy_instance, LLMStrategy):
2126
+ # LLM strategy supports user_operation_context
2127
+ improved_model = strategy_instance.create_model(
2128
+ self.database_structure,
2129
+ domain_context=improvement_context,
2130
+ user_operation_context=user_context,
2131
+ )
2132
+ else:
2133
+ # Fallback for strategies that don't support user_operation_context
2134
+ improved_model = strategy_instance.create_model(
2135
+ self.database_structure, domain_context=improvement_context
2136
+ )
2137
+
2138
+ if improved_model:
2139
+ print("āœ… LLM generated improved model")
2140
+ return improved_model
2141
+ else:
2142
+ print("āŒ LLM failed to generate improved model")
2143
+ return None
2144
+
2145
+ except Exception as e:
2146
+ logger.error("Error regenerating model with LLM: %s", e)
2147
+ return None
2148
+
2149
+ def _prepare_improvement_context(
2150
+ self,
2151
+ current_model: "GraphModel",
2152
+ validation_result,
2153
+ validation_context: str,
2154
+ ) -> str:
2155
+ """Prepare comprehensive context for LLM model improvement."""
2156
+ context_parts = []
2157
+
2158
+ # User operations must be preserved - add this FIRST
2159
+ if self.user_operation_history and self.user_operation_history.operations:
2160
+ user_context = self.user_operation_history.to_llm_context()
2161
+ if user_context: # Only add if there's actual content
2162
+ context_parts.append(user_context)
2163
+ context_parts.append("")
2164
+
2165
+ # Previous model structure
2166
+ context_parts.append("CURRENT MODEL TO IMPROVE:")
2167
+ context_parts.append(self._get_model_context_for_llm(current_model))
2168
+
2169
+ # Validation issues that need fixing
2170
+ context_parts.append("\nVALIDATION ISSUES TO RESOLVE:")
2171
+ context_parts.append(validation_context)
2172
+
2173
+ # Critical requirements
2174
+ context_parts.append("\nCRITICAL REQUIREMENTS:")
2175
+ critical_issues = validation_result.critical_issues
2176
+ for i, issue in enumerate(critical_issues, 1):
2177
+ context_parts.append(f"{i}. {issue.message}")
2178
+ if hasattr(issue, "recommendation") and issue.recommendation:
2179
+ context_parts.append(f" → {issue.recommendation}")
2180
+
2181
+ # Coverage targets
2182
+ metrics = validation_result.metrics
2183
+ context_parts.append("\nCOVERAGE TARGETS:")
2184
+ context_parts.append(f"- Target tables: {metrics.tables_total}")
2185
+ context_parts.append(f"- Target properties: {metrics.properties_total}")
2186
+ context_parts.append(f"- Target relationships: {metrics.relationships_total}")
2187
+
2188
+ return "\n".join(context_parts)
2189
+
2190
+ def _should_apply_llm_improvements(
2191
+ self,
2192
+ improved_model: "GraphModel",
2193
+ current_model: "GraphModel",
2194
+ mode: str = "interactive",
2195
+ ) -> bool:
2196
+ """Ask user to apply LLM improvements or auto-apply in automatic mode."""
2197
+
2198
+ # In automatic mode, always apply improvements
2199
+ if mode == "automatic":
2200
+ print("šŸ¤– Automatic mode: Applying LLM improvements...")
2201
+ return True
2202
+
2203
+ # In interactive mode, ask the user
2204
+ print("\n" + "=" * 60)
2205
+ print("šŸ¤– LLM MODEL IMPROVEMENT REVIEW")
2206
+ print("=" * 60)
2207
+
2208
+ # Show comparison
2209
+ print("\nCURRENT MODEL:")
2210
+ print(f" Nodes: {len(current_model.nodes)}")
2211
+ print(f" Relationships: {len(current_model.edges)}")
2212
+ print(f" Indexes: {len(current_model.node_indexes)}")
2213
+ print(f" Constraints: {len(current_model.node_constraints)}")
2214
+
2215
+ print("\nIMPROVED MODEL:")
2216
+ print(f" Nodes: {len(improved_model.nodes)}")
2217
+ print(f" Relationships: {len(improved_model.edges)}")
2218
+ print(f" Indexes: {len(improved_model.node_indexes)}")
2219
+ print(f" Constraints: {len(improved_model.node_constraints)}")
2220
+
2221
+ # Show what's new/different
2222
+ self._show_model_differences(current_model, improved_model)
2223
+
2224
+ print("\nWould you like to apply these LLM improvements?")
2225
+ print("1. Yes - Apply improvements and continue")
2226
+ print("2. No - Keep current model")
2227
+ print("3. Review - Show detailed improved model first")
2228
+
2229
+ while True:
2230
+ try:
2231
+ choice = input("\nEnter your choice (1-3): ").strip()
2232
+ if choice == "1":
2233
+ return True
2234
+ elif choice == "2":
2235
+ return False
2236
+ elif choice == "3":
2237
+ self._display_current_model(improved_model)
2238
+ print(
2239
+ "\nAfter reviewing, would you like to apply these "
2240
+ "improvements?"
2241
+ )
2242
+ print("1. Yes - Apply improvements")
2243
+ print("2. No - Keep current model")
2244
+ continue
2245
+ else:
2246
+ print("Invalid choice. Please enter 1, 2, or 3.")
2247
+ except (EOFError, KeyboardInterrupt):
2248
+ print("\nKeeping current model...")
2249
+ return False
2250
+
2251
+ def _show_model_differences(
2252
+ self, current_model: "GraphModel", improved_model: "GraphModel"
2253
+ ) -> None:
2254
+ """Show differences between current and improved models."""
2255
+ # Compare nodes
2256
+ current_node_labels = {node.primary_label for node in current_model.nodes}
2257
+ improved_node_labels = {node.primary_label for node in improved_model.nodes}
2258
+
2259
+ new_nodes = improved_node_labels - current_node_labels
2260
+ removed_nodes = current_node_labels - improved_node_labels
2261
+
2262
+ if new_nodes:
2263
+ print(f"\nāž• NEW NODES ({len(new_nodes)}):")
2264
+ for label in sorted(new_nodes):
2265
+ print(f" + {label}")
2266
+
2267
+ if removed_nodes:
2268
+ print(f"\nāž– REMOVED NODES ({len(removed_nodes)}):")
2269
+ for label in sorted(removed_nodes):
2270
+ print(f" - {label}")
2271
+
2272
+ # Compare relationships
2273
+ current_relationships = {edge.edge_type for edge in current_model.edges}
2274
+ improved_relationships = {edge.edge_type for edge in improved_model.edges}
2275
+
2276
+ new_relationships = improved_relationships - current_relationships
2277
+ removed_relationships = current_relationships - improved_relationships
2278
+
2279
+ if new_relationships:
2280
+ print(f"\nāž• NEW RELATIONSHIPS ({len(new_relationships)}):")
2281
+ for rel_type in sorted(new_relationships):
2282
+ print(f" + {rel_type}")
2283
+
2284
+ if removed_relationships:
2285
+ print(f"\nāž– REMOVED RELATIONSHIPS ({len(removed_relationships)}):")
2286
+ for rel_type in sorted(removed_relationships):
2287
+ print(f" - {rel_type}")
2288
+
2289
+ # Show property changes for existing nodes
2290
+ common_nodes = current_node_labels & improved_node_labels
2291
+ if common_nodes:
2292
+ property_changes = []
2293
+ for label in common_nodes:
2294
+ current_node = next(
2295
+ n for n in current_model.nodes if n.primary_label == label
2296
+ )
2297
+ improved_node = next(
2298
+ n for n in improved_model.nodes if n.primary_label == label
2299
+ )
2300
+
2301
+ current_props = {p.key for p in current_node.properties}
2302
+ improved_props = {p.key for p in improved_node.properties}
2303
+
2304
+ new_props = improved_props - current_props
2305
+ removed_props = current_props - improved_props
2306
+
2307
+ if new_props or removed_props:
2308
+ property_changes.append((label, new_props, removed_props))
2309
+
2310
+ if property_changes:
2311
+ print("\nšŸ”„ PROPERTY CHANGES:")
2312
+ for label, new_props, removed_props in property_changes:
2313
+ if new_props:
2314
+ for prop in sorted(new_props):
2315
+ print(f" + {label}.{prop}")
2316
+ if removed_props:
2317
+ for prop in sorted(removed_props):
2318
+ print(f" - {label}.{prop}")
2319
+
2320
+ def _get_llm_validation_fixes(self, validation_context: str) -> Optional[str]:
2321
+ """Get LLM suggestions for fixing validation issues."""
2322
+ if not self.llm:
2323
+ return None
2324
+
2325
+ system_prompt = (
2326
+ "You are an expert graph modeling assistant. "
2327
+ "Analyze the validation results and suggest specific fixes "
2328
+ "to improve the graph model's coverage of the original "
2329
+ "database structure. "
2330
+ "Focus on critical issues that affect data completeness."
2331
+ )
2332
+
2333
+ try:
2334
+ response = self.llm.invoke(
2335
+ [
2336
+ {"role": "system", "content": system_prompt},
2337
+ {
2338
+ "role": "user",
2339
+ "content": (
2340
+ "Please analyze these validation results and "
2341
+ f"suggest fixes:\n\n{validation_context}\n\n"
2342
+ "Provide specific, actionable recommendations "
2343
+ "to fix the critical issues."
2344
+ ),
2345
+ },
2346
+ ]
2347
+ )
2348
+ return response.content
2349
+ except Exception as e:
2350
+ logger.error("Error calling LLM for validation fixes: %s", e)
2351
+ return None