structured2graph 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. __init__.py +47 -0
  2. core/__init__.py +23 -0
  3. core/hygm/__init__.py +74 -0
  4. core/hygm/hygm.py +2351 -0
  5. core/hygm/models/__init__.py +82 -0
  6. core/hygm/models/graph_models.py +667 -0
  7. core/hygm/models/llm_models.py +229 -0
  8. core/hygm/models/operations.py +176 -0
  9. core/hygm/models/sources.py +68 -0
  10. core/hygm/models/user_operations.py +139 -0
  11. core/hygm/strategies/__init__.py +17 -0
  12. core/hygm/strategies/base.py +36 -0
  13. core/hygm/strategies/deterministic.py +262 -0
  14. core/hygm/strategies/llm.py +904 -0
  15. core/hygm/validation/__init__.py +38 -0
  16. core/hygm/validation/base.py +194 -0
  17. core/hygm/validation/graph_schema_validator.py +687 -0
  18. core/hygm/validation/memgraph_data_validator.py +991 -0
  19. core/migration_agent.py +1369 -0
  20. core/schema/spec.json +155 -0
  21. core/utils/meta_graph.py +108 -0
  22. database/__init__.py +36 -0
  23. database/adapters/__init__.py +11 -0
  24. database/adapters/memgraph.py +318 -0
  25. database/adapters/mysql.py +311 -0
  26. database/adapters/postgresql.py +335 -0
  27. database/analyzer.py +396 -0
  28. database/factory.py +219 -0
  29. database/models.py +209 -0
  30. main.py +518 -0
  31. query_generation/__init__.py +20 -0
  32. query_generation/cypher_generator.py +129 -0
  33. query_generation/schema_utilities.py +88 -0
  34. structured2graph-0.1.1.dist-info/METADATA +197 -0
  35. structured2graph-0.1.1.dist-info/RECORD +41 -0
  36. structured2graph-0.1.1.dist-info/WHEEL +4 -0
  37. structured2graph-0.1.1.dist-info/entry_points.txt +2 -0
  38. structured2graph-0.1.1.dist-info/licenses/LICENSE +21 -0
  39. utils/__init__.py +57 -0
  40. utils/config.py +235 -0
  41. utils/environment.py +404 -0
@@ -0,0 +1,262 @@
1
+ """
2
+ Deterministic modeling strategy for Hypothetical Graph Modeling (HyGM).
3
+
4
+ This strategy creates graph models using rule-based approaches without AI.
5
+ """
6
+
7
+ import logging
8
+ from typing import Dict, Any, Optional, List, TYPE_CHECKING
9
+
10
+ if TYPE_CHECKING:
11
+ from core.hygm.models.graph_models import GraphModel
12
+
13
+ try:
14
+ from .base import BaseModelingStrategy
15
+ except ImportError:
16
+ from core.hygm.strategies.base import BaseModelingStrategy
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
+ class DeterministicStrategy(BaseModelingStrategy):
22
+ """Deterministic graph modeling strategy using rule-based approaches."""
23
+
24
def get_strategy_name(self) -> str:
    """Identify this modeling strategy by its fixed name."""
    strategy_name = "deterministic"
    return strategy_name
27
+
28
def create_model(
    self,
    database_structure: Dict[str, Any],
    domain_context: Optional[str] = None,  # noqa: ARG002
) -> "GraphModel":
    """
    Create a basic graph model deterministically from database structure.

    This method creates a straightforward mapping without AI assistance:

    * every entry of ``database_structure["entity_tables"]`` becomes one
      node labelled ``table_name.title()``, with its properties, indexes
      and unique constraints derived by the ``_extract_*`` helpers;
    * every entry of ``database_structure["relationships"]`` becomes one
      directed edge from ``from_table`` to ``to_table``.

    Args:
        database_structure: Mapping read for the keys ``"entity_tables"``
            (table name -> table info dict) and ``"relationships"``
            (list of dicts read for ``from_table``, ``to_table``,
            ``from_column``, ``to_column`` and ``constraint_name``).
            Missing or ``None`` values are treated as empty.
        domain_context: Accepted for interface compatibility; not used by
            this strategy.

    Returns:
        A ``GraphModel`` holding the generated nodes, edges, node indexes
        and node constraints.
    """
    logger.info("Creating deterministic graph model...")

    # Import here to avoid circular imports
    from core.hygm.models.graph_models import (
        GraphModel,
        GraphNode,
        GraphRelationship,
        GraphProperty,
        GraphIndex,
        GraphConstraint,
    )
    from core.hygm.models.sources import (
        NodeSource,
        PropertySource,
        RelationshipSource,
        IndexSource,
        ConstraintSource,
    )

    nodes = []
    relationships = []
    node_indexes = []
    node_constraints = []

    # Looked up once and reused by both loops; `or {}` tolerates an
    # explicit None stored under the key.
    entity_tables = database_structure.get("entity_tables", {}) or {}

    unique_prefix = "UNIQUE("

    # Convert entity tables to nodes
    for table_name, table_info in entity_tables.items():
        # Computed once per table. A fresh ``[label]`` list is still built
        # for each consumer so the model objects never share a list.
        label = table_name.title()

        # Get primary key from explicit field
        primary_keys = table_info.get("primary_keys", [])
        id_field = primary_keys[0] if primary_keys else "id"

        # Create source information
        source = NodeSource(
            type="table",
            name=table_name,
            location=f"database.schema.{table_name}",
            mapping={
                "labels": [label],
                "id_field": id_field,
            },
        )

        # Extract properties
        properties = [
            GraphProperty(
                key=prop_name,
                source=PropertySource(field=f"{table_name}.{prop_name}"),
            )
            for prop_name in self._extract_node_properties_from_table(table_info)
        ]

        # Create node
        nodes.append(
            GraphNode(
                labels=[label],
                properties=properties,
                source=source,
            )
        )

        # Create indexes for this node
        for index_prop in self._extract_indexes_from_table(table_info):
            index_source = IndexSource(
                origin="migration_requirement",
                reason="performance_optimization",
                created_by="migration_agent",
                index_name=None,
                migrated_from=None,
            )
            node_indexes.append(
                GraphIndex(
                    labels=[label],
                    properties=[index_prop],
                    type="label+property",
                    source=index_source,
                )
            )

        # Create constraints for this node
        for constraint_str in self._extract_constraints_from_table(table_info):
            if "UNIQUE" not in constraint_str.upper():
                continue

            has_wrapper = constraint_str[
                : len(unique_prefix)
            ].upper() == unique_prefix and constraint_str.endswith(")")
            if has_wrapper:
                # Strip exactly one "UNIQUE(" prefix and one ")" suffix so
                # a column name that itself contains ")" (or a lower-case
                # "unique(" wrapper) is recovered intact.
                prop_name = constraint_str[len(unique_prefix) : -1]
            else:
                # Unrecognised shape: keep the legacy best-effort cleanup.
                prop_name = constraint_str.replace(unique_prefix, "")
                prop_name = prop_name.replace(")", "")

            constraint_source = ConstraintSource(
                origin="source_database_constraint",
                constraint_name=f"{table_name}_{prop_name}_unique",
                migrated_from=f"database.schema.{table_name}",
            )
            node_constraints.append(
                GraphConstraint(
                    type="unique",
                    labels=[label],
                    properties=[prop_name],
                    source=constraint_source,
                )
            )

    # Convert relationships
    relationships_data = database_structure.get("relationships", []) or []
    for rel_data in relationships_data:
        rel_name = self._generate_relationship_name(rel_data)

        # Find source and target node labels
        from_table = rel_data.get("from_table", "")
        to_table = rel_data.get("to_table", "")

        start_labels = [from_table.title()]
        end_labels = [to_table.title()]

        # Get primary key for the from_table
        from_table_info = entity_tables.get(from_table, {})
        primary_keys = from_table_info.get("primary_keys", [])
        from_pk = primary_keys[0] if primary_keys else f"{from_table}_id"

        # Create relationship source
        rel_source = RelationshipSource(
            type="table",
            name=rel_data.get("constraint_name", rel_name),
            location=f"database.schema.{from_table}",
            mapping={
                "start_node": (f"{from_table}.{rel_data.get('from_column', 'id')}"),
                "end_node": (f"{to_table}.{rel_data.get('to_column', 'id')}"),
                "edge_type": rel_name,
                "from_pk": from_pk,  # Add primary key for migration agent
            },
        )

        # Create relationship
        relationships.append(
            GraphRelationship(
                edge_type=rel_name,
                start_node_labels=start_labels,
                end_node_labels=end_labels,
                properties=[],
                source=rel_source,
                directionality="directed",
            )
        )

    return GraphModel(
        nodes=nodes,
        edges=relationships,
        node_indexes=node_indexes,
        node_constraints=node_constraints,
    )
180
+
181
+ def _extract_node_properties_from_table(
182
+ self, table_info: Dict[str, Any]
183
+ ) -> List[str]:
184
+ """Extract properties that should be included in the node."""
185
+ properties = []
186
+
187
+ # Use standardized schema format (always available from models.py)
188
+ schema_list = table_info.get("schema", [])
189
+ for col_info in schema_list:
190
+ col_name = col_info.get("field")
191
+ if not col_name:
192
+ continue
193
+
194
+ # Include all columns except foreign key columns that aren't PKs
195
+ # Primary keys (PRI) included, foreign keys (MUL) excluded
196
+ if col_info.get("key") != "MUL":
197
+ properties.append(col_name)
198
+ return properties
199
+
200
+ def _extract_indexes_from_table(self, table_info: Dict[str, Any]) -> List[str]:
201
+ """Extract properties that should have indexes."""
202
+ indexes = set() # Use set to avoid duplicates
203
+
204
+ # First, preserve indexes from the source database
205
+ source_indexes = table_info.get("indexes", [])
206
+ for index_info in source_indexes:
207
+ # Each index_info is a dict with 'columns' list
208
+ if isinstance(index_info, dict) and "columns" in index_info:
209
+ for column in index_info["columns"]:
210
+ indexes.add(column)
211
+
212
+ # Then add essential indexes for PKs, unique columns, and foreign keys
213
+ # This ensures we have indexes even if source DB doesn't have them
214
+ schema_list = table_info.get("schema", [])
215
+ for col_info in schema_list:
216
+ col_name = col_info.get("field")
217
+ if not col_name:
218
+ continue
219
+
220
+ # Add indexes for PKs, unique columns, and foreign keys
221
+ # Foreign keys need indexes for efficient relationship lookups
222
+ if col_info.get("key") in ["PRI", "UNI", "MUL"]:
223
+ indexes.add(col_name)
224
+
225
+ return list(indexes)
226
+
227
+ def _extract_constraints_from_table(self, table_info: Dict[str, Any]) -> List[str]:
228
+ """Extract constraint definitions from table info."""
229
+ constraints = []
230
+
231
+ # Use standardized schema format (always available from models.py)
232
+ schema_list = table_info.get("schema", [])
233
+ for col_info in schema_list:
234
+ col_name = col_info.get("field")
235
+ if not col_name:
236
+ continue
237
+
238
+ # Add unique constraints for primary keys and unique columns
239
+ # This preserves the source database constraint information
240
+ if col_info.get("key") in ["PRI", "UNI"]:
241
+ constraints.append(f"UNIQUE({col_name})")
242
+
243
+ return constraints
244
+
245
+ def _generate_relationship_name(self, rel_data: Dict[str, Any]) -> str:
246
+ """Generate a semantic relationship name from relationship data."""
247
+ constraint_name = rel_data.get("constraint_name", "")
248
+ if constraint_name:
249
+ # Extract meaningful name from constraint
250
+ if "_fk" in constraint_name:
251
+ join_table = constraint_name.split("_fk")[0]
252
+ else:
253
+ join_table = constraint_name
254
+
255
+ if join_table:
256
+ return join_table.upper()
257
+ else:
258
+ return "CONNECTS"
259
+ else:
260
+ from_table = rel_data.get("from_table", "")
261
+ to_table = rel_data.get("to_table", "")
262
+ return f"{from_table.upper()}_TO_{to_table.upper()}"