structured2graph 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +47 -0
- core/__init__.py +23 -0
- core/hygm/__init__.py +74 -0
- core/hygm/hygm.py +2351 -0
- core/hygm/models/__init__.py +82 -0
- core/hygm/models/graph_models.py +667 -0
- core/hygm/models/llm_models.py +229 -0
- core/hygm/models/operations.py +176 -0
- core/hygm/models/sources.py +68 -0
- core/hygm/models/user_operations.py +139 -0
- core/hygm/strategies/__init__.py +17 -0
- core/hygm/strategies/base.py +36 -0
- core/hygm/strategies/deterministic.py +262 -0
- core/hygm/strategies/llm.py +904 -0
- core/hygm/validation/__init__.py +38 -0
- core/hygm/validation/base.py +194 -0
- core/hygm/validation/graph_schema_validator.py +687 -0
- core/hygm/validation/memgraph_data_validator.py +991 -0
- core/migration_agent.py +1369 -0
- core/schema/spec.json +155 -0
- core/utils/meta_graph.py +108 -0
- database/__init__.py +36 -0
- database/adapters/__init__.py +11 -0
- database/adapters/memgraph.py +318 -0
- database/adapters/mysql.py +311 -0
- database/adapters/postgresql.py +335 -0
- database/analyzer.py +396 -0
- database/factory.py +219 -0
- database/models.py +209 -0
- main.py +518 -0
- query_generation/__init__.py +20 -0
- query_generation/cypher_generator.py +129 -0
- query_generation/schema_utilities.py +88 -0
- structured2graph-0.1.1.dist-info/METADATA +197 -0
- structured2graph-0.1.1.dist-info/RECORD +41 -0
- structured2graph-0.1.1.dist-info/WHEEL +4 -0
- structured2graph-0.1.1.dist-info/entry_points.txt +2 -0
- structured2graph-0.1.1.dist-info/licenses/LICENSE +21 -0
- utils/__init__.py +57 -0
- utils/config.py +235 -0
- utils/environment.py +404 -0
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Deterministic modeling strategy for Hypothetical Graph Modeling (HyGM).
|
|
3
|
+
|
|
4
|
+
This strategy creates graph models using rule-based approaches without AI.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from typing import Dict, Any, Optional, List, TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from core.hygm.models.graph_models import GraphModel
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
from .base import BaseModelingStrategy
|
|
15
|
+
except ImportError:
|
|
16
|
+
from core.hygm.strategies.base import BaseModelingStrategy
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DeterministicStrategy(BaseModelingStrategy):
|
|
22
|
+
"""Deterministic graph modeling strategy using rule-based approaches."""
|
|
23
|
+
|
|
24
|
+
def get_strategy_name(self) -> str:
    """Identify this strategy by its fixed name, ``"deterministic"``."""
    strategy_name = "deterministic"
    return strategy_name
|
|
27
|
+
|
|
28
|
+
def create_model(
    self,
    database_structure: Dict[str, Any],
    domain_context: Optional[str] = None,  # noqa: ARG002
) -> "GraphModel":
    """
    Build a graph model from the database structure using fixed rules only.

    Every entry of ``database_structure["entity_tables"]`` yields one node
    labelled with the title-cased table name, plus one index per column
    reported by ``_extract_indexes_from_table`` and one unique constraint per
    ``UNIQUE(...)`` entry reported by ``_extract_constraints_from_table``.
    Every entry of ``database_structure["relationships"]`` yields one
    directed edge between the title-cased ``from_table`` and ``to_table``
    labels. ``domain_context`` is accepted but not read by this strategy.
    """
    logger.info("Creating deterministic graph model...")

    # Deferred to call time to avoid circular imports.
    from core.hygm.models.graph_models import (
        GraphConstraint,
        GraphIndex,
        GraphModel,
        GraphNode,
        GraphProperty,
        GraphRelationship,
    )
    from core.hygm.models.sources import (
        ConstraintSource,
        IndexSource,
        NodeSource,
        PropertySource,
        RelationshipSource,
    )

    tables = database_structure.get("entity_tables", {})
    graph_nodes = []
    graph_edges = []
    graph_indexes = []
    graph_constraints = []

    # --- Entity tables -> nodes, indexes, constraints ---
    for table_name, table_info in tables.items():
        label = table_name.title()
        schema_location = f"database.schema.{table_name}"

        # First declared primary key identifies the node; "id" otherwise.
        pk_columns = table_info.get("primary_keys", [])
        node_source = NodeSource(
            type="table",
            name=table_name,
            location=schema_location,
            mapping={
                "labels": [label],
                "id_field": (pk_columns or ["id"])[0],
            },
        )

        node_properties = [
            GraphProperty(
                key=column,
                source=PropertySource(field=f"{table_name}.{column}"),
            )
            for column in self._extract_node_properties_from_table(table_info)
        ]
        graph_nodes.append(
            GraphNode(
                labels=[label],
                properties=node_properties,
                source=node_source,
            )
        )

        # One single-property index per indexed column.
        graph_indexes.extend(
            GraphIndex(
                labels=[label],
                properties=[indexed_column],
                type="label+property",
                source=IndexSource(
                    origin="migration_requirement",
                    reason="performance_optimization",
                    created_by="migration_agent",
                    index_name=None,
                    migrated_from=None,
                ),
            )
            for indexed_column in self._extract_indexes_from_table(table_info)
        )

        for definition in self._extract_constraints_from_table(table_info):
            if "UNIQUE" not in definition.upper():
                continue
            # Strip the "UNIQUE(" prefix and ")" to recover the column name.
            unique_column = definition.replace("UNIQUE(", "").replace(")", "")
            graph_constraints.append(
                GraphConstraint(
                    type="unique",
                    labels=[label],
                    properties=[unique_column],
                    source=ConstraintSource(
                        origin="source_database_constraint",
                        constraint_name=f"{table_name}_{unique_column}_unique",
                        migrated_from=schema_location,
                    ),
                )
            )

    # --- Relationship descriptors -> directed edges ---
    for fk in database_structure.get("relationships", []):
        edge_type = self._generate_relationship_name(fk)
        source_table = fk.get("from_table", "")
        target_table = fk.get("to_table", "")

        # Primary key of the originating table, passed along in the mapping
        # for the migration agent; falls back to "<from_table>_id".
        source_pks = tables.get(source_table, {}).get("primary_keys", [])
        source_pk = source_pks[0] if source_pks else f"{source_table}_id"

        start_column = fk.get("from_column", "id")
        end_column = fk.get("to_column", "id")
        edge_source = RelationshipSource(
            type="table",
            name=fk.get("constraint_name", edge_type),
            location=f"database.schema.{source_table}",
            mapping={
                "start_node": f"{source_table}.{start_column}",
                "end_node": f"{target_table}.{end_column}",
                "edge_type": edge_type,
                "from_pk": source_pk,
            },
        )
        graph_edges.append(
            GraphRelationship(
                edge_type=edge_type,
                start_node_labels=[source_table.title()],
                end_node_labels=[target_table.title()],
                properties=[],
                source=edge_source,
                directionality="directed",
            )
        )

    return GraphModel(
        nodes=graph_nodes,
        edges=graph_edges,
        node_indexes=graph_indexes,
        node_constraints=graph_constraints,
    )
|
|
180
|
+
|
|
181
|
+
def _extract_node_properties_from_table(
|
|
182
|
+
self, table_info: Dict[str, Any]
|
|
183
|
+
) -> List[str]:
|
|
184
|
+
"""Extract properties that should be included in the node."""
|
|
185
|
+
properties = []
|
|
186
|
+
|
|
187
|
+
# Use standardized schema format (always available from models.py)
|
|
188
|
+
schema_list = table_info.get("schema", [])
|
|
189
|
+
for col_info in schema_list:
|
|
190
|
+
col_name = col_info.get("field")
|
|
191
|
+
if not col_name:
|
|
192
|
+
continue
|
|
193
|
+
|
|
194
|
+
# Include all columns except foreign key columns that aren't PKs
|
|
195
|
+
# Primary keys (PRI) included, foreign keys (MUL) excluded
|
|
196
|
+
if col_info.get("key") != "MUL":
|
|
197
|
+
properties.append(col_name)
|
|
198
|
+
return properties
|
|
199
|
+
|
|
200
|
+
def _extract_indexes_from_table(self, table_info: Dict[str, Any]) -> List[str]:
|
|
201
|
+
"""Extract properties that should have indexes."""
|
|
202
|
+
indexes = set() # Use set to avoid duplicates
|
|
203
|
+
|
|
204
|
+
# First, preserve indexes from the source database
|
|
205
|
+
source_indexes = table_info.get("indexes", [])
|
|
206
|
+
for index_info in source_indexes:
|
|
207
|
+
# Each index_info is a dict with 'columns' list
|
|
208
|
+
if isinstance(index_info, dict) and "columns" in index_info:
|
|
209
|
+
for column in index_info["columns"]:
|
|
210
|
+
indexes.add(column)
|
|
211
|
+
|
|
212
|
+
# Then add essential indexes for PKs, unique columns, and foreign keys
|
|
213
|
+
# This ensures we have indexes even if source DB doesn't have them
|
|
214
|
+
schema_list = table_info.get("schema", [])
|
|
215
|
+
for col_info in schema_list:
|
|
216
|
+
col_name = col_info.get("field")
|
|
217
|
+
if not col_name:
|
|
218
|
+
continue
|
|
219
|
+
|
|
220
|
+
# Add indexes for PKs, unique columns, and foreign keys
|
|
221
|
+
# Foreign keys need indexes for efficient relationship lookups
|
|
222
|
+
if col_info.get("key") in ["PRI", "UNI", "MUL"]:
|
|
223
|
+
indexes.add(col_name)
|
|
224
|
+
|
|
225
|
+
return list(indexes)
|
|
226
|
+
|
|
227
|
+
def _extract_constraints_from_table(self, table_info: Dict[str, Any]) -> List[str]:
|
|
228
|
+
"""Extract constraint definitions from table info."""
|
|
229
|
+
constraints = []
|
|
230
|
+
|
|
231
|
+
# Use standardized schema format (always available from models.py)
|
|
232
|
+
schema_list = table_info.get("schema", [])
|
|
233
|
+
for col_info in schema_list:
|
|
234
|
+
col_name = col_info.get("field")
|
|
235
|
+
if not col_name:
|
|
236
|
+
continue
|
|
237
|
+
|
|
238
|
+
# Add unique constraints for primary keys and unique columns
|
|
239
|
+
# This preserves the source database constraint information
|
|
240
|
+
if col_info.get("key") in ["PRI", "UNI"]:
|
|
241
|
+
constraints.append(f"UNIQUE({col_name})")
|
|
242
|
+
|
|
243
|
+
return constraints
|
|
244
|
+
|
|
245
|
+
def _generate_relationship_name(self, rel_data: Dict[str, Any]) -> str:
|
|
246
|
+
"""Generate a semantic relationship name from relationship data."""
|
|
247
|
+
constraint_name = rel_data.get("constraint_name", "")
|
|
248
|
+
if constraint_name:
|
|
249
|
+
# Extract meaningful name from constraint
|
|
250
|
+
if "_fk" in constraint_name:
|
|
251
|
+
join_table = constraint_name.split("_fk")[0]
|
|
252
|
+
else:
|
|
253
|
+
join_table = constraint_name
|
|
254
|
+
|
|
255
|
+
if join_table:
|
|
256
|
+
return join_table.upper()
|
|
257
|
+
else:
|
|
258
|
+
return "CONNECTS"
|
|
259
|
+
else:
|
|
260
|
+
from_table = rel_data.get("from_table", "")
|
|
261
|
+
to_table = rel_data.get("to_table", "")
|
|
262
|
+
return f"{from_table.upper()}_TO_{to_table.upper()}"
|