structured2graph 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +47 -0
- core/__init__.py +23 -0
- core/hygm/__init__.py +74 -0
- core/hygm/hygm.py +2351 -0
- core/hygm/models/__init__.py +82 -0
- core/hygm/models/graph_models.py +667 -0
- core/hygm/models/llm_models.py +229 -0
- core/hygm/models/operations.py +176 -0
- core/hygm/models/sources.py +68 -0
- core/hygm/models/user_operations.py +139 -0
- core/hygm/strategies/__init__.py +17 -0
- core/hygm/strategies/base.py +36 -0
- core/hygm/strategies/deterministic.py +262 -0
- core/hygm/strategies/llm.py +904 -0
- core/hygm/validation/__init__.py +38 -0
- core/hygm/validation/base.py +194 -0
- core/hygm/validation/graph_schema_validator.py +687 -0
- core/hygm/validation/memgraph_data_validator.py +991 -0
- core/migration_agent.py +1369 -0
- core/schema/spec.json +155 -0
- core/utils/meta_graph.py +108 -0
- database/__init__.py +36 -0
- database/adapters/__init__.py +11 -0
- database/adapters/memgraph.py +318 -0
- database/adapters/mysql.py +311 -0
- database/adapters/postgresql.py +335 -0
- database/analyzer.py +396 -0
- database/factory.py +219 -0
- database/models.py +209 -0
- main.py +518 -0
- query_generation/__init__.py +20 -0
- query_generation/cypher_generator.py +129 -0
- query_generation/schema_utilities.py +88 -0
- structured2graph-0.1.1.dist-info/METADATA +197 -0
- structured2graph-0.1.1.dist-info/RECORD +41 -0
- structured2graph-0.1.1.dist-info/WHEEL +4 -0
- structured2graph-0.1.1.dist-info/entry_points.txt +2 -0
- structured2graph-0.1.1.dist-info/licenses/LICENSE +21 -0
- utils/__init__.py +57 -0
- utils/config.py +235 -0
- utils/environment.py +404 -0
|
@@ -0,0 +1,667 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core graph models for Hypothetical Graph Modeling (HyGM).
|
|
3
|
+
|
|
4
|
+
These models represent the graph structure and provide schema format conversion.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import datetime
|
|
8
|
+
import re
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Dict, List, Any, Optional
|
|
11
|
+
|
|
12
|
+
# Import from within the same package
|
|
13
|
+
try:
|
|
14
|
+
from .sources import (
|
|
15
|
+
PropertySource,
|
|
16
|
+
NodeSource,
|
|
17
|
+
RelationshipSource,
|
|
18
|
+
IndexSource,
|
|
19
|
+
ConstraintSource,
|
|
20
|
+
EnumSource,
|
|
21
|
+
)
|
|
22
|
+
except ImportError:
|
|
23
|
+
# Fallback for when imported from different contexts
|
|
24
|
+
from sources import (
|
|
25
|
+
PropertySource,
|
|
26
|
+
NodeSource,
|
|
27
|
+
RelationshipSource,
|
|
28
|
+
IndexSource,
|
|
29
|
+
ConstraintSource,
|
|
30
|
+
EnumSource,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class GraphProperty:
    """Represents a property with full schema format details.

    ``types`` falls back to a single placeholder ``String`` entry when it is
    left as ``None``; an explicitly supplied list (even an empty one) is kept.
    """

    key: str
    count: int = 1
    filling_factor: float = 100.0
    # Optional: None is the "not supplied" marker replaced in __post_init__.
    types: Optional[List[Dict[str, Any]]] = None
    source: Optional[PropertySource] = None

    def __post_init__(self):
        """Install the placeholder type list when ``types`` was not supplied."""
        # A fresh list is built per instance, so instances never share it.
        if self.types is None:
            self.types = [{"type": "String", "count": 1, "examples": [""]}]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
class GraphNode:
    """Represents a node in the graph model aligned with schema format.

    ``properties`` defaults to an empty list and ``examples`` to
    ``[{"gid": 0}]`` when they are left as ``None``.
    """

    labels: List[str]  # Node labels
    count: int = 1
    # Optional: None is the "not supplied" marker replaced in __post_init__.
    properties: Optional[List[GraphProperty]] = None
    examples: Optional[List[Dict[str, Any]]] = None
    source: Optional[NodeSource] = None

    def __post_init__(self):
        """Replace ``None`` collections with fresh per-instance defaults."""
        if self.properties is None:
            self.properties = []
        if self.examples is None:
            self.examples = [{"gid": 0}]

    @property
    def primary_label(self) -> str:
        """Get the primary (first) label for backward compatibility.

        Returns:
            The first entry of ``labels``, or ``"Unknown"`` when ``labels``
            is empty.
        """
        return self.labels[0] if self.labels else "Unknown"
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
class GraphRelationship:
    """Represents a relationship in the graph model aligned with schema format.

    ``properties`` defaults to an empty list and ``examples`` to ``[{}]``
    when they are left as ``None``.
    """

    edge_type: str
    start_node_labels: List[str]
    end_node_labels: List[str]
    count: int = 1
    # Optional: None is the "not supplied" marker replaced in __post_init__.
    properties: Optional[List[GraphProperty]] = None
    examples: Optional[List[Dict[str, Any]]] = None
    source: Optional[RelationshipSource] = None
    directionality: str = "directed"

    def __post_init__(self):
        """Replace ``None`` collections with fresh per-instance defaults."""
        if self.properties is None:
            self.properties = []
        if self.examples is None:
            self.examples = [{}]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@dataclass
class GraphIndex:
    """Represents an index aligned with schema format.

    ``properties`` defaults to an empty list and ``examples`` to ``[{}]``
    when they are left as ``None``.
    """

    labels: Optional[List[str]] = None  # For node indexes
    edge_type: Optional[str] = None  # For edge indexes
    # Optional: None is the "not supplied" marker replaced in __post_init__.
    properties: Optional[List[str]] = None
    count: int = 0
    examples: Optional[List[Dict[str, Any]]] = None
    type: str = "label+property"  # Index type
    source: Optional[IndexSource] = None

    def __post_init__(self):
        """Replace ``None`` collections with fresh per-instance defaults."""
        if self.properties is None:
            self.properties = []
        if self.examples is None:
            self.examples = [{}]
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@dataclass
class GraphConstraint:
    """Represents a constraint aligned with schema format.

    ``properties`` defaults to an empty list when it is left as ``None``.
    """

    type: str  # "unique", "existence", "data_type"
    labels: Optional[List[str]] = None  # For node constraints
    edge_type: Optional[str] = None  # For edge constraints
    # Optional: None is the "not supplied" marker replaced in __post_init__.
    properties: Optional[List[str]] = None
    data_type: Optional[str] = None
    source: Optional[ConstraintSource] = None

    def __post_init__(self):
        """Replace a ``None`` property list with a fresh empty list."""
        if self.properties is None:
            self.properties = []
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@dataclass
class GraphEnum:
    """An enum definition (name plus its values) aligned with schema format."""

    # Both fields are required; only the provenance record is optional.
    name: str
    values: List[str]
    source: Optional[EnumSource] = None
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@dataclass
|
|
136
|
+
class GraphModel:
|
|
137
|
+
"""Complete graph model aligned with schema format."""
|
|
138
|
+
|
|
139
|
+
nodes: List[GraphNode]
|
|
140
|
+
edges: List[GraphRelationship]
|
|
141
|
+
node_indexes: List[GraphIndex] = None
|
|
142
|
+
edge_indexes: List[GraphIndex] = None
|
|
143
|
+
node_constraints: List[GraphConstraint] = None
|
|
144
|
+
edge_constraints: List[GraphConstraint] = None
|
|
145
|
+
enums: List[GraphEnum] = None
|
|
146
|
+
|
|
147
|
+
def __post_init__(self):
|
|
148
|
+
if self.node_indexes is None:
|
|
149
|
+
self.node_indexes = []
|
|
150
|
+
if self.edge_indexes is None:
|
|
151
|
+
self.edge_indexes = []
|
|
152
|
+
if self.node_constraints is None:
|
|
153
|
+
self.node_constraints = []
|
|
154
|
+
if self.edge_constraints is None:
|
|
155
|
+
self.edge_constraints = []
|
|
156
|
+
if self.enums is None:
|
|
157
|
+
self.enums = []
|
|
158
|
+
|
|
159
|
+
@classmethod
def from_schema_format(cls, schema_dict: Dict[str, Any]) -> "GraphModel":
    """Create a GraphModel from schema format dictionary.

    Every top-level section ("nodes", "edges", "node_indexes",
    "edge_indexes", "node_constraints", "edge_constraints", "enums") is
    optional and treated as empty when missing. A "source" entry that is
    absent or explicitly ``None`` produces ``source=None`` on the object.

    Args:
        schema_dict: Schema-format dictionary.

    Returns:
        A new instance populated from ``schema_dict``.

    Raises:
        KeyError: If an entry lacks a required key (for example "key" on a
            property, "labels" on a node, "edge_type" on an edge, "type" on
            a constraint, or "name"/"values" on an enum).
    """
    # NOTE(review): GraphRelationship.directionality is never read from the
    # edge dictionaries here, so it always takes its default - confirm.

    def _property(prop_dict):
        # Build one GraphProperty together with its optional source.
        src = prop_dict.get("source")
        prop_source = None
        if src is not None:
            prop_source = PropertySource(
                field=src["field"],
                transformation=src.get("transformation"),
            )
        return GraphProperty(
            key=prop_dict["key"],
            count=prop_dict.get("count", 1),
            filling_factor=prop_dict.get("filling_factor", 100.0),
            types=prop_dict.get("types", []),
            source=prop_source,
        )

    def _origin_source(entry, source_cls):
        # Build a NodeSource/RelationshipSource; all four keys are required.
        src = entry.get("source")
        if src is None:
            return None
        return source_cls(
            type=src["type"],
            name=src["name"],
            location=src["location"],
            mapping=src["mapping"],
        )

    def _index(index_dict, target_key, default_type):
        # Build a GraphIndex; target_key is "labels" or "edge_type".
        src = index_dict.get("source")
        index_source = None
        if src is not None:
            index_source = IndexSource(
                origin=src["origin"],
                reason=src["reason"],
                created_by=src["created_by"],
                index_name=src.get("index_name"),
                migrated_from=src.get("migrated_from"),
            )
        return GraphIndex(
            properties=index_dict.get("properties", []),
            count=index_dict.get("count", 0),
            examples=index_dict.get("examples", [{}]),
            type=index_dict.get("type", default_type),
            source=index_source,
            **{target_key: index_dict.get(target_key)},
        )

    def _constraint(constraint_dict, target_key):
        # Build a GraphConstraint; target_key is "labels" or "edge_type".
        src = constraint_dict.get("source")
        constraint_source = None
        if src is not None:
            constraint_source = ConstraintSource(
                origin=src["origin"],
                constraint_name=src.get("constraint_name"),
                migrated_from=src.get("migrated_from"),
                reason=src.get("reason"),
                created_by=src.get("created_by"),
            )
        return GraphConstraint(
            type=constraint_dict["type"],
            properties=constraint_dict.get("properties", []),
            data_type=constraint_dict.get("data_type"),
            source=constraint_source,
            **{target_key: constraint_dict.get(target_key)},
        )

    def _enum(enum_dict):
        # Build a GraphEnum together with its optional source.
        src = enum_dict.get("source")
        enum_source = None
        if src is not None:
            enum_source = EnumSource(
                origin=src["origin"],
                enum_name=src.get("enum_name"),
                migrated_from=src.get("migrated_from"),
                created_by=src.get("created_by"),
            )
        return GraphEnum(
            name=enum_dict["name"], values=enum_dict["values"], source=enum_source
        )

    # Convert nodes
    nodes = []
    for node_dict in schema_dict.get("nodes", []):
        properties = [_property(p) for p in node_dict.get("properties", [])]
        node_source = _origin_source(node_dict, NodeSource)
        nodes.append(
            GraphNode(
                labels=node_dict["labels"],
                count=node_dict.get("count", 1),
                properties=properties,
                examples=node_dict.get("examples", [{"gid": 0}]),
                source=node_source,
            )
        )

    # Convert edges
    edges = []
    for edge_dict in schema_dict.get("edges", []):
        properties = [_property(p) for p in edge_dict.get("properties", [])]
        edge_source = _origin_source(edge_dict, RelationshipSource)
        edges.append(
            GraphRelationship(
                edge_type=edge_dict["edge_type"],
                start_node_labels=edge_dict["start_node_labels"],
                end_node_labels=edge_dict["end_node_labels"],
                count=edge_dict.get("count", 1),
                properties=properties,
                examples=edge_dict.get("examples", [{}]),
                source=edge_source,
            )
        )

    # Convert indexes, constraints and enums
    node_indexes = [
        _index(d, "labels", "label+property")
        for d in schema_dict.get("node_indexes", [])
    ]
    edge_indexes = [
        _index(d, "edge_type", "edge_type+property")
        for d in schema_dict.get("edge_indexes", [])
    ]
    node_constraints = [
        _constraint(d, "labels") for d in schema_dict.get("node_constraints", [])
    ]
    edge_constraints = [
        _constraint(d, "edge_type")
        for d in schema_dict.get("edge_constraints", [])
    ]
    enums = [_enum(d) for d in schema_dict.get("enums", [])]

    return cls(
        nodes=nodes,
        edges=edges,
        node_indexes=node_indexes,
        edge_indexes=edge_indexes,
        node_constraints=node_constraints,
        edge_constraints=edge_constraints,
        enums=enums,
    )
|
|
360
|
+
|
|
361
|
+
def to_schema_format(
    self, sample_data: Optional[Dict[str, List[Dict[str, Any]]]] = None
) -> Dict[str, Any]:
    """Serialize the whole model into a schema-format dictionary.

    Args:
        sample_data: Passed through unchanged to the node and edge
            converters.

    Returns:
        A dict with the keys "nodes", "edges", "node_indexes",
        "edge_indexes", "node_constraints", "edge_constraints" and "enums",
        each holding a list of converted entries.
    """
    return {
        "nodes": [
            self._node_to_schema_dict(item, sample_data) for item in self.nodes
        ],
        "edges": [
            self._edge_to_schema_dict(item, sample_data) for item in self.edges
        ],
        "node_indexes": [
            self._index_to_schema_dict(item) for item in self.node_indexes
        ],
        "edge_indexes": [
            self._index_to_schema_dict(item) for item in self.edge_indexes
        ],
        "node_constraints": [
            self._constraint_to_schema_dict(item) for item in self.node_constraints
        ],
        "edge_constraints": [
            self._constraint_to_schema_dict(item) for item in self.edge_constraints
        ],
        "enums": [self._enum_to_schema_dict(item) for item in self.enums],
    }
|
|
409
|
+
|
|
410
|
+
def _node_to_schema_dict(
    self, node: GraphNode, sample_data: Optional[Dict[str, List[Dict[str, Any]]]]
) -> Dict[str, Any]:
    """Serialize one GraphNode into its schema-format dictionary.

    Args:
        node: The node to serialize.
        sample_data: Not read by this method.

    Returns:
        A dict with "labels", "count", "properties" and "examples", plus
        "source" when the node carries a truthy source.
    """

    def _serialize(prop):
        # Anything that is not a GraphProperty is treated as a legacy
        # property name and converted without sample rows.
        if not isinstance(prop, GraphProperty):
            return self._convert_property_to_schema(prop, [])
        entry = {
            "key": prop.key,
            "count": prop.count,
            "filling_factor": prop.filling_factor,
            "types": prop.types,
        }
        if prop.source:
            entry["source"] = {
                "field": prop.source.field,
                "transformation": prop.source.transformation,
            }
        return entry

    serialized_props = [_serialize(prop) for prop in node.properties]

    result = {
        "labels": node.labels,
        "count": node.count,
        "properties": serialized_props,
        "examples": node.examples,
    }

    origin = node.source
    if origin:
        result["source"] = {
            "type": origin.type,
            "name": origin.name,
            "location": origin.location,
            "mapping": origin.mapping,
        }

    return result
|
|
451
|
+
|
|
452
|
+
def _edge_to_schema_dict(
    self,
    edge: GraphRelationship,
    sample_data: Optional[Dict[str, List[Dict[str, Any]]]],
) -> Dict[str, Any]:
    """Serialize one GraphRelationship into its schema-format dictionary.

    Args:
        edge: The relationship to serialize.
        sample_data: Not read by this method.

    Returns:
        A dict with "edge_type", "start_node_labels", "end_node_labels",
        "count", "properties" and "examples", plus "source" when the edge
        carries a truthy source.
    """
    # NOTE(review): edge.directionality is not written to the output here -
    # confirm whether that is intended.

    def _serialize(prop):
        # Anything that is not a GraphProperty is treated as a legacy
        # property name and converted without sample rows.
        if not isinstance(prop, GraphProperty):
            return self._convert_property_to_schema(prop, [])
        entry = {
            "key": prop.key,
            "count": prop.count,
            "filling_factor": prop.filling_factor,
            "types": prop.types,
        }
        if prop.source:
            entry["source"] = {
                "field": prop.source.field,
                "transformation": prop.source.transformation,
            }
        return entry

    serialized_props = [_serialize(prop) for prop in edge.properties]

    result = {
        "edge_type": edge.edge_type,
        "start_node_labels": edge.start_node_labels,
        "end_node_labels": edge.end_node_labels,
        "count": edge.count,
        "properties": serialized_props,
        "examples": edge.examples,
    }

    origin = edge.source
    if origin:
        result["source"] = {
            "type": origin.type,
            "name": origin.name,
            "location": origin.location,
            "mapping": origin.mapping,
        }

    return result
|
|
497
|
+
|
|
498
|
+
def _index_to_schema_dict(self, index: GraphIndex) -> Dict[str, Any]:
    """Serialize one GraphIndex into its schema-format dictionary.

    "labels", "edge_type" and "source" are emitted only when truthy; within
    the source, "index_name" and "migrated_from" are likewise optional.
    """
    result = {
        "properties": index.properties,
        "count": index.count,
        "examples": index.examples,
        "type": index.type,
    }

    for attr_name in ("labels", "edge_type"):
        attr_value = getattr(index, attr_name)
        if attr_value:
            result[attr_name] = attr_value

    origin = index.source
    if not origin:
        return result

    source_entry = {
        "origin": origin.origin,
        "reason": origin.reason,
        "created_by": origin.created_by,
    }
    for attr_name in ("index_name", "migrated_from"):
        attr_value = getattr(origin, attr_name)
        if attr_value:
            source_entry[attr_name] = attr_value
    result["source"] = source_entry

    return result
|
|
524
|
+
|
|
525
|
+
def _constraint_to_schema_dict(self, constraint: GraphConstraint) -> Dict[str, Any]:
    """Serialize one GraphConstraint into its schema-format dictionary.

    "type" and "properties" are always present. "labels", "edge_type",
    "data_type" and "source" are emitted only when truthy; inside the
    source, only "origin" is unconditional.
    """
    result = {
        "type": constraint.type,
        "properties": constraint.properties,
    }

    for attr_name in ("labels", "edge_type", "data_type"):
        attr_value = getattr(constraint, attr_name)
        if attr_value:
            result[attr_name] = attr_value

    origin = constraint.source
    if not origin:
        return result

    source_entry = {"origin": origin.origin}
    for attr_name in ("constraint_name", "migrated_from", "reason", "created_by"):
        attr_value = getattr(origin, attr_name)
        if attr_value:
            source_entry[attr_name] = attr_value
    result["source"] = source_entry

    return result
|
|
554
|
+
|
|
555
|
+
def _enum_to_schema_dict(self, enum: GraphEnum) -> Dict[str, Any]:
    """Serialize one GraphEnum into its schema-format dictionary.

    "name" and "values" are always present. "source" is emitted only when
    truthy, and inside it only "origin" is unconditional.
    """
    result = {
        "name": enum.name,
        "values": enum.values,
    }

    origin = enum.source
    if not origin:
        return result

    source_entry = {"origin": origin.origin}
    for attr_name in ("enum_name", "migrated_from", "created_by"):
        attr_value = getattr(origin, attr_name)
        if attr_value:
            source_entry[attr_name] = attr_value
    result["source"] = source_entry

    return result
|
|
575
|
+
|
|
576
|
+
def _convert_property_to_schema(
|
|
577
|
+
self, prop_name: str, sample_rows: List[Dict[str, Any]]
|
|
578
|
+
) -> Dict[str, Any]:
|
|
579
|
+
"""Convert a property to schema format with type detection."""
|
|
580
|
+
prop_schema = {
|
|
581
|
+
"key": prop_name,
|
|
582
|
+
"count": 1,
|
|
583
|
+
"filling_factor": 100.00,
|
|
584
|
+
"types": [],
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
# Analyze sample data to determine types
|
|
588
|
+
if sample_rows:
|
|
589
|
+
type_counts = {}
|
|
590
|
+
examples_by_type = {}
|
|
591
|
+
|
|
592
|
+
for row in sample_rows:
|
|
593
|
+
value = row.get(prop_name)
|
|
594
|
+
detected_type = self._detect_value_type(value)
|
|
595
|
+
|
|
596
|
+
current_count = type_counts.get(detected_type, 0)
|
|
597
|
+
type_counts[detected_type] = current_count + 1
|
|
598
|
+
if detected_type not in examples_by_type:
|
|
599
|
+
examples_by_type[detected_type] = []
|
|
600
|
+
if len(examples_by_type[detected_type]) < 3:
|
|
601
|
+
examples_by_type[detected_type].append(value)
|
|
602
|
+
|
|
603
|
+
# Convert to schema types
|
|
604
|
+
for type_name, count in type_counts.items():
|
|
605
|
+
type_schema = {
|
|
606
|
+
"type": type_name,
|
|
607
|
+
"count": count,
|
|
608
|
+
"examples": examples_by_type[type_name],
|
|
609
|
+
}
|
|
610
|
+
prop_schema["types"].append(type_schema)
|
|
611
|
+
else:
|
|
612
|
+
prop_schema["types"] = [{"type": "String", "count": 1, "examples": [""]}]
|
|
613
|
+
|
|
614
|
+
return prop_schema
|
|
615
|
+
|
|
616
|
+
def _detect_value_type(self, value: Any) -> str:
|
|
617
|
+
"""Detect the type of a value and return the schema type name."""
|
|
618
|
+
if value is None:
|
|
619
|
+
return "Null"
|
|
620
|
+
elif isinstance(value, bool):
|
|
621
|
+
return "Boolean"
|
|
622
|
+
elif isinstance(value, int):
|
|
623
|
+
return "Integer"
|
|
624
|
+
elif isinstance(value, float):
|
|
625
|
+
return "Double"
|
|
626
|
+
elif isinstance(value, datetime.datetime):
|
|
627
|
+
return "LocalDateTime"
|
|
628
|
+
elif isinstance(value, datetime.date):
|
|
629
|
+
return "Date"
|
|
630
|
+
elif isinstance(value, datetime.time):
|
|
631
|
+
return "LocalTime"
|
|
632
|
+
elif isinstance(value, str):
|
|
633
|
+
if self._is_datetime_string(value):
|
|
634
|
+
return "LocalDateTime"
|
|
635
|
+
elif self._is_date_string(value):
|
|
636
|
+
return "Date"
|
|
637
|
+
elif self._is_time_string(value):
|
|
638
|
+
return "LocalTime"
|
|
639
|
+
else:
|
|
640
|
+
return "String"
|
|
641
|
+
else:
|
|
642
|
+
return "String"
|
|
643
|
+
|
|
644
|
+
def _is_date_string(self, value: str) -> bool:
|
|
645
|
+
"""Check if string looks like a date."""
|
|
646
|
+
date_patterns = [
|
|
647
|
+
r"^\d{4}-\d{2}-\d{2}$", # YYYY-MM-DD
|
|
648
|
+
r"^\d{2}/\d{2}/\d{4}$", # MM/DD/YYYY
|
|
649
|
+
r"^\d{2}-\d{2}-\d{4}$", # MM-DD-YYYY
|
|
650
|
+
]
|
|
651
|
+
return any(re.match(pattern, value) for pattern in date_patterns)
|
|
652
|
+
|
|
653
|
+
def _is_datetime_string(self, value: str) -> bool:
|
|
654
|
+
"""Check if string looks like a datetime."""
|
|
655
|
+
datetime_patterns = [
|
|
656
|
+
r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}", # YYYY-MM-DD HH:MM:SS
|
|
657
|
+
r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}", # ISO format
|
|
658
|
+
]
|
|
659
|
+
return any(re.match(pattern, value) for pattern in datetime_patterns)
|
|
660
|
+
|
|
661
|
+
def _is_time_string(self, value: str) -> bool:
|
|
662
|
+
"""Check if string looks like a time."""
|
|
663
|
+
time_patterns = [
|
|
664
|
+
r"^\d{2}:\d{2}:\d{2}$", # HH:MM:SS
|
|
665
|
+
r"^\d{2}:\d{2}$", # HH:MM
|
|
666
|
+
]
|
|
667
|
+
return any(re.match(pattern, value) for pattern in time_patterns)
|