structurize 2.16.5__py3-none-any.whl → 2.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,502 @@
1
+ # coding: utf-8
2
+ """
3
+ Module to convert JSON Structure schema to GraphQL schema.
4
+ """
5
+
6
+ import json
7
+ import os
8
+ from typing import Dict, List, Optional, Set
9
+
10
+ from avrotize.common import get_longest_namespace_prefix
11
+
12
+ JsonNode = Dict[str, 'JsonNode'] | List['JsonNode'] | str | bool | int | None
13
+
14
+
15
class StructureToGraphQLConverter:
    """
    Class to convert JSON Structure schema to GraphQL schema.

    Named object types become GraphQL ``type`` definitions and named enums
    become GraphQL ``enum`` definitions. Types are emitted in the order they
    were registered, which places a type's dependencies before the type itself.
    """

    # JSON Structure primitive type name -> GraphQL (built-in or custom) scalar.
    _PRIMITIVE_TYPE_MAP = {
        # JSON primitive types
        "string": "String",
        "number": "Float",
        "integer": "Int",
        "boolean": "Boolean",
        "null": "String",

        # Extended integer types
        "int8": "Int",
        "uint8": "Int",
        "int16": "Int",
        "uint16": "Int",
        "int32": "Int",
        "uint32": "Int",
        "int64": "Int",
        "uint64": "Int",
        "int128": "String",  # GraphQL Int is 32-bit, use String for larger
        "uint128": "String",

        # Floating point types
        "float8": "Float",
        "float": "Float",
        "double": "Float",
        "binary32": "Float",
        "binary64": "Float",
        "decimal": "Decimal",

        # Binary types
        "binary": "Binary",

        # Date/time types
        "date": "Date",
        "time": "String",
        "datetime": "DateTime",
        "timestamp": "DateTime",
        "duration": "String",

        # Other special types
        "uuid": "UUID",
        "uri": "URI",
        "jsonpointer": "String",
        "any": "JSON",
    }

    def __init__(self, structure_schema_path, graphql_schema_path):
        """
        Initialize the converter with file paths.

        :param structure_schema_path: Path to the JSON Structure schema file.
        :param graphql_schema_path: Path to save the GraphQL schema file.
        """
        self.structure_schema_path = structure_schema_path
        self.graphql_schema_path = graphql_schema_path
        self.records = {}  # qualified_name -> schema
        self.record_names = {}  # qualified_name -> simple_name
        self.enums = {}  # qualified_name -> schema
        self.enum_names = {}  # qualified_name -> simple_name
        self.scalars = {}
        self.schema_doc: 'JsonNode' = None
        self.definitions: Dict = {}
        self.schema_registry: Dict[str, Dict] = {}
        self.longest_namespace_prefix = ""
        self.type_order = []  # Track order of type definitions for dependency ordering
        # id() of every schema dict currently being walked; breaks $ref cycles.
        self._in_progress: Set[int] = set()

    def convert(self: 'StructureToGraphQLConverter'):
        """
        Convert JSON Structure schema to GraphQL schema and save to file.

        :raises ValueError: If the input file holds neither a JSON object nor
            a JSON array.
        """
        with open(self.structure_schema_path, 'r', encoding='utf-8') as file:
            structure_schemas = json.load(file)

        # Normalize to list
        if isinstance(structure_schemas, dict):
            structure_schemas = [structure_schemas]

        if not isinstance(structure_schemas, list):
            raise ValueError("Expected a single JSON Structure schema as a JSON object, or a list of schemas")

        self.schema_doc = structure_schemas
        documents = [schema for schema in structure_schemas if isinstance(schema, dict)]

        # Register all schemas with $id before extracting anything, so that
        # absolute references can be resolved regardless of document order.
        for document in documents:
            self.register_schema_ids(document)

        # Extract named types from all schemas
        for document in documents:
            self._extract_document(document)

        graphql_content = self.generate_graphql()

        with open(self.graphql_schema_path, "w", encoding="utf-8") as file:
            file.write(graphql_content)
            if not graphql_content.endswith('\n'):
                file.write('\n')

    def _extract_document(self, schema: Dict) -> None:
        """Extract the root type and all definitions of one schema document."""
        # Process root type if it exists
        if 'type' in schema:
            self.extract_named_types_from_structure(schema, schema.get('namespace', ''))
        elif isinstance(schema.get('$root'), str):
            root_ref = schema['$root']
            root_schema = self.resolve_ref(root_ref, schema)
            if root_schema:
                ref_path = root_ref.split('/')
                ref_namespace = '.'.join(ref_path[2:-1]) if len(ref_path) > 3 else ''
                # Name the root after its definition key so it is registered
                # under the same qualified name process_definitions() uses.
                self.extract_named_types_from_structure(
                    root_schema, ref_namespace, explicit_name=ref_path[-1])

        # Process definitions
        definitions = schema.get('definitions')
        if isinstance(definitions, dict):
            self.definitions = definitions
            self.process_definitions(definitions, '')

    def _ref_context(self):
        """
        Return the document used to resolve local ``#/...`` references.

        NOTE(review): when several documents were loaded only the first one is
        used, so local references inside later documents may not resolve.
        """
        if isinstance(self.schema_doc, list) and len(self.schema_doc) > 0:
            return self.schema_doc[0]
        return self.schema_doc

    def register_schema_ids(self, schema: Dict, base_uri: str = '') -> None:
        """
        Recursively registers schemas with $id keywords.

        :param schema: Schema node to inspect.
        :param base_uri: Identifier of the closest enclosing schema with an
            ``$id``; relative ``$id`` values are joined onto it.
        """
        if not isinstance(schema, dict):
            return

        schema_id = schema.get('$id')
        if isinstance(schema_id, str):
            if base_uri and not schema_id.startswith(('http://', 'https://', 'urn:')):
                from urllib.parse import urljoin
                schema_id = urljoin(base_uri, schema_id)
            self.schema_registry[schema_id] = schema
            base_uri = schema_id

        # Containers whose values are sub-schemas.
        for container_key in ('definitions', 'properties'):
            children = schema.get(container_key)
            if isinstance(children, dict):
                for child in children.values():
                    if isinstance(child, dict):
                        self.register_schema_ids(child, base_uri)

        # Keywords whose value is itself a sub-schema.
        for key in ('items', 'values', 'additionalProperties'):
            child = schema.get(key)
            if isinstance(child, dict):
                self.register_schema_ids(child, base_uri)

    def resolve_ref(self, ref: str, context_schema: Optional[Dict] = None) -> Optional[Dict]:
        """
        Resolves a $ref to the actual schema definition.

        :param ref: Either a local reference starting with ``#/`` or an
            identifier previously registered through ``$id``.
        :param context_schema: Document to walk for local references; falls
            back to ``self.schema_doc`` when empty.
        :return: The referenced node, or ``None`` when it cannot be resolved.
        """
        if not isinstance(ref, str):
            return None

        if not ref.startswith('#/'):
            return self.schema_registry.get(ref)

        node = context_schema if context_schema else self.schema_doc
        for part in ref[2:].split('/'):
            if not isinstance(node, dict) or part not in node:
                return None
            node = node[part]
        return node

    def process_definitions(self, definitions: Dict, namespace_path: str) -> None:
        """
        Processes the definitions section recursively.

        An entry carrying ``type`` or ``enum`` is a type definition named by
        its key; any other dict entry is treated as a nested namespace.

        :param definitions: Mapping of names to definitions or namespaces.
        :param namespace_path: Dot-separated namespace of ``definitions``.
        """
        if not isinstance(definitions, dict):
            return

        for name, definition in definitions.items():
            if not isinstance(definition, dict):
                continue
            if 'type' in definition or 'enum' in definition:
                # This is a type definition
                current_namespace = self.concat_namespace(namespace_path, '')
                self.extract_named_types_from_structure(definition, current_namespace, explicit_name=name)
            else:
                # This is a namespace
                self.process_definitions(definition, self.concat_namespace(namespace_path, name))

    def concat_namespace(self, namespace: str, name: str) -> str:
        """Concatenates namespace and name with a dot separator"""
        if namespace and name:
            return f"{namespace}.{name}"
        return namespace if namespace else name

    def qualified_name(self, schema: Dict[str, 'JsonNode'], parent_namespace: str = '') -> str:
        """
        Get the full name of a record type.

        :param schema: Schema providing ``name`` and optionally ``namespace``.
        :param parent_namespace: Namespace used when the schema has none.
        :return: ``<namespace>_<name>`` with dots replaced by underscores, or
            just the name when there is no namespace.
        """
        name = str(schema.get('name', 'UnnamedType'))
        namespace = str(schema.get('namespace', parent_namespace))
        if namespace:
            return f"{namespace}_{name}".replace('.', '_')
        return name

    def extract_named_types_from_structure(self, schema: Dict, parent_namespace: str, explicit_name: str = ''):
        """
        Extract all named types (objects, enums) from a JSON Structure schema.

        :param schema: Schema node to walk; non-dict values are ignored.
        :param parent_namespace: Namespace used to qualify registered names.
        :param explicit_name: Name to register the node under; overrides the
            node's own ``name`` (used for ``definitions`` keys and ``$ref``s).
        """
        if not isinstance(schema, dict):
            return

        # Handle $ref FIRST before anything else
        if '$ref' in schema:
            ref = schema['$ref']
            ref_schema = self.resolve_ref(ref, self._ref_context())
            if ref_schema:
                # The last path segment of the $ref is the referenced type's name.
                self.extract_named_types_from_structure(
                    ref_schema, parent_namespace, explicit_name=ref.split('/')[-1])
            return

        # A type that (directly or indirectly) references itself would recurse
        # forever, because it is only registered after its members are walked.
        marker = id(schema)
        if marker in self._in_progress:
            return
        self._in_progress.add(marker)
        try:
            self._extract_resolved(schema, parent_namespace, explicit_name)
        finally:
            self._in_progress.discard(marker)

    def _extract_resolved(self, schema: Dict, parent_namespace: str, explicit_name: str) -> None:
        """Register ``schema`` (already $ref-free) and walk its member schemas."""
        walk = self.extract_named_types_from_structure

        # Use explicit name if provided, otherwise get from schema
        name = explicit_name if explicit_name else schema.get('name', '')

        # Handle enum keyword
        if 'enum' in schema:
            # Unnamed inline enums are rendered as String by get_graphql_type,
            # so there is nothing to declare for them.
            if name:
                qualified = self.qualified_name({'name': name, 'namespace': parent_namespace}, parent_namespace)
                if qualified not in self.enums:
                    self.enums[qualified] = schema
                    self.enum_names[qualified] = name
                    self.type_order.append(('enum', qualified))
            return

        # Handle type keyword
        struct_type = schema.get('type')

        if isinstance(struct_type, dict):
            # e.g. {"type": {"$ref": "#/definitions/Address"}}
            walk(struct_type, parent_namespace)
            return

        if isinstance(struct_type, list):
            # Type union: only dict members can contain named types.
            for member in struct_type:
                walk(member, parent_namespace)
            return

        if struct_type == 'object':
            # Process nested properties FIRST to ensure dependencies come before this type
            properties = schema.get('properties')
            if isinstance(properties, dict):
                for prop_schema in properties.values():
                    walk(prop_schema, parent_namespace)

            # NOW add this type after all dependencies have been processed
            if name:
                qualified = self.qualified_name({'name': name, 'namespace': parent_namespace}, parent_namespace)
                if qualified not in self.records:
                    self.records[qualified] = schema
                    self.record_names[qualified] = name
                    self.type_order.append(('record', qualified))

        elif struct_type in ('array', 'set'):
            walk(schema.get('items'), parent_namespace)

        elif struct_type == 'map':
            walk(schema.get('values'), parent_namespace)

        elif struct_type == 'choice':
            choices = schema.get('choices', {})
            if isinstance(choices, dict):
                # $ref alternatives are resolved by the recursive call, against
                # the document rather than against the choice schema itself.
                for choice_schema in choices.values():
                    walk(choice_schema, parent_namespace)

    def generate_graphql(self):
        """
        Generate GraphQL content from the extracted types.

        :return: GraphQL content as a string.
        """
        # Custom scalars are declared when a related keyword occurs anywhere
        # in the textual form of a registered record (a deliberate superset).
        scalar_hints = (
            (('date', 'datetime', 'timestamp'), ('scalar Date', 'scalar DateTime')),
            (('uuid',), ('scalar UUID',)),
            (('uri', 'url'), ('scalar URI',)),
            (('decimal',), ('scalar Decimal',)),
            (('binary', 'bytes'), ('scalar Binary',)),
        )
        record_texts = [str(record).lower() for record in self.records.values()]

        # JSON scalar is always declared; it backs map and inline object fields.
        custom_scalars = {'scalar JSON'}
        for keywords, declarations in scalar_hints:
            if any(keyword in text for text in record_texts for keyword in keywords):
                custom_scalars.update(declarations)

        graphql = sorted(custom_scalars)
        graphql.append('')  # Empty line after scalars

        # Generate types in dependency order
        for type_kind, qualified in self.type_order:
            if type_kind == 'enum' and qualified in self.enums:
                graphql.append(self.generate_graphql_enum(
                    self.enums[qualified], self.enum_names.get(qualified)))
            elif type_kind == 'record' and qualified in self.records:
                graphql.append(self.generate_graphql_record(
                    self.records[qualified], self.record_names.get(qualified)))

        return "\n".join(graphql)

    def generate_graphql_record(self, record, name=None):
        """
        Generate GraphQL content for a record.

        :param record: Record schema as a dictionary.
        :param name: Fallback type name used when the schema has no ``name``
            of its own (types declared under ``definitions`` are named by key).
        :return: GraphQL content as a string.
        """
        # The schema's own name wins because get_graphql_type() prefers it too
        # when rendering references to this type.
        type_name = record.get('name') or name or 'UnnamedType'
        doc = record.get('description', record.get('doc', ''))

        output = []
        if doc:
            output.append(f'"""\n{doc}\n"""')

        properties = record.get('properties', {})
        if not isinstance(properties, dict):
            properties = {}
        required_props = record.get('required', [])
        if not isinstance(required_props, list):
            required_props = []

        fields = []
        for prop_name, prop_schema in properties.items():
            field_type = self.get_graphql_type(prop_schema)

            # Add ! for required fields
            if prop_name in required_props and not field_type.endswith('!'):
                field_type = f"{field_type}!"

            # Property schemas may be a bare type name; only dicts carry docs.
            if isinstance(prop_schema, dict):
                field_doc = prop_schema.get('description', prop_schema.get('doc', ''))
                if field_doc:
                    fields.append(f' """{field_doc}"""')
            fields.append(f" {prop_name}: {field_type}")

        output.append(f"type {type_name} {{")
        if fields:
            output.extend(fields)
        else:
            output.append(" _empty: String")  # GraphQL doesn't allow empty types
        output.append("}")

        return "\n".join(output)

    def generate_graphql_enum(self, enum, name=None):
        """
        Generate GraphQL content for an enum.

        :param enum: Enum schema as a dictionary.
        :param name: Fallback enum name used when the schema has no ``name``
            of its own (enums declared under ``definitions`` are named by key).
        :return: GraphQL content as a string.
        """
        enum_name = enum.get('name') or name or 'UnnamedEnum'
        doc = enum.get('description', enum.get('doc', ''))
        symbols = enum.get('enum', [])

        output = []
        if doc:
            output.append(f'"""\n{doc}\n"""')

        # GraphQL enum members must be valid identifiers
        enum_members = []
        for symbol in symbols:
            member = str(symbol).replace('-', '_').replace(' ', '_').replace('.', '_')
            # Ensure it starts with a letter or underscore
            if member and not member[0].isalpha() and member[0] != '_':
                member = f"_{member}"
            enum_members.append(f" {member}")

        output.append(f"enum {enum_name} {{")
        if enum_members:
            output.extend(enum_members)
        else:
            output.append(" _EMPTY")
        output.append("}")

        return "\n".join(output)

    def get_graphql_type(self, structure_type):
        """
        Get GraphQL type from JSON Structure type.

        :param structure_type: JSON Structure type as a string, dict, or list.
        :return: GraphQL type as a string.
        """
        if isinstance(structure_type, list):
            # Handle type unions (e.g., ["null", "string"]): first non-null member wins
            non_null_types = [t for t in structure_type if t != "null"]
            if non_null_types:
                return self.get_graphql_type(non_null_types[0])
            return "String"

        if isinstance(structure_type, str):
            return self.get_graphql_primitive_type(structure_type)

        if not isinstance(structure_type, dict):
            return "JSON"

        # Handle $ref
        if '$ref' in structure_type:
            ref = structure_type['$ref']
            ref_schema = self.resolve_ref(ref, self._ref_context())
            if not ref_schema:
                return "JSON"
            # Prefer the referenced schema's own name, else the last $ref segment.
            ref_name = ref_schema.get('name') if isinstance(ref_schema, dict) else None
            return ref_name if ref_name else ref.split('/')[-1]

        # Handle enum
        if 'enum' in structure_type:
            # Return inline enum as String (GraphQL doesn't support inline enums)
            return "String"

        # Handle type keyword
        struct_type = structure_type.get('type')

        # Type unions (["string", "null"]) and nested type objects
        # ({"$ref": ...}) are rendered by recursing on the value itself.
        if isinstance(struct_type, (list, dict)):
            return self.get_graphql_type(struct_type)

        if struct_type in ('array', 'set'):
            # GraphQL doesn't have Set, use a list for both
            items_type = self.get_graphql_type(structure_type.get('items', {'type': 'any'}))
            return f"[{items_type}]"

        if struct_type in ('map', 'object', 'choice', 'tuple'):
            # No direct GraphQL equivalent for maps, inline objects, choices
            # and tuples: fall back to the custom JSON scalar.
            return "JSON"

        if struct_type:
            return self.get_graphql_primitive_type(struct_type)

        return "JSON"

    def get_graphql_primitive_type(self, structure_type):
        """
        Map JSON Structure primitive types to GraphQL types.

        :param structure_type: JSON Structure type as a string.
        :return: GraphQL type as a string; ``String`` for unknown types.
        """
        if not isinstance(structure_type, str):
            return 'String'
        return self._PRIMITIVE_TYPE_MAP.get(structure_type, 'String')
492
+
493
+
494
def convert_structure_to_graphql(structure_schema_path, graphql_schema_path):
    """
    Read a JSON Structure schema file and write the equivalent GraphQL schema.

    Convenience wrapper that builds a StructureToGraphQLConverter for the two
    paths and runs its conversion.

    :param structure_schema_path: Path to the JSON Structure schema file.
    :param graphql_schema_path: Path to save the GraphQL schema file.
    """
    StructureToGraphQLConverter(structure_schema_path, graphql_schema_path).convert()