structurize 2.16.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. avrotize/__init__.py +63 -0
  2. avrotize/__main__.py +6 -0
  3. avrotize/_version.py +34 -0
  4. avrotize/asn1toavro.py +160 -0
  5. avrotize/avrotize.py +152 -0
  6. avrotize/avrotocpp.py +483 -0
  7. avrotize/avrotocsharp.py +992 -0
  8. avrotize/avrotocsv.py +121 -0
  9. avrotize/avrotodatapackage.py +173 -0
  10. avrotize/avrotodb.py +1383 -0
  11. avrotize/avrotogo.py +476 -0
  12. avrotize/avrotographql.py +197 -0
  13. avrotize/avrotoiceberg.py +210 -0
  14. avrotize/avrotojava.py +1023 -0
  15. avrotize/avrotojs.py +250 -0
  16. avrotize/avrotojsons.py +481 -0
  17. avrotize/avrotojstruct.py +345 -0
  18. avrotize/avrotokusto.py +364 -0
  19. avrotize/avrotomd.py +137 -0
  20. avrotize/avrotools.py +168 -0
  21. avrotize/avrotoparquet.py +208 -0
  22. avrotize/avrotoproto.py +359 -0
  23. avrotize/avrotopython.py +622 -0
  24. avrotize/avrotorust.py +435 -0
  25. avrotize/avrotots.py +598 -0
  26. avrotize/avrotoxsd.py +344 -0
  27. avrotize/commands.json +2433 -0
  28. avrotize/common.py +829 -0
  29. avrotize/constants.py +5 -0
  30. avrotize/csvtoavro.py +132 -0
  31. avrotize/datapackagetoavro.py +76 -0
  32. avrotize/dependency_resolver.py +348 -0
  33. avrotize/jsonstoavro.py +1698 -0
  34. avrotize/jsonstostructure.py +2642 -0
  35. avrotize/jstructtoavro.py +878 -0
  36. avrotize/kstructtoavro.py +93 -0
  37. avrotize/kustotoavro.py +455 -0
  38. avrotize/parquettoavro.py +157 -0
  39. avrotize/proto2parser.py +498 -0
  40. avrotize/proto3parser.py +403 -0
  41. avrotize/prototoavro.py +382 -0
  42. avrotize/structuretocsharp.py +2005 -0
  43. avrotize/structuretojsons.py +498 -0
  44. avrotize/structuretopython.py +772 -0
  45. avrotize/xsdtoavro.py +413 -0
  46. structurize-2.16.2.dist-info/METADATA +805 -0
  47. structurize-2.16.2.dist-info/RECORD +51 -0
  48. structurize-2.16.2.dist-info/WHEEL +5 -0
  49. structurize-2.16.2.dist-info/entry_points.txt +2 -0
  50. structurize-2.16.2.dist-info/licenses/LICENSE +201 -0
  51. structurize-2.16.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,498 @@
1
+ """ JSON Structure to JSON Schema converter. """
2
+
3
+ # pylint: disable=too-many-lines, line-too-long, too-many-branches, too-many-statements, too-many-locals, too-many-nested-blocks, too-many-arguments, too-many-instance-attributes, too-many-public-methods, too-many-boolean-expressions
4
+
5
+ import json
6
+ import os
7
+ from typing import Any, Dict, List, Union, Optional
8
+
9
+
10
class StructureToJsonConverter:
    """
    Converts JSON Structure documents to JSON Schema (draft 2020-12) format.

    JSON Structure is more constrained than JSON Schema, making this conversion
    straightforward with well-defined mappings.

    Type references written as ``"type": {"$ref": "#/definitions/X"}`` are
    emitted as ``{"$ref": "#/$defs/X"}``; the referenced definition is converted
    once and collected in ``self.definitions`` so that recursive types terminate.
    """

    # Primitive type mappings from JSON Structure to JSON Schema.
    # Note: JSON Structure types have specific serialization rules:
    # - int64, uint64, int128, uint128, decimal are serialized as strings in
    #   JSON due to precision/range limits
    # - binary32/binary64 are IEEE 754 binary formats
    # - timestamp is Unix epoch time as number
    _TYPE_MAPPINGS: Dict[str, Dict[str, Any]] = {
        'null': {'type': 'null'},
        'string': {'type': 'string'},
        'boolean': {'type': 'boolean'},
        'bytes': {'type': 'string', 'format': 'byte'},
        'int8': {'type': 'integer', 'minimum': -128, 'maximum': 127},
        'int16': {'type': 'integer', 'minimum': -32768, 'maximum': 32767},
        'int32': {'type': 'integer', 'minimum': -2147483648, 'maximum': 2147483647},
        'int64': {'type': 'string', 'pattern': '^-?[0-9]+$'},  # Serialized as string
        'uint8': {'type': 'integer', 'minimum': 0, 'maximum': 255},
        'uint16': {'type': 'integer', 'minimum': 0, 'maximum': 65535},
        'uint32': {'type': 'integer', 'minimum': 0, 'maximum': 4294967295},
        'uint64': {'type': 'string', 'pattern': '^[0-9]+$'},  # Serialized as string
        'int128': {'type': 'string', 'pattern': '^-?[0-9]+$'},  # Serialized as string
        'uint128': {'type': 'string', 'pattern': '^[0-9]+$'},  # Serialized as string
        'float8': {'type': 'number'},  # 8-bit float
        'float': {'type': 'number', 'format': 'float'},
        'double': {'type': 'number', 'format': 'double'},
        'float32': {'type': 'number', 'format': 'float'},  # 32-bit float
        'float64': {'type': 'number', 'format': 'double'},  # 64-bit float
        'binary32': {'type': 'number', 'format': 'float'},  # IEEE 754 binary32 (alias)
        'binary64': {'type': 'number', 'format': 'double'},  # IEEE 754 binary64 (alias)
        'decimal': {'type': 'string', 'pattern': '^-?[0-9]+(\\.[0-9]+)?$'},  # Serialized as string
        'binary': {'type': 'string', 'contentEncoding': 'base64'},  # Binary data
        'date': {'type': 'string', 'format': 'date'},
        'time': {'type': 'string', 'format': 'time'},
        'datetime': {'type': 'string', 'format': 'date-time'},
        'timestamp': {'type': 'number'},  # Unix epoch time
        'duration': {'type': 'string', 'format': 'duration'},
        'uuid': {'type': 'string', 'format': 'uuid'},
        'uri': {'type': 'string', 'format': 'uri'},
        'jsonpointer': {'type': 'string', 'format': 'json-pointer'},
    }

    # Keywords copied verbatim from JSON Structure to JSON Schema (string
    # constraints first, then numeric ones). Order matters for output key order.
    _PASSTHROUGH_KEYWORDS = (
        'minLength', 'maxLength', 'pattern', 'format', 'enum', 'const',
        'minimum', 'maximum', 'exclusiveMinimum', 'exclusiveMaximum', 'multipleOf',
    )

    # Keywords without a JSON Schema equivalent; emitted as ``x-`` extensions.
    _EXTENSION_KEYWORDS = (
        ('precision', 'x-precision'),
        ('scale', 'x-scale'),
        ('unit', 'x-unit'),
        ('currency', 'x-currency'),
        ('altnames', 'x-altnames'),
    )

    def __init__(self) -> None:
        """Initialize the converter."""
        self.definitions: Dict[str, Any] = {}
        self.base_uri = ""
        self.structure_document: Optional[Dict[str, Any]] = None

    def convert_type(self, structure_type: str) -> Dict[str, Any]:
        """
        Convert a JSON Structure primitive type name to a JSON Schema type.

        Args:
            structure_type (str): The JSON Structure type name

        Returns:
            Dict[str, Any]: A fresh JSON Schema type definition that the caller
            may mutate. Unknown (or non-string) types fall back to
            ``{'type': 'string'}``.
        """
        if not isinstance(structure_type, str):
            # Guards against unhashable values (e.g. a {"$ref": ...} dict).
            return {'type': 'string'}
        # Copy so callers adding constraints never touch the shared table.
        return dict(self._TYPE_MAPPINGS.get(structure_type, {'type': 'string'}))

    def convert_structure_schema(self, structure: Dict[str, Any]) -> Dict[str, Any]:
        """
        Convert a JSON Structure schema to JSON Schema.

        Args:
            structure (Dict[str, Any]): The JSON Structure schema

        Returns:
            Dict[str, Any]: The converted JSON Schema
        """
        # Store the structure document for reference resolution and start with
        # a clean set of collected definitions so converter reuse cannot leak
        # definitions from a previous document.
        self.structure_document = structure
        self.definitions = {}

        schema: Dict[str, Any] = {
            "$schema": "https://json-schema.org/draft/2020-12/schema",
        }
        # Preserve $id if present
        if '$id' in structure:
            schema['$id'] = structure['$id']

        # Preserve $uses as extension if present
        if '$uses' in structure:
            schema['x-uses'] = structure['$uses']

        # Convert title and description
        if 'name' in structure:
            schema['title'] = structure['name']
        if 'description' in structure:
            schema['description'] = structure['description']

        if 'type' in structure:
            # Convert the main type
            schema.update(self._convert_type_definition(structure))
        elif '$root' in structure and isinstance(structure['$root'], str):
            # The document designates one of its definitions as the root type.
            root_ref = structure['$root']
            if self._resolve_reference(root_ref) is not None:
                schema.update(self._convert_reference(root_ref))

        # Add definitions if we collected any
        if self.definitions:
            schema['$defs'] = self.definitions

        return schema

    def _convert_type_definition(self, type_def: Dict[str, Any]) -> Dict[str, Any]:
        """
        Convert a JSON Structure type definition to JSON Schema.

        Args:
            type_def (Dict[str, Any]): The type definition

        Returns:
            Dict[str, Any]: JSON Schema type definition. Anything that is not a
            dict carrying a ``type`` key falls back to ``{'type': 'string'}``.
        """
        if not isinstance(type_def, dict) or 'type' not in type_def:
            return {'type': 'string'}

        structure_type = type_def['type']

        # Handle type unions (arrays of types)
        if isinstance(structure_type, list):
            return self._convert_type_union(structure_type, type_def)

        # Handle type references: "type": {"$ref": "#/definitions/X"}
        if isinstance(structure_type, dict):
            ref_path = structure_type.get('$ref')
            if isinstance(ref_path, str):
                return self._convert_reference(ref_path)
            return {'type': 'string'}

        compound_handlers = {
            'object': self._convert_object_type,
            'array': self._convert_array_type,
            # Sets in JSON Structure become arrays with uniqueItems in JSON Schema
            'set': self._convert_set_type,
            'map': self._convert_map_type,
            'choice': self._convert_choice_type,
        }
        handler = compound_handlers.get(structure_type) if isinstance(structure_type, str) else None
        if handler is not None:
            return handler(type_def)

        # Primitive type, plus any validation constraints declared next to it
        result = self.convert_type(structure_type)
        self._add_validation_constraints(result, type_def)
        return result

    def _convert_reference(self, ref_path: str) -> Dict[str, Any]:
        """
        Convert a reference to a definition into a JSON Schema ``$ref``.

        The referenced definition is converted (with ``$extends`` applied) the
        first time it is seen and stored in ``self.definitions``. A placeholder
        is registered before converting so self-referential types terminate.

        Args:
            ref_path (str): JSON Pointer into the structure document

        Returns:
            Dict[str, Any]: ``{'$ref': '#/$defs/<name>'}``, or the
            ``{'type': 'string'}`` fallback when the reference cannot be resolved.
        """
        target = self._resolve_reference(ref_path)
        if target is None:
            return {'type': 'string'}

        name = self._definition_name(ref_path)
        if name not in self.definitions:
            placeholder: Dict[str, Any] = {}
            self.definitions[name] = placeholder
            placeholder.update(
                self._convert_type_definition(self._resolve_type_with_inheritance(target))
            )

        # Escape the name so the emitted reference is a valid JSON Pointer.
        pointer_name = name.replace('~', '~0').replace('/', '~1')
        return {'$ref': f'#/$defs/{pointer_name}'}

    @staticmethod
    def _split_pointer(ref_path: str) -> List[str]:
        """Split a ``#/a/b`` JSON Pointer into unescaped reference tokens."""
        # Per RFC 6901, '~1' must be decoded before '~0'.
        return [
            token.replace('~1', '/').replace('~0', '~')
            for token in ref_path[2:].split('/')
        ]

    def _definition_name(self, ref_path: str) -> str:
        """Derive a flat ``$defs`` key from a reference path (namespaces joined with '.')."""
        parts = self._split_pointer(ref_path)
        if len(parts) > 1 and parts[0] == 'definitions':
            parts = parts[1:]
        return '.'.join(parts)

    def _convert_object_type(self, type_def: Dict[str, Any]) -> Dict[str, Any]:
        """Convert a JSON Structure object type to JSON Schema."""
        # Pull in inherited properties/required; a no-op without $extends.
        type_def = self._resolve_type_with_inheritance(type_def)

        result: Dict[str, Any] = {'type': 'object'}

        if 'properties' in type_def:
            result['properties'] = {
                prop_name: self._convert_type_definition(prop_def)
                for prop_name, prop_def in type_def['properties'].items()
            }

        if 'required' in type_def:
            result['required'] = type_def['required']

        if 'additionalProperties' in type_def:
            additional = type_def['additionalProperties']
            if isinstance(additional, bool):
                result['additionalProperties'] = additional
            else:
                result['additionalProperties'] = self._convert_type_definition(additional)

        return result

    def _convert_collection(self, type_def: Dict[str, Any], unique: bool) -> Dict[str, Any]:
        """Shared conversion for array-like types; ``unique`` adds ``uniqueItems``."""
        result: Dict[str, Any] = {'type': 'array'}
        if unique:
            result['uniqueItems'] = True

        if 'items' in type_def:
            result['items'] = self._convert_type_definition(type_def['items'])

        # Add validation constraints
        for keyword in ('minItems', 'maxItems'):
            if keyword in type_def:
                result[keyword] = type_def[keyword]

        return result

    def _convert_array_type(self, type_def: Dict[str, Any]) -> Dict[str, Any]:
        """Convert a JSON Structure array type to JSON Schema."""
        return self._convert_collection(type_def, unique=False)

    def _convert_set_type(self, type_def: Dict[str, Any]) -> Dict[str, Any]:
        """Convert a JSON Structure set type to JSON Schema array with uniqueItems."""
        return self._convert_collection(type_def, unique=True)

    def _convert_map_type(self, type_def: Dict[str, Any]) -> Dict[str, Any]:
        """Convert a JSON Structure map type to JSON Schema object with additionalProperties."""
        result: Dict[str, Any] = {'type': 'object'}

        if 'values' in type_def:
            result['additionalProperties'] = self._convert_type_definition(type_def['values'])
        else:
            result['additionalProperties'] = True

        return result

    def _convert_choice_type(self, type_def: Dict[str, Any]) -> Dict[str, Any]:
        """
        Convert a JSON Structure choice type to JSON Schema oneOf.

        Three shapes are handled:
        - ``choices`` is a dict and there is no ``selector``: tagged union, each
          alternative is wrapped in a single-property object keyed by its name.
        - ``choices`` is a dict with a ``selector``: inline union; when
          ``$extends`` resolves to a base with properties, each alternative is
          merged with the base and the selector is pinned to the choice name.
        - ``choices`` is any other iterable (legacy format): each entry is
          converted as-is.

        Returns an empty dict when ``choices`` is absent.
        """
        result: Dict[str, Any] = {}
        if 'choices' not in type_def:
            return result

        choices = type_def['choices']
        if not isinstance(choices, dict):
            # Legacy format: a plain list of alternatives
            result['oneOf'] = [self._convert_choice_member(choice)[0] for choice in choices]
            return result

        selector = type_def.get('selector')

        # The base type is the same for every alternative, so resolve it once.
        resolved_base: Optional[Dict[str, Any]] = None
        if selector and '$extends' in type_def:
            base_type = self._resolve_reference(type_def['$extends'])
            if base_type:
                resolved_base = self._resolve_type_with_inheritance(base_type)

        one_of: List[Dict[str, Any]] = []
        for choice_name, choice_def in choices.items():
            converted_choice, resolved_choice = self._convert_choice_member(choice_def)

            if not selector:
                # Tagged union: wrap in an object with the choice name
                one_of.append({
                    'type': 'object',
                    'properties': {
                        choice_name: converted_choice
                    },
                    'required': [choice_name],
                    'additionalProperties': False
                })
            elif resolved_base is not None and resolved_choice and 'properties' in resolved_base:
                one_of.append(
                    self._merge_choice_with_base(choice_name, selector, resolved_base, resolved_choice)
                )
            else:
                # No usable base type, just use the converted choice
                one_of.append(converted_choice)

        result['oneOf'] = one_of
        return result

    def _convert_choice_member(self, choice_def: Any) -> tuple:
        """
        Convert one alternative of a choice.

        Returns:
            tuple: ``(converted, resolved)`` where ``converted`` is the JSON
            Schema for the alternative and ``resolved`` is the JSON Structure
            definition it was produced from (``None`` for simple type names and
            fallbacks), used later for merging with a base type.
        """
        if isinstance(choice_def, str):
            # Simple type name
            return self.convert_type(choice_def), None
        if not isinstance(choice_def, dict):
            # Fallback to string type
            return {'type': 'string'}, None

        # Accept both a direct {"$ref": ...} and {"type": {"$ref": ...}}.
        ref_path = choice_def.get('$ref')
        if ref_path is None:
            nested_type = choice_def.get('type')
            if isinstance(nested_type, dict):
                ref_path = nested_type.get('$ref')

        if ref_path is None:
            return self._convert_type_definition(choice_def), choice_def

        ref_type = self._resolve_reference(ref_path)
        if not ref_type:
            # Fallback if reference can't be resolved
            return {'type': 'string'}, None

        resolved_choice = self._resolve_type_with_inheritance(ref_type)
        return self._convert_type_definition(resolved_choice), resolved_choice

    def _merge_choice_with_base(
            self,
            choice_name: str,
            selector: str,
            resolved_base: Dict[str, Any],
            resolved_choice: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Build the object schema for an inline-union alternative merged with its base type."""
        base_props = resolved_base.get('properties', {})
        choice_props = resolved_choice.get('properties', {})

        # Merge properties from base and choice; the choice wins on conflicts.
        combined: Dict[str, Any] = {}
        if isinstance(base_props, dict):
            combined.update(base_props)
        if isinstance(choice_props, dict):
            combined.update(choice_props)

        # Convert each property definition from JSON Structure to JSON Schema
        converted_properties = {
            prop_name: self._convert_type_definition(prop_def)
            for prop_name, prop_def in combined.items()
        }

        # Constrain the selector field to the specific choice name
        if selector in converted_properties:
            converted_properties[selector] = {'const': choice_name}

        merged_choice: Dict[str, Any] = {
            'type': 'object',
            'properties': converted_properties
        }

        # Add required fields from both base and choice. Non-string entries
        # (e.g. nested alternative sets) cannot be merged into a flat list.
        required_fields = set()
        for source in (resolved_base, resolved_choice):
            names = source.get('required')
            if isinstance(names, list):
                required_fields.update(name for name in names if isinstance(name, str))
        if required_fields:
            merged_choice['required'] = sorted(required_fields)

        return merged_choice

    def _convert_type_union(self, type_list: List[Any], type_def: Dict[str, Any]) -> Dict[str, Any]:
        """
        Convert a JSON Structure type union to JSON Schema.

        A single-member union is converted as that type (keeping the sibling
        keywords of ``type_def``); otherwise an ``anyOf`` is produced. Members
        may be primitive type names or ``{"$ref": ...}`` references.
        """
        if len(type_list) == 1:
            # Single type, convert directly
            single_type_def = {'type': type_list[0]}
            single_type_def.update({k: v for k, v in type_def.items() if k != 'type'})
            return self._convert_type_definition(single_type_def)

        # Multiple types - create anyOf. Union members don't inherit the other
        # keywords of type_def; those are attached to the union itself below.
        result: Dict[str, Any] = {
            'anyOf': [
                self.convert_type(member) if isinstance(member, str)
                else self._convert_type_definition({'type': member})
                for member in type_list
            ]
        }

        # Add any validation constraints to the union itself
        self._add_validation_constraints(result, type_def)

        return result

    def _add_validation_constraints(self, result: Dict[str, Any], type_def: Dict[str, Any]) -> None:
        """
        Add validation constraints from JSON Structure to JSON Schema.

        Mutates ``result`` in place: standard keywords are copied verbatim, and
        keywords with no JSON Schema equivalent (precision, scale, unit,
        currency, altnames) are emitted as ``x-`` prefixed custom keywords.
        """
        for keyword in self._PASSTHROUGH_KEYWORDS:
            if keyword in type_def:
                result[keyword] = type_def[keyword]

        for source_key, target_key in self._EXTENSION_KEYWORDS:
            if source_key in type_def:
                result[target_key] = type_def[source_key]

    def _resolve_reference(self, ref_path: str) -> Optional[Dict[str, Any]]:
        """
        Resolve a $ref path to the actual type definition.

        Args:
            ref_path (str): The reference path (e.g., "#/definitions/Person")

        Returns:
            Optional[Dict[str, Any]]: The resolved type definition or None if not found
        """
        if (
            not self.structure_document
            or not isinstance(ref_path, str)
            or not ref_path.startswith('#/')
        ):
            return None

        current: Any = self.structure_document
        for part in self._split_pointer(ref_path):
            if isinstance(current, dict) and part in current:
                current = current[part]
            else:
                return None

        return current if isinstance(current, dict) else None

    def _resolve_type_with_inheritance(
            self,
            type_def: Dict[str, Any],
            _seen: Optional[set] = None
    ) -> Dict[str, Any]:
        """
        Resolve a type definition, handling $extends inheritance.

        Args:
            type_def (Dict[str, Any]): The type definition that may have $extends
            _seen (Optional[set]): Internal; base references already followed in
                this chain, used to stop on circular ``$extends``.

        Returns:
            Dict[str, Any]: The resolved type definition with inheritance applied.
            ``type_def`` itself is returned unchanged when it has no ``$extends``
            or the base cannot be resolved.
        """
        if '$extends' not in type_def:
            return type_def

        base_ref = type_def['$extends']
        seen = set() if _seen is None else _seen
        if isinstance(base_ref, str) and base_ref in seen:
            # Circular inheritance: stop here instead of recursing forever.
            return type_def

        # Resolve the base type
        base_type = self._resolve_reference(base_ref)
        if not base_type:
            # If we can't resolve the base, return the original type
            return type_def

        # Recursively resolve the base type's inheritance
        seen.add(base_ref)
        resolved_base = self._resolve_type_with_inheritance(base_type, seen)

        # Merge the base type with the current type
        # The current type's keywords override the base type's keywords
        merged = {**resolved_base, **type_def}

        # Special handling for properties - merge them
        if 'properties' in resolved_base and 'properties' in type_def:
            merged['properties'] = {**resolved_base['properties'], **type_def['properties']}

        # Special handling for required - a derived type keeps the base type's
        # required names in addition to its own. Only flat name lists are merged.
        base_required = resolved_base.get('required')
        own_required = type_def.get('required')
        if self._is_name_list(base_required) and self._is_name_list(own_required):
            merged['required'] = base_required + [
                name for name in own_required if name not in base_required
            ]

        # Remove $extends from the final result as it's not part of JSON Schema
        merged.pop('$extends', None)

        return merged

    @staticmethod
    def _is_name_list(value: Any) -> bool:
        """Return True when ``value`` is a list consisting only of strings."""
        return isinstance(value, list) and all(isinstance(item, str) for item in value)
453
+
454
+
455
def convert_structure_to_json_schema(
        structure_file_path: str,
        json_schema_path: str
) -> None:
    """
    Read a JSON Structure file and write its JSON Schema equivalent.

    Both files are handled as UTF-8 text. The conversion itself is delegated
    to ``convert_structure_to_json_schema_string``.

    Args:
        structure_file_path (str): Path to the input JSON Structure file
        json_schema_path (str): Path to the output JSON Schema file
    """
    with open(structure_file_path, 'r', encoding='utf-8') as source:
        raw_structure = source.read()

    # Convert before opening the output, so a failed conversion
    # never truncates an existing target file.
    schema_text = convert_structure_to_json_schema_string(raw_structure)

    with open(json_schema_path, 'w', encoding='utf-8') as target:
        target.write(schema_text)
476
+
477
+
478
def convert_structure_to_json_schema_string(structure_content: str) -> str:
    """
    Convert a JSON Structure document, given as text, to JSON Schema text.

    Args:
        structure_content (str): The JSON Structure document as a string

    Returns:
        str: The converted JSON Schema document, serialized with an
        indentation of two spaces

    Raises:
        ValueError: If ``structure_content`` is not valid JSON
    """
    try:
        parsed_structure = json.loads(structure_content)
    except json.JSONDecodeError as exc:
        raise ValueError(f"Invalid JSON Structure document: {exc}") from exc

    # A fresh converter per call keeps conversions independent of each other.
    json_schema = StructureToJsonConverter().convert_structure_schema(parsed_structure)
    return json.dumps(json_schema, indent=2)