structurize 2.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. avrotize/__init__.py +64 -0
  2. avrotize/__main__.py +6 -0
  3. avrotize/_version.py +34 -0
  4. avrotize/asn1toavro.py +160 -0
  5. avrotize/avrotize.py +152 -0
  6. avrotize/avrotocpp.py +483 -0
  7. avrotize/avrotocsharp.py +1075 -0
  8. avrotize/avrotocsv.py +121 -0
  9. avrotize/avrotodatapackage.py +173 -0
  10. avrotize/avrotodb.py +1383 -0
  11. avrotize/avrotogo.py +476 -0
  12. avrotize/avrotographql.py +197 -0
  13. avrotize/avrotoiceberg.py +210 -0
  14. avrotize/avrotojava.py +2156 -0
  15. avrotize/avrotojs.py +250 -0
  16. avrotize/avrotojsons.py +481 -0
  17. avrotize/avrotojstruct.py +345 -0
  18. avrotize/avrotokusto.py +364 -0
  19. avrotize/avrotomd.py +137 -0
  20. avrotize/avrotools.py +168 -0
  21. avrotize/avrotoparquet.py +208 -0
  22. avrotize/avrotoproto.py +359 -0
  23. avrotize/avrotopython.py +624 -0
  24. avrotize/avrotorust.py +435 -0
  25. avrotize/avrotots.py +598 -0
  26. avrotize/avrotoxsd.py +344 -0
  27. avrotize/cddltostructure.py +1841 -0
  28. avrotize/commands.json +3337 -0
  29. avrotize/common.py +834 -0
  30. avrotize/constants.py +72 -0
  31. avrotize/csvtoavro.py +132 -0
  32. avrotize/datapackagetoavro.py +76 -0
  33. avrotize/dependencies/cpp/vcpkg/vcpkg.json +19 -0
  34. avrotize/dependencies/typescript/node22/package.json +16 -0
  35. avrotize/dependency_resolver.py +348 -0
  36. avrotize/dependency_version.py +432 -0
  37. avrotize/jsonstoavro.py +2167 -0
  38. avrotize/jsonstostructure.py +2642 -0
  39. avrotize/jstructtoavro.py +878 -0
  40. avrotize/kstructtoavro.py +93 -0
  41. avrotize/kustotoavro.py +455 -0
  42. avrotize/parquettoavro.py +157 -0
  43. avrotize/proto2parser.py +498 -0
  44. avrotize/proto3parser.py +403 -0
  45. avrotize/prototoavro.py +382 -0
  46. avrotize/structuretocddl.py +597 -0
  47. avrotize/structuretocpp.py +697 -0
  48. avrotize/structuretocsharp.py +2295 -0
  49. avrotize/structuretocsv.py +365 -0
  50. avrotize/structuretodatapackage.py +659 -0
  51. avrotize/structuretodb.py +1125 -0
  52. avrotize/structuretogo.py +720 -0
  53. avrotize/structuretographql.py +502 -0
  54. avrotize/structuretoiceberg.py +355 -0
  55. avrotize/structuretojava.py +853 -0
  56. avrotize/structuretojsons.py +498 -0
  57. avrotize/structuretokusto.py +639 -0
  58. avrotize/structuretomd.py +322 -0
  59. avrotize/structuretoproto.py +764 -0
  60. avrotize/structuretopython.py +772 -0
  61. avrotize/structuretorust.py +714 -0
  62. avrotize/structuretots.py +653 -0
  63. avrotize/structuretoxsd.py +679 -0
  64. avrotize/xsdtoavro.py +413 -0
  65. structurize-2.19.0.dist-info/METADATA +107 -0
  66. structurize-2.19.0.dist-info/RECORD +70 -0
  67. structurize-2.19.0.dist-info/WHEEL +5 -0
  68. structurize-2.19.0.dist-info/entry_points.txt +2 -0
  69. structurize-2.19.0.dist-info/licenses/LICENSE +201 -0
  70. structurize-2.19.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,365 @@
1
+ # coding: utf-8
2
+ """
3
+ Module to convert JSON Structure schema to CSV schema.
4
+ """
5
+
6
+ import json
7
+ import os
8
+ from typing import Any, Dict, List, Optional, Union
9
+
10
# Recursive alias for a parsed JSON value as this module models it:
# an object, an array, a string, or null.
JsonNode = Union[Dict[str, 'JsonNode'], List['JsonNode'], str, None]
11
+
12
+
13
class StructureToCSVConverter:
    """
    Class to convert JSON Structure schema to CSV schema.

    The converter reads a JSON Structure document, picks the object type that
    describes a row (the document itself, the ``$root`` target, or the first
    object found in ``definitions``) and emits a ``{"fields": [...]}`` document
    with one entry per property.
    """

    # Lookup table from JSON Structure type names to CSV type names. Anything
    # not listed here (including unknown names) falls back to 'string'.
    _TYPE_MAPPING: Dict[str, str] = {
        # JSON primitive types
        'null': 'string',
        'boolean': 'boolean',
        'string': 'string',
        'integer': 'integer',
        'number': 'number',

        # Extended integer types
        'int8': 'integer',
        'uint8': 'integer',
        'int16': 'integer',
        'uint16': 'integer',
        'int32': 'integer',
        'uint32': 'integer',
        'int64': 'integer',
        'uint64': 'integer',
        'int128': 'integer',
        'uint128': 'integer',

        # Floating point types
        'float8': 'number',
        'float': 'number',
        'double': 'number',
        'binary32': 'number',
        'binary64': 'number',
        'decimal': 'number',

        # Binary data
        'binary': 'string',  # Base64 encoded

        # Date/time types
        'date': 'string',
        'time': 'string',
        'datetime': 'string',
        'timestamp': 'string',
        'duration': 'string',

        # Special types
        'uuid': 'string',
        'uri': 'string',
        'jsonpointer': 'string',
        'any': 'string',

        # Compound types have no native CSV representation
        'object': 'string',
        'array': 'string',
        'set': 'string',
        'map': 'string',
        'choice': 'string',
        'tuple': 'string',
    }

    # Annotations copied verbatim from a field schema onto the CSV field, in
    # the order they are emitted.
    _CONSTRAINT_KEYS = (
        # String constraints
        'maxLength', 'minLength', 'pattern',
        # Numeric constraints
        'minimum', 'maximum', 'exclusiveMinimum', 'exclusiveMaximum',
        # Decimal/numeric precision
        'precision', 'scale',
        # Content encoding
        'contentEncoding',
    )

    def __init__(self, structure_schema_path: str, csv_schema_path: str):
        """
        Initialize the converter with file paths.

        :param structure_schema_path: Path to the JSON Structure schema file.
        :param csv_schema_path: Path to save the CSV schema file.
        """
        self.structure_schema_path = structure_schema_path
        self.csv_schema_path = csv_schema_path
        # Root document, kept for resolving '#/...' references.
        self.schema_doc: Optional[Dict] = None
        # Schemas that declare an '$id', keyed by their (resolved) identifier.
        self.schema_registry: Dict[str, Dict] = {}
        # The root document's 'definitions' section, if it has one.
        self.definitions: Dict[str, Any] = {}

    def convert(self):
        """
        Convert JSON Structure schema to CSV schema and save to file.

        Reads ``structure_schema_path``, converts the loaded document and
        writes the result as indented JSON to ``csv_schema_path``.
        """
        with open(self.structure_schema_path, 'r', encoding='utf-8') as file:
            structure_schema = json.load(file)

        # Store schema for reference resolution
        self.schema_doc = structure_schema

        # Register schema IDs for $ref resolution
        self.register_schema_ids(structure_schema)

        # Store definitions
        if 'definitions' in structure_schema:
            self.definitions = structure_schema['definitions']

        csv_schema = self.convert_structure_to_csv_schema(structure_schema)

        with open(self.csv_schema_path, 'w', encoding='utf-8') as file:
            json.dump(csv_schema, file, indent=2)

    def register_schema_ids(self, schema: Dict, base_uri: str = '') -> None:
        """
        Recursively registers schemas with $id keywords for cross-references.

        :param schema: Schema node to inspect; non-dict values are ignored.
        :param base_uri: Identifier of the nearest enclosing schema with an
            ``$id``, used to resolve relative identifiers.
        """
        if not isinstance(schema, dict):
            return

        # Register this schema if it has a usable (string) $id
        schema_id = schema.get('$id')
        if isinstance(schema_id, str):
            # Handle relative URIs
            if base_uri and not schema_id.startswith(('http://', 'https://', 'urn:')):
                from urllib.parse import urljoin
                schema_id = urljoin(base_uri, schema_id)
            self.schema_registry[schema_id] = schema
            base_uri = schema_id

        # Recursively process named child schemas
        for container_key in ('definitions', 'properties'):
            container = schema.get(container_key)
            if isinstance(container, dict):
                for child in container.values():
                    if isinstance(child, dict):
                        self.register_schema_ids(child, base_uri)

        # Recursively process items, values, etc.
        for key in ('items', 'values', 'additionalProperties'):
            child = schema.get(key)
            if isinstance(child, dict):
                self.register_schema_ids(child, base_uri)

    def resolve_ref(self, ref: str, context_schema: Optional[Dict] = None) -> Optional[Dict]:
        """
        Resolves a $ref to the actual schema definition.

        :param ref: Either a ``#/``-prefixed pointer into a document or an
            identifier previously registered through ``$id``.
        :param context_schema: Document to walk for ``#/`` pointers; when it is
            missing or empty the stored root document is used.
        :return: The referenced node, or None when it cannot be found.
        """
        if not isinstance(ref, str):
            return None

        # Anything that is not a '#/' pointer is looked up by its $id
        if not ref.startswith('#/'):
            return self.schema_registry.get(ref)

        # Handle fragment-only references (internal to document)
        schema = context_schema if context_schema else self.schema_doc

        for token in ref[2:].split('/'):
            if not isinstance(schema, dict):
                return None
            if token in schema:
                # Literal match first, so keys that already resolved keep resolving.
                schema = schema[token]
                continue
            # JSON Pointer escapes: '~1' stands for '/', '~0' for '~'.
            # '~1' must be decoded first so '~01' yields '~1', not '/'.
            decoded = token.replace('~1', '/').replace('~0', '~')
            if decoded not in schema:
                return None
            schema = schema[decoded]

        return schema

    def convert_structure_to_csv_schema(self, structure_schema: Dict) -> Dict:
        """
        Convert a JSON Structure schema to a CSV schema.

        :param structure_schema: JSON Structure schema as a dictionary.
        :return: CSV schema as a dictionary.
        """
        csv_schema = {
            "fields": []
        }

        # Handle object type with properties
        if structure_schema.get('type') == 'object' and 'properties' in structure_schema:
            for prop_name, prop_schema in structure_schema['properties'].items():
                csv_schema['fields'].append(
                    self.convert_structure_field_to_csv_field(prop_name, prop_schema, structure_schema)
                )

        # Handle definitions
        elif 'definitions' in structure_schema:
            # Prefer the explicitly declared root type
            root_ref = structure_schema.get('$root')
            if root_ref:
                root_schema = self.resolve_ref(root_ref, structure_schema)
                if isinstance(root_schema, dict) and root_schema.get('type') == 'object':
                    return self.convert_structure_to_csv_schema(root_schema)

            # Otherwise, try to find the first object type
            definitions = structure_schema['definitions']
            if isinstance(definitions, dict):
                for def_schema in definitions.values():
                    if isinstance(def_schema, dict) and def_schema.get('type') == 'object':
                        return self.convert_structure_to_csv_schema(def_schema)

        return csv_schema

    def convert_structure_field_to_csv_field(self, field_name: str, field_schema: Dict, parent_schema: Dict) -> Dict:
        """
        Convert a JSON Structure field to a CSV field.

        :param field_name: Name of the field.
        :param field_schema: JSON Structure field schema as a dictionary.
        :param parent_schema: Parent schema for context (e.g., required fields).
        :return: CSV field as a dictionary.
        """
        csv_field = {
            "name": field_name,
            "type": self.convert_structure_type_to_csv_type(field_schema)
        }

        # Handle description ('doc' is accepted as a fallback spelling)
        if 'description' in field_schema:
            csv_field['description'] = field_schema['description']
        elif 'doc' in field_schema:
            csv_field['description'] = field_schema['doc']

        # Handle default values
        if 'default' in field_schema:
            csv_field['default'] = field_schema['default']

        # Handle const values
        if 'const' in field_schema:
            csv_field['const'] = field_schema['const']

        # A field is nullable unless the parent lists it as required
        required_props = parent_schema.get('required', [])
        is_required = isinstance(required_props, list) and field_name in required_props
        if not is_required:
            csv_field['nullable'] = True

        # Handle enum values
        if 'enum' in field_schema:
            csv_field['enum'] = field_schema['enum']

        # Handle format/constraints for specific types
        self.add_type_constraints(csv_field, field_schema)

        return csv_field

    def add_type_constraints(self, csv_field: Dict, field_schema: Dict) -> None:
        """
        Add type-specific constraints to CSV field based on JSON Structure annotations.

        :param csv_field: CSV field dictionary, updated in place.
        :param field_schema: JSON Structure field schema to copy annotations from.
        """
        for key in self._CONSTRAINT_KEYS:
            if key in field_schema:
                csv_field[key] = field_schema[key]

    def convert_structure_type_to_csv_type(self, field_schema: Union[Dict, str, List]) -> str:
        """
        Convert a JSON Structure type to a CSV type.

        :param field_schema: JSON Structure type as a string, dict, or list (for unions).
        :return: CSV type as a string.
        """
        return self._resolve_csv_type(field_schema, frozenset())

    def _resolve_csv_type(self, field_schema: Any, active_refs: frozenset) -> str:
        """
        Resolve a type expression to a CSV type, following references.

        :param field_schema: Type name, schema dict, or union list.
        :param active_refs: References already being followed on this path;
            meeting one again means a cycle, which falls back to 'string'.
        :return: CSV type as a string.
        """
        # Handle union types (array of types)
        if isinstance(field_schema, list):
            non_null_types = [t for t in field_schema if t != 'null']
            if len(non_null_types) == 1:
                # Simple nullable type
                return self._resolve_csv_type(non_null_types[0], active_refs)
            # Complex union - use string as fallback
            return 'string'

        # Handle string primitive types
        if isinstance(field_schema, str):
            return self.map_primitive_type_to_csv(field_schema)

        if not isinstance(field_schema, dict):
            return 'string'

        # Handle $ref
        if '$ref' in field_schema:
            ref = field_schema['$ref']
            if not isinstance(ref, str) or ref in active_refs:
                return 'string'
            ref_schema = self.resolve_ref(ref, self.schema_doc)
            if ref_schema:
                return self._resolve_csv_type(ref_schema, active_refs | {ref})
            return 'string'

        # Enum schemas take their base type from 'type' as well, so they need
        # no separate branch; a missing 'type' means 'string' either way.
        if 'type' not in field_schema:
            return 'string'

        struct_type = field_schema['type']

        # 'type' may itself be a union list or a nested schema such as
        # {"$ref": ...}; neither can be used as a lookup key, so recurse.
        if isinstance(struct_type, (list, dict)):
            return self._resolve_csv_type(struct_type, active_refs)

        # Primitive or compound type name; compound names map to 'string'
        return self.map_primitive_type_to_csv(struct_type)

    def map_primitive_type_to_csv(self, structure_type: str) -> str:
        """
        Maps JSON Structure primitive types to CSV types.

        :param structure_type: JSON Structure type name.
        :return: CSV type name; 'string' for unknown or non-string input.
        """
        if not isinstance(structure_type, str):
            return 'string'
        return self._TYPE_MAPPING.get(structure_type, 'string')
352
+
353
+
354
def convert_structure_to_csv_schema(structure_schema_path: str, csv_schema_path: str):
    """
    Convert a JSON Structure schema file to a CSV schema file.

    :param structure_schema_path: Path to the JSON Structure schema file.
    :param csv_schema_path: Path to save the CSV schema file.
    :raises FileNotFoundError: If ``structure_schema_path`` does not exist.
    """
    if os.path.exists(structure_schema_path):
        # Input is present: hand the whole job to the converter class.
        StructureToCSVConverter(structure_schema_path, csv_schema_path).convert()
        return

    raise FileNotFoundError(f"JSON Structure schema file not found: {structure_schema_path}")