structurize 2.16.6__py3-none-any.whl → 2.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avrotize/__init__.py +1 -0
- avrotize/_version.py +3 -3
- avrotize/avrotocsharp.py +74 -10
- avrotize/avrotojava.py +1130 -51
- avrotize/avrotopython.py +4 -2
- avrotize/commands.json +671 -53
- avrotize/common.py +6 -1
- avrotize/jsonstoavro.py +518 -49
- avrotize/structuretocpp.py +697 -0
- avrotize/structuretocsv.py +365 -0
- avrotize/structuretodatapackage.py +659 -0
- avrotize/structuretodb.py +1125 -0
- avrotize/structuretogo.py +720 -0
- avrotize/structuretographql.py +502 -0
- avrotize/structuretoiceberg.py +355 -0
- avrotize/structuretojava.py +853 -0
- avrotize/structuretokusto.py +639 -0
- avrotize/structuretomd.py +322 -0
- avrotize/structuretoproto.py +764 -0
- avrotize/structuretorust.py +714 -0
- avrotize/structuretoxsd.py +679 -0
- {structurize-2.16.6.dist-info → structurize-2.17.0.dist-info}/METADATA +1 -1
- {structurize-2.16.6.dist-info → structurize-2.17.0.dist-info}/RECORD +27 -14
- {structurize-2.16.6.dist-info → structurize-2.17.0.dist-info}/WHEEL +0 -0
- {structurize-2.16.6.dist-info → structurize-2.17.0.dist-info}/entry_points.txt +0 -0
- {structurize-2.16.6.dist-info → structurize-2.17.0.dist-info}/licenses/LICENSE +0 -0
- {structurize-2.16.6.dist-info → structurize-2.17.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
"""
|
|
3
|
+
Module to convert JSON Structure schema to CSV schema.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
from typing import Any, Dict, List, Optional, Union
|
|
9
|
+
|
|
10
|
+
# Recursive alias for a parsed JSON value. Spelled with typing.Union instead of
# the ``|`` operator: ``Dict[...] | List[...]`` on typing generics raises
# TypeError at import time on Python versions older than 3.10.
JsonNode = Union[Dict[str, 'JsonNode'], List['JsonNode'], str, None]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class StructureToCSVConverter:
    """
    Class to convert JSON Structure schema to CSV schema.

    The converter reads a JSON Structure document, turns the properties of its
    root object type into a list of CSV field descriptors and writes the
    result as JSON in the shape ``{"fields": [{"name": ..., "type": ...}]}``.
    """

    # Annotation keywords copied verbatim from a field schema onto the CSV
    # field. The order is the order in which keys appear in the output.
    _CONSTRAINT_KEYWORDS = (
        # String constraints
        'maxLength',
        'minLength',
        'pattern',
        # Numeric constraints
        'minimum',
        'maximum',
        'exclusiveMinimum',
        'exclusiveMaximum',
        # Decimal/numeric precision
        'precision',
        'scale',
        # Content encoding
        'contentEncoding',
    )

    # JSON Structure type name -> CSV type name. Anything not listed here is
    # emitted as 'string'.
    _CSV_TYPE_MAP = {
        # JSON primitive types
        'null': 'string',
        'boolean': 'boolean',
        'string': 'string',
        'integer': 'integer',
        'number': 'number',

        # Extended integer types
        'int8': 'integer',
        'uint8': 'integer',
        'int16': 'integer',
        'uint16': 'integer',
        'int32': 'integer',
        'uint32': 'integer',
        'int64': 'integer',
        'uint64': 'integer',
        'int128': 'integer',
        'uint128': 'integer',

        # Floating point types
        'float8': 'number',
        'float': 'number',
        'double': 'number',
        'binary32': 'number',
        'binary64': 'number',
        'decimal': 'number',

        # Binary data
        'binary': 'string',  # Base64 encoded

        # Date/time types
        'date': 'string',
        'time': 'string',
        'datetime': 'string',
        'timestamp': 'string',
        'duration': 'string',

        # Special types
        'uuid': 'string',
        'uri': 'string',
        'jsonpointer': 'string',
        'any': 'string',

        # Compound types (when referenced directly)
        'object': 'string',
        'array': 'string',
        'set': 'string',
        'map': 'string',
        'choice': 'string',
        'tuple': 'string',
    }

    def __init__(self, structure_schema_path: str, csv_schema_path: str):
        """
        Initialize the converter with file paths.

        :param structure_schema_path: Path to the JSON Structure schema file.
        :param csv_schema_path: Path to save the CSV schema file.
        """
        self.structure_schema_path = structure_schema_path
        self.csv_schema_path = csv_schema_path
        # Root document, set by convert(); used to resolve '#/...' references.
        self.schema_doc: Optional[Dict] = None
        # Schemas indexed by their (absolutized) $id.
        self.schema_registry: Dict[str, Dict] = {}
        # Top-level 'definitions' section of the loaded document.
        self.definitions: Dict[str, Any] = {}

    def convert(self):
        """
        Convert JSON Structure schema to CSV schema and save to file.

        Reads ``structure_schema_path``, converts the document and writes the
        CSV schema to ``csv_schema_path`` as indented JSON.
        """
        with open(self.structure_schema_path, 'r', encoding='utf-8') as file:
            structure_schema = json.load(file)

        # Store schema for reference resolution
        self.schema_doc = structure_schema

        # Register schema IDs for $ref resolution
        self.register_schema_ids(structure_schema)

        # Store definitions
        if 'definitions' in structure_schema:
            self.definitions = structure_schema['definitions']

        csv_schema = self.convert_structure_to_csv_schema(structure_schema)

        with open(self.csv_schema_path, 'w', encoding='utf-8') as file:
            json.dump(csv_schema, file, indent=2)

    def register_schema_ids(self, schema: Dict, base_uri: str = '') -> None:
        """
        Recursively registers schemas with $id keywords for cross-references.

        :param schema: Schema node to inspect; non-dict values are ignored.
        :param base_uri: URI that a relative ``$id`` is resolved against.
        """
        if not isinstance(schema, dict):
            return

        # Register this schema if it has a usable (string) $id
        schema_id = schema.get('$id')
        if isinstance(schema_id, str):
            # Handle relative URIs
            if base_uri and not schema_id.startswith(('http://', 'https://', 'urn:')):
                from urllib.parse import urljoin
                schema_id = urljoin(base_uri, schema_id)
            self.schema_registry[schema_id] = schema
            # Nested schemas resolve their own relative $id against this one.
            base_uri = schema_id

        # Recursively process definitions and properties
        for section in ('definitions', 'properties'):
            members = schema.get(section)
            if isinstance(members, dict):
                for member_schema in members.values():
                    if isinstance(member_schema, dict):
                        self.register_schema_ids(member_schema, base_uri)

        # Recursively process items, values, etc.
        for key in ('items', 'values', 'additionalProperties'):
            if isinstance(schema.get(key), dict):
                self.register_schema_ids(schema[key], base_uri)

    def resolve_ref(self, ref: str, context_schema: Optional[Dict] = None) -> Optional[Dict]:
        """
        Resolves a $ref to the actual schema definition.

        :param ref: Reference string; either ``#/path/to/node`` or an ``$id``
            previously collected by :meth:`register_schema_ids`.
        :param context_schema: Document to walk for ``#/`` references; falls
            back to the loaded root document when empty or omitted.
        :return: The referenced node, or ``None`` when it cannot be found.
        """
        if not isinstance(ref, str):
            return None

        # Anything that is not a '#/...' pointer is looked up by $id
        if not ref.startswith('#/'):
            return self.schema_registry.get(ref)

        # Handle fragment-only references (internal to document)
        schema = context_schema if context_schema else self.schema_doc
        for part in ref[2:].split('/'):
            if not isinstance(schema, dict) or part not in schema:
                return None
            schema = schema[part]

        return schema

    def convert_structure_to_csv_schema(self, structure_schema: Dict) -> Dict:
        """
        Convert a JSON Structure schema to a CSV schema.

        An object schema with ``properties`` is converted directly. Otherwise,
        when the schema has ``definitions``, the type named by ``$root`` is
        used if it resolves to an object, else the first object definition.

        :param structure_schema: JSON Structure schema as a dictionary.
        :return: CSV schema as a dictionary.
        """
        csv_schema = {
            "fields": []
        }

        # Handle object type with properties
        if structure_schema.get('type') == 'object' and 'properties' in structure_schema:
            for prop_name, prop_schema in structure_schema['properties'].items():
                csv_field = self.convert_structure_field_to_csv_field(prop_name, prop_schema, structure_schema)
                csv_schema['fields'].append(csv_field)

        # Handle definitions
        elif 'definitions' in structure_schema:
            # Look for a root type or the first object type in definitions
            root_ref = structure_schema.get('$root')
            if root_ref:
                root_schema = self.resolve_ref(root_ref, structure_schema)
                if isinstance(root_schema, dict) and root_schema.get('type') == 'object':
                    return self.convert_structure_to_csv_schema(root_schema)

            # Otherwise, try to find the first object type
            definitions = structure_schema['definitions']
            if isinstance(definitions, dict):
                for def_schema in definitions.values():
                    if isinstance(def_schema, dict) and def_schema.get('type') == 'object':
                        return self.convert_structure_to_csv_schema(def_schema)

        return csv_schema

    def convert_structure_field_to_csv_field(self, field_name: str, field_schema: Dict, parent_schema: Dict) -> Dict:
        """
        Convert a JSON Structure field to a CSV field.

        :param field_name: Name of the field.
        :param field_schema: JSON Structure field schema as a dictionary.
        :param parent_schema: Parent schema for context (e.g., required fields).
        :return: CSV field as a dictionary.
        """
        csv_field = {
            "name": field_name,
            "type": self.convert_structure_type_to_csv_type(field_schema)
        }

        # Annotations can only be read from a dict; on a string, `in` would be
        # a substring test and indexing would fail.
        annotations = field_schema if isinstance(field_schema, dict) else {}

        # Handle description ('doc' is accepted as a fallback spelling)
        if 'description' in annotations:
            csv_field['description'] = annotations['description']
        elif 'doc' in annotations:
            csv_field['description'] = annotations['doc']

        # Handle default values
        if 'default' in annotations:
            csv_field['default'] = annotations['default']

        # Handle const values
        if 'const' in annotations:
            csv_field['const'] = annotations['const']

        # Check if field is required
        required_props = parent_schema.get('required', [])
        is_required = field_name in required_props if isinstance(required_props, list) else False

        # Fields not listed as required are marked nullable
        if not is_required:
            csv_field['nullable'] = True

        # Handle enum values
        if 'enum' in annotations:
            csv_field['enum'] = annotations['enum']

        # Handle format/constraints for specific types
        self.add_type_constraints(csv_field, annotations)

        return csv_field

    def add_type_constraints(self, csv_field: Dict, field_schema: Dict) -> None:
        """
        Add type-specific constraints to CSV field based on JSON Structure annotations.

        :param csv_field: CSV field dictionary, updated in place.
        :param field_schema: Field schema the constraint keywords are read from.
        """
        for keyword in self._CONSTRAINT_KEYWORDS:
            if keyword in field_schema:
                csv_field[keyword] = field_schema[keyword]

    def convert_structure_type_to_csv_type(
        self,
        field_schema: Union[Dict, str, List],
        _seen_refs: Optional[frozenset] = None,
    ) -> str:
        """
        Convert a JSON Structure type to a CSV type.

        :param field_schema: JSON Structure type as a string, dict, or list (for unions).
        :param _seen_refs: Internal. References already followed on the current
            resolution path, used to stop on self-referential ``$ref`` chains.
        :return: CSV type as a string.
        """
        seen = _seen_refs if _seen_refs is not None else frozenset()

        # Handle union types (array of types)
        if isinstance(field_schema, list):
            non_null_types = [t for t in field_schema if t != 'null']
            if len(non_null_types) == 1:
                # Simple nullable type
                return self.convert_structure_type_to_csv_type(non_null_types[0], seen)
            # Complex union - use string as fallback
            return 'string'

        # Handle string primitive types
        if isinstance(field_schema, str):
            return self.map_primitive_type_to_csv(field_schema)

        if not isinstance(field_schema, dict):
            return 'string'

        # Handle $ref
        if '$ref' in field_schema:
            ref = field_schema['$ref']
            # A reference already on the current path is a cycle; following
            # it again would recurse without end.
            if not isinstance(ref, str) or ref in seen:
                return 'string'
            ref_schema = self.resolve_ref(ref, self.schema_doc)
            if ref_schema:
                return self.convert_structure_type_to_csv_type(ref_schema, seen | {ref})
            return 'string'

        # Handle enum: the CSV type is the enum's base type. Recursing (rather
        # than a direct table lookup) also covers a list or dict base type.
        if 'enum' in field_schema:
            return self.convert_structure_type_to_csv_type(field_schema.get('type', 'string'), seen)

        # Handle type keyword
        if 'type' not in field_schema:
            return 'string'

        struct_type = field_schema['type']

        # Primitive and compound type names are both resolved by the mapping
        # table (compound types map to 'string').
        if isinstance(struct_type, str):
            return self.map_primitive_type_to_csv(struct_type)

        # 'type' may itself be a union list or a nested schema such as
        # {"$ref": ...}; these are not hashable table keys, so resolve them
        # structurally instead.
        return self.convert_structure_type_to_csv_type(struct_type, seen)

    def map_primitive_type_to_csv(self, structure_type: str) -> str:
        """
        Maps JSON Structure primitive types to CSV types.

        :param structure_type: JSON Structure type name.
        :return: CSV type name; ``'string'`` for unknown or non-string input.
        """
        # Non-string values (e.g. dicts) may be unhashable and cannot be used
        # as a lookup key.
        if not isinstance(structure_type, str):
            return 'string'
        return self._CSV_TYPE_MAP.get(structure_type, 'string')
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def convert_structure_to_csv_schema(structure_schema_path: str, csv_schema_path: str):
    """
    Convert a JSON Structure schema file to a CSV schema file.

    :param structure_schema_path: Path to the JSON Structure schema file.
    :param csv_schema_path: Path to save the CSV schema file.
    :raises FileNotFoundError: If ``structure_schema_path`` does not exist.
    """
    source_present = os.path.exists(structure_schema_path)
    if source_present:
        # Delegate the actual read/convert/write work to the converter class.
        StructureToCSVConverter(structure_schema_path, csv_schema_path).convert()
        return

    raise FileNotFoundError(f"JSON Structure schema file not found: {structure_schema_path}")
|