structurize 2.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. avrotize/__init__.py +64 -0
  2. avrotize/__main__.py +6 -0
  3. avrotize/_version.py +34 -0
  4. avrotize/asn1toavro.py +160 -0
  5. avrotize/avrotize.py +152 -0
  6. avrotize/avrotocpp.py +483 -0
  7. avrotize/avrotocsharp.py +1075 -0
  8. avrotize/avrotocsv.py +121 -0
  9. avrotize/avrotodatapackage.py +173 -0
  10. avrotize/avrotodb.py +1383 -0
  11. avrotize/avrotogo.py +476 -0
  12. avrotize/avrotographql.py +197 -0
  13. avrotize/avrotoiceberg.py +210 -0
  14. avrotize/avrotojava.py +2156 -0
  15. avrotize/avrotojs.py +250 -0
  16. avrotize/avrotojsons.py +481 -0
  17. avrotize/avrotojstruct.py +345 -0
  18. avrotize/avrotokusto.py +364 -0
  19. avrotize/avrotomd.py +137 -0
  20. avrotize/avrotools.py +168 -0
  21. avrotize/avrotoparquet.py +208 -0
  22. avrotize/avrotoproto.py +359 -0
  23. avrotize/avrotopython.py +624 -0
  24. avrotize/avrotorust.py +435 -0
  25. avrotize/avrotots.py +598 -0
  26. avrotize/avrotoxsd.py +344 -0
  27. avrotize/cddltostructure.py +1841 -0
  28. avrotize/commands.json +3337 -0
  29. avrotize/common.py +834 -0
  30. avrotize/constants.py +72 -0
  31. avrotize/csvtoavro.py +132 -0
  32. avrotize/datapackagetoavro.py +76 -0
  33. avrotize/dependencies/cpp/vcpkg/vcpkg.json +19 -0
  34. avrotize/dependencies/typescript/node22/package.json +16 -0
  35. avrotize/dependency_resolver.py +348 -0
  36. avrotize/dependency_version.py +432 -0
  37. avrotize/jsonstoavro.py +2167 -0
  38. avrotize/jsonstostructure.py +2642 -0
  39. avrotize/jstructtoavro.py +878 -0
  40. avrotize/kstructtoavro.py +93 -0
  41. avrotize/kustotoavro.py +455 -0
  42. avrotize/parquettoavro.py +157 -0
  43. avrotize/proto2parser.py +498 -0
  44. avrotize/proto3parser.py +403 -0
  45. avrotize/prototoavro.py +382 -0
  46. avrotize/structuretocddl.py +597 -0
  47. avrotize/structuretocpp.py +697 -0
  48. avrotize/structuretocsharp.py +2295 -0
  49. avrotize/structuretocsv.py +365 -0
  50. avrotize/structuretodatapackage.py +659 -0
  51. avrotize/structuretodb.py +1125 -0
  52. avrotize/structuretogo.py +720 -0
  53. avrotize/structuretographql.py +502 -0
  54. avrotize/structuretoiceberg.py +355 -0
  55. avrotize/structuretojava.py +853 -0
  56. avrotize/structuretojsons.py +498 -0
  57. avrotize/structuretokusto.py +639 -0
  58. avrotize/structuretomd.py +322 -0
  59. avrotize/structuretoproto.py +764 -0
  60. avrotize/structuretopython.py +772 -0
  61. avrotize/structuretorust.py +714 -0
  62. avrotize/structuretots.py +653 -0
  63. avrotize/structuretoxsd.py +679 -0
  64. avrotize/xsdtoavro.py +413 -0
  65. structurize-2.19.0.dist-info/METADATA +107 -0
  66. structurize-2.19.0.dist-info/RECORD +70 -0
  67. structurize-2.19.0.dist-info/WHEEL +5 -0
  68. structurize-2.19.0.dist-info/entry_points.txt +2 -0
  69. structurize-2.19.0.dist-info/licenses/LICENSE +201 -0
  70. structurize-2.19.0.dist-info/top_level.txt +1 -0
avrotize/xsdtoavro.py ADDED
@@ -0,0 +1,413 @@
1
+ # pylint: disable=line-too-long, consider-iterating-dictionary, too-many-locals, too-many-branches
2
+
3
+ """Converts XSD to Avro schema."""
4
+
5
+ import os
6
+ import re
7
+ from typing import Dict, List, Tuple
8
+ import xml.etree.ElementTree as ET
9
+ import json
10
+ from urllib.parse import urlparse
11
+ from avrotize.common import avro_namespace, generic_type
12
+
13
+ from avrotize.dependency_resolver import inline_dependencies_of, sort_messages_by_dependencies
14
+
15
+ XSD_NAMESPACE = 'http://www.w3.org/2001/XMLSchema'
16
+
17
+
18
+ class XSDToAvro:
19
+ """ Convert XSD to Avro schema."""
20
+
21
def __init__(self) -> None:
    """Create a converter with blank namespaces and an empty type table.

    All three attributes are populated later, while a schema is converted.
    """
    # XML target namespace of the schema being converted.
    self.xml_namespace = ''
    # Avro namespace assigned to every generated named type.
    self.avro_namespace = ''
    # Named simple types that resolved to a plain Avro type, keyed by name.
    self.simple_type_map: Dict[str, str | dict] = {}
26
+
27
def xsd_targetnamespace_to_avro_namespace(self, targetnamespace: str) -> str:
    """Derive an Avro namespace from an XSD ``targetNamespace`` URI.

    For ``urn:`` URIs the colon-separated parts of the path keep their
    order; dots inside a part become dashes, and a part that starts with a
    digit is glued onto the preceding part with a dash (or prefixed with
    ``_`` when there is no preceding part). For any other scheme the
    slash-separated path parts are reversed. The parts are joined with
    dots, the host name (if the URI has one) is appended, and the result is
    handed to ``avro_namespace``.

    Args:
        targetnamespace: The ``targetNamespace`` value of the schema.

    Returns:
        Whatever ``avro_namespace`` returns for the assembled dotted name.
    """
    parsed_url = urlparse(targetnamespace)
    if parsed_url.scheme == 'urn':
        path_segments: List[str] = []
        for segment in parsed_url.path.strip(':').replace('.', '-').split(':'):
            if not segment:
                # An empty URN body or a doubled colon yields an empty part;
                # indexing segment[0] on it used to raise IndexError.
                continue
            if not segment[0].isdigit():
                path_segments.append(segment)
            elif path_segments:
                # A name part must not start with a digit, so attach it to
                # the part that precedes it.
                path_segments[-1] = f"{path_segments[-1]}-{segment}"
            else:
                path_segments.append('_' + segment)
    else:
        path_segments = list(reversed(parsed_url.path.strip('/').split('/')))
    namespace_prefix = '.'.join(path_segments)
    if parsed_url.hostname:
        namespace = f"{namespace_prefix}.{parsed_url.hostname}"
    else:
        namespace = namespace_prefix
    return avro_namespace(namespace)
55
+
56
def xsd_to_avro_type(self, xsd_type: str, namespaces: dict):
    """Resolve an XSD type reference to an Avro type.

    Lookup order:
      1. a simple type already recorded in ``self.simple_type_map``;
      2. a built-in XSD type, when the reference's prefix equals the prefix
         stored for the XML Schema namespace in ``namespaces``;
      3. otherwise a reference to ``<avro namespace>.<local name>``.

    Args:
        xsd_type: Type reference as written in the schema, e.g. ``xs:string``.
        namespaces: Mapping of namespace URI to prefix.

    Returns:
        An Avro type name, or a dict describing a logical type.
    """
    recorded = self.simple_type_map
    if xsd_type in recorded:
        return recorded[xsd_type]

    # Split on the first colon only; a reference without a usable local part
    # is treated as an unprefixed name.
    prefix, separator, type_name = xsd_type.partition(':')
    if not separator or not type_name:
        type_name, prefix = prefix, ''

    qualified_name = self.avro_namespace + '.' + type_name
    if namespaces.get(XSD_NAMESPACE, '') != prefix:
        return qualified_name

    builtin_types: Dict[str, str | dict] = {}
    for avro_name, xsd_names in (
        ('string', (
            'string', 'anyURI', 'normalizedString', 'token', 'language',
            'Name', 'NCName', 'ENTITY', 'ENTITIES', 'ID', 'IDREF', 'IDREFS',
            'NMTOKEN', 'NMTOKENS', 'QName', 'NOTATION',
        )),
        ('int', (
            'int', 'integer', 'short', 'byte', 'nonNegativeInteger',
            'positiveInteger', 'unsignedInt', 'unsignedShort', 'unsignedByte',
        )),
        ('long', ('long', 'unsignedLong')),
        ('float', ('float',)),
        ('double', ('double',)),
        ('boolean', ('boolean',)),
        ('bytes', ('hexBinary', 'base64Binary')),
    ):
        builtin_types.update(dict.fromkeys(xsd_names, avro_name))
    builtin_types.update({
        'decimal': {'type': 'bytes', 'logicalType': 'decimal', 'precision': 32, 'scale': 6},
        'date': {'type': 'int', 'logicalType': 'date'},
        'dateTime': {'type': 'long', 'logicalType': 'timestamp-millis'},
        'dateTimeStamp': {'type': 'long', 'logicalType': 'timestamp-millis'},
        'time': {'type': 'int', 'logicalType': 'time-millis'},
        'duration': {'type': 'int', 'logicalType': 'duration'},
        'yearMonthDuration': {'type': 'string', 'logicalType': 'duration'},
        'dayTimeDuration': {'type': 'string', 'logicalType': 'duration'},
        'gYear': {'type': 'string'},
        'gYearMonth': {'type': 'string'},
        'gMonth': {'type': 'string'},
        'gMonthDay': {'type': 'string'},
        'gDay': {'type': 'string'},
    })
    return builtin_types.get(type_name, qualified_name)
123
+
124
def process_element(self, element: ET.Element, namespaces: dict, dependencies: list):
    """Turn an XSD ``element`` or ``attribute`` node into an Avro field.

    The field type comes from the ``type`` attribute when present, otherwise
    from an inline ``complexType`` or ``simpleType`` child. ``maxOccurs``
    other than ``1`` wraps the type in an array, ``minOccurs="0"`` makes it
    nullable, and ``annotation/documentation`` text becomes ``doc``.

    Args:
        element: The node to convert.
        namespaces: Mapping of namespace URI to prefix.
        dependencies: Caller-owned list; names of referenced non-built-in
            types are appended in place, each name at most once.

    Returns:
        A dict with ``name``, ``type`` and optionally ``doc``.

    Raises:
        ValueError: If the node has neither a ``type`` attribute nor an
            inline ``complexType``/``simpleType``.
    """
    name = element.get('name')
    type_value = element.get('type', '')
    if type_value:
        avro_type = self.xsd_to_avro_type(type_value, namespaces)
        # Use .get so a schema that binds XML Schema as the default
        # namespace (no prefix recorded) does not raise KeyError; in that
        # case built-in references are the ones without a prefix.
        xsd_prefix = namespaces.get(XSD_NAMESPACE, '')
        if xsd_prefix:
            is_builtin = type_value.startswith(f'{xsd_prefix}:')
        else:
            is_builtin = ':' not in type_value
        if not is_builtin and type_value not in self.simple_type_map:
            if isinstance(avro_type, dict):
                dep_namespace = avro_type.get('namespace')
                dep_name = avro_type.get('name')
                dependency = f'{dep_namespace}.{dep_name}' if dep_namespace and dep_name else None
            else:
                dependency = avro_type
            # Mutate the caller's list in place; rebinding the parameter to a
            # de-duplicated copy would be invisible to the caller.
            if dependency and dependency not in dependencies:
                dependencies.append(dependency)
    else:
        complex_type = element.find(f'{{{XSD_NAMESPACE}}}complexType', namespaces)
        if complex_type is not None:
            # Anonymous inline types take the name of their element.
            complex_type.set('name', name)
            avro_type = self.process_complex_type(complex_type, namespaces)
        else:
            simple_type = element.find(f'{{{XSD_NAMESPACE}}}simpleType', namespaces)
            if simple_type is None:
                raise ValueError('element must have a type or complexType')
            # Same naming rule as for inline complex types; without it the
            # simple-type processing rejects the anonymous type.
            if name and not simple_type.get('name'):
                simple_type.set('name', name)
            _, avro_type = self.process_simple_type(simple_type, namespaces)

    max_occurs = element.get('maxOccurs')
    if max_occurs is not None and max_occurs != '1':
        avro_type = {'type': 'array', 'items': avro_type}
    if element.get('minOccurs') == '0':
        avro_type = ['null', avro_type]

    avro_field = {'name': name, 'type': avro_type}
    annotation = element.find(f'{{{XSD_NAMESPACE}}}annotation', namespaces)
    if annotation is not None:
        documentation = annotation.find(f'{{{XSD_NAMESPACE}}}documentation', namespaces)
        if documentation is not None and documentation.text is not None:
            avro_field['doc'] = documentation.text.strip()
    return avro_field
167
+
168
def process_complex_type(self, complex_type: ET.Element, namespaces: dict) -> dict | str:
    """Convert an XSD ``complexType`` node into an Avro record.

    Fields are collected from, in this order:
      * ``sequence`` children: one field per ``element``, plus an ``any``
        field when the sequence contains ``any``;
      * ``all`` children: one field per ``element``;
      * ``choice`` children: a single field, named after the complex type,
        whose type is a union of one-field records (one per alternative);
      * ``attribute`` children;
      * ``simpleContent/extension``: a ``value`` field of the base type plus
        one field per attribute of the extension.

    Args:
        complex_type: The ``complexType`` node; its ``name`` attribute is
            used as the record name.
        namespaces: Mapping of namespace URI to prefix.

    Returns:
        The record as a dict. ``dependencies`` is present only when the
        fields reference other types; names are unique and listed in
        first-seen order.

    Raises:
        ValueError: If a ``simpleContent`` extension has no ``base``.
    """
    xsd = f'{{{XSD_NAMESPACE}}}'
    type_name = complex_type.attrib.get('name')
    dependencies: List[str] = []
    fields: list = []
    avro_type: dict = {
        'type': 'record',
        'name': type_name,
        'namespace': self.avro_namespace,
        'fields': fields
    }

    avro_doc = ''
    annotation = complex_type.find(f'{xsd}annotation', namespaces)
    if annotation is not None:
        documentation = annotation.find(f'{xsd}documentation', namespaces)
        if documentation is not None and documentation.text is not None:
            avro_doc = documentation.text.strip()
            avro_type['doc'] = avro_doc

    def collect(nodes, xml_kind: str) -> None:
        """Append one field per node, tagged with its XML kind."""
        for node in nodes:
            field = self.process_element(node, namespaces, dependencies)
            field['xmlkind'] = xml_kind
            fields.append(field)

    for sequence in complex_type.findall(f'{xsd}sequence', namespaces):
        collect(sequence.findall(f'{xsd}element', namespaces), 'element')
        if sequence.findall(f'{xsd}any', namespaces):
            fields.append({"name": "any", "xmlkind": "any", "type": generic_type()})

    for all_types in complex_type.findall(f'{xsd}all', namespaces):
        collect(all_types.findall(f'{xsd}element', namespaces), 'element')

    for choice in complex_type.findall(f'{xsd}choice', namespaces):
        choices: list = []
        for el in choice.findall(f'{xsd}element', namespaces):
            choice_field = self.process_element(el, namespaces, dependencies)
            choice_field['xmlkind'] = 'element'
            choice_record = {
                'type': 'record',
                'name': f'{type_name}_{choice_field["name"]}',
                'fields': [choice_field],
                'namespace': self.avro_namespace
            }
            if avro_doc:
                choice_record['doc'] = avro_doc
            choices.append(choice_record)
        fields.append({'name': f'{type_name}', 'type': choices})

    collect(complex_type.findall(f'.{xsd}attribute', namespaces), 'attribute')

    for el in complex_type.findall(f'{xsd}simpleContent', namespaces):
        simple_content = el.find(f'{xsd}extension', namespaces)
        if simple_content is not None:
            base_type = simple_content.attrib.get('base')
            if not base_type:
                raise ValueError("No base found in simpleContent")
            fields.append(
                {"name": "value", "type": self.xsd_to_avro_type(base_type, namespaces)})
            collect(simple_content.findall(f'{xsd}attribute', namespaces), 'attribute')

    # dict.fromkeys drops duplicates but keeps first-seen order, so the
    # emitted schema is identical from run to run (a set would not be).
    unique_dependencies = list(dict.fromkeys(dependencies))
    if unique_dependencies:
        avro_type['dependencies'] = unique_dependencies
    return avro_type
244
+
245
def process_simple_type(self, simple_type: ET.Element, namespaces: dict) -> Tuple[bool, dict | str]:
    """Convert an XSD ``simpleType`` node.

    The first ``restriction`` child that has either ``enumeration`` facets
    or a ``base`` attribute decides the result:

      * with enumerations, an Avro ``enum`` is returned and should be added
        to the schema;
      * with a decimal base, a ``decimal`` logical type is built from the
        ``totalDigits``/``fractionDigits`` facets (defaults 32 and 6);
      * with any other base, the base type is resolved through
        ``xsd_to_avro_type``.

    In the last two cases the result is also stored in
    ``self.simple_type_map`` under the simple type's name.

    Args:
        simple_type: The ``simpleType`` node; it must carry a ``name``.
        namespaces: Mapping of namespace URI to prefix.

    Returns:
        ``(add_to_schema, avro_type)``; ``add_to_schema`` is True only for
        enums.

    Raises:
        ValueError: If the node has no name, or no restriction yields a type.
    """
    xsd = f'{{{XSD_NAMESPACE}}}'
    type_name = simple_type.attrib.get('name')
    if not type_name:
        raise ValueError("SimpleType must have a name")

    avro_doc = ''
    annotation = simple_type.find(f'{xsd}annotation', namespaces)
    if annotation is not None:
        documentation = annotation.find(f'{xsd}documentation', namespaces)
        if documentation is not None and documentation.text is not None:
            avro_doc = documentation.text.strip()

    # .get avoids a KeyError when no prefix is bound to the XML Schema
    # namespace; built-in names are then written without a prefix.
    xsd_prefix = namespaces.get(XSD_NAMESPACE, '')
    decimal_type = f'{xsd_prefix}:decimal' if xsd_prefix else 'decimal'

    for restriction in simple_type.findall(f'{xsd}restriction', namespaces):
        base_type = restriction.get('base')
        enums: List[str] = []
        for el in restriction.findall(f'{xsd}enumeration', namespaces):
            # A missing or empty value becomes 'Empty'; an empty string
            # would otherwise fail on the first-character check below.
            symbol = el.attrib.get('value') or 'Empty'
            # Symbols that start with a digit get a leading underscore.
            if symbol[0].isdigit():
                symbol = '_' + symbol
            enums.append(symbol)

        if enums:
            enum_type = {
                'type': 'enum',
                'name': type_name,
                'namespace': self.avro_namespace,
                'symbols': enums
            }
            if avro_doc:
                enum_type['doc'] = avro_doc
            return True, enum_type

        if base_type:
            if base_type == decimal_type:
                precision = restriction.find(f'{xsd}totalDigits', namespaces)
                scale = restriction.find(f'{xsd}fractionDigits', namespaces)
                logical_type = {
                    'type': 'bytes',
                    'logicalType': 'decimal',
                    'precision': int(precision.attrib.get('value', 32)) if isinstance(precision, ET.Element) else 32,
                    'scale': int(scale.attrib.get('value', 6)) if isinstance(scale, ET.Element) else 6,
                }
                if avro_doc:
                    logical_type['doc'] = avro_doc
                self.simple_type_map[type_name] = logical_type
                return False, logical_type
            resolved = self.xsd_to_avro_type(base_type, namespaces)
            self.simple_type_map[type_name] = resolved
            return False, resolved

    raise ValueError("No content found in simple type")
299
+
300
def process_top_level_element(self, element: ET.Element, namespaces: dict):
    """Convert a top-level ``element`` of the schema.

    An element with a ``type`` attribute is wrapped in a record named
    ``Root`` holding that element as its single field. An element without a
    ``type`` attribute must contain an inline ``complexType``, which is
    named after the element and converted on its own.

    Args:
        element: The top-level ``element`` node.
        namespaces: Mapping of namespace URI to prefix.

    Returns:
        The resulting Avro record.

    Raises:
        ValueError: If the element has neither a ``type`` attribute nor an
            inline ``complexType``.
    """
    if 'type' not in element.attrib:
        inline_type = element.find(f'{{{XSD_NAMESPACE}}}complexType', namespaces)
        if inline_type is None:
            raise ValueError(
                'top level element must have a type or be complexType')
        inline_type.set('name', element.get('name', ''))
        return self.process_complex_type(inline_type, namespaces)

    root_record: dict = {
        'type': 'record',
        'name': 'Root',
        'namespace': self.avro_namespace,
        'xmlns': self.xml_namespace,
        'fields': []
    }
    annotation_node = element.find(f'{{{XSD_NAMESPACE}}}annotation', namespaces)
    doc_node = None
    if annotation_node is not None:
        doc_node = annotation_node.find(f'{{{XSD_NAMESPACE}}}documentation', namespaces)
    if doc_node is not None and doc_node.text is not None:
        root_record['doc'] = doc_node.text.strip()

    referenced: List[str] = []
    root_field = self.process_element(element, namespaces, referenced)
    root_field['xmlkind'] = 'element'
    root_record['fields'].append(root_field)
    if referenced:
        root_record['dependencies'] = referenced
    return root_record
334
+
335
def extract_xml_namespaces(self, xml_str: str):
    """Collect the prefixed namespace declarations found in an XML string.

    Matches ``xmlns:prefix="uri"`` and ``xmlns:prefix='uri'``, with optional
    whitespace around ``=``. Prefixes may contain word characters, ``-`` and
    ``.``. Default-namespace declarations (``xmlns="..."``) are not
    collected.

    Args:
        xml_str: Raw XML text.

    Returns:
        Dict mapping namespace URI to prefix. When one URI is declared with
        several prefixes, the last declaration wins.
    """
    # XML allows either quote character around attribute values.
    pattern = re.compile(r"""xmlns:([\w.\-]+)\s*=\s*(?:"([^"]+)"|'([^']+)')""")
    return {m.group(2) or m.group(3): m.group(1)
            for m in pattern.finditer(xml_str)}
342
+
343
def xsd_to_avro(self, xsd_path: str, code_namespace: str | None = None):
    """Read an XSD file and convert it to an Avro schema.

    Top-level simple types are processed first (only enums are emitted),
    then top-level complex types, then top-level elements. When the schema
    has exactly one top-level element, its record is returned with its
    dependencies inlined from the collected types. Otherwise all collected
    types are sorted by dependency.

    Args:
        xsd_path: Path of the XSD file, read as UTF-8.
        code_namespace: Avro namespace to use; when empty or None it is
            derived from the schema's ``targetNamespace``.

    Returns:
        A single Avro schema, or a list of schemas when several remain.

    Raises:
        ValueError: If the schema root has no ``targetNamespace``.
    """
    with open(xsd_path, 'r', encoding='utf-8') as f:
        xsd = f.read()

    namespaces = self.extract_xml_namespaces(xsd)
    root = ET.fromstring(xsd)
    target_namespace = root.get('targetNamespace')
    if target_namespace is None:
        raise ValueError('targetNamespace not found')
    self.xml_namespace = target_namespace
    self.avro_namespace = code_namespace or self.xsd_targetnamespace_to_avro_namespace(target_namespace)

    # Registration only needs to happen when the document actually binds a
    # prefix to the XML Schema namespace; indexing the mapping directly
    # raised KeyError for schemas that declare it as the default namespace.
    xsd_prefix = namespaces.get(XSD_NAMESPACE)
    if xsd_prefix:
        try:
            ET.register_namespace(xsd_prefix, XSD_NAMESPACE)
        except ValueError:
            # ElementTree rejects reserved prefixes of the form "ns<digits>".
            # The prefix mapping is not needed for the conversion below.
            pass

    avro_schema: List[dict | list | str] = []

    for simple_type in root.findall(f'{{{XSD_NAMESPACE}}}simpleType', namespaces):
        add_to_schema, simple_type_type = self.process_simple_type(simple_type, namespaces)
        # Simple types that resolved to a plain base type are kept in
        # simple_type_map only; they are not emitted as named types.
        if add_to_schema:
            avro_schema.append(simple_type_type)
    for complex_type in root.findall(f'{{{XSD_NAMESPACE}}}complexType', namespaces):
        avro_schema.append(self.process_complex_type(complex_type, namespaces))

    top_level_elements = root.findall(f'{{{XSD_NAMESPACE}}}element', namespaces)
    if len(top_level_elements) == 1:
        record = self.process_top_level_element(top_level_elements[0], namespaces)
        inline_dependencies_of(avro_schema, record)
        return record
    for element in top_level_elements:
        avro_schema.append(self.process_top_level_element(element, namespaces))

    avro_schema = sort_messages_by_dependencies(avro_schema)
    if len(avro_schema) == 1:
        return avro_schema[0]
    return avro_schema
388
+
389
def convert_xsd_to_avro(self, xsd_path: str, avro_path: str, namespace: str | None = None):
    """Convert the XSD at ``xsd_path`` and save the Avro schema as JSON.

    The schema is written to ``avro_path`` as UTF-8 with a four-space
    indent. ``namespace`` is forwarded as the Avro namespace to use.
    """
    schema = self.xsd_to_avro(xsd_path, code_namespace=namespace)
    with open(avro_path, 'w', encoding='utf-8') as output:
        json.dump(schema, output, indent=4)
396
+
397
+
398
+ def convert_xsd_to_avro(xsd_path: str, avro_path: str, namespace: str | None = None):
399
+ """
400
+ Convert XSD to Avro schema and write to a file.
401
+
402
+ Params:
403
+ xsd_path: str - Path to the XSD file.
404
+ avro_path: str - Path to the Avro file.
405
+ namespace: str | None - Namespace of the Avro schema.
406
+ """
407
+
408
+ if not os.path.exists(xsd_path):
409
+ raise FileNotFoundError(f"XSD file not found at {xsd_path}")
410
+ if not namespace:
411
+ namespace = os.path.splitext(os.path.basename(xsd_path))[0].lower().replace('-', '_')
412
+ xsd_to_avro = XSDToAvro()
413
+ xsd_to_avro.convert_xsd_to_avro(xsd_path, avro_path, namespace)
@@ -0,0 +1,107 @@
1
+ Metadata-Version: 2.4
2
+ Name: structurize
3
+ Version: 2.19.0
4
+ Summary: Tools to convert from and to JSON Structure from various other schema languages.
5
+ Author-email: Clemens Vasters <clemensv@microsoft.com>
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Classifier: Operating System :: OS Independent
9
+ Requires-Python: >=3.10
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Requires-Dist: jsonschema>=4.23.0
13
+ Requires-Dist: lark>=1.1.9
14
+ Requires-Dist: pyarrow>=22.0.0
15
+ Requires-Dist: asn1tools>=0.167.0
16
+ Requires-Dist: jsonpointer>=3.0.0
17
+ Requires-Dist: jsonpath-ng>=1.6.1
18
+ Requires-Dist: jsoncomparison>=1.1.0
19
+ Requires-Dist: requests>=2.32.3
20
+ Requires-Dist: azure-kusto-data>=5.0.5
21
+ Requires-Dist: azure-identity>=1.17.1
22
+ Requires-Dist: datapackage>=1.15.4
23
+ Requires-Dist: jinja2>=3.1.4
24
+ Requires-Dist: pyiceberg>=0.10.0
25
+ Requires-Dist: pandas>=2.2.2
26
+ Requires-Dist: docker>=7.1.0
27
+ Provides-Extra: dev
28
+ Requires-Dist: pytest>=8.3.2; extra == "dev"
29
+ Requires-Dist: fastavro>=1.9.5; extra == "dev"
30
+ Requires-Dist: xmlschema>=3.3.2; extra == "dev"
31
+ Requires-Dist: xmlunittest>=1.0.1; extra == "dev"
32
+ Requires-Dist: pylint>=3.2.6; extra == "dev"
33
+ Requires-Dist: dataclasses_json>=0.6.7; extra == "dev"
34
+ Requires-Dist: dataclasses>=0.8; extra == "dev"
35
+ Requires-Dist: pydantic>=2.8.2; extra == "dev"
36
+ Requires-Dist: avro>=1.12.0; extra == "dev"
37
+ Requires-Dist: testcontainers>=4.7.2; extra == "dev"
38
+ Requires-Dist: pymysql>=1.1.1; extra == "dev"
39
+ Requires-Dist: psycopg2>=2.9.9; extra == "dev"
40
+ Requires-Dist: pyodbc>=5.1.0; extra == "dev"
41
+ Requires-Dist: pymongo>=4.8.0; extra == "dev"
42
+ Requires-Dist: oracledb>=2.3.0; extra == "dev"
43
+ Requires-Dist: cassandra-driver>=3.29.1; extra == "dev"
44
+ Requires-Dist: sqlalchemy>=2.0.32; extra == "dev"
45
+ Dynamic: license-file
46
+
47
+ # Structurize / Avrotize
48
+
49
+ **Structurize** is a powerful schema conversion toolkit that helps you transform between various schema formats including JSON Schema, JSON Structure, Avro Schema, Protocol Buffers, XSD, SQL, and many more.
50
+
51
+ This package is published under two names:
52
+
53
+ - **`structurize`** - The primary package name, emphasizing JSON Structure conversion capabilities
54
+ - **`avrotize`** - The original package name, emphasizing Avro Schema conversion capabilities
55
+
56
+ Both packages currently share the same features and codebase. However, in future releases, Avro-focused and JSON Structure-focused features may be split across the two tools to make the feature list more manageable and focused for users. Choose whichever variant better aligns with your primary use case.
57
+
58
+ ## Quick Start
59
+
60
+ Install the package:
61
+
62
+ ```bash
63
+ pip install structurize
64
+ ```
65
+
66
+ or
67
+
68
+ ```bash
69
+ pip install avrotize
70
+ ```
71
+
72
+ Use the CLI:
73
+
74
+ ```bash
75
+ # Using structurize
76
+ structurize --help
77
+
78
+ # Or using avrotize
79
+ avrotize --help
80
+ ```
81
+
82
+ ## Key Features
83
+
84
+ - Convert between JSON Schema, JSON Structure, and Avro Schema
85
+ - Transform schemas to and from Protocol Buffers, XSD, ASN.1
86
+ - Generate code in C#, Python, TypeScript, Java, Go, Rust, C++, JavaScript
87
+ - Export schemas to SQL and NoSQL databases (MySQL, PostgreSQL, SQL Server, Oracle, Cassandra, MongoDB, DynamoDB, and more)
88
+ - Convert to Parquet, Iceberg, Kusto, and other data formats
89
+ - Generate documentation in Markdown
90
+
91
+ ## Documentation
92
+
93
+ For complete documentation, examples, and detailed usage instructions, please see the main repository:
94
+
95
+ **[📖 Full Documentation](https://github.com/clemensv/avrotize)**
96
+
97
+ The main README includes:
98
+
99
+ - Comprehensive command reference
100
+ - Conversion examples and use cases
101
+ - Code generation guides
102
+ - Database schema export instructions
103
+ - API documentation
104
+
105
+ ## License
106
+
107
+ MIT License - see the [LICENSE](../LICENSE) file in the repository root.
@@ -0,0 +1,70 @@
1
+ avrotize/__init__.py,sha256=JjPSX7c686TV00J_x0Py9JwXS0aJl8vpLn81Y0ondkw,3606
2
+ avrotize/__main__.py,sha256=5pY8dYAURcOnFRvgb6fgaOIa_SOzPLIWbU8-ZTQ0jG4,88
3
+ avrotize/_version.py,sha256=whWmGdYhR_wJ7JC69oWgfcPaP80_UeZLbA-k7pBts8I,714
4
+ avrotize/asn1toavro.py,sha256=QDNwfBfXMxSH-k487CA3CaGCGDzOLs4PpVbbENm5uF0,8386
5
+ avrotize/avrotize.py,sha256=VHFpBltMVBpyt0ju3ZWW725BKjQ4Fk-nrAy8udW-X44,5713
6
+ avrotize/avrotocpp.py,sha256=hRZV247_TDD7Sm6_8sFx-UH5SueLLx2Wg6TvAVUX0iE,25693
7
+ avrotize/avrotocsharp.py,sha256=YpJRci_UoZ2tSpl3etsCZ_tSkn2YCwXUpnGbcxSck5M,65404
8
+ avrotize/avrotocsv.py,sha256=PaDEW2aGRFVNLwewWhJ3OwxbKFI3PBg_mTgtT4uLMko,3689
9
+ avrotize/avrotodatapackage.py,sha256=zSCphLvCYiBKRAUCdccsr-4JysH3PyAS6fSgwa65Tss,7259
10
+ avrotize/avrotodb.py,sha256=5fNJgz00VMimyOl7eI0lIxlcaN_JnN0mb2Q9lzCRecw,46989
11
+ avrotize/avrotogo.py,sha256=RnycgAuGejq00hDdsUGdMHiJX6nr0VAqNArbCkTzUMg,21880
12
+ avrotize/avrotographql.py,sha256=i6G7xWjH_Lsn_CLiM4BCPb8OyZuCCpsYjXwXNTRMwEE,7394
13
+ avrotize/avrotoiceberg.py,sha256=plVHGWkED1YDLcMDxL7NMdJl2f8G32hwlNWFrBLcsD8,9057
14
+ avrotize/avrotojava.py,sha256=_G_67xi1H0Ctj9KagiCnVNETvyPicOYO8ASvz6e1XYE,131861
15
+ avrotize/avrotojs.py,sha256=QjB6XjFnDrpZBZrrWqS0TN8fQfRXBfhHabfG73FOIo8,12249
16
+ avrotize/avrotojsons.py,sha256=WXWniQqwcl8eU35VibDv7qJJwbiLV_yoWZ4JxiZ8mHA,21588
17
+ avrotize/avrotojstruct.py,sha256=-Hs4Ta958bRKmOfSTzRFENABCZ6lQPSPbIBEXvOQD1M,14660
18
+ avrotize/avrotokusto.py,sha256=D6AiRPa5uiZbqo9dqrFvAknsF5oNXgIzk8_08uZTZ2M,17636
19
+ avrotize/avrotomd.py,sha256=WHPHnfmkI3xDNIHKZ3ReYxj6tib1eCny3JOznNSN6r8,5348
20
+ avrotize/avrotools.py,sha256=dTbGgWQyKdSuvCf4yuoymwhYO5gX9ywPu-klIXYwKZM,6052
21
+ avrotize/avrotoparquet.py,sha256=qm5hfia5elW1Yn4KACG8bbudLAqQSwGk3fIkTvdT5Rg,9088
22
+ avrotize/avrotoproto.py,sha256=STqbdGjVrgKrlKXt-6dZlekW_Oq0W0StRx80St1XqIc,22486
23
+ avrotize/avrotopython.py,sha256=sPsSLseSq-toKHnsFsYRRtGePGYospRz2mwGLep-POw,31147
24
+ avrotize/avrotorust.py,sha256=QMIBNkFpDlH44kuQo24k5D-f0lmdhoA5b7hEbhKsnMw,22214
25
+ avrotize/avrotots.py,sha256=EalaA9feiHMyw7lfD8rifs24LsUVigmuPOu7nPh-wUc,29002
26
+ avrotize/avrotoxsd.py,sha256=iGQq_8kC0kfKsqvqS6s_mO-kJ8N5G8vXOwqRI_DZUxc,17744
27
+ avrotize/cddltostructure.py,sha256=MA2c-P3CIEAxEaBX-FF299gR55xcLEV3FrfTr2QfayM,74491
28
+ avrotize/commands.json,sha256=afZ36gTItJdtM8z-qOTOw0MazKz1WKZMztY86JUHcgU,88538
29
+ avrotize/common.py,sha256=enqNR1I9-SbW7fNJE3w7N2R87kiN6_9Oa7VB4b2AUBc,31913
30
+ avrotize/constants.py,sha256=SvI_WLdiESe05snUfqD_jF6UWjE_2eAzGyLeRkK2WMg,2214
31
+ avrotize/csvtoavro.py,sha256=TuIYm_Xv8gioEHl1YgWQKOYkFGGHfuwmK5RuEAEXbt8,4293
32
+ avrotize/datapackagetoavro.py,sha256=lw1S3H6UpKtjJj9ywDTuRw-qcihFx0vFJNPK7SlgKvY,2607
33
+ avrotize/dependency_resolver.py,sha256=LGOTutpobJ4kMjAwvs-l0Mt2tEoZFaXCazs-u38qnYk,19374
34
+ avrotize/dependency_version.py,sha256=tvbpO2VstTSTmNA5jbzQl48u6jnIM7BHyASQrrgsRYU,16844
35
+ avrotize/jsonstoavro.py,sha256=ZzigsCjAxw_TflXCjTLKHTrPmkiZRZMpuaZICfT_r_I,120069
36
+ avrotize/jsonstostructure.py,sha256=WVoKcFoDvtA_QIScZJa8oXZbwpc7Au-bTV3_pUXzTik,135445
37
+ avrotize/jstructtoavro.py,sha256=sOq7Ru1b8_ZLCEsnBqx3gsMWd7dPAaYxoraAD0bz6rk,33891
38
+ avrotize/kstructtoavro.py,sha256=t97JY22n0uILK3WcvQu_Yp9ONvouJRLAC2bZ3rvZ1L0,2856
39
+ avrotize/kustotoavro.py,sha256=1oEk9mbqmP3N5-V7mBHSXpbSlYFzjJ7ajIDNJZxA1r8,21756
40
+ avrotize/parquettoavro.py,sha256=iAPrSYNkiH3fBKNVDfIgeXkQbAiopa252ULJrGgmBDI,5531
41
+ avrotize/proto2parser.py,sha256=__9R3cqiUJXc_efPCZZcF7rt18kA7mfhmX0qm2v0eSw,19742
42
+ avrotize/proto3parser.py,sha256=MfE84c-oAWWuzYmKlEZ5g5LUF7YzZaASFh2trX3UCaw,15604
43
+ avrotize/prototoavro.py,sha256=hqXBGRxYojaEbEgoHZxXwMG4R1nWC7UMl_XNLWfqH38,17346
44
+ avrotize/structuretocddl.py,sha256=RK_dTJf0oAo6BIBM48NHRcWC96OtUjlgUC6HzXs5Lkk,21234
45
+ avrotize/structuretocpp.py,sha256=tBWOvyZPYQ1CHN6RgDnWlmzJ1giOyQ9SlHBHWvhPyiw,35898
46
+ avrotize/structuretocsharp.py,sha256=Y5TLMxUA0yt9QlXb8L9mSKFhLGmJkanwsg0yo0QfugI,122209
47
+ avrotize/structuretocsv.py,sha256=w9cwXAnnakKaeTtXsLWWO8KwYnXUxyXvC7a-ZKs-E94,13851
48
+ avrotize/structuretodatapackage.py,sha256=NEHRt30KfVDWH1EQakvuMdRZTtfVXx8fsaYud0ofb2g,29768
49
+ avrotize/structuretodb.py,sha256=3QE_TCdNklGH5ymzGsEnX1sI4OhvX2AYKPH7xtR5tHk,43926
50
+ avrotize/structuretogo.py,sha256=VCEUz-5J8uRqX1hWaTimtfVzEsIB-gs4wxa308rYD0s,32470
51
+ avrotize/structuretographql.py,sha256=wcGXnrup5v5saRa1BhR6o-X8q8ujsQMVqrFHQTBPjww,20468
52
+ avrotize/structuretoiceberg.py,sha256=itKb33Kj-7-udk4eHTLmTEasIeh1ggpZ3e_bwCxLABM,15344
53
+ avrotize/structuretojava.py,sha256=jG2Vcf1KdezWrZo5lsecxLnmnMw1rA96uOxVWJQ4Rso,43372
54
+ avrotize/structuretojsons.py,sha256=PJrQBaf6yQHu5eFkePxbjPBEmL-fYfX2wj6OmH1jsWw,22495
55
+ avrotize/structuretokusto.py,sha256=rOKgYIcm7ZK8RS-VvMFPNzPzwtv7c4dIKU-fKjrJLyM,30618
56
+ avrotize/structuretomd.py,sha256=exfCldYbieVdduhotSoLrxsbphmyJQyeQso9qv4qyUw,13642
57
+ avrotize/structuretoproto.py,sha256=Aq0-fwMXSjjAxgZ5mq1kpo_TauigMRrJK9LNyoN-YGs,42679
58
+ avrotize/structuretopython.py,sha256=ePlXrwbqA9r63Vw6xL-Gq3hBUScdcF9aqCQSi_xtaGo,37980
59
+ avrotize/structuretorust.py,sha256=ChRmO7uzU-pMdDdS0Vtg-MVUaOaNhNUPwH-ZKKOHglU,35134
60
+ avrotize/structuretots.py,sha256=PLV6W8k-yd7xkspUaQ-Vj90F26PTkB5HO0OkPJolkJ0,30800
61
+ avrotize/structuretoxsd.py,sha256=01VpasyWSMOx04sILHLP7H-WkhGdXAEGKohUUfgrNf0,32797
62
+ avrotize/xsdtoavro.py,sha256=nQtNH_3pEZBp67oUCPqzhvItEExHTe-8obsIfNRXt8Y,19064
63
+ avrotize/dependencies/cpp/vcpkg/vcpkg.json,sha256=se5qnUVQ1Q6wN_DqgIioqKg_y7ouh9oly2iBAJJXkgw,414
64
+ avrotize/dependencies/typescript/node22/package.json,sha256=qAJ_dHE0YefuIRqkuN5tsXsHWSL8u6qZGRbYwVB_zq0,388
65
+ structurize-2.19.0.dist-info/licenses/LICENSE,sha256=xGtQGygTETTtDQJafZCUbpsed3GxO6grmqig-jGEuSk,11348
66
+ structurize-2.19.0.dist-info/METADATA,sha256=9e5yUdsXaK2Vy7TPQCPfHhbzqlQfOfwxTgHoIgG7WGc,3670
67
+ structurize-2.19.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
68
+ structurize-2.19.0.dist-info/entry_points.txt,sha256=biIH7jA5auhVqfbwHVk2gmD_gvrPYKgjpCAn0JWZ-Rs,55
69
+ structurize-2.19.0.dist-info/top_level.txt,sha256=yn-yQ0Cm1O9fbF8KJgv4IIvX4YRGelKgPqZF1wS5P50,9
70
+ structurize-2.19.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ structurize = avrotize.avrotize:main