structurize 2.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avrotize/__init__.py +64 -0
- avrotize/__main__.py +6 -0
- avrotize/_version.py +34 -0
- avrotize/asn1toavro.py +160 -0
- avrotize/avrotize.py +152 -0
- avrotize/avrotocpp.py +483 -0
- avrotize/avrotocsharp.py +1075 -0
- avrotize/avrotocsv.py +121 -0
- avrotize/avrotodatapackage.py +173 -0
- avrotize/avrotodb.py +1383 -0
- avrotize/avrotogo.py +476 -0
- avrotize/avrotographql.py +197 -0
- avrotize/avrotoiceberg.py +210 -0
- avrotize/avrotojava.py +2156 -0
- avrotize/avrotojs.py +250 -0
- avrotize/avrotojsons.py +481 -0
- avrotize/avrotojstruct.py +345 -0
- avrotize/avrotokusto.py +364 -0
- avrotize/avrotomd.py +137 -0
- avrotize/avrotools.py +168 -0
- avrotize/avrotoparquet.py +208 -0
- avrotize/avrotoproto.py +359 -0
- avrotize/avrotopython.py +624 -0
- avrotize/avrotorust.py +435 -0
- avrotize/avrotots.py +598 -0
- avrotize/avrotoxsd.py +344 -0
- avrotize/cddltostructure.py +1841 -0
- avrotize/commands.json +3337 -0
- avrotize/common.py +834 -0
- avrotize/constants.py +72 -0
- avrotize/csvtoavro.py +132 -0
- avrotize/datapackagetoavro.py +76 -0
- avrotize/dependencies/cpp/vcpkg/vcpkg.json +19 -0
- avrotize/dependencies/typescript/node22/package.json +16 -0
- avrotize/dependency_resolver.py +348 -0
- avrotize/dependency_version.py +432 -0
- avrotize/jsonstoavro.py +2167 -0
- avrotize/jsonstostructure.py +2642 -0
- avrotize/jstructtoavro.py +878 -0
- avrotize/kstructtoavro.py +93 -0
- avrotize/kustotoavro.py +455 -0
- avrotize/parquettoavro.py +157 -0
- avrotize/proto2parser.py +498 -0
- avrotize/proto3parser.py +403 -0
- avrotize/prototoavro.py +382 -0
- avrotize/structuretocddl.py +597 -0
- avrotize/structuretocpp.py +697 -0
- avrotize/structuretocsharp.py +2295 -0
- avrotize/structuretocsv.py +365 -0
- avrotize/structuretodatapackage.py +659 -0
- avrotize/structuretodb.py +1125 -0
- avrotize/structuretogo.py +720 -0
- avrotize/structuretographql.py +502 -0
- avrotize/structuretoiceberg.py +355 -0
- avrotize/structuretojava.py +853 -0
- avrotize/structuretojsons.py +498 -0
- avrotize/structuretokusto.py +639 -0
- avrotize/structuretomd.py +322 -0
- avrotize/structuretoproto.py +764 -0
- avrotize/structuretopython.py +772 -0
- avrotize/structuretorust.py +714 -0
- avrotize/structuretots.py +653 -0
- avrotize/structuretoxsd.py +679 -0
- avrotize/xsdtoavro.py +413 -0
- structurize-2.19.0.dist-info/METADATA +107 -0
- structurize-2.19.0.dist-info/RECORD +70 -0
- structurize-2.19.0.dist-info/WHEEL +5 -0
- structurize-2.19.0.dist-info/entry_points.txt +2 -0
- structurize-2.19.0.dist-info/licenses/LICENSE +201 -0
- structurize-2.19.0.dist-info/top_level.txt +1 -0
avrotize/avrotopython.py
ADDED
|
@@ -0,0 +1,624 @@
|
|
|
1
|
+
"""Converts Avro schema to Python data classes"""
|
|
2
|
+
|
|
3
|
+
# pylint: disable=line-too-long,too-many-instance-attributes
|
|
4
|
+
|
|
5
|
+
import copy
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
import re
|
|
9
|
+
import random
|
|
10
|
+
from typing import Dict, List, Set, Tuple, Union, Any
|
|
11
|
+
from avrotize.common import fullname, get_typing_args_from_string, is_generic_avro_type, pascal, process_template, build_flat_type_dict, inline_avro_references, is_type_with_alternate, strip_alternate_type
|
|
12
|
+
|
|
13
|
+
INDENT = ' '
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def is_python_reserved_word(word: str) -> bool:
    """Tell whether ``word`` needs escaping before use as a generated identifier.

    Besides the Python keywords, ``record``, ``self`` and ``cls`` are
    treated as reserved by this generator.
    """
    python_keywords = (
        'False', 'None', 'True', 'and', 'as', 'assert', 'async', 'await',
        'break', 'class', 'continue', 'def', 'del', 'elif', 'else', 'except',
        'finally', 'for', 'from', 'global', 'if', 'import', 'in', 'is',
        'lambda', 'nonlocal', 'not', 'or', 'pass', 'raise', 'return',
        'try', 'while', 'with', 'yield',
    )
    generator_reserved = ('record', 'self', 'cls')
    if word in python_keywords:
        return True
    return word in generator_reserved
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class AvroToPython:
|
|
29
|
+
"""Converts Avro schema to Python data classes"""
|
|
30
|
+
|
|
31
|
+
def __init__(self, base_package: str = '', dataclasses_json_annotation=False, avro_annotation=False) -> None:
|
|
32
|
+
self.base_package = base_package
|
|
33
|
+
self.dataclasses_json_annotation = dataclasses_json_annotation
|
|
34
|
+
self.avro_annotation = avro_annotation
|
|
35
|
+
self.output_dir = os.getcwd()
|
|
36
|
+
self.main_schema = None
|
|
37
|
+
self.type_dict = None
|
|
38
|
+
self.generated_types: Dict[str, str] = {}
|
|
39
|
+
|
|
40
|
+
def is_python_primitive(self, type_name: str) -> bool:
    """ Tell whether ``type_name`` names one of the Python primitive types used by the generator """
    primitive_names = ('None', 'bool', 'int', 'float', 'str', 'bytes')
    return type_name in primitive_names
|
|
43
|
+
|
|
44
|
+
def is_python_typing_struct(self, type_name: str) -> bool:
    """ Tell whether ``type_name`` is ``typing.Any`` or a ``typing`` Dict/List/Optional/Union expression """
    if type_name == 'typing.Any':
        return True
    generic_prefixes = ('typing.Dict[', 'typing.List[', 'typing.Optional[', 'typing.Union[')
    return type_name.startswith(generic_prefixes)
|
|
47
|
+
|
|
48
|
+
def safe_name(self, name: str) -> str:
    """Return ``name``, with an underscore appended when it is a reserved word"""
    return f"{name}_" if is_python_reserved_word(name) else name
|
|
53
|
+
|
|
54
|
+
def pascal_type_name(self, ref: str) -> str:
    """Turn a (possibly dotted) type reference into a type name.

    Only the segment after the last dot is kept; each underscore-separated
    piece of it is passed through ``pascal`` and the pieces are re-joined
    with underscores.
    """
    local_name = ref.rsplit('.', 1)[-1]
    return '_'.join(map(pascal, local_name.split('_')))
|
|
57
|
+
|
|
58
|
+
def python_package_from_avro_type(self, namespace: str, type_name: str) -> str:
    """Build the lower-cased Python package path for an Avro type.

    Args:
        namespace (str): Namespace to use when ``type_name`` is unqualified.
        type_name (str): Avro type name, optionally dot-qualified.

    Returns:
        str: Package path, prefixed with ``base_package`` when one is set.
    """
    lowered_type = '.'.join(segment.lower() for segment in type_name.split('.'))
    if '.' in type_name:
        # An already-qualified type name carries its own namespace.
        package = lowered_type
    else:
        lowered_namespace = '.'.join(segment.lower() for segment in namespace.split('.')) if namespace else ''
        separator = '.' if lowered_namespace and lowered_type else ''
        package = f"{lowered_namespace}{separator}{lowered_type}"
    if self.base_package:
        return f"{self.base_package}.{package}"
    return package
|
|
70
|
+
|
|
71
|
+
def python_type_from_avro_type(self, type_name: str) -> str:
    """Derive the Python class name for an Avro type name (delegates to ``pascal_type_name``)"""
    python_class_name = self.pascal_type_name(type_name)
    return python_class_name
|
|
74
|
+
|
|
75
|
+
def python_fully_qualified_name_from_avro_type(self, namespace: str, type_name: str) -> str:
    """
    Builds ``package.ClassName`` for an Avro type.

    The package comes from ``python_package_from_avro_type`` and the class
    name from ``python_type_from_avro_type``; when the package is empty the
    bare class name is returned.
    """
    package = self.python_package_from_avro_type(namespace, type_name)
    class_name = self.python_type_from_avro_type(type_name)
    if not package:
        return class_name
    return f"{package}.{class_name}"
|
|
81
|
+
|
|
82
|
+
def strip_package_from_fully_qualified_name(self, fully_qualified_name: str) -> str:
    """Return the part of a dotted name that follows the last dot"""
    _package, _dot, simple_name = fully_qualified_name.rpartition('.')
    return simple_name
|
|
85
|
+
|
|
86
|
+
def map_plain_type_reference_to_python(self, parent_namespace: str, avro_type: str) -> Tuple[bool, str]:
    """
    Resolves an Avro type given by name to a Python type

    Args:
        parent_namespace (str): Namespace handed on when the qualified
            Python name of a non-primitive type is built
        avro_type (str): Avro type name

    Returns:
        Tuple[bool, str]: ``(True, python_type)`` for generic and primitive
            Avro types, otherwise ``(False, fully_qualified_python_name)``
    """
    if is_generic_avro_type(avro_type):
        return True, 'typing.Any'

    primitive_types = {
        'null': 'None',
        'boolean': 'bool',
        'int': 'int',
        'long': 'int',
        'float': 'float',
        'double': 'float',
        'bytes': 'bytes',
        'string': 'str',
    }
    python_primitive = primitive_types.get(avro_type)
    if python_primitive is not None:
        return True, python_primitive

    qualified_name = self.python_fully_qualified_name_from_avro_type(parent_namespace, avro_type)
    return False, qualified_name
|
|
113
|
+
|
|
114
|
+
def convert_logical_type_to_python(self, avro_type: Dict, import_types: Set[str]) -> str:
    """Map the ``logicalType`` of an Avro schema to a Python type name.

    Args:
        avro_type (Dict): Avro schema that has a ``logicalType`` key.
        import_types (Set[str]): Receives the Python type when one is found.

    Returns:
        str: The mapped Python type, or ``typing.Any`` for an unrecognized
            logical type (nothing is added to ``import_types`` in that case).
    """
    logical_to_python = {
        'decimal': 'decimal.Decimal',
        'date': 'datetime.date',
        'time-millis': 'datetime.time',
        'time-micros': 'datetime.time',
        'timestamp-millis': 'datetime.datetime',
        'timestamp-micros': 'datetime.datetime',
        'duration': 'datetime.timedelta',
    }
    logical_type = avro_type['logicalType']
    # Only strings can name a logical type; anything else is unrecognized.
    python_type = logical_to_python.get(logical_type) if isinstance(logical_type, str) else None
    if python_type is None:
        return 'typing.Any'
    import_types.add(python_type)
    return python_type
|
|
138
|
+
|
|
139
|
+
def convert_avro_type_to_python(self, avro_type: Union[str, Dict, List], parent_package: str, import_types: set) -> str:
    """Translate an Avro type into a Python type expression.

    Args:
        avro_type (Union[str, Dict, List]): A type name, a schema object,
            or a union given as a list.
        parent_package (str): Namespace used for unqualified references.
        import_types (set): Collects the names the caller has to import.

    Returns:
        str: Python type expression; ``typing.Any`` when ``avro_type`` is
            neither a string, a list nor a dict.
    """
    if isinstance(avro_type, str):
        is_primitive, mapped_type = self.map_plain_type_reference_to_python(parent_package, avro_type)
        if is_primitive:
            return mapped_type
        import_types.add(mapped_type)
        return self.pascal_type_name(mapped_type)

    if isinstance(avro_type, list):
        if is_generic_avro_type(avro_type):
            return 'typing.Any'
        if is_type_with_alternate(avro_type):
            primary_type = strip_alternate_type(avro_type)
            return self.convert_avro_type_to_python(primary_type, parent_package, import_types)
        members = [member for member in avro_type if member != 'null']
        if len(members) != 1:
            # 'null' is filtered out above, so a multi-member union is
            # emitted without an Optional wrapper.
            converted = [self.convert_avro_type_to_python(member, parent_package, import_types) for member in members]
            return f"typing.Union[{', '.join(converted)}]"
        sole_member = self.convert_avro_type_to_python(members[0], parent_package, import_types)
        return f'typing.Optional[{sole_member}]' if 'null' in avro_type else sole_member

    if isinstance(avro_type, dict):
        schema_kind = avro_type['type']
        if schema_kind == 'record':
            class_ref = self.generate_class(avro_type, parent_package, write_file=True)
            import_types.add(class_ref)
            return self.strip_package_from_fully_qualified_name(class_ref)
        if schema_kind == 'enum':
            enum_ref = self.generate_enum(avro_type, parent_package, write_file=True)
            import_types.add(enum_ref)
            return self.strip_package_from_fully_qualified_name(enum_ref)
        if schema_kind == 'array':
            item_type = self.convert_avro_type_to_python(avro_type['items'], parent_package, import_types)
            return f"typing.List[{item_type}]"
        if schema_kind == 'map':
            value_type = self.convert_avro_type_to_python(avro_type['values'], parent_package, import_types)
            return f"typing.Dict[str,{value_type}]"
        if 'logicalType' in avro_type:
            return self.convert_logical_type_to_python(avro_type, import_types)
        # A wrapper such as {"type": "string"}: convert the wrapped type.
        return self.convert_avro_type_to_python(schema_kind, parent_package, import_types)

    return 'typing.Any'
|
|
178
|
+
|
|
179
|
+
# pylint: disable=eval-used
|
|
180
|
+
def init_field_value(self, field_type: str, field_name: str, field_is_enum: bool, field_ref: str, enum_types: List[str]):
    """Build the source-code expression that initializes one field.

    The result is Python source text, not a value; ``init_fields`` places
    it on the right-hand side of an assignment.

    Args:
        field_type (str): Python type expression of the field.
        field_name (str): Field name; only forwarded to nested calls.
        field_is_enum (bool): Whether the field is known to be an enum.
        field_ref (str): Source expression that reads the current value.
        enum_types (List[str]): Unqualified names of the enum types.

    Returns:
        str: Expression text producing the initialized value.
    """
    if field_type == "typing.Any":
        return field_ref
    if field_type in ('datetime.datetime', 'datetime.date', 'datetime.time', 'datetime.timedelta'):
        # Date/time values are passed through unchanged.
        return f"{field_ref}"
    # 'decimal.Decimal' is the spelling convert_logical_type_to_python emits.
    # Without it decimals fell through to the last branch below, whose
    # output calls from_serializer_dict() on decimal.Decimal, which has no
    # such attribute.
    if field_type in ('int', 'str', 'float', 'bool', 'bytes', 'Decimal', 'decimal.Decimal'):
        return f"{field_type}({field_ref})"
    if field_type.startswith("typing.List["):
        inner_type = get_typing_args_from_string(field_type)[0]
        item_init = self.init_field_value(inner_type, field_name, field_is_enum, 'v', enum_types)
        return f"{field_ref} if isinstance({field_ref}, list) else [{item_init} for v in {field_ref}] if {field_ref} else None"
    if field_type.startswith("typing.Dict["):
        inner_type = get_typing_args_from_string(field_type)[1]
        value_init = self.init_field_value(inner_type, field_name, field_is_enum, 'v', enum_types)
        return f"{field_ref} if isinstance({field_ref}, dict) else {{k: {value_init} for k, v in {field_ref}.items()}} if {field_ref} else None"
    if field_type.startswith("typing.Optional["):
        inner_type = get_typing_args_from_string(field_type)[0]
        inner_init = self.init_field_value(inner_type, field_name, field_is_enum, field_ref, enum_types)
        # Compare against None explicitly: a truthiness test would replace
        # legitimate falsy values (0, '', False, b'') with None.
        return f"{inner_init} if {field_ref} is not None else None"
    if field_type.startswith("typing.Union["):
        return self.init_field_value_from_union(get_typing_args_from_string(field_type), field_name, field_ref, enum_types)
    if field_is_enum or field_type in enum_types:
        return f"{field_type}({field_ref})"
    # Anything else is emitted as a type offering from_serializer_dict().
    return f"{field_ref} if isinstance({field_ref}, {field_type}) else {field_type}.from_serializer_dict({field_ref}) if {field_ref} else None"
|
|
203
|
+
|
|
204
|
+
def init_field_value_from_union(self, union_args: List[str], field_name, field_ref, enum_types):
    """Build the initialization expression for a ``typing.Union`` field.

    One ``<init> if isinstance(<ref>, <type>) else`` clause is emitted per
    union member, and the chain ends in ``None``.

    Args:
        union_args (List[str]): Python type expressions of the members.
        field_name: Field name, forwarded to ``init_field_value``.
        field_ref: Source expression that reads the current value.
        enum_types: Unqualified names of the enum types.

    Returns:
        str: Expression text; ``' None'`` when ``union_args`` is empty.
    """

    def runtime_check_type(python_type: str) -> str:
        # isinstance() raises TypeError for subscripted generics and for
        # typing.Any, so the generated check uses the plain runtime class.
        stripped = python_type.strip()
        if stripped.startswith('typing.List['):
            return 'list'
        if stripped.startswith('typing.Dict['):
            return 'dict'
        if stripped == 'typing.Any':
            return 'object'
        return python_type

    clauses = []
    for member_type in union_args:
        member_init = self.init_field_value(member_type, field_name, member_type in enum_types, field_ref, enum_types)
        clauses.append(f"{member_init} if isinstance({field_ref}, {runtime_check_type(member_type)}) else")
    return ' '.join(clauses) + ' None'
|
|
211
|
+
|
|
212
|
+
def init_fields(self, fields: List[Dict[str, Any]], enum_types: List[str]) -> str:
    """Render the newline-separated statements that initialize all fields.

    Enum and primitive fields are re-assigned in a single statement; every
    other field is first copied into a ``value_<name>`` local and then
    re-assigned from it.
    """
    lines: List[str] = []
    for spec in fields:
        name = spec['name']
        assigned_in_place = spec['is_enum'] or spec['type'] in enum_types or spec['is_primitive']
        if assigned_in_place:
            expression = self.init_field_value(spec['type'], name, spec['is_enum'], 'self.' + name, enum_types)
            lines.append(f"self.{name}={expression}")
            continue
        lines.append(f"value_{name} = self.{name}")
        expression = self.init_field_value(spec['type'], name, spec['is_enum'], 'value_' + name, enum_types)
        lines.append(f"self.{name} = {expression}")
    return '\n'.join(lines)
|
|
224
|
+
|
|
225
|
+
def generate_class(self, avro_schema: Dict, parent_package: str, write_file: bool) -> str:
    """
    Renders a Python data class for an Avro record schema

    A record whose qualified name is already in ``generated_types`` is not
    rendered again.

    Args:
        avro_schema (Dict): Avro record schema
        parent_package (str): Namespace used when the schema has none
        write_file (bool): Whether to write the class and its test to disk

    Returns:
        str: Python fully qualified class name
    """
    namespace = avro_schema.get('namespace', parent_package)
    schema_name = avro_schema['name']
    python_qualified_name = self.python_fully_qualified_name_from_avro_type(namespace, schema_name)
    if python_qualified_name in self.generated_types:
        return python_qualified_name

    class_name = self.python_type_from_avro_type(schema_name)
    package_name = self.python_package_from_avro_type(namespace, schema_name)
    import_types: Set[str] = set()

    # First pass: convert every field (this may generate nested types).
    drafts = []
    for avro_field in avro_schema.get('fields', []):
        definition = self.generate_field(avro_field, namespace, import_types)
        field_docstring = self.generate_field_docstring(avro_field, namespace)
        drafts.append({'definition': definition, 'docstring': field_docstring})

    # Second pass: flatten into the shape handed to the templates. Kept
    # separate so test values are drawn only after all fields are converted.
    fields = []
    for draft in drafts:
        definition = draft['definition']
        fields.append({
            'name': self.safe_name(definition['name']),
            'original_name': definition['name'],
            'type': definition['type'],
            'is_primitive': definition['is_primitive'],
            'is_enum': definition['is_enum'],
            'docstring': draft['docstring'],
            'test_value': self.generate_test_value(draft),
        })

    # The schema text handed to the template must be self-contained, so
    # references to other types are inlined. This is done on a deep copy
    # because the original schema may be shared with type_dict entries.
    local_avro_schema = inline_avro_references(copy.deepcopy(avro_schema), self.type_dict, '')
    avro_schema_json = json.dumps(local_avro_schema).replace('\\"', '\'').replace('"', '\\"')

    enum_types = [
        self.strip_package_from_fully_qualified_name(import_type)
        for import_type in import_types
        if self.generated_types.get(import_type) == "enum"
    ]

    if 'doc' in avro_schema:
        class_docstring = avro_schema['doc'].strip()
    else:
        class_docstring = f'A {class_name} record.'

    class_definition = process_template(
        "avrotopython/dataclass_core.jinja",
        class_name=class_name,
        docstring=class_docstring,
        fields=fields,
        import_types=import_types,
        base_package=self.base_package,
        avro_annotation=self.avro_annotation,
        dataclasses_json_annotation=self.dataclasses_json_annotation,
        avro_schema_json=avro_schema_json,
        init_fields=self.init_fields(fields, enum_types),
    )

    if write_file:
        self.write_to_file(package_name, class_name, class_definition)
        self.generate_test_class(package_name, class_name, fields, import_types)
    self.generated_types[python_qualified_name] = 'class'
    return python_qualified_name
|
|
288
|
+
|
|
289
|
+
def generate_enum(self, avro_schema: Dict, parent_package: str, write_file: bool) -> str:
    """
    Renders a Python enum for an Avro enum schema

    An enum whose qualified name is already in ``generated_types`` is not
    rendered again.

    Args:
        avro_schema (Dict): Avro enum schema
        parent_package (str): Namespace used when the schema has none
        write_file (bool): Whether to write the enum and its test to disk

    Returns:
        str: Python fully qualified enum name
    """
    namespace = avro_schema.get('namespace', parent_package)
    schema_name = avro_schema['name']
    python_qualified_name = self.python_fully_qualified_name_from_avro_type(namespace, schema_name)
    if python_qualified_name in self.generated_types:
        return python_qualified_name

    class_name = self.python_type_from_avro_type(schema_name)
    package_name = self.python_package_from_avro_type(namespace, schema_name)

    # Symbols that collide with a reserved word get a trailing underscore.
    symbols = []
    for symbol in avro_schema.get('symbols', []):
        symbols.append(symbol + "_" if is_python_reserved_word(symbol) else symbol)
    ordinals = avro_schema.get('ordinals', {})

    if 'doc' in avro_schema:
        enum_docstring = avro_schema['doc'].strip()
    else:
        enum_docstring = f'A {class_name} enum.'

    enum_definition = process_template(
        "avrotopython/enum_core.jinja",
        class_name=class_name,
        docstring=enum_docstring,
        symbols=symbols,
        ordinals=ordinals
    )

    if write_file:
        self.write_to_file(package_name, class_name, enum_definition)
        self.generate_test_enum(package_name, class_name, symbols)
    self.generated_types[python_qualified_name] = 'enum'
    return python_qualified_name
|
|
326
|
+
|
|
327
|
+
def generate_test_class(self, package_name: str, class_name: str, fields: List[Dict[str, str]], import_types: Set[str]) -> None:
    """Render the unit-test module for a data class and write it under ``<output_dir>/tests``"""
    tests_package_name = "test_" + package_name.replace('.', '_').lower()
    rendered_test = process_template(
        "avrotopython/test_class.jinja",
        package_name=package_name,
        class_name=class_name,
        test_class_name=f"Test_{class_name}",
        fields=fields,
        avro_annotation=self.avro_annotation,
        import_types=import_types
    )

    tests_dir = os.path.join(self.output_dir, "tests")
    module_file_name = f"{tests_package_name.replace('.', '_').lower()}.py"
    test_file_path = os.path.join(tests_dir, module_file_name)
    target_dir = os.path.dirname(test_file_path)
    if not os.path.exists(target_dir):
        os.makedirs(target_dir, exist_ok=True)
    with open(test_file_path, 'w', encoding='utf-8') as handle:
        handle.write(rendered_test)
|
|
347
|
+
|
|
348
|
+
def generate_test_enum(self, package_name: str, class_name: str, symbols: List[str]) -> None:
    """Render the unit-test module for an enum and write it under ``<output_dir>/tests``"""
    tests_package_name = "test_" + package_name.replace('.', '_').lower()
    rendered_test = process_template(
        "avrotopython/test_enum.jinja",
        package_name=package_name,
        class_name=class_name,
        test_class_name=f"Test_{class_name}",
        symbols=symbols
    )

    tests_dir = os.path.join(self.output_dir, "tests")
    module_file_name = f"{tests_package_name.replace('.', '_').lower()}.py"
    test_file_path = os.path.join(tests_dir, module_file_name)
    target_dir = os.path.dirname(test_file_path)
    if not os.path.exists(target_dir):
        os.makedirs(target_dir, exist_ok=True)
    with open(test_file_path, 'w', encoding='utf-8') as handle:
        handle.write(rendered_test)
|
|
365
|
+
|
|
366
|
+
def generate_test_value(self, field: Dict) -> Any:
    """Produce the source text of a sample value for a field.

    Args:
        field (Dict): Mapping whose ``['definition']['type']`` entry is the
            Python type expression of the field.

    Returns:
        Any: Source text of a literal or constructor call; for a type
            without a predefined sample, ``Test_<type>.create_instance()``.
    """
    wrapper_pattern = re.compile(r'^(?:typing\.)*(Optional|List|Dict|Union)\[(.+)\]$')

    def unwrap(type_expr: str) -> str:
        # Optional yields its argument as is; List, Dict (value type) and
        # Union (first member) are unwrapped recursively.
        found = wrapper_pattern.match(type_expr)
        if found is None:
            return type_expr
        wrapper, argument = found.groups()
        if wrapper == 'Optional':
            return argument
        if wrapper == 'List':
            return unwrap(argument)
        if wrapper == 'Dict':
            _, value_type = argument.split(',', 1)
            return unwrap(value_type.strip())
        # Only 'Union' is left: the pattern matches nothing else.
        return unwrap(argument.split(',', 1)[0].strip())

    def random_word() -> str:
        return ''.join([chr(random.randint(97, 122)) for _ in range(0, 20)])

    def sample_for(type_expr: str):
        # Built on every call, in this order, so the sequence of draws from
        # ``random`` stays the same.
        samples = {
            'str': "'" + random_word() + "'",
            'bool': str(random.choice([True, False])),
            'int': f'int({random.randint(0, 100)})',
            'float': f'float({random.uniform(0, 100)})',
            'bytes': 'b"test_bytes"',
            'None': 'None',
            'datetime.date': random.choice(['datetime.date.today()', 'datetime.date(2021, 1, 1)']),
            'datetime.datetime': 'datetime.datetime.now(datetime.timezone.utc)',
            'datetime.time': 'datetime.datetime.now(datetime.timezone.utc).time()',
            'decimal.Decimal': f'decimal.Decimal("{random.randint(0, 100)}.{random.randint(0, 100)}")',
            'datetime.timedelta': 'datetime.timedelta(days=1)',
            'typing.Any': '{"test": "test"}'
        }

        if type_expr.startswith('typing.Optional['):
            type_expr = unwrap(type_expr)

        if type_expr.startswith('typing.List['):
            item_type = unwrap(type_expr)
            item_count = random.randint(1, 5)
            items = [sample_for(item_type) for _ in range(item_count)]
            return '[' + ', '.join(items) + ']'
        if type_expr.startswith('typing.Dict['):
            value_type = unwrap(type_expr)
            entry_count = random.randint(1, 5)
            entries = {}
            for _ in range(entry_count):
                # The value is drawn before the key.
                value = sample_for(value_type)
                entries[random_word()] = value
            return '{' + ', '.join([f"'{key}': {value}" for key, value in entries.items()]) + '}'
        if type_expr.startswith('typing.Union['):
            return sample_for(unwrap(type_expr))
        return samples.get(type_expr, 'Test_' + type_expr + '.create_instance()')

    return sample_for(field['definition']['type'])
|
|
431
|
+
|
|
432
|
+
def generate_field(self, field: Dict, parent_package: str, import_types: set) -> Any:
    """Describe one Avro field for the class templates.

    Returns:
        Any: Dict with the keys ``name``, ``type``, ``is_primitive`` and
            ``is_enum``.
    """
    python_type = self.convert_avro_type_to_python(field['type'], parent_package, import_types)
    field_name = field['name']
    primitive_like = self.is_python_primitive(python_type) or self.is_python_typing_struct(python_type)
    # NOTE(review): generate_class/generate_enum store fully qualified names
    # in generated_types, while python_type has the package stripped for
    # records and enums - confirm whether this lookup can ever match.
    enum_like = self.generated_types.get(python_type) == 'enum'
    return {
        'name': field_name,
        'type': python_type,
        'is_primitive': primitive_like,
        'is_enum': enum_like,
    }
|
|
442
|
+
|
|
443
|
+
def generate_field_docstring(self, field: Dict, parent_package: str) -> str:
    """Build the ``name (type): doc`` docstring line for one field.

    Args:
        field (Dict): Avro field with ``name``, ``type`` and optional ``doc``.
        parent_package (str): Namespace used to resolve the field type.

    Returns:
        str: The docstring line; the doc part is empty when the field has
            no ``doc`` or its ``doc`` is null.
    """
    # The imports found during conversion are not needed here, so they are
    # collected into a throwaway set.
    field_type = self.convert_avro_type_to_python(field['type'], parent_package, set())
    # safe_name() already appends the underscore for reserved words; the
    # second reserved-word check that used to follow could never fire.
    field_name = self.safe_name(field['name'])
    # ``or ''`` guards against an explicit ``"doc": null`` in the schema.
    field_doc = (field.get('doc') or '').strip()
    return f"{field_name} ({field_type}): {field_doc}"
|
|
452
|
+
|
|
453
|
+
def write_to_file(self, package: str, class_name: str, python_code: str):
    """
    Writes generated Python code to ``<output_dir>/src/<parent package>/<class>.py``

    Args:
        package (str): Python package; its last segment is dropped to get
            the containing directory
        class_name (str): Python class name; lower-cased for the file name
        python_code (str): Python source text to write
    """
    # The containing directory is the parent package.
    parent_segments = package.split('.')[:-1]
    parent_package_path = os.sep.join(parent_segments).lower()
    directory_path = os.path.join(self.output_dir, "src", parent_package_path)
    if not os.path.exists(directory_path):
        os.makedirs(directory_path, exist_ok=True)

    module_file_name = f"{class_name.lower()}.py"
    with open(os.path.join(directory_path, module_file_name), 'w', encoding='utf-8') as handle:
        handle.write(python_code)
|
|
473
|
+
|
|
474
|
+
def write_init_files(self):
    """Writes __init__.py files to the output directories

    The names in ``generated_types`` are arranged into a tree of packages.
    For 'address.example.com.record.Record' the class is 'Record', the
    module is 'record' and the package path is 'address.example.com':

        {'address': {'example': {'com': {'record': 'Record'}}}}

    Every package below the root then gets an ``__init__.py`` that imports
    the classes of its modules and subpackages and lists them in
    ``__all__``.
    """

    def build_package_tree():
        """Arrange generated_types into nested dicts; leaves map module name -> class name."""
        tree = {}
        for qualified_name in self.generated_types:
            parts = qualified_name.split('.')
            if len(parts) < 2:
                continue  # Need at least module.Class
            *package_parts, module_name, class_name = parts
            node = tree
            for part in package_parts:
                if part not in node:
                    node[part] = {}
                node = node[part]
            node[module_name] = class_name
        return tree

    def iter_class_names(node):
        """Yield every class name found at or below a tree node, depth first."""
        for child in node.values():
            if isinstance(child, dict):
                yield from iter_class_names(child)
            else:
                yield child

    def emit_package(node, current_package: str):
        """Write the __init__.py of the subpackages first, then that of current_package."""
        import_lines = []
        exported = []
        for name, child in node.items():
            if not isinstance(child, dict):
                # A module file: import its single class.
                import_lines.append(f"from .{name} import {child}")
                exported.append(f'"{child}"')
                continue
            # A subpackage: re-export everything found below it.
            class_names = list(iter_class_names(child))
            if class_names:
                import_lines.append(f"from .{name} import {', '.join(class_names)}")
                exported.extend(f'"{class_name}"' for class_name in class_names)
            emit_package(child, f"{current_package}.{name}" if current_package else name)

        # Nothing is written for the root (empty package name).
        if not current_package or not (import_lines or exported):
            return
        package_path = os.path.join(self.output_dir, 'src', current_package.replace('.', os.sep).lower())
        if not os.path.exists(package_path):
            os.makedirs(package_path, exist_ok=True)
        with open(os.path.join(package_path, '__init__.py'), 'w', encoding='utf-8') as handle:
            handle.write('\n'.join(import_lines) + '\n\n__all__ = [' + ', '.join(exported) + ']\n')

    emit_package(build_package_tree(), '')
|
|
572
|
+
|
|
573
|
+
def write_pyproject_toml(self):
    """Render pyproject.toml from its template and write it into the output directory"""
    # The package name is handed to the template with dashes instead of underscores.
    distribution_name = self.base_package.replace('_', '-')
    rendered = process_template(
        "avrotopython/pyproject_toml.jinja",
        package_name=distribution_name
    )
    target_path = os.path.join(self.output_dir, 'pyproject.toml')
    with open(target_path, 'w', encoding='utf-8') as handle:
        handle.write(rendered)
|
|
581
|
+
|
|
582
|
+
def convert_schemas(self, avro_schemas: List, output_dir: str):
    """Generate Python code for a list of Avro schemas.

    Top-level enums and records are generated; schemas of any other type
    are skipped. Afterwards the ``__init__.py`` files and
    ``pyproject.toml`` are written.

    Args:
        avro_schemas (List): Avro schema objects.
        output_dir (str): Directory to write into; created if missing.
    """
    self.main_schema = avro_schemas
    self.type_dict = build_flat_type_dict(avro_schemas)
    self.output_dir = output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir, exist_ok=True)

    for schema in avro_schemas:
        schema_kind = schema['type']
        if schema_kind == 'enum':
            self.generate_enum(schema, self.base_package, write_file=True)
        elif schema_kind == 'record':
            self.generate_class(schema, self.base_package, write_file=True)

    self.write_init_files()
    self.write_pyproject_toml()
|
|
597
|
+
|
|
598
|
+
def convert(self, avro_schema_path: str, output_dir: str):
    """Load an Avro schema file and generate Python code for it.

    A file holding a single schema object is treated as a one-element
    list. Returns whatever ``convert_schemas`` returns.
    """
    with open(avro_schema_path, 'r', encoding='utf-8') as handle:
        loaded = json.load(handle)
    schemas = [loaded] if isinstance(loaded, dict) else loaded
    return self.convert_schemas(schemas, output_dir)
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
def convert_avro_to_python(avro_schema_path, py_file_path, package_name='', dataclasses_json_annotation=False, avro_annotation=False):
    """Generate Python data classes from an Avro schema file.

    When ``package_name`` is empty it is derived from the schema file
    name: extension dropped, lower-cased, dashes turned into underscores.
    ``py_file_path`` is handed to the converter as the output directory.
    """
    if not package_name:
        schema_file_name = os.path.basename(avro_schema_path)
        stem, _extension = os.path.splitext(schema_file_name)
        package_name = stem.lower().replace('-', '_')

    converter = AvroToPython(
        package_name,
        dataclasses_json_annotation=dataclasses_json_annotation,
        avro_annotation=avro_annotation,
    )
    converter.convert(avro_schema_path, py_file_path)
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
def convert_avro_schema_to_python(avro_schema, py_file_path, package_name='', dataclasses_json_annotation=False, avro_annotation=False):
    """Generate Python data classes from an already-loaded Avro schema.

    ``avro_schema`` may be a single schema object or a list of them;
    ``py_file_path`` is handed to the converter as the output directory.
    """
    converter = AvroToPython(
        package_name,
        dataclasses_json_annotation=dataclasses_json_annotation,
        avro_annotation=avro_annotation,
    )
    schemas = [avro_schema] if isinstance(avro_schema, dict) else avro_schema
    converter.convert_schemas(schemas, py_file_path)
|