structurize 2.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. avrotize/__init__.py +64 -0
  2. avrotize/__main__.py +6 -0
  3. avrotize/_version.py +34 -0
  4. avrotize/asn1toavro.py +160 -0
  5. avrotize/avrotize.py +152 -0
  6. avrotize/avrotocpp.py +483 -0
  7. avrotize/avrotocsharp.py +1075 -0
  8. avrotize/avrotocsv.py +121 -0
  9. avrotize/avrotodatapackage.py +173 -0
  10. avrotize/avrotodb.py +1383 -0
  11. avrotize/avrotogo.py +476 -0
  12. avrotize/avrotographql.py +197 -0
  13. avrotize/avrotoiceberg.py +210 -0
  14. avrotize/avrotojava.py +2156 -0
  15. avrotize/avrotojs.py +250 -0
  16. avrotize/avrotojsons.py +481 -0
  17. avrotize/avrotojstruct.py +345 -0
  18. avrotize/avrotokusto.py +364 -0
  19. avrotize/avrotomd.py +137 -0
  20. avrotize/avrotools.py +168 -0
  21. avrotize/avrotoparquet.py +208 -0
  22. avrotize/avrotoproto.py +359 -0
  23. avrotize/avrotopython.py +624 -0
  24. avrotize/avrotorust.py +435 -0
  25. avrotize/avrotots.py +598 -0
  26. avrotize/avrotoxsd.py +344 -0
  27. avrotize/cddltostructure.py +1841 -0
  28. avrotize/commands.json +3337 -0
  29. avrotize/common.py +834 -0
  30. avrotize/constants.py +72 -0
  31. avrotize/csvtoavro.py +132 -0
  32. avrotize/datapackagetoavro.py +76 -0
  33. avrotize/dependencies/cpp/vcpkg/vcpkg.json +19 -0
  34. avrotize/dependencies/typescript/node22/package.json +16 -0
  35. avrotize/dependency_resolver.py +348 -0
  36. avrotize/dependency_version.py +432 -0
  37. avrotize/jsonstoavro.py +2167 -0
  38. avrotize/jsonstostructure.py +2642 -0
  39. avrotize/jstructtoavro.py +878 -0
  40. avrotize/kstructtoavro.py +93 -0
  41. avrotize/kustotoavro.py +455 -0
  42. avrotize/parquettoavro.py +157 -0
  43. avrotize/proto2parser.py +498 -0
  44. avrotize/proto3parser.py +403 -0
  45. avrotize/prototoavro.py +382 -0
  46. avrotize/structuretocddl.py +597 -0
  47. avrotize/structuretocpp.py +697 -0
  48. avrotize/structuretocsharp.py +2295 -0
  49. avrotize/structuretocsv.py +365 -0
  50. avrotize/structuretodatapackage.py +659 -0
  51. avrotize/structuretodb.py +1125 -0
  52. avrotize/structuretogo.py +720 -0
  53. avrotize/structuretographql.py +502 -0
  54. avrotize/structuretoiceberg.py +355 -0
  55. avrotize/structuretojava.py +853 -0
  56. avrotize/structuretojsons.py +498 -0
  57. avrotize/structuretokusto.py +639 -0
  58. avrotize/structuretomd.py +322 -0
  59. avrotize/structuretoproto.py +764 -0
  60. avrotize/structuretopython.py +772 -0
  61. avrotize/structuretorust.py +714 -0
  62. avrotize/structuretots.py +653 -0
  63. avrotize/structuretoxsd.py +679 -0
  64. avrotize/xsdtoavro.py +413 -0
  65. structurize-2.19.0.dist-info/METADATA +107 -0
  66. structurize-2.19.0.dist-info/RECORD +70 -0
  67. structurize-2.19.0.dist-info/WHEEL +5 -0
  68. structurize-2.19.0.dist-info/entry_points.txt +2 -0
  69. structurize-2.19.0.dist-info/licenses/LICENSE +201 -0
  70. structurize-2.19.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,624 @@
1
+ """Converts Avro schema to Python data classes"""
2
+
3
+ # pylint: disable=line-too-long,too-many-instance-attributes
4
+
5
+ import copy
6
+ import json
7
+ import os
8
+ import re
9
+ import random
10
+ from typing import Dict, List, Set, Tuple, Union, Any
11
+ from avrotize.common import fullname, get_typing_args_from_string, is_generic_avro_type, pascal, process_template, build_flat_type_dict, inline_avro_references, is_type_with_alternate, strip_alternate_type
12
+
13
+ INDENT = ' '
14
+
15
+
16
def is_python_reserved_word(word: str) -> bool:
    """Tells whether a word has to be escaped before it is used as an identifier.

    Besides the Python keywords, the names ``record``, ``self`` and ``cls``
    are treated as reserved.
    """
    python_keywords = (
        'False', 'None', 'True',
        'and', 'as', 'assert', 'async', 'await', 'break', 'class', 'continue',
        'def', 'del', 'elif', 'else', 'except', 'finally', 'for', 'from',
        'global', 'if', 'import', 'in', 'is', 'lambda', 'nonlocal', 'not',
        'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
    )
    generator_reserved = ('record', 'self', 'cls')
    return word in python_keywords or word in generator_reserved
26
+
27
+
28
+ class AvroToPython:
29
+ """Converts Avro schema to Python data classes"""
30
+
31
+ def __init__(self, base_package: str = '', dataclasses_json_annotation=False, avro_annotation=False) -> None:
32
+ self.base_package = base_package
33
+ self.dataclasses_json_annotation = dataclasses_json_annotation
34
+ self.avro_annotation = avro_annotation
35
+ self.output_dir = os.getcwd()
36
+ self.main_schema = None
37
+ self.type_dict = None
38
+ self.generated_types: Dict[str, str] = {}
39
+
40
def is_python_primitive(self, type_name: str) -> bool:
    """Reports whether a type name is one of the built-in scalar type names."""
    primitive_names = ('None', 'bool', 'int', 'float', 'str', 'bytes')
    return type_name in primitive_names
43
+
44
def is_python_typing_struct(self, type_name: str) -> bool:
    """Reports whether a type name is ``typing.Any`` or a Dict/List/Optional/Union annotation."""
    generic_prefixes = ('typing.Dict[', 'typing.List[', 'typing.Optional[', 'typing.Union[')
    if type_name.startswith(generic_prefixes):
        return True
    return type_name == 'typing.Any'
47
+
48
def safe_name(self, name: str) -> str:
    """Returns the name unchanged, or with a trailing underscore when it is reserved."""
    return name + "_" if is_python_reserved_word(name) else name
53
+
54
def pascal_type_name(self, ref: str) -> str:
    """Derives a type name from the last dotted segment of a reference.

    Every underscore-separated piece of that segment is passed through
    ``pascal`` and the pieces are joined with underscores again.
    """
    local_name = ref.rsplit('.', 1)[-1]
    pascal_pieces = [pascal(piece) for piece in local_name.split('_')]
    return '_'.join(pascal_pieces)
57
+
58
def python_package_from_avro_type(self, namespace: str, type_name: str) -> str:
    """Computes the lower-cased Python package path for an Avro type.

    A dotted ``type_name`` is taken as already qualified and ``namespace`` is
    ignored; otherwise the namespace, when present, is put in front of the
    type name. ``self.base_package`` is prepended when it is set.
    """

    def lowered(dotted: str) -> str:
        # Lower-case each dotted segment on its own.
        return '.'.join(segment.lower() for segment in dotted.split('.'))

    module_path = lowered(type_name)
    if '.' not in type_name:
        namespace_path = lowered(namespace) if namespace else ''
        separator = '.' if namespace_path and module_path else ''
        module_path = namespace_path + separator + module_path

    if not self.base_package:
        return module_path
    return self.base_package + '.' + module_path
70
+
71
def python_type_from_avro_type(self, type_name: str) -> str:
    """Returns the Python class name used for an Avro type name."""
    class_name = self.pascal_type_name(type_name)
    return class_name
74
+
75
def python_fully_qualified_name_from_avro_type(self, namespace: str, type_name: str) -> str:
    """
    Builds ``<package>.<ClassName>`` for an Avro type; only the class name is
    returned when the package is empty.
    """
    package = self.python_package_from_avro_type(namespace, type_name)
    class_name = self.python_type_from_avro_type(type_name)
    if package:
        return f'{package}.{class_name}'
    return class_name
81
+
82
def strip_package_from_fully_qualified_name(self, fully_qualified_name: str) -> str:
    """Returns the part of a dotted name that follows its last dot."""
    _, _, simple_name = fully_qualified_name.rpartition('.')
    return simple_name
85
+
86
def map_plain_type_reference_to_python(self, parent_namespace: str, avro_type: str) -> Tuple[bool, str]:
    """
    Resolves a plain Avro type name to a Python type

    Args:
        parent_namespace (str): Namespace used to qualify a named type
        avro_type (str): Avro type name

    Returns:
        Tuple[bool, str]: ``(True, python_type)`` for generic and primitive
        types, ``(False, fully_qualified_class_name)`` for any other name
    """
    if is_generic_avro_type(avro_type):
        return True, 'typing.Any'

    avro_primitives = dict(
        null='None',
        boolean='bool',
        int='int',
        long='int',
        float='float',
        double='float',
        bytes='bytes',
        string='str',
    )
    python_primitive = avro_primitives.get(avro_type, None)
    if python_primitive:
        return True, python_primitive

    # Anything else is a reference to a named type.
    qualified_name = self.python_fully_qualified_name_from_avro_type(parent_namespace, avro_type)
    return False, qualified_name
113
+
114
def convert_logical_type_to_python(self, avro_type: Dict, import_types: Set[str]) -> str:
    """Maps the ``logicalType`` of an Avro schema to a Python type name.

    A recognized type is also added to ``import_types``; an unrecognized
    logical type yields ``typing.Any`` and leaves ``import_types`` untouched.
    """
    known_logical_types = (
        ('decimal', 'decimal.Decimal'),
        ('date', 'datetime.date'),
        ('time-millis', 'datetime.time'),
        ('time-micros', 'datetime.time'),
        ('timestamp-millis', 'datetime.datetime'),
        ('timestamp-micros', 'datetime.datetime'),
        ('duration', 'datetime.timedelta'),
    )
    logical_type = avro_type['logicalType']
    for logical_name, python_type in known_logical_types:
        if logical_type == logical_name:
            import_types.add(python_type)
            return python_type
    return 'typing.Any'
138
+
139
def convert_avro_type_to_python(self, avro_type: Union[str, Dict, List], parent_package: str, import_types: set) -> str:
    """Converts an Avro type expression to a Python type annotation string.

    Args:
        avro_type (Union[str, Dict, List]): Type name, union (list) or schema (dict)
        parent_package (str): Namespace used to qualify named types
        import_types (set): Collects the fully qualified names of the classes,
            enums and logical types the annotation refers to

    Returns:
        str: Python type annotation; ``typing.Any`` when the input is neither
        a string, a list nor a dict
    """
    if isinstance(avro_type, str):
        is_primitive, mapped_type = self.map_plain_type_reference_to_python(parent_package, avro_type)
        if is_primitive:
            return mapped_type
        import_types.add(mapped_type)
        return self.pascal_type_name(mapped_type)

    if isinstance(avro_type, list):
        if is_generic_avro_type(avro_type):
            return 'typing.Any'
        if is_type_with_alternate(avro_type):
            return self.convert_avro_type_to_python(strip_alternate_type(avro_type), parent_package, import_types)
        non_null_types = [t for t in avro_type if t != 'null']
        if not non_null_types:
            # A union without any non-null member can only hold null; an empty
            # 'typing.Union[]' would not be a valid annotation.
            return 'None'
        if len(non_null_types) == 1:
            member = self.convert_avro_type_to_python(non_null_types[0], parent_package, import_types)
            return f'typing.Optional[{member}]' if 'null' in avro_type else member
        # 'null' is left out of the member list of a multi-member union.
        members = ', '.join(self.convert_avro_type_to_python(t, parent_package, import_types) for t in non_null_types)
        return f"typing.Union[{members}]"

    if isinstance(avro_type, dict):
        schema_type = avro_type['type']
        if schema_type == 'record':
            # Inline record definitions are generated on the spot.
            class_ref = self.generate_class(avro_type, parent_package, write_file=True)
            import_types.add(class_ref)
            return self.strip_package_from_fully_qualified_name(class_ref)
        if schema_type == 'enum':
            enum_ref = self.generate_enum(avro_type, parent_package, write_file=True)
            import_types.add(enum_ref)
            return self.strip_package_from_fully_qualified_name(enum_ref)
        if schema_type == 'array':
            return f"typing.List[{self.convert_avro_type_to_python(avro_type['items'], parent_package, import_types)}]"
        if schema_type == 'map':
            return f"typing.Dict[str,{self.convert_avro_type_to_python(avro_type['values'], parent_package, import_types)}]"
        if 'logicalType' in avro_type:
            return self.convert_logical_type_to_python(avro_type, import_types)
        # Any other schema object is resolved through its 'type' member.
        return self.convert_avro_type_to_python(schema_type, parent_package, import_types)

    return 'typing.Any'
178
+
179
+ # pylint: disable=eval-used
180
def init_field_value(self, field_type: str, field_name: str, field_is_enum: bool, field_ref: str, enum_types: List[str]):
    """Builds the Python source expression that normalizes a field value.

    The returned string is code for the generated class; it reads the current
    value through ``field_ref``.

    Args:
        field_type (str): Python type annotation of the field, as text
        field_name (str): Field name; only forwarded to nested calls
        field_is_enum (bool): Whether the field type is a generated enum
        field_ref (str): Expression that reads the current value
        enum_types (List[str]): Names of the enum classes known to the caller

    Returns:
        str: Source expression that produces the normalized value
    """
    if field_type == "typing.Any":
        return field_ref
    if field_type in ('datetime.datetime', 'datetime.date', 'datetime.time', 'datetime.timedelta'):
        return f"{field_ref}"
    # 'decimal.Decimal' is the spelling convert_logical_type_to_python() produces;
    # without it decimals fell through to the class branch at the bottom and
    # yielded a call to a from_serializer_dict() that Decimal does not have.
    if field_type in ('int', 'str', 'float', 'bool', 'bytes', 'Decimal', 'decimal.Decimal'):
        return f"{field_type}({field_ref})"
    if field_type.startswith("typing.List["):
        inner_type = get_typing_args_from_string(field_type)[0]
        return f"{field_ref} if isinstance({field_ref}, list) else [{self.init_field_value(inner_type, field_name, field_is_enum, 'v', enum_types)} for v in {field_ref}] if {field_ref} else None"
    if field_type.startswith("typing.Dict["):
        inner_type = get_typing_args_from_string(field_type)[1]
        return f"{field_ref} if isinstance({field_ref}, dict) else {{k: {self.init_field_value(inner_type, field_name, field_is_enum, 'v', enum_types)} for k, v in {field_ref}.items()}} if {field_ref} else None"
    if field_type.startswith("typing.Optional["):
        inner_type = get_typing_args_from_string(field_type)[0]
        inner_expression = self.init_field_value(inner_type, field_name, field_is_enum, field_ref, enum_types)
        # Test against None explicitly: a truthiness test would turn valid
        # falsy values such as 0, '' or False into None.
        return inner_expression + ' if ' + field_ref + ' is not None else None'
    if field_type.startswith("typing.Union["):
        return self.init_field_value_from_union(get_typing_args_from_string(field_type), field_name, field_ref, enum_types)
    if field_is_enum or field_type in enum_types:
        return f"{field_type}({field_ref})"
    # Everything else is treated as a generated class.
    return f"{field_ref} if isinstance({field_ref}, {field_type}) else {field_type}.from_serializer_dict({field_ref}) if {field_ref} else None"
203
+
204
def init_field_value_from_union(self, union_args: List[str], field_name, field_ref, enum_types):
    """Builds the initialization expression for a Union-typed field.

    One ``<init> if isinstance(<ref>, <type>) else`` clause is produced per
    union member, and the chain is closed with ``None``.
    """
    clauses = [
        f"{self.init_field_value(member_type, field_name, member_type in enum_types, field_ref, enum_types)} if isinstance({field_ref}, {member_type}) else"
        for member_type in union_args
    ]
    return ' '.join(clauses) + ' None'
211
+
212
def init_fields(self, fields: List[Dict[str, Any]], enum_types: List[str]) -> str:
    """Builds the newline-separated statements that initialize every field.

    Enum and primitive fields are converted in a single assignment; all other
    fields are first copied into a ``value_<name>`` local that the conversion
    expression then reads.
    """
    statements: List[str] = []
    for field in fields:
        if field['is_enum'] or field['type'] in enum_types or field['is_primitive']:
            converted = self.init_field_value(field['type'], field['name'], field['is_enum'], 'self.'+field['name'], enum_types)
            statements.append(f"self.{field['name']}={converted}")
            continue
        statements.append(f"value_{field['name']} = self.{field['name']}")
        converted = self.init_field_value(field['type'], field['name'], field['is_enum'], 'value_'+field['name'], enum_types)
        statements.append(f"self.{field['name']} = {converted}")
    return '\n'.join(statements)
224
+
225
def generate_class(self, avro_schema: Dict, parent_package: str, write_file: bool) -> str:
    """
    Produces a Python data class for an Avro record schema

    Args:
        avro_schema (Dict): Avro record schema
        parent_package (str): Namespace used when the schema declares none
        write_file (bool): Whether the class and its test module are written to disk

    Returns:
        str: Python fully qualified class name
    """
    schema_name = avro_schema['name']
    namespace = avro_schema.get('namespace', parent_package)
    import_types: Set[str] = set()

    class_name = self.python_type_from_avro_type(schema_name)
    package_name = self.python_package_from_avro_type(namespace, schema_name)
    python_qualified_name = self.python_fully_qualified_name_from_avro_type(namespace, schema_name)
    # A type that has been generated before is only referenced.
    if python_qualified_name in self.generated_types:
        return python_qualified_name

    # First pass: convert every field and build its docstring line.
    converted_fields = []
    for schema_field in avro_schema.get('fields', []):
        definition = self.generate_field(schema_field, namespace, import_types)
        docstring = self.generate_field_docstring(schema_field, namespace)
        converted_fields.append({'definition': definition, 'docstring': docstring})

    # Second pass: shape the entries handed to the templates.
    fields = []
    for converted in converted_fields:
        definition = converted['definition']
        fields.append({
            'name': self.safe_name(definition['name']),
            'original_name': definition['name'],
            'type': definition['type'],
            'is_primitive': definition['is_primitive'],
            'is_enum': definition['is_enum'],
            'docstring': converted['docstring'],
            'test_value': self.generate_test_value(converted),
        })

    # The class carries a copy of its own Avro schema with all references to
    # other types inlined. The schema is deep-copied first so the original,
    # which may be shared with type_dict entries, is never mutated.
    schema_copy = copy.deepcopy(avro_schema)
    local_avro_schema = inline_avro_references(schema_copy, self.type_dict, '')
    avro_schema_json = json.dumps(local_avro_schema).replace('\\"', '\'').replace('"', '\\"')

    enum_types = [
        self.strip_package_from_fully_qualified_name(import_type)
        for import_type in import_types
        if import_type in self.generated_types and self.generated_types[import_type] == "enum"
    ]

    if 'doc' in avro_schema:
        class_docstring = avro_schema.get('doc', '').strip()
    else:
        class_docstring = f'A {class_name} record.'
    init_statements = self.init_fields(fields, enum_types)

    class_definition = process_template(
        "avrotopython/dataclass_core.jinja",
        class_name=class_name,
        docstring=class_docstring,
        fields=fields,
        import_types=import_types,
        base_package=self.base_package,
        avro_annotation=self.avro_annotation,
        dataclasses_json_annotation=self.dataclasses_json_annotation,
        avro_schema_json=avro_schema_json,
        init_fields=init_statements,
    )

    if write_file:
        self.write_to_file(package_name, class_name, class_definition)
        self.generate_test_class(package_name, class_name, fields, import_types)
    self.generated_types[python_qualified_name] = 'class'
    return python_qualified_name
288
+
289
def generate_enum(self, avro_schema: Dict, parent_package: str, write_file: bool) -> str:
    """
    Produces a Python enum for an Avro enum schema

    Args:
        avro_schema (Dict): Avro enum schema
        parent_package (str): Namespace used when the schema declares none
        write_file (bool): Whether the enum and its test module are written to disk

    Returns:
        str: Python fully qualified enum name
    """
    schema_name = avro_schema['name']
    namespace = avro_schema.get('namespace', parent_package)

    class_name = self.python_type_from_avro_type(schema_name)
    package_name = self.python_package_from_avro_type(namespace, schema_name)
    python_qualified_name = self.python_fully_qualified_name_from_avro_type(namespace, schema_name)
    # A type that has been generated before is only referenced.
    if python_qualified_name in self.generated_types:
        return python_qualified_name

    # Symbols that are reserved words get a trailing underscore.
    symbols = []
    for symbol in avro_schema.get('symbols', []):
        symbols.append(symbol + "_" if is_python_reserved_word(symbol) else symbol)
    ordinals = avro_schema.get('ordinals', {})

    if 'doc' in avro_schema:
        enum_docstring = avro_schema.get('doc', '').strip()
    else:
        enum_docstring = f'A {class_name} enum.'

    enum_definition = process_template(
        "avrotopython/enum_core.jinja",
        class_name=class_name,
        docstring=enum_docstring,
        symbols=symbols,
        ordinals=ordinals
    )

    if write_file:
        self.write_to_file(package_name, class_name, enum_definition)
        self.generate_test_enum(package_name, class_name, symbols)
    self.generated_types[python_qualified_name] = 'enum'
    return python_qualified_name
326
+
327
def generate_test_class(self, package_name: str, class_name: str, fields: List[Dict[str, str]], import_types: Set[str]) -> None:
    """Renders the unit test module of a data class into ``<output_dir>/tests``.

    The module is named ``test_<package>.py``, where the package name is
    lower-cased and its dots are replaced by underscores.
    """
    test_module_name = "test_" + package_name.replace('.', '_').lower()
    test_source = process_template(
        "avrotopython/test_class.jinja",
        package_name=package_name,
        class_name=class_name,
        test_class_name=f"Test_{class_name}",
        fields=fields,
        avro_annotation=self.avro_annotation,
        import_types=import_types
    )

    test_file_path = os.path.join(self.output_dir, "tests", f"{test_module_name}.py")
    tests_dir = os.path.dirname(test_file_path)
    if not os.path.exists(tests_dir):
        os.makedirs(tests_dir, exist_ok=True)
    with open(test_file_path, 'w', encoding='utf-8') as test_file:
        test_file.write(test_source)
347
+
348
def generate_test_enum(self, package_name: str, class_name: str, symbols: List[str]) -> None:
    """Renders the unit test module of an enum into ``<output_dir>/tests``.

    The module is named ``test_<package>.py``, where the package name is
    lower-cased and its dots are replaced by underscores.
    """
    test_module_name = "test_" + package_name.replace('.', '_').lower()
    test_source = process_template(
        "avrotopython/test_enum.jinja",
        package_name=package_name,
        class_name=class_name,
        test_class_name=f"Test_{class_name}",
        symbols=symbols
    )

    test_file_path = os.path.join(self.output_dir, "tests", f"{test_module_name}.py")
    tests_dir = os.path.dirname(test_file_path)
    if not os.path.exists(tests_dir):
        os.makedirs(tests_dir, exist_ok=True)
    with open(test_file_path, 'w', encoding='utf-8') as test_file:
        test_file.write(test_source)
365
+
366
def generate_test_value(self, field: Dict) -> Any:
    """Builds a Python source expression that yields a sample value for a field.

    The field type is read from ``field['definition']['type']``. List and Dict
    types yield literals with one to five generated entries, a Union yields a
    value for its first member, and a type without a predefined sample yields
    ``Test_<type>.create_instance()``.
    """
    wrapper_pattern = re.compile(r'^(?:typing\.)*(Optional|List|Dict|Union)\[(.+)\]$')

    def random_word() -> str:
        # Twenty random lower-case ASCII letters.
        return ''.join([chr(random.randint(97, 122)) for _ in range(0, 20)])

    def unwrap(type_name: str) -> str:
        """Strips Optional/List/Dict/Union wrappers down to the type to sample."""
        matched = wrapper_pattern.match(type_name)
        if not matched:
            return type_name
        wrapper, argument = matched.groups()
        if wrapper == 'Optional':
            return argument
        if wrapper == 'List':
            return unwrap(argument)
        if wrapper == 'Dict':
            # Only the value type of a Dict matters.
            _, value_type = argument.split(',', 1)
            return unwrap(value_type.strip())
        # The pattern leaves only Union here; its first member is used.
        return unwrap(argument.split(',', 1)[0].strip())

    def sample(type_name: str):
        quote = chr(39)
        predefined = {
            'str': quote + random_word() + quote,
            'bool': str(random.choice([True, False])),
            'int': f'int({random.randint(0, 100)})',
            'float': f'float({random.uniform(0, 100)})',
            'bytes': 'b"test_bytes"',
            'None': 'None',
            'datetime.date': random.choice(['datetime.date.today()', 'datetime.date(2021, 1, 1)']),
            'datetime.datetime': 'datetime.datetime.now(datetime.timezone.utc)',
            'datetime.time': 'datetime.datetime.now(datetime.timezone.utc).time()',
            'decimal.Decimal': f'decimal.Decimal("{random.randint(0, 100)}.{random.randint(0, 100)}")',
            'datetime.timedelta': 'datetime.timedelta(days=1)',
            'typing.Any': '{"test": "test"}'
        }

        if type_name.startswith('typing.Optional['):
            type_name = unwrap(type_name)

        if type_name.startswith('typing.List['):
            item_type = unwrap(type_name)
            item_count = random.randint(1, 5)
            items = [sample(item_type) for _ in range(item_count)]
            return f"[{', '.join(items)}]"

        if type_name.startswith('typing.Dict['):
            value_type = unwrap(type_name)
            entry_count = random.randint(1, 5)
            entries = {}
            for _ in range(entry_count):
                # The value is produced before its key is drawn.
                entry_value = sample(value_type)
                entries[random_word()] = entry_value
            rendered = [quote + key + quote + f': {value}' for key, value in entries.items()]
            return f"{{{', '.join(rendered)}}}"

        if type_name.startswith('typing.Union['):
            return sample(unwrap(type_name))

        return predefined.get(type_name, 'Test_' + type_name + '.create_instance()')

    return sample(field['definition']['type'])
431
+
432
def generate_field(self, field: Dict, parent_package: str, import_types: set) -> Any:
    """Describes a data class field as a dict with name, type and kind flags.

    ``is_primitive`` is set for built-in scalar names and typing annotations;
    ``is_enum`` is set when the type name is registered as an enum in
    ``self.generated_types``.
    """
    field_type = self.convert_avro_type_to_python(field['type'], parent_package, import_types)
    field_name = field['name']

    is_primitive = self.is_python_primitive(field_type) or self.is_python_typing_struct(field_type)
    is_enum = field_type in self.generated_types and self.generated_types[field_type] == 'enum'
    return dict(
        name=field_name,
        type=field_type,
        is_primitive=is_primitive,
        is_enum=is_enum,
    )
442
+
443
def generate_field_docstring(self, field: Dict, parent_package: str) -> str:
    """Builds the ``<name> (<type>): <doc>`` docstring line of a field.

    Args:
        field (Dict): Avro field schema
        parent_package (str): Namespace used to resolve the field type

    Returns:
        str: Docstring line; the name is escaped with ``safe_name``
    """
    # Imports needed by the type are not of interest here, so they are
    # collected into a throwaway set.
    field_type = self.convert_avro_type_to_python(field['type'], parent_package, set())
    # safe_name() already appends the underscore to a reserved word. An escaped
    # name ends with '_' and is never reserved, so no second check is needed.
    field_name = self.safe_name(field['name'])
    field_doc = field.get('doc', '').strip()
    return f"{field_name} ({field_type}): {field_doc}"
452
+
453
def write_to_file(self, package: str, class_name: str, python_code: str):
    """
    Stores generated Python code under ``<output_dir>/src``

    Args:
        package (str): Python package; its last segment is dropped to get the directory
        class_name (str): Python class name; lower-cased to get the file name
        python_code (str): Python code to write
    """
    # Everything in front of the last dot names the containing directory.
    parent_package_name, _, _ = package.rpartition('.')
    parent_package_path = parent_package_name.replace('.', os.sep).lower()
    directory_path = os.path.join(self.output_dir, "src", parent_package_path)
    if not os.path.exists(directory_path):
        os.makedirs(directory_path, exist_ok=True)

    file_path = os.path.join(directory_path, f"{class_name.lower()}.py")
    with open(file_path, 'w', encoding='utf-8') as module_file:
        module_file.write(python_code)
473
+
474
def write_init_files(self):
    """Writes an ``__init__.py`` into every generated package directory.

    Each file imports the classes of the modules in its package and of all
    packages below it, and lists them in ``__all__``. No file is written for
    the root level, whose package name is empty.
    """

    def build_tree():
        """
        Arranges the names in generated_types as a nested dict.

        For 'address.example.com.record.Record' the result is
        {'address': {'example': {'com': {'record': 'Record'}}}}: package
        segments map to dicts and the module name maps to its class name.
        """
        tree = {}
        for qualified_name in self.generated_types:
            parts = qualified_name.split('.')
            if len(parts) < 2:
                continue  # at least module.Class is required

            *package_parts, module_name, class_name = parts

            # Walk down to the package that holds the module.
            node = tree
            for part in package_parts:
                if part not in node:
                    node[part] = {}
                node = node[part]

            node[module_name] = class_name
        return tree

    def iter_class_names(node):
        """Yields the class names below a node in depth-first order."""
        for child in node.values():
            if isinstance(child, dict):
                yield from iter_class_names(child)
            else:
                yield child

    def write_package(node, current_package: str):
        """Writes the __init__.py files of a package and of everything below it."""
        import_statements = []
        exported_names = []

        for child_name, child in node.items():
            if not isinstance(child, dict):
                # A module file: import its class.
                import_statements.append(f"from .{child_name} import {child}")
                exported_names.append(f'"{child}"')
                continue

            # A subpackage: re-export every class found below it.
            nested_names = list(iter_class_names(child))
            if nested_names:
                import_statements.append(f"from .{child_name} import {', '.join(nested_names)}")
                exported_names.extend(f'"{name}"' for name in nested_names)
            write_package(child, current_package + ('.' if current_package else '') + child_name)

        if not current_package or not (import_statements or exported_names):
            return

        package_path = os.path.join(self.output_dir, 'src', current_package.replace('.', os.sep).lower())
        if not os.path.exists(package_path):
            os.makedirs(package_path, exist_ok=True)
        with open(os.path.join(package_path, '__init__.py'), 'w', encoding='utf-8') as init_file:
            init_file.write('\n'.join(import_statements) + '\n\n__all__ = [' + ', '.join(exported_names) + ']\n')

    write_package(build_tree(), '')
572
+
573
def write_pyproject_toml(self):
    """Renders ``pyproject.toml`` into the output directory.

    The project name is the base package with underscores replaced by dashes.
    """
    project_name = self.base_package.replace('_', '-')
    pyproject_content = process_template(
        "avrotopython/pyproject_toml.jinja",
        package_name=project_name
    )
    target_path = os.path.join(self.output_dir, 'pyproject.toml')
    with open(target_path, 'w', encoding='utf-8') as toml_file:
        toml_file.write(pyproject_content)
581
+
582
def convert_schemas(self, avro_schemas: List, output_dir: str):
    """Generates code for every top-level enum and record schema.

    The output directory is created when missing. Schemas of any other type
    are skipped. The ``__init__.py`` files and ``pyproject.toml`` are written
    last.
    """
    self.main_schema = avro_schemas
    self.type_dict = build_flat_type_dict(avro_schemas)
    self.output_dir = output_dir
    if not os.path.exists(self.output_dir):
        os.makedirs(self.output_dir, exist_ok=True)

    for schema in avro_schemas:
        schema_type = schema['type']
        if schema_type == 'enum':
            self.generate_enum(schema, self.base_package, write_file=True)
        elif schema_type == 'record':
            self.generate_class(schema, self.base_package, write_file=True)

    self.write_init_files()
    self.write_pyproject_toml()
597
+
598
def convert(self, avro_schema_path: str, output_dir: str):
    """Loads an Avro schema file and generates code for it.

    A file that holds a single schema object is handled as a list of one.
    """
    with open(avro_schema_path, 'r', encoding='utf-8') as schema_file:
        loaded = json.load(schema_file)
    schemas = [loaded] if isinstance(loaded, dict) else loaded
    return self.convert_schemas(schemas, output_dir)
605
+
606
+
607
def convert_avro_to_python(avro_schema_path, py_file_path, package_name='', dataclasses_json_annotation=False, avro_annotation=False):
    """Generates Python data classes from an Avro schema file.

    Without a package name, the schema file's base name is used instead,
    lower-cased and with dashes turned into underscores.
    """
    if not package_name:
        schema_file_name = os.path.basename(avro_schema_path)
        base_name, _ = os.path.splitext(schema_file_name)
        package_name = base_name.lower().replace('-', '_')

    converter = AvroToPython(
        package_name,
        dataclasses_json_annotation=dataclasses_json_annotation,
        avro_annotation=avro_annotation,
    )
    converter.convert(avro_schema_path, py_file_path)
616
+
617
+
618
def convert_avro_schema_to_python(avro_schema, py_file_path, package_name='', dataclasses_json_annotation=False, avro_annotation=False):
    """Generates Python data classes from an Avro schema that is already loaded.

    A single schema object is handled as a list of one.
    """
    converter = AvroToPython(
        package_name,
        dataclasses_json_annotation=dataclasses_json_annotation,
        avro_annotation=avro_annotation,
    )
    schemas = [avro_schema] if isinstance(avro_schema, dict) else avro_schema
    converter.convert_schemas(schemas, py_file_path)