structurize 2.16.2__py3-none-any.whl → 2.16.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. avrotize/__init__.py +63 -63
  2. avrotize/__main__.py +5 -5
  3. avrotize/_version.py +34 -34
  4. avrotize/asn1toavro.py +160 -160
  5. avrotize/avrotize.py +152 -152
  6. avrotize/avrotocpp.py +483 -483
  7. avrotize/avrotocsharp.py +992 -992
  8. avrotize/avrotocsv.py +121 -121
  9. avrotize/avrotodatapackage.py +173 -173
  10. avrotize/avrotodb.py +1383 -1383
  11. avrotize/avrotogo.py +476 -476
  12. avrotize/avrotographql.py +197 -197
  13. avrotize/avrotoiceberg.py +210 -210
  14. avrotize/avrotojava.py +1023 -1023
  15. avrotize/avrotojs.py +250 -250
  16. avrotize/avrotojsons.py +481 -481
  17. avrotize/avrotojstruct.py +345 -345
  18. avrotize/avrotokusto.py +363 -363
  19. avrotize/avrotomd.py +137 -137
  20. avrotize/avrotools.py +168 -168
  21. avrotize/avrotoparquet.py +208 -208
  22. avrotize/avrotoproto.py +358 -358
  23. avrotize/avrotopython.py +622 -622
  24. avrotize/avrotorust.py +435 -435
  25. avrotize/avrotots.py +598 -598
  26. avrotize/avrotoxsd.py +344 -344
  27. avrotize/commands.json +2493 -2433
  28. avrotize/common.py +828 -828
  29. avrotize/constants.py +4 -4
  30. avrotize/csvtoavro.py +131 -131
  31. avrotize/datapackagetoavro.py +76 -76
  32. avrotize/dependency_resolver.py +348 -348
  33. avrotize/jsonstoavro.py +1698 -1698
  34. avrotize/jsonstostructure.py +2642 -2642
  35. avrotize/jstructtoavro.py +878 -878
  36. avrotize/kstructtoavro.py +93 -93
  37. avrotize/kustotoavro.py +455 -455
  38. avrotize/parquettoavro.py +157 -157
  39. avrotize/proto2parser.py +497 -497
  40. avrotize/proto3parser.py +402 -402
  41. avrotize/prototoavro.py +382 -382
  42. avrotize/structuretocsharp.py +2005 -2005
  43. avrotize/structuretojsons.py +498 -498
  44. avrotize/structuretopython.py +772 -772
  45. avrotize/structuretots.py +653 -0
  46. avrotize/xsdtoavro.py +413 -413
  47. structurize-2.16.6.dist-info/METADATA +107 -0
  48. structurize-2.16.6.dist-info/RECORD +52 -0
  49. {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/licenses/LICENSE +200 -200
  50. structurize-2.16.2.dist-info/METADATA +0 -805
  51. structurize-2.16.2.dist-info/RECORD +0 -51
  52. {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/WHEEL +0 -0
  53. {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/entry_points.txt +0 -0
  54. {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/top_level.txt +0 -0
avrotize/avrotogo.py CHANGED
@@ -1,476 +1,476 @@
1
- import json
2
- import os
3
- from typing import Dict, List, Union, Set
4
- from avrotize.common import get_longest_namespace_prefix, is_generic_avro_type, pascal, render_template
5
-
6
- INDENT = ' '
7
-
8
- JsonNode = Dict[str, 'JsonNode'] | List['JsonNode'] | str | None
9
-
10
class AvroToGo:
    """Converts Avro schema to Go structs, including JSON and Avro marshalling methods.

    One instance remembers which Avro types it has already emitted, so each
    named type is rendered once and later references reuse its Go name.
    """

    # Go keywords; an identifier equal to one of these gets a '_' suffix.
    _RESERVED_WORDS = frozenset({
        'break', 'default', 'func', 'interface', 'select', 'case', 'defer', 'go', 'map', 'struct', 'chan',
        'else', 'goto', 'package', 'switch', 'const', 'fallthrough', 'if', 'range', 'type', 'continue', 'for',
        'import', 'return', 'var',
    })

    # Avro primitive -> Go type when the value is required.
    _REQUIRED_PRIMITIVES = {
        'null': 'interface{}',
        'boolean': 'bool',
        'int': 'int32',
        'long': 'int64',
        'float': 'float32',
        'double': 'float64',
        'bytes': '[]byte',
        'string': 'string',
    }

    # Avro primitive -> Go type when the value is optional (pointer types).
    _OPTIONAL_PRIMITIVES = {
        'null': 'interface{}',
        'boolean': '*bool',
        'int': '*int32',
        'long': '*int64',
        'float': '*float32',
        'double': '*float64',
        'bytes': '[]byte',
        'string': '*string',
    }

    # Go scalar type -> type used in the generated JSON type assertion.
    _JSON_SCALAR_ASSERTIONS = {
        'string': 'string',
        'bool': 'bool',
        'int32': 'int',
        'int64': 'int',
        'float32': 'float64',
        'float64': 'float64',
    }

    # (substring found in a Go type, import path that substring requires)
    _IMPORT_MARKERS = (
        ('time.Time', 'time'),
        ('gzip.', 'compress/gzip'),
        ('json.', 'encoding/json'),
        ('bytes.', 'bytes'),
        ('fmt.', 'fmt'),
        ('io.', 'io'),
        ('strings.', 'strings'),
        ('avro.', 'github.com/hamba/avro/v2'),
    )

    def __init__(self, base_package: str = '') -> None:
        """Creates a converter that writes into the Go package `base_package`."""
        self.base_package = base_package
        self.output_dir = os.getcwd()
        # Avro full name -> kind ("struct"/"enum") of types already generated.
        self.generated_types_avro_namespace: Dict[str, str] = {}
        # Go type name -> kind ("struct"/"enum"/"union") of types already generated.
        self.generated_types_go_package: Dict[str, str] = {}
        # Package references collected for the type currently being generated.
        self.referenced_packages: Dict[str, Set[str]] = {}
        # Saved reference maps of the enclosing types during nested generation.
        self.referenced_packages_stack: List[Dict[str, Set[str]]] = []
        self.avro_annotation = False
        self.json_annotation = False
        self.longest_common_prefix = ''
        self.package_site = 'github.com'
        self.package_username = 'username'
        self.structs = []
        self.enums = []

    def _ensure_package_dir(self) -> str:
        """Creates `<output_dir>/pkg/<base_package>` if needed and returns its path."""
        pkg_dir = os.path.join(self.output_dir, 'pkg', self.base_package)
        os.makedirs(pkg_dir, exist_ok=True)
        return pkg_dir

    def safe_identifier(self, name: str) -> str:
        """Converts a name to a safe Go identifier by suffixing Go keywords with '_'."""
        return f"{name}_" if name in self._RESERVED_WORDS else name

    def go_type_name(self, name: str, namespace: str) -> str:
        """Returns a qualified name for a Go struct or enum.

        The common namespace prefix is stripped, a trailing "_types" is removed
        from each remaining namespace segment, and the PascalCased segments are
        prepended to the PascalCased `name`.
        """
        if namespace:
            if namespace.startswith(self.longest_common_prefix):
                namespace = namespace[len(self.longest_common_prefix):]
            namespace = ''.join([pascal(t[:-6] if t.endswith("_types") else t) for t in namespace.split('.')])
            return f"{namespace}{pascal(name)}"
        return pascal(name)

    def map_primitive_to_go(self, avro_type: str, is_optional: bool) -> str:
        """Maps an Avro primitive or an already generated named type to a Go type.

        Names that are neither generated types nor known primitives are returned
        unchanged.
        """
        if avro_type in self.generated_types_avro_namespace:
            namespace, _, type_name = avro_type.rpartition('.')
            return self.go_type_name(type_name, namespace)
        table = self._OPTIONAL_PRIMITIVES if is_optional else self._REQUIRED_PRIMITIVES
        return table.get(avro_type, avro_type)

    def concat_package(self, package: str, name: str) -> str:
        """Concatenates package and name using a slash separator"""
        return f"{package.lower()}/{name}" if package else name

    def convert_avro_type_to_go(self, field_name: str, avro_type: Union[str, Dict, List], nullable: bool = False, parent_namespace: str = '') -> str:
        """Converts an Avro type to a Go type, generating nested named types on the way.

        Args:
            field_name: Name of the field that carries the type; used to name unions.
            avro_type: Type name, union (list) or complex type (dict).
            nullable: Whether a primitive should map to its optional Go type.
            parent_namespace: Namespace inherited by nested named types.

        Returns:
            The Go type expression; 'interface{}' for unsupported input.
        """
        if isinstance(avro_type, str):
            return self.map_primitive_to_go(avro_type, nullable)

        if isinstance(avro_type, list):
            if is_generic_avro_type(avro_type):
                return 'interface{}'
            non_null_types = [t for t in avro_type if t != 'null']
            if len(non_null_types) != 1:
                return self.generate_union_class(field_name, avro_type, parent_namespace)
            only_type = non_null_types[0]
            if isinstance(only_type, str):
                return self.map_primitive_to_go(only_type, True)
            return self.convert_avro_type_to_go(field_name, only_type, nullable, parent_namespace)

        if not isinstance(avro_type, dict):
            return 'interface{}'

        kind = avro_type['type']
        logical_type = avro_type.get('logicalType')
        if kind in ['record', 'enum']:
            return self.generate_class_or_enum(avro_type, parent_namespace)
        if kind == 'fixed' or (kind == 'bytes' and 'logicalType' in avro_type):
            if logical_type == 'decimal':
                return 'float64'
            if kind == 'fixed':
                # A fixed type without a decimal annotation is raw bytes. The
                # previous code indexed 'logicalType' unconditionally here and
                # raised KeyError for a plain fixed schema.
                return '[]byte'
        elif kind == 'array':
            item_type = self.convert_avro_type_to_go(field_name, avro_type['items'], nullable=True, parent_namespace=parent_namespace)
            # Slice elements are stored by value, so the pointer marker is dropped.
            if item_type.startswith('*'):
                return f"[]{item_type[1:]}"
            return f"[]{item_type}"
        elif kind == 'map':
            values_type = self.convert_avro_type_to_go(field_name, avro_type['values'], nullable=True, parent_namespace=parent_namespace)
            return f"map[string]{values_type}"
        elif 'logicalType' in avro_type:
            if logical_type in ('date', 'time-millis', 'time-micros', 'timestamp-millis', 'timestamp-micros'):
                return 'time.Time'
            if logical_type == 'uuid':
                return 'string'
        # Anything else resolves through the underlying type.
        return self.convert_avro_type_to_go(field_name, kind, parent_namespace=parent_namespace)

    def generate_class_or_enum(self, avro_schema: Dict, parent_namespace: str = '') -> str:
        """Generates a Go struct or enum from an Avro schema.

        Returns the Go type name, or 'interface{}' when the schema is neither a
        record nor an enum. The caller's `referenced_packages` map is restored on
        every exit path.
        """
        self.referenced_packages_stack.append(self.referenced_packages)
        self.referenced_packages = {}
        try:
            namespace = avro_schema.get('namespace', parent_namespace)
            schema_type = avro_schema.get('type')
            qualified_type = ''
            if schema_type == 'record':
                qualified_type = self.generate_struct(avro_schema, namespace)
            elif schema_type == 'enum':
                qualified_type = self.generate_enum(avro_schema, namespace)
        finally:
            # Previously skipped on the early return below, which left the stack
            # one entry too deep and the caller's references replaced.
            self.referenced_packages = self.referenced_packages_stack.pop()
        if not qualified_type:
            return 'interface{}'
        type_name = qualified_type
        if '/' in qualified_type:
            package_name, type_name = qualified_type.rsplit('/', 1)
            self.referenced_packages.setdefault(package_name, set()).add(type_name)
        return type_name

    def generate_struct(self, avro_schema: Dict, parent_namespace: str) -> str:
        """Generates a Go struct file and its unit test from an Avro record schema.

        Returns the Go struct name; a record that was already generated is not
        rendered again.
        """
        namespace = avro_schema.get('namespace', parent_namespace)
        avro_fullname = namespace + '.' + avro_schema['name'] if namespace else avro_schema['name']
        go_struct_name = self.go_type_name(avro_schema['name'], namespace)
        if avro_fullname in self.generated_types_avro_namespace:
            return go_struct_name
        # Register before converting the fields so self-references resolve.
        self.generated_types_avro_namespace[avro_fullname] = "struct"
        self.generated_types_go_package[go_struct_name] = "struct"

        fields = [{
            'name': pascal(field['name']),
            'type': self.convert_avro_type_to_go(field['name'], field['type'], parent_namespace=namespace),
            'original_name': field['name']
        } for field in avro_schema.get('fields', [])]

        context = {
            'doc': avro_schema.get('doc', ''),
            'struct_name': go_struct_name,
            'fields': fields,
            'avro_schema': json.dumps(avro_schema),
            'json_annotation': self.json_annotation,
            'avro_annotation': self.avro_annotation,
            'json_match_predicates': [self.get_is_json_match_clause(f['name'], f['type']) for f in fields],
            'base_package': self.base_package,
        }

        file_name = os.path.join(self._ensure_package_dir(), f"{go_struct_name}.go")
        render_template('avrotogo/go_struct.jinja', file_name, **context)

        self.structs.append({
            'name': go_struct_name,
            'fields': fields,
        })

        self.generate_unit_test('struct', go_struct_name, fields)

        return go_struct_name

    def generate_enum(self, avro_schema: Dict, parent_namespace: str) -> str:
        """Generates a Go enum file and its unit test from an Avro enum schema.

        Returns the Go enum name. As with structs, an enum that was already
        generated is not rendered or recorded a second time.
        """
        namespace = avro_schema.get('namespace', parent_namespace)
        avro_fullname = namespace + '.' + avro_schema['name'] if namespace else avro_schema['name']
        enum_name = self.go_type_name(avro_schema['name'], namespace)
        if avro_fullname in self.generated_types_avro_namespace:
            return enum_name
        self.generated_types_avro_namespace[avro_fullname] = "enum"
        self.generated_types_go_package[enum_name] = "enum"

        symbols = avro_schema.get('symbols', [])
        context = {
            'doc': avro_schema.get('doc', ''),
            'struct_name': enum_name,
            'symbols': symbols,
            # Enums have no typed fields, so no type-driven imports are needed.
            'imports': self.get_imports_for_definition([]),
            'base_package': self.base_package,
            'referenced_packages': self.referenced_packages.keys()
        }

        file_name = os.path.join(self._ensure_package_dir(), f"{enum_name}.go")
        render_template('avrotogo/go_enum.jinja', file_name, **context)

        self.enums.append({
            'name': enum_name,
            'symbols': avro_schema.get('symbols', []),
        })

        self.generate_unit_test('enum', enum_name, context['symbols'])

        return enum_name

    def generate_union_class(self, field_name: str, avro_type: List, parent_namespace: str) -> str:
        """Generates a Go union type file and its unit test for a multi-branch union.

        Returns the Go union type name; a union with the same name is rendered
        only once.
        """
        union_class_name = self.go_type_name(pascal(field_name) + 'Union', parent_namespace)
        # Converted before the duplicate check because conversion may generate
        # nested named types as a side effect.
        union_types = [self.convert_avro_type_to_go(field_name + "Option" + str(i), t, parent_namespace=parent_namespace) for i, t in enumerate(avro_type)]
        if union_class_name in self.generated_types_go_package:
            return union_class_name

        self.generated_types_go_package[union_class_name] = "union"
        context = {
            'union_class_name': union_class_name,
            'union_types': union_types,
            'json_annotation': self.json_annotation,
            'avro_annotation': self.avro_annotation,
            'get_is_json_match_clause': self.get_is_json_match_clause,
            'base_package': self.base_package,
        }

        file_name = os.path.join(self._ensure_package_dir(), f"{union_class_name}.go")
        render_template('avrotogo/go_union.jinja', file_name, **context)

        fields = []
        for field_type in union_types:
            v = self.random_value(field_type)
            fields.append({
                'name': pascal(field_type),
                'type': field_type,
                'value': f'Opt({v})' if v != 'nil' else 'nil',
            })
        self.structs.append({
            'name': union_class_name,
            'fields': fields
        })

        self.generate_unit_test('union', union_class_name, union_types)

        return union_class_name

    def get_is_json_match_clause(self, field_name: str, field_type: str) -> str:
        """Generates the isJsonMatch clause (a Go type assertion) for a field.

        NOTE(review): integer fields are asserted as Go `int`. If `node` comes
        from encoding/json decoding into interface{}, numbers arrive as float64
        and that assertion would never succeed; confirm against the templates.
        """
        scalar = field_type[1:] if field_type.startswith('*') else field_type
        if scalar in self._JSON_SCALAR_ASSERTIONS:
            asserted = self._JSON_SCALAR_ASSERTIONS[scalar]
        elif field_type == '[]byte':
            asserted = '[]byte'
        elif field_type == 'interface{}':
            asserted = 'interface{}'
        elif field_type.startswith('map[string]'):
            asserted = 'map[string]interface{}'
        elif field_type.startswith('[]'):
            asserted = '[]interface{}'
        elif field_type in self.generated_types_go_package:
            asserted = field_type
        else:
            # Was emitted as "map[string.interface{}", which is not valid Go.
            asserted = 'map[string]interface{}'
        return f'if _, ok := node["{field_name}"].({asserted}); !ok {{ return false }}'

    def get_imports_for_definition(self, types: List[str]) -> Set[str]:
        """Collects necessary imports for the Go definition based on the Go types"""
        return {
            import_path
            for field_type in types
            for marker, import_path in self._IMPORT_MARKERS
            if marker in field_type
        }

    def random_value(self, go_type: str) -> str:
        """Generates a Go expression holding a random value for a given Go type.

        Pointer types are wrapped in `Opt(...)`; types without a known
        generator yield 'nil'.
        """
        import random
        import string

        is_optional = False
        if go_type.startswith('*'):
            is_optional = True
            go_type = go_type[1:]

        if go_type == 'string':
            v = '"' + ''.join(random.choices(string.ascii_letters + string.digits, k=10)) + '"'
            v = f"string({v})"
        elif go_type == 'bool':
            v = 'true' if random.choice([True, False]) else 'false'
            v = f"bool({v})"
        elif go_type == 'int32' or go_type == 'int':
            v = str(random.randint(-100, 100))
            v = f"int32({v})"
        elif go_type == 'int64':
            v = str(random.randint(-100, 100))
            v = f"int64({v})"
        elif go_type == 'float32':
            v = str(random.uniform(-100, 100))
            v = f"float32({v})"
        elif go_type == 'float64':
            v = str(random.uniform(-100, 100))
            v = f"float64({v})"
        elif go_type == '[]byte':
            v = '[]byte("' + ''.join(random.choices(string.ascii_letters + string.digits, k=10)) + '")'
        elif go_type.startswith('[]'):
            v = f'{go_type}{{{self.random_value(go_type[2:])}}}'
        elif go_type.startswith('map[string]'):
            v = f'map[string]{go_type[11:]}{{"key": {self.random_value(go_type[11:])}}}'
        elif go_type in self.generated_types_go_package:
            v = f'random{go_type}()'
        elif go_type == 'interface{}':
            v = 'nil'
        else:
            return 'nil'
        if is_optional and v != 'nil':
            return f'Opt({v})'
        return v

    def generate_helpers(self) -> None:
        """Generates helper functions for initializing structs with random values"""
        context = {
            'structs': self.structs,
            'enums': self.enums,
            'base_package': self.base_package,
        }
        for struct in context['structs']:
            for field in struct['fields']:
                # Union fields already carry a value; fill in the others.
                if 'value' not in field:
                    field['value'] = self.random_value(field['type'])
        helpers_file_name = os.path.join(self._ensure_package_dir(), f"{self.base_package}_helpers.go")
        render_template('avrotogo/go_helpers.jinja', helpers_file_name, **context)

    def generate_unit_test(self, kind: str, name: str, fields: List[Dict[str, str]]):
        """Generates unit tests for Go struct, enum, or union"""
        context = {
            'struct_name': name,
            'fields': fields,
            'kind': kind,
            'base_package': self.base_package,
            'package_site': self.package_site,
            'package_username': self.package_username,
            'json_annotation': self.json_annotation,
            'avro_annotation': self.avro_annotation
        }

        test_file_name = os.path.join(self._ensure_package_dir(), f"{name}_test.go")
        render_template('avrotogo/go_test.jinja', test_file_name, **context)

    def convert_schema(self, schema: 'JsonNode', output_dir: str):
        """Converts an Avro schema (one schema or a list of schemas) to Go files in `output_dir`."""
        if not isinstance(schema, list):
            schema = [schema]
        os.makedirs(output_dir, exist_ok=True)
        self.output_dir = output_dir

        self.longest_common_prefix = get_longest_namespace_prefix(schema)
        self.structs = []
        # Reset together with structs so a reused converter does not carry
        # enums of an earlier run into the helpers file.
        self.enums = []

        for avro_schema in (x for x in schema if isinstance(x, dict)):
            self.generate_class_or_enum(avro_schema)
        self.write_go_mod_file()
        self.write_modname_go_file()
        self.generate_helpers()

    def write_go_mod_file(self):
        """Writes the go.mod file for the Go project"""
        parts = [
            "module " + self.package_site + "/" + self.package_username + "/" + self.base_package + "\n\n",
            "go 1.21\n\n",
        ]
        if self.avro_annotation:
            parts += [
                "require (\n",
                " github.com/hamba/avro/v2 v2.27.0\n",
                ")\n",
            ]

        go_mod_path = os.path.join(self.output_dir, "go.mod")
        with open(go_mod_path, 'w', encoding='utf-8') as file:
            file.write(''.join(parts))

    def write_modname_go_file(self):
        """Writes the modname.go file for the Go project"""
        modname_go_content = (
            "package " + self.base_package + "\n\n"
            + "const ModName = \"" + self.base_package + "\"\n"
        )

        # The directory is missing when the schema produced no type at all.
        modname_go_path = os.path.join(self._ensure_package_dir(), f"{self.base_package}.go")
        with open(modname_go_path, 'w', encoding='utf-8') as file:
            file.write(modname_go_content)

    def convert(self, avro_schema_path: str, output_dir: str):
        """Reads the Avro schema file and converts it to Go.

        When no base package was set, the schema file name without its
        extension is used.
        """
        if not self.base_package:
            self.base_package = os.path.splitext(os.path.basename(avro_schema_path))[0]

        with open(avro_schema_path, 'r', encoding='utf-8') as file:
            schema = json.load(file)
        self.convert_schema(schema, output_dir)
439
-
440
def convert_avro_to_go(avro_schema_path, go_file_path, package_name='', avro_annotation=False, json_annotation=False, package_site='github.com', package_username='username'):
    """Converts an Avro schema file to Go structs.

    Args:
        avro_schema_path (str): Avro input schema path
        go_file_path (str): Output path handed to the converter as its output directory
        package_name (str): Base package name; defaults to the schema file name without extension
        avro_annotation (bool): Include Avro annotations
        json_annotation (bool): Include JSON annotations
        package_site (str): Host part of the generated Go module path
        package_username (str): User part of the generated Go module path
    """
    effective_package = package_name
    if not effective_package:
        schema_file = os.path.basename(avro_schema_path)
        effective_package, _ = os.path.splitext(schema_file)

    converter = AvroToGo(effective_package)
    converter.avro_annotation = avro_annotation
    converter.json_annotation = json_annotation
    converter.package_site = package_site
    converter.package_username = package_username
    converter.convert(avro_schema_path, go_file_path)
459
-
460
-
461
def convert_avro_schema_to_go(avro_schema: 'JsonNode', output_dir: str, package_name='', avro_annotation=False, json_annotation=False, package_site='github.com', package_username='username'):
    """Converts an in-memory Avro schema to Go structs.

    Args:
        avro_schema (JsonNode): Avro schema as a dictionary or list of dictionaries
        output_dir (str): Output directory path
        package_name (str): Base package name
        avro_annotation (bool): Include Avro annotations
        json_annotation (bool): Include JSON annotations
        package_site (str): Host part of the generated Go module path
        package_username (str): User part of the generated Go module path
    """
    converter = AvroToGo(package_name)
    converter.avro_annotation = avro_annotation
    converter.json_annotation = json_annotation
    converter.package_site = package_site
    converter.package_username = package_username
    converter.convert_schema(avro_schema, output_dir)
1
+ import json
2
+ import os
3
+ from typing import Dict, List, Union, Set
4
+ from avrotize.common import get_longest_namespace_prefix, is_generic_avro_type, pascal, render_template
5
+
6
+ INDENT = ' '
7
+
8
+ JsonNode = Dict[str, 'JsonNode'] | List['JsonNode'] | str | None
9
+
10
+ class AvroToGo:
11
+ """Converts Avro schema to Go structs, including JSON and Avro marshalling methods"""
12
+
13
def __init__(self, base_package: str = '') -> None:
    """Creates a converter that writes into the Go package `base_package`."""
    # Where the output goes and how the Go module is named.
    self.base_package = base_package
    self.output_dir = os.getcwd()
    self.package_site = 'github.com'
    self.package_username = 'username'
    self.longest_common_prefix = ''

    # Which annotations the generated code should carry.
    self.avro_annotation = False
    self.json_annotation = False

    # Bookkeeping of the types generated so far.
    self.generated_types_avro_namespace: Dict[str, str] = {}
    self.generated_types_go_package: Dict[str, str] = {}
    self.referenced_packages: Dict[str, Set[str]] = {}
    self.referenced_packages_stack: List[Dict[str, Set[str]]] = []
    self.structs = []
    self.enums = []
27
+
28
def safe_identifier(self, name: str) -> str:
    """Returns `name`, suffixed with '_' when it collides with a Go keyword."""
    go_keywords = (
        'break', 'default', 'func', 'interface', 'select', 'case', 'defer', 'go', 'map', 'struct', 'chan',
        'else', 'goto', 'package', 'switch', 'const', 'fallthrough', 'if', 'range', 'type', 'continue', 'for',
        'import', 'return', 'var',
    )
    return name + "_" if name in go_keywords else name
38
+
39
def go_type_name(self, name: str, namespace: str) -> str:
    """Builds the Go type name for `name` declared in the Avro `namespace`.

    The common namespace prefix is cut off, a trailing "_types" is removed from
    every remaining segment, and the PascalCased segments are prepended to the
    PascalCased name. Without a namespace only the PascalCased name is returned.
    """
    if not namespace:
        return pascal(name)
    prefix = self.longest_common_prefix
    remainder = namespace[len(prefix):] if namespace.startswith(prefix) else namespace
    segments = []
    for segment in remainder.split('.'):
        if segment.endswith("_types"):
            segment = segment[:-6]
        segments.append(pascal(segment))
    qualifier = ''.join(segments)
    return f"{qualifier}{pascal(name)}"
47
+
48
def map_primitive_to_go(self, avro_type: str, is_optional: bool) -> str:
    """Maps an Avro primitive or an already generated named type to a Go type.

    Names that are neither generated types nor known primitives are returned
    unchanged.
    """
    if avro_type in self.generated_types_avro_namespace:
        namespace, _, type_name = avro_type.rpartition('.')
        return self.go_type_name(type_name, namespace)

    if is_optional:
        # Optional scalars become pointers; null and bytes stay as they are.
        go_types = {
            'null': 'interface{}',
            'boolean': '*bool',
            'int': '*int32',
            'long': '*int64',
            'float': '*float32',
            'double': '*float64',
            'bytes': '[]byte',
            'string': '*string',
        }
    else:
        go_types = {
            'null': 'interface{}',
            'boolean': 'bool',
            'int': 'int32',
            'long': 'int64',
            'float': 'float32',
            'double': 'float64',
            'bytes': '[]byte',
            'string': 'string',
        }
    return go_types.get(avro_type, avro_type)
76
+
77
def concat_package(self, package: str, name: str) -> str:
    """Joins the lower-cased package and the name with '/'; returns the name alone without a package."""
    if not package:
        return name
    return package.lower() + "/" + name
80
+
81
def convert_avro_type_to_go(self, field_name: str, avro_type: Union[str, Dict, List], nullable: bool = False, parent_namespace: str = '') -> str:
    """Converts an Avro type to a Go type, generating nested named types on the way.

    Args:
        field_name: Name of the field that carries the type; used to name unions.
        avro_type: Type name, union (list) or complex type (dict).
        nullable: Whether a primitive should map to its optional Go type.
        parent_namespace: Namespace inherited by nested named types.

    Returns:
        The Go type expression; 'interface{}' for unsupported input.
    """
    if isinstance(avro_type, str):
        return self.map_primitive_to_go(avro_type, nullable)

    if isinstance(avro_type, list):
        if is_generic_avro_type(avro_type):
            return 'interface{}'
        non_null_types = [t for t in avro_type if t != 'null']
        if len(non_null_types) != 1:
            return self.generate_union_class(field_name, avro_type, parent_namespace)
        only_type = non_null_types[0]
        if isinstance(only_type, str):
            return self.map_primitive_to_go(only_type, True)
        return self.convert_avro_type_to_go(field_name, only_type, nullable, parent_namespace)

    if not isinstance(avro_type, dict):
        return 'interface{}'

    kind = avro_type['type']
    logical_type = avro_type.get('logicalType')
    if kind in ['record', 'enum']:
        return self.generate_class_or_enum(avro_type, parent_namespace)
    if kind == 'fixed' or (kind == 'bytes' and 'logicalType' in avro_type):
        if logical_type == 'decimal':
            return 'float64'
        if kind == 'fixed':
            # A fixed type without a decimal annotation is raw bytes. The
            # previous code indexed 'logicalType' unconditionally here and
            # raised KeyError for a plain fixed schema.
            return '[]byte'
    elif kind == 'array':
        item_type = self.convert_avro_type_to_go(field_name, avro_type['items'], nullable=True, parent_namespace=parent_namespace)
        # Slice elements are stored by value, so the pointer marker is dropped.
        if item_type.startswith('*'):
            return f"[]{item_type[1:]}"
        return f"[]{item_type}"
    elif kind == 'map':
        values_type = self.convert_avro_type_to_go(field_name, avro_type['values'], nullable=True, parent_namespace=parent_namespace)
        # Both branches of the former pointer check produced this same result.
        return f"map[string]{values_type}"
    elif 'logicalType' in avro_type:
        if logical_type in ('date', 'time-millis', 'time-micros', 'timestamp-millis', 'timestamp-micros'):
            return 'time.Time'
        if logical_type == 'uuid':
            return 'string'
    # Anything else resolves through the underlying type.
    return self.convert_avro_type_to_go(field_name, kind, parent_namespace=parent_namespace)
123
+
124
def generate_class_or_enum(self, avro_schema: Dict, parent_namespace: str = '') -> str:
    """Generates a Go struct or enum from an Avro schema.

    Package references collected while the type is generated are kept apart
    from those of the enclosing type by pushing the current map on a stack.

    Args:
        avro_schema: Avro schema dictionary.
        parent_namespace: Namespace used when the schema declares none.

    Returns:
        The Go type name, or 'interface{}' when the schema is neither a record
        nor an enum.
    """
    self.referenced_packages_stack.append(self.referenced_packages)
    self.referenced_packages = {}
    try:
        namespace = avro_schema.get('namespace', parent_namespace)
        schema_type = avro_schema.get('type')
        qualified_type = ''
        if schema_type == 'record':
            qualified_type = self.generate_struct(avro_schema, namespace)
        elif schema_type == 'enum':
            qualified_type = self.generate_enum(avro_schema, namespace)
    finally:
        # Previously skipped on the early return below, which left the stack one
        # entry too deep and the caller's references replaced by an empty map.
        self.referenced_packages = self.referenced_packages_stack.pop()
    if not qualified_type:
        return 'interface{}'
    type_name = qualified_type
    if '/' in qualified_type:
        package_name, type_name = qualified_type.rsplit('/', 1)
        self.referenced_packages.setdefault(package_name, set()).add(type_name)
    return type_name
143
+
144
def generate_struct(self, avro_schema: Dict, parent_namespace: str) -> str:
    """Renders the Go struct file and unit test for an Avro record schema.

    Returns the Go struct name. A record whose Avro full name is already
    registered is not rendered again.
    """
    record_name = avro_schema['name']
    namespace = avro_schema.get('namespace', parent_namespace)
    avro_fullname = f"{namespace}.{record_name}" if namespace else record_name
    go_struct_name = self.go_type_name(record_name, namespace)
    if avro_fullname in self.generated_types_avro_namespace:
        return go_struct_name

    # Registered before the fields are converted so that a field referring
    # back to this record resolves to the existing name.
    self.generated_types_avro_namespace[avro_fullname] = "struct"
    self.generated_types_go_package[go_struct_name] = "struct"

    fields = []
    for field in avro_schema.get('fields', []):
        fields.append({
            'name': pascal(field['name']),
            'type': self.convert_avro_type_to_go(field['name'], field['type'], parent_namespace=namespace),
            'original_name': field['name']
        })

    predicates = [self.get_is_json_match_clause(entry['name'], entry['type']) for entry in fields]
    context = dict(
        doc=avro_schema.get('doc', ''),
        struct_name=go_struct_name,
        fields=fields,
        avro_schema=json.dumps(avro_schema),
        json_annotation=self.json_annotation,
        avro_annotation=self.avro_annotation,
        json_match_predicates=predicates,
        base_package=self.base_package,
    )

    pkg_dir = os.path.join(self.output_dir, 'pkg', self.base_package)
    os.makedirs(pkg_dir, exist_ok=True)
    target = os.path.join(pkg_dir, f"{go_struct_name}.go")
    render_template('avrotogo/go_struct.jinja', target, **context)

    self.structs.append({'name': go_struct_name, 'fields': fields})
    self.generate_unit_test('struct', go_struct_name, fields)
    return go_struct_name
185
+
186
+
187
def generate_enum(self, avro_schema: Dict, parent_namespace: str) -> str:
    """Generates a Go enum file and its unit test from an Avro enum schema.

    Args:
        avro_schema: Avro enum schema dictionary.
        parent_namespace: Namespace used when the schema declares none.

    Returns:
        The Go enum name. As with structs, an enum whose Avro full name is
        already registered is not rendered or recorded a second time.
    """
    namespace = avro_schema.get('namespace', parent_namespace)
    avro_fullname = namespace + '.' + avro_schema['name'] if namespace else avro_schema['name']
    enum_name = self.go_type_name(avro_schema['name'], namespace)
    if avro_fullname in self.generated_types_avro_namespace:
        # Without this guard a repeated definition re-rendered the file and
        # appended a duplicate entry to self.enums.
        return enum_name
    self.generated_types_avro_namespace[avro_fullname] = "enum"
    self.generated_types_go_package[enum_name] = "enum"

    context = {
        'doc': avro_schema.get('doc', ''),
        'struct_name': enum_name,
        'symbols': avro_schema.get('symbols', []),
        # Enums have no typed fields, so no type-driven imports are needed.
        'imports': self.get_imports_for_definition([]),
        'base_package': self.base_package,
        'referenced_packages': self.referenced_packages.keys()
    }

    pkg_dir = os.path.join(self.output_dir, 'pkg', self.base_package)
    os.makedirs(pkg_dir, exist_ok=True)
    file_name = os.path.join(pkg_dir, f"{enum_name}.go")
    render_template('avrotogo/go_enum.jinja', file_name, **context)

    self.enums.append({
        'name': enum_name,
        'symbols': avro_schema.get('symbols', []),
    })

    self.generate_unit_test('enum', enum_name, context['symbols'])

    return enum_name
221
+
222
def generate_union_class(self, field_name: str, avro_type: List, parent_namespace: str) -> str:
    """Render a Go union type for an Avro union and return its Go type name.

    Args:
        field_name: Name of the field holding the union; it seeds both the
            union class name and the per-option names.
        avro_type: The Avro union, i.e. a list of member types.
        parent_namespace: Namespace passed on to name and type conversion.

    Returns:
        The Go name of the union class. If that name is already registered,
        it is returned without rendering anything again.
    """
    union_class_name = self.go_type_name(pascal(field_name) + 'Union', parent_namespace)

    # Member types are converted before the duplicate check; this order is
    # kept exactly as it was.
    union_types = []
    for index, member in enumerate(avro_type):
        option_name = field_name + "Option" + str(index)
        union_types.append(
            self.convert_avro_type_to_go(option_name, member, parent_namespace=parent_namespace)
        )

    if union_class_name in self.generated_types_go_package:
        return union_class_name
    self.generated_types_go_package[union_class_name] = "union"

    pkg_dir = os.path.join(self.output_dir, 'pkg', self.base_package)
    if not os.path.exists(pkg_dir):
        os.makedirs(pkg_dir, exist_ok=True)
    target_path = os.path.join(pkg_dir, f"{union_class_name}.go")
    render_template(
        'avrotogo/go_union.jinja',
        target_path,
        union_class_name=union_class_name,
        union_types=union_types,
        json_annotation=self.json_annotation,
        avro_annotation=self.avro_annotation,
        get_is_json_match_clause=self.get_is_json_match_clause,
        base_package=self.base_package,
    )

    # One pseudo-field per member type, each carrying a sample value that is
    # wrapped in Opt(...) unless the sample is nil.
    option_fields = []
    for go_type in union_types:
        sample = self.random_value(go_type)
        option_fields.append({
            'name': pascal(go_type),
            'type': go_type,
            'value': 'nil' if sample == 'nil' else f'Opt({sample})',
        })
    self.structs.append({'name': union_class_name, 'fields': option_fields})

    self.generate_unit_test('union', union_class_name, union_types)

    return union_class_name
261
+
262
+
263
def get_is_json_match_clause(self, field_name: str, field_type: str) -> str:
    """Build the Go ``isJsonMatch`` guard statement for one field.

    The returned Go statement type-asserts ``node["<field_name>"]`` and
    returns ``false`` when the assertion fails.

    Args:
        field_name: Key looked up in the Go ``node`` map.
        field_type: Go type of the field as produced by the converter.

    Returns:
        A single-line Go ``if`` statement.
    """
    # Scalar Go types (optionally behind one '*') and the type asserted for them.
    # NOTE(review): if `node` comes from encoding/json decoding into
    # interface{}, numbers arrive as float64 and the `int` assertion would
    # never succeed - confirm against the Go template before changing.
    scalar_assertions = {
        'string': 'string',
        'bool': 'bool',
        'int32': 'int',
        'int64': 'int',
        'float32': 'float64',
        'float64': 'float64',
    }
    scalar_key = field_type[1:] if field_type.startswith('*') else field_type

    if scalar_key in scalar_assertions:
        asserted = scalar_assertions[scalar_key]
    elif field_type == '[]byte':
        asserted = '[]byte'
    elif field_type == 'interface{}':
        asserted = 'interface{}'
    elif field_type.startswith('map[string]'):
        asserted = 'map[string]interface{}'
    elif field_type.startswith('[]'):
        asserted = '[]interface{}'
    elif field_type in self.generated_types_go_package:
        asserted = field_type
    else:
        # Fallback for unknown types. This used to emit the malformed
        # `map[string.interface{}`, which is not valid Go.
        asserted = 'map[string]interface{}'

    return f'if _, ok := node["{field_name}"].({asserted}); !ok {{ return false }}'
289
+
290
def get_imports_for_definition(self, types: List[str]) -> Set[str]:
    """Derive the Go import paths required by a list of Go type strings.

    Each type string is scanned for known package-qualified markers by plain
    substring search; every marker found contributes its import path.

    Args:
        types: Go type expressions used by the definition.

    Returns:
        The set of Go import paths, empty when nothing matches.
    """
    marker_to_import = (
        ("time.Time", "time"),
        ("gzip.", "compress/gzip"),
        ("json.", "encoding/json"),
        ("bytes.", "bytes"),
        ("fmt.", "fmt"),
        ("io.", "io"),
        ("strings.", "strings"),
        ("avro.", "github.com/hamba/avro/v2"),
    )
    return {
        import_path
        for go_type in types
        for marker, import_path in marker_to_import
        if marker in go_type
    }
311
+
312
def random_value(self, go_type: str) -> str:
    """Produce a Go expression yielding a sample value of the given Go type.

    Args:
        go_type: Go type expression; a leading ``*`` marks an optional value.

    Returns:
        Go source text for a value of that type. Unknown types and
        ``interface{}`` yield ``nil``; optional non-nil values are wrapped
        in ``Opt(...)``.
    """
    import random
    import string

    wants_pointer = go_type.startswith('*')
    base_type = go_type[1:] if wants_pointer else go_type
    alphabet = string.ascii_letters + string.digits

    if base_type == 'string':
        text = ''.join(random.choices(alphabet, k=10))
        literal = f'string("{text}")'
    elif base_type == 'bool':
        flag = random.choice([True, False])
        literal = 'bool(true)' if flag else 'bool(false)'
    elif base_type in ('int32', 'int'):
        literal = f"int32({random.randint(-100, 100)})"
    elif base_type == 'int64':
        literal = f"int64({random.randint(-100, 100)})"
    elif base_type == 'float32':
        literal = f"float32({random.uniform(-100, 100)})"
    elif base_type == 'float64':
        literal = f"float64({random.uniform(-100, 100)})"
    elif base_type == '[]byte':
        text = ''.join(random.choices(alphabet, k=10))
        literal = '[]byte("' + text + '")'
    elif base_type.startswith('[]'):
        # Slice literal with one element of the element type.
        element = self.random_value(base_type[2:])
        literal = f'{base_type}{{{element}}}'
    elif base_type.startswith('map[string]'):
        # Map literal with a single "key" entry of the value type.
        value_type = base_type[len('map[string]'):]
        literal = f'map[string]{value_type}{{"key": {self.random_value(value_type)}}}'
    elif base_type in self.generated_types_go_package:
        # Generated types are filled by their random<Type>() helper.
        literal = f'random{base_type}()'
    else:
        # interface{} and anything unrecognised.
        return 'nil'

    return f'Opt({literal})' if wants_pointer and literal != 'nil' else literal
355
+
356
def generate_helpers(self) -> None:
    """Render the helpers file that fills generated types with sample values.

    Every struct field that has no ``value`` entry yet receives one from
    ``self.random_value``; the collected structs and enums are then handed
    to the ``go_helpers`` template.
    """
    for struct in self.structs:
        for field in struct['fields']:
            if 'value' in field:
                # Sample already supplied when the entry was recorded.
                continue
            field['value'] = self.random_value(field['type'])

    helpers_file_name = os.path.join(
        self.output_dir, 'pkg', self.base_package, f"{self.base_package}_helpers.go"
    )
    render_template(
        'avrotogo/go_helpers.jinja',
        helpers_file_name,
        structs=self.structs,
        enums=self.enums,
        base_package=self.base_package,
    )
369
+
370
def generate_unit_test(self, kind: str, name: str, fields: List[Dict[str, str]]):
    """Render ``<name>_test.go`` for a generated struct, enum or union.

    Args:
        kind: Kind tag forwarded to the template; callers in this file pass
            'struct', 'enum' or 'union'.
        name: Go type name; also the stem of the test file name.
        fields: Member description forwarded to the template unchanged.
    """
    pkg_dir = os.path.join(self.output_dir, 'pkg', self.base_package)
    if not os.path.exists(pkg_dir):
        os.makedirs(pkg_dir, exist_ok=True)

    render_template(
        'avrotogo/go_test.jinja',
        os.path.join(pkg_dir, f"{name}_test.go"),
        struct_name=name,
        fields=fields,
        kind=kind,
        base_package=self.base_package,
        package_site=self.package_site,
        package_username=self.package_username,
        json_annotation=self.json_annotation,
        avro_annotation=self.avro_annotation,
    )
388
+
389
def convert_schema(self, schema: JsonNode, output_dir: str):
    """Generate the Go project for an already-parsed Avro schema.

    Args:
        schema: A single Avro schema or a list of schemas. Entries that are
            not dicts are skipped.
        output_dir: Root directory of the generated project; created when
            it does not exist.
    """
    schemas = schema if isinstance(schema, list) else [schema]

    if not os.path.exists(output_dir):
        os.makedirs(output_dir, exist_ok=True)
    self.output_dir = output_dir

    self.longest_common_prefix = get_longest_namespace_prefix(schemas)
    self.structs = []

    for entry in schemas:
        if isinstance(entry, dict):
            self.generate_class_or_enum(entry)

    # Project-level files are written after all types have been generated.
    self.write_go_mod_file()
    self.write_modname_go_file()
    self.generate_helpers()
405
+
406
def write_go_mod_file(self):
    """Write ``go.mod`` into the output directory.

    The module path is ``<package_site>/<package_username>/<base_package>``.
    A ``require`` block for ``github.com/hamba/avro/v2`` is added only when
    Avro annotations are enabled.
    """
    module_path = "/".join((self.package_site, self.package_username, self.base_package))
    sections = [
        "module " + module_path + "\n\n",
        "go 1.21\n\n",
    ]
    if self.avro_annotation:
        sections.extend((
            "require (\n",
            " github.com/hamba/avro/v2 v2.27.0\n",
            ")\n",
        ))

    with open(os.path.join(self.output_dir, "go.mod"), 'w', encoding='utf-8') as go_mod:
        go_mod.write("".join(sections))
419
+
420
def write_modname_go_file(self):
    """Write ``<base_package>.go``, which declares the package and ``ModName``.

    The file is placed in ``<output_dir>/pkg/<base_package>``. The directory
    is created here when missing, so the call also succeeds when no type
    generator has created it beforehand (for example an empty schema list).
    """
    pkg_dir = os.path.join(self.output_dir, 'pkg', self.base_package)
    # Other generators create this directory only when they emit a type.
    os.makedirs(pkg_dir, exist_ok=True)

    modname_go_content = (
        "package " + self.base_package + "\n\n"
        "const ModName = \"" + self.base_package + "\"\n"
    )

    modname_go_path = os.path.join(pkg_dir, f"{self.base_package}.go")
    with open(modname_go_path, 'w', encoding='utf-8') as file:
        file.write(modname_go_content)
429
+
430
def convert(self, avro_schema_path: str, output_dir: str):
    """Load an Avro schema file and generate the Go project from it.

    Args:
        avro_schema_path: Path to a JSON-encoded Avro schema file. When no
            base package is set, the file's base name without extension
            becomes the base package.
        output_dir: Directory handed on to ``convert_schema``.
    """
    if not self.base_package:
        file_stem, _extension = os.path.splitext(os.path.basename(avro_schema_path))
        self.base_package = file_stem

    with open(avro_schema_path, 'r', encoding='utf-8') as schema_file:
        parsed_schema = json.load(schema_file)
    self.convert_schema(parsed_schema, output_dir)
438
+
439
+
440
def convert_avro_to_go(avro_schema_path, go_file_path, package_name='', avro_annotation=False, json_annotation=False, package_site='github.com', package_username='username'):
    """Convert an Avro schema file to Go structs.

    Args:
        avro_schema_path (str): Avro input schema path.
        go_file_path (str): Output location; it is passed to the converter
            as its output directory.
        package_name (str): Base package name. Defaults to the schema file's
            base name without extension.
        avro_annotation (bool): Include Avro annotations.
        json_annotation (bool): Include JSON annotations.
        package_site (str): First segment of the Go module path.
        package_username (str): Second segment of the Go module path.
    """
    effective_package = package_name
    if not effective_package:
        schema_file = os.path.basename(avro_schema_path)
        effective_package = os.path.splitext(schema_file)[0]

    converter = AvroToGo(effective_package)
    converter.avro_annotation = avro_annotation
    converter.json_annotation = json_annotation
    converter.package_site = package_site
    converter.package_username = package_username
    converter.convert(avro_schema_path, go_file_path)
459
+
460
+
461
def convert_avro_schema_to_go(avro_schema: JsonNode, output_dir: str, package_name='', avro_annotation=False, json_annotation=False, package_site='github.com', package_username='username'):
    """Convert an in-memory Avro schema to Go structs.

    Args:
        avro_schema (JsonNode): Avro schema as a dictionary or list of dictionaries.
        output_dir (str): Output directory path.
        package_name (str): Base package name, passed to the converter as given.
        avro_annotation (bool): Include Avro annotations.
        json_annotation (bool): Include JSON annotations.
        package_site (str): First segment of the Go module path.
        package_username (str): Second segment of the Go module path.
    """
    converter = AvroToGo(package_name)
    settings = {
        'avro_annotation': avro_annotation,
        'json_annotation': json_annotation,
        'package_site': package_site,
        'package_username': package_username,
    }
    for attribute, value in settings.items():
        setattr(converter, attribute, value)
    converter.convert_schema(avro_schema, output_dir)