structurize 2.16.2__py3-none-any.whl → 2.16.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. avrotize/__init__.py +63 -63
  2. avrotize/__main__.py +5 -5
  3. avrotize/_version.py +34 -34
  4. avrotize/asn1toavro.py +160 -160
  5. avrotize/avrotize.py +152 -152
  6. avrotize/avrotocpp.py +483 -483
  7. avrotize/avrotocsharp.py +992 -992
  8. avrotize/avrotocsv.py +121 -121
  9. avrotize/avrotodatapackage.py +173 -173
  10. avrotize/avrotodb.py +1383 -1383
  11. avrotize/avrotogo.py +476 -476
  12. avrotize/avrotographql.py +197 -197
  13. avrotize/avrotoiceberg.py +210 -210
  14. avrotize/avrotojava.py +1023 -1023
  15. avrotize/avrotojs.py +250 -250
  16. avrotize/avrotojsons.py +481 -481
  17. avrotize/avrotojstruct.py +345 -345
  18. avrotize/avrotokusto.py +363 -363
  19. avrotize/avrotomd.py +137 -137
  20. avrotize/avrotools.py +168 -168
  21. avrotize/avrotoparquet.py +208 -208
  22. avrotize/avrotoproto.py +358 -358
  23. avrotize/avrotopython.py +622 -622
  24. avrotize/avrotorust.py +435 -435
  25. avrotize/avrotots.py +598 -598
  26. avrotize/avrotoxsd.py +344 -344
  27. avrotize/commands.json +2493 -2433
  28. avrotize/common.py +828 -828
  29. avrotize/constants.py +4 -4
  30. avrotize/csvtoavro.py +131 -131
  31. avrotize/datapackagetoavro.py +76 -76
  32. avrotize/dependency_resolver.py +348 -348
  33. avrotize/jsonstoavro.py +1698 -1698
  34. avrotize/jsonstostructure.py +2642 -2642
  35. avrotize/jstructtoavro.py +878 -878
  36. avrotize/kstructtoavro.py +93 -93
  37. avrotize/kustotoavro.py +455 -455
  38. avrotize/parquettoavro.py +157 -157
  39. avrotize/proto2parser.py +497 -497
  40. avrotize/proto3parser.py +402 -402
  41. avrotize/prototoavro.py +382 -382
  42. avrotize/structuretocsharp.py +2005 -2005
  43. avrotize/structuretojsons.py +498 -498
  44. avrotize/structuretopython.py +772 -772
  45. avrotize/structuretots.py +653 -0
  46. avrotize/xsdtoavro.py +413 -413
  47. structurize-2.16.6.dist-info/METADATA +107 -0
  48. structurize-2.16.6.dist-info/RECORD +52 -0
  49. {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/licenses/LICENSE +200 -200
  50. structurize-2.16.2.dist-info/METADATA +0 -805
  51. structurize-2.16.2.dist-info/RECORD +0 -51
  52. {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/WHEEL +0 -0
  53. {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/entry_points.txt +0 -0
  54. {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/top_level.txt +0 -0
avrotize/avrotoproto.py CHANGED
@@ -1,359 +1,359 @@
1
- import copy
2
- import json
3
- import argparse
4
- import os
5
- from typing import Literal, NamedTuple, Dict, Any, List
6
-
7
- indent = ' '
8
-
9
- Comment = NamedTuple('Comment', [('content', str), ('tags', Dict[str, Any])])
10
- Oneof = NamedTuple('Oneof', [('comment', 'Comment'), ('name', str), ('fields', List['Field'])])
11
- Field = NamedTuple('Field', [('comment', 'Comment'), ('label', str), ('type', str), ('key_type', str), ('val_type', str), ('name', str), ('number', int), ('dependencies', List[str])])
12
- Enum = NamedTuple('Enum', [('comment', 'Comment'), ('name', str), ('fields', Dict[str, 'Field'])])
13
- Message = NamedTuple('Message', [('comment', 'Comment'), ('name', str), ('fields', List['Field']), ('oneofs', List['Oneof']),
14
- ('messages', Dict[str, 'Message']), ('enums', Dict[str, 'Enum']), ('dependencies', List[str])])
15
- Service = NamedTuple('Service', [('name', str), ('functions', Dict[str, 'RpcFunc'])])
16
- RpcFunc = NamedTuple('RpcFunc', [('name', str), ('in_type', str), ('out_type', str), ('uri', str)])
17
- ProtoFile = NamedTuple('ProtoFile',
18
- [('messages', Dict[str, 'Message']), ('enums', Dict[str, 'Enum']),
19
- ('services', Dict[str, 'Service']), ('imports', List[str]),
20
- ('options', Dict[str, str]), ('package', str)])
21
- ProtoFiles = NamedTuple('ProtoFiles', [('files', List['ProtoFile'])])
22
-
23
- class AvroToProto:
24
-
25
- def __init__(self) -> None:
26
- self.naming_mode: Literal['snake', 'pascal', 'camel'] = 'pascal'
27
- self.allow_optional: bool = False
28
- self.default_namespace: str = ''
29
-
30
- def avro_primitive_to_proto_type(self, avro_type: str, dependencies: List[str]) -> str:
31
- """Map Avro primitive types to Protobuf types."""
32
- mapping = {
33
- 'null': 'google.protobuf.Empty', # Special handling may be required
34
- 'boolean': 'bool',
35
- 'int': 'int32',
36
- 'long': 'int64',
37
- 'float': 'float',
38
- 'double': 'double',
39
- 'bytes': 'bytes',
40
- 'string': 'string',
41
- }
42
- # logical types require special handling
43
- if isinstance(avro_type, dict) and 'logicalType' in avro_type:
44
- logical_type = avro_type['logicalType']
45
- if logical_type == 'date':
46
- return 'string'
47
- elif logical_type == 'time-millis':
48
- return 'string'
49
- elif logical_type == 'timestamp-millis':
50
- return 'string'
51
- elif logical_type == 'decimal':
52
- precision = avro_type['precision']
53
- scale = avro_type['scale']
54
- return 'string'
55
- elif logical_type == 'duration':
56
- return 'string'
57
- elif logical_type == 'uuid':
58
- return 'string'
59
-
60
- type = mapping.get(avro_type, '')
61
- if not type:
62
- dependencies.append(avro_type)
63
- type = avro_type
64
- return type
65
-
66
- def compose_name(self, prefix: str, name: str, naming_mode: Literal['pascal', 'camel', 'snake', 'default', 'field'] = 'default') -> str:
67
- if naming_mode == 'default':
68
- naming_mode = self.naming_mode
69
- if naming_mode == 'field':
70
- if self.naming_mode == 'pascal':
71
- naming_mode = 'camel'
72
- else:
73
- naming_mode = self.naming_mode
74
- if naming_mode == 'snake':
75
- return f"{prefix}_{name}"
76
- if naming_mode == 'pascal':
77
- return f"{prefix[0].upper()+prefix[1:] if prefix else ''}{name[0].upper()+name[1:] if name else ''}"
78
- if naming_mode == 'camel':
79
- return f"{prefix[0].lower()+prefix[1:] if prefix else ''}{name[0].upper()+name[1:] if name else ''}"
80
- return prefix+name
81
-
82
- def convert_field(self, message: Message, avro_field: dict, index: int, proto_files: ProtoFiles) -> Field | Oneof | Enum | Message:
83
- """Convert an Avro field to a Protobuf field."""
84
- field_type = avro_field['type']
85
- field_name = avro_field['name'] if 'name' in avro_field else self.compose_name(field_type.split('.')[-1],'value', 'field') if isinstance(field_type, str) else self.compose_name(f"_{index}", 'value', 'field')
86
- if 'doc' in avro_field:
87
- comment = Comment(avro_field["doc"], {})
88
- else:
89
- comment = Comment('',{})
90
-
91
- return self.convert_field_type(message, field_name, field_type, comment, index, proto_files)
92
-
93
- def convert_record_type(self, avro_record: dict, comment: Comment, proto_files: ProtoFiles) -> Message:
94
- """Convert an Avro record to a Protobuf message."""
95
- local_message = Message(comment, avro_record['name'], [], [], {}, {}, [])
96
- offs = 1
97
- for i, f in enumerate(avro_record['fields']):
98
- field = self.convert_field(local_message, f, i+offs, proto_files)
99
- if isinstance(field, Oneof):
100
- for f in field.fields:
101
- local_message.dependencies.extend(f.dependencies)
102
- local_message.oneofs.append(field)
103
- offs += len(field.fields)-1
104
- elif isinstance(field, Enum):
105
- enum = Enum(field.comment, self.compose_name(field.name,'enum'), field.fields)
106
- local_message.enums[enum.name] = enum
107
- local_message.fields.append(Field(field.comment, '', enum.name, '', '', field.name.split('.')[-1], i+offs, []))
108
- elif isinstance(field, Message):
109
- inner_message = Message(field.comment, self.compose_name(field.name,'type'), field.fields, field.oneofs, field.messages, field.enums, [])
110
- local_message.messages[inner_message.name] = inner_message
111
- local_message.fields.append(Field(field.comment, '', inner_message.name, '', '', field.name.split('.')[-1], i+offs, []))
112
- local_message.dependencies.extend(field.dependencies)
113
- else:
114
- local_message.dependencies.extend(field.dependencies)
115
- local_message.fields.append(field)
116
- return local_message
117
-
118
- def convert_field_type(self, message: Message, field_name: str, field_type: str | dict | list, comment: Comment, index: int, proto_files: ProtoFiles) -> Field | Oneof | Enum | Message:
119
- """Convert an Avro field type to a Protobuf field type."""
120
- label = ''
121
-
122
- if isinstance(field_type, list):
123
- # Handling union types (including nullable fields)
124
- non_null_types = [t for t in field_type if t != 'null']
125
- if len(non_null_types) == 1:
126
- if self.allow_optional:
127
- label = 'optional'
128
- field_type = non_null_types[0]
129
- elif len(non_null_types) > 0:
130
- oneof_fields = []
131
- for i, t in enumerate(non_null_types):
132
- field = self.convert_field_type(message, self.compose_name(field_name,'choice', 'field'), t, comment, i+index, proto_files)
133
- if isinstance(field, Field):
134
- if field.type == 'map' or field.type == 'array':
135
- local_message = Message(comment, self.compose_name(field.name,field.type), [], [], {}, {}, field.dependencies)
136
- local_message.fields.append(field)
137
- new_field = Field(field.comment, '', local_message.name, '', '', self.compose_name(field.name.split('.')[-1],field.type, 'field'), i+index, field.dependencies)
138
- message.messages[local_message.name] = local_message
139
- oneof_fields.append(new_field)
140
- else:
141
- field = Field(field.comment, field.label, field.type, field.key_type, field.val_type, self.compose_name(field_name, (field.type.split('.')[-1]), 'field'), i+index, field.dependencies)
142
- oneof_fields.append(field)
143
- elif isinstance(field, Oneof):
144
- deps: List[str] = []
145
- oneof = field
146
- for f in oneof.fields:
147
- deps.extend(f.dependencies)
148
- local_message = Message(comment, self.compose_name(field.name,'choice'), [], [], {}, {}, deps)
149
- index += len(field.fields)
150
- local_message.oneofs.append(field)
151
- new_field = Field(field.comment, '', local_message.name, '', '', field.name.split('.')[-1], i+index, deps)
152
- message.messages[local_message.name] = local_message
153
- oneof_fields.append(new_field)
154
- elif isinstance(field, Enum):
155
- enum = Enum(field.comment, self.compose_name(field.name,"options"), field.fields)
156
- message.enums[enum.name] = enum
157
- field = Field(field.comment, '', enum.name, '', '', field.name.split('.')[-1], i+index, [])
158
- oneof_fields.append(field)
159
- elif isinstance(field, Message):
160
- local_message = Message(field.comment, self.compose_name(field.name,'type'), field.fields, field.oneofs, field.messages, field.enums, field.dependencies)
161
- message.messages[local_message.name] = local_message
162
- field = Field(field.comment, '', local_message.name, '', '', field.name.split('.')[-1], i+index, field.dependencies)
163
- oneof_fields.append(field)
164
- oneof = Oneof(comment, field_name, copy.deepcopy(oneof_fields))
165
- return oneof
166
- else:
167
- raise ValueError(f"Field {field_name} is a union type without any non-null types")
168
-
169
- if isinstance(field_type, dict):
170
- # Nested types (e.g., records, enums) require special handling
171
- if field_type['type'] == 'record':
172
- return self.convert_record_type(field_type, comment, proto_files)
173
- elif field_type['type'] == 'enum':
174
- enum_symbols = {symbol: Field(comment, '', symbol, '', '', symbol, s, []) for s, symbol in enumerate(field_type['symbols'])}
175
- return Enum(comment, field_type['name'], enum_symbols)
176
- elif field_type['type'] == 'array':
177
- converted_field_type = self.convert_field_type(message, self.compose_name(field_name, "item"), field_type['items'], comment, index, proto_files)
178
- if isinstance(converted_field_type, Field):
179
- return Field(comment, 'repeated', 'array', '', converted_field_type.type, field_name, index, converted_field_type.dependencies)
180
- elif isinstance(converted_field_type, Enum):
181
- enum = Enum(converted_field_type.comment, self.compose_name(converted_field_type.name,'enum'), converted_field_type.fields)
182
- message.enums[enum.name] = enum
183
- return Field(comment, 'repeated', 'array', '', enum.name, field_name, index, [])
184
- elif isinstance(converted_field_type, Message):
185
- local_message = Message(converted_field_type.comment, self.compose_name(converted_field_type.name,'type'), converted_field_type.fields, converted_field_type.oneofs, converted_field_type.messages, converted_field_type.enums, converted_field_type.dependencies)
186
- message.messages[local_message.name] = local_message
187
- return Field(comment, 'repeated', 'array', '', local_message.name, field_name, index, [])
188
- elif isinstance(converted_field_type, Oneof):
189
- deps3: List[str] = []
190
- fl = []
191
- for i, f in enumerate(converted_field_type.fields):
192
- fl.append(Field(Comment('',{}), '', f.type, '', '', f.name, i+1, []))
193
- deps3.extend(f.dependencies)
194
- oneof = Oneof(converted_field_type.comment, 'item', fl)
195
- local_message = Message(comment, self.compose_name(field_name,'type'), [], [], {}, {}, deps3)
196
- local_message.oneofs.append(oneof)
197
- new_field = Field(Comment('',{}), 'repeated', 'array', '', local_message.name, field_name.split('.')[-1], index, local_message.dependencies)
198
- message.messages[local_message.name] = local_message
199
- return new_field
200
- elif field_type['type'] == 'map':
201
- converted_field_type = self.convert_field_type(message, self.compose_name(field_name,'item', 'field'), field_type['values'], comment, index, proto_files)
202
- if isinstance(converted_field_type, Field):
203
- return Field(comment, label, 'map', 'string', converted_field_type.type, field_name, index, converted_field_type.dependencies)
204
- elif isinstance(converted_field_type, Enum):
205
- enum = Enum(converted_field_type.comment, self.compose_name(converted_field_type.name,'enum'), converted_field_type.fields)
206
- message.enums[enum.name] = enum
207
- return Field(comment, label, 'map', 'string', enum.name, field_name, index, [])
208
- elif isinstance(converted_field_type, Message):
209
- local_message = Message(converted_field_type.comment, self.compose_name(converted_field_type.name,'type'), converted_field_type.fields, converted_field_type.oneofs, converted_field_type.messages, converted_field_type.enums, [])
210
- message.messages[local_message.name] = local_message
211
- return Field(comment, label, 'map', 'string', local_message.name, field_name, index, local_message.dependencies)
212
- elif isinstance(converted_field_type, Oneof):
213
- deps4: List[str] = []
214
- fl = []
215
- for i, f in enumerate(converted_field_type.fields):
216
- fl.append(Field(Comment('',{}), '', f.type, '', '', f.name, i+1, []))
217
- deps4.extend(f.dependencies)
218
- oneof = Oneof(converted_field_type.comment, 'item', fl)
219
- local_message = Message(comment, self.compose_name(field_name, 'type'), [], [], {}, {}, deps4)
220
- local_message.oneofs.append(oneof)
221
- new_field = Field(Comment('',{}), label, 'map', 'string', local_message.name, field_name.split('.')[-1], index, local_message.dependencies)
222
- message.messages[local_message.name] = local_message
223
- return new_field
224
- elif field_type['type'] == "fixed":
225
- return Field(comment, label, 'fixed','string', 'string', field_name, index, [])
226
- else:
227
- deps1: List[str] = []
228
- proto_type = self.avro_primitive_to_proto_type(field_type['type'], deps1)
229
- return Field(comment, label, proto_type, '', '', field_name, index, deps1)
230
- elif isinstance(field_type, str):
231
- deps2: List[str] = []
232
- proto_type = self.avro_primitive_to_proto_type(field_type, deps2)
233
- return Field(comment, label, proto_type, '', '', field_name, index, deps2)
234
- raise ValueError(f"Unknown field type {field_type}")
235
-
236
- def avro_schema_to_proto_message(self, avro_schema: dict, proto_files: ProtoFiles) -> str:
237
- """Convert an Avro schema to a Protobuf message definition."""
238
- comment = Comment('',{})
239
- if 'doc' in avro_schema:
240
- comment = Comment(avro_schema["doc"], {})
241
- namespace = avro_schema.get("namespace", '')
242
- if not namespace:
243
- namespace = self.default_namespace
244
- if avro_schema['type'] == 'record':
245
- message = self.convert_record_type(avro_schema, comment, proto_files)
246
- file = next((f for f in proto_files.files if f.package == namespace), None)
247
- if not file:
248
- file = ProtoFile({}, {}, {}, [], {}, namespace)
249
- proto_files.files.append(file)
250
- file.messages[message.name] = message
251
- elif avro_schema['type'] == 'enum':
252
- enum_name = avro_schema['name']
253
- enum_symbols = {symbol: Field(comment, '', symbol, '', '', symbol, s, []) for s, symbol in enumerate(avro_schema['symbols'])}
254
- enum = Enum(comment, enum_name, enum_symbols)
255
- file = next((f for f in proto_files.files if f.package == namespace), None)
256
- if not file:
257
- file = ProtoFile({}, {}, {}, [], {}, namespace)
258
- proto_files.files.append(file)
259
- file.enums[enum_name] = enum
260
- return avro_schema["name"]
261
-
262
- def avro_schema_to_proto_messages(self, avro_schema_input, proto_files: ProtoFiles):
263
- """Convert an Avro schema to Protobuf message definitions."""
264
- if not isinstance(avro_schema_input, list):
265
- avro_schema_list = [avro_schema_input]
266
- else:
267
- avro_schema_list = avro_schema_input
268
- for avro_schema in avro_schema_list:
269
- self.avro_schema_to_proto_message(avro_schema, proto_files)
270
-
271
- def save_proto_to_file(self, proto_files: ProtoFiles, proto_path):
272
- """Save the Protobuf schema to a file."""
273
- for proto in proto_files.files:
274
- # gather dependencies that are within the package
275
- deps: List[str] = []
276
- for message in proto.messages.values():
277
- for dep in message.dependencies:
278
- if '.' in dep:
279
- deps.append(dep.rsplit('.',1)[0])
280
- deps = list(set(deps))
281
-
282
- #proto.imports.extend([f.package[len(proto.package)+1:] for f in proto_files.files if f.package.startswith(proto.package) and f.package != proto.package])
283
- proto.imports.extend([d for d in deps if d != proto.package])
284
- proto_file_path = os.path.join(proto_path, f"{proto.package}.proto")
285
- # create the directory for the proto file if it doesn't exist
286
- proto_dir = os.path.dirname(proto_file_path)
287
- if not os.path.exists(proto_dir):
288
- os.makedirs(proto_dir, exist_ok=True)
289
- with open(proto_file_path, 'w') as proto_file:
290
- # dump the ProtoFile structure in proto syntax
291
- proto_str = f'syntax = "proto3";\n\n'
292
- proto_str += f'package {proto.package};\n\n'
293
-
294
- for import_package in proto.imports:
295
- proto_str += f"import \"{import_package}.proto\";\n"
296
- if (len(proto.imports)):
297
- proto_str += "\n"
298
- for enum_name, enum in proto.enums.items():
299
- proto_str += f"enum {enum_name} {{\n"
300
- for _, field in enum.fields.items():
301
- proto_str += f"{indent}{field.name} = {field.number};\n"
302
- proto_str += "}\n\n"
303
- for message in proto.messages.values():
304
- proto_str += self.render_message(message)
305
- for service in proto.services.values():
306
- proto_str += f"service {service.name} {{\n"
307
- for function_name, func in service.functions.items():
308
- proto_str += f"{indent}rpc {func.name} ({func.in_type}) returns ({func.out_type}) {{\n"
309
- proto_str += f"{indent}{indent}option (google.api.http) = {{\n"
310
- proto_str += f"{indent}{indent}{indent}post: \"{func.uri}\"\n"
311
- proto_str += f"{indent}{indent}}};\n"
312
- proto_str += f"{indent}}};\n"
313
- proto_str += "}\n\n"
314
- proto_file.write(proto_str)
315
-
316
- def render_message(self, message, level=0) -> str:
317
- proto_str = f"{indent*level}message {message.name} {{\n"
318
- fieldsAndOneofs = message.fields+message.oneofs
319
- fieldsAndOneofs.sort(key=lambda f: f.number if isinstance(f, Field) else f.fields[0].number)
320
- for fo in fieldsAndOneofs:
321
- if isinstance(fo, Field):
322
- field = fo
323
- if field.type == "map":
324
- proto_str += f"{indent*level}{indent}{field.label}{' ' if field.label else ''}map<{field.key_type}, {field.val_type}> {field.name} = {field.number};\n"
325
- elif field.type == "array":
326
- proto_str += f"{indent*level}{indent}{field.label}{' ' if field.label else ''}{field.val_type} {field.name} = {field.number};\n"
327
- else:
328
- proto_str += f"{indent*level}{indent}{field.label}{' ' if field.label else ''}{field.type} {field.name} = {field.number};\n"
329
- else:
330
- oneof = fo
331
- proto_str += f"{indent*level}{indent}oneof {oneof.name} {{\n"
332
- for field in oneof.fields:
333
- proto_str += f"{indent*level}{indent}{indent}{field.label}{' ' if field.label else ''}{field.type} {field.name} = {field.number};\n"
334
- proto_str += f"{indent*level}{indent}}}\n"
335
- for enum in message.enums.values():
336
- proto_str += f"{indent*level}{indent}enum {enum.name} {{\n"
337
- for _, field in enum.fields.items():
338
- proto_str += f"{indent*level}{indent}{indent}{field.label}{' ' if field.label else ''}{field.name} = {field.number};\n"
339
- proto_str += f"{indent*level}{indent}}}\n"
340
- for local_message in message.messages.values():
341
- proto_str += self.render_message(local_message, level+1)
342
- proto_str += f"{indent*level}}}\n"
343
- return proto_str
344
-
345
-
346
- def convert_avro_to_proto(self, avro_schema_path, proto_file_path):
347
- """Convert Avro schema file to Protobuf .proto file."""
348
- with open(avro_schema_path, 'r') as avro_file:
349
- avro_schema = json.load(avro_file)
350
- proto_files = ProtoFiles([])
351
- self.avro_schema_to_proto_messages(avro_schema, proto_files)
352
- self.save_proto_to_file(proto_files, proto_file_path)
353
-
354
- def convert_avro_to_proto(avro_schema_path, proto_file_path, naming_mode: Literal['snake', 'pascal', 'camel'] = 'pascal', allow_optional: bool = False):
355
- avrotoproto = AvroToProto()
356
- avrotoproto.naming_mode = naming_mode
357
- avrotoproto.allow_optional = allow_optional
358
- avrotoproto.default_namespace = os.path.splitext(os.path.basename(proto_file_path))[0].replace('-','_')
1
+ import copy
2
+ import json
3
+ import argparse
4
+ import os
5
+ from typing import Literal, NamedTuple, Dict, Any, List
6
+
7
+ indent = ' '
8
+
9
+ Comment = NamedTuple('Comment', [('content', str), ('tags', Dict[str, Any])])
10
+ Oneof = NamedTuple('Oneof', [('comment', 'Comment'), ('name', str), ('fields', List['Field'])])
11
+ Field = NamedTuple('Field', [('comment', 'Comment'), ('label', str), ('type', str), ('key_type', str), ('val_type', str), ('name', str), ('number', int), ('dependencies', List[str])])
12
+ Enum = NamedTuple('Enum', [('comment', 'Comment'), ('name', str), ('fields', Dict[str, 'Field'])])
13
+ Message = NamedTuple('Message', [('comment', 'Comment'), ('name', str), ('fields', List['Field']), ('oneofs', List['Oneof']),
14
+ ('messages', Dict[str, 'Message']), ('enums', Dict[str, 'Enum']), ('dependencies', List[str])])
15
+ Service = NamedTuple('Service', [('name', str), ('functions', Dict[str, 'RpcFunc'])])
16
+ RpcFunc = NamedTuple('RpcFunc', [('name', str), ('in_type', str), ('out_type', str), ('uri', str)])
17
+ ProtoFile = NamedTuple('ProtoFile',
18
+ [('messages', Dict[str, 'Message']), ('enums', Dict[str, 'Enum']),
19
+ ('services', Dict[str, 'Service']), ('imports', List[str]),
20
+ ('options', Dict[str, str]), ('package', str)])
21
+ ProtoFiles = NamedTuple('ProtoFiles', [('files', List['ProtoFile'])])
22
+
23
+ class AvroToProto:
24
+
25
+ def __init__(self) -> None:
26
+ self.naming_mode: Literal['snake', 'pascal', 'camel'] = 'pascal'
27
+ self.allow_optional: bool = False
28
+ self.default_namespace: str = ''
29
+
30
+ def avro_primitive_to_proto_type(self, avro_type: str, dependencies: List[str]) -> str:
31
+ """Map Avro primitive types to Protobuf types."""
32
+ mapping = {
33
+ 'null': 'google.protobuf.Empty', # Special handling may be required
34
+ 'boolean': 'bool',
35
+ 'int': 'int32',
36
+ 'long': 'int64',
37
+ 'float': 'float',
38
+ 'double': 'double',
39
+ 'bytes': 'bytes',
40
+ 'string': 'string',
41
+ }
42
+ # logical types require special handling
43
+ if isinstance(avro_type, dict) and 'logicalType' in avro_type:
44
+ logical_type = avro_type['logicalType']
45
+ if logical_type == 'date':
46
+ return 'string'
47
+ elif logical_type == 'time-millis':
48
+ return 'string'
49
+ elif logical_type == 'timestamp-millis':
50
+ return 'string'
51
+ elif logical_type == 'decimal':
52
+ precision = avro_type['precision']
53
+ scale = avro_type['scale']
54
+ return 'string'
55
+ elif logical_type == 'duration':
56
+ return 'string'
57
+ elif logical_type == 'uuid':
58
+ return 'string'
59
+
60
+ type = mapping.get(avro_type, '')
61
+ if not type:
62
+ dependencies.append(avro_type)
63
+ type = avro_type
64
+ return type
65
+
66
+ def compose_name(self, prefix: str, name: str, naming_mode: Literal['pascal', 'camel', 'snake', 'default', 'field'] = 'default') -> str:
67
+ if naming_mode == 'default':
68
+ naming_mode = self.naming_mode
69
+ if naming_mode == 'field':
70
+ if self.naming_mode == 'pascal':
71
+ naming_mode = 'camel'
72
+ else:
73
+ naming_mode = self.naming_mode
74
+ if naming_mode == 'snake':
75
+ return f"{prefix}_{name}"
76
+ if naming_mode == 'pascal':
77
+ return f"{prefix[0].upper()+prefix[1:] if prefix else ''}{name[0].upper()+name[1:] if name else ''}"
78
+ if naming_mode == 'camel':
79
+ return f"{prefix[0].lower()+prefix[1:] if prefix else ''}{name[0].upper()+name[1:] if name else ''}"
80
+ return prefix+name
81
+
82
+ def convert_field(self, message: Message, avro_field: dict, index: int, proto_files: ProtoFiles) -> Field | Oneof | Enum | Message:
83
+ """Convert an Avro field to a Protobuf field."""
84
+ field_type = avro_field['type']
85
+ field_name = avro_field['name'] if 'name' in avro_field else self.compose_name(field_type.split('.')[-1],'value', 'field') if isinstance(field_type, str) else self.compose_name(f"_{index}", 'value', 'field')
86
+ if 'doc' in avro_field:
87
+ comment = Comment(avro_field["doc"], {})
88
+ else:
89
+ comment = Comment('',{})
90
+
91
+ return self.convert_field_type(message, field_name, field_type, comment, index, proto_files)
92
+
93
+ def convert_record_type(self, avro_record: dict, comment: Comment, proto_files: ProtoFiles) -> Message:
94
+ """Convert an Avro record to a Protobuf message."""
95
+ local_message = Message(comment, avro_record['name'], [], [], {}, {}, [])
96
+ offs = 1
97
+ for i, f in enumerate(avro_record['fields']):
98
+ field = self.convert_field(local_message, f, i+offs, proto_files)
99
+ if isinstance(field, Oneof):
100
+ for f in field.fields:
101
+ local_message.dependencies.extend(f.dependencies)
102
+ local_message.oneofs.append(field)
103
+ offs += len(field.fields)-1
104
+ elif isinstance(field, Enum):
105
+ enum = Enum(field.comment, self.compose_name(field.name,'enum'), field.fields)
106
+ local_message.enums[enum.name] = enum
107
+ local_message.fields.append(Field(field.comment, '', enum.name, '', '', field.name.split('.')[-1], i+offs, []))
108
+ elif isinstance(field, Message):
109
+ inner_message = Message(field.comment, self.compose_name(field.name,'type'), field.fields, field.oneofs, field.messages, field.enums, [])
110
+ local_message.messages[inner_message.name] = inner_message
111
+ local_message.fields.append(Field(field.comment, '', inner_message.name, '', '', field.name.split('.')[-1], i+offs, []))
112
+ local_message.dependencies.extend(field.dependencies)
113
+ else:
114
+ local_message.dependencies.extend(field.dependencies)
115
+ local_message.fields.append(field)
116
+ return local_message
117
+
118
+ def convert_field_type(self, message: Message, field_name: str, field_type: str | dict | list, comment: Comment, index: int, proto_files: ProtoFiles) -> Field | Oneof | Enum | Message:
119
+ """Convert an Avro field type to a Protobuf field type."""
120
+ label = ''
121
+
122
+ if isinstance(field_type, list):
123
+ # Handling union types (including nullable fields)
124
+ non_null_types = [t for t in field_type if t != 'null']
125
+ if len(non_null_types) == 1:
126
+ if self.allow_optional:
127
+ label = 'optional'
128
+ field_type = non_null_types[0]
129
+ elif len(non_null_types) > 0:
130
+ oneof_fields = []
131
+ for i, t in enumerate(non_null_types):
132
+ field = self.convert_field_type(message, self.compose_name(field_name,'choice', 'field'), t, comment, i+index, proto_files)
133
+ if isinstance(field, Field):
134
+ if field.type == 'map' or field.type == 'array':
135
+ local_message = Message(comment, self.compose_name(field.name,field.type), [], [], {}, {}, field.dependencies)
136
+ local_message.fields.append(field)
137
+ new_field = Field(field.comment, '', local_message.name, '', '', self.compose_name(field.name.split('.')[-1],field.type, 'field'), i+index, field.dependencies)
138
+ message.messages[local_message.name] = local_message
139
+ oneof_fields.append(new_field)
140
+ else:
141
+ field = Field(field.comment, field.label, field.type, field.key_type, field.val_type, self.compose_name(field_name, (field.type.split('.')[-1]), 'field'), i+index, field.dependencies)
142
+ oneof_fields.append(field)
143
+ elif isinstance(field, Oneof):
144
+ deps: List[str] = []
145
+ oneof = field
146
+ for f in oneof.fields:
147
+ deps.extend(f.dependencies)
148
+ local_message = Message(comment, self.compose_name(field.name,'choice'), [], [], {}, {}, deps)
149
+ index += len(field.fields)
150
+ local_message.oneofs.append(field)
151
+ new_field = Field(field.comment, '', local_message.name, '', '', field.name.split('.')[-1], i+index, deps)
152
+ message.messages[local_message.name] = local_message
153
+ oneof_fields.append(new_field)
154
+ elif isinstance(field, Enum):
155
+ enum = Enum(field.comment, self.compose_name(field.name,"options"), field.fields)
156
+ message.enums[enum.name] = enum
157
+ field = Field(field.comment, '', enum.name, '', '', field.name.split('.')[-1], i+index, [])
158
+ oneof_fields.append(field)
159
+ elif isinstance(field, Message):
160
+ local_message = Message(field.comment, self.compose_name(field.name,'type'), field.fields, field.oneofs, field.messages, field.enums, field.dependencies)
161
+ message.messages[local_message.name] = local_message
162
+ field = Field(field.comment, '', local_message.name, '', '', field.name.split('.')[-1], i+index, field.dependencies)
163
+ oneof_fields.append(field)
164
+ oneof = Oneof(comment, field_name, copy.deepcopy(oneof_fields))
165
+ return oneof
166
+ else:
167
+ raise ValueError(f"Field {field_name} is a union type without any non-null types")
168
+
169
+ if isinstance(field_type, dict):
170
+ # Nested types (e.g., records, enums) require special handling
171
+ if field_type['type'] == 'record':
172
+ return self.convert_record_type(field_type, comment, proto_files)
173
+ elif field_type['type'] == 'enum':
174
+ enum_symbols = {symbol: Field(comment, '', symbol, '', '', symbol, s, []) for s, symbol in enumerate(field_type['symbols'])}
175
+ return Enum(comment, field_type['name'], enum_symbols)
176
+ elif field_type['type'] == 'array':
177
+ converted_field_type = self.convert_field_type(message, self.compose_name(field_name, "item"), field_type['items'], comment, index, proto_files)
178
+ if isinstance(converted_field_type, Field):
179
+ return Field(comment, 'repeated', 'array', '', converted_field_type.type, field_name, index, converted_field_type.dependencies)
180
+ elif isinstance(converted_field_type, Enum):
181
+ enum = Enum(converted_field_type.comment, self.compose_name(converted_field_type.name,'enum'), converted_field_type.fields)
182
+ message.enums[enum.name] = enum
183
+ return Field(comment, 'repeated', 'array', '', enum.name, field_name, index, [])
184
+ elif isinstance(converted_field_type, Message):
185
+ local_message = Message(converted_field_type.comment, self.compose_name(converted_field_type.name,'type'), converted_field_type.fields, converted_field_type.oneofs, converted_field_type.messages, converted_field_type.enums, converted_field_type.dependencies)
186
+ message.messages[local_message.name] = local_message
187
+ return Field(comment, 'repeated', 'array', '', local_message.name, field_name, index, [])
188
+ elif isinstance(converted_field_type, Oneof):
189
+ deps3: List[str] = []
190
+ fl = []
191
+ for i, f in enumerate(converted_field_type.fields):
192
+ fl.append(Field(Comment('',{}), '', f.type, '', '', f.name, i+1, []))
193
+ deps3.extend(f.dependencies)
194
+ oneof = Oneof(converted_field_type.comment, 'item', fl)
195
+ local_message = Message(comment, self.compose_name(field_name,'type'), [], [], {}, {}, deps3)
196
+ local_message.oneofs.append(oneof)
197
+ new_field = Field(Comment('',{}), 'repeated', 'array', '', local_message.name, field_name.split('.')[-1], index, local_message.dependencies)
198
+ message.messages[local_message.name] = local_message
199
+ return new_field
200
+ elif field_type['type'] == 'map':
201
+ converted_field_type = self.convert_field_type(message, self.compose_name(field_name,'item', 'field'), field_type['values'], comment, index, proto_files)
202
+ if isinstance(converted_field_type, Field):
203
+ return Field(comment, label, 'map', 'string', converted_field_type.type, field_name, index, converted_field_type.dependencies)
204
+ elif isinstance(converted_field_type, Enum):
205
+ enum = Enum(converted_field_type.comment, self.compose_name(converted_field_type.name,'enum'), converted_field_type.fields)
206
+ message.enums[enum.name] = enum
207
+ return Field(comment, label, 'map', 'string', enum.name, field_name, index, [])
208
+ elif isinstance(converted_field_type, Message):
209
+ local_message = Message(converted_field_type.comment, self.compose_name(converted_field_type.name,'type'), converted_field_type.fields, converted_field_type.oneofs, converted_field_type.messages, converted_field_type.enums, [])
210
+ message.messages[local_message.name] = local_message
211
+ return Field(comment, label, 'map', 'string', local_message.name, field_name, index, local_message.dependencies)
212
+ elif isinstance(converted_field_type, Oneof):
213
+ deps4: List[str] = []
214
+ fl = []
215
+ for i, f in enumerate(converted_field_type.fields):
216
+ fl.append(Field(Comment('',{}), '', f.type, '', '', f.name, i+1, []))
217
+ deps4.extend(f.dependencies)
218
+ oneof = Oneof(converted_field_type.comment, 'item', fl)
219
+ local_message = Message(comment, self.compose_name(field_name, 'type'), [], [], {}, {}, deps4)
220
+ local_message.oneofs.append(oneof)
221
+ new_field = Field(Comment('',{}), label, 'map', 'string', local_message.name, field_name.split('.')[-1], index, local_message.dependencies)
222
+ message.messages[local_message.name] = local_message
223
+ return new_field
224
+ elif field_type['type'] == "fixed":
225
+ return Field(comment, label, 'fixed','string', 'string', field_name, index, [])
226
+ else:
227
+ deps1: List[str] = []
228
+ proto_type = self.avro_primitive_to_proto_type(field_type['type'], deps1)
229
+ return Field(comment, label, proto_type, '', '', field_name, index, deps1)
230
+ elif isinstance(field_type, str):
231
+ deps2: List[str] = []
232
+ proto_type = self.avro_primitive_to_proto_type(field_type, deps2)
233
+ return Field(comment, label, proto_type, '', '', field_name, index, deps2)
234
+ raise ValueError(f"Unknown field type {field_type}")
235
+
236
def avro_schema_to_proto_message(self, avro_schema: dict, proto_files: ProtoFiles) -> str:
    """Register one top-level Avro schema in ``proto_files`` and return its name.

    A ``record`` schema is converted with ``convert_record_type`` and stored
    in the ``messages`` of the file whose package equals the schema's
    namespace; an ``enum`` schema is turned into an ``Enum`` whose symbols are
    numbered by their position and stored in that file's ``enums``. The file
    is created and appended to ``proto_files.files`` when no file with that
    package exists yet. Any other schema type registers nothing.

    The namespace falls back to ``self.default_namespace`` when the schema
    has no (or an empty) ``namespace``.
    """
    doc_text = avro_schema["doc"] if 'doc' in avro_schema else ''
    comment = Comment(doc_text, {})
    namespace = avro_schema.get("namespace", '') or self.default_namespace

    def file_for_namespace():
        # Reuse the file already registered for this package, otherwise
        # register a fresh, empty one.
        found = None
        for candidate in proto_files.files:
            if candidate.package == namespace:
                found = candidate
                break
        if not found:
            found = ProtoFile({}, {}, {}, [], {}, namespace)
            proto_files.files.append(found)
        return found

    schema_kind = avro_schema['type']
    if schema_kind == 'record':
        # Convert first: the lookup must see any files the conversion added.
        message = self.convert_record_type(avro_schema, comment, proto_files)
        proto_file = file_for_namespace()
        proto_file.messages[message.name] = message
    elif schema_kind == 'enum':
        enum_name = avro_schema['name']
        enum_symbols = {}
        for position, symbol in enumerate(avro_schema['symbols']):
            enum_symbols[symbol] = Field(comment, '', symbol, '', '', symbol, position, [])
        enum = Enum(comment, enum_name, enum_symbols)
        proto_file = file_for_namespace()
        proto_file.enums[enum_name] = enum
    return avro_schema["name"]
261
+
262
def avro_schema_to_proto_messages(self, avro_schema_input, proto_files: ProtoFiles):
    """Convert one Avro schema, or a list of them, into ``proto_files``.

    A non-list input is treated as a single schema. Each schema is handed to
    ``avro_schema_to_proto_message`` in order; nothing is returned.
    """
    if isinstance(avro_schema_input, list):
        schemas = avro_schema_input
    else:
        schemas = [avro_schema_input]
    for schema in schemas:
        self.avro_schema_to_proto_message(schema, proto_files)
270
+
271
def save_proto_to_file(self, proto_files: ProtoFiles, proto_path):
    """Write one ``<package>.proto`` file per entry of ``proto_files.files``.

    Args:
        proto_files: Container whose ``files`` are rendered. Each file's
            ``imports`` list is extended in place with the packages its
            top-level messages depend on.
        proto_path: Directory the ``.proto`` files are written into; it is
            created when missing. An empty string means the current
            directory.

    For every file the packages referenced by the dependencies of its
    top-level messages (the part before the last ``.`` of each dotted
    dependency) are added to ``imports``, skipping the file's own package and
    packages already listed. They are added in sorted order so that the
    generated text is reproducible and repeated calls do not duplicate import
    lines.
    """
    for proto in proto_files.files:
        # Packages referenced by the top-level messages of this file.
        referenced = {
            dep.rsplit('.', 1)[0]
            for message in proto.messages.values()
            for dep in message.dependencies
            if '.' in dep
        }
        for package in sorted(referenced):
            if package != proto.package and package not in proto.imports:
                proto.imports.append(package)

        proto_file_path = os.path.join(proto_path, f"{proto.package}.proto")
        proto_dir = os.path.dirname(proto_file_path)
        if proto_dir:
            # An empty dirname means "current directory"; makedirs('') would raise.
            os.makedirs(proto_dir, exist_ok=True)

        # Render completely before opening the file so that a rendering
        # error does not leave a truncated file behind.
        parts = ['syntax = "proto3";\n\n', f'package {proto.package};\n\n']
        for import_package in proto.imports:
            parts.append(f"import \"{import_package}.proto\";\n")
        if proto.imports:
            parts.append("\n")
        for enum_name, enum in proto.enums.items():
            parts.append(f"enum {enum_name} {{\n")
            for field in enum.fields.values():
                parts.append(f"{indent}{field.name} = {field.number};\n")
            parts.append("}\n\n")
        for message in proto.messages.values():
            parts.append(self.render_message(message))
        for service in proto.services.values():
            parts.append(f"service {service.name} {{\n")
            for func in service.functions.values():
                parts.append(f"{indent}rpc {func.name} ({func.in_type}) returns ({func.out_type}) {{\n")
                parts.append(f"{indent}{indent}option (google.api.http) = {{\n")
                parts.append(f"{indent}{indent}{indent}post: \"{func.uri}\"\n")
                parts.append(f"{indent}{indent}}};\n")
                parts.append(f"{indent}}};\n")
            parts.append("}\n\n")

        with open(proto_file_path, 'w', encoding='utf-8') as proto_file:
            proto_file.write("".join(parts))
315
+
316
def render_message(self, message, level=0) -> str:
    """Return the proto text of ``message``, indented ``level`` units deep.

    Plain fields and oneofs are emitted together, ordered by field number
    (a oneof is ordered by the number of its first field). They are followed
    by the message's nested enums and then its nested messages, which are
    rendered recursively one level deeper. The indentation unit is the
    module-level ``indent``.
    """
    outer_pad = f"{indent*level}"
    member_pad = f"{indent*level}{indent}"
    nested_pad = f"{indent*level}{indent}{indent}"

    def sort_number(member):
        # A oneof has no number of its own; use its first field's number.
        if isinstance(member, Field):
            return member.number
        return member.fields[0].number

    def label_prefix(entry):
        # The label followed by a separating space, or just the label when
        # it is empty.
        return f"{entry.label}{' ' if entry.label else ''}"

    chunks = [f"{outer_pad}message {message.name} {{\n"]

    for member in sorted(message.fields + message.oneofs, key=sort_number):
        if not isinstance(member, Field):
            chunks.append(f"{member_pad}oneof {member.name} {{\n")
            for option in member.fields:
                chunks.append(f"{nested_pad}{label_prefix(option)}{option.type} {option.name} = {option.number};\n")
            chunks.append(f"{member_pad}}}\n")
            continue
        if member.type == "map":
            rendered_type = f"map<{member.key_type}, {member.val_type}>"
        elif member.type == "array":
            # Arrays carry their element type in val_type.
            rendered_type = f"{member.val_type}"
        else:
            rendered_type = f"{member.type}"
        chunks.append(f"{member_pad}{label_prefix(member)}{rendered_type} {member.name} = {member.number};\n")

    for nested_enum in message.enums.values():
        chunks.append(f"{member_pad}enum {nested_enum.name} {{\n")
        for symbol in nested_enum.fields.values():
            chunks.append(f"{nested_pad}{label_prefix(symbol)}{symbol.name} = {symbol.number};\n")
        chunks.append(f"{member_pad}}}\n")

    for nested_message in message.messages.values():
        chunks.append(self.render_message(nested_message, level+1))

    chunks.append(f"{outer_pad}}}\n")
    return "".join(chunks)
344
+
345
+
346
def convert_avro_to_proto(self, avro_schema_path, proto_file_path):
    """Convert the Avro schema file at ``avro_schema_path`` to .proto files.

    The schema is read as UTF-8 JSON, converted with
    ``avro_schema_to_proto_messages`` into a fresh ``ProtoFiles``, and the
    result is handed to ``save_proto_to_file`` together with
    ``proto_file_path``.
    """
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(avro_schema_path, 'r', encoding='utf-8') as avro_file:
        avro_schema = json.load(avro_file)
    proto_files = ProtoFiles([])
    self.avro_schema_to_proto_messages(avro_schema, proto_files)
    self.save_proto_to_file(proto_files, proto_file_path)
353
+
354
def convert_avro_to_proto(avro_schema_path, proto_file_path, naming_mode: Literal['snake', 'pascal', 'camel'] = 'pascal', allow_optional: bool = False):
    """Convert an Avro schema file to Protobuf using a configured ``AvroToProto``.

    ``naming_mode`` and ``allow_optional`` are copied onto the converter. Its
    default namespace is the base name of ``proto_file_path`` without
    extension, with ``-`` replaced by ``_``. The conversion itself is
    delegated to ``AvroToProto.convert_avro_to_proto``.
    """
    converter = AvroToProto()
    converter.naming_mode = naming_mode
    converter.allow_optional = allow_optional
    base_name = os.path.basename(proto_file_path)
    stem = os.path.splitext(base_name)[0]
    converter.default_namespace = stem.replace('-', '_')
    converter.convert_avro_to_proto(avro_schema_path, proto_file_path)