structurize 2.16.2__py3-none-any.whl → 2.16.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avrotize/__init__.py +63 -63
- avrotize/__main__.py +5 -5
- avrotize/_version.py +34 -34
- avrotize/asn1toavro.py +160 -160
- avrotize/avrotize.py +152 -152
- avrotize/avrotocpp.py +483 -483
- avrotize/avrotocsharp.py +992 -992
- avrotize/avrotocsv.py +121 -121
- avrotize/avrotodatapackage.py +173 -173
- avrotize/avrotodb.py +1383 -1383
- avrotize/avrotogo.py +476 -476
- avrotize/avrotographql.py +197 -197
- avrotize/avrotoiceberg.py +210 -210
- avrotize/avrotojava.py +1023 -1023
- avrotize/avrotojs.py +250 -250
- avrotize/avrotojsons.py +481 -481
- avrotize/avrotojstruct.py +345 -345
- avrotize/avrotokusto.py +363 -363
- avrotize/avrotomd.py +137 -137
- avrotize/avrotools.py +168 -168
- avrotize/avrotoparquet.py +208 -208
- avrotize/avrotoproto.py +358 -358
- avrotize/avrotopython.py +622 -622
- avrotize/avrotorust.py +435 -435
- avrotize/avrotots.py +598 -598
- avrotize/avrotoxsd.py +344 -344
- avrotize/commands.json +2493 -2433
- avrotize/common.py +828 -828
- avrotize/constants.py +4 -4
- avrotize/csvtoavro.py +131 -131
- avrotize/datapackagetoavro.py +76 -76
- avrotize/dependency_resolver.py +348 -348
- avrotize/jsonstoavro.py +1698 -1698
- avrotize/jsonstostructure.py +2642 -2642
- avrotize/jstructtoavro.py +878 -878
- avrotize/kstructtoavro.py +93 -93
- avrotize/kustotoavro.py +455 -455
- avrotize/parquettoavro.py +157 -157
- avrotize/proto2parser.py +497 -497
- avrotize/proto3parser.py +402 -402
- avrotize/prototoavro.py +382 -382
- avrotize/structuretocsharp.py +2005 -2005
- avrotize/structuretojsons.py +498 -498
- avrotize/structuretopython.py +772 -772
- avrotize/structuretots.py +653 -0
- avrotize/xsdtoavro.py +413 -413
- {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/METADATA +848 -805
- structurize-2.16.5.dist-info/RECORD +52 -0
- {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/licenses/LICENSE +200 -200
- structurize-2.16.2.dist-info/RECORD +0 -51
- {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/WHEEL +0 -0
- {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/entry_points.txt +0 -0
- {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/top_level.txt +0 -0
avrotize/avrotoproto.py
CHANGED
|
@@ -1,359 +1,359 @@
|
|
|
1
|
-
import copy
|
|
2
|
-
import json
|
|
3
|
-
import argparse
|
|
4
|
-
import os
|
|
5
|
-
from typing import Literal, NamedTuple, Dict, Any, List
|
|
6
|
-
|
|
7
|
-
indent = ' '
|
|
8
|
-
|
|
9
|
-
Comment = NamedTuple('Comment', [('content', str), ('tags', Dict[str, Any])])
|
|
10
|
-
Oneof = NamedTuple('Oneof', [('comment', 'Comment'), ('name', str), ('fields', List['Field'])])
|
|
11
|
-
Field = NamedTuple('Field', [('comment', 'Comment'), ('label', str), ('type', str), ('key_type', str), ('val_type', str), ('name', str), ('number', int), ('dependencies', List[str])])
|
|
12
|
-
Enum = NamedTuple('Enum', [('comment', 'Comment'), ('name', str), ('fields', Dict[str, 'Field'])])
|
|
13
|
-
Message = NamedTuple('Message', [('comment', 'Comment'), ('name', str), ('fields', List['Field']), ('oneofs', List['Oneof']),
|
|
14
|
-
('messages', Dict[str, 'Message']), ('enums', Dict[str, 'Enum']), ('dependencies', List[str])])
|
|
15
|
-
Service = NamedTuple('Service', [('name', str), ('functions', Dict[str, 'RpcFunc'])])
|
|
16
|
-
RpcFunc = NamedTuple('RpcFunc', [('name', str), ('in_type', str), ('out_type', str), ('uri', str)])
|
|
17
|
-
ProtoFile = NamedTuple('ProtoFile',
|
|
18
|
-
[('messages', Dict[str, 'Message']), ('enums', Dict[str, 'Enum']),
|
|
19
|
-
('services', Dict[str, 'Service']), ('imports', List[str]),
|
|
20
|
-
('options', Dict[str, str]), ('package', str)])
|
|
21
|
-
ProtoFiles = NamedTuple('ProtoFiles', [('files', List['ProtoFile'])])
|
|
22
|
-
|
|
23
|
-
class AvroToProto:
|
|
24
|
-
|
|
25
|
-
def __init__(self) -> None:
|
|
26
|
-
self.naming_mode: Literal['snake', 'pascal', 'camel'] = 'pascal'
|
|
27
|
-
self.allow_optional: bool = False
|
|
28
|
-
self.default_namespace: str = ''
|
|
29
|
-
|
|
30
|
-
def avro_primitive_to_proto_type(self, avro_type: str, dependencies: List[str]) -> str:
|
|
31
|
-
"""Map Avro primitive types to Protobuf types."""
|
|
32
|
-
mapping = {
|
|
33
|
-
'null': 'google.protobuf.Empty', # Special handling may be required
|
|
34
|
-
'boolean': 'bool',
|
|
35
|
-
'int': 'int32',
|
|
36
|
-
'long': 'int64',
|
|
37
|
-
'float': 'float',
|
|
38
|
-
'double': 'double',
|
|
39
|
-
'bytes': 'bytes',
|
|
40
|
-
'string': 'string',
|
|
41
|
-
}
|
|
42
|
-
# logical types require special handling
|
|
43
|
-
if isinstance(avro_type, dict) and 'logicalType' in avro_type:
|
|
44
|
-
logical_type = avro_type['logicalType']
|
|
45
|
-
if logical_type == 'date':
|
|
46
|
-
return 'string'
|
|
47
|
-
elif logical_type == 'time-millis':
|
|
48
|
-
return 'string'
|
|
49
|
-
elif logical_type == 'timestamp-millis':
|
|
50
|
-
return 'string'
|
|
51
|
-
elif logical_type == 'decimal':
|
|
52
|
-
precision = avro_type['precision']
|
|
53
|
-
scale = avro_type['scale']
|
|
54
|
-
return 'string'
|
|
55
|
-
elif logical_type == 'duration':
|
|
56
|
-
return 'string'
|
|
57
|
-
elif logical_type == 'uuid':
|
|
58
|
-
return 'string'
|
|
59
|
-
|
|
60
|
-
type = mapping.get(avro_type, '')
|
|
61
|
-
if not type:
|
|
62
|
-
dependencies.append(avro_type)
|
|
63
|
-
type = avro_type
|
|
64
|
-
return type
|
|
65
|
-
|
|
66
|
-
def compose_name(self, prefix: str, name: str, naming_mode: Literal['pascal', 'camel', 'snake', 'default', 'field'] = 'default') -> str:
|
|
67
|
-
if naming_mode == 'default':
|
|
68
|
-
naming_mode = self.naming_mode
|
|
69
|
-
if naming_mode == 'field':
|
|
70
|
-
if self.naming_mode == 'pascal':
|
|
71
|
-
naming_mode = 'camel'
|
|
72
|
-
else:
|
|
73
|
-
naming_mode = self.naming_mode
|
|
74
|
-
if naming_mode == 'snake':
|
|
75
|
-
return f"{prefix}_{name}"
|
|
76
|
-
if naming_mode == 'pascal':
|
|
77
|
-
return f"{prefix[0].upper()+prefix[1:] if prefix else ''}{name[0].upper()+name[1:] if name else ''}"
|
|
78
|
-
if naming_mode == 'camel':
|
|
79
|
-
return f"{prefix[0].lower()+prefix[1:] if prefix else ''}{name[0].upper()+name[1:] if name else ''}"
|
|
80
|
-
return prefix+name
|
|
81
|
-
|
|
82
|
-
def convert_field(self, message: Message, avro_field: dict, index: int, proto_files: ProtoFiles) -> Field | Oneof | Enum | Message:
|
|
83
|
-
"""Convert an Avro field to a Protobuf field."""
|
|
84
|
-
field_type = avro_field['type']
|
|
85
|
-
field_name = avro_field['name'] if 'name' in avro_field else self.compose_name(field_type.split('.')[-1],'value', 'field') if isinstance(field_type, str) else self.compose_name(f"_{index}", 'value', 'field')
|
|
86
|
-
if 'doc' in avro_field:
|
|
87
|
-
comment = Comment(avro_field["doc"], {})
|
|
88
|
-
else:
|
|
89
|
-
comment = Comment('',{})
|
|
90
|
-
|
|
91
|
-
return self.convert_field_type(message, field_name, field_type, comment, index, proto_files)
|
|
92
|
-
|
|
93
|
-
def convert_record_type(self, avro_record: dict, comment: Comment, proto_files: ProtoFiles) -> Message:
|
|
94
|
-
"""Convert an Avro record to a Protobuf message."""
|
|
95
|
-
local_message = Message(comment, avro_record['name'], [], [], {}, {}, [])
|
|
96
|
-
offs = 1
|
|
97
|
-
for i, f in enumerate(avro_record['fields']):
|
|
98
|
-
field = self.convert_field(local_message, f, i+offs, proto_files)
|
|
99
|
-
if isinstance(field, Oneof):
|
|
100
|
-
for f in field.fields:
|
|
101
|
-
local_message.dependencies.extend(f.dependencies)
|
|
102
|
-
local_message.oneofs.append(field)
|
|
103
|
-
offs += len(field.fields)-1
|
|
104
|
-
elif isinstance(field, Enum):
|
|
105
|
-
enum = Enum(field.comment, self.compose_name(field.name,'enum'), field.fields)
|
|
106
|
-
local_message.enums[enum.name] = enum
|
|
107
|
-
local_message.fields.append(Field(field.comment, '', enum.name, '', '', field.name.split('.')[-1], i+offs, []))
|
|
108
|
-
elif isinstance(field, Message):
|
|
109
|
-
inner_message = Message(field.comment, self.compose_name(field.name,'type'), field.fields, field.oneofs, field.messages, field.enums, [])
|
|
110
|
-
local_message.messages[inner_message.name] = inner_message
|
|
111
|
-
local_message.fields.append(Field(field.comment, '', inner_message.name, '', '', field.name.split('.')[-1], i+offs, []))
|
|
112
|
-
local_message.dependencies.extend(field.dependencies)
|
|
113
|
-
else:
|
|
114
|
-
local_message.dependencies.extend(field.dependencies)
|
|
115
|
-
local_message.fields.append(field)
|
|
116
|
-
return local_message
|
|
117
|
-
|
|
118
|
-
def convert_field_type(self, message: Message, field_name: str, field_type: str | dict | list, comment: Comment, index: int, proto_files: ProtoFiles) -> Field | Oneof | Enum | Message:
|
|
119
|
-
"""Convert an Avro field type to a Protobuf field type."""
|
|
120
|
-
label = ''
|
|
121
|
-
|
|
122
|
-
if isinstance(field_type, list):
|
|
123
|
-
# Handling union types (including nullable fields)
|
|
124
|
-
non_null_types = [t for t in field_type if t != 'null']
|
|
125
|
-
if len(non_null_types) == 1:
|
|
126
|
-
if self.allow_optional:
|
|
127
|
-
label = 'optional'
|
|
128
|
-
field_type = non_null_types[0]
|
|
129
|
-
elif len(non_null_types) > 0:
|
|
130
|
-
oneof_fields = []
|
|
131
|
-
for i, t in enumerate(non_null_types):
|
|
132
|
-
field = self.convert_field_type(message, self.compose_name(field_name,'choice', 'field'), t, comment, i+index, proto_files)
|
|
133
|
-
if isinstance(field, Field):
|
|
134
|
-
if field.type == 'map' or field.type == 'array':
|
|
135
|
-
local_message = Message(comment, self.compose_name(field.name,field.type), [], [], {}, {}, field.dependencies)
|
|
136
|
-
local_message.fields.append(field)
|
|
137
|
-
new_field = Field(field.comment, '', local_message.name, '', '', self.compose_name(field.name.split('.')[-1],field.type, 'field'), i+index, field.dependencies)
|
|
138
|
-
message.messages[local_message.name] = local_message
|
|
139
|
-
oneof_fields.append(new_field)
|
|
140
|
-
else:
|
|
141
|
-
field = Field(field.comment, field.label, field.type, field.key_type, field.val_type, self.compose_name(field_name, (field.type.split('.')[-1]), 'field'), i+index, field.dependencies)
|
|
142
|
-
oneof_fields.append(field)
|
|
143
|
-
elif isinstance(field, Oneof):
|
|
144
|
-
deps: List[str] = []
|
|
145
|
-
oneof = field
|
|
146
|
-
for f in oneof.fields:
|
|
147
|
-
deps.extend(f.dependencies)
|
|
148
|
-
local_message = Message(comment, self.compose_name(field.name,'choice'), [], [], {}, {}, deps)
|
|
149
|
-
index += len(field.fields)
|
|
150
|
-
local_message.oneofs.append(field)
|
|
151
|
-
new_field = Field(field.comment, '', local_message.name, '', '', field.name.split('.')[-1], i+index, deps)
|
|
152
|
-
message.messages[local_message.name] = local_message
|
|
153
|
-
oneof_fields.append(new_field)
|
|
154
|
-
elif isinstance(field, Enum):
|
|
155
|
-
enum = Enum(field.comment, self.compose_name(field.name,"options"), field.fields)
|
|
156
|
-
message.enums[enum.name] = enum
|
|
157
|
-
field = Field(field.comment, '', enum.name, '', '', field.name.split('.')[-1], i+index, [])
|
|
158
|
-
oneof_fields.append(field)
|
|
159
|
-
elif isinstance(field, Message):
|
|
160
|
-
local_message = Message(field.comment, self.compose_name(field.name,'type'), field.fields, field.oneofs, field.messages, field.enums, field.dependencies)
|
|
161
|
-
message.messages[local_message.name] = local_message
|
|
162
|
-
field = Field(field.comment, '', local_message.name, '', '', field.name.split('.')[-1], i+index, field.dependencies)
|
|
163
|
-
oneof_fields.append(field)
|
|
164
|
-
oneof = Oneof(comment, field_name, copy.deepcopy(oneof_fields))
|
|
165
|
-
return oneof
|
|
166
|
-
else:
|
|
167
|
-
raise ValueError(f"Field {field_name} is a union type without any non-null types")
|
|
168
|
-
|
|
169
|
-
if isinstance(field_type, dict):
|
|
170
|
-
# Nested types (e.g., records, enums) require special handling
|
|
171
|
-
if field_type['type'] == 'record':
|
|
172
|
-
return self.convert_record_type(field_type, comment, proto_files)
|
|
173
|
-
elif field_type['type'] == 'enum':
|
|
174
|
-
enum_symbols = {symbol: Field(comment, '', symbol, '', '', symbol, s, []) for s, symbol in enumerate(field_type['symbols'])}
|
|
175
|
-
return Enum(comment, field_type['name'], enum_symbols)
|
|
176
|
-
elif field_type['type'] == 'array':
|
|
177
|
-
converted_field_type = self.convert_field_type(message, self.compose_name(field_name, "item"), field_type['items'], comment, index, proto_files)
|
|
178
|
-
if isinstance(converted_field_type, Field):
|
|
179
|
-
return Field(comment, 'repeated', 'array', '', converted_field_type.type, field_name, index, converted_field_type.dependencies)
|
|
180
|
-
elif isinstance(converted_field_type, Enum):
|
|
181
|
-
enum = Enum(converted_field_type.comment, self.compose_name(converted_field_type.name,'enum'), converted_field_type.fields)
|
|
182
|
-
message.enums[enum.name] = enum
|
|
183
|
-
return Field(comment, 'repeated', 'array', '', enum.name, field_name, index, [])
|
|
184
|
-
elif isinstance(converted_field_type, Message):
|
|
185
|
-
local_message = Message(converted_field_type.comment, self.compose_name(converted_field_type.name,'type'), converted_field_type.fields, converted_field_type.oneofs, converted_field_type.messages, converted_field_type.enums, converted_field_type.dependencies)
|
|
186
|
-
message.messages[local_message.name] = local_message
|
|
187
|
-
return Field(comment, 'repeated', 'array', '', local_message.name, field_name, index, [])
|
|
188
|
-
elif isinstance(converted_field_type, Oneof):
|
|
189
|
-
deps3: List[str] = []
|
|
190
|
-
fl = []
|
|
191
|
-
for i, f in enumerate(converted_field_type.fields):
|
|
192
|
-
fl.append(Field(Comment('',{}), '', f.type, '', '', f.name, i+1, []))
|
|
193
|
-
deps3.extend(f.dependencies)
|
|
194
|
-
oneof = Oneof(converted_field_type.comment, 'item', fl)
|
|
195
|
-
local_message = Message(comment, self.compose_name(field_name,'type'), [], [], {}, {}, deps3)
|
|
196
|
-
local_message.oneofs.append(oneof)
|
|
197
|
-
new_field = Field(Comment('',{}), 'repeated', 'array', '', local_message.name, field_name.split('.')[-1], index, local_message.dependencies)
|
|
198
|
-
message.messages[local_message.name] = local_message
|
|
199
|
-
return new_field
|
|
200
|
-
elif field_type['type'] == 'map':
|
|
201
|
-
converted_field_type = self.convert_field_type(message, self.compose_name(field_name,'item', 'field'), field_type['values'], comment, index, proto_files)
|
|
202
|
-
if isinstance(converted_field_type, Field):
|
|
203
|
-
return Field(comment, label, 'map', 'string', converted_field_type.type, field_name, index, converted_field_type.dependencies)
|
|
204
|
-
elif isinstance(converted_field_type, Enum):
|
|
205
|
-
enum = Enum(converted_field_type.comment, self.compose_name(converted_field_type.name,'enum'), converted_field_type.fields)
|
|
206
|
-
message.enums[enum.name] = enum
|
|
207
|
-
return Field(comment, label, 'map', 'string', enum.name, field_name, index, [])
|
|
208
|
-
elif isinstance(converted_field_type, Message):
|
|
209
|
-
local_message = Message(converted_field_type.comment, self.compose_name(converted_field_type.name,'type'), converted_field_type.fields, converted_field_type.oneofs, converted_field_type.messages, converted_field_type.enums, [])
|
|
210
|
-
message.messages[local_message.name] = local_message
|
|
211
|
-
return Field(comment, label, 'map', 'string', local_message.name, field_name, index, local_message.dependencies)
|
|
212
|
-
elif isinstance(converted_field_type, Oneof):
|
|
213
|
-
deps4: List[str] = []
|
|
214
|
-
fl = []
|
|
215
|
-
for i, f in enumerate(converted_field_type.fields):
|
|
216
|
-
fl.append(Field(Comment('',{}), '', f.type, '', '', f.name, i+1, []))
|
|
217
|
-
deps4.extend(f.dependencies)
|
|
218
|
-
oneof = Oneof(converted_field_type.comment, 'item', fl)
|
|
219
|
-
local_message = Message(comment, self.compose_name(field_name, 'type'), [], [], {}, {}, deps4)
|
|
220
|
-
local_message.oneofs.append(oneof)
|
|
221
|
-
new_field = Field(Comment('',{}), label, 'map', 'string', local_message.name, field_name.split('.')[-1], index, local_message.dependencies)
|
|
222
|
-
message.messages[local_message.name] = local_message
|
|
223
|
-
return new_field
|
|
224
|
-
elif field_type['type'] == "fixed":
|
|
225
|
-
return Field(comment, label, 'fixed','string', 'string', field_name, index, [])
|
|
226
|
-
else:
|
|
227
|
-
deps1: List[str] = []
|
|
228
|
-
proto_type = self.avro_primitive_to_proto_type(field_type['type'], deps1)
|
|
229
|
-
return Field(comment, label, proto_type, '', '', field_name, index, deps1)
|
|
230
|
-
elif isinstance(field_type, str):
|
|
231
|
-
deps2: List[str] = []
|
|
232
|
-
proto_type = self.avro_primitive_to_proto_type(field_type, deps2)
|
|
233
|
-
return Field(comment, label, proto_type, '', '', field_name, index, deps2)
|
|
234
|
-
raise ValueError(f"Unknown field type {field_type}")
|
|
235
|
-
|
|
236
|
-
def avro_schema_to_proto_message(self, avro_schema: dict, proto_files: ProtoFiles) -> str:
|
|
237
|
-
"""Convert an Avro schema to a Protobuf message definition."""
|
|
238
|
-
comment = Comment('',{})
|
|
239
|
-
if 'doc' in avro_schema:
|
|
240
|
-
comment = Comment(avro_schema["doc"], {})
|
|
241
|
-
namespace = avro_schema.get("namespace", '')
|
|
242
|
-
if not namespace:
|
|
243
|
-
namespace = self.default_namespace
|
|
244
|
-
if avro_schema['type'] == 'record':
|
|
245
|
-
message = self.convert_record_type(avro_schema, comment, proto_files)
|
|
246
|
-
file = next((f for f in proto_files.files if f.package == namespace), None)
|
|
247
|
-
if not file:
|
|
248
|
-
file = ProtoFile({}, {}, {}, [], {}, namespace)
|
|
249
|
-
proto_files.files.append(file)
|
|
250
|
-
file.messages[message.name] = message
|
|
251
|
-
elif avro_schema['type'] == 'enum':
|
|
252
|
-
enum_name = avro_schema['name']
|
|
253
|
-
enum_symbols = {symbol: Field(comment, '', symbol, '', '', symbol, s, []) for s, symbol in enumerate(avro_schema['symbols'])}
|
|
254
|
-
enum = Enum(comment, enum_name, enum_symbols)
|
|
255
|
-
file = next((f for f in proto_files.files if f.package == namespace), None)
|
|
256
|
-
if not file:
|
|
257
|
-
file = ProtoFile({}, {}, {}, [], {}, namespace)
|
|
258
|
-
proto_files.files.append(file)
|
|
259
|
-
file.enums[enum_name] = enum
|
|
260
|
-
return avro_schema["name"]
|
|
261
|
-
|
|
262
|
-
def avro_schema_to_proto_messages(self, avro_schema_input, proto_files: ProtoFiles):
|
|
263
|
-
"""Convert an Avro schema to Protobuf message definitions."""
|
|
264
|
-
if not isinstance(avro_schema_input, list):
|
|
265
|
-
avro_schema_list = [avro_schema_input]
|
|
266
|
-
else:
|
|
267
|
-
avro_schema_list = avro_schema_input
|
|
268
|
-
for avro_schema in avro_schema_list:
|
|
269
|
-
self.avro_schema_to_proto_message(avro_schema, proto_files)
|
|
270
|
-
|
|
271
|
-
def save_proto_to_file(self, proto_files: ProtoFiles, proto_path):
|
|
272
|
-
"""Save the Protobuf schema to a file."""
|
|
273
|
-
for proto in proto_files.files:
|
|
274
|
-
# gather dependencies that are within the package
|
|
275
|
-
deps: List[str] = []
|
|
276
|
-
for message in proto.messages.values():
|
|
277
|
-
for dep in message.dependencies:
|
|
278
|
-
if '.' in dep:
|
|
279
|
-
deps.append(dep.rsplit('.',1)[0])
|
|
280
|
-
deps = list(set(deps))
|
|
281
|
-
|
|
282
|
-
#proto.imports.extend([f.package[len(proto.package)+1:] for f in proto_files.files if f.package.startswith(proto.package) and f.package != proto.package])
|
|
283
|
-
proto.imports.extend([d for d in deps if d != proto.package])
|
|
284
|
-
proto_file_path = os.path.join(proto_path, f"{proto.package}.proto")
|
|
285
|
-
# create the directory for the proto file if it doesn't exist
|
|
286
|
-
proto_dir = os.path.dirname(proto_file_path)
|
|
287
|
-
if not os.path.exists(proto_dir):
|
|
288
|
-
os.makedirs(proto_dir, exist_ok=True)
|
|
289
|
-
with open(proto_file_path, 'w') as proto_file:
|
|
290
|
-
# dump the ProtoFile structure in proto syntax
|
|
291
|
-
proto_str = f'syntax = "proto3";\n\n'
|
|
292
|
-
proto_str += f'package {proto.package};\n\n'
|
|
293
|
-
|
|
294
|
-
for import_package in proto.imports:
|
|
295
|
-
proto_str += f"import \"{import_package}.proto\";\n"
|
|
296
|
-
if (len(proto.imports)):
|
|
297
|
-
proto_str += "\n"
|
|
298
|
-
for enum_name, enum in proto.enums.items():
|
|
299
|
-
proto_str += f"enum {enum_name} {{\n"
|
|
300
|
-
for _, field in enum.fields.items():
|
|
301
|
-
proto_str += f"{indent}{field.name} = {field.number};\n"
|
|
302
|
-
proto_str += "}\n\n"
|
|
303
|
-
for message in proto.messages.values():
|
|
304
|
-
proto_str += self.render_message(message)
|
|
305
|
-
for service in proto.services.values():
|
|
306
|
-
proto_str += f"service {service.name} {{\n"
|
|
307
|
-
for function_name, func in service.functions.items():
|
|
308
|
-
proto_str += f"{indent}rpc {func.name} ({func.in_type}) returns ({func.out_type}) {{\n"
|
|
309
|
-
proto_str += f"{indent}{indent}option (google.api.http) = {{\n"
|
|
310
|
-
proto_str += f"{indent}{indent}{indent}post: \"{func.uri}\"\n"
|
|
311
|
-
proto_str += f"{indent}{indent}}};\n"
|
|
312
|
-
proto_str += f"{indent}}};\n"
|
|
313
|
-
proto_str += "}\n\n"
|
|
314
|
-
proto_file.write(proto_str)
|
|
315
|
-
|
|
316
|
-
def render_message(self, message, level=0) -> str:
|
|
317
|
-
proto_str = f"{indent*level}message {message.name} {{\n"
|
|
318
|
-
fieldsAndOneofs = message.fields+message.oneofs
|
|
319
|
-
fieldsAndOneofs.sort(key=lambda f: f.number if isinstance(f, Field) else f.fields[0].number)
|
|
320
|
-
for fo in fieldsAndOneofs:
|
|
321
|
-
if isinstance(fo, Field):
|
|
322
|
-
field = fo
|
|
323
|
-
if field.type == "map":
|
|
324
|
-
proto_str += f"{indent*level}{indent}{field.label}{' ' if field.label else ''}map<{field.key_type}, {field.val_type}> {field.name} = {field.number};\n"
|
|
325
|
-
elif field.type == "array":
|
|
326
|
-
proto_str += f"{indent*level}{indent}{field.label}{' ' if field.label else ''}{field.val_type} {field.name} = {field.number};\n"
|
|
327
|
-
else:
|
|
328
|
-
proto_str += f"{indent*level}{indent}{field.label}{' ' if field.label else ''}{field.type} {field.name} = {field.number};\n"
|
|
329
|
-
else:
|
|
330
|
-
oneof = fo
|
|
331
|
-
proto_str += f"{indent*level}{indent}oneof {oneof.name} {{\n"
|
|
332
|
-
for field in oneof.fields:
|
|
333
|
-
proto_str += f"{indent*level}{indent}{indent}{field.label}{' ' if field.label else ''}{field.type} {field.name} = {field.number};\n"
|
|
334
|
-
proto_str += f"{indent*level}{indent}}}\n"
|
|
335
|
-
for enum in message.enums.values():
|
|
336
|
-
proto_str += f"{indent*level}{indent}enum {enum.name} {{\n"
|
|
337
|
-
for _, field in enum.fields.items():
|
|
338
|
-
proto_str += f"{indent*level}{indent}{indent}{field.label}{' ' if field.label else ''}{field.name} = {field.number};\n"
|
|
339
|
-
proto_str += f"{indent*level}{indent}}}\n"
|
|
340
|
-
for local_message in message.messages.values():
|
|
341
|
-
proto_str += self.render_message(local_message, level+1)
|
|
342
|
-
proto_str += f"{indent*level}}}\n"
|
|
343
|
-
return proto_str
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
def convert_avro_to_proto(self, avro_schema_path, proto_file_path):
|
|
347
|
-
"""Convert Avro schema file to Protobuf .proto file."""
|
|
348
|
-
with open(avro_schema_path, 'r') as avro_file:
|
|
349
|
-
avro_schema = json.load(avro_file)
|
|
350
|
-
proto_files = ProtoFiles([])
|
|
351
|
-
self.avro_schema_to_proto_messages(avro_schema, proto_files)
|
|
352
|
-
self.save_proto_to_file(proto_files, proto_file_path)
|
|
353
|
-
|
|
354
|
-
def convert_avro_to_proto(avro_schema_path, proto_file_path, naming_mode: Literal['snake', 'pascal', 'camel'] = 'pascal', allow_optional: bool = False):
|
|
355
|
-
avrotoproto = AvroToProto()
|
|
356
|
-
avrotoproto.naming_mode = naming_mode
|
|
357
|
-
avrotoproto.allow_optional = allow_optional
|
|
358
|
-
avrotoproto.default_namespace = os.path.splitext(os.path.basename(proto_file_path))[0].replace('-','_')
|
|
1
|
+
import copy
|
|
2
|
+
import json
|
|
3
|
+
import argparse
|
|
4
|
+
import os
|
|
5
|
+
from typing import Literal, NamedTuple, Dict, Any, List
|
|
6
|
+
|
|
7
|
+
indent = ' '
|
|
8
|
+
|
|
9
|
+
Comment = NamedTuple('Comment', [('content', str), ('tags', Dict[str, Any])])
|
|
10
|
+
Oneof = NamedTuple('Oneof', [('comment', 'Comment'), ('name', str), ('fields', List['Field'])])
|
|
11
|
+
Field = NamedTuple('Field', [('comment', 'Comment'), ('label', str), ('type', str), ('key_type', str), ('val_type', str), ('name', str), ('number', int), ('dependencies', List[str])])
|
|
12
|
+
Enum = NamedTuple('Enum', [('comment', 'Comment'), ('name', str), ('fields', Dict[str, 'Field'])])
|
|
13
|
+
Message = NamedTuple('Message', [('comment', 'Comment'), ('name', str), ('fields', List['Field']), ('oneofs', List['Oneof']),
|
|
14
|
+
('messages', Dict[str, 'Message']), ('enums', Dict[str, 'Enum']), ('dependencies', List[str])])
|
|
15
|
+
Service = NamedTuple('Service', [('name', str), ('functions', Dict[str, 'RpcFunc'])])
|
|
16
|
+
RpcFunc = NamedTuple('RpcFunc', [('name', str), ('in_type', str), ('out_type', str), ('uri', str)])
|
|
17
|
+
ProtoFile = NamedTuple('ProtoFile',
|
|
18
|
+
[('messages', Dict[str, 'Message']), ('enums', Dict[str, 'Enum']),
|
|
19
|
+
('services', Dict[str, 'Service']), ('imports', List[str]),
|
|
20
|
+
('options', Dict[str, str]), ('package', str)])
|
|
21
|
+
ProtoFiles = NamedTuple('ProtoFiles', [('files', List['ProtoFile'])])
|
|
22
|
+
|
|
23
|
+
class AvroToProto:
|
|
24
|
+
|
|
25
|
+
def __init__(self) -> None:
|
|
26
|
+
self.naming_mode: Literal['snake', 'pascal', 'camel'] = 'pascal'
|
|
27
|
+
self.allow_optional: bool = False
|
|
28
|
+
self.default_namespace: str = ''
|
|
29
|
+
|
|
30
|
+
def avro_primitive_to_proto_type(self, avro_type: str, dependencies: List[str]) -> str:
|
|
31
|
+
"""Map Avro primitive types to Protobuf types."""
|
|
32
|
+
mapping = {
|
|
33
|
+
'null': 'google.protobuf.Empty', # Special handling may be required
|
|
34
|
+
'boolean': 'bool',
|
|
35
|
+
'int': 'int32',
|
|
36
|
+
'long': 'int64',
|
|
37
|
+
'float': 'float',
|
|
38
|
+
'double': 'double',
|
|
39
|
+
'bytes': 'bytes',
|
|
40
|
+
'string': 'string',
|
|
41
|
+
}
|
|
42
|
+
# logical types require special handling
|
|
43
|
+
if isinstance(avro_type, dict) and 'logicalType' in avro_type:
|
|
44
|
+
logical_type = avro_type['logicalType']
|
|
45
|
+
if logical_type == 'date':
|
|
46
|
+
return 'string'
|
|
47
|
+
elif logical_type == 'time-millis':
|
|
48
|
+
return 'string'
|
|
49
|
+
elif logical_type == 'timestamp-millis':
|
|
50
|
+
return 'string'
|
|
51
|
+
elif logical_type == 'decimal':
|
|
52
|
+
precision = avro_type['precision']
|
|
53
|
+
scale = avro_type['scale']
|
|
54
|
+
return 'string'
|
|
55
|
+
elif logical_type == 'duration':
|
|
56
|
+
return 'string'
|
|
57
|
+
elif logical_type == 'uuid':
|
|
58
|
+
return 'string'
|
|
59
|
+
|
|
60
|
+
type = mapping.get(avro_type, '')
|
|
61
|
+
if not type:
|
|
62
|
+
dependencies.append(avro_type)
|
|
63
|
+
type = avro_type
|
|
64
|
+
return type
|
|
65
|
+
|
|
66
|
+
def compose_name(self, prefix: str, name: str, naming_mode: Literal['pascal', 'camel', 'snake', 'default', 'field'] = 'default') -> str:
|
|
67
|
+
if naming_mode == 'default':
|
|
68
|
+
naming_mode = self.naming_mode
|
|
69
|
+
if naming_mode == 'field':
|
|
70
|
+
if self.naming_mode == 'pascal':
|
|
71
|
+
naming_mode = 'camel'
|
|
72
|
+
else:
|
|
73
|
+
naming_mode = self.naming_mode
|
|
74
|
+
if naming_mode == 'snake':
|
|
75
|
+
return f"{prefix}_{name}"
|
|
76
|
+
if naming_mode == 'pascal':
|
|
77
|
+
return f"{prefix[0].upper()+prefix[1:] if prefix else ''}{name[0].upper()+name[1:] if name else ''}"
|
|
78
|
+
if naming_mode == 'camel':
|
|
79
|
+
return f"{prefix[0].lower()+prefix[1:] if prefix else ''}{name[0].upper()+name[1:] if name else ''}"
|
|
80
|
+
return prefix+name
|
|
81
|
+
|
|
82
|
+
def convert_field(self, message: Message, avro_field: dict, index: int, proto_files: ProtoFiles) -> Field | Oneof | Enum | Message:
|
|
83
|
+
"""Convert an Avro field to a Protobuf field."""
|
|
84
|
+
field_type = avro_field['type']
|
|
85
|
+
field_name = avro_field['name'] if 'name' in avro_field else self.compose_name(field_type.split('.')[-1],'value', 'field') if isinstance(field_type, str) else self.compose_name(f"_{index}", 'value', 'field')
|
|
86
|
+
if 'doc' in avro_field:
|
|
87
|
+
comment = Comment(avro_field["doc"], {})
|
|
88
|
+
else:
|
|
89
|
+
comment = Comment('',{})
|
|
90
|
+
|
|
91
|
+
return self.convert_field_type(message, field_name, field_type, comment, index, proto_files)
|
|
92
|
+
|
|
93
|
+
def convert_record_type(self, avro_record: dict, comment: Comment, proto_files: ProtoFiles) -> Message:
|
|
94
|
+
"""Convert an Avro record to a Protobuf message."""
|
|
95
|
+
local_message = Message(comment, avro_record['name'], [], [], {}, {}, [])
|
|
96
|
+
offs = 1
|
|
97
|
+
for i, f in enumerate(avro_record['fields']):
|
|
98
|
+
field = self.convert_field(local_message, f, i+offs, proto_files)
|
|
99
|
+
if isinstance(field, Oneof):
|
|
100
|
+
for f in field.fields:
|
|
101
|
+
local_message.dependencies.extend(f.dependencies)
|
|
102
|
+
local_message.oneofs.append(field)
|
|
103
|
+
offs += len(field.fields)-1
|
|
104
|
+
elif isinstance(field, Enum):
|
|
105
|
+
enum = Enum(field.comment, self.compose_name(field.name,'enum'), field.fields)
|
|
106
|
+
local_message.enums[enum.name] = enum
|
|
107
|
+
local_message.fields.append(Field(field.comment, '', enum.name, '', '', field.name.split('.')[-1], i+offs, []))
|
|
108
|
+
elif isinstance(field, Message):
|
|
109
|
+
inner_message = Message(field.comment, self.compose_name(field.name,'type'), field.fields, field.oneofs, field.messages, field.enums, [])
|
|
110
|
+
local_message.messages[inner_message.name] = inner_message
|
|
111
|
+
local_message.fields.append(Field(field.comment, '', inner_message.name, '', '', field.name.split('.')[-1], i+offs, []))
|
|
112
|
+
local_message.dependencies.extend(field.dependencies)
|
|
113
|
+
else:
|
|
114
|
+
local_message.dependencies.extend(field.dependencies)
|
|
115
|
+
local_message.fields.append(field)
|
|
116
|
+
return local_message
|
|
117
|
+
|
|
118
|
+
def convert_field_type(self, message: Message, field_name: str, field_type: str | dict | list, comment: Comment, index: int, proto_files: ProtoFiles) -> Field | Oneof | Enum | Message:
|
|
119
|
+
"""Convert an Avro field type to a Protobuf field type."""
|
|
120
|
+
label = ''
|
|
121
|
+
|
|
122
|
+
if isinstance(field_type, list):
|
|
123
|
+
# Handling union types (including nullable fields)
|
|
124
|
+
non_null_types = [t for t in field_type if t != 'null']
|
|
125
|
+
if len(non_null_types) == 1:
|
|
126
|
+
if self.allow_optional:
|
|
127
|
+
label = 'optional'
|
|
128
|
+
field_type = non_null_types[0]
|
|
129
|
+
elif len(non_null_types) > 0:
|
|
130
|
+
oneof_fields = []
|
|
131
|
+
for i, t in enumerate(non_null_types):
|
|
132
|
+
field = self.convert_field_type(message, self.compose_name(field_name,'choice', 'field'), t, comment, i+index, proto_files)
|
|
133
|
+
if isinstance(field, Field):
|
|
134
|
+
if field.type == 'map' or field.type == 'array':
|
|
135
|
+
local_message = Message(comment, self.compose_name(field.name,field.type), [], [], {}, {}, field.dependencies)
|
|
136
|
+
local_message.fields.append(field)
|
|
137
|
+
new_field = Field(field.comment, '', local_message.name, '', '', self.compose_name(field.name.split('.')[-1],field.type, 'field'), i+index, field.dependencies)
|
|
138
|
+
message.messages[local_message.name] = local_message
|
|
139
|
+
oneof_fields.append(new_field)
|
|
140
|
+
else:
|
|
141
|
+
field = Field(field.comment, field.label, field.type, field.key_type, field.val_type, self.compose_name(field_name, (field.type.split('.')[-1]), 'field'), i+index, field.dependencies)
|
|
142
|
+
oneof_fields.append(field)
|
|
143
|
+
elif isinstance(field, Oneof):
|
|
144
|
+
deps: List[str] = []
|
|
145
|
+
oneof = field
|
|
146
|
+
for f in oneof.fields:
|
|
147
|
+
deps.extend(f.dependencies)
|
|
148
|
+
local_message = Message(comment, self.compose_name(field.name,'choice'), [], [], {}, {}, deps)
|
|
149
|
+
index += len(field.fields)
|
|
150
|
+
local_message.oneofs.append(field)
|
|
151
|
+
new_field = Field(field.comment, '', local_message.name, '', '', field.name.split('.')[-1], i+index, deps)
|
|
152
|
+
message.messages[local_message.name] = local_message
|
|
153
|
+
oneof_fields.append(new_field)
|
|
154
|
+
elif isinstance(field, Enum):
|
|
155
|
+
enum = Enum(field.comment, self.compose_name(field.name,"options"), field.fields)
|
|
156
|
+
message.enums[enum.name] = enum
|
|
157
|
+
field = Field(field.comment, '', enum.name, '', '', field.name.split('.')[-1], i+index, [])
|
|
158
|
+
oneof_fields.append(field)
|
|
159
|
+
elif isinstance(field, Message):
|
|
160
|
+
local_message = Message(field.comment, self.compose_name(field.name,'type'), field.fields, field.oneofs, field.messages, field.enums, field.dependencies)
|
|
161
|
+
message.messages[local_message.name] = local_message
|
|
162
|
+
field = Field(field.comment, '', local_message.name, '', '', field.name.split('.')[-1], i+index, field.dependencies)
|
|
163
|
+
oneof_fields.append(field)
|
|
164
|
+
oneof = Oneof(comment, field_name, copy.deepcopy(oneof_fields))
|
|
165
|
+
return oneof
|
|
166
|
+
else:
|
|
167
|
+
raise ValueError(f"Field {field_name} is a union type without any non-null types")
|
|
168
|
+
|
|
169
|
+
if isinstance(field_type, dict):
|
|
170
|
+
# Nested types (e.g., records, enums) require special handling
|
|
171
|
+
if field_type['type'] == 'record':
|
|
172
|
+
return self.convert_record_type(field_type, comment, proto_files)
|
|
173
|
+
elif field_type['type'] == 'enum':
|
|
174
|
+
enum_symbols = {symbol: Field(comment, '', symbol, '', '', symbol, s, []) for s, symbol in enumerate(field_type['symbols'])}
|
|
175
|
+
return Enum(comment, field_type['name'], enum_symbols)
|
|
176
|
+
elif field_type['type'] == 'array':
|
|
177
|
+
converted_field_type = self.convert_field_type(message, self.compose_name(field_name, "item"), field_type['items'], comment, index, proto_files)
|
|
178
|
+
if isinstance(converted_field_type, Field):
|
|
179
|
+
return Field(comment, 'repeated', 'array', '', converted_field_type.type, field_name, index, converted_field_type.dependencies)
|
|
180
|
+
elif isinstance(converted_field_type, Enum):
|
|
181
|
+
enum = Enum(converted_field_type.comment, self.compose_name(converted_field_type.name,'enum'), converted_field_type.fields)
|
|
182
|
+
message.enums[enum.name] = enum
|
|
183
|
+
return Field(comment, 'repeated', 'array', '', enum.name, field_name, index, [])
|
|
184
|
+
elif isinstance(converted_field_type, Message):
|
|
185
|
+
local_message = Message(converted_field_type.comment, self.compose_name(converted_field_type.name,'type'), converted_field_type.fields, converted_field_type.oneofs, converted_field_type.messages, converted_field_type.enums, converted_field_type.dependencies)
|
|
186
|
+
message.messages[local_message.name] = local_message
|
|
187
|
+
return Field(comment, 'repeated', 'array', '', local_message.name, field_name, index, [])
|
|
188
|
+
elif isinstance(converted_field_type, Oneof):
|
|
189
|
+
deps3: List[str] = []
|
|
190
|
+
fl = []
|
|
191
|
+
for i, f in enumerate(converted_field_type.fields):
|
|
192
|
+
fl.append(Field(Comment('',{}), '', f.type, '', '', f.name, i+1, []))
|
|
193
|
+
deps3.extend(f.dependencies)
|
|
194
|
+
oneof = Oneof(converted_field_type.comment, 'item', fl)
|
|
195
|
+
local_message = Message(comment, self.compose_name(field_name,'type'), [], [], {}, {}, deps3)
|
|
196
|
+
local_message.oneofs.append(oneof)
|
|
197
|
+
new_field = Field(Comment('',{}), 'repeated', 'array', '', local_message.name, field_name.split('.')[-1], index, local_message.dependencies)
|
|
198
|
+
message.messages[local_message.name] = local_message
|
|
199
|
+
return new_field
|
|
200
|
+
elif field_type['type'] == 'map':
|
|
201
|
+
converted_field_type = self.convert_field_type(message, self.compose_name(field_name,'item', 'field'), field_type['values'], comment, index, proto_files)
|
|
202
|
+
if isinstance(converted_field_type, Field):
|
|
203
|
+
return Field(comment, label, 'map', 'string', converted_field_type.type, field_name, index, converted_field_type.dependencies)
|
|
204
|
+
elif isinstance(converted_field_type, Enum):
|
|
205
|
+
enum = Enum(converted_field_type.comment, self.compose_name(converted_field_type.name,'enum'), converted_field_type.fields)
|
|
206
|
+
message.enums[enum.name] = enum
|
|
207
|
+
return Field(comment, label, 'map', 'string', enum.name, field_name, index, [])
|
|
208
|
+
elif isinstance(converted_field_type, Message):
|
|
209
|
+
local_message = Message(converted_field_type.comment, self.compose_name(converted_field_type.name,'type'), converted_field_type.fields, converted_field_type.oneofs, converted_field_type.messages, converted_field_type.enums, [])
|
|
210
|
+
message.messages[local_message.name] = local_message
|
|
211
|
+
return Field(comment, label, 'map', 'string', local_message.name, field_name, index, local_message.dependencies)
|
|
212
|
+
elif isinstance(converted_field_type, Oneof):
|
|
213
|
+
deps4: List[str] = []
|
|
214
|
+
fl = []
|
|
215
|
+
for i, f in enumerate(converted_field_type.fields):
|
|
216
|
+
fl.append(Field(Comment('',{}), '', f.type, '', '', f.name, i+1, []))
|
|
217
|
+
deps4.extend(f.dependencies)
|
|
218
|
+
oneof = Oneof(converted_field_type.comment, 'item', fl)
|
|
219
|
+
local_message = Message(comment, self.compose_name(field_name, 'type'), [], [], {}, {}, deps4)
|
|
220
|
+
local_message.oneofs.append(oneof)
|
|
221
|
+
new_field = Field(Comment('',{}), label, 'map', 'string', local_message.name, field_name.split('.')[-1], index, local_message.dependencies)
|
|
222
|
+
message.messages[local_message.name] = local_message
|
|
223
|
+
return new_field
|
|
224
|
+
elif field_type['type'] == "fixed":
|
|
225
|
+
return Field(comment, label, 'fixed','string', 'string', field_name, index, [])
|
|
226
|
+
else:
|
|
227
|
+
deps1: List[str] = []
|
|
228
|
+
proto_type = self.avro_primitive_to_proto_type(field_type['type'], deps1)
|
|
229
|
+
return Field(comment, label, proto_type, '', '', field_name, index, deps1)
|
|
230
|
+
elif isinstance(field_type, str):
|
|
231
|
+
deps2: List[str] = []
|
|
232
|
+
proto_type = self.avro_primitive_to_proto_type(field_type, deps2)
|
|
233
|
+
return Field(comment, label, proto_type, '', '', field_name, index, deps2)
|
|
234
|
+
raise ValueError(f"Unknown field type {field_type}")
|
|
235
|
+
|
|
236
|
+
def avro_schema_to_proto_message(self, avro_schema: dict, proto_files: ProtoFiles) -> str:
    """Register one top-level Avro schema in ``proto_files`` and return its name.

    A ``record`` schema is converted with ``convert_record_type`` and stored
    among the messages of the proto file whose package equals the schema's
    namespace; an ``enum`` schema is stored among that file's enums. The
    proto file is created and appended to ``proto_files.files`` when no file
    with that package exists yet. Schemas of any other type are not
    registered. A missing or empty ``namespace`` falls back to
    ``self.default_namespace``.
    """
    doc_comment = Comment(avro_schema["doc"], {}) if 'doc' in avro_schema else Comment('',{})
    package = avro_schema.get("namespace", '') or self.default_namespace

    def package_file():
        # Resolved only when needed, i.e. after the schema was converted,
        # so a file added to proto_files during conversion is picked up.
        target = next((f for f in proto_files.files if f.package == package), None)
        if not target:
            target = ProtoFile({}, {}, {}, [], {}, package)
            proto_files.files.append(target)
        return target

    schema_kind = avro_schema['type']
    if schema_kind == 'record':
        record_message = self.convert_record_type(avro_schema, doc_comment, proto_files)
        package_file().messages[record_message.name] = record_message
    elif schema_kind == 'enum':
        enum_name = avro_schema['name']
        # Each symbol becomes a Field numbered by its position in the list.
        symbols = {}
        for ordinal, symbol in enumerate(avro_schema['symbols']):
            symbols[symbol] = Field(doc_comment, '', symbol, '', '', symbol, ordinal, [])
        package_file().enums[enum_name] = Enum(doc_comment, enum_name, symbols)
    return avro_schema["name"]
|
|
261
|
+
|
|
262
|
+
def avro_schema_to_proto_messages(self, avro_schema_input, proto_files: ProtoFiles):
    """Convert one Avro schema, or a list of them, into ``proto_files``.

    A non-list input is treated as a single schema; every schema is handed
    to ``avro_schema_to_proto_message`` in order.
    """
    schemas = avro_schema_input if isinstance(avro_schema_input, list) else [avro_schema_input]
    for schema in schemas:
        self.avro_schema_to_proto_message(schema, proto_files)
|
|
270
|
+
|
|
271
|
+
def save_proto_to_file(self, proto_files: ProtoFiles, proto_path):
    """Write every proto file in ``proto_files`` to ``<proto_path>/<package>.proto``.

    Each output starts with the proto3 syntax line and the package
    statement, followed by the imports, the top-level enums, the messages
    (rendered with ``render_message``) and the services.

    Before writing, ``proto.imports`` is extended with the packages that the
    top-level messages depend on, excluding the file's own package and
    entries that are already present.

    Args:
        proto_files: Container whose ``files`` are written one by one.
        proto_path: Directory that receives the ``.proto`` files; missing
            directories are created.
    """
    for proto in proto_files.files:
        # For a dotted dependency, the part before the last '.' is the
        # package to import. Sorting keeps the import list identical from
        # run to run (set iteration order of strings is not stable).
        packages = sorted({
            dep.rsplit('.', 1)[0]
            for message in proto.messages.values()
            for dep in message.dependencies
            if '.' in dep
        })
        # Skip what is already imported so that saving the same ProtoFiles
        # twice does not produce duplicate import statements.
        for package in packages:
            if package != proto.package and package not in proto.imports:
                proto.imports.append(package)

        # Build the whole text first: a rendering error then cannot leave a
        # truncated file behind.
        parts = ['syntax = "proto3";\n\n', f'package {proto.package};\n\n']
        for import_package in proto.imports:
            parts.append(f"import \"{import_package}.proto\";\n")
        if proto.imports:
            parts.append("\n")
        for enum_name, enum in proto.enums.items():
            parts.append(f"enum {enum_name} {{\n")
            for field in enum.fields.values():
                parts.append(f"{indent}{field.name} = {field.number};\n")
            parts.append("}\n\n")
        for message in proto.messages.values():
            parts.append(self.render_message(message))
        for service in proto.services.values():
            parts.append(f"service {service.name} {{\n")
            for func in service.functions.values():
                parts.append(f"{indent}rpc {func.name} ({func.in_type}) returns ({func.out_type}) {{\n")
                parts.append(f"{indent}{indent}option (google.api.http) = {{\n")
                parts.append(f"{indent}{indent}{indent}post: \"{func.uri}\"\n")
                parts.append(f"{indent}{indent}}};\n")
                parts.append(f"{indent}}};\n")
            parts.append("}\n\n")

        proto_file_path = os.path.join(proto_path, f"{proto.package}.proto")
        proto_dir = os.path.dirname(proto_file_path)
        # dirname is '' when proto_path is empty (current directory);
        # os.makedirs('') would raise, so only create a real directory.
        if proto_dir:
            os.makedirs(proto_dir, exist_ok=True)
        with open(proto_file_path, 'w', encoding='utf-8') as proto_file:
            proto_file.write(''.join(parts))
|
|
315
|
+
|
|
316
|
+
def render_message(self, message, level=0) -> str:
    """Render ``message`` and everything nested in it as proto source text.

    Fields and oneofs are emitted in ascending field-number order (a oneof
    is positioned by the number of its first field), followed by the nested
    enums and then the nested messages, which are rendered recursively one
    indentation level deeper.

    Args:
        message: The message to render.
        level: Nesting depth; the output is indented by ``indent * level``.

    Returns:
        The rendered text, ending with the closing brace line.
    """
    outer = f"{indent*level}"
    inner = f"{outer}{indent}"

    def label_prefix(label):
        # The label followed by a separating space, or nothing extra for an
        # empty label.
        return f"{label}{' ' if label else ''}"

    def first_number(member):
        # Sort key: a field's own number, or the number of a oneof's first field.
        if isinstance(member, Field):
            return member.number
        return member.fields[0].number

    lines = [f"{outer}message {message.name} {{\n"]
    for member in sorted(message.fields + message.oneofs, key=first_number):
        if isinstance(member, Field):
            if member.type == "map":
                rendered_type = f"map<{member.key_type}, {member.val_type}>"
            elif member.type == "array":
                # Arrays are written as their element type; the label
                # carried by the field precedes it.
                rendered_type = f"{member.val_type}"
            else:
                rendered_type = f"{member.type}"
            lines.append(f"{inner}{label_prefix(member.label)}{rendered_type} {member.name} = {member.number};\n")
        else:
            lines.append(f"{inner}oneof {member.name} {{\n")
            lines.extend(
                f"{inner}{indent}{label_prefix(choice.label)}{choice.type} {choice.name} = {choice.number};\n"
                for choice in member.fields
            )
            lines.append(f"{inner}}}\n")

    for nested_enum in message.enums.values():
        lines.append(f"{inner}enum {nested_enum.name} {{\n")
        lines.extend(
            f"{inner}{indent}{label_prefix(symbol.label)}{symbol.name} = {symbol.number};\n"
            for symbol in nested_enum.fields.values()
        )
        lines.append(f"{inner}}}\n")

    for nested_message in message.messages.values():
        lines.append(self.render_message(nested_message, level+1))

    lines.append(f"{outer}}}\n")
    return "".join(lines)
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def convert_avro_to_proto(self, avro_schema_path, proto_file_path):
    """Convert an Avro schema file to Protobuf ``.proto`` output.

    The schema is loaded from ``avro_schema_path``, converted with
    ``avro_schema_to_proto_messages`` and written with
    ``save_proto_to_file``, which receives ``proto_file_path`` as the
    location to write to.

    Args:
        avro_schema_path: Path of the JSON file holding the Avro schema
            (a single schema or a list of schemas).
        proto_file_path: Output location handed to ``save_proto_to_file``.
    """
    # JSON text is UTF-8 by specification; do not depend on the platform's
    # locale encoding when reading the schema.
    with open(avro_schema_path, 'r', encoding='utf-8') as avro_file:
        avro_schema = json.load(avro_file)
    proto_files = ProtoFiles([])
    self.avro_schema_to_proto_messages(avro_schema, proto_files)
    self.save_proto_to_file(proto_files, proto_file_path)
|
|
353
|
+
|
|
354
|
+
def convert_avro_to_proto(avro_schema_path, proto_file_path, naming_mode: Literal['snake', 'pascal', 'camel'] = 'pascal', allow_optional: bool = False):
    """Convert an Avro schema file to Protobuf using a configured ``AvroToProto``.

    Args:
        avro_schema_path: Path of the Avro schema file to read.
        proto_file_path: Output location; its base name without extension,
            with ``-`` replaced by ``_``, becomes the default namespace.
        naming_mode: Stored on the converter as ``naming_mode``.
        allow_optional: Stored on the converter as ``allow_optional``.
    """
    converter = AvroToProto()
    converter.naming_mode = naming_mode
    converter.allow_optional = allow_optional
    # Derive the fallback namespace from the output name, e.g. "my-api.proto" -> "my_api".
    base_name = os.path.basename(proto_file_path)
    stem, _extension = os.path.splitext(base_name)
    converter.default_namespace = stem.replace('-','_')
    converter.convert_avro_to_proto(avro_schema_path, proto_file_path)
|