structurize 2.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avrotize/__init__.py +64 -0
- avrotize/__main__.py +6 -0
- avrotize/_version.py +34 -0
- avrotize/asn1toavro.py +160 -0
- avrotize/avrotize.py +152 -0
- avrotize/avrotocpp.py +483 -0
- avrotize/avrotocsharp.py +1075 -0
- avrotize/avrotocsv.py +121 -0
- avrotize/avrotodatapackage.py +173 -0
- avrotize/avrotodb.py +1383 -0
- avrotize/avrotogo.py +476 -0
- avrotize/avrotographql.py +197 -0
- avrotize/avrotoiceberg.py +210 -0
- avrotize/avrotojava.py +2156 -0
- avrotize/avrotojs.py +250 -0
- avrotize/avrotojsons.py +481 -0
- avrotize/avrotojstruct.py +345 -0
- avrotize/avrotokusto.py +364 -0
- avrotize/avrotomd.py +137 -0
- avrotize/avrotools.py +168 -0
- avrotize/avrotoparquet.py +208 -0
- avrotize/avrotoproto.py +359 -0
- avrotize/avrotopython.py +624 -0
- avrotize/avrotorust.py +435 -0
- avrotize/avrotots.py +598 -0
- avrotize/avrotoxsd.py +344 -0
- avrotize/cddltostructure.py +1841 -0
- avrotize/commands.json +3337 -0
- avrotize/common.py +834 -0
- avrotize/constants.py +72 -0
- avrotize/csvtoavro.py +132 -0
- avrotize/datapackagetoavro.py +76 -0
- avrotize/dependencies/cpp/vcpkg/vcpkg.json +19 -0
- avrotize/dependencies/typescript/node22/package.json +16 -0
- avrotize/dependency_resolver.py +348 -0
- avrotize/dependency_version.py +432 -0
- avrotize/jsonstoavro.py +2167 -0
- avrotize/jsonstostructure.py +2642 -0
- avrotize/jstructtoavro.py +878 -0
- avrotize/kstructtoavro.py +93 -0
- avrotize/kustotoavro.py +455 -0
- avrotize/parquettoavro.py +157 -0
- avrotize/proto2parser.py +498 -0
- avrotize/proto3parser.py +403 -0
- avrotize/prototoavro.py +382 -0
- avrotize/structuretocddl.py +597 -0
- avrotize/structuretocpp.py +697 -0
- avrotize/structuretocsharp.py +2295 -0
- avrotize/structuretocsv.py +365 -0
- avrotize/structuretodatapackage.py +659 -0
- avrotize/structuretodb.py +1125 -0
- avrotize/structuretogo.py +720 -0
- avrotize/structuretographql.py +502 -0
- avrotize/structuretoiceberg.py +355 -0
- avrotize/structuretojava.py +853 -0
- avrotize/structuretojsons.py +498 -0
- avrotize/structuretokusto.py +639 -0
- avrotize/structuretomd.py +322 -0
- avrotize/structuretoproto.py +764 -0
- avrotize/structuretopython.py +772 -0
- avrotize/structuretorust.py +714 -0
- avrotize/structuretots.py +653 -0
- avrotize/structuretoxsd.py +679 -0
- avrotize/xsdtoavro.py +413 -0
- structurize-2.19.0.dist-info/METADATA +107 -0
- structurize-2.19.0.dist-info/RECORD +70 -0
- structurize-2.19.0.dist-info/WHEEL +5 -0
- structurize-2.19.0.dist-info/entry_points.txt +2 -0
- structurize-2.19.0.dist-info/licenses/LICENSE +201 -0
- structurize-2.19.0.dist-info/top_level.txt +1 -0
avrotize/prototoavro.py
ADDED
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module to convert Protobuf .proto files to Avro schema.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import re
|
|
8
|
+
from typing import Dict, List, Tuple
|
|
9
|
+
from avrotize.common import pascal
|
|
10
|
+
from avrotize.dependency_resolver import sort_messages_by_dependencies, inline_dependencies_of
|
|
11
|
+
from . import proto2parser
|
|
12
|
+
from . import proto3parser
|
|
13
|
+
|
|
14
|
+
AvroSchema = Dict[str, 'AvroSchema'] | List['AvroSchema'] | str | None
|
|
15
|
+
|
|
16
|
+
class ProtoToAvroConverter:
|
|
17
|
+
"""Class to convert Protobuf .proto files to Avro schema."""
|
|
18
|
+
|
|
19
|
+
isomorphic_types = ['float', 'double', 'bytes', 'string']
|
|
20
|
+
|
|
21
|
+
def __init__(self, proto_root: str = None):
|
|
22
|
+
"""Initialize ProtoToAvroConverter.
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
proto_root (str): Optional root directory for resolving proto imports.
|
|
26
|
+
When provided, imports are resolved relative to this directory.
|
|
27
|
+
"""
|
|
28
|
+
self.imported_types: Dict[str, str] = {}
|
|
29
|
+
self.generated_types: Dict[str, str] = {}
|
|
30
|
+
self.forward_references: Dict[str, str] = {} # table for resolvbing forward references
|
|
31
|
+
self.proto_root: str = proto_root
|
|
32
|
+
|
|
33
|
+
def proto_type_to_avro_primitive(self, proto_type: str)-> Tuple[bool, str]:
|
|
34
|
+
"""
|
|
35
|
+
Map Protobuf types to Avro primitive types.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
proto_type (str): Protobuf type to convert.
|
|
39
|
+
|
|
40
|
+
Returns:
|
|
41
|
+
str or dict: Corresponding Avro type.
|
|
42
|
+
"""
|
|
43
|
+
mapping = {
|
|
44
|
+
'google.protobuf.Empty': 'null', # Special handling may be required
|
|
45
|
+
'bool': 'boolean',
|
|
46
|
+
'int32': 'int',
|
|
47
|
+
'uint32': 'int',
|
|
48
|
+
'sint32': 'int',
|
|
49
|
+
'int64': 'long',
|
|
50
|
+
'uint64': 'long',
|
|
51
|
+
'sint64': 'long',
|
|
52
|
+
'fixed32': 'int',
|
|
53
|
+
'fixed64': 'long',
|
|
54
|
+
'sfixed32': 'int',
|
|
55
|
+
'sfixed64': 'long',
|
|
56
|
+
'google.protobuf.Timestamp': {
|
|
57
|
+
"type": "long",
|
|
58
|
+
"logicalType": "timestamp-micros"
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
if proto_type in self.isomorphic_types:
|
|
62
|
+
return True, proto_type
|
|
63
|
+
mapped = mapping.get(proto_type, None)
|
|
64
|
+
if mapped:
|
|
65
|
+
return True, mapped
|
|
66
|
+
return False, proto_type
|
|
67
|
+
|
|
68
|
+
def build_forward_references_from_message(self, proto_message_type: proto2parser.Message | proto3parser.Message, avro_namespace: str):
|
|
69
|
+
"""
|
|
70
|
+
Build forward references from a Protobuf message.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
proto_message_type: The message type from the parsed proto file.
|
|
74
|
+
avro_namespace (str): The namespace for the message.
|
|
75
|
+
"""
|
|
76
|
+
for _, nested_message in proto_message_type.messages.items():
|
|
77
|
+
nested_namespace = avro_namespace + '.' + proto_message_type.name + '_types'
|
|
78
|
+
self.build_forward_references_from_message(nested_message, nested_namespace)
|
|
79
|
+
for _, enum_type in proto_message_type.enums.items():
|
|
80
|
+
nested_namespace = avro_namespace + '.' + proto_message_type.name + '_types'
|
|
81
|
+
self.forward_references[nested_namespace+'.'+enum_type.name] = "enum"
|
|
82
|
+
self.forward_references[avro_namespace+'.'+proto_message_type.name] = "record"
|
|
83
|
+
|
|
84
|
+
def build_forward_references_from_file(self, proto_file: proto3parser.ProtoFile| proto2parser.ProtoFile, avro_namespace: str):
|
|
85
|
+
"""
|
|
86
|
+
Build forward references from a Protobuf file.
|
|
87
|
+
|
|
88
|
+
Args:
|
|
89
|
+
proto_file: The parsed proto file.
|
|
90
|
+
avro_namespace (str): The namespace for the message.
|
|
91
|
+
"""
|
|
92
|
+
for _, enum_type in proto_file.enums.items():
|
|
93
|
+
self.forward_references[avro_namespace+'.'+enum_type.name] = "enum"
|
|
94
|
+
for _, message in proto_file.messages.items():
|
|
95
|
+
self.build_forward_references_from_message(message, avro_namespace)
|
|
96
|
+
|
|
97
|
+
def convert_proto_to_avro_schema(self, proto_file_path: str, avro_namespace: str, message_type: str) -> list:
|
|
98
|
+
"""
|
|
99
|
+
Convert .proto file to Avro schema.
|
|
100
|
+
|
|
101
|
+
Args:
|
|
102
|
+
proto_file_path (str): Path to the Protobuf .proto file.
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
list: Avro schema as a list of dictionaries.
|
|
106
|
+
"""
|
|
107
|
+
with open(proto_file_path, 'r', encoding='utf-8') as proto_file:
|
|
108
|
+
proto_schema = proto_file.read()
|
|
109
|
+
|
|
110
|
+
# Determine whether we have proto3 or proto2 and parse the data
|
|
111
|
+
if re.search(r'syntax\s*=\s*"proto3"', proto_schema):
|
|
112
|
+
data: proto3parser.ProtoFile = proto3parser.parse(proto_schema)
|
|
113
|
+
else:
|
|
114
|
+
data: proto2parser.ProtoFile = proto2parser.parse(proto_schema)
|
|
115
|
+
|
|
116
|
+
# Build forward references
|
|
117
|
+
self.build_forward_references_from_file(data, avro_namespace)
|
|
118
|
+
# Avro schema header
|
|
119
|
+
avro_schema = []
|
|
120
|
+
for import_ in data.imports:
|
|
121
|
+
# Handle protobuf imports
|
|
122
|
+
if import_.startswith('google/protobuf/'):
|
|
123
|
+
script_path = os.path.dirname(os.path.abspath(__file__))
|
|
124
|
+
avsc_dir = os.path.join(script_path, 'prototypes')
|
|
125
|
+
# Load the corresponding avsc file from ./prototypes at this script's path into avro_schema
|
|
126
|
+
avsc = f'{avsc_dir}/{import_.replace("google/protobuf/", "").replace(".proto", ".avsc")}'
|
|
127
|
+
with open(avsc, 'r', encoding='utf-8') as avsc_file:
|
|
128
|
+
types = json.load(avsc_file)
|
|
129
|
+
for t in types:
|
|
130
|
+
qualified_name = t["namespace"] + "." + t["name"]
|
|
131
|
+
self.imported_types[qualified_name] = t
|
|
132
|
+
else:
|
|
133
|
+
# Resolve import path: try proto_root first, then fall back to file-relative path
|
|
134
|
+
import_path = None
|
|
135
|
+
|
|
136
|
+
if self.proto_root:
|
|
137
|
+
# Try resolving relative to proto_root
|
|
138
|
+
candidate_path = os.path.join(self.proto_root, import_)
|
|
139
|
+
if os.path.exists(candidate_path):
|
|
140
|
+
import_path = candidate_path
|
|
141
|
+
|
|
142
|
+
if not import_path:
|
|
143
|
+
# Fall back to resolving relative to the directory of the current proto file
|
|
144
|
+
cwd = os.path.join(os.getcwd(), os.path.dirname(proto_file_path))
|
|
145
|
+
candidate_path = os.path.join(cwd, import_)
|
|
146
|
+
if os.path.exists(candidate_path):
|
|
147
|
+
import_path = candidate_path
|
|
148
|
+
|
|
149
|
+
# Raise an exception if the imported file does not exist
|
|
150
|
+
if not import_path:
|
|
151
|
+
raise FileNotFoundError(f'Import file {import_} does not exist. Searched in proto_root: {self.proto_root}, and relative to: {os.path.dirname(proto_file_path)}')
|
|
152
|
+
|
|
153
|
+
package_name = pascal(import_.replace('.proto', ''))
|
|
154
|
+
import_namespace = (avro_namespace + '.' + package_name) if avro_namespace else package_name
|
|
155
|
+
avro_schema.extend(self.convert_proto_to_avro_schema(import_path, import_namespace, message_type))
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# Convert enum fields
|
|
159
|
+
for _, enum_type in data.enums.items():
|
|
160
|
+
self.handle_enum(enum_type, avro_schema, avro_namespace)
|
|
161
|
+
|
|
162
|
+
# Convert message fields
|
|
163
|
+
for _, m in data.messages.items():
|
|
164
|
+
self.handle_message(m, avro_schema, avro_namespace)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
# Sort the messages in avro_schema by dependencies
|
|
168
|
+
if message_type:
|
|
169
|
+
message_schema = next(
|
|
170
|
+
(message for message in avro_schema if message['type'] == "record" and message['name'] == message_type), None)
|
|
171
|
+
if not message_schema:
|
|
172
|
+
raise ValueError(f'Message type {message_type} not found in the Avro schema.')
|
|
173
|
+
else:
|
|
174
|
+
inline_dependencies_of(avro_schema, message_schema)
|
|
175
|
+
return message_schema
|
|
176
|
+
else:
|
|
177
|
+
avro_schema = sort_messages_by_dependencies(avro_schema)
|
|
178
|
+
return avro_schema
|
|
179
|
+
|
|
180
|
+
@staticmethod
|
|
181
|
+
def clean_comment(comment: str):
|
|
182
|
+
"""
|
|
183
|
+
Clean comments by stripping slashes, newlines, linefeeds, and extra whitespace.
|
|
184
|
+
|
|
185
|
+
Args:
|
|
186
|
+
comment (str): The comment to clean.
|
|
187
|
+
|
|
188
|
+
Returns:
|
|
189
|
+
str: Cleaned comment.
|
|
190
|
+
"""
|
|
191
|
+
if comment:
|
|
192
|
+
return comment.replace('//', '').replace('\n', '').lstrip().rstrip()
|
|
193
|
+
return None
|
|
194
|
+
|
|
195
|
+
def handle_enum(self, enum_type: proto2parser.Enum | proto3parser.Enum, avro_schema: AvroSchema, avro_namespace: str) -> AvroSchema:
|
|
196
|
+
"""
|
|
197
|
+
Convert enum fields to avro schema.
|
|
198
|
+
|
|
199
|
+
Args:
|
|
200
|
+
enum_type: The enum type from the parsed proto file.
|
|
201
|
+
avro_schema (list): The list to append the converted enum schema.
|
|
202
|
+
namespace (str): The namespace for the enum.
|
|
203
|
+
"""
|
|
204
|
+
comment = self.clean_comment(
|
|
205
|
+
enum_type.comment.content if enum_type.comment and enum_type.comment.content else None)
|
|
206
|
+
|
|
207
|
+
# Create avro schema
|
|
208
|
+
avro_enum: AvroSchema = {
|
|
209
|
+
'name': enum_type.name,
|
|
210
|
+
'type': 'enum',
|
|
211
|
+
'namespace': avro_namespace,
|
|
212
|
+
'symbols': [],
|
|
213
|
+
'ordinals': {}
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
if comment:
|
|
217
|
+
avro_enum['doc'] = comment
|
|
218
|
+
for value in enum_type.fields:
|
|
219
|
+
avro_enum['symbols'].append(value.name)
|
|
220
|
+
avro_enum['ordinals'][value.name] = int(value.number)
|
|
221
|
+
avro_schema.append(avro_enum)
|
|
222
|
+
self.generated_types[avro_enum['namespace']+'.'+avro_enum['name']] = "enum"
|
|
223
|
+
return avro_enum
|
|
224
|
+
|
|
225
|
+
def handle_message(self, proto_message_type: proto2parser.Message | proto3parser.Message, avro_schema: AvroSchema, avro_namespace: str)-> AvroSchema:
|
|
226
|
+
"""
|
|
227
|
+
Convert protobuf messages to avro records.
|
|
228
|
+
|
|
229
|
+
Args:
|
|
230
|
+
m: The message type from the parsed proto file.
|
|
231
|
+
avro_schema (list): The list to append the converted message schema.
|
|
232
|
+
namespace (str): The namespace for the message.
|
|
233
|
+
"""
|
|
234
|
+
dependencies = []
|
|
235
|
+
|
|
236
|
+
comment = self.clean_comment(proto_message_type.comment.content if proto_message_type.comment and proto_message_type.comment.content else None)
|
|
237
|
+
avro_record: AvroSchema = {
|
|
238
|
+
'type': 'record',
|
|
239
|
+
'name': proto_message_type.name,
|
|
240
|
+
'namespace': avro_namespace,
|
|
241
|
+
'fields': []
|
|
242
|
+
}
|
|
243
|
+
if comment:
|
|
244
|
+
avro_record['doc'] = comment
|
|
245
|
+
for proto_field in proto_message_type.fields:
|
|
246
|
+
avro_type = self.get_avro_type_for_field(proto_message_type, avro_namespace, avro_schema, dependencies, proto_field)
|
|
247
|
+
comment = self.clean_comment(proto_field.comment.content if proto_field.comment and proto_field.comment.content else None)
|
|
248
|
+
|
|
249
|
+
avro_field = {
|
|
250
|
+
'name': proto_field.name,
|
|
251
|
+
'type': avro_type,
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
if comment:
|
|
255
|
+
avro_field['doc'] = comment
|
|
256
|
+
|
|
257
|
+
avro_record['fields'].append(avro_field)
|
|
258
|
+
|
|
259
|
+
for proto_field in proto_message_type.oneofs:
|
|
260
|
+
avro_oneof: AvroSchema = {
|
|
261
|
+
'name': proto_field.name,
|
|
262
|
+
'type': []
|
|
263
|
+
}
|
|
264
|
+
comment = self.clean_comment(proto_field.comment.content if proto_field.comment and proto_field.comment.content else None)
|
|
265
|
+
if comment:
|
|
266
|
+
avro_oneof['doc'] = comment
|
|
267
|
+
for oneof_field in proto_field.fields:
|
|
268
|
+
avro_type = self.get_avro_type_for_field(proto_message_type, avro_namespace, avro_schema, dependencies, oneof_field)
|
|
269
|
+
comment = self.clean_comment(oneof_field.comment.content if oneof_field.comment and oneof_field.comment.content else None)
|
|
270
|
+
if comment:
|
|
271
|
+
oneof_field['doc'] = comment
|
|
272
|
+
avro_oneof['type'].append(avro_type)
|
|
273
|
+
avro_record['fields'].append(avro_oneof)
|
|
274
|
+
|
|
275
|
+
if dependencies:
|
|
276
|
+
avro_record['dependencies'] = dependencies
|
|
277
|
+
avro_schema.append(avro_record)
|
|
278
|
+
for _, nested_message in proto_message_type.messages.items():
|
|
279
|
+
nested_namespace = avro_namespace + '.' + proto_message_type.name + '_types'
|
|
280
|
+
self.handle_message(nested_message, avro_schema, nested_namespace)
|
|
281
|
+
# Convert enum fields
|
|
282
|
+
for _, enum_type in proto_message_type.enums.items():
|
|
283
|
+
nested_namespace = avro_namespace + '.' + proto_message_type.name + '_types'
|
|
284
|
+
self.handle_enum(enum_type, avro_schema, nested_namespace)
|
|
285
|
+
self.generated_types[avro_record['namespace']+'.'+avro_record['name']] = "record"
|
|
286
|
+
return avro_record
|
|
287
|
+
|
|
288
|
+
def get_avro_type_for_field(self, proto_message_type: proto2parser.Message | proto3parser.Message, avro_namespace: str, avro_schema: AvroSchema, dependencies: List[str], proto_field: proto2parser.Field | proto3parser.Field):
|
|
289
|
+
"""
|
|
290
|
+
Get Avro type for a Protobuf field.
|
|
291
|
+
|
|
292
|
+
Args:
|
|
293
|
+
m: The message type from the parsed proto file.
|
|
294
|
+
namespace (str): The namespace for the message.
|
|
295
|
+
dependencies (list): The list to append the dependencies.
|
|
296
|
+
f: The field from the parsed proto file.
|
|
297
|
+
|
|
298
|
+
Returns:
|
|
299
|
+
str or dict: Corresponding Avro type.
|
|
300
|
+
"""
|
|
301
|
+
avro_field_type: AvroSchema = None
|
|
302
|
+
proto_field_type = proto_field.val_type if proto_field.label == 'repeated' or proto_field.type == 'map' else proto_field.type
|
|
303
|
+
is_primitive, avro_field_type = self.proto_type_to_avro_primitive(proto_field_type)
|
|
304
|
+
|
|
305
|
+
if not is_primitive:
|
|
306
|
+
if proto_field.type in self.imported_types:
|
|
307
|
+
avro_field_type = self.imported_types[proto_field.type]
|
|
308
|
+
else:
|
|
309
|
+
avro_field_type = avro_namespace + '.' + avro_field_type
|
|
310
|
+
found_in_nested_definitions = False
|
|
311
|
+
for k, nested_proto_message_type in proto_message_type.messages.items():
|
|
312
|
+
nested_namespace = avro_namespace + '.' + proto_message_type.name + '_types'
|
|
313
|
+
if nested_proto_message_type.name == proto_field_type:
|
|
314
|
+
avro_field_type = self.handle_message(nested_proto_message_type, avro_schema, nested_namespace)
|
|
315
|
+
del proto_message_type.messages[k]
|
|
316
|
+
if 'dependencies' in avro_field_type:
|
|
317
|
+
dependencies.extend(avro_field_type['dependencies'])
|
|
318
|
+
del avro_field_type['dependencies']
|
|
319
|
+
found_in_nested_definitions = True
|
|
320
|
+
break
|
|
321
|
+
if not found_in_nested_definitions:
|
|
322
|
+
for k, nested_proto_enum_type in proto_message_type.enums.items():
|
|
323
|
+
nested_namespace = avro_namespace + '.' + proto_message_type.name + '_types'
|
|
324
|
+
if nested_proto_enum_type.name == proto_field_type:
|
|
325
|
+
avro_field_type = self.handle_enum(nested_proto_enum_type, avro_schema, nested_namespace)
|
|
326
|
+
del proto_message_type.enums[k]
|
|
327
|
+
found_in_nested_definitions = True
|
|
328
|
+
break
|
|
329
|
+
if not found_in_nested_definitions:
|
|
330
|
+
dependency_avro_field_type = avro_field_type
|
|
331
|
+
while '.' in dependency_avro_field_type:
|
|
332
|
+
if dependency_avro_field_type in self.forward_references:
|
|
333
|
+
dependencies.append(dependency_avro_field_type)
|
|
334
|
+
break
|
|
335
|
+
n = dependency_avro_field_type.split('.')
|
|
336
|
+
dependency_avro_field_type = '.'.join(n[:-2]+[n[-1]])
|
|
337
|
+
|
|
338
|
+
if proto_field.label == 'optional':
|
|
339
|
+
avro_field_type = ["null", avro_field_type]
|
|
340
|
+
if proto_field.label == 'repeated':
|
|
341
|
+
avro_type: AvroSchema = {
|
|
342
|
+
"type": "array",
|
|
343
|
+
"items": avro_field_type
|
|
344
|
+
}
|
|
345
|
+
elif proto_field.type == 'map':
|
|
346
|
+
avro_type: AvroSchema = {
|
|
347
|
+
"type": "map",
|
|
348
|
+
"values": avro_field_type,
|
|
349
|
+
}
|
|
350
|
+
else:
|
|
351
|
+
avro_type = avro_field_type
|
|
352
|
+
return avro_type
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def convert_proto_to_avro(proto_file_path: str, avro_schema_path: str, namespace: str = None, message_type: str = None, proto_root: str = None):
    """
    Convert a Protobuf .proto file and write the resulting Avro schema as JSON.

    Args:
        proto_file_path (str): Path to the Protobuf .proto file.
        avro_schema_path (str): Path of the file the Avro schema is written to.
        namespace (str): Optional namespace for the Avro schema. When omitted,
            it is derived from the proto file's base name.
        message_type (str): Optional specific message type to extract.
        proto_root (str): Optional root directory for resolving proto imports,
            passed through to the converter.

    Raises:
        FileNotFoundError: If the proto file does not exist.
    """
    proto_is_missing = not os.path.exists(proto_file_path)
    if proto_is_missing:
        raise FileNotFoundError(f'Proto file {proto_file_path} does not exist.')

    converter = ProtoToAvroConverter(proto_root=proto_root)

    # Without an explicit namespace, use the file name minus its '.proto' part.
    if namespace:
        effective_namespace = namespace
    else:
        file_name = os.path.basename(proto_file_path)
        effective_namespace = pascal(file_name.replace('.proto', ''))

    schema = converter.convert_proto_to_avro_schema(proto_file_path, effective_namespace, message_type)

    # Serialize the schema as indented JSON into the target file.
    with open(avro_schema_path, 'w', encoding='utf-8') as schema_file:
        schema_file.write(json.dumps(schema, indent=2))
|
|
382
|
+
|