structurize 2.16.2__py3-none-any.whl → 2.16.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. avrotize/__init__.py +63 -63
  2. avrotize/__main__.py +5 -5
  3. avrotize/_version.py +34 -34
  4. avrotize/asn1toavro.py +160 -160
  5. avrotize/avrotize.py +152 -152
  6. avrotize/avrotocpp.py +483 -483
  7. avrotize/avrotocsharp.py +992 -992
  8. avrotize/avrotocsv.py +121 -121
  9. avrotize/avrotodatapackage.py +173 -173
  10. avrotize/avrotodb.py +1383 -1383
  11. avrotize/avrotogo.py +476 -476
  12. avrotize/avrotographql.py +197 -197
  13. avrotize/avrotoiceberg.py +210 -210
  14. avrotize/avrotojava.py +1023 -1023
  15. avrotize/avrotojs.py +250 -250
  16. avrotize/avrotojsons.py +481 -481
  17. avrotize/avrotojstruct.py +345 -345
  18. avrotize/avrotokusto.py +363 -363
  19. avrotize/avrotomd.py +137 -137
  20. avrotize/avrotools.py +168 -168
  21. avrotize/avrotoparquet.py +208 -208
  22. avrotize/avrotoproto.py +358 -358
  23. avrotize/avrotopython.py +622 -622
  24. avrotize/avrotorust.py +435 -435
  25. avrotize/avrotots.py +598 -598
  26. avrotize/avrotoxsd.py +344 -344
  27. avrotize/commands.json +2493 -2433
  28. avrotize/common.py +828 -828
  29. avrotize/constants.py +4 -4
  30. avrotize/csvtoavro.py +131 -131
  31. avrotize/datapackagetoavro.py +76 -76
  32. avrotize/dependency_resolver.py +348 -348
  33. avrotize/jsonstoavro.py +1698 -1698
  34. avrotize/jsonstostructure.py +2642 -2642
  35. avrotize/jstructtoavro.py +878 -878
  36. avrotize/kstructtoavro.py +93 -93
  37. avrotize/kustotoavro.py +455 -455
  38. avrotize/parquettoavro.py +157 -157
  39. avrotize/proto2parser.py +497 -497
  40. avrotize/proto3parser.py +402 -402
  41. avrotize/prototoavro.py +382 -382
  42. avrotize/structuretocsharp.py +2005 -2005
  43. avrotize/structuretojsons.py +498 -498
  44. avrotize/structuretopython.py +772 -772
  45. avrotize/structuretots.py +653 -0
  46. avrotize/xsdtoavro.py +413 -413
  47. {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/METADATA +848 -805
  48. structurize-2.16.5.dist-info/RECORD +52 -0
  49. {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/licenses/LICENSE +200 -200
  50. structurize-2.16.2.dist-info/RECORD +0 -51
  51. {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/WHEEL +0 -0
  52. {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/entry_points.txt +0 -0
  53. {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/top_level.txt +0 -0
avrotize/proto2parser.py CHANGED
@@ -1,498 +1,498 @@
1
- #!/usr/bin/env python
2
- # Licensed to the Apache Software Foundation (ASF) under one
3
- # or more contributor license agreements. See the NOTICE file
4
- # distributed with this work for additional information
5
- # regarding copyright ownership. The ASF licenses this file
6
- # to you under the Apache License, Version 2.0 (the
7
- # "License"); you may not use this file except in compliance
8
- # with the License. You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing,
13
- # software distributed under the License is distributed on an
14
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- # KIND, either express or implied. See the License for the
16
- # specific language governing permissions and limitations
17
- # under the License.
18
- # adapted from: https://github.com/xophiix/proto2parser/
19
-
20
- from lark import Lark, Transformer, Tree, Token
21
- from collections import namedtuple
22
- import typing
23
- import json
24
-
25
- BNF = r'''
26
- OCTALDIGIT: "0..7"
27
- IDENT: ( "_" )* LETTER ( LETTER | DECIMALDIGIT | "_" )*
28
- FULLIDENT: IDENT ( "." IDENT )*
29
- MESSAGENAME: IDENT
30
- ENUMNAME: IDENT
31
- FIELDNAME: IDENT
32
- ONEOFNAME: IDENT
33
- MAPNAME: IDENT
34
- SERVICENAME: IDENT
35
- TAGNAME: IDENT
36
- TAGVALUE: IDENT
37
- RPCNAME: IDENT
38
- MESSAGETYPE: [ "." ] ( IDENT "." )* MESSAGENAME
39
- ENUMTYPE: [ "." ] ( IDENT "." )* ENUMNAME
40
- CAPITALLETTER: "A..Z"
41
- GROUPNAME: CAPITALLETTER ( LETTER | DECIMALDIGIT | "_" )*
42
-
43
- INTLIT : DECIMALLIT | OCTALLIT | HEXLIT
44
- DECIMALLIT: ( "1".."9" ) ( DECIMALDIGIT )*
45
- OCTALLIT : "0" ( OCTALDIGIT )*
46
- HEXLIT : "0" ( "x" | "X" ) HEXDIGIT ( HEXDIGIT )*
47
-
48
- FLOATLIT: ( DECIMALS "." [ DECIMALS ] [ EXPONENT ] | DECIMALS EXPONENT | "."DECIMALS [ EXPONENT ] ) | "inf" | "nan"
49
- DECIMALS : DECIMALDIGIT ( DECIMALDIGIT )*
50
- EXPONENT : ( "e" | "E" ) [ "+" | "-" ] DECIMALS
51
-
52
- BOOLLIT: "true" | "false"
53
-
54
- STRLIT: ( "'" ( CHARVALUE )* "'" ) | ( "\"" ( CHARVALUE )* "\"" )
55
- CHARVALUE: HEXESCAPE | OCTESCAPE | CHARESCAPE | /[^\0\n\\]/
56
- HEXESCAPE: "\\" ( "x" | "X" ) HEXDIGIT HEXDIGIT
57
- OCTESCAPE: "\\" OCTALDIGIT OCTALDIGIT OCTALDIGIT
58
- CHARESCAPE: "\\" ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | "\\" | "'" | "\"" )
59
- QUOTE: "'" | "\""
60
-
61
- EMPTYSTATEMENT: ";"
62
-
63
- CONSTANT: FULLIDENT | ( [ "-" | "+" ] INTLIT ) | ( [ "-" | "+" ] FLOATLIT ) | STRLIT | BOOLLIT
64
-
65
- syntax: "syntax" "=" QUOTE "proto2" QUOTE tail
66
-
67
- import: "import" [ "weak" | "public" ] STRLIT tail
68
-
69
- package: "package" FULLIDENT tail
70
-
71
- option: [ comments ] "option" OPTIONNAME "=" CONSTANT tail
72
- OPTIONNAME: ( IDENT | "(" FULLIDENT ")" ) ( "." IDENT )*
73
-
74
- LABEL: "required" | "optional" | "repeated"
75
- TYPE: "double" | "float" | "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string" | "bytes" | MESSAGETYPE | ENUMTYPE
76
- FIELDNUMBER: INTLIT
77
-
78
- field: [ comments ] LABEL TYPE FIELDNAME "=" FIELDNUMBER [ "[" fieldoptions "]" ] tail
79
- fieldoptions: fieldoption ( "," fieldoption )*
80
- fieldoption: OPTIONNAME "=" CONSTANT
81
-
82
- oneof: [ comments ] "oneof" ONEOFNAME "{" ( oneoffield | EMPTYSTATEMENT )* "}"
83
- oneoffield: [ comments ] TYPE FIELDNAME "=" FIELDNUMBER [ "[" fieldoptions "]" ] tail
84
- group: [ comments ] LABEL "group" GROUPNAME "=" FIELDNUMBER messagebody
85
-
86
- mapfield: [ comments ] "map" "<" KEYTYPE "," TYPE ">" MAPNAME "=" FIELDNUMBER [ "[" fieldoptions "]" ] tail
87
- KEYTYPE: "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string"
88
-
89
- extensions: [ comments ] "extensions" ranges tail
90
-
91
- reserved: [ comments ] "reserved" ( ranges | fieldnames ) tail
92
- ranges: range ( "," range )*
93
- range: INTLIT [ "to" ( INTLIT | "max" ) ]
94
- fieldnames: FIELDNAME ( "," FIELDNAME )*
95
-
96
- enum: [ comments ] "enum" ENUMNAME enumbody
97
- enumbody: "{" ( option | enumfield | reserved | EMPTYSTATEMENT )* "}"
98
- enumfield: [ comments ] IDENT "=" [ "-" ] INTLIT [ "[" enumvalueoption ( "," enumvalueoption )* "]" ] tail
99
- enumvalueoption: OPTIONNAME "=" CONSTANT
100
-
101
- message: [ comments ] "message" MESSAGENAME messagebody
102
- messagebody: "{" ( field | enum | message | extend | extensions | option | oneof | mapfield | reserved | group | EMPTYSTATEMENT )* "}"
103
- extend: [ comments ] "extend" MESSAGETYPE "{" (field | group)* "}"
104
-
105
- service: [ comments ] "service" SERVICENAME "{" ( option | rpc | EMPTYSTATEMENT )* "}"
106
- rpc: [ comments ] "rpc" RPCNAME "(" [ "stream" ] MESSAGETYPE ")" "returns" "(" [ "stream" ] MESSAGETYPE ")" ( ( "{" ( option | EMPTYSTATEMENT )* "}" ) | ";" )
107
-
108
- proto:[ comments ] [ syntax ] ( import | package | option | topleveldef | EMPTYSTATEMENT )*
109
- topleveldef: message | enum | extend | service
110
-
111
- tail: ";" /[\s|\t]/* [ trail_comment ] NEWLINE
112
- trail_comment: COMMENT
113
- COMMENT: "//" /[^\n]/*
114
- BLOCKCOMMENT: "/*" /./* "*/"
115
- comments: (( COMMENT | BLOCKCOMMENT ) NEWLINE )+
116
-
117
- %import common.HEXDIGIT
118
- %import common.DIGIT -> DECIMALDIGIT
119
- %import common.LETTER
120
- %import common.WS
121
- %import common.NEWLINE
122
- %ignore WS
123
- '''
124
-
125
- Tail = typing.NamedTuple('Tail', [('comment', 'Comment')])
126
- Comment = typing.NamedTuple('Comment', [('content', str), ('tags', typing.Dict[str, typing.Any]), ('ue_specifiers', str)])
127
- Oneof = typing.NamedTuple('Oneof', [('comment', 'Comment'), ('name', str), ('fields', typing.List['Field'])])
128
- FieldOption = typing.NamedTuple('FieldOption', [('name', str), ('content', str)])
129
- Field = typing.NamedTuple('Field', [('comment', 'Comment'), ('label', str), ('type', str), ('key_type', str), ('val_type', str), ('name', str), ('number', int), ('options', typing.Dict[str, 'FieldOption']), ('user_data', typing.Dict[str, typing.Any])])
130
- Enum = typing.NamedTuple('Enum', [('comment', 'Comment'), ('name', str), ('fields', typing.Dict[str, 'Field']), ('user_data', typing.Dict[str, typing.Any])])
131
- Option = typing.NamedTuple('Option', [('comment', 'Comment'), ('name', str), ('content', str)])
132
- Message = typing.NamedTuple('Message', [('comment', 'Comment'), ('name', str), ('fields', typing.List['Field']), ('oneofs', typing.List['Oneof']),
133
- ('messages', typing.Dict[str, 'Message']), ('enums', typing.Dict[str, 'Enum']), ('options', typing.Dict[str, 'Option']), ('user_data', typing.Dict[str, typing.Any])])
134
- Service = typing.NamedTuple('Service', [('name', str), ('functions', typing.Dict[str, 'RpcFunc'])])
135
- RpcFunc = typing.NamedTuple('RpcFunc', [('name', str), ('in_type', str), ('out_type', str), ('uri', str)])
136
- ProtoFile = typing.NamedTuple('ProtoFile',
137
- [('messages', typing.Dict[str, 'Message']), ('enums', typing.Dict[str, 'Enum']),
138
- ('services', typing.Dict[str, 'Service']), ('imports', typing.List[str]),
139
- ('options', typing.Dict[str, str]), ('package', str), ('user_data', typing.Dict[str, typing.Any])])
140
-
141
-
142
- def merge_comments(comments):
143
- content = ""
144
- tags = {}
145
- ue_specifiers = None
146
-
147
- for comment in comments:
148
- content += comment.content
149
- for tag, value in comment.tags.items():
150
- tags[tag] = value
151
- if not ue_specifiers and comment.ue_specifiers:
152
- ue_specifiers = comment.ue_specifiers
153
-
154
- return Comment(content, tags, ue_specifiers)
155
-
156
- def extrat_comments(tokens):
157
- comments = []
158
- for token in tokens:
159
- if isinstance(token, Comment):
160
- comments.append(token)
161
- elif isinstance(token, Tail):
162
- if token.comment:
163
- comments.append(token.comment)
164
- elif isinstance(token, Token):
165
- if token.type == "COMMENT":
166
- comments.append(Comment(token.value, {}, None))
167
-
168
- return merge_comments(comments)
169
-
170
- class ProtoTransformer(Transformer):
171
- '''Converts syntax tree token into more easily usable namedtuple objects'''
172
- def message(self, tokens):
173
- '''Returns a Message namedtuple'''
174
- comment = Comment("", {}, None)
175
- if len(tokens) < 3:
176
- name_token, body = tokens
177
- else:
178
- comment, name_token, body = tokens
179
- return Message(comment, name_token.value, *body, {})
180
-
181
- def oneof(self, tokens):
182
- '''Returns a Oneof namedtuple'''
183
- comment = Comment("", {})
184
- fields = []
185
- name = None
186
- for token in tokens:
187
- if isinstance(token, Comment):
188
- comment = token
189
- elif isinstance(token, Field):
190
- fields.append(token)
191
- elif isinstance(token, Token) and token.type == 'ONEOFNAME':
192
- name = token.value
193
- return Oneof(comment, name, fields)
194
-
195
- def oneoffield(self, tokens):
196
- '''Returns a Field namedtuple'''
197
- comment = Comment("", {})
198
- type = Token("TYPE", "")
199
- fieldname = Token("FIELDNAME", "")
200
- fieldnumber = Token("FIELDNUMBER", "")
201
- for token in tokens:
202
- if isinstance(token, Comment):
203
- comment = token
204
- elif isinstance(token, Token):
205
- if token.type == "TYPE":
206
- type = token
207
- elif token.type == "FIELDNAME":
208
- fieldname = token
209
- elif token.type == "FIELDNUMBER":
210
- fieldnumber = token
211
- elif token.type == "COMMENT":
212
- comment = Comment(token.value, {})
213
- return Field(comment, type.value, type.value, type.value, fieldname.value, int(fieldnumber.value))
214
-
215
- def fieldoption(self, tokens):
216
- name = Token("TYPE", "")
217
- content = Token("", "")
218
- for token in tokens:
219
- if isinstance(token, Token):
220
- if token.type == "OPTIONNAME":
221
- name.value = token.value.strip("()")
222
- if token.type == "CONSTANT":
223
- content = token
224
-
225
- return FieldOption(name, content)
226
-
227
- def enumvalueoption(self, tokens):
228
- return self.fieldoption(tokens)
229
-
230
- def option(self, tokens):
231
- name = Token("TYPE", "")
232
- content = Token("", "")
233
- comment = extrat_comments(tokens)
234
- for token in tokens:
235
- if isinstance(token, Comment):
236
- comment = token
237
- elif isinstance(token, Token):
238
- if token.type == "OPTIONNAME":
239
- name.value = token.value.strip("()")
240
- if token.type == "CONSTANT":
241
- content = token
242
-
243
- return Option(comment, name, content)
244
-
245
- def messagebody(self, items):
246
- '''Returns a tuple of message body namedtuples'''
247
- messages = {}
248
- enums = {}
249
- fields = []
250
- options = {}
251
- oneofs = []
252
- for item in items:
253
- if isinstance(item, Message):
254
- messages[item.name] = item
255
- elif isinstance(item, Enum):
256
- enums[item.name] = item
257
- elif isinstance(item, Field):
258
- fields.append(item)
259
- elif isinstance(item, Option):
260
- options[item.name] = item
261
- elif isinstance(item, Oneof):
262
- oneofs.append(item)
263
-
264
- return fields, oneofs, messages, enums, options
265
-
266
- def tail(self, tokens):
267
- comment = None
268
- for token in tokens:
269
- if isinstance(token, Comment):
270
- comment = token
271
-
272
- return Tail(comment)
273
-
274
- def field(self, tokens):
275
- '''Returns a Field namedtuple'''
276
- type = Token("TYPE", "")
277
- label = Token("LABEL", "")
278
- fieldname = Token("FIELDNAME", "")
279
- fieldnumber = Token("FIELDNUMBER", "")
280
- options = {}
281
- for token in tokens:
282
- if isinstance(token, Tree) and token.data == 'fieldoptions':
283
- for fieldoption in token.children:
284
- if isinstance(fieldoption, FieldOption):
285
- options[fieldoption.name.value] = fieldoption
286
- elif isinstance(token, Token):
287
- if token.type == "TYPE":
288
- type = token
289
- elif token.type == "LABEL":
290
- label = token
291
- elif token.type == "FIELDNAME":
292
- fieldname = token
293
- elif token.type == "FIELDNUMBER":
294
- fieldnumber = token
295
-
296
- return Field(extrat_comments(tokens), label.value, type.value, type.value, type.value, fieldname.value, int(fieldnumber.value), options, {})
297
-
298
- def mapfield(self, tokens):
299
- '''Returns a Field namedtuple'''
300
- val_type = Token("TYPE", "")
301
- key_type = Token("KEYTYPE", "")
302
- fieldname = Token("MAPNAME", "")
303
- fieldnumber = Token("FIELDNUMBER", "")
304
- options = {}
305
- for token in tokens:
306
- if isinstance(token, Tree) and token.data == 'fieldoptions':
307
- for fieldoption in token.children:
308
- if isinstance(fieldoption, FieldOption):
309
- options[token.name] = token
310
- elif isinstance(token, Token):
311
- if token.type == "TYPE":
312
- val_type = token
313
- elif token.type == "KEYTYPE":
314
- key_type = token
315
- elif token.type == "MAPNAME":
316
- fieldname = token
317
- elif token.type == "FIELDNUMBER":
318
- fieldnumber = token
319
- return Field(extrat_comments(tokens), '', 'map', key_type.value, val_type.value, fieldname.value, int(fieldnumber.value), options, {})
320
-
321
- def comments(self, tokens):
322
- '''Returns a Tag namedtuple'''
323
- comment = ''
324
- tags = {}
325
- ue_specifier = None
326
- for token in tokens:
327
- if token is None:
328
- continue
329
-
330
- token_str = ""
331
- if isinstance(token, Token):
332
- token_str = token.value
333
- else:
334
- token_str = token
335
-
336
- if token_str.find("//") >= 0:
337
- comment_content = token_str.replace("//", "").strip(" /\n")
338
- if comment_content.startswith("UPROPERTY") or comment_content.startswith("UCLASS") or comment_content.startswith("UENUM"):
339
- ue_specifier = comment_content
340
- continue
341
-
342
- comment += token_str + "\n"
343
- if token_str.find('@') < 0:
344
- continue
345
- kvs = token_str.strip(" /\n").split('@')
346
- for kv in kvs:
347
- kv = kv.strip(" /\n")
348
- if not kv:
349
- continue
350
- tmp = kv.split('=')
351
- key = tmp[0].strip(" /\n").lower()
352
- if key.find(" ") >= 0:
353
- continue
354
- if len(tmp) > 1:
355
- tags[key] = tmp[1].lower()
356
- else:
357
- tags[key] = True
358
- return Comment(comment, tags, ue_specifier)
359
-
360
- def trail_comment(self, tokens):
361
- if len(tokens) > 0:
362
- return Comment(tokens[0].value, {}, None)
363
- else:
364
- return Comment("", {}, None)
365
-
366
- def enum(self, tokens):
367
- '''Returns an Enum namedtuple'''
368
- comment = Comment("", {}, None)
369
- if len(tokens) < 3:
370
- name, fields = tokens
371
- else:
372
- comment, name, fields = tokens
373
- return Enum(comment, name.value, fields, {})
374
-
375
- def enumbody(self, tokens):
376
- '''Returns a sequence of enum identifiers'''
377
- enumitems = []
378
- for tree in tokens:
379
- if tree.data != 'enumfield':
380
- continue
381
- name = Token("IDENT", "")
382
- value = Token("INTLIT", "")
383
- options = {}
384
- for token in tree.children:
385
- if isinstance(token, Tree) and token.data == 'enumvalueoption':
386
- for enumvalueoption in token.children:
387
- if isinstance(enumvalueoption, FieldOption):
388
- options[token.name] = token
389
- elif isinstance(token, Token):
390
- if token.type == "IDENT":
391
- name = token
392
- elif token.type == "INTLIT":
393
- value = token
394
- enumitems.append(Field(extrat_comments(tree.children), '', 'enum', 'enum', 'enum', name.value, value.value, options, {}))
395
- return enumitems
396
-
397
- def service(self, tokens):
398
- '''Returns a Service namedtuple'''
399
- functions = []
400
- name = ''
401
- for i in range(0, len(tokens)):
402
- if not isinstance(tokens[i], Comment):
403
- if isinstance(tokens[i], RpcFunc):
404
- functions.append(tokens[i])
405
- else:
406
- name = tokens[i].value
407
- return Service(name, functions)
408
-
409
- def rpc(self, tokens):
410
- '''Returns a RpcFunc namedtuple'''
411
- uri = ''
412
- in_type = ''
413
- for token in tokens:
414
- if isinstance(token, Token):
415
- if token.type == "RPCNAME":
416
- name = token
417
- elif token.type == "MESSAGETYPE":
418
- if in_type:
419
- out_type = token
420
- else:
421
- in_type = token
422
- elif not isinstance(token, Comment):
423
- option_token = token
424
- uri = option_token.children[0].value
425
- return RpcFunc(name.value, in_type.value, out_type.value, uri.strip('"'))
426
-
427
-
428
- def _recursive_to_dict(obj):
429
- _dict = {}
430
-
431
- if isinstance(obj, tuple):
432
- node = obj._asdict()
433
- for item in node:
434
- if isinstance(node[item], list): # Process as a list
435
- _dict[item] = [_recursive_to_dict(x) for x in (node[item])]
436
- elif isinstance(node[item], tuple): # Process as a NamedTuple
437
- _dict[item] = _recursive_to_dict(node[item])
438
- elif isinstance(node[item], dict):
439
- for k in node[item]:
440
- if isinstance(node[item][k], tuple):
441
- node[item][k] = _recursive_to_dict(node[item][k])
442
- _dict[item] = node[item]
443
- else: # Process as a regular element
444
- _dict[item] = (node[item])
445
- return _dict
446
-
447
-
448
- def parse_from_file(file: str, encoding: str="utf-8"):
449
- with open(file, 'r', encoding=encoding) as f:
450
- data = f.read()
451
- if data:
452
- return parse(data)
453
-
454
-
455
- def parse(data: str):
456
- parser = Lark(BNF, start='proto', parser='earley', debug=True)
457
- tree = parser.parse(data)
458
- trans_tree = ProtoTransformer().transform(tree)
459
- enums = {}
460
- messages = {}
461
- services = {}
462
- imports = []
463
- import_tree = trans_tree.find_data('import')
464
- for tree in import_tree:
465
- for child in tree.children:
466
- if isinstance(child, Token):
467
- imports.append(child.value.strip('"'))
468
- options = {}
469
- option_tree = trans_tree.find_data('option')
470
- for tree in option_tree:
471
- options[tree.children[0]] = tree.children[1].strip('"')
472
-
473
- package = ''
474
- package_tree = trans_tree.find_data('package')
475
- for tree in package_tree:
476
- package = tree.children[0]
477
-
478
- top_data = trans_tree.find_data('topleveldef')
479
- for top_level in top_data:
480
- for child in top_level.children:
481
- if isinstance(child, Message):
482
- messages[child.name] = child
483
- if isinstance(child, Enum):
484
- enums[child.name] = child
485
- if isinstance(child, Service):
486
- services[child.name] = child
487
- return ProtoFile(messages, enums, services, imports, options, package, {})
488
-
489
-
490
- def serialize2json(data):
491
- return json.dumps(_recursive_to_dict(parse(data)))
492
-
493
-
494
- def serialize2json_from_file(file: str, encoding: str="utf-8"):
495
- with open(file, 'r', encoding=encoding) as f:
496
- data = f.read()
497
- if data:
1
+ #!/usr/bin/env python
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing,
13
+ # software distributed under the License is distributed on an
14
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ # KIND, either express or implied. See the License for the
16
+ # specific language governing permissions and limitations
17
+ # under the License.
18
+ # adapted from: https://github.com/xophiix/proto2parser/
19
+
20
+ from lark import Lark, Transformer, Tree, Token
21
+ from collections import namedtuple
22
+ import typing
23
+ import json
24
+
25
+ BNF = r'''
26
+ OCTALDIGIT: "0..7"
27
+ IDENT: ( "_" )* LETTER ( LETTER | DECIMALDIGIT | "_" )*
28
+ FULLIDENT: IDENT ( "." IDENT )*
29
+ MESSAGENAME: IDENT
30
+ ENUMNAME: IDENT
31
+ FIELDNAME: IDENT
32
+ ONEOFNAME: IDENT
33
+ MAPNAME: IDENT
34
+ SERVICENAME: IDENT
35
+ TAGNAME: IDENT
36
+ TAGVALUE: IDENT
37
+ RPCNAME: IDENT
38
+ MESSAGETYPE: [ "." ] ( IDENT "." )* MESSAGENAME
39
+ ENUMTYPE: [ "." ] ( IDENT "." )* ENUMNAME
40
+ CAPITALLETTER: "A..Z"
41
+ GROUPNAME: CAPITALLETTER ( LETTER | DECIMALDIGIT | "_" )*
42
+
43
+ INTLIT : DECIMALLIT | OCTALLIT | HEXLIT
44
+ DECIMALLIT: ( "1".."9" ) ( DECIMALDIGIT )*
45
+ OCTALLIT : "0" ( OCTALDIGIT )*
46
+ HEXLIT : "0" ( "x" | "X" ) HEXDIGIT ( HEXDIGIT )*
47
+
48
+ FLOATLIT: ( DECIMALS "." [ DECIMALS ] [ EXPONENT ] | DECIMALS EXPONENT | "."DECIMALS [ EXPONENT ] ) | "inf" | "nan"
49
+ DECIMALS : DECIMALDIGIT ( DECIMALDIGIT )*
50
+ EXPONENT : ( "e" | "E" ) [ "+" | "-" ] DECIMALS
51
+
52
+ BOOLLIT: "true" | "false"
53
+
54
+ STRLIT: ( "'" ( CHARVALUE )* "'" ) | ( "\"" ( CHARVALUE )* "\"" )
55
+ CHARVALUE: HEXESCAPE | OCTESCAPE | CHARESCAPE | /[^\0\n\\]/
56
+ HEXESCAPE: "\\" ( "x" | "X" ) HEXDIGIT HEXDIGIT
57
+ OCTESCAPE: "\\" OCTALDIGIT OCTALDIGIT OCTALDIGIT
58
+ CHARESCAPE: "\\" ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | "\\" | "'" | "\"" )
59
+ QUOTE: "'" | "\""
60
+
61
+ EMPTYSTATEMENT: ";"
62
+
63
+ CONSTANT: FULLIDENT | ( [ "-" | "+" ] INTLIT ) | ( [ "-" | "+" ] FLOATLIT ) | STRLIT | BOOLLIT
64
+
65
+ syntax: "syntax" "=" QUOTE "proto2" QUOTE tail
66
+
67
+ import: "import" [ "weak" | "public" ] STRLIT tail
68
+
69
+ package: "package" FULLIDENT tail
70
+
71
+ option: [ comments ] "option" OPTIONNAME "=" CONSTANT tail
72
+ OPTIONNAME: ( IDENT | "(" FULLIDENT ")" ) ( "." IDENT )*
73
+
74
+ LABEL: "required" | "optional" | "repeated"
75
+ TYPE: "double" | "float" | "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string" | "bytes" | MESSAGETYPE | ENUMTYPE
76
+ FIELDNUMBER: INTLIT
77
+
78
+ field: [ comments ] LABEL TYPE FIELDNAME "=" FIELDNUMBER [ "[" fieldoptions "]" ] tail
79
+ fieldoptions: fieldoption ( "," fieldoption )*
80
+ fieldoption: OPTIONNAME "=" CONSTANT
81
+
82
+ oneof: [ comments ] "oneof" ONEOFNAME "{" ( oneoffield | EMPTYSTATEMENT )* "}"
83
+ oneoffield: [ comments ] TYPE FIELDNAME "=" FIELDNUMBER [ "[" fieldoptions "]" ] tail
84
+ group: [ comments ] LABEL "group" GROUPNAME "=" FIELDNUMBER messagebody
85
+
86
+ mapfield: [ comments ] "map" "<" KEYTYPE "," TYPE ">" MAPNAME "=" FIELDNUMBER [ "[" fieldoptions "]" ] tail
87
+ KEYTYPE: "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string"
88
+
89
+ extensions: [ comments ] "extensions" ranges tail
90
+
91
+ reserved: [ comments ] "reserved" ( ranges | fieldnames ) tail
92
+ ranges: range ( "," range )*
93
+ range: INTLIT [ "to" ( INTLIT | "max" ) ]
94
+ fieldnames: FIELDNAME ( "," FIELDNAME )*
95
+
96
+ enum: [ comments ] "enum" ENUMNAME enumbody
97
+ enumbody: "{" ( option | enumfield | reserved | EMPTYSTATEMENT )* "}"
98
+ enumfield: [ comments ] IDENT "=" [ "-" ] INTLIT [ "[" enumvalueoption ( "," enumvalueoption )* "]" ] tail
99
+ enumvalueoption: OPTIONNAME "=" CONSTANT
100
+
101
+ message: [ comments ] "message" MESSAGENAME messagebody
102
+ messagebody: "{" ( field | enum | message | extend | extensions | option | oneof | mapfield | reserved | group | EMPTYSTATEMENT )* "}"
103
+ extend: [ comments ] "extend" MESSAGETYPE "{" (field | group)* "}"
104
+
105
+ service: [ comments ] "service" SERVICENAME "{" ( option | rpc | EMPTYSTATEMENT )* "}"
106
+ rpc: [ comments ] "rpc" RPCNAME "(" [ "stream" ] MESSAGETYPE ")" "returns" "(" [ "stream" ] MESSAGETYPE ")" ( ( "{" ( option | EMPTYSTATEMENT )* "}" ) | ";" )
107
+
108
+ proto:[ comments ] [ syntax ] ( import | package | option | topleveldef | EMPTYSTATEMENT )*
109
+ topleveldef: message | enum | extend | service
110
+
111
+ tail: ";" /[\s|\t]/* [ trail_comment ] NEWLINE
112
+ trail_comment: COMMENT
113
+ COMMENT: "//" /[^\n]/*
114
+ BLOCKCOMMENT: "/*" /./* "*/"
115
+ comments: (( COMMENT | BLOCKCOMMENT ) NEWLINE )+
116
+
117
+ %import common.HEXDIGIT
118
+ %import common.DIGIT -> DECIMALDIGIT
119
+ %import common.LETTER
120
+ %import common.WS
121
+ %import common.NEWLINE
122
+ %ignore WS
123
+ '''
124
+
125
+ Tail = typing.NamedTuple('Tail', [('comment', 'Comment')])
126
+ Comment = typing.NamedTuple('Comment', [('content', str), ('tags', typing.Dict[str, typing.Any]), ('ue_specifiers', str)])
127
+ Oneof = typing.NamedTuple('Oneof', [('comment', 'Comment'), ('name', str), ('fields', typing.List['Field'])])
128
+ FieldOption = typing.NamedTuple('FieldOption', [('name', str), ('content', str)])
129
+ Field = typing.NamedTuple('Field', [('comment', 'Comment'), ('label', str), ('type', str), ('key_type', str), ('val_type', str), ('name', str), ('number', int), ('options', typing.Dict[str, 'FieldOption']), ('user_data', typing.Dict[str, typing.Any])])
130
+ Enum = typing.NamedTuple('Enum', [('comment', 'Comment'), ('name', str), ('fields', typing.Dict[str, 'Field']), ('user_data', typing.Dict[str, typing.Any])])
131
+ Option = typing.NamedTuple('Option', [('comment', 'Comment'), ('name', str), ('content', str)])
132
+ Message = typing.NamedTuple('Message', [('comment', 'Comment'), ('name', str), ('fields', typing.List['Field']), ('oneofs', typing.List['Oneof']),
133
+ ('messages', typing.Dict[str, 'Message']), ('enums', typing.Dict[str, 'Enum']), ('options', typing.Dict[str, 'Option']), ('user_data', typing.Dict[str, typing.Any])])
134
+ Service = typing.NamedTuple('Service', [('name', str), ('functions', typing.Dict[str, 'RpcFunc'])])
135
+ RpcFunc = typing.NamedTuple('RpcFunc', [('name', str), ('in_type', str), ('out_type', str), ('uri', str)])
136
+ ProtoFile = typing.NamedTuple('ProtoFile',
137
+ [('messages', typing.Dict[str, 'Message']), ('enums', typing.Dict[str, 'Enum']),
138
+ ('services', typing.Dict[str, 'Service']), ('imports', typing.List[str]),
139
+ ('options', typing.Dict[str, str]), ('package', str), ('user_data', typing.Dict[str, typing.Any])])
140
+
141
+
142
+ def merge_comments(comments):
143
+ content = ""
144
+ tags = {}
145
+ ue_specifiers = None
146
+
147
+ for comment in comments:
148
+ content += comment.content
149
+ for tag, value in comment.tags.items():
150
+ tags[tag] = value
151
+ if not ue_specifiers and comment.ue_specifiers:
152
+ ue_specifiers = comment.ue_specifiers
153
+
154
+ return Comment(content, tags, ue_specifiers)
155
+
156
+ def extrat_comments(tokens):
157
+ comments = []
158
+ for token in tokens:
159
+ if isinstance(token, Comment):
160
+ comments.append(token)
161
+ elif isinstance(token, Tail):
162
+ if token.comment:
163
+ comments.append(token.comment)
164
+ elif isinstance(token, Token):
165
+ if token.type == "COMMENT":
166
+ comments.append(Comment(token.value, {}, None))
167
+
168
+ return merge_comments(comments)
169
+
170
+ class ProtoTransformer(Transformer):
171
+ '''Converts syntax tree token into more easily usable namedtuple objects'''
172
+ def message(self, tokens):
173
+ '''Returns a Message namedtuple'''
174
+ comment = Comment("", {}, None)
175
+ if len(tokens) < 3:
176
+ name_token, body = tokens
177
+ else:
178
+ comment, name_token, body = tokens
179
+ return Message(comment, name_token.value, *body, {})
180
+
181
+ def oneof(self, tokens):
182
+ '''Returns a Oneof namedtuple'''
183
+ comment = Comment("", {})
184
+ fields = []
185
+ name = None
186
+ for token in tokens:
187
+ if isinstance(token, Comment):
188
+ comment = token
189
+ elif isinstance(token, Field):
190
+ fields.append(token)
191
+ elif isinstance(token, Token) and token.type == 'ONEOFNAME':
192
+ name = token.value
193
+ return Oneof(comment, name, fields)
194
+
195
+ def oneoffield(self, tokens):
196
+ '''Returns a Field namedtuple'''
197
+ comment = Comment("", {})
198
+ type = Token("TYPE", "")
199
+ fieldname = Token("FIELDNAME", "")
200
+ fieldnumber = Token("FIELDNUMBER", "")
201
+ for token in tokens:
202
+ if isinstance(token, Comment):
203
+ comment = token
204
+ elif isinstance(token, Token):
205
+ if token.type == "TYPE":
206
+ type = token
207
+ elif token.type == "FIELDNAME":
208
+ fieldname = token
209
+ elif token.type == "FIELDNUMBER":
210
+ fieldnumber = token
211
+ elif token.type == "COMMENT":
212
+ comment = Comment(token.value, {})
213
+ return Field(comment, type.value, type.value, type.value, fieldname.value, int(fieldnumber.value))
214
+
215
+ def fieldoption(self, tokens):
216
+ name = Token("TYPE", "")
217
+ content = Token("", "")
218
+ for token in tokens:
219
+ if isinstance(token, Token):
220
+ if token.type == "OPTIONNAME":
221
+ name.value = token.value.strip("()")
222
+ if token.type == "CONSTANT":
223
+ content = token
224
+
225
+ return FieldOption(name, content)
226
+
227
+ def enumvalueoption(self, tokens):
228
+ return self.fieldoption(tokens)
229
+
230
+ def option(self, tokens):
231
+ name = Token("TYPE", "")
232
+ content = Token("", "")
233
+ comment = extrat_comments(tokens)
234
+ for token in tokens:
235
+ if isinstance(token, Comment):
236
+ comment = token
237
+ elif isinstance(token, Token):
238
+ if token.type == "OPTIONNAME":
239
+ name.value = token.value.strip("()")
240
+ if token.type == "CONSTANT":
241
+ content = token
242
+
243
+ return Option(comment, name, content)
244
+
245
def messagebody(self, items):
    '''Sorts the children of a message body into five buckets.

    Returns the tuple (fields, oneofs, messages, enums, options) where
    fields and oneofs are lists in source order and messages, enums and
    options are dicts keyed by each item's name. Children of any other
    kind are ignored.
    '''
    nested_messages = {}
    nested_enums = {}
    body_fields = []
    body_options = {}
    body_oneofs = []
    for entry in items:
        if isinstance(entry, Message):
            nested_messages[entry.name] = entry
            continue
        if isinstance(entry, Enum):
            nested_enums[entry.name] = entry
            continue
        if isinstance(entry, Field):
            body_fields.append(entry)
            continue
        if isinstance(entry, Option):
            body_options[entry.name] = entry
            continue
        if isinstance(entry, Oneof):
            body_oneofs.append(entry)

    return body_fields, body_oneofs, nested_messages, nested_enums, body_options
265
+
266
def tail(self, tokens):
    '''Returns a Tail namedtuple wrapping the last Comment child.

    The wrapped value is None when the children contain no Comment.
    '''
    trailing = [candidate for candidate in tokens if isinstance(candidate, Comment)]
    return Tail(trailing[-1] if trailing else None)
273
+
274
def field(self, tokens):
    '''Returns a Field namedtuple for a regular message field.

    TYPE, LABEL, FIELDNAME and FIELDNUMBER tokens fill the corresponding
    slots (the last occurrence wins). FieldOption children of a
    'fieldoptions' subtree are collected into a dict keyed by option name.

    Raises ValueError when no FIELDNUMBER token is present, because the
    empty placeholder cannot be converted to int.
    '''
    # One placeholder per token type we care about; real tokens overwrite them.
    slots = {
        "TYPE": Token("TYPE", ""),
        "LABEL": Token("LABEL", ""),
        "FIELDNAME": Token("FIELDNAME", ""),
        "FIELDNUMBER": Token("FIELDNUMBER", ""),
    }
    field_options = {}
    for child in tokens:
        if isinstance(child, Tree) and child.data == 'fieldoptions':
            for candidate in child.children:
                if isinstance(candidate, FieldOption):
                    field_options[candidate.name.value] = candidate
        elif isinstance(child, Token) and child.type in slots:
            slots[child.type] = child

    type_name = slots["TYPE"].value
    return Field(extrat_comments(tokens), slots["LABEL"].value, type_name, type_name, type_name,
                 slots["FIELDNAME"].value, int(slots["FIELDNUMBER"].value), field_options, {})
297
+
298
def mapfield(self, tokens):
    '''Returns a Field namedtuple for a map field.

    The resulting Field has an empty label and the type 'map'; KEYTYPE and
    TYPE tokens give the key and value types, MAPNAME the field name and
    FIELDNUMBER the field number. FieldOption children of a 'fieldoptions'
    subtree are collected into a dict keyed by option name.

    Raises ValueError when no FIELDNUMBER token is present, because the
    empty placeholder cannot be converted to int.
    '''
    val_type = Token("TYPE", "")
    key_type = Token("KEYTYPE", "")
    fieldname = Token("MAPNAME", "")
    fieldnumber = Token("FIELDNUMBER", "")
    options = {}
    for token in tokens:
        if isinstance(token, Tree) and token.data == 'fieldoptions':
            for fieldoption in token.children:
                if isinstance(fieldoption, FieldOption):
                    # Key by the option itself, as field() does. The
                    # enclosing Tree has no .name attribute and is not the
                    # value we want to store.
                    options[fieldoption.name.value] = fieldoption
        elif isinstance(token, Token):
            if token.type == "TYPE":
                val_type = token
            elif token.type == "KEYTYPE":
                key_type = token
            elif token.type == "MAPNAME":
                fieldname = token
            elif token.type == "FIELDNUMBER":
                fieldnumber = token
    return Field(extrat_comments(tokens), '', 'map', key_type.value, val_type.value,
                 fieldname.value, int(fieldnumber.value), options, {})
320
+
321
def comments(self, tokens):
    '''Returns a Comment namedtuple (content, tags, ue_specifier).

    Each non-None child contributes one line of text (Token children
    contribute their value). A '//' line whose content starts with
    UPROPERTY, UCLASS or UENUM is stored as the ue_specifier and left out
    of the comment text. Lines containing '@' are additionally split into
    tags: '@key=value' yields a lower-cased key and value, a bare '@key'
    yields True, and keys containing a space are skipped.
    '''
    ue_prefixes = ("UPROPERTY", "UCLASS", "UENUM")
    collected_lines = []
    tags = {}
    ue_specifier = None
    for token in tokens:
        if token is None:
            continue

        line = token.value if isinstance(token, Token) else token

        if "//" in line:
            stripped_line = line.replace("//", "").strip(" /\n")
            if stripped_line.startswith(ue_prefixes):
                ue_specifier = stripped_line
                continue

        collected_lines.append(line + "\n")
        if '@' not in line:
            continue
        for chunk in line.strip(" /\n").split('@'):
            chunk = chunk.strip(" /\n")
            if not chunk:
                continue
            pieces = chunk.split('=')
            key = pieces[0].strip(" /\n").lower()
            if " " in key:
                continue
            tags[key] = pieces[1].lower() if len(pieces) > 1 else True
    return Comment("".join(collected_lines), tags, ue_specifier)
359
+
360
def trail_comment(self, tokens):
    '''Returns a Comment namedtuple for a trailing comment.

    The content is the value of the first child, or an empty string when
    there are no children. Tags are empty and no UE specifier is set.
    '''
    text = tokens[0].value if tokens else ""
    return Comment(text, {}, None)
365
+
366
def enum(self, tokens):
    '''Returns an Enum namedtuple.

    With three children they are taken as (comment, name, fields); with
    fewer they are taken as (name, fields) and an empty Comment is used.
    Any other child count fails the tuple unpacking with ValueError.
    '''
    if len(tokens) >= 3:
        comment, name, fields = tokens
    else:
        name, fields = tokens
        comment = Comment("", {}, None)
    return Enum(comment, name.value, fields, {})
374
+
375
def enumbody(self, tokens):
    '''Returns a list of Field namedtuples, one per 'enumfield' subtree.

    For each enum field the IDENT token gives the name and the INTLIT token
    gives the value (kept as the token's string value). Options attached to
    the enum value are collected into a dict keyed by option name. Children
    that are not 'enumfield' subtrees are skipped.
    '''
    enumitems = []
    for tree in tokens:
        # Only Tree nodes have .data; skip anything else instead of failing.
        if not isinstance(tree, Tree) or tree.data != 'enumfield':
            continue
        name = Token("IDENT", "")
        value = Token("INTLIT", "")
        options = {}
        for token in tree.children:
            if isinstance(token, FieldOption):
                # enumvalueoption() replaces each option subtree with a
                # FieldOption, so options show up as direct children.
                options[token.name.value] = token
            elif isinstance(token, Tree) and token.data == 'enumvalueoption':
                for enumvalueoption in token.children:
                    if isinstance(enumvalueoption, FieldOption):
                        # Key by the option itself; the enclosing Tree has
                        # no .name attribute.
                        options[enumvalueoption.name.value] = enumvalueoption
            elif isinstance(token, Token):
                if token.type == "IDENT":
                    name = token
                elif token.type == "INTLIT":
                    value = token
        enumitems.append(Field(extrat_comments(tree.children), '', 'enum', 'enum', 'enum',
                               name.value, value.value, options, {}))
    return enumitems
396
+
397
def service(self, tokens):
    '''Returns a Service namedtuple (name, functions).

    Comment children are ignored, RpcFunc children are collected in order,
    and the value of any other child is taken as the service name (the
    last such child wins).
    '''
    rpc_functions = []
    service_name = ''
    for child in tokens:
        if isinstance(child, Comment):
            continue
        if isinstance(child, RpcFunc):
            rpc_functions.append(child)
        else:
            service_name = child.value
    return Service(service_name, rpc_functions)
408
+
409
def rpc(self, tokens):
    '''Returns a RpcFunc namedtuple (name, in_type, out_type, uri).

    The RPCNAME token gives the name. The first MESSAGETYPE token is the
    request type and any later one is the response type. Any child that is
    neither a Token nor a Comment is treated as an option subtree whose
    first child's value is the URI (surrounding double quotes stripped).
    Missing parts default to empty strings.
    '''
    # Every slot gets a default so an incomplete rule cannot hit an unbound
    # local or call .value on a plain string.
    name = Token("RPCNAME", "")
    in_type = None
    out_type = None
    uri = ''
    for token in tokens:
        if isinstance(token, Token):
            if token.type == "RPCNAME":
                name = token
            elif token.type == "MESSAGETYPE":
                if in_type is None:
                    in_type = token
                else:
                    out_type = token
        elif not isinstance(token, Comment):
            uri = token.children[0].value
    in_name = in_type.value if in_type is not None else ''
    out_name = out_type.value if out_type is not None else ''
    return RpcFunc(name.value, in_name, out_name, uri.strip('"'))
426
+
427
+
428
+ def _recursive_to_dict(obj):
429
+ _dict = {}
430
+
431
+ if isinstance(obj, tuple):
432
+ node = obj._asdict()
433
+ for item in node:
434
+ if isinstance(node[item], list): # Process as a list
435
+ _dict[item] = [_recursive_to_dict(x) for x in (node[item])]
436
+ elif isinstance(node[item], tuple): # Process as a NamedTuple
437
+ _dict[item] = _recursive_to_dict(node[item])
438
+ elif isinstance(node[item], dict):
439
+ for k in node[item]:
440
+ if isinstance(node[item][k], tuple):
441
+ node[item][k] = _recursive_to_dict(node[item][k])
442
+ _dict[item] = node[item]
443
+ else: # Process as a regular element
444
+ _dict[item] = (node[item])
445
+ return _dict
446
+
447
+
448
def parse_from_file(file: str, encoding: str = "utf-8"):
    '''Reads a .proto file and returns the result of parse() on its text.

    Returns None when the file is empty.
    '''
    with open(file, 'r', encoding=encoding) as handle:
        text = handle.read()
    if not text:
        return None
    return parse(text)
453
+
454
+
455
def parse(data: str):
    '''Parses .proto source text and returns a ProtoFile namedtuple.

    The text is parsed with the module grammar (Earley parser, start rule
    'proto') and run through ProtoTransformer. The transformed tree is then
    searched for imports, options, the package and top-level definitions.
    '''
    syntax_tree = Lark(BNF, start='proto', parser='earley', debug=True).parse(data)
    transformed = ProtoTransformer().transform(syntax_tree)

    # Import paths: every Token below an 'import' node, without its quotes.
    imports = []
    for import_node in transformed.find_data('import'):
        for child in import_node.children:
            if isinstance(child, Token):
                imports.append(child.value.strip('"'))

    # NOTE(review): this only sees 'option' nodes that are still Tree
    # objects after the transform - confirm against the grammar.
    options = {}
    for option_node in transformed.find_data('option'):
        options[option_node.children[0]] = option_node.children[1].strip('"')

    # The last 'package' node wins; empty string when there is none.
    package = ''
    for package_node in transformed.find_data('package'):
        package = package_node.children[0]

    messages = {}
    enums = {}
    services = {}
    registries = ((Message, messages), (Enum, enums), (Service, services))
    for definition in transformed.find_data('topleveldef'):
        for child in definition.children:
            for kind, registry in registries:
                if isinstance(child, kind):
                    registry[child.name] = child

    return ProtoFile(messages, enums, services, imports, options, package, {})
488
+
489
+
490
def serialize2json(data):
    '''Parses .proto source text and returns it as a compact JSON string.'''
    proto_file = parse(data)
    as_dict = _recursive_to_dict(proto_file)
    return json.dumps(as_dict)
492
+
493
+
494
def serialize2json_from_file(file: str, encoding: str = "utf-8"):
    '''Reads a .proto file and returns its parsed form as indented JSON.

    The JSON is indented by four spaces. Returns None when the file is
    empty.
    '''
    with open(file, 'r', encoding=encoding) as handle:
        text = handle.read()
    if not text:
        return None
    as_dict = _recursive_to_dict(parse(text))
    return json.dumps(as_dict, indent=4)