structurize 2.16.2__py3-none-any.whl → 2.16.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. avrotize/__init__.py +63 -63
  2. avrotize/__main__.py +5 -5
  3. avrotize/_version.py +34 -34
  4. avrotize/asn1toavro.py +160 -160
  5. avrotize/avrotize.py +152 -152
  6. avrotize/avrotocpp.py +483 -483
  7. avrotize/avrotocsharp.py +992 -992
  8. avrotize/avrotocsv.py +121 -121
  9. avrotize/avrotodatapackage.py +173 -173
  10. avrotize/avrotodb.py +1383 -1383
  11. avrotize/avrotogo.py +476 -476
  12. avrotize/avrotographql.py +197 -197
  13. avrotize/avrotoiceberg.py +210 -210
  14. avrotize/avrotojava.py +1023 -1023
  15. avrotize/avrotojs.py +250 -250
  16. avrotize/avrotojsons.py +481 -481
  17. avrotize/avrotojstruct.py +345 -345
  18. avrotize/avrotokusto.py +363 -363
  19. avrotize/avrotomd.py +137 -137
  20. avrotize/avrotools.py +168 -168
  21. avrotize/avrotoparquet.py +208 -208
  22. avrotize/avrotoproto.py +358 -358
  23. avrotize/avrotopython.py +622 -622
  24. avrotize/avrotorust.py +435 -435
  25. avrotize/avrotots.py +598 -598
  26. avrotize/avrotoxsd.py +344 -344
  27. avrotize/commands.json +2493 -2433
  28. avrotize/common.py +828 -828
  29. avrotize/constants.py +4 -4
  30. avrotize/csvtoavro.py +131 -131
  31. avrotize/datapackagetoavro.py +76 -76
  32. avrotize/dependency_resolver.py +348 -348
  33. avrotize/jsonstoavro.py +1698 -1698
  34. avrotize/jsonstostructure.py +2642 -2642
  35. avrotize/jstructtoavro.py +878 -878
  36. avrotize/kstructtoavro.py +93 -93
  37. avrotize/kustotoavro.py +455 -455
  38. avrotize/parquettoavro.py +157 -157
  39. avrotize/proto2parser.py +497 -497
  40. avrotize/proto3parser.py +402 -402
  41. avrotize/prototoavro.py +382 -382
  42. avrotize/structuretocsharp.py +2005 -2005
  43. avrotize/structuretojsons.py +498 -498
  44. avrotize/structuretopython.py +772 -772
  45. avrotize/structuretots.py +653 -0
  46. avrotize/xsdtoavro.py +413 -413
  47. {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/METADATA +848 -805
  48. structurize-2.16.5.dist-info/RECORD +52 -0
  49. {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/licenses/LICENSE +200 -200
  50. structurize-2.16.2.dist-info/RECORD +0 -51
  51. {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/WHEEL +0 -0
  52. {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/entry_points.txt +0 -0
  53. {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/top_level.txt +0 -0
avrotize/proto3parser.py CHANGED
@@ -1,403 +1,403 @@
1
- #!/usr/bin/env python
2
- # Licensed to the Apache Software Foundation (ASF) under one
3
- # or more contributor license agreements. See the NOTICE file
4
- # distributed with this work for additional information
5
- # regarding copyright ownership. The ASF licenses this file
6
- # to you under the Apache License, Version 2.0 (the
7
- # "License"); you may not use this file except in compliance
8
- # with the License. You may obtain a copy of the License at
9
- #
10
- # http://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing,
13
- # software distributed under the License is distributed on an
14
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- # KIND, either express or implied. See the License for the
16
- # specific language governing permissions and limitations
17
- # under the License.
18
- #
19
- # adapted from https://github.com/khadgarmage/protoparser
20
-
21
- from lark import Lark, Transformer, Tree, Token
22
- from collections import namedtuple
23
- import typing
24
- import json
25
-
26
- BNF = r'''
27
- OCTALDIGIT: "0..7"
28
- IDENT: ( "_" )* LETTER ( LETTER | DECIMALDIGIT | "_" )*
29
- FULLIDENT: IDENT ( "." IDENT )*
30
- MESSAGENAME: IDENT
31
- ENUMNAME: IDENT
32
- FIELDNAME: IDENT
33
- ONEOFNAME: IDENT
34
- MAPNAME: IDENT
35
- SERVICENAME: IDENT
36
- TAGNAME: IDENT
37
- TAGVALUE: IDENT
38
- RPCNAME: IDENT
39
- MESSAGETYPE: [ "." ] ( IDENT "." )* MESSAGENAME
40
- ENUMTYPE: [ "." ] ( IDENT "." )* ENUMNAME
41
-
42
- INTLIT : [ "-" ] DECIMALLIT | OCTALLIT | HEXLIT
43
- DECIMALLIT: ( "1".."9" ) ( DECIMALDIGIT )*
44
- OCTALLIT : "0" ( OCTALDIGIT )*
45
- HEXLIT : "0" ( "x" | "X" ) HEXDIGIT ( HEXDIGIT )*
46
-
47
- FLOATLIT: ( DECIMALS "." [ DECIMALS ] [ EXPONENT ] | DECIMALS EXPONENT | "."DECIMALS [ EXPONENT ] ) | "inf" | "nan"
48
- DECIMALS : DECIMALDIGIT ( DECIMALDIGIT )*
49
- EXPONENT : ( "e" | "E" ) [ "+" | "-" ] DECIMALS
50
-
51
- BOOLLIT: "true" | "false"
52
-
53
- STRLIT: ( "'" ( CHARVALUE )* "'" ) | ( "\"" ( CHARVALUE )* "\"" )
54
- CHARVALUE: HEXESCAPE | OCTESCAPE | CHARESCAPE | /[^\0\n\\]/
55
- HEXESCAPE: "\\" ( "x" | "X" ) HEXDIGIT HEXDIGIT
56
- OCTESCAPE: "\\" OCTALDIGIT OCTALDIGIT OCTALDIGIT
57
- CHARESCAPE: "\\" ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | "\\" | "'" | "\"" )
58
- QUOTE: "'" | "\""
59
-
60
- EMPTYSTATEMENT: ";"
61
-
62
- CONSTANT: FULLIDENT | ( [ "-" | "+" ] INTLIT ) | ( [ "-" | "+" ] FLOATLIT ) | STRLIT | BOOLLIT
63
-
64
- VERSION: "proto3"
65
-
66
- syntax: "syntax" "=" QUOTE VERSION QUOTE ";"
67
-
68
- import: "import" [ "weak" | "public" ] STRLIT ";"
69
-
70
- package: "package" FULLIDENT ";"
71
-
72
- option: [ COMMENTS ] "option" OPTIONNAME "=" CONSTANT ";"
73
- OPTIONNAME: ( IDENT | "(" FULLIDENT ")" ) ( "." IDENT )*
74
-
75
- TYPE: "double" | "float" | "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string" | "bytes" | MESSAGETYPE | ENUMTYPE
76
- FIELDNUMBER: INTLIT
77
-
78
- LABEL: "optional" | "repeated"
79
- field: [ comments ] [ LABEL ] TYPE FIELDNAME "=" FIELDNUMBER [ "[" fieldoptions "]" ] TAIL
80
- fieldoptions: fieldoption ( "," fieldoption )*
81
- fieldoption: OPTIONNAME "=" CONSTANT
82
-
83
- oneof: [ comments ] "oneof" ONEOFNAME "{" ( oneoffield | EMPTYSTATEMENT )* "}"
84
- oneoffield: [ comments ] TYPE FIELDNAME "=" FIELDNUMBER [ "[" fieldoptions "]" ] ";"
85
-
86
- mapfield: [ comments ] "map" "<" KEYTYPE "," TYPE ">" MAPNAME "=" FIELDNUMBER [ "[" fieldoptions "]" ] TAIL
87
- KEYTYPE: "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string"
88
-
89
- reserved: "reserved" ( ranges | fieldnames ) ";"
90
- ranges: range ( "," range )*
91
- range: INTLIT [ "to" ( INTLIT | "max" ) ]
92
- fieldnames: FIELDNAME ( "," FIELDNAME )*
93
-
94
- enum: [ comments ] "enum" ENUMNAME enumbody
95
- enumbody: "{" ( option | enumfield | EMPTYSTATEMENT )* [ COMMENTS ] "}"
96
- enumfield: [ COMMENTS ] IDENT "=" INTLIT [ "[" enumvalueoption ( "," enumvalueoption )* "]" ] TAIL
97
- enumvalueoption: OPTIONNAME "=" CONSTANT
98
-
99
- message: [ comments ] "message" MESSAGENAME messagebody
100
- messagebody: "{" ( field | enum | message | option | oneof | mapfield | reserved | EMPTYSTATEMENT )* "}"
101
-
102
- googleoption: "option" "(google.api.http)" "=" "{" [ "post:" CONSTANT [ "body:" CONSTANT ] ] "}" ";"
103
- service: [ comments ] "service" SERVICENAME "{" ( option | rpc | EMPTYSTATEMENT )* "}"
104
- rpc: [ comments ] "rpc" RPCNAME "(" [ "stream" ] MESSAGETYPE ")" "returns" "(" [ "stream" ] MESSAGETYPE ")" ( ( "{" ( googleoption | option | EMPTYSTATEMENT )* "}" ) | ";" )
105
-
106
- proto: [ comments ] syntax ( import | package | option | topleveldef | EMPTYSTATEMENT )*
107
- topleveldef: message | enum | service | comments
108
-
109
- TAIL: ";" /[\s|\t]/* [ COMMENT ] [ NEWLINE ]
110
- COMMENT: "//" /[^\n]/*
111
- BLOCKCOMMENT: "/*" /./* "*/"
112
- comments: (( COMMENT | BLOCKCOMMENT ) NEWLINE )+
113
- COMMENTS: (( COMMENT | BLOCKCOMMENT ) NEWLINE )+
114
-
115
- %import common.HEXDIGIT
116
- %import common.DIGIT -> DECIMALDIGIT
117
- %import common.LETTER
118
- %import common.WS
119
- %import common.NEWLINE
120
- %ignore WS
121
- '''
122
-
123
- Comment = typing.NamedTuple('Comment', [('content', str), ('tags', typing.Dict[str, typing.Any])])
124
- Oneof = typing.NamedTuple('Oneof', [('comment', 'Comment'), ('name', str), ('fields', typing.List['Field'])])
125
- Field = typing.NamedTuple('Field', [('comment', 'Comment'), ('label', str), ('type', str), ('key_type', str), ('val_type', str), ('name', str), ('number', int)])
126
- Enum = typing.NamedTuple('Enum', [('comment', 'Comment'), ('name', str), ('fields', typing.Dict[str, 'Field'])])
127
- Message = typing.NamedTuple('Message', [('comment', 'Comment'), ('name', str), ('fields', typing.List['Field']), ('oneofs', typing.List['Oneof']),
128
- ('messages', typing.Dict[str, 'Message']), ('enums', typing.Dict[str, 'Enum'])])
129
- Service = typing.NamedTuple('Service', [('name', str), ('functions', typing.Dict[str, 'RpcFunc'])])
130
- RpcFunc = typing.NamedTuple('RpcFunc', [('name', str), ('in_type', str), ('out_type', str), ('uri', str)])
131
- ProtoFile = typing.NamedTuple('ProtoFile',
132
- [('messages', typing.Dict[str, 'Message']), ('enums', typing.Dict[str, 'Enum']),
133
- ('services', typing.Dict[str, 'Service']), ('imports', typing.List[str]),
134
- ('options', typing.Dict[str, str]), ('package', str)])
135
-
136
-
137
- class ProtoTransformer(Transformer):
138
- '''Converts syntax tree token into more easily usable namedtuple objects'''
139
-
140
- def message(self, tokens):
141
- '''Returns a Message namedtuple'''
142
- comment = Comment("", {})
143
- if len(tokens) < 3:
144
- name_token, body = tokens
145
- else:
146
- comment, name_token, body = tokens
147
- return Message(comment, name_token.value, *body)
148
-
149
- def oneof(self, tokens):
150
- '''Returns a Oneof namedtuple'''
151
- comment = Comment("", {})
152
- fields = []
153
- name = None
154
- for token in tokens:
155
- if isinstance(token, Comment):
156
- comment = token
157
- elif isinstance(token, Field):
158
- fields.append(token)
159
- elif isinstance(token, Token) and token.type == 'ONEOFNAME':
160
- name = token.value
161
- return Oneof(comment, name, fields)
162
-
163
- def oneoffield(self, tokens):
164
- '''Returns a Field namedtuple'''
165
- comment = Comment("", {})
166
- type = Token("TYPE", "")
167
- fieldname = Token("FIELDNAME", "")
168
- fieldnumber = Token("FIELDNUMBER", "")
169
- for token in tokens:
170
- if isinstance(token, Comment):
171
- comment = token
172
- elif isinstance(token, Token):
173
- if token.type == "TYPE":
174
- type = token
175
- elif token.type == "FIELDNAME":
176
- fieldname = token
177
- elif token.type == "FIELDNUMBER":
178
- fieldnumber = token
179
- elif token.type == "COMMENT":
180
- comment = Comment(token.value, {})
181
- return Field(comment, '', type.value, type.value, type.value, fieldname.value, int(fieldnumber.value))
182
-
183
- def messagebody(self, items):
184
- '''Returns a tuple of message body namedtuples'''
185
- messages = {}
186
- enums = {}
187
- fields = []
188
- oneofs = []
189
- for item in items:
190
- if isinstance(item, Message):
191
- messages[item.name] = item
192
- elif isinstance(item, Enum):
193
- enums[item.name] = item
194
- elif isinstance(item, Field):
195
- fields.append(item)
196
- elif isinstance(item, Oneof):
197
- oneofs.append(item)
198
- return fields, oneofs, messages, enums
199
-
200
- def field(self, tokens):
201
- '''Returns a Field namedtuple'''
202
- comment = Comment("", {})
203
- type = Token("TYPE", "")
204
- fieldname = Token("FIELDNAME", "")
205
- fieldnumber = Token("FIELDNUMBER", "")
206
- label = Token("LABEL", "")
207
- for token in tokens:
208
- if isinstance(token, Comment):
209
- comment = token
210
- elif isinstance(token, Token):
211
- if token.type == "TYPE":
212
- type = token
213
- elif token.type == "FIELDNAME":
214
- fieldname = token
215
- elif token.type == "FIELDNUMBER":
216
- fieldnumber = token
217
- elif token.type == "COMMENT":
218
- comment = Comment(token.value, {})
219
- elif token.type == "LABEL":
220
- label = token
221
- return Field(comment, label, type.value, type.value, type.value, fieldname.value, int(fieldnumber.value))
222
-
223
-
224
- def mapfield(self, tokens):
225
- '''Returns a Field namedtuple'''
226
- comment = Comment("", {})
227
- val_type = Token("TYPE", "")
228
- key_type = Token("KEYTYPE", "")
229
- fieldname = Token("MAPNAME", "")
230
- fieldnumber = Token("FIELDNUMBER", "")
231
- for token in tokens:
232
- if isinstance(token, Comment):
233
- comment = token
234
- elif isinstance(token, Token):
235
- if token.type == "TYPE":
236
- val_type = token
237
- elif token.type == "KEYTYPE":
238
- key_type = token
239
- elif token.type == "MAPNAME":
240
- fieldname = token
241
- elif token.type == "FIELDNUMBER":
242
- fieldnumber = token
243
- elif token.type == "COMMENT":
244
- comment = Comment(token.value, {})
245
- return Field(comment, '', 'map', key_type.value, val_type.value, fieldname.value, int(fieldnumber.value))
246
-
247
- def comments(self, tokens):
248
- '''Returns a Tag namedtuple'''
249
- comment = ''
250
- tags = {}
251
- for token in tokens:
252
- comment += token
253
- if token.find('@') < 0:
254
- continue
255
- kvs = token.strip(" /\n").split('@')
256
- for kv in kvs:
257
- kv = kv.strip(" /\n")
258
- if not kv:
259
- continue
260
- tmp = kv.split('=')
261
- key = tmp[0].strip(" /\n").lower()
262
- if key.find(" ") >= 0:
263
- continue
264
- if len(tmp) > 1:
265
- tags[key] = tmp[1].lower()
266
- else:
267
- tags[key] = True
268
- return Comment(comment, tags)
269
-
270
- def enum(self, tokens):
271
- '''Returns an Enum namedtuple'''
272
- comment = Comment("", {})
273
- if len(tokens) < 3:
274
- name, fields = tokens
275
- else:
276
- comment, name, fields = tokens
277
- return Enum(comment, name.value, fields)
278
-
279
- def enumbody(self, tokens):
280
- '''Returns a sequence of enum identifiers'''
281
- enumitems = []
282
- for tree in tokens:
283
- if not isinstance(tree, Tree):
284
- continue
285
- if tree.data != 'enumfield':
286
- continue
287
- comment = Comment("", {})
288
- name = Token("IDENT", "")
289
- value = Token("INTLIT", "")
290
- for token in tree.children:
291
- if isinstance(token, Comment):
292
- comment = token
293
- elif isinstance(token, Token):
294
- if token.type == "IDENT":
295
- name = token
296
- elif token.type == "INTLIT":
297
- value = token
298
- elif token.type == "COMMENTS":
299
- comment = Comment(token.value, {})
300
- enumitems.append(Field(comment, '', 'enum', 'enum', 'enum', name.value, value.value))
301
- return enumitems
302
-
303
- def service(self, tokens):
304
- '''Returns a Service namedtuple'''
305
- functions = []
306
- name = ''
307
- for i in range(0, len(tokens)):
308
- if not isinstance(tokens[i], Comment):
309
- if isinstance(tokens[i], RpcFunc):
310
- functions.append(tokens[i])
311
- else:
312
- name = tokens[i].value
313
- return Service(name, functions)
314
-
315
- def rpc(self, tokens):
316
- '''Returns a RpcFunc namedtuple'''
317
- uri = ''
318
- in_type = ''
319
- for token in tokens:
320
- if isinstance(token, Token):
321
- if token.type == "RPCNAME":
322
- name = token
323
- elif token.type == "MESSAGETYPE":
324
- if in_type:
325
- out_type = token
326
- else:
327
- in_type = token
328
- elif not isinstance(token, Comment):
329
- option_token = token
330
- uri = option_token.children[0].value
331
- return RpcFunc(name.value, in_type.value, out_type.value, uri.strip('"'))
332
-
333
-
334
- def _recursive_to_dict(obj):
335
- _dict = {}
336
-
337
- if isinstance(obj, tuple):
338
- node = obj._asdict()
339
- for item in node:
340
- if isinstance(node[item], list): # Process as a list
341
- _dict[item] = [_recursive_to_dict(x) for x in (node[item])]
342
- elif isinstance(node[item], tuple): # Process as a NamedTuple
343
- _dict[item] = _recursive_to_dict(node[item])
344
- elif isinstance(node[item], dict):
345
- for k in node[item]:
346
- if isinstance(node[item][k], tuple):
347
- node[item][k] = _recursive_to_dict(node[item][k])
348
- _dict[item] = node[item]
349
- else: # Process as a regular element
350
- _dict[item] = (node[item])
351
- return _dict
352
-
353
-
354
- def parse_from_file(file: str):
355
- with open(file, 'r') as f:
356
- data = f.read()
357
- if data:
358
- return parse(data)
359
-
360
-
361
- def parse(data: str):
362
- parser = Lark(BNF, start='proto', parser='earley')
363
- tree = parser.parse(data)
364
- trans_tree = ProtoTransformer().transform(tree)
365
- enums = {}
366
- messages = {}
367
- services = {}
368
- imports = []
369
- import_tree = trans_tree.find_data('import')
370
- for tree in import_tree:
371
- for child in tree.children:
372
- imports.append(child.value.strip('"'))
373
- options = {}
374
- option_tree = trans_tree.find_data('option')
375
- for tree in option_tree:
376
- options[tree.children[0]] = tree.children[1].strip('"')
377
-
378
- package = ''
379
- package_tree = trans_tree.find_data('package')
380
- for tree in package_tree:
381
- package = tree.children[0]
382
-
383
- top_data = trans_tree.find_data('topleveldef')
384
- for top_level in top_data:
385
- for child in top_level.children:
386
- if isinstance(child, Message):
387
- messages[child.name] = child
388
- if isinstance(child, Enum):
389
- enums[child.name] = child
390
- if isinstance(child, Service):
391
- services[child.name] = child
392
- return ProtoFile(messages, enums, services, imports, options, package)
393
-
394
-
395
- def serialize2json(data):
396
- return json.dumps(_recursive_to_dict(parse(data)))
397
-
398
-
399
- def serialize2json_from_file(file: str):
400
- with open(file, 'r') as f:
401
- data = f.read()
402
- if data:
1
+ #!/usr/bin/env python
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing,
13
+ # software distributed under the License is distributed on an
14
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ # KIND, either express or implied. See the License for the
16
+ # specific language governing permissions and limitations
17
+ # under the License.
18
+ #
19
+ # adapted from https://github.com/khadgarmage/protoparser
20
+
21
+ from lark import Lark, Transformer, Tree, Token
22
+ from collections import namedtuple
23
+ import typing
24
+ import json
25
+
26
# Lark grammar for proto3 source files (start rule: ``proto``).
#
# UPPERCASE names are terminals (matched as tokens), lowercase names are
# rules; ProtoTransformer defines one method per rule it post-processes.
#
# OCTALDIGIT uses the range form "0".."7". It was previously written as the
# literal string "0..7", which only matched those four characters verbatim,
# so octal literals (e.g. 017) and octal escapes (e.g. \101) never matched.
#
# NOTE(review): BLOCKCOMMENT is built from /./, which does not match a
# newline under default regex flags, so multi-line /* ... */ comments look
# unsupported -- confirm before relying on them.
BNF = r'''
OCTALDIGIT: "0".."7"
IDENT: ( "_" )* LETTER ( LETTER | DECIMALDIGIT | "_" )*
FULLIDENT: IDENT ( "." IDENT )*
MESSAGENAME: IDENT
ENUMNAME: IDENT
FIELDNAME: IDENT
ONEOFNAME: IDENT
MAPNAME: IDENT
SERVICENAME: IDENT
TAGNAME: IDENT
TAGVALUE: IDENT
RPCNAME: IDENT
MESSAGETYPE: [ "." ] ( IDENT "." )* MESSAGENAME
ENUMTYPE: [ "." ] ( IDENT "." )* ENUMNAME

INTLIT : [ "-" ] DECIMALLIT | OCTALLIT | HEXLIT
DECIMALLIT: ( "1".."9" ) ( DECIMALDIGIT )*
OCTALLIT : "0" ( OCTALDIGIT )*
HEXLIT : "0" ( "x" | "X" ) HEXDIGIT ( HEXDIGIT )*

FLOATLIT: ( DECIMALS "." [ DECIMALS ] [ EXPONENT ] | DECIMALS EXPONENT | "."DECIMALS [ EXPONENT ] ) | "inf" | "nan"
DECIMALS : DECIMALDIGIT ( DECIMALDIGIT )*
EXPONENT : ( "e" | "E" ) [ "+" | "-" ] DECIMALS

BOOLLIT: "true" | "false"

STRLIT: ( "'" ( CHARVALUE )* "'" ) | ( "\"" ( CHARVALUE )* "\"" )
CHARVALUE: HEXESCAPE | OCTESCAPE | CHARESCAPE | /[^\0\n\\]/
HEXESCAPE: "\\" ( "x" | "X" ) HEXDIGIT HEXDIGIT
OCTESCAPE: "\\" OCTALDIGIT OCTALDIGIT OCTALDIGIT
CHARESCAPE: "\\" ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | "\\" | "'" | "\"" )
QUOTE: "'" | "\""

EMPTYSTATEMENT: ";"

CONSTANT: FULLIDENT | ( [ "-" | "+" ] INTLIT ) | ( [ "-" | "+" ] FLOATLIT ) | STRLIT | BOOLLIT

VERSION: "proto3"

syntax: "syntax" "=" QUOTE VERSION QUOTE ";"

import: "import" [ "weak" | "public" ] STRLIT ";"

package: "package" FULLIDENT ";"

option: [ COMMENTS ] "option" OPTIONNAME "=" CONSTANT ";"
OPTIONNAME: ( IDENT | "(" FULLIDENT ")" ) ( "." IDENT )*

TYPE: "double" | "float" | "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string" | "bytes" | MESSAGETYPE | ENUMTYPE
FIELDNUMBER: INTLIT

LABEL: "optional" | "repeated"
field: [ comments ] [ LABEL ] TYPE FIELDNAME "=" FIELDNUMBER [ "[" fieldoptions "]" ] TAIL
fieldoptions: fieldoption ( "," fieldoption )*
fieldoption: OPTIONNAME "=" CONSTANT

oneof: [ comments ] "oneof" ONEOFNAME "{" ( oneoffield | EMPTYSTATEMENT )* "}"
oneoffield: [ comments ] TYPE FIELDNAME "=" FIELDNUMBER [ "[" fieldoptions "]" ] ";"

mapfield: [ comments ] "map" "<" KEYTYPE "," TYPE ">" MAPNAME "=" FIELDNUMBER [ "[" fieldoptions "]" ] TAIL
KEYTYPE: "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string"

reserved: "reserved" ( ranges | fieldnames ) ";"
ranges: range ( "," range )*
range: INTLIT [ "to" ( INTLIT | "max" ) ]
fieldnames: FIELDNAME ( "," FIELDNAME )*

enum: [ comments ] "enum" ENUMNAME enumbody
enumbody: "{" ( option | enumfield | EMPTYSTATEMENT )* [ COMMENTS ] "}"
enumfield: [ COMMENTS ] IDENT "=" INTLIT [ "[" enumvalueoption ( "," enumvalueoption )* "]" ] TAIL
enumvalueoption: OPTIONNAME "=" CONSTANT

message: [ comments ] "message" MESSAGENAME messagebody
messagebody: "{" ( field | enum | message | option | oneof | mapfield | reserved | EMPTYSTATEMENT )* "}"

googleoption: "option" "(google.api.http)" "=" "{" [ "post:" CONSTANT [ "body:" CONSTANT ] ] "}" ";"
service: [ comments ] "service" SERVICENAME "{" ( option | rpc | EMPTYSTATEMENT )* "}"
rpc: [ comments ] "rpc" RPCNAME "(" [ "stream" ] MESSAGETYPE ")" "returns" "(" [ "stream" ] MESSAGETYPE ")" ( ( "{" ( googleoption | option | EMPTYSTATEMENT )* "}" ) | ";" )

proto: [ comments ] syntax ( import | package | option | topleveldef | EMPTYSTATEMENT )*
topleveldef: message | enum | service | comments

TAIL: ";" /[\s|\t]/* [ COMMENT ] [ NEWLINE ]
COMMENT: "//" /[^\n]/*
BLOCKCOMMENT: "/*" /./* "*/"
comments: (( COMMENT | BLOCKCOMMENT ) NEWLINE )+
COMMENTS: (( COMMENT | BLOCKCOMMENT ) NEWLINE )+

%import common.HEXDIGIT
%import common.DIGIT -> DECIMALDIGIT
%import common.LETTER
%import common.WS
%import common.NEWLINE
%ignore WS
'''
122
+
123
# Result records produced by ProtoTransformer and parse().
#
# The ``fields`` of Enum and the ``functions`` of Service are annotated as
# lists because that is what ProtoTransformer.enumbody() and
# ProtoTransformer.service() actually build.

# Raw comment text plus the ``@key`` / ``@key=value`` tags extracted from it.
Comment = typing.NamedTuple('Comment', [('content', str), ('tags', typing.Dict[str, typing.Any])])
# A ``oneof`` group: its leading comment, its name and its member fields.
Oneof = typing.NamedTuple('Oneof', [('comment', 'Comment'), ('name', str), ('fields', typing.List['Field'])])
# A message field, map field, oneof member or enum value. For map fields
# ``type`` is 'map' and key_type/val_type carry the map's types; for plain
# fields all three hold the same type name.
# NOTE(review): enumbody() stores ``number`` as the raw token text (a str),
# not an int -- confirm consumers before relying on the annotation.
Field = typing.NamedTuple('Field', [('comment', 'Comment'), ('label', str), ('type', str), ('key_type', str), ('val_type', str), ('name', str), ('number', int)])
# An enum declaration; ``fields`` is the ordered list of its values.
Enum = typing.NamedTuple('Enum', [('comment', 'Comment'), ('name', str), ('fields', typing.List['Field'])])
# A message declaration with its fields, oneofs and nested declarations
# (nested messages and enums are keyed by name).
Message = typing.NamedTuple('Message', [('comment', 'Comment'), ('name', str), ('fields', typing.List['Field']), ('oneofs', typing.List['Oneof']),
                                        ('messages', typing.Dict[str, 'Message']), ('enums', typing.Dict[str, 'Enum'])])
# A service declaration; ``functions`` is the ordered list of its RPCs.
Service = typing.NamedTuple('Service', [('name', str), ('functions', typing.List['RpcFunc'])])
# A single RPC: name, request type, response type and optional URI.
RpcFunc = typing.NamedTuple('RpcFunc', [('name', str), ('in_type', str), ('out_type', str), ('uri', str)])
# Everything parse() extracts from one .proto source text.
ProtoFile = typing.NamedTuple('ProtoFile',
                              [('messages', typing.Dict[str, 'Message']), ('enums', typing.Dict[str, 'Enum']),
                               ('services', typing.Dict[str, 'Service']), ('imports', typing.List[str]),
                               ('options', typing.Dict[str, str]), ('package', str)])
135
+
136
+
137
class ProtoTransformer(Transformer):
    '''Converts syntax tree nodes into more easily usable namedtuple objects.

    Each method is named after a grammar rule in BNF and receives the list of
    that rule's children (tokens, nested trees, or values already returned by
    other methods of this class). Rules without a method here stay as Tree
    nodes.

    Children are always picked by type rather than by position, so optional
    grammar slots that are absent (or represented by None) are simply skipped.
    '''

    def message(self, tokens):
        '''Returns a Message namedtuple.

        tokens is either [name, body] or [comment, name, body], where body is
        the 4-tuple produced by messagebody().
        '''
        comment = Comment("", {})
        if len(tokens) < 3:
            name_token, body = tokens
        else:
            comment, name_token, body = tokens
        return Message(comment, name_token.value, *body)

    def oneof(self, tokens):
        '''Returns a Oneof namedtuple with its comment, name and member fields.'''
        comment = Comment("", {})
        fields = []
        name = None
        for token in tokens:
            if isinstance(token, Comment):
                comment = token
            elif isinstance(token, Field):
                fields.append(token)
            elif isinstance(token, Token) and token.type == 'ONEOFNAME':
                name = token.value
        return Oneof(comment, name, fields)

    def oneoffield(self, tokens):
        '''Returns a Field namedtuple for one member of a oneof (no label).'''
        comment = Comment("", {})
        # Empty placeholder tokens keep the final .value lookups uniform.
        type_token = Token("TYPE", "")
        fieldname = Token("FIELDNAME", "")
        fieldnumber = Token("FIELDNUMBER", "")
        for token in tokens:
            if isinstance(token, Comment):
                comment = token
            elif isinstance(token, Token):
                if token.type == "TYPE":
                    type_token = token
                elif token.type == "FIELDNAME":
                    fieldname = token
                elif token.type == "FIELDNUMBER":
                    fieldnumber = token
                elif token.type == "COMMENT":
                    comment = Comment(token.value, {})
        return Field(comment, '', type_token.value, type_token.value, type_token.value,
                     fieldname.value, int(fieldnumber.value))

    def messagebody(self, items):
        '''Returns the tuple (fields, oneofs, messages, enums) for a message body.

        Nested messages and enums are keyed by name; fields and oneofs keep
        their source order. Any other child (options, reserved statements,
        empty statements) is dropped.
        '''
        messages = {}
        enums = {}
        fields = []
        oneofs = []
        for item in items:
            if isinstance(item, Message):
                messages[item.name] = item
            elif isinstance(item, Enum):
                enums[item.name] = item
            elif isinstance(item, Field):
                fields.append(item)
            elif isinstance(item, Oneof):
                oneofs.append(item)
        return fields, oneofs, messages, enums

    def field(self, tokens):
        '''Returns a Field namedtuple for a regular message field.

        The label is stored as the LABEL token itself (a str subclass); it is
        an empty token when the field has no label.
        '''
        comment = Comment("", {})
        type_token = Token("TYPE", "")
        fieldname = Token("FIELDNAME", "")
        fieldnumber = Token("FIELDNUMBER", "")
        label = Token("LABEL", "")
        for token in tokens:
            if isinstance(token, Comment):
                comment = token
            elif isinstance(token, Token):
                if token.type == "TYPE":
                    type_token = token
                elif token.type == "FIELDNAME":
                    fieldname = token
                elif token.type == "FIELDNUMBER":
                    fieldnumber = token
                elif token.type == "COMMENT":
                    comment = Comment(token.value, {})
                elif token.type == "LABEL":
                    label = token
        return Field(comment, label, type_token.value, type_token.value, type_token.value,
                     fieldname.value, int(fieldnumber.value))

    def mapfield(self, tokens):
        '''Returns a Field namedtuple of type 'map' with its key and value types.'''
        comment = Comment("", {})
        val_type = Token("TYPE", "")
        key_type = Token("KEYTYPE", "")
        fieldname = Token("MAPNAME", "")
        fieldnumber = Token("FIELDNUMBER", "")
        for token in tokens:
            if isinstance(token, Comment):
                comment = token
            elif isinstance(token, Token):
                if token.type == "TYPE":
                    val_type = token
                elif token.type == "KEYTYPE":
                    key_type = token
                elif token.type == "MAPNAME":
                    fieldname = token
                elif token.type == "FIELDNUMBER":
                    fieldnumber = token
                elif token.type == "COMMENT":
                    comment = Comment(token.value, {})
        return Field(comment, '', 'map', key_type.value, val_type.value, fieldname.value, int(fieldnumber.value))

    def comments(self, tokens):
        '''Returns a Comment namedtuple with the joined text and parsed tags.

        The content is the concatenation of all child tokens. Every token that
        contains '@' is split on '@'; each piece becomes a lower-cased tag key
        mapped to the lower-cased text after '=' or to True when there is no
        '='. Pieces whose key contains a space are ignored. The text before
        the first '@' goes through the same treatment, so a single leading
        word also ends up as a tag.
        '''
        comment = ''
        tags = {}
        for token in tokens:
            comment += token
            if token.find('@') < 0:
                continue
            kvs = token.strip(" /\n").split('@')
            for kv in kvs:
                kv = kv.strip(" /\n")
                if not kv:
                    continue
                tmp = kv.split('=')
                key = tmp[0].strip(" /\n").lower()
                if key.find(" ") >= 0:
                    continue
                if len(tmp) > 1:
                    tags[key] = tmp[1].lower()
                else:
                    tags[key] = True
        return Comment(comment, tags)

    def enum(self, tokens):
        '''Returns an Enum namedtuple.

        tokens is either [name, fields] or [comment, name, fields], where
        fields is the list produced by enumbody().
        '''
        comment = Comment("", {})
        if len(tokens) < 3:
            name, fields = tokens
        else:
            comment, name, fields = tokens
        return Enum(comment, name.value, fields)

    def enumbody(self, tokens):
        '''Returns the list of enum values as Field namedtuples.

        Only 'enumfield' subtrees are considered; options and empty statements
        are skipped. The value number is kept as the raw token text.
        '''
        enumitems = []
        for tree in tokens:
            if not isinstance(tree, Tree):
                continue
            if tree.data != 'enumfield':
                continue
            comment = Comment("", {})
            name = Token("IDENT", "")
            value = Token("INTLIT", "")
            for token in tree.children:
                if isinstance(token, Comment):
                    comment = token
                elif isinstance(token, Token):
                    if token.type == "IDENT":
                        name = token
                    elif token.type == "INTLIT":
                        value = token
                    elif token.type == "COMMENTS":
                        comment = Comment(token.value, {})
            enumitems.append(Field(comment, '', 'enum', 'enum', 'enum', name.value, value.value))
        return enumitems

    def service(self, tokens):
        '''Returns a Service namedtuple with its name and list of RpcFunc.

        Only the SERVICENAME token supplies the name, so option subtrees and
        empty statements inside the service body neither raise nor overwrite
        the name.
        '''
        functions = []
        name = ''
        for token in tokens:
            if isinstance(token, RpcFunc):
                functions.append(token)
            elif isinstance(token, Token) and token.type == 'SERVICENAME':
                name = token.value
        return Service(name, functions)

    def rpc(self, tokens):
        '''Returns a RpcFunc namedtuple.

        The first MESSAGETYPE token is the request type and the following one
        the response type. The uri is taken from the first child token of an
        option subtree inside the rpc body, with surrounding double quotes
        removed; it stays empty when there is no such token.
        '''
        uri = ''
        name = Token("RPCNAME", "")
        in_type = None
        out_type = None
        for token in tokens:
            if isinstance(token, Token):
                if token.type == "RPCNAME":
                    name = token
                elif token.type == "MESSAGETYPE":
                    if in_type is None:
                        in_type = token
                    else:
                        out_type = token
            elif isinstance(token, Tree) and token.children:
                # Option subtree: only use its first child when it really is a
                # token (the slot may be empty or hold a placeholder).
                first_child = token.children[0]
                if isinstance(first_child, Token):
                    uri = first_child.value
        # Fall back to empty tokens so a malformed rpc yields empty strings
        # instead of an unbound-variable error.
        if in_type is None:
            in_type = Token("MESSAGETYPE", "")
        if out_type is None:
            out_type = Token("MESSAGETYPE", "")
        return RpcFunc(name.value, in_type.value, out_type.value, uri.strip('"'))
332
+
333
+
334
+ def _recursive_to_dict(obj):
335
+ _dict = {}
336
+
337
+ if isinstance(obj, tuple):
338
+ node = obj._asdict()
339
+ for item in node:
340
+ if isinstance(node[item], list): # Process as a list
341
+ _dict[item] = [_recursive_to_dict(x) for x in (node[item])]
342
+ elif isinstance(node[item], tuple): # Process as a NamedTuple
343
+ _dict[item] = _recursive_to_dict(node[item])
344
+ elif isinstance(node[item], dict):
345
+ for k in node[item]:
346
+ if isinstance(node[item][k], tuple):
347
+ node[item][k] = _recursive_to_dict(node[item][k])
348
+ _dict[item] = node[item]
349
+ else: # Process as a regular element
350
+ _dict[item] = (node[item])
351
+ return _dict
352
+
353
+
354
def parse_from_file(file: str):
    '''Reads a .proto file and parses it with parse().

    The file is decoded as UTF-8 regardless of the platform's default
    encoding, so results do not depend on the machine's locale.

    Returns the ProtoFile produced by parse(), or None when the file is empty.
    '''
    with open(file, 'r', encoding='utf-8') as f:
        data = f.read()
    if data:
        return parse(data)
    return None
359
+
360
+
361
def parse(data: str):
    '''Parses proto3 source text and returns a ProtoFile namedtuple.

    The text is parsed with the BNF grammar (Earley parser, start rule
    'proto') and post-processed by ProtoTransformer. Imports, options and the
    package are then read from the remaining tree nodes, and top-level
    messages, enums and services are collected by name.

    Raises whatever exception the lark parser raises for input that does not
    match the grammar.
    '''
    parser = Lark(BNF, start='proto', parser='earley')
    trans_tree = ProtoTransformer().transform(parser.parse(data))

    imports = []
    for import_node in trans_tree.find_data('import'):
        for child in import_node.children:
            if isinstance(child, Token):
                imports.append(child.value.strip('"'))

    options = {}
    for option_node in trans_tree.find_data('option'):
        # Select by token type: the rule's optional leading COMMENTS slot
        # shifts the positions of the name and value tokens.
        option_name = None
        option_value = None
        for child in option_node.children:
            if isinstance(child, Token):
                if child.type == 'OPTIONNAME':
                    option_name = child
                elif child.type == 'CONSTANT':
                    option_value = child
        if option_name is not None and option_value is not None:
            options[option_name] = option_value.strip('"')

    package = ''
    for package_node in trans_tree.find_data('package'):
        package = package_node.children[0]

    enums = {}
    messages = {}
    services = {}
    for top_level in trans_tree.find_data('topleveldef'):
        for child in top_level.children:
            if isinstance(child, Message):
                messages[child.name] = child
            elif isinstance(child, Enum):
                enums[child.name] = child
            elif isinstance(child, Service):
                services[child.name] = child
    return ProtoFile(messages, enums, services, imports, options, package)
393
+
394
+
395
def serialize2json(data):
    '''Parses proto3 source text and returns the result as a JSON string.'''
    proto_file = parse(data)
    as_plain_dict = _recursive_to_dict(proto_file)
    return json.dumps(as_plain_dict)
397
+
398
+
399
def serialize2json_from_file(file: str):
    '''Reads a .proto file and returns its parsed content as a JSON string.

    The file is decoded as UTF-8 regardless of the platform's default
    encoding. The conversion itself is delegated to serialize2json().

    Returns None when the file is empty.
    '''
    with open(file, 'r', encoding='utf-8') as f:
        data = f.read()
    if data:
        return serialize2json(data)
    return None