structurize 2.16.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. avrotize/__init__.py +63 -0
  2. avrotize/__main__.py +6 -0
  3. avrotize/_version.py +34 -0
  4. avrotize/asn1toavro.py +160 -0
  5. avrotize/avrotize.py +152 -0
  6. avrotize/avrotocpp.py +483 -0
  7. avrotize/avrotocsharp.py +992 -0
  8. avrotize/avrotocsv.py +121 -0
  9. avrotize/avrotodatapackage.py +173 -0
  10. avrotize/avrotodb.py +1383 -0
  11. avrotize/avrotogo.py +476 -0
  12. avrotize/avrotographql.py +197 -0
  13. avrotize/avrotoiceberg.py +210 -0
  14. avrotize/avrotojava.py +1023 -0
  15. avrotize/avrotojs.py +250 -0
  16. avrotize/avrotojsons.py +481 -0
  17. avrotize/avrotojstruct.py +345 -0
  18. avrotize/avrotokusto.py +364 -0
  19. avrotize/avrotomd.py +137 -0
  20. avrotize/avrotools.py +168 -0
  21. avrotize/avrotoparquet.py +208 -0
  22. avrotize/avrotoproto.py +359 -0
  23. avrotize/avrotopython.py +622 -0
  24. avrotize/avrotorust.py +435 -0
  25. avrotize/avrotots.py +598 -0
  26. avrotize/avrotoxsd.py +344 -0
  27. avrotize/commands.json +2433 -0
  28. avrotize/common.py +829 -0
  29. avrotize/constants.py +5 -0
  30. avrotize/csvtoavro.py +132 -0
  31. avrotize/datapackagetoavro.py +76 -0
  32. avrotize/dependency_resolver.py +348 -0
  33. avrotize/jsonstoavro.py +1698 -0
  34. avrotize/jsonstostructure.py +2642 -0
  35. avrotize/jstructtoavro.py +878 -0
  36. avrotize/kstructtoavro.py +93 -0
  37. avrotize/kustotoavro.py +455 -0
  38. avrotize/parquettoavro.py +157 -0
  39. avrotize/proto2parser.py +498 -0
  40. avrotize/proto3parser.py +403 -0
  41. avrotize/prototoavro.py +382 -0
  42. avrotize/structuretocsharp.py +2005 -0
  43. avrotize/structuretojsons.py +498 -0
  44. avrotize/structuretopython.py +772 -0
  45. avrotize/xsdtoavro.py +413 -0
  46. structurize-2.16.2.dist-info/METADATA +805 -0
  47. structurize-2.16.2.dist-info/RECORD +51 -0
  48. structurize-2.16.2.dist-info/WHEEL +5 -0
  49. structurize-2.16.2.dist-info/entry_points.txt +2 -0
  50. structurize-2.16.2.dist-info/licenses/LICENSE +201 -0
  51. structurize-2.16.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,403 @@
1
+ #!/usr/bin/env python
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing,
13
+ # software distributed under the License is distributed on an
14
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ # KIND, either express or implied. See the License for the
16
+ # specific language governing permissions and limitations
17
+ # under the License.
18
+ #
19
+ # adapted from https://github.com/khadgarmage/protoparser
20
+
21
+ from lark import Lark, Transformer, Tree, Token
22
+ from collections import namedtuple
23
+ import typing
24
+ import json
25
+
26
# Lark grammar for proto3 source files; parse() hands it to Lark with the
# start rule 'proto'.
#
# OCTALDIGIT is written as the character range "0".."7". It was previously the
# single string literal "0..7", which only matched those four characters, so
# octal integers such as 017 and octal string escapes such as \012 could never
# be recognised.
#
# NOTE(review): BLOCKCOMMENT uses /./*, and '.' does not match a newline unless
# the regex carries the 's' flag, so block comments spanning several lines
# look unsupported -- confirm before relying on them.
BNF = r'''
OCTALDIGIT: "0".."7"
IDENT: ( "_" )* LETTER ( LETTER | DECIMALDIGIT | "_" )*
FULLIDENT: IDENT ( "." IDENT )*
MESSAGENAME: IDENT
ENUMNAME: IDENT
FIELDNAME: IDENT
ONEOFNAME: IDENT
MAPNAME: IDENT
SERVICENAME: IDENT
TAGNAME: IDENT
TAGVALUE: IDENT
RPCNAME: IDENT
MESSAGETYPE: [ "." ] ( IDENT "." )* MESSAGENAME
ENUMTYPE: [ "." ] ( IDENT "." )* ENUMNAME

INTLIT : [ "-" ] DECIMALLIT | OCTALLIT | HEXLIT
DECIMALLIT: ( "1".."9" ) ( DECIMALDIGIT )*
OCTALLIT : "0" ( OCTALDIGIT )*
HEXLIT : "0" ( "x" | "X" ) HEXDIGIT ( HEXDIGIT )*

FLOATLIT: ( DECIMALS "." [ DECIMALS ] [ EXPONENT ] | DECIMALS EXPONENT | "."DECIMALS [ EXPONENT ] ) | "inf" | "nan"
DECIMALS : DECIMALDIGIT ( DECIMALDIGIT )*
EXPONENT : ( "e" | "E" ) [ "+" | "-" ] DECIMALS

BOOLLIT: "true" | "false"

STRLIT: ( "'" ( CHARVALUE )* "'" ) | ( "\"" ( CHARVALUE )* "\"" )
CHARVALUE: HEXESCAPE | OCTESCAPE | CHARESCAPE | /[^\0\n\\]/
HEXESCAPE: "\\" ( "x" | "X" ) HEXDIGIT HEXDIGIT
OCTESCAPE: "\\" OCTALDIGIT OCTALDIGIT OCTALDIGIT
CHARESCAPE: "\\" ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | "\\" | "'" | "\"" )
QUOTE: "'" | "\""

EMPTYSTATEMENT: ";"

CONSTANT: FULLIDENT | ( [ "-" | "+" ] INTLIT ) | ( [ "-" | "+" ] FLOATLIT ) | STRLIT | BOOLLIT

VERSION: "proto3"

syntax: "syntax" "=" QUOTE VERSION QUOTE ";"

import: "import" [ "weak" | "public" ] STRLIT ";"

package: "package" FULLIDENT ";"

option: [ COMMENTS ] "option" OPTIONNAME "=" CONSTANT ";"
OPTIONNAME: ( IDENT | "(" FULLIDENT ")" ) ( "." IDENT )*

TYPE: "double" | "float" | "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string" | "bytes" | MESSAGETYPE | ENUMTYPE
FIELDNUMBER: INTLIT

LABEL: "optional" | "repeated"
field: [ comments ] [ LABEL ] TYPE FIELDNAME "=" FIELDNUMBER [ "[" fieldoptions "]" ] TAIL
fieldoptions: fieldoption ( "," fieldoption )*
fieldoption: OPTIONNAME "=" CONSTANT

oneof: [ comments ] "oneof" ONEOFNAME "{" ( oneoffield | EMPTYSTATEMENT )* "}"
oneoffield: [ comments ] TYPE FIELDNAME "=" FIELDNUMBER [ "[" fieldoptions "]" ] ";"

mapfield: [ comments ] "map" "<" KEYTYPE "," TYPE ">" MAPNAME "=" FIELDNUMBER [ "[" fieldoptions "]" ] TAIL
KEYTYPE: "int32" | "int64" | "uint32" | "uint64" | "sint32" | "sint64" | "fixed32" | "fixed64" | "sfixed32" | "sfixed64" | "bool" | "string"

reserved: "reserved" ( ranges | fieldnames ) ";"
ranges: range ( "," range )*
range: INTLIT [ "to" ( INTLIT | "max" ) ]
fieldnames: FIELDNAME ( "," FIELDNAME )*

enum: [ comments ] "enum" ENUMNAME enumbody
enumbody: "{" ( option | enumfield | EMPTYSTATEMENT )* [ COMMENTS ] "}"
enumfield: [ COMMENTS ] IDENT "=" INTLIT [ "[" enumvalueoption ( "," enumvalueoption )* "]" ] TAIL
enumvalueoption: OPTIONNAME "=" CONSTANT

message: [ comments ] "message" MESSAGENAME messagebody
messagebody: "{" ( field | enum | message | option | oneof | mapfield | reserved | EMPTYSTATEMENT )* "}"

googleoption: "option" "(google.api.http)" "=" "{" [ "post:" CONSTANT [ "body:" CONSTANT ] ] "}" ";"
service: [ comments ] "service" SERVICENAME "{" ( option | rpc | EMPTYSTATEMENT )* "}"
rpc: [ comments ] "rpc" RPCNAME "(" [ "stream" ] MESSAGETYPE ")" "returns" "(" [ "stream" ] MESSAGETYPE ")" ( ( "{" ( googleoption | option | EMPTYSTATEMENT )* "}" ) | ";" )

proto: [ comments ] syntax ( import | package | option | topleveldef | EMPTYSTATEMENT )*
topleveldef: message | enum | service | comments

TAIL: ";" /[\s|\t]/* [ COMMENT ] [ NEWLINE ]
COMMENT: "//" /[^\n]/*
BLOCKCOMMENT: "/*" /./* "*/"
comments: (( COMMENT | BLOCKCOMMENT ) NEWLINE )+
COMMENTS: (( COMMENT | BLOCKCOMMENT ) NEWLINE )+

%import common.HEXDIGIT
%import common.DIGIT -> DECIMALDIGIT
%import common.LETTER
%import common.WS
%import common.NEWLINE
%ignore WS
'''
122
+
123
# Result records produced by the transformer and by parse().  Annotations are
# descriptive only; nothing checks them at runtime.


class Comment(typing.NamedTuple):
    # Raw comment text plus the '@key' / '@key=value' tags found in it.
    content: str
    tags: typing.Dict[str, typing.Any]


class Oneof(typing.NamedTuple):
    # A 'oneof' group and the fields declared inside it.
    comment: 'Comment'
    name: str
    fields: typing.List['Field']


class Field(typing.NamedTuple):
    # One message field, map field, oneof member or enum value.
    comment: 'Comment'
    label: str
    type: str
    key_type: str
    val_type: str
    name: str
    number: int


class Enum(typing.NamedTuple):
    # NOTE: the transformer in this module stores a list of Field in 'fields'.
    comment: 'Comment'
    name: str
    fields: typing.Dict[str, 'Field']


class Message(typing.NamedTuple):
    # A message with its fields, oneofs and nested messages / enums by name.
    comment: 'Comment'
    name: str
    fields: typing.List['Field']
    oneofs: typing.List['Oneof']
    messages: typing.Dict[str, 'Message']
    enums: typing.Dict[str, 'Enum']


class Service(typing.NamedTuple):
    # NOTE: the transformer in this module stores a list of RpcFunc in
    # 'functions'.
    name: str
    functions: typing.Dict[str, 'RpcFunc']


class RpcFunc(typing.NamedTuple):
    # One rpc declaration: request type, response type and optional URI.
    name: str
    in_type: str
    out_type: str
    uri: str


class ProtoFile(typing.NamedTuple):
    # Everything parse() extracts from one .proto source.
    messages: typing.Dict[str, 'Message']
    enums: typing.Dict[str, 'Enum']
    services: typing.Dict[str, 'Service']
    imports: typing.List[str]
    options: typing.Dict[str, str]
    package: str
135
+
136
+
137
class ProtoTransformer(Transformer):
    '''Converts syntax tree nodes into more easily usable namedtuple objects.

    Each public method is named after a grammar rule and receives that rule's
    already-transformed children. Children are matched by type rather than by
    position, because an optional grammar item (``[ ... ]``) that did not match
    may be absent from the child list or present as ``None``, depending on
    Lark's ``maybe_placeholders`` setting.
    '''

    @staticmethod
    def _leading_comment(children):
        '''Returns the first Comment among children, or an empty Comment'''
        for child in children:
            if isinstance(child, Comment):
                return child
        return Comment("", {})

    @staticmethod
    def _collect(children, wanted, comment_type="COMMENT"):
        '''Returns (comment, tokens-by-type) for a field-like rule.

        ``wanted`` lists the token types to pick up; a type that does not occur
        maps to an empty placeholder Token. When a type occurs several times
        the last occurrence wins. A Comment child, or a token whose type is
        ``comment_type``, becomes the returned comment. Anything else
        (including ``None``) is ignored.
        '''
        comment = Comment("", {})
        found = {kind: Token(kind, "") for kind in wanted}
        for child in children:
            if isinstance(child, Comment):
                comment = child
            elif isinstance(child, Token):
                if child.type == comment_type:
                    comment = Comment(child.value, {})
                elif child.type in found:
                    found[child.type] = child
        return comment, found

    @staticmethod
    def _to_int(text):
        '''Converts an INTLIT lexeme (decimal, octal or hex) to an int.

        Plain ``int(text)`` rejects ``0x..`` literals and reads a leading-zero
        octal literal as decimal. Raises ValueError for an empty or malformed
        lexeme.
        '''
        negative = text.startswith('-')
        digits = text.lstrip('+-')
        if digits[:2] in ('0x', '0X'):
            magnitude = int(digits, 16)
        elif len(digits) > 1 and digits[0] == '0':
            magnitude = int(digits, 8)
        else:
            magnitude = int(digits)
        return -magnitude if negative else magnitude

    def message(self, tokens):
        '''Returns a Message namedtuple'''
        # The last two children are always the name token and the body tuple;
        # whatever precedes them is the optional comment (or a placeholder).
        *leading, name_token, body = tokens
        comment = self._leading_comment(leading)
        return Message(comment, name_token.value, *body)

    def oneof(self, tokens):
        '''Returns a Oneof namedtuple'''
        comment = Comment("", {})
        fields = []
        name = None
        for token in tokens:
            if isinstance(token, Comment):
                comment = token
            elif isinstance(token, Field):
                fields.append(token)
            elif isinstance(token, Token) and token.type == 'ONEOFNAME':
                name = token.value
        return Oneof(comment, name, fields)

    def oneoffield(self, tokens):
        '''Returns a Field namedtuple for a member of a oneof'''
        comment, found = self._collect(tokens, ("TYPE", "FIELDNAME", "FIELDNUMBER"))
        type_name = found["TYPE"].value
        return Field(comment, '', type_name, type_name, type_name,
                     found["FIELDNAME"].value,
                     self._to_int(found["FIELDNUMBER"].value))

    def messagebody(self, items):
        '''Returns a (fields, oneofs, messages, enums) tuple for a message body'''
        messages = {}
        enums = {}
        fields = []
        oneofs = []
        for item in items:
            if isinstance(item, Message):
                messages[item.name] = item
            elif isinstance(item, Enum):
                enums[item.name] = item
            elif isinstance(item, Field):
                fields.append(item)
            elif isinstance(item, Oneof):
                oneofs.append(item)
            # Options, reserved statements and empty statements are dropped.
        return fields, oneofs, messages, enums

    def field(self, tokens):
        '''Returns a Field namedtuple for a regular message field'''
        comment, found = self._collect(
            tokens, ("LABEL", "TYPE", "FIELDNAME", "FIELDNUMBER"))
        type_name = found["TYPE"].value
        # The label is passed on as the Token itself (a str subclass), which
        # is what this method has always returned.
        return Field(comment, found["LABEL"], type_name, type_name, type_name,
                     found["FIELDNAME"].value,
                     self._to_int(found["FIELDNUMBER"].value))

    def mapfield(self, tokens):
        '''Returns a Field namedtuple of type 'map' with key and value types'''
        comment, found = self._collect(
            tokens, ("TYPE", "KEYTYPE", "MAPNAME", "FIELDNUMBER"))
        return Field(comment, '', 'map', found["KEYTYPE"].value,
                     found["TYPE"].value, found["MAPNAME"].value,
                     self._to_int(found["FIELDNUMBER"].value))

    def comments(self, tokens):
        '''Returns a Comment namedtuple.

        The content is every token concatenated. For each token containing
        '@', the text is split on '@' and every piece without a space in its
        key becomes a tag: ``key=value`` stores the lower-cased value, a bare
        ``key`` stores True.
        '''
        text = ''
        tags = {}
        for token in tokens:
            text += token
            if '@' not in token:
                continue
            for piece in token.strip(" /\n").split('@'):
                piece = piece.strip(" /\n")
                if not piece:
                    continue
                parts = piece.split('=')
                key = parts[0].strip(" /\n").lower()
                if " " in key:
                    # Ordinary prose rather than a tag name.
                    continue
                tags[key] = parts[1].lower() if len(parts) > 1 else True
        return Comment(text, tags)

    def enum(self, tokens):
        '''Returns an Enum namedtuple'''
        # Same layout as message(): optional comment, then name, then body.
        *leading, name, fields = tokens
        comment = self._leading_comment(leading)
        return Enum(comment, name.value, fields)

    def enumbody(self, tokens):
        '''Returns a list of Field namedtuples, one per enum value'''
        enumitems = []
        for node in tokens:
            # Only 'enumfield' subtrees describe values; options, empty
            # statements, trailing comments and placeholders are skipped.
            if not isinstance(node, Tree) or node.data != 'enumfield':
                continue
            comment, found = self._collect(
                node.children, ("IDENT", "INTLIT"), comment_type="COMMENTS")
            # The value is kept as the literal text, as this method has
            # always done.
            enumitems.append(Field(comment, '', 'enum', 'enum', 'enum',
                                   found["IDENT"].value, found["INTLIT"].value))
        return enumitems

    def service(self, tokens):
        '''Returns a Service namedtuple'''
        functions = []
        name = ''
        for child in tokens:
            if isinstance(child, RpcFunc):
                functions.append(child)
            elif isinstance(child, Token) and child.type == "SERVICENAME":
                # Only the SERVICENAME token names the service; option trees,
                # ';' tokens and placeholders must not overwrite it.
                name = child.value
        return Service(name, functions)

    def rpc(self, tokens):
        '''Returns a RpcFunc namedtuple.

        The first MESSAGETYPE token is the request type and the following one
        the response type. The URI is the first constant of a
        ``googleoption`` child, with surrounding double quotes removed, or ''
        when there is none.
        '''
        name = Token("RPCNAME", "")
        in_type = Token("MESSAGETYPE", "")
        out_type = Token("MESSAGETYPE", "")
        uri = ''
        for child in tokens:
            if isinstance(child, Token):
                if child.type == "RPCNAME":
                    name = child
                elif child.type == "MESSAGETYPE":
                    if in_type:
                        out_type = child
                    else:
                        in_type = child
            elif isinstance(child, Tree) and child.data == 'googleoption':
                for part in child.children:
                    if isinstance(part, Token):
                        uri = part.value
                        break
        return RpcFunc(name.value, in_type.value, out_type.value, uri.strip('"'))
332
+
333
+
334
+ def _recursive_to_dict(obj):
335
+ _dict = {}
336
+
337
+ if isinstance(obj, tuple):
338
+ node = obj._asdict()
339
+ for item in node:
340
+ if isinstance(node[item], list): # Process as a list
341
+ _dict[item] = [_recursive_to_dict(x) for x in (node[item])]
342
+ elif isinstance(node[item], tuple): # Process as a NamedTuple
343
+ _dict[item] = _recursive_to_dict(node[item])
344
+ elif isinstance(node[item], dict):
345
+ for k in node[item]:
346
+ if isinstance(node[item][k], tuple):
347
+ node[item][k] = _recursive_to_dict(node[item][k])
348
+ _dict[item] = node[item]
349
+ else: # Process as a regular element
350
+ _dict[item] = (node[item])
351
+ return _dict
352
+
353
+
354
def parse_from_file(file: str):
    '''Read a .proto file and return the result of parse() on its text.

    The file is decoded as UTF-8 so the result does not depend on the
    platform's default encoding. Returns None when the file is empty.
    '''
    with open(file, 'r', encoding='utf-8') as f:
        data = f.read()
    if not data:
        return None
    return parse(data)
359
+
360
+
361
def parse(data: str):
    '''Parse proto3 source text and return a ProtoFile namedtuple.

    The text is parsed with the BNF grammar (Earley parser, start rule
    'proto') and run through ProtoTransformer. Imports, options, the package
    name and the top-level messages, enums and services are then collected
    from what remains of the tree. Lark raises its own exceptions when the
    text does not match the grammar.
    '''

    def unquote(text):
        # Remove one pair of matching surrounding quotes, if present.
        if len(text) >= 2 and text[0] == text[-1] and text[0] in '"\'':
            return text[1:-1]
        return text

    parser = Lark(BNF, start='proto', parser='earley')
    root = ProtoTransformer().transform(parser.parse(data))

    # Children are matched by type: an optional item that did not match may
    # be missing or None, so positions are not reliable.
    imports = [
        unquote(child.value)
        for node in root.find_data('import')
        for child in node.children
        if isinstance(child, Token)
    ]

    options = {}
    for node in root.find_data('option'):
        option_name = None
        option_value = None
        for child in node.children:
            if not isinstance(child, Token):
                continue
            if child.type == 'OPTIONNAME':
                option_name = child
            elif child.type == 'CONSTANT':
                option_value = child
        if option_name is not None and option_value is not None:
            options[option_name] = unquote(option_value.value)

    # If several package statements occur, the last one wins.
    package = ''
    for node in root.find_data('package'):
        package = node.children[0]

    enums = {}
    messages = {}
    services = {}
    for top_level in root.find_data('topleveldef'):
        for child in top_level.children:
            if isinstance(child, Message):
                messages[child.name] = child
            elif isinstance(child, Enum):
                enums[child.name] = child
            elif isinstance(child, Service):
                services[child.name] = child
    return ProtoFile(messages, enums, services, imports, options, package)
393
+
394
+
395
def serialize2json(data):
    '''Parse proto3 source text and return the result as a JSON string.'''
    proto_file = parse(data)
    as_plain_dict = _recursive_to_dict(proto_file)
    return json.dumps(as_plain_dict)
397
+
398
+
399
def serialize2json_from_file(file: str):
    '''Read a .proto file and return its parsed content as a JSON string.

    The file is decoded as UTF-8 so the result does not depend on the
    platform's default encoding. Returns None when the file is empty.
    '''
    with open(file, 'r', encoding='utf-8') as f:
        data = f.read()
    if not data:
        return None
    return json.dumps(_recursive_to_dict(parse(data)))