structurize 2.16.2__py3-none-any.whl → 2.16.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avrotize/__init__.py +63 -63
- avrotize/__main__.py +5 -5
- avrotize/_version.py +34 -34
- avrotize/asn1toavro.py +160 -160
- avrotize/avrotize.py +152 -152
- avrotize/avrotocpp.py +483 -483
- avrotize/avrotocsharp.py +992 -992
- avrotize/avrotocsv.py +121 -121
- avrotize/avrotodatapackage.py +173 -173
- avrotize/avrotodb.py +1383 -1383
- avrotize/avrotogo.py +476 -476
- avrotize/avrotographql.py +197 -197
- avrotize/avrotoiceberg.py +210 -210
- avrotize/avrotojava.py +1023 -1023
- avrotize/avrotojs.py +250 -250
- avrotize/avrotojsons.py +481 -481
- avrotize/avrotojstruct.py +345 -345
- avrotize/avrotokusto.py +363 -363
- avrotize/avrotomd.py +137 -137
- avrotize/avrotools.py +168 -168
- avrotize/avrotoparquet.py +208 -208
- avrotize/avrotoproto.py +358 -358
- avrotize/avrotopython.py +622 -622
- avrotize/avrotorust.py +435 -435
- avrotize/avrotots.py +598 -598
- avrotize/avrotoxsd.py +344 -344
- avrotize/commands.json +2493 -2433
- avrotize/common.py +828 -828
- avrotize/constants.py +4 -4
- avrotize/csvtoavro.py +131 -131
- avrotize/datapackagetoavro.py +76 -76
- avrotize/dependency_resolver.py +348 -348
- avrotize/jsonstoavro.py +1698 -1698
- avrotize/jsonstostructure.py +2642 -2642
- avrotize/jstructtoavro.py +878 -878
- avrotize/kstructtoavro.py +93 -93
- avrotize/kustotoavro.py +455 -455
- avrotize/parquettoavro.py +157 -157
- avrotize/proto2parser.py +497 -497
- avrotize/proto3parser.py +402 -402
- avrotize/prototoavro.py +382 -382
- avrotize/structuretocsharp.py +2005 -2005
- avrotize/structuretojsons.py +498 -498
- avrotize/structuretopython.py +772 -772
- avrotize/structuretots.py +653 -0
- avrotize/xsdtoavro.py +413 -413
- structurize-2.16.6.dist-info/METADATA +107 -0
- structurize-2.16.6.dist-info/RECORD +52 -0
- {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/licenses/LICENSE +200 -200
- structurize-2.16.2.dist-info/METADATA +0 -805
- structurize-2.16.2.dist-info/RECORD +0 -51
- {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/WHEEL +0 -0
- {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/entry_points.txt +0 -0
- {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/top_level.txt +0 -0
avrotize/dependency_resolver.py
CHANGED
|
@@ -1,348 +1,348 @@
|
|
|
1
|
-
# sort the dependencies
|
|
2
|
-
|
|
3
|
-
import copy
|
|
4
|
-
from typing import List
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def adjust_resolved_dependencies(avro_schema: List[dict] | dict):
|
|
9
|
-
"""
|
|
10
|
-
After resolving dependencies, it may still be necessary to adjust them. The
|
|
11
|
-
first pass of the algorithms below does inline all dependent types, but
|
|
12
|
-
the resulting document may still have fields defined before the types they
|
|
13
|
-
depend on because of the order in which the resolution happened, which necessarily
|
|
14
|
-
re-sorts the graph. This function will recursively adjust the resolved
|
|
15
|
-
dependencies until all record types have their dependency types defined before them.
|
|
16
|
-
"""
|
|
17
|
-
|
|
18
|
-
class TreeWalker:
|
|
19
|
-
|
|
20
|
-
def __init__(self):
|
|
21
|
-
self.found_something = True
|
|
22
|
-
|
|
23
|
-
def swap_record_dependencies_above(self, current_node, record, avro_schema) -> str | None:
|
|
24
|
-
""" swap the first reference to of the record type above the record in avro_schema """
|
|
25
|
-
if isinstance(current_node, dict):
|
|
26
|
-
if 'name' in current_node and 'namespace' in current_node and 'type' in current_node and \
|
|
27
|
-
current_node['name'] == record['name'] and current_node.get('namespace','') == record.get('namespace','') and current_node['type'] == record['type']:
|
|
28
|
-
# we reached the record again. we stop here.
|
|
29
|
-
return None
|
|
30
|
-
for k, v in current_node.items():
|
|
31
|
-
if k in ['dependencies', 'unmerged_types']:
|
|
32
|
-
continue
|
|
33
|
-
if isinstance(v, (dict,list)):
|
|
34
|
-
return self.swap_record_dependencies_above(v, record, avro_schema)
|
|
35
|
-
elif isinstance(v, str):
|
|
36
|
-
if k not in ['type', 'values', 'items']:
|
|
37
|
-
continue
|
|
38
|
-
qname = record.get('namespace','')+'.'+record['name']
|
|
39
|
-
if v == qname:
|
|
40
|
-
self.found_something = True
|
|
41
|
-
current_node[k] = copy.deepcopy(record)
|
|
42
|
-
return qname
|
|
43
|
-
elif isinstance(current_node, list):
|
|
44
|
-
for item in current_node:
|
|
45
|
-
if isinstance(item, (dict,list)):
|
|
46
|
-
return self.swap_record_dependencies_above(item, record, avro_schema)
|
|
47
|
-
elif isinstance(item, str):
|
|
48
|
-
qname = record.get('namespace','')+'.'+record['name']
|
|
49
|
-
if item == qname:
|
|
50
|
-
self.found_something = True
|
|
51
|
-
idx = current_node.index(item)
|
|
52
|
-
current_node.remove(item)
|
|
53
|
-
current_node.insert(idx, copy.deepcopy(record))
|
|
54
|
-
return qname
|
|
55
|
-
return None
|
|
56
|
-
|
|
57
|
-
def walk_schema(self, current_node, avro_schema, record_list) -> str | None:
|
|
58
|
-
found_record = None
|
|
59
|
-
if isinstance(current_node, dict):
|
|
60
|
-
if 'type' in current_node and (current_node['type'] == 'record' or current_node['type'] == 'enum'):
|
|
61
|
-
current_qname = current_node.get('namespace','')+'.'+current_node.get('name','')
|
|
62
|
-
if current_qname in record_list:
|
|
63
|
-
self.found_something = True
|
|
64
|
-
return current_qname
|
|
65
|
-
record_list.append(current_qname)
|
|
66
|
-
found_record = self.swap_record_dependencies_above(avro_schema, current_node, avro_schema)
|
|
67
|
-
for k, v in current_node.items():
|
|
68
|
-
if isinstance(v, (dict,list)):
|
|
69
|
-
qname = self.walk_schema(v, avro_schema, record_list)
|
|
70
|
-
if qname:
|
|
71
|
-
self.found_something = True
|
|
72
|
-
current_node[k] = qname
|
|
73
|
-
elif isinstance(current_node, list):
|
|
74
|
-
for item in current_node:
|
|
75
|
-
qname = self.walk_schema(item, avro_schema, record_list)
|
|
76
|
-
if qname:
|
|
77
|
-
self.found_something = True
|
|
78
|
-
idx = current_node.index(item)
|
|
79
|
-
current_node.remove(item)
|
|
80
|
-
current_node.insert(idx, qname)
|
|
81
|
-
# dedupe the list
|
|
82
|
-
new_list = []
|
|
83
|
-
for item in current_node:
|
|
84
|
-
if not item in new_list:
|
|
85
|
-
new_list.append(item)
|
|
86
|
-
current_node.clear()
|
|
87
|
-
current_node.extend(new_list)
|
|
88
|
-
return found_record
|
|
89
|
-
|
|
90
|
-
# while we've got work to do
|
|
91
|
-
tree_walker = TreeWalker()
|
|
92
|
-
while True:
|
|
93
|
-
tree_walker.found_something = False
|
|
94
|
-
tree_walker.walk_schema(avro_schema, avro_schema, [])
|
|
95
|
-
if not tree_walker.found_something:
|
|
96
|
-
break
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
def inline_dependencies_of(avro_schema, record):
|
|
101
|
-
""" to break circular dependencies, we will inline all dependent record """
|
|
102
|
-
for dependency in copy.deepcopy(record.get('dependencies', [])):
|
|
103
|
-
dependency_type = next((x for x in avro_schema if x['name'] == dependency or x.get('namespace','')+'.'+x['name'] == dependency), None)
|
|
104
|
-
if not dependency_type:
|
|
105
|
-
continue
|
|
106
|
-
deps = record.get('dependencies', [])
|
|
107
|
-
for field in record['fields']:
|
|
108
|
-
swap_dependency_type(avro_schema, field, dependency, dependency_type, deps, [record['namespace']+'.'+record['name']])
|
|
109
|
-
if 'dependencies' in record:
|
|
110
|
-
del record['dependencies']
|
|
111
|
-
|
|
112
|
-
adjust_resolved_dependencies(record)
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
def sort_messages_by_dependencies(avro_schema):
|
|
117
|
-
"""
|
|
118
|
-
Sort the messages in avro_schema by their dependencies. Avro Schema requires
|
|
119
|
-
that type definitions must be defined before they are used. This method
|
|
120
|
-
ensures this. Types that have dependencies will be moved at the end of the list.
|
|
121
|
-
If necessary, it will also resolve circular dependencies by inlining the
|
|
122
|
-
dependent record.
|
|
123
|
-
|
|
124
|
-
The method expects all types with dependencies to have a 'dependencies' key in their
|
|
125
|
-
dict that contains a list of types that they depend on.
|
|
126
|
-
|
|
127
|
-
Args:
|
|
128
|
-
avro_schema: List of Avro schema records.
|
|
129
|
-
"""
|
|
130
|
-
|
|
131
|
-
# if all are just strings, then it is already sorted
|
|
132
|
-
if all(isinstance(record, str) for record in avro_schema):
|
|
133
|
-
return avro_schema
|
|
134
|
-
|
|
135
|
-
sorted_messages = []
|
|
136
|
-
record_stack = []
|
|
137
|
-
while avro_schema:
|
|
138
|
-
found = False
|
|
139
|
-
for record in avro_schema:
|
|
140
|
-
if not isinstance(record, dict):
|
|
141
|
-
sorted_messages.append(record)
|
|
142
|
-
avro_schema.remove(record)
|
|
143
|
-
continue
|
|
144
|
-
|
|
145
|
-
# if this record is not a dependency of any other record, it can be safely emitted now
|
|
146
|
-
#if not any(record.get('namespace','')+'.'+record.get('name') in other_record.get('dependencies', []) for other_record in [x for x in avro_schema if isinstance(x, dict) and 'name' in x]):
|
|
147
|
-
remaining_deps = [dep for dep in record['dependencies'] if not dep in [x.get('namespace','')+'.'+x.get('name','') for x in sorted_messages]] if 'dependencies' in record else []
|
|
148
|
-
if len(remaining_deps) == 0:
|
|
149
|
-
if 'dependencies' in record:
|
|
150
|
-
del record['dependencies']
|
|
151
|
-
sorted_messages.append(record)
|
|
152
|
-
avro_schema.remove(record)
|
|
153
|
-
found = True
|
|
154
|
-
|
|
155
|
-
# If there are no records without dependencies, we will grab the first
|
|
156
|
-
# record with dependencies and start resolving circular dependencies
|
|
157
|
-
if len(avro_schema) > 0 and not found:
|
|
158
|
-
found = False
|
|
159
|
-
for record in avro_schema:
|
|
160
|
-
if isinstance(record, dict) and 'dependencies' in record:
|
|
161
|
-
remaining_deps = [dep for dep in record['dependencies'] if not dep in [x.get('namespace','')+'.'+x.get('name','') for x in sorted_messages]]
|
|
162
|
-
if len(remaining_deps) > 0:
|
|
163
|
-
swap_record_dependencies(avro_schema, record, [record.get('namespace','')+'.'+record['name']], 0)
|
|
164
|
-
if 'dependencies' in record and len(record['dependencies']) == 0:
|
|
165
|
-
del record['dependencies']
|
|
166
|
-
if isinstance(record, dict) and not 'dependencies' in record:
|
|
167
|
-
found = True
|
|
168
|
-
sorted_messages.append(record)
|
|
169
|
-
if record in avro_schema:
|
|
170
|
-
avro_schema.remove(record)
|
|
171
|
-
break
|
|
172
|
-
else:
|
|
173
|
-
remaining_remaining_deps = [dep for dep in record['dependencies'] if not dep in [x.get('namespace')+'.'+x.get('name') for x in sorted_messages]]
|
|
174
|
-
found = len(remaining_deps) != len(remaining_remaining_deps)
|
|
175
|
-
if found:
|
|
176
|
-
break
|
|
177
|
-
|
|
178
|
-
if not found:
|
|
179
|
-
found = False
|
|
180
|
-
for record in avro_schema:
|
|
181
|
-
if isinstance(record, dict) and 'dependencies' in record:
|
|
182
|
-
found = True
|
|
183
|
-
record_deps = copy.deepcopy(record.get('dependencies', []))
|
|
184
|
-
inline_dependencies_of(avro_schema, record)
|
|
185
|
-
# fix the dependencies of all records that have this record as a dependency
|
|
186
|
-
for remaining_schema in avro_schema:
|
|
187
|
-
if isinstance(remaining_schema, dict) and 'dependencies' in remaining_schema and any(dep in record_deps for dep in remaining_schema['dependencies']):
|
|
188
|
-
remaining_schema['dependencies'] = [dep for dep in remaining_schema['dependencies'] if not dep in record_deps]
|
|
189
|
-
qname = record['namespace']+'.'+record['name']
|
|
190
|
-
if not qname in remaining_schema['dependencies']:
|
|
191
|
-
remaining_schema['dependencies'].append(qname)
|
|
192
|
-
break
|
|
193
|
-
|
|
194
|
-
if not found:
|
|
195
|
-
print('WARNING: There are circular dependencies in the schema, unable to resolve them: {}'.format([x['name'] for x in avro_schema if isinstance(x, dict) and 'dependencies' in x]))
|
|
196
|
-
|
|
197
|
-
adjust_resolved_dependencies(sorted_messages)
|
|
198
|
-
return sorted_messages
|
|
199
|
-
|
|
200
|
-
def swap_record_dependencies(avro_schema, record, record_stack: List[str], recursion_depth: int = 0):
|
|
201
|
-
record_stack.append(record.get('namespace', '')+'.'+record['name'])
|
|
202
|
-
if 'dependencies' in record:
|
|
203
|
-
prior_dependencies = copy.deepcopy(record['dependencies'])
|
|
204
|
-
while 'dependencies' in record and len(record['dependencies']) > 0:
|
|
205
|
-
if 'fields' in record:
|
|
206
|
-
for field in record['fields']:
|
|
207
|
-
if isinstance(field['type'], list):
|
|
208
|
-
for item in field['type'].copy():
|
|
209
|
-
sub_field = {
|
|
210
|
-
'type': item,
|
|
211
|
-
'name': field['name']
|
|
212
|
-
}
|
|
213
|
-
resolve_field_dependencies(avro_schema, record, sub_field, record_stack, recursion_depth + 1)
|
|
214
|
-
if sub_field['type'] != item:
|
|
215
|
-
idx = field['type'].index(item)
|
|
216
|
-
field['type'].remove(item)
|
|
217
|
-
field['type'].insert(idx, sub_field['type'])
|
|
218
|
-
else:
|
|
219
|
-
resolve_field_dependencies(avro_schema, record, field, record_stack, recursion_depth + 1)
|
|
220
|
-
if 'dependencies' in record and len(record['dependencies']) > 0:
|
|
221
|
-
# compare the prior dependencies to the current dependencies one-by-one. If they are the same,
|
|
222
|
-
# then we have a circular dependency.
|
|
223
|
-
if prior_dependencies == record['dependencies']:
|
|
224
|
-
print('WARNING: Unable to resolve circular dependency in {}::{} with dependencies: {}'.format(record.get('namespace',''), record['name'], record['dependencies']))
|
|
225
|
-
break
|
|
226
|
-
prior_dependencies = record['dependencies']
|
|
227
|
-
if 'dependencies' in record:
|
|
228
|
-
del record['dependencies']
|
|
229
|
-
record_stack.pop()
|
|
230
|
-
|
|
231
|
-
def resolve_field_dependencies(avro_schema, record, field, record_stack, recursion_depth: int = 0):
|
|
232
|
-
for dependency in record.get('dependencies', []):
|
|
233
|
-
dependency_type = next((x for x in avro_schema if x['name'] == dependency or x.get('namespace','')+'.'+x['name'] == dependency), None)
|
|
234
|
-
if not dependency_type and dependency in record['dependencies']:
|
|
235
|
-
record['dependencies'].remove(dependency)
|
|
236
|
-
continue
|
|
237
|
-
deps = record.get('dependencies', [])
|
|
238
|
-
if dependency_type:
|
|
239
|
-
if record['name'] != dependency and (record.get('namespace','')+'.'+record['name']) != dependency:
|
|
240
|
-
swap_dependency_type(avro_schema, field, dependency, dependency_type, deps, record_stack, recursion_depth + 1)
|
|
241
|
-
record['dependencies'] = [dep for dep in deps if dep != record['name'] and record.get('namespace','')+'.'+record['name'] != dep]
|
|
242
|
-
if len(record['dependencies']) == 0:
|
|
243
|
-
del record['dependencies']
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
def swap_dependency_type(avro_schema, field, dependency, dependency_type, dependencies, record_stack: List[str], recursion_depth: int = 0):
|
|
247
|
-
""" to break circular dependencies, we will inline the dependent record and remove the dependency """
|
|
248
|
-
if not dependency in dependencies:
|
|
249
|
-
return
|
|
250
|
-
if not dependency_type in avro_schema:
|
|
251
|
-
return
|
|
252
|
-
if record_stack and dependency in record_stack:
|
|
253
|
-
dependencies.remove(dependency)
|
|
254
|
-
return
|
|
255
|
-
|
|
256
|
-
# Replace the dependency type with the dependency_type in avro_schema.
|
|
257
|
-
if isinstance(field['type'],str) and field['type'] == dependency:
|
|
258
|
-
if dependency_type in avro_schema:
|
|
259
|
-
field['type'] = dependency_type
|
|
260
|
-
avro_schema.remove(dependency_type)
|
|
261
|
-
dependencies.remove(dependency)
|
|
262
|
-
dependencies.extend(dependency_type.get('dependencies', []))
|
|
263
|
-
if 'dependencies' in dependency_type:
|
|
264
|
-
swap_record_dependencies(avro_schema, dependency_type, record_stack, recursion_depth + 1)
|
|
265
|
-
|
|
266
|
-
# type is a Union?
|
|
267
|
-
elif isinstance(field['type'], list):
|
|
268
|
-
for field_type in field['type']:
|
|
269
|
-
if field_type == dependency:
|
|
270
|
-
if dependency_type in avro_schema:
|
|
271
|
-
index = field['type'].index(field_type)
|
|
272
|
-
field['type'].remove(field_type)
|
|
273
|
-
field['type'].insert(index, dependency_type)
|
|
274
|
-
avro_schema.remove(dependency_type)
|
|
275
|
-
if dependency in dependencies:
|
|
276
|
-
dependencies.remove(dependency)
|
|
277
|
-
dependencies.extend(dependency_type.get('dependencies', []))
|
|
278
|
-
if 'dependencies' in dependency_type:
|
|
279
|
-
swap_record_dependencies(avro_schema, dependency_type, record_stack, recursion_depth + 1)
|
|
280
|
-
for field_type in field['type']:
|
|
281
|
-
if isinstance(field_type, dict):
|
|
282
|
-
swap_dependency_type(avro_schema, field_type, dependency, dependency_type, dependencies, record_stack, recursion_depth + 1)
|
|
283
|
-
elif isinstance(field['type'], dict) and 'type' in field['type']:
|
|
284
|
-
swap_dependency_type(avro_schema, field['type'], dependency, dependency_type, dependencies, record_stack, recursion_depth + 1)
|
|
285
|
-
elif field['type'] == 'array':
|
|
286
|
-
if not 'items' in field:
|
|
287
|
-
return
|
|
288
|
-
if isinstance(field['items'], list):
|
|
289
|
-
for item in field['items']:
|
|
290
|
-
if item == dependency:
|
|
291
|
-
if dependency_type in avro_schema:
|
|
292
|
-
index = field['items'].index(item)
|
|
293
|
-
field['items'].remove(item)
|
|
294
|
-
field['items'].insert(index, dependency_type)
|
|
295
|
-
avro_schema.remove(dependency_type)
|
|
296
|
-
if dependency in dependencies:
|
|
297
|
-
dependencies.remove(dependency)
|
|
298
|
-
dependencies.extend(dependency_type.get('dependencies', []))
|
|
299
|
-
if 'dependencies' in dependency_type:
|
|
300
|
-
swap_record_dependencies(avro_schema, dependency_type, record_stack)
|
|
301
|
-
for item in field['items']:
|
|
302
|
-
if isinstance(item, dict):
|
|
303
|
-
swap_dependency_type(avro_schema, item, dependency, dependency_type, dependencies, record_stack, recursion_depth + 1)
|
|
304
|
-
elif field['items'] == dependency:
|
|
305
|
-
if dependency_type in avro_schema:
|
|
306
|
-
field['items'] = dependency_type
|
|
307
|
-
avro_schema.remove(dependency_type)
|
|
308
|
-
if dependency in dependencies:
|
|
309
|
-
dependencies.remove(dependency)
|
|
310
|
-
dependencies.extend(dependency_type.get('dependencies', []))
|
|
311
|
-
if 'dependencies' in dependency_type:
|
|
312
|
-
swap_record_dependencies(avro_schema, dependency_type, record_stack)
|
|
313
|
-
elif isinstance(field['items'], dict) and 'type' in field['items']:
|
|
314
|
-
swap_dependency_type(avro_schema, field['items'], dependency, dependency_type, dependencies, record_stack, recursion_depth + 1)
|
|
315
|
-
elif field['type'] == 'map':
|
|
316
|
-
if isinstance(field['values'], list):
|
|
317
|
-
for item in field['values']:
|
|
318
|
-
if item == dependency:
|
|
319
|
-
if dependency_type in avro_schema:
|
|
320
|
-
index = field['values'].index(item)
|
|
321
|
-
field['values'].remove(item)
|
|
322
|
-
field['values'].insert(index, dependency_type)
|
|
323
|
-
avro_schema.remove(dependency_type)
|
|
324
|
-
if dependency in dependencies:
|
|
325
|
-
dependencies.remove(dependency)
|
|
326
|
-
dependencies.extend(dependency_type.get('dependencies', []))
|
|
327
|
-
if 'dependencies' in dependency_type:
|
|
328
|
-
swap_record_dependencies(avro_schema, dependency_type, record_stack)
|
|
329
|
-
for item in field['values']:
|
|
330
|
-
if isinstance(item, dict):
|
|
331
|
-
swap_dependency_type(avro_schema, item, dependency, dependency_type, dependencies, record_stack, recursion_depth + 1)
|
|
332
|
-
if field['values'] == dependency:
|
|
333
|
-
if dependency_type in avro_schema:
|
|
334
|
-
field['values'] = dependency_type
|
|
335
|
-
avro_schema.remove(dependency_type)
|
|
336
|
-
if dependency in dependencies:
|
|
337
|
-
dependencies.remove(dependency)
|
|
338
|
-
dependencies.extend(dependency_type.get('dependencies', []))
|
|
339
|
-
if 'dependencies' in dependency_type:
|
|
340
|
-
swap_record_dependencies(avro_schema, dependency_type, record_stack)
|
|
341
|
-
elif 'type' in field['values']:
|
|
342
|
-
swap_dependency_type(avro_schema, field['values'], dependency, dependency_type, dependencies, record_stack, recursion_depth + 1)
|
|
343
|
-
elif field['type'] == 'record':
|
|
344
|
-
record_stack.append(field.get('namespace', '')+'.'+field['name'])
|
|
345
|
-
for dep_field in field['fields']:
|
|
346
|
-
if isinstance(dep_field, dict):
|
|
347
|
-
swap_dependency_type(avro_schema, dep_field, dependency, dependency_type, dependencies, record_stack, recursion_depth + 1)
|
|
348
|
-
record_stack.pop()
|
|
1
|
+
# sort the dependencies
|
|
2
|
+
|
|
3
|
+
import copy
|
|
4
|
+
from typing import List
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def adjust_resolved_dependencies(avro_schema: List[dict] | dict):
|
|
9
|
+
"""
|
|
10
|
+
After resolving dependencies, it may still be necessary to adjust them. The
|
|
11
|
+
first pass of the algorithms below does inline all dependent types, but
|
|
12
|
+
the resulting document may still have fields defined before the types they
|
|
13
|
+
depend on because of the order in which the resolution happened, which necessarily
|
|
14
|
+
re-sorts the graph. This function will recursively adjust the resolved
|
|
15
|
+
dependencies until all record types have their dependency types defined before them.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
class TreeWalker:
|
|
19
|
+
|
|
20
|
+
def __init__(self):
|
|
21
|
+
self.found_something = True
|
|
22
|
+
|
|
23
|
+
def swap_record_dependencies_above(self, current_node, record, avro_schema) -> str | None:
|
|
24
|
+
""" swap the first reference to of the record type above the record in avro_schema """
|
|
25
|
+
if isinstance(current_node, dict):
|
|
26
|
+
if 'name' in current_node and 'namespace' in current_node and 'type' in current_node and \
|
|
27
|
+
current_node['name'] == record['name'] and current_node.get('namespace','') == record.get('namespace','') and current_node['type'] == record['type']:
|
|
28
|
+
# we reached the record again. we stop here.
|
|
29
|
+
return None
|
|
30
|
+
for k, v in current_node.items():
|
|
31
|
+
if k in ['dependencies', 'unmerged_types']:
|
|
32
|
+
continue
|
|
33
|
+
if isinstance(v, (dict,list)):
|
|
34
|
+
return self.swap_record_dependencies_above(v, record, avro_schema)
|
|
35
|
+
elif isinstance(v, str):
|
|
36
|
+
if k not in ['type', 'values', 'items']:
|
|
37
|
+
continue
|
|
38
|
+
qname = record.get('namespace','')+'.'+record['name']
|
|
39
|
+
if v == qname:
|
|
40
|
+
self.found_something = True
|
|
41
|
+
current_node[k] = copy.deepcopy(record)
|
|
42
|
+
return qname
|
|
43
|
+
elif isinstance(current_node, list):
|
|
44
|
+
for item in current_node:
|
|
45
|
+
if isinstance(item, (dict,list)):
|
|
46
|
+
return self.swap_record_dependencies_above(item, record, avro_schema)
|
|
47
|
+
elif isinstance(item, str):
|
|
48
|
+
qname = record.get('namespace','')+'.'+record['name']
|
|
49
|
+
if item == qname:
|
|
50
|
+
self.found_something = True
|
|
51
|
+
idx = current_node.index(item)
|
|
52
|
+
current_node.remove(item)
|
|
53
|
+
current_node.insert(idx, copy.deepcopy(record))
|
|
54
|
+
return qname
|
|
55
|
+
return None
|
|
56
|
+
|
|
57
|
+
def walk_schema(self, current_node, avro_schema, record_list) -> str | None:
|
|
58
|
+
found_record = None
|
|
59
|
+
if isinstance(current_node, dict):
|
|
60
|
+
if 'type' in current_node and (current_node['type'] == 'record' or current_node['type'] == 'enum'):
|
|
61
|
+
current_qname = current_node.get('namespace','')+'.'+current_node.get('name','')
|
|
62
|
+
if current_qname in record_list:
|
|
63
|
+
self.found_something = True
|
|
64
|
+
return current_qname
|
|
65
|
+
record_list.append(current_qname)
|
|
66
|
+
found_record = self.swap_record_dependencies_above(avro_schema, current_node, avro_schema)
|
|
67
|
+
for k, v in current_node.items():
|
|
68
|
+
if isinstance(v, (dict,list)):
|
|
69
|
+
qname = self.walk_schema(v, avro_schema, record_list)
|
|
70
|
+
if qname:
|
|
71
|
+
self.found_something = True
|
|
72
|
+
current_node[k] = qname
|
|
73
|
+
elif isinstance(current_node, list):
|
|
74
|
+
for item in current_node:
|
|
75
|
+
qname = self.walk_schema(item, avro_schema, record_list)
|
|
76
|
+
if qname:
|
|
77
|
+
self.found_something = True
|
|
78
|
+
idx = current_node.index(item)
|
|
79
|
+
current_node.remove(item)
|
|
80
|
+
current_node.insert(idx, qname)
|
|
81
|
+
# dedupe the list
|
|
82
|
+
new_list = []
|
|
83
|
+
for item in current_node:
|
|
84
|
+
if not item in new_list:
|
|
85
|
+
new_list.append(item)
|
|
86
|
+
current_node.clear()
|
|
87
|
+
current_node.extend(new_list)
|
|
88
|
+
return found_record
|
|
89
|
+
|
|
90
|
+
# while we've got work to do
|
|
91
|
+
tree_walker = TreeWalker()
|
|
92
|
+
while True:
|
|
93
|
+
tree_walker.found_something = False
|
|
94
|
+
tree_walker.walk_schema(avro_schema, avro_schema, [])
|
|
95
|
+
if not tree_walker.found_something:
|
|
96
|
+
break
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def inline_dependencies_of(avro_schema, record):
|
|
101
|
+
""" to break circular dependencies, we will inline all dependent record """
|
|
102
|
+
for dependency in copy.deepcopy(record.get('dependencies', [])):
|
|
103
|
+
dependency_type = next((x for x in avro_schema if x['name'] == dependency or x.get('namespace','')+'.'+x['name'] == dependency), None)
|
|
104
|
+
if not dependency_type:
|
|
105
|
+
continue
|
|
106
|
+
deps = record.get('dependencies', [])
|
|
107
|
+
for field in record['fields']:
|
|
108
|
+
swap_dependency_type(avro_schema, field, dependency, dependency_type, deps, [record['namespace']+'.'+record['name']])
|
|
109
|
+
if 'dependencies' in record:
|
|
110
|
+
del record['dependencies']
|
|
111
|
+
|
|
112
|
+
adjust_resolved_dependencies(record)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def sort_messages_by_dependencies(avro_schema):
|
|
117
|
+
"""
|
|
118
|
+
Sort the messages in avro_schema by their dependencies. Avro Schema requires
|
|
119
|
+
that type definitions must be defined before they are used. This method
|
|
120
|
+
ensures this. Types that have dependencies will be moved at the end of the list.
|
|
121
|
+
If necessary, it will also resolve circular dependencies by inlining the
|
|
122
|
+
dependent record.
|
|
123
|
+
|
|
124
|
+
The method expects all types with dependencies to have a 'dependencies' key in their
|
|
125
|
+
dict that contains a list of types that they depend on.
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
avro_schema: List of Avro schema records.
|
|
129
|
+
"""
|
|
130
|
+
|
|
131
|
+
# if all are just strings, then it is already sorted
|
|
132
|
+
if all(isinstance(record, str) for record in avro_schema):
|
|
133
|
+
return avro_schema
|
|
134
|
+
|
|
135
|
+
sorted_messages = []
|
|
136
|
+
record_stack = []
|
|
137
|
+
while avro_schema:
|
|
138
|
+
found = False
|
|
139
|
+
for record in avro_schema:
|
|
140
|
+
if not isinstance(record, dict):
|
|
141
|
+
sorted_messages.append(record)
|
|
142
|
+
avro_schema.remove(record)
|
|
143
|
+
continue
|
|
144
|
+
|
|
145
|
+
# if this record is not a dependency of any other record, it can be safely emitted now
|
|
146
|
+
#if not any(record.get('namespace','')+'.'+record.get('name') in other_record.get('dependencies', []) for other_record in [x for x in avro_schema if isinstance(x, dict) and 'name' in x]):
|
|
147
|
+
remaining_deps = [dep for dep in record['dependencies'] if not dep in [x.get('namespace','')+'.'+x.get('name','') for x in sorted_messages]] if 'dependencies' in record else []
|
|
148
|
+
if len(remaining_deps) == 0:
|
|
149
|
+
if 'dependencies' in record:
|
|
150
|
+
del record['dependencies']
|
|
151
|
+
sorted_messages.append(record)
|
|
152
|
+
avro_schema.remove(record)
|
|
153
|
+
found = True
|
|
154
|
+
|
|
155
|
+
# If there are no records without dependencies, we will grab the first
|
|
156
|
+
# record with dependencies and start resolving circular dependencies
|
|
157
|
+
if len(avro_schema) > 0 and not found:
|
|
158
|
+
found = False
|
|
159
|
+
for record in avro_schema:
|
|
160
|
+
if isinstance(record, dict) and 'dependencies' in record:
|
|
161
|
+
remaining_deps = [dep for dep in record['dependencies'] if not dep in [x.get('namespace','')+'.'+x.get('name','') for x in sorted_messages]]
|
|
162
|
+
if len(remaining_deps) > 0:
|
|
163
|
+
swap_record_dependencies(avro_schema, record, [record.get('namespace','')+'.'+record['name']], 0)
|
|
164
|
+
if 'dependencies' in record and len(record['dependencies']) == 0:
|
|
165
|
+
del record['dependencies']
|
|
166
|
+
if isinstance(record, dict) and not 'dependencies' in record:
|
|
167
|
+
found = True
|
|
168
|
+
sorted_messages.append(record)
|
|
169
|
+
if record in avro_schema:
|
|
170
|
+
avro_schema.remove(record)
|
|
171
|
+
break
|
|
172
|
+
else:
|
|
173
|
+
remaining_remaining_deps = [dep for dep in record['dependencies'] if not dep in [x.get('namespace')+'.'+x.get('name') for x in sorted_messages]]
|
|
174
|
+
found = len(remaining_deps) != len(remaining_remaining_deps)
|
|
175
|
+
if found:
|
|
176
|
+
break
|
|
177
|
+
|
|
178
|
+
if not found:
|
|
179
|
+
found = False
|
|
180
|
+
for record in avro_schema:
|
|
181
|
+
if isinstance(record, dict) and 'dependencies' in record:
|
|
182
|
+
found = True
|
|
183
|
+
record_deps = copy.deepcopy(record.get('dependencies', []))
|
|
184
|
+
inline_dependencies_of(avro_schema, record)
|
|
185
|
+
# fix the dependencies of all records that have this record as a dependency
|
|
186
|
+
for remaining_schema in avro_schema:
|
|
187
|
+
if isinstance(remaining_schema, dict) and 'dependencies' in remaining_schema and any(dep in record_deps for dep in remaining_schema['dependencies']):
|
|
188
|
+
remaining_schema['dependencies'] = [dep for dep in remaining_schema['dependencies'] if not dep in record_deps]
|
|
189
|
+
qname = record['namespace']+'.'+record['name']
|
|
190
|
+
if not qname in remaining_schema['dependencies']:
|
|
191
|
+
remaining_schema['dependencies'].append(qname)
|
|
192
|
+
break
|
|
193
|
+
|
|
194
|
+
if not found:
|
|
195
|
+
print('WARNING: There are circular dependencies in the schema, unable to resolve them: {}'.format([x['name'] for x in avro_schema if isinstance(x, dict) and 'dependencies' in x]))
|
|
196
|
+
|
|
197
|
+
adjust_resolved_dependencies(sorted_messages)
|
|
198
|
+
return sorted_messages
|
|
199
|
+
|
|
200
|
+
def swap_record_dependencies(avro_schema, record, record_stack: List[str], recursion_depth: int = 0):
|
|
201
|
+
record_stack.append(record.get('namespace', '')+'.'+record['name'])
|
|
202
|
+
if 'dependencies' in record:
|
|
203
|
+
prior_dependencies = copy.deepcopy(record['dependencies'])
|
|
204
|
+
while 'dependencies' in record and len(record['dependencies']) > 0:
|
|
205
|
+
if 'fields' in record:
|
|
206
|
+
for field in record['fields']:
|
|
207
|
+
if isinstance(field['type'], list):
|
|
208
|
+
for item in field['type'].copy():
|
|
209
|
+
sub_field = {
|
|
210
|
+
'type': item,
|
|
211
|
+
'name': field['name']
|
|
212
|
+
}
|
|
213
|
+
resolve_field_dependencies(avro_schema, record, sub_field, record_stack, recursion_depth + 1)
|
|
214
|
+
if sub_field['type'] != item:
|
|
215
|
+
idx = field['type'].index(item)
|
|
216
|
+
field['type'].remove(item)
|
|
217
|
+
field['type'].insert(idx, sub_field['type'])
|
|
218
|
+
else:
|
|
219
|
+
resolve_field_dependencies(avro_schema, record, field, record_stack, recursion_depth + 1)
|
|
220
|
+
if 'dependencies' in record and len(record['dependencies']) > 0:
|
|
221
|
+
# compare the prior dependencies to the current dependencies one-by-one. If they are the same,
|
|
222
|
+
# then we have a circular dependency.
|
|
223
|
+
if prior_dependencies == record['dependencies']:
|
|
224
|
+
print('WARNING: Unable to resolve circular dependency in {}::{} with dependencies: {}'.format(record.get('namespace',''), record['name'], record['dependencies']))
|
|
225
|
+
break
|
|
226
|
+
prior_dependencies = record['dependencies']
|
|
227
|
+
if 'dependencies' in record:
|
|
228
|
+
del record['dependencies']
|
|
229
|
+
record_stack.pop()
|
|
230
|
+
|
|
231
|
+
def resolve_field_dependencies(avro_schema, record, field, record_stack, recursion_depth: int = 0):
    """Try to resolve each dependency of ``record`` against ``field``.

    For every name listed in ``record['dependencies']``:

    * if no schema in ``avro_schema`` matches it (by bare ``name`` or by
      ``namespace.name``), the name is dropped from the list;
    * otherwise, unless it names ``record`` itself, ``swap_dependency_type``
      is called for ``field``.

    After each handled dependency, references to the record itself are
    filtered out of the list, and the ``'dependencies'`` key is deleted once
    the list is empty.

    Args:
        avro_schema: List of top-level schema dicts to search.
        record: Schema dict whose ``'dependencies'`` list is updated.
        field: Field-like dict with a ``'type'`` entry, passed to
            ``swap_dependency_type``.
        record_stack: Passed through to ``swap_dependency_type``.
        recursion_depth: Nesting level; only forwarded (incremented).
    """
    own_name = record['name']
    own_qualified_name = record.get('namespace', '') + '.' + record['name']
    for dependency in record.get('dependencies', []):
        dependency_type = next((x for x in avro_schema if x['name'] == dependency or x.get('namespace','')+'.'+x['name'] == dependency), None)
        # An earlier iteration may have deleted record['dependencies'], so
        # read it with .get() instead of indexing (which raised KeyError).
        if not dependency_type and dependency in record.get('dependencies', []):
            record['dependencies'].remove(dependency)
            continue
        deps = record.get('dependencies', [])
        if dependency_type:
            if own_name != dependency and own_qualified_name != dependency:
                swap_dependency_type(avro_schema, field, dependency, dependency_type, deps, record_stack, recursion_depth + 1)
        # Drop self-references; a record never needs to resolve itself.
        record['dependencies'] = [dep for dep in deps if dep != own_name and own_qualified_name != dep]
        if len(record['dependencies']) == 0:
            del record['dependencies']
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def _inline_dependency(avro_schema, dependency, dependency_type, dependencies, record_stack, recursion_depth):
    """Do the bookkeeping after ``dependency_type`` has been written into a field."""
    avro_schema.remove(dependency_type)
    if dependency in dependencies:
        dependencies.remove(dependency)
    # The inlined type's own dependencies are appended to the caller's list.
    dependencies.extend(dependency_type.get('dependencies', []))
    if 'dependencies' in dependency_type:
        swap_record_dependencies(avro_schema, dependency_type, record_stack, recursion_depth + 1)


def _swap_in_type_list(avro_schema, members, dependency, dependency_type, dependencies, record_stack, recursion_depth):
    """Replace ``dependency`` inside a list of types, then recurse into the dict members."""
    for position, member in enumerate(members):
        if member == dependency and dependency_type in avro_schema:
            members[position] = dependency_type
            _inline_dependency(avro_schema, dependency, dependency_type, dependencies, record_stack, recursion_depth)
    for member in members:
        if isinstance(member, dict):
            swap_dependency_type(avro_schema, member, dependency, dependency_type, dependencies, record_stack, recursion_depth + 1)


def _swap_in_container(avro_schema, holder, key, dependency, dependency_type, dependencies, record_stack, recursion_depth):
    """Handle ``holder[key]`` (array ``'items'`` or map ``'values'``): a list, a type name, or an inline type dict."""
    element = holder[key]
    if isinstance(element, list):
        _swap_in_type_list(avro_schema, element, dependency, dependency_type, dependencies, record_stack, recursion_depth)
    elif element == dependency:
        if dependency_type in avro_schema:
            holder[key] = dependency_type
            _inline_dependency(avro_schema, dependency, dependency_type, dependencies, record_stack, recursion_depth)
    elif isinstance(element, dict) and 'type' in element:
        # Only dicts are recursed into; a plain type name that merely
        # contains the substring 'type' must not be treated as a schema.
        swap_dependency_type(avro_schema, element, dependency, dependency_type, dependencies, record_stack, recursion_depth + 1)


def swap_dependency_type(avro_schema, field, dependency, dependency_type, dependencies, record_stack: List[str], recursion_depth: int = 0):
    """ to break circular dependencies, we will inline the dependent record and remove the dependency

    Looks for a reference to ``dependency`` in ``field`` (directly, in a
    union, in array items, in map values, or in the fields of a nested
    record). Where found, the reference is replaced by ``dependency_type``,
    which is then removed from ``avro_schema``.

    Args:
        avro_schema: List of top-level schema dicts; ``dependency_type`` is
            removed from it when it gets inlined.
        field: Dict with a ``'type'`` entry; modified in place.
        dependency: Type name to look for.
        dependency_type: Schema dict that ``dependency`` refers to.
        dependencies: List of pending dependency names; modified in place
            (``dependency`` removed, the inlined type's dependencies added).
        record_stack: Qualified names of the records currently being
            processed. If ``dependency`` is among them it is only removed
            from ``dependencies`` and nothing is inlined.
        recursion_depth: Nesting level; only forwarded (incremented).
    """
    if dependency not in dependencies:
        return
    if dependency_type not in avro_schema:
        return
    if record_stack and dependency in record_stack:
        dependencies.remove(dependency)
        return

    field_type = field['type']

    # Replace the dependency type with the dependency_type in avro_schema.
    if isinstance(field_type, str) and field_type == dependency:
        field['type'] = dependency_type
        _inline_dependency(avro_schema, dependency, dependency_type, dependencies, record_stack, recursion_depth)

    # type is a Union?
    elif isinstance(field_type, list):
        _swap_in_type_list(avro_schema, field_type, dependency, dependency_type, dependencies, record_stack, recursion_depth)
    elif isinstance(field_type, dict) and 'type' in field_type:
        swap_dependency_type(avro_schema, field_type, dependency, dependency_type, dependencies, record_stack, recursion_depth + 1)
    elif field_type == 'array':
        if 'items' not in field:
            return
        _swap_in_container(avro_schema, field, 'items', dependency, dependency_type, dependencies, record_stack, recursion_depth)
    elif field_type == 'map':
        if 'values' not in field:
            return
        _swap_in_container(avro_schema, field, 'values', dependency, dependency_type, dependencies, record_stack, recursion_depth)
    elif field_type == 'record':
        record_stack.append(field.get('namespace', '')+'.'+field['name'])
        for dep_field in field.get('fields', []):
            if isinstance(dep_field, dict):
                swap_dependency_type(avro_schema, dep_field, dependency, dependency_type, dependencies, record_stack, recursion_depth + 1)
        record_stack.pop()
|