structurize 2.16.2__py3-none-any.whl → 2.16.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. avrotize/__init__.py +63 -63
  2. avrotize/__main__.py +5 -5
  3. avrotize/_version.py +34 -34
  4. avrotize/asn1toavro.py +160 -160
  5. avrotize/avrotize.py +152 -152
  6. avrotize/avrotocpp.py +483 -483
  7. avrotize/avrotocsharp.py +992 -992
  8. avrotize/avrotocsv.py +121 -121
  9. avrotize/avrotodatapackage.py +173 -173
  10. avrotize/avrotodb.py +1383 -1383
  11. avrotize/avrotogo.py +476 -476
  12. avrotize/avrotographql.py +197 -197
  13. avrotize/avrotoiceberg.py +210 -210
  14. avrotize/avrotojava.py +1023 -1023
  15. avrotize/avrotojs.py +250 -250
  16. avrotize/avrotojsons.py +481 -481
  17. avrotize/avrotojstruct.py +345 -345
  18. avrotize/avrotokusto.py +363 -363
  19. avrotize/avrotomd.py +137 -137
  20. avrotize/avrotools.py +168 -168
  21. avrotize/avrotoparquet.py +208 -208
  22. avrotize/avrotoproto.py +358 -358
  23. avrotize/avrotopython.py +622 -622
  24. avrotize/avrotorust.py +435 -435
  25. avrotize/avrotots.py +598 -598
  26. avrotize/avrotoxsd.py +344 -344
  27. avrotize/commands.json +2493 -2433
  28. avrotize/common.py +828 -828
  29. avrotize/constants.py +4 -4
  30. avrotize/csvtoavro.py +131 -131
  31. avrotize/datapackagetoavro.py +76 -76
  32. avrotize/dependency_resolver.py +348 -348
  33. avrotize/jsonstoavro.py +1698 -1698
  34. avrotize/jsonstostructure.py +2642 -2642
  35. avrotize/jstructtoavro.py +878 -878
  36. avrotize/kstructtoavro.py +93 -93
  37. avrotize/kustotoavro.py +455 -455
  38. avrotize/parquettoavro.py +157 -157
  39. avrotize/proto2parser.py +497 -497
  40. avrotize/proto3parser.py +402 -402
  41. avrotize/prototoavro.py +382 -382
  42. avrotize/structuretocsharp.py +2005 -2005
  43. avrotize/structuretojsons.py +498 -498
  44. avrotize/structuretopython.py +772 -772
  45. avrotize/structuretots.py +653 -0
  46. avrotize/xsdtoavro.py +413 -413
  47. structurize-2.16.6.dist-info/METADATA +107 -0
  48. structurize-2.16.6.dist-info/RECORD +52 -0
  49. {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/licenses/LICENSE +200 -200
  50. structurize-2.16.2.dist-info/METADATA +0 -805
  51. structurize-2.16.2.dist-info/RECORD +0 -51
  52. {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/WHEEL +0 -0
  53. {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/entry_points.txt +0 -0
  54. {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/top_level.txt +0 -0
@@ -1,348 +1,348 @@
1
- # sort the dependencies
2
-
3
- import copy
4
- from typing import List
5
-
6
-
7
-
8
- def adjust_resolved_dependencies(avro_schema: List[dict] | dict):
9
- """
10
- After resolving dependencies, it may still be necessary to adjust them. The
11
- first pass of the algorithms below does inline all dependent types, but
12
- the resulting document may still have fields defined before the types they
13
- depend on because of the order in which the resolution happened, which necessarily
14
- re-sorts the graph. This function will recursively adjust the resolved
15
- dependencies until all record types have their dependency types defined before them.
16
- """
17
-
18
- class TreeWalker:
19
-
20
- def __init__(self):
21
- self.found_something = True
22
-
23
- def swap_record_dependencies_above(self, current_node, record, avro_schema) -> str | None:
24
- """ swap the first reference to of the record type above the record in avro_schema """
25
- if isinstance(current_node, dict):
26
- if 'name' in current_node and 'namespace' in current_node and 'type' in current_node and \
27
- current_node['name'] == record['name'] and current_node.get('namespace','') == record.get('namespace','') and current_node['type'] == record['type']:
28
- # we reached the record again. we stop here.
29
- return None
30
- for k, v in current_node.items():
31
- if k in ['dependencies', 'unmerged_types']:
32
- continue
33
- if isinstance(v, (dict,list)):
34
- return self.swap_record_dependencies_above(v, record, avro_schema)
35
- elif isinstance(v, str):
36
- if k not in ['type', 'values', 'items']:
37
- continue
38
- qname = record.get('namespace','')+'.'+record['name']
39
- if v == qname:
40
- self.found_something = True
41
- current_node[k] = copy.deepcopy(record)
42
- return qname
43
- elif isinstance(current_node, list):
44
- for item in current_node:
45
- if isinstance(item, (dict,list)):
46
- return self.swap_record_dependencies_above(item, record, avro_schema)
47
- elif isinstance(item, str):
48
- qname = record.get('namespace','')+'.'+record['name']
49
- if item == qname:
50
- self.found_something = True
51
- idx = current_node.index(item)
52
- current_node.remove(item)
53
- current_node.insert(idx, copy.deepcopy(record))
54
- return qname
55
- return None
56
-
57
- def walk_schema(self, current_node, avro_schema, record_list) -> str | None:
58
- found_record = None
59
- if isinstance(current_node, dict):
60
- if 'type' in current_node and (current_node['type'] == 'record' or current_node['type'] == 'enum'):
61
- current_qname = current_node.get('namespace','')+'.'+current_node.get('name','')
62
- if current_qname in record_list:
63
- self.found_something = True
64
- return current_qname
65
- record_list.append(current_qname)
66
- found_record = self.swap_record_dependencies_above(avro_schema, current_node, avro_schema)
67
- for k, v in current_node.items():
68
- if isinstance(v, (dict,list)):
69
- qname = self.walk_schema(v, avro_schema, record_list)
70
- if qname:
71
- self.found_something = True
72
- current_node[k] = qname
73
- elif isinstance(current_node, list):
74
- for item in current_node:
75
- qname = self.walk_schema(item, avro_schema, record_list)
76
- if qname:
77
- self.found_something = True
78
- idx = current_node.index(item)
79
- current_node.remove(item)
80
- current_node.insert(idx, qname)
81
- # dedupe the list
82
- new_list = []
83
- for item in current_node:
84
- if not item in new_list:
85
- new_list.append(item)
86
- current_node.clear()
87
- current_node.extend(new_list)
88
- return found_record
89
-
90
- # while we've got work to do
91
- tree_walker = TreeWalker()
92
- while True:
93
- tree_walker.found_something = False
94
- tree_walker.walk_schema(avro_schema, avro_schema, [])
95
- if not tree_walker.found_something:
96
- break
97
-
98
-
99
-
100
- def inline_dependencies_of(avro_schema, record):
101
- """ to break circular dependencies, we will inline all dependent record """
102
- for dependency in copy.deepcopy(record.get('dependencies', [])):
103
- dependency_type = next((x for x in avro_schema if x['name'] == dependency or x.get('namespace','')+'.'+x['name'] == dependency), None)
104
- if not dependency_type:
105
- continue
106
- deps = record.get('dependencies', [])
107
- for field in record['fields']:
108
- swap_dependency_type(avro_schema, field, dependency, dependency_type, deps, [record['namespace']+'.'+record['name']])
109
- if 'dependencies' in record:
110
- del record['dependencies']
111
-
112
- adjust_resolved_dependencies(record)
113
-
114
-
115
-
116
- def sort_messages_by_dependencies(avro_schema):
117
- """
118
- Sort the messages in avro_schema by their dependencies. Avro Schema requires
119
- that type definitions must be defined before they are used. This method
120
- ensures this. Types that have dependencies will be moved at the end of the list.
121
- If necessary, it will also resolve circular dependencies by inlining the
122
- dependent record.
123
-
124
- The method expects all types with dependencies to have a 'dependencies' key in their
125
- dict that contains a list of types that they depend on.
126
-
127
- Args:
128
- avro_schema: List of Avro schema records.
129
- """
130
-
131
- # if all are just strings, then it is already sorted
132
- if all(isinstance(record, str) for record in avro_schema):
133
- return avro_schema
134
-
135
- sorted_messages = []
136
- record_stack = []
137
- while avro_schema:
138
- found = False
139
- for record in avro_schema:
140
- if not isinstance(record, dict):
141
- sorted_messages.append(record)
142
- avro_schema.remove(record)
143
- continue
144
-
145
- # if this record is not a dependency of any other record, it can be safely emitted now
146
- #if not any(record.get('namespace','')+'.'+record.get('name') in other_record.get('dependencies', []) for other_record in [x for x in avro_schema if isinstance(x, dict) and 'name' in x]):
147
- remaining_deps = [dep for dep in record['dependencies'] if not dep in [x.get('namespace','')+'.'+x.get('name','') for x in sorted_messages]] if 'dependencies' in record else []
148
- if len(remaining_deps) == 0:
149
- if 'dependencies' in record:
150
- del record['dependencies']
151
- sorted_messages.append(record)
152
- avro_schema.remove(record)
153
- found = True
154
-
155
- # If there are no records without dependencies, we will grab the first
156
- # record with dependencies and start resolving circular dependencies
157
- if len(avro_schema) > 0 and not found:
158
- found = False
159
- for record in avro_schema:
160
- if isinstance(record, dict) and 'dependencies' in record:
161
- remaining_deps = [dep for dep in record['dependencies'] if not dep in [x.get('namespace','')+'.'+x.get('name','') for x in sorted_messages]]
162
- if len(remaining_deps) > 0:
163
- swap_record_dependencies(avro_schema, record, [record.get('namespace','')+'.'+record['name']], 0)
164
- if 'dependencies' in record and len(record['dependencies']) == 0:
165
- del record['dependencies']
166
- if isinstance(record, dict) and not 'dependencies' in record:
167
- found = True
168
- sorted_messages.append(record)
169
- if record in avro_schema:
170
- avro_schema.remove(record)
171
- break
172
- else:
173
- remaining_remaining_deps = [dep for dep in record['dependencies'] if not dep in [x.get('namespace')+'.'+x.get('name') for x in sorted_messages]]
174
- found = len(remaining_deps) != len(remaining_remaining_deps)
175
- if found:
176
- break
177
-
178
- if not found:
179
- found = False
180
- for record in avro_schema:
181
- if isinstance(record, dict) and 'dependencies' in record:
182
- found = True
183
- record_deps = copy.deepcopy(record.get('dependencies', []))
184
- inline_dependencies_of(avro_schema, record)
185
- # fix the dependencies of all records that have this record as a dependency
186
- for remaining_schema in avro_schema:
187
- if isinstance(remaining_schema, dict) and 'dependencies' in remaining_schema and any(dep in record_deps for dep in remaining_schema['dependencies']):
188
- remaining_schema['dependencies'] = [dep for dep in remaining_schema['dependencies'] if not dep in record_deps]
189
- qname = record['namespace']+'.'+record['name']
190
- if not qname in remaining_schema['dependencies']:
191
- remaining_schema['dependencies'].append(qname)
192
- break
193
-
194
- if not found:
195
- print('WARNING: There are circular dependencies in the schema, unable to resolve them: {}'.format([x['name'] for x in avro_schema if isinstance(x, dict) and 'dependencies' in x]))
196
-
197
- adjust_resolved_dependencies(sorted_messages)
198
- return sorted_messages
199
-
200
- def swap_record_dependencies(avro_schema, record, record_stack: List[str], recursion_depth: int = 0):
201
- record_stack.append(record.get('namespace', '')+'.'+record['name'])
202
- if 'dependencies' in record:
203
- prior_dependencies = copy.deepcopy(record['dependencies'])
204
- while 'dependencies' in record and len(record['dependencies']) > 0:
205
- if 'fields' in record:
206
- for field in record['fields']:
207
- if isinstance(field['type'], list):
208
- for item in field['type'].copy():
209
- sub_field = {
210
- 'type': item,
211
- 'name': field['name']
212
- }
213
- resolve_field_dependencies(avro_schema, record, sub_field, record_stack, recursion_depth + 1)
214
- if sub_field['type'] != item:
215
- idx = field['type'].index(item)
216
- field['type'].remove(item)
217
- field['type'].insert(idx, sub_field['type'])
218
- else:
219
- resolve_field_dependencies(avro_schema, record, field, record_stack, recursion_depth + 1)
220
- if 'dependencies' in record and len(record['dependencies']) > 0:
221
- # compare the prior dependencies to the current dependencies one-by-one. If they are the same,
222
- # then we have a circular dependency.
223
- if prior_dependencies == record['dependencies']:
224
- print('WARNING: Unable to resolve circular dependency in {}::{} with dependencies: {}'.format(record.get('namespace',''), record['name'], record['dependencies']))
225
- break
226
- prior_dependencies = record['dependencies']
227
- if 'dependencies' in record:
228
- del record['dependencies']
229
- record_stack.pop()
230
-
231
- def resolve_field_dependencies(avro_schema, record, field, record_stack, recursion_depth: int = 0):
232
- for dependency in record.get('dependencies', []):
233
- dependency_type = next((x for x in avro_schema if x['name'] == dependency or x.get('namespace','')+'.'+x['name'] == dependency), None)
234
- if not dependency_type and dependency in record['dependencies']:
235
- record['dependencies'].remove(dependency)
236
- continue
237
- deps = record.get('dependencies', [])
238
- if dependency_type:
239
- if record['name'] != dependency and (record.get('namespace','')+'.'+record['name']) != dependency:
240
- swap_dependency_type(avro_schema, field, dependency, dependency_type, deps, record_stack, recursion_depth + 1)
241
- record['dependencies'] = [dep for dep in deps if dep != record['name'] and record.get('namespace','')+'.'+record['name'] != dep]
242
- if len(record['dependencies']) == 0:
243
- del record['dependencies']
244
-
245
-
246
- def swap_dependency_type(avro_schema, field, dependency, dependency_type, dependencies, record_stack: List[str], recursion_depth: int = 0):
247
- """ to break circular dependencies, we will inline the dependent record and remove the dependency """
248
- if not dependency in dependencies:
249
- return
250
- if not dependency_type in avro_schema:
251
- return
252
- if record_stack and dependency in record_stack:
253
- dependencies.remove(dependency)
254
- return
255
-
256
- # Replace the dependency type with the dependency_type in avro_schema.
257
- if isinstance(field['type'],str) and field['type'] == dependency:
258
- if dependency_type in avro_schema:
259
- field['type'] = dependency_type
260
- avro_schema.remove(dependency_type)
261
- dependencies.remove(dependency)
262
- dependencies.extend(dependency_type.get('dependencies', []))
263
- if 'dependencies' in dependency_type:
264
- swap_record_dependencies(avro_schema, dependency_type, record_stack, recursion_depth + 1)
265
-
266
- # type is a Union?
267
- elif isinstance(field['type'], list):
268
- for field_type in field['type']:
269
- if field_type == dependency:
270
- if dependency_type in avro_schema:
271
- index = field['type'].index(field_type)
272
- field['type'].remove(field_type)
273
- field['type'].insert(index, dependency_type)
274
- avro_schema.remove(dependency_type)
275
- if dependency in dependencies:
276
- dependencies.remove(dependency)
277
- dependencies.extend(dependency_type.get('dependencies', []))
278
- if 'dependencies' in dependency_type:
279
- swap_record_dependencies(avro_schema, dependency_type, record_stack, recursion_depth + 1)
280
- for field_type in field['type']:
281
- if isinstance(field_type, dict):
282
- swap_dependency_type(avro_schema, field_type, dependency, dependency_type, dependencies, record_stack, recursion_depth + 1)
283
- elif isinstance(field['type'], dict) and 'type' in field['type']:
284
- swap_dependency_type(avro_schema, field['type'], dependency, dependency_type, dependencies, record_stack, recursion_depth + 1)
285
- elif field['type'] == 'array':
286
- if not 'items' in field:
287
- return
288
- if isinstance(field['items'], list):
289
- for item in field['items']:
290
- if item == dependency:
291
- if dependency_type in avro_schema:
292
- index = field['items'].index(item)
293
- field['items'].remove(item)
294
- field['items'].insert(index, dependency_type)
295
- avro_schema.remove(dependency_type)
296
- if dependency in dependencies:
297
- dependencies.remove(dependency)
298
- dependencies.extend(dependency_type.get('dependencies', []))
299
- if 'dependencies' in dependency_type:
300
- swap_record_dependencies(avro_schema, dependency_type, record_stack)
301
- for item in field['items']:
302
- if isinstance(item, dict):
303
- swap_dependency_type(avro_schema, item, dependency, dependency_type, dependencies, record_stack, recursion_depth + 1)
304
- elif field['items'] == dependency:
305
- if dependency_type in avro_schema:
306
- field['items'] = dependency_type
307
- avro_schema.remove(dependency_type)
308
- if dependency in dependencies:
309
- dependencies.remove(dependency)
310
- dependencies.extend(dependency_type.get('dependencies', []))
311
- if 'dependencies' in dependency_type:
312
- swap_record_dependencies(avro_schema, dependency_type, record_stack)
313
- elif isinstance(field['items'], dict) and 'type' in field['items']:
314
- swap_dependency_type(avro_schema, field['items'], dependency, dependency_type, dependencies, record_stack, recursion_depth + 1)
315
- elif field['type'] == 'map':
316
- if isinstance(field['values'], list):
317
- for item in field['values']:
318
- if item == dependency:
319
- if dependency_type in avro_schema:
320
- index = field['values'].index(item)
321
- field['values'].remove(item)
322
- field['values'].insert(index, dependency_type)
323
- avro_schema.remove(dependency_type)
324
- if dependency in dependencies:
325
- dependencies.remove(dependency)
326
- dependencies.extend(dependency_type.get('dependencies', []))
327
- if 'dependencies' in dependency_type:
328
- swap_record_dependencies(avro_schema, dependency_type, record_stack)
329
- for item in field['values']:
330
- if isinstance(item, dict):
331
- swap_dependency_type(avro_schema, item, dependency, dependency_type, dependencies, record_stack, recursion_depth + 1)
332
- if field['values'] == dependency:
333
- if dependency_type in avro_schema:
334
- field['values'] = dependency_type
335
- avro_schema.remove(dependency_type)
336
- if dependency in dependencies:
337
- dependencies.remove(dependency)
338
- dependencies.extend(dependency_type.get('dependencies', []))
339
- if 'dependencies' in dependency_type:
340
- swap_record_dependencies(avro_schema, dependency_type, record_stack)
341
- elif 'type' in field['values']:
342
- swap_dependency_type(avro_schema, field['values'], dependency, dependency_type, dependencies, record_stack, recursion_depth + 1)
343
- elif field['type'] == 'record':
344
- record_stack.append(field.get('namespace', '')+'.'+field['name'])
345
- for dep_field in field['fields']:
346
- if isinstance(dep_field, dict):
347
- swap_dependency_type(avro_schema, dep_field, dependency, dependency_type, dependencies, record_stack, recursion_depth + 1)
348
- record_stack.pop()
1
+ # sort the dependencies
2
+
3
+ import copy
4
+ from typing import List
5
+
6
+
7
+
8
+ def adjust_resolved_dependencies(avro_schema: List[dict] | dict):
9
+ """
10
+ After resolving dependencies, it may still be necessary to adjust them. The
11
+ first pass of the algorithms below does inline all dependent types, but
12
+ the resulting document may still have fields defined before the types they
13
+ depend on because of the order in which the resolution happened, which necessarily
14
+ re-sorts the graph. This function will recursively adjust the resolved
15
+ dependencies until all record types have their dependency types defined before them.
16
+ """
17
+
18
+ class TreeWalker:
19
+
20
+ def __init__(self):
21
+ self.found_something = True
22
+
23
+ def swap_record_dependencies_above(self, current_node, record, avro_schema) -> str | None:
24
+ """ swap the first reference to of the record type above the record in avro_schema """
25
+ if isinstance(current_node, dict):
26
+ if 'name' in current_node and 'namespace' in current_node and 'type' in current_node and \
27
+ current_node['name'] == record['name'] and current_node.get('namespace','') == record.get('namespace','') and current_node['type'] == record['type']:
28
+ # we reached the record again. we stop here.
29
+ return None
30
+ for k, v in current_node.items():
31
+ if k in ['dependencies', 'unmerged_types']:
32
+ continue
33
+ if isinstance(v, (dict,list)):
34
+ return self.swap_record_dependencies_above(v, record, avro_schema)
35
+ elif isinstance(v, str):
36
+ if k not in ['type', 'values', 'items']:
37
+ continue
38
+ qname = record.get('namespace','')+'.'+record['name']
39
+ if v == qname:
40
+ self.found_something = True
41
+ current_node[k] = copy.deepcopy(record)
42
+ return qname
43
+ elif isinstance(current_node, list):
44
+ for item in current_node:
45
+ if isinstance(item, (dict,list)):
46
+ return self.swap_record_dependencies_above(item, record, avro_schema)
47
+ elif isinstance(item, str):
48
+ qname = record.get('namespace','')+'.'+record['name']
49
+ if item == qname:
50
+ self.found_something = True
51
+ idx = current_node.index(item)
52
+ current_node.remove(item)
53
+ current_node.insert(idx, copy.deepcopy(record))
54
+ return qname
55
+ return None
56
+
57
+ def walk_schema(self, current_node, avro_schema, record_list) -> str | None:
58
+ found_record = None
59
+ if isinstance(current_node, dict):
60
+ if 'type' in current_node and (current_node['type'] == 'record' or current_node['type'] == 'enum'):
61
+ current_qname = current_node.get('namespace','')+'.'+current_node.get('name','')
62
+ if current_qname in record_list:
63
+ self.found_something = True
64
+ return current_qname
65
+ record_list.append(current_qname)
66
+ found_record = self.swap_record_dependencies_above(avro_schema, current_node, avro_schema)
67
+ for k, v in current_node.items():
68
+ if isinstance(v, (dict,list)):
69
+ qname = self.walk_schema(v, avro_schema, record_list)
70
+ if qname:
71
+ self.found_something = True
72
+ current_node[k] = qname
73
+ elif isinstance(current_node, list):
74
+ for item in current_node:
75
+ qname = self.walk_schema(item, avro_schema, record_list)
76
+ if qname:
77
+ self.found_something = True
78
+ idx = current_node.index(item)
79
+ current_node.remove(item)
80
+ current_node.insert(idx, qname)
81
+ # dedupe the list
82
+ new_list = []
83
+ for item in current_node:
84
+ if not item in new_list:
85
+ new_list.append(item)
86
+ current_node.clear()
87
+ current_node.extend(new_list)
88
+ return found_record
89
+
90
+ # while we've got work to do
91
+ tree_walker = TreeWalker()
92
+ while True:
93
+ tree_walker.found_something = False
94
+ tree_walker.walk_schema(avro_schema, avro_schema, [])
95
+ if not tree_walker.found_something:
96
+ break
97
+
98
+
99
+
100
def inline_dependencies_of(avro_schema, record):
    """
    Break circular dependencies by inlining every dependent type into ``record``.

    For each name in ``record['dependencies']`` that has a definition in
    ``avro_schema``, the definition is swapped into the fields of ``record``
    via ``swap_dependency_type``. Afterwards the ``dependencies`` key is
    dropped and the record is passed to ``adjust_resolved_dependencies``.

    Args:
        avro_schema: List of top-level schema entries; modified in place.
        record: The record dict to inline into; modified in place.
    """
    # .get keeps this consistent with the rest of the module, where a missing
    # namespace is treated as the empty string instead of raising KeyError.
    record_qname = record.get('namespace', '') + '.' + record['name']
    # iterate over a snapshot: swap_dependency_type edits the live list
    for dependency in list(record.get('dependencies', [])):
        dependency_type = next(
            (x for x in avro_schema
             if isinstance(x, dict)
             and (x.get('name') == dependency or x.get('namespace', '') + '.' + x.get('name', '') == dependency)),
            None)
        if not dependency_type:
            continue
        deps = record.get('dependencies', [])
        for field in record.get('fields', []):
            swap_dependency_type(avro_schema, field, dependency, dependency_type, deps, [record_qname])
    record.pop('dependencies', None)

    adjust_resolved_dependencies(record)
113
+
114
+
115
+
116
def sort_messages_by_dependencies(avro_schema):
    """
    Sort the messages in avro_schema by their dependencies.

    Avro Schema requires that type definitions must be defined before they are
    used. This method ensures this. Types that have dependencies will be moved
    at the end of the list. If necessary, it will also resolve circular
    dependencies by inlining the dependent record.

    The method expects all types with dependencies to have a 'dependencies' key
    in their dict that contains a list of types that they depend on.

    Args:
        avro_schema: List of Avro schema records. The list is consumed
            (emptied) while sorting, except when every entry is a string.

    Returns:
        The sorted list of schema entries. If every entry of ``avro_schema`` is
        a string, the input list itself is returned unchanged.
    """

    # if all are just strings, then it is already sorted
    if all(isinstance(record, str) for record in avro_schema):
        return avro_schema

    sorted_messages = []

    def qualified_name(schema) -> str:
        return schema.get('namespace', '') + '.' + schema.get('name', '')

    def unresolved_dependencies(record) -> list:
        """Dependencies of ``record`` that are not yet in ``sorted_messages``."""
        # Non-dict entries (plain type names) can sit in sorted_messages; they
        # have no .get(), so they are skipped instead of raising AttributeError.
        emitted = [qualified_name(x) for x in sorted_messages if isinstance(x, dict)]
        return [dep for dep in record.get('dependencies', []) if dep not in emitted]

    while avro_schema:
        found = False
        # Iterate over a snapshot: removing from the list being iterated makes
        # the loop skip the element that follows each removal.
        for record in list(avro_schema):
            if not isinstance(record, dict):
                sorted_messages.append(record)
                avro_schema.remove(record)
                continue

            if not unresolved_dependencies(record):
                record.pop('dependencies', None)
                sorted_messages.append(record)
                avro_schema.remove(record)
                found = True

        # If there are no records without dependencies, we will grab the first
        # record with dependencies and start resolving circular dependencies
        if avro_schema and not found:
            for record in avro_schema:
                if isinstance(record, dict) and 'dependencies' in record:
                    remaining_before = unresolved_dependencies(record)
                    if remaining_before:
                        swap_record_dependencies(
                            avro_schema, record,
                            [record.get('namespace', '') + '.' + record['name']], 0)
                        if 'dependencies' in record and not record['dependencies']:
                            del record['dependencies']
                        if 'dependencies' not in record:
                            found = True
                            sorted_messages.append(record)
                            if record in avro_schema:
                                avro_schema.remove(record)
                            break
                        found = len(remaining_before) != len(unresolved_dependencies(record))
                        if found:
                            break

            if not found:
                for record in avro_schema:
                    if isinstance(record, dict) and 'dependencies' in record:
                        found = True
                        record_deps = list(record.get('dependencies', []))
                        inline_dependencies_of(avro_schema, record)
                        qname = record.get('namespace', '') + '.' + record['name']
                        # fix the dependencies of all records that have this record as a dependency
                        for remaining_schema in avro_schema:
                            if (isinstance(remaining_schema, dict)
                                    and 'dependencies' in remaining_schema
                                    and any(dep in record_deps for dep in remaining_schema['dependencies'])):
                                remaining_schema['dependencies'] = [
                                    dep for dep in remaining_schema['dependencies'] if dep not in record_deps]
                                if qname not in remaining_schema['dependencies']:
                                    remaining_schema['dependencies'].append(qname)
                        break

            if not found:
                print('WARNING: There are circular dependencies in the schema, unable to resolve them: {}'.format([x['name'] for x in avro_schema if isinstance(x, dict) and 'dependencies' in x]))
                # Nothing changed in this round, so another round would repeat
                # forever. Emit the leftovers as they are and stop.
                sorted_messages.extend(avro_schema)
                del avro_schema[:]

    adjust_resolved_dependencies(sorted_messages)
    return sorted_messages
199
+
200
def swap_record_dependencies(avro_schema, record, record_stack: List[str], recursion_depth: int = 0):
    """
    Resolve the dependencies of ``record`` field by field until none remain.

    Each field (and each member of a union-typed field) is handed to
    ``resolve_field_dependencies``. The loop repeats while the record still
    lists dependencies and stops with a warning when a full pass leaves the
    list unchanged. The ``dependencies`` key is removed before returning.

    Args:
        avro_schema: List of top-level schema entries; passed through to the resolver.
        record: The record dict being resolved; modified in place.
        record_stack: Qualified names of the records currently being resolved.
            The name of ``record`` is pushed on entry and popped on exit.
        recursion_depth: Current nesting depth; only passed on to callees.
    """
    record_stack.append(record.get('namespace', '') + '.' + record['name'])
    if 'dependencies' in record:
        # Snapshot, not alias: the comparison below must see the state from
        # before the pass even when the list is edited in place.
        prior_dependencies = list(record['dependencies'])
        while record.get('dependencies'):
            for field in record.get('fields', []):
                if isinstance(field['type'], list):
                    # Resolve each union member through a throw-away wrapper and
                    # write the result back by position.
                    for idx, item in enumerate(list(field['type'])):
                        sub_field = {
                            'type': item,
                            'name': field['name']
                        }
                        resolve_field_dependencies(avro_schema, record, sub_field, record_stack, recursion_depth + 1)
                        if sub_field['type'] != item:
                            field['type'][idx] = sub_field['type']
                else:
                    resolve_field_dependencies(avro_schema, record, field, record_stack, recursion_depth + 1)
            if record.get('dependencies'):
                # compare the prior dependencies to the current dependencies one-by-one. If they are the same,
                # then we have a circular dependency.
                if prior_dependencies == record['dependencies']:
                    print('WARNING: Unable to resolve circular dependency in {}::{} with dependencies: {}'.format(record.get('namespace',''), record['name'], record['dependencies']))
                    break
                prior_dependencies = list(record['dependencies'])
        record.pop('dependencies', None)
    record_stack.pop()
230
+
231
def resolve_field_dependencies(avro_schema, record, field, record_stack, recursion_depth: int = 0):
    """
    Resolve the dependencies of ``record`` against a single ``field``.

    Every dependency name listed on the record is looked up in ``avro_schema``:

    * a name with no definition in ``avro_schema`` is dropped from the list;
    * a name that refers to ``record`` itself is dropped from the list;
    * any other name is handed to ``swap_dependency_type`` for ``field``.

    The ``dependencies`` key is removed from the record once the list is empty.

    Args:
        avro_schema: List of top-level schema entries.
        record: The record dict that owns ``field``; modified in place.
        field: The field dict (with a ``type`` key) to resolve into.
        record_stack: Qualified names of the records currently being resolved.
        recursion_depth: Current nesting depth; only passed on to callees.
    """
    own_names = (record['name'], record.get('namespace', '') + '.' + record['name'])
    handled = set()
    while True:
        # Re-read the list on every round instead of iterating it directly:
        # entries are removed, appended and the list is rebound while we work,
        # and a plain for-loop over it skips the entry after each removal.
        dependency = next((dep for dep in record.get('dependencies', []) if dep not in handled), None)
        if dependency is None:
            break
        handled.add(dependency)
        dependency_type = next(
            (x for x in avro_schema
             if isinstance(x, dict)
             and (x.get('name') == dependency or x.get('namespace', '') + '.' + x.get('name', '') == dependency)),
            None)
        deps = record['dependencies']
        if not dependency_type:
            deps.remove(dependency)
            continue
        if dependency not in own_names:
            swap_dependency_type(avro_schema, field, dependency, dependency_type, deps, record_stack, recursion_depth + 1)
        record['dependencies'] = [dep for dep in deps if dep not in own_names]
    # the key may already be absent, so do not index it unconditionally
    if not record.get('dependencies'):
        record.pop('dependencies', None)
244
+
245
+
246
def swap_dependency_type(avro_schema, field, dependency, dependency_type, dependencies, record_stack: List[str], recursion_depth: int = 0):
    """
    Inline ``dependency_type`` where ``field`` refers to it by name.

    To break circular dependencies, the dependent definition is moved out of
    ``avro_schema`` and into the first place under ``field`` that names it.
    The name is removed from ``dependencies`` and the dependencies of the
    inlined type are appended in its place.

    Nothing happens when ``dependency`` is not in ``dependencies`` or when
    ``dependency_type`` is no longer in ``avro_schema``. When ``dependency`` is
    already on ``record_stack`` it is only removed from ``dependencies``.

    Args:
        avro_schema: List of top-level schema entries; modified in place.
        field: Dict with a ``type`` key (a field, or a nested array/map/record
            schema); modified in place.
        dependency: Name of the type to inline.
        dependency_type: The schema dict that defines ``dependency``.
        dependencies: Dependency list of the owning record; modified in place.
        record_stack: Qualified names of the records currently being resolved.
        recursion_depth: Current nesting depth; only passed on to callees.
    """
    if dependency not in dependencies:
        return
    if dependency_type not in avro_schema:
        return
    if record_stack and dependency in record_stack:
        dependencies.remove(dependency)
        return

    def recurse_into(nested):
        swap_dependency_type(avro_schema, nested, dependency, dependency_type, dependencies, record_stack, recursion_depth + 1)

    def inline_at(container, key):
        """Store the definition at ``container[key]`` and update the bookkeeping."""
        if dependency_type not in avro_schema:
            # already inlined elsewhere; leave this reference as a name
            return
        container[key] = dependency_type
        avro_schema.remove(dependency_type)
        if dependency in dependencies:
            dependencies.remove(dependency)
        dependencies.extend(dependency_type.get('dependencies', []))
        if 'dependencies' in dependency_type:
            swap_record_dependencies(avro_schema, dependency_type, record_stack, recursion_depth + 1)

    def inline_in_union(members):
        """Handle a list of alternatives: inline by-name matches, then descend into dicts."""
        for idx, member in enumerate(members):
            if member == dependency:
                inline_at(members, idx)
        for member in members:
            if isinstance(member, dict):
                recurse_into(member)

    def inline_in_slot(container, key):
        """Handle the ``items`` of an array or the ``values`` of a map."""
        if key not in container:
            return
        inner = container[key]
        if isinstance(inner, list):
            inline_in_union(inner)
        elif inner == dependency:
            inline_at(container, key)
        elif isinstance(inner, dict) and 'type' in inner:
            # The isinstance check matters: on a plain string, `'type' in inner`
            # is a substring test and would send a str into the recursion.
            recurse_into(inner)

    field_type = field['type']
    # Replace the dependency type with the dependency_type in avro_schema.
    if isinstance(field_type, str) and field_type == dependency:
        inline_at(field, 'type')
    # type is a Union?
    elif isinstance(field_type, list):
        inline_in_union(field_type)
    elif isinstance(field_type, dict) and 'type' in field_type:
        recurse_into(field_type)
    elif field_type == 'array':
        inline_in_slot(field, 'items')
    elif field_type == 'map':
        inline_in_slot(field, 'values')
    elif field_type == 'record':
        record_stack.append(field.get('namespace', '') + '.' + field['name'])
        for dep_field in field['fields']:
            if isinstance(dep_field, dict):
                recurse_into(dep_field)
        record_stack.pop()