structurize-2.19.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. avrotize/__init__.py +64 -0
  2. avrotize/__main__.py +6 -0
  3. avrotize/_version.py +34 -0
  4. avrotize/asn1toavro.py +160 -0
  5. avrotize/avrotize.py +152 -0
  6. avrotize/avrotocpp.py +483 -0
  7. avrotize/avrotocsharp.py +1075 -0
  8. avrotize/avrotocsv.py +121 -0
  9. avrotize/avrotodatapackage.py +173 -0
  10. avrotize/avrotodb.py +1383 -0
  11. avrotize/avrotogo.py +476 -0
  12. avrotize/avrotographql.py +197 -0
  13. avrotize/avrotoiceberg.py +210 -0
  14. avrotize/avrotojava.py +2156 -0
  15. avrotize/avrotojs.py +250 -0
  16. avrotize/avrotojsons.py +481 -0
  17. avrotize/avrotojstruct.py +345 -0
  18. avrotize/avrotokusto.py +364 -0
  19. avrotize/avrotomd.py +137 -0
  20. avrotize/avrotools.py +168 -0
  21. avrotize/avrotoparquet.py +208 -0
  22. avrotize/avrotoproto.py +359 -0
  23. avrotize/avrotopython.py +624 -0
  24. avrotize/avrotorust.py +435 -0
  25. avrotize/avrotots.py +598 -0
  26. avrotize/avrotoxsd.py +344 -0
  27. avrotize/cddltostructure.py +1841 -0
  28. avrotize/commands.json +3337 -0
  29. avrotize/common.py +834 -0
  30. avrotize/constants.py +72 -0
  31. avrotize/csvtoavro.py +132 -0
  32. avrotize/datapackagetoavro.py +76 -0
  33. avrotize/dependencies/cpp/vcpkg/vcpkg.json +19 -0
  34. avrotize/dependencies/typescript/node22/package.json +16 -0
  35. avrotize/dependency_resolver.py +348 -0
  36. avrotize/dependency_version.py +432 -0
  37. avrotize/jsonstoavro.py +2167 -0
  38. avrotize/jsonstostructure.py +2642 -0
  39. avrotize/jstructtoavro.py +878 -0
  40. avrotize/kstructtoavro.py +93 -0
  41. avrotize/kustotoavro.py +455 -0
  42. avrotize/parquettoavro.py +157 -0
  43. avrotize/proto2parser.py +498 -0
  44. avrotize/proto3parser.py +403 -0
  45. avrotize/prototoavro.py +382 -0
  46. avrotize/structuretocddl.py +597 -0
  47. avrotize/structuretocpp.py +697 -0
  48. avrotize/structuretocsharp.py +2295 -0
  49. avrotize/structuretocsv.py +365 -0
  50. avrotize/structuretodatapackage.py +659 -0
  51. avrotize/structuretodb.py +1125 -0
  52. avrotize/structuretogo.py +720 -0
  53. avrotize/structuretographql.py +502 -0
  54. avrotize/structuretoiceberg.py +355 -0
  55. avrotize/structuretojava.py +853 -0
  56. avrotize/structuretojsons.py +498 -0
  57. avrotize/structuretokusto.py +639 -0
  58. avrotize/structuretomd.py +322 -0
  59. avrotize/structuretoproto.py +764 -0
  60. avrotize/structuretopython.py +772 -0
  61. avrotize/structuretorust.py +714 -0
  62. avrotize/structuretots.py +653 -0
  63. avrotize/structuretoxsd.py +679 -0
  64. avrotize/xsdtoavro.py +413 -0
  65. structurize-2.19.0.dist-info/METADATA +107 -0
  66. structurize-2.19.0.dist-info/RECORD +70 -0
  67. structurize-2.19.0.dist-info/WHEEL +5 -0
  68. structurize-2.19.0.dist-info/entry_points.txt +2 -0
  69. structurize-2.19.0.dist-info/licenses/LICENSE +201 -0
  70. structurize-2.19.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,345 @@
1
+ import json
2
+ import uuid
3
+ from typing import Any, Dict, List, Union
4
+
5
+
6
+ class AvroToJsonStructure:
7
+ """
8
+ Convert (one or more) Avro schemas into a single JSON-Structure document.
9
+ """
10
+
11
+ def __init__(self, avro_encoding: bool = False) -> None:
12
+ self.known_types: set[str] = set()
13
+ self.reference_stack: set[str] = set()
14
+ self.avro_encoding: bool = avro_encoding
15
+
16
+ # ------------------------------------------------------------------ TOP-LEVEL
17
+
18
+ def convert(
19
+ self,
20
+ avro_schema: Union[Dict[str, Any], List[Any]],
21
+ namespace: str | None = None,
22
+ ) -> Dict[str, Any]:
23
+ """
24
+ Entry-point: return a full JSON-Structure document for `avro_schema`.
25
+ """
26
+
27
+ # ------------- LIST (multiple root schemas) --------------------
28
+ if isinstance(avro_schema, list):
29
+ # Empty list – return a stub document
30
+ if not avro_schema:
31
+ anon_name = f"empty_list_{uuid.uuid4().hex[:8]}"
32
+ return {
33
+ "$schema": "https://json-structure.org/meta/core/v0/#",
34
+ "$id": f"https://example.com/schemas/{anon_name}",
35
+ "name": anon_name,
36
+ "definitions": {},
37
+ }
38
+
39
+ # TEMPORARY: process only first element
40
+ first = avro_schema[0]
41
+ if isinstance(first, dict):
42
+ return self.convert(first, namespace)
43
+
44
+ # First element non-dict → return stub
45
+ bad_name = f"invalid_list_root_{uuid.uuid4().hex[:8]}"
46
+ return {
47
+ "$schema": "https://json-structure.org/meta/core/v0/#",
48
+ "$id": f"https://example.com/schemas/{bad_name}",
49
+ "name": bad_name,
50
+ "definitions": {},
51
+ }
52
+
53
+ # ------------- SINGLE SCHEMA -----------------------------------
54
+ # Reset caches for each top-level conversion
55
+ self.known_types.clear()
56
+ self.reference_stack.clear()
57
+
58
+ current_namespace = avro_schema.get("namespace", namespace)
59
+ name = self.clean_name(
60
+ avro_schema.get("name", f"AnonymousType_{uuid.uuid4().hex}")
61
+ )
62
+ fqn = self.get_fqn(current_namespace, name)
63
+
64
+ doc: Dict[str, Any] = {
65
+ "$schema": "https://json-structure.org/meta/core/v0/#",
66
+ "$id": f"https://example.com/schemas/{fqn}",
67
+ "name": name,
68
+ "$root": f"#/definitions/{fqn}",
69
+ "definitions": {},
70
+ }
71
+
72
+ # Build definitions – do NOT skip root
73
+ self.register_definition(avro_schema, current_namespace, doc["definitions"])
74
+ return doc
75
+
76
+ # ------------------------------------------------------------------ REGISTRATION
77
+
78
+ def register_definition(
79
+ self,
80
+ avro_schema: Dict[str, Any],
81
+ namespace: str | None,
82
+ definitions: Dict[str, Any],
83
+ is_root: bool = False, # retained only for signature compatibility
84
+ ) -> None:
85
+ """
86
+ Ensure `avro_schema` has an entry in `definitions`.
87
+ """
88
+
89
+ current_namespace = avro_schema.get("namespace", namespace)
90
+ name = self.clean_name(
91
+ avro_schema.get("name", f"AnonymousType_{uuid.uuid4().hex}")
92
+ )
93
+ fqn = self.get_fqn(current_namespace, name)
94
+
95
+ if fqn in self.known_types: # already built / in progress
96
+ return
97
+
98
+ self.known_types.add(fqn)
99
+
100
+ created = self.build_type_definition(avro_schema, current_namespace, definitions)
101
+
102
+ # Remove marker if nothing was actually created
103
+ if fqn not in definitions and created is None:
104
+ self.known_types.discard(fqn)
105
+
106
+ # ------------------------------------------------------------------ BUILD TYPE
107
+
108
+ def build_type_definition(self, avro_schema, namespace, definitions):
109
+ if not isinstance(avro_schema, dict): # Should be a complex type dict
110
+ return None
111
+
112
+ avro_type = avro_schema.get("type")
113
+ # Use the schema's own namespace if provided, otherwise fall back to the passed 'namespace'
114
+ current_schema_namespace = avro_schema.get("namespace", namespace)
115
+ name = self.clean_name(avro_schema.get("name", f"AnonymousType_{uuid.uuid4().hex}"))
116
+ fqn = self.get_fqn(current_schema_namespace, name)
117
+
118
+ if fqn in self.reference_stack:
119
+ # Circular reference during the build of this specific definition.
120
+ # Depending on JSON Structure spec, could return a $ref or handle as error.
121
+ # For now, allowing it to proceed might lead to incomplete recursive definitions
122
+ # if not handled carefully by $ref logic in resolve_avro_type.
123
+ # However, known_types in register_definition should catch completed cycles.
124
+ pass
125
+
126
+ self.reference_stack.add(fqn)
127
+
128
+ # This variable will hold the actual definition content (the value part of the key-value pair)
129
+ type_definition_content = None
130
+
131
+ if avro_type == "record":
132
+ props = {"name": name, "type": "object", "properties": {}, "required": []}
133
+ if "doc" in avro_schema:
134
+ props["description"] = avro_schema["doc"]
135
+
136
+ # Namespace for resolving field types within this record
137
+ record_fields_namespace = avro_schema.get("namespace", namespace)
138
+
139
+ for field in avro_schema.get("fields", []):
140
+ field_name = field["name"]
141
+ field_type_schema = field["type"]
142
+
143
+ resolved_field_type = self.resolve_avro_type(field_type_schema, record_fields_namespace, definitions)
144
+
145
+ if "default" in field:
146
+ resolved_field_type["default"] = self.encode_default_value(field["default"], resolved_field_type.get("type", "unknown"))
147
+
148
+ if not self.is_nullable_union(field_type_schema):
149
+ props["required"].append(field_name)
150
+
151
+ if "doc" in field:
152
+ resolved_field_type["description"] = field["doc"]
153
+
154
+ props["properties"][field_name] = resolved_field_type
155
+ type_definition_content = props
156
+
157
+ elif avro_type == "enum":
158
+ props = {"name": name, "type": "string", "enum": avro_schema["symbols"]}
159
+ if "doc" in avro_schema:
160
+ props["description"] = avro_schema["doc"]
161
+ if "default" in avro_schema: # Avro enum default
162
+ props["default"] = avro_schema["default"]
163
+ type_definition_content = props
164
+
165
+ elif avro_type == "fixed":
166
+ props = {"name": name, "type": "binary", "byteLength": avro_schema["size"]} # Consider "maxLength" or custom prop
167
+ if "doc" in avro_schema:
168
+ props["description"] = avro_schema["doc"]
169
+ type_definition_content = props
170
+
171
+ elif isinstance(avro_type, str) and avro_schema.get("logicalType"):
172
+ # This is a named type that is also a logical type, e.g. a named decimal
173
+ props = self.resolve_logical_type(avro_schema["logicalType"], avro_schema)
174
+ # Ensure name and description from the schema are part of the definition
175
+ if "name" not in props: props["name"] = name
176
+ if "doc" in avro_schema and "description" not in props : props["description"] = avro_schema["doc"]
177
+ type_definition_content = props
178
+
179
+ elif isinstance(avro_type, (list, dict)) and not avro_schema.get("name"):
180
+ # An anonymous complex type (array, map, union) is the schema itself.
181
+ # It needs a generated name (which 'name' variable already holds).
182
+ props = self.resolve_avro_type(avro_schema, current_schema_namespace, definitions)
183
+ if "name" not in props: props["name"] = name # Ensure generated name is part of definition
184
+ type_definition_content = props
185
+
186
+ # else:
187
+ # If avro_type is a primitive string (e.g. "string", "int") or a named type reference string,
188
+ # it doesn't form a new entry in "definitions" by itself.
189
+ # resolve_avro_type handles these cases by returning the primitive type object or a $ref.
190
+ # So, type_definition_content remains None, and nothing is added to definitions here.
191
+
192
+ # If a definition was constructed, add it to the definitions map with proper nesting.
193
+ if type_definition_content is not None:
194
+ parts = fqn.split('/')
195
+ current_level_dict = definitions
196
+ for i, part_name in enumerate(parts):
197
+ if i == len(parts) - 1: # Last part is the type name itself
198
+ current_level_dict[part_name] = type_definition_content
199
+ else: # This is a namespace part
200
+ current_level_dict = current_level_dict.setdefault(part_name, {})
201
+
202
+ self.reference_stack.remove(fqn)
203
+ return type_definition_content # Return the definition object (or None)
204
+
205
+ # ------------------------------------------------------------------ RESOLVE TYPE
206
+
207
+ def resolve_avro_type(
208
+ self,
209
+ avro_type_schema: Any,
210
+ context_namespace: str | None,
211
+ definitions: Dict[str, Any],
212
+ ) -> Dict[str, Any]:
213
+ """
214
+ Convert any Avro type expression into a JSON-Structure node (or $ref).
215
+ """
216
+
217
+ # ------------------ STRING (primitive or reference) --------------
218
+ if isinstance(avro_type_schema, str):
219
+ if avro_type_schema in self.get_primitive_types():
220
+ return {"type": self.get_primitive_types()[avro_type_schema]}
221
+ # Named type reference
222
+ if "." in avro_type_schema:
223
+ ref_fqn = avro_type_schema.replace(".", "/")
224
+ else:
225
+ ref_fqn = self.get_fqn(context_namespace, self.clean_name(avro_type_schema))
226
+ return {"$ref": f"#/definitions/{ref_fqn}"}
227
+
228
+ # ------------------ UNION ----------------------------------------
229
+ if isinstance(avro_type_schema, list):
230
+ if not self.avro_encoding and "null" in avro_type_schema:
231
+ non_null = [t for t in avro_type_schema if t != "null"]
232
+ if len(non_null) == 1:
233
+ # Optional short-form
234
+ return self.resolve_avro_type(non_null[0], context_namespace, definitions)
235
+
236
+ choices: Dict[str, Any] = {}
237
+ for member in avro_type_schema:
238
+ if isinstance(member, str):
239
+ key = self.clean_name(member)
240
+ elif isinstance(member, dict) and member.get("name"):
241
+ key = self.clean_name(member["name"])
242
+ else:
243
+ key = f"anonymous_{uuid.uuid4().hex[:8]}"
244
+ choices[key] = self.resolve_avro_type(member, context_namespace, definitions)
245
+
246
+ return {"type": "choice", "choices": choices}
247
+
248
+ # ------------------ DICT (complex inline) ------------------------
249
+ if isinstance(avro_type_schema, dict):
250
+ category = avro_type_schema.get("type")
251
+ inline_ns = avro_type_schema.get("namespace", context_namespace)
252
+
253
+ if category in ("record", "enum", "fixed"):
254
+ # Ensure definition exists then reference it
255
+ self.register_definition(avro_type_schema, inline_ns, definitions)
256
+ ref_name = self.clean_name(avro_type_schema["name"])
257
+ ref_fqn = self.get_fqn(inline_ns, ref_name)
258
+ return {"$ref": f"#/definitions/{ref_fqn}"}
259
+
260
+ if category == "array":
261
+ return {
262
+ "type": "array",
263
+ "items": self.resolve_avro_type(
264
+ avro_type_schema["items"], inline_ns, definitions
265
+ ),
266
+ }
267
+
268
+ if category == "map":
269
+ return {
270
+ "type": "map",
271
+ "values": self.resolve_avro_type(
272
+ avro_type_schema["values"], inline_ns, definitions
273
+ ),
274
+ }
275
+
276
+ logical_type = avro_type_schema.get("logicalType")
277
+ if logical_type:
278
+ return self.resolve_logical_type(logical_type, avro_type_schema)
279
+
280
+ raise ValueError(f"Unsupported Avro type schema: {avro_type_schema}")
281
+
282
+ # ------------------------------------------------------------------ HELPERS
283
+
284
+ def is_nullable_union(self, avro_field_type_schema: Any) -> bool:
285
+ return isinstance(avro_field_type_schema, list) and "null" in avro_field_type_schema
286
+
287
+ def encode_default_value(self, value: Any, json_structure_type: str) -> Any:
288
+ # Minimal – pass through. Extend for binary/base64 etc. if needed.
289
+ return value
290
+
291
+ def resolve_logical_type(self, logical_type: str, schema: Dict[str, Any]) -> Dict[str, Any]:
292
+ """
293
+ Very small logical-type mapping demo. Extend as required.
294
+ """
295
+ mapping = {
296
+ "timestamp-micros": {"type": "int64", "logicalType": "timestampMicros"},
297
+ "timestamp-millis": {"type": "int64", "logicalType": "timestampMillis"},
298
+ "date": {"type": "int32", "logicalType": "date"},
299
+ "uuid": {"type": "string", "format": "uuid"},
300
+ }
301
+ return mapping.get(logical_type, {"type": "string"})
302
+
303
+ def clean_name(self, name: str) -> str:
304
+ return name.replace(".", "_")
305
+
306
+ def get_fqn(self, namespace: str | None, name: str) -> str:
307
+ if namespace:
308
+ return f"{namespace.replace('.', '/')}/{name}"
309
+ return name
310
+
311
+ @staticmethod
312
+ def get_primitive_types() -> Dict[str, str]:
313
+ return {
314
+ "string": "string",
315
+ "boolean": "boolean",
316
+ "int": "int32",
317
+ "long": "int64",
318
+ "float": "float",
319
+ "double": "double",
320
+ "bytes": "binary",
321
+ "null": "null",
322
+ }
323
+
324
+
325
+ # ---------------------------------------------------------------------- CLI HELPER
326
+
327
def convert_avro_to_json_structure(
    avro_schema_file: str,
    json_structure_file: str,
    naming_mode: str = "default",
    avro_encoding: bool = False,
) -> None:
    """
    Read an Avro schema from a JSON file and write the JSON-Structure result.

    Args:
        avro_schema_file: Path of the Avro schema (JSON) to read, as UTF-8.
        json_structure_file: Path the JSON-Structure document is written to,
            as UTF-8 with 4-space indentation.
        naming_mode: Accepted for interface compatibility; not used here.
        avro_encoding: Forwarded to the AvroToJsonStructure constructor.
    """
    with open(avro_schema_file, "r", encoding="utf-8") as source:
        parsed_schema = json.load(source)

    document = AvroToJsonStructure(avro_encoding=avro_encoding).convert(parsed_schema)

    with open(json_structure_file, "w", encoding="utf-8") as target:
        json.dump(document, target, indent=4)