structurize 2.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avrotize/__init__.py +64 -0
- avrotize/__main__.py +6 -0
- avrotize/_version.py +34 -0
- avrotize/asn1toavro.py +160 -0
- avrotize/avrotize.py +152 -0
- avrotize/avrotocpp.py +483 -0
- avrotize/avrotocsharp.py +1075 -0
- avrotize/avrotocsv.py +121 -0
- avrotize/avrotodatapackage.py +173 -0
- avrotize/avrotodb.py +1383 -0
- avrotize/avrotogo.py +476 -0
- avrotize/avrotographql.py +197 -0
- avrotize/avrotoiceberg.py +210 -0
- avrotize/avrotojava.py +2156 -0
- avrotize/avrotojs.py +250 -0
- avrotize/avrotojsons.py +481 -0
- avrotize/avrotojstruct.py +345 -0
- avrotize/avrotokusto.py +364 -0
- avrotize/avrotomd.py +137 -0
- avrotize/avrotools.py +168 -0
- avrotize/avrotoparquet.py +208 -0
- avrotize/avrotoproto.py +359 -0
- avrotize/avrotopython.py +624 -0
- avrotize/avrotorust.py +435 -0
- avrotize/avrotots.py +598 -0
- avrotize/avrotoxsd.py +344 -0
- avrotize/cddltostructure.py +1841 -0
- avrotize/commands.json +3337 -0
- avrotize/common.py +834 -0
- avrotize/constants.py +72 -0
- avrotize/csvtoavro.py +132 -0
- avrotize/datapackagetoavro.py +76 -0
- avrotize/dependencies/cpp/vcpkg/vcpkg.json +19 -0
- avrotize/dependencies/typescript/node22/package.json +16 -0
- avrotize/dependency_resolver.py +348 -0
- avrotize/dependency_version.py +432 -0
- avrotize/jsonstoavro.py +2167 -0
- avrotize/jsonstostructure.py +2642 -0
- avrotize/jstructtoavro.py +878 -0
- avrotize/kstructtoavro.py +93 -0
- avrotize/kustotoavro.py +455 -0
- avrotize/parquettoavro.py +157 -0
- avrotize/proto2parser.py +498 -0
- avrotize/proto3parser.py +403 -0
- avrotize/prototoavro.py +382 -0
- avrotize/structuretocddl.py +597 -0
- avrotize/structuretocpp.py +697 -0
- avrotize/structuretocsharp.py +2295 -0
- avrotize/structuretocsv.py +365 -0
- avrotize/structuretodatapackage.py +659 -0
- avrotize/structuretodb.py +1125 -0
- avrotize/structuretogo.py +720 -0
- avrotize/structuretographql.py +502 -0
- avrotize/structuretoiceberg.py +355 -0
- avrotize/structuretojava.py +853 -0
- avrotize/structuretojsons.py +498 -0
- avrotize/structuretokusto.py +639 -0
- avrotize/structuretomd.py +322 -0
- avrotize/structuretoproto.py +764 -0
- avrotize/structuretopython.py +772 -0
- avrotize/structuretorust.py +714 -0
- avrotize/structuretots.py +653 -0
- avrotize/structuretoxsd.py +679 -0
- avrotize/xsdtoavro.py +413 -0
- structurize-2.19.0.dist-info/METADATA +107 -0
- structurize-2.19.0.dist-info/RECORD +70 -0
- structurize-2.19.0.dist-info/WHEEL +5 -0
- structurize-2.19.0.dist-info/entry_points.txt +2 -0
- structurize-2.19.0.dist-info/licenses/LICENSE +201 -0
- structurize-2.19.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import uuid
|
|
3
|
+
from typing import Any, Dict, List, Union
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class AvroToJsonStructure:
|
|
7
|
+
"""
|
|
8
|
+
Convert (one or more) Avro schemas into a single JSON-Structure document.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
def __init__(self, avro_encoding: bool = False) -> None:
|
|
12
|
+
self.known_types: set[str] = set()
|
|
13
|
+
self.reference_stack: set[str] = set()
|
|
14
|
+
self.avro_encoding: bool = avro_encoding
|
|
15
|
+
|
|
16
|
+
# ------------------------------------------------------------------ TOP-LEVEL
|
|
17
|
+
|
|
18
|
+
def convert(
|
|
19
|
+
self,
|
|
20
|
+
avro_schema: Union[Dict[str, Any], List[Any]],
|
|
21
|
+
namespace: str | None = None,
|
|
22
|
+
) -> Dict[str, Any]:
|
|
23
|
+
"""
|
|
24
|
+
Entry-point: return a full JSON-Structure document for `avro_schema`.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
# ------------- LIST (multiple root schemas) --------------------
|
|
28
|
+
if isinstance(avro_schema, list):
|
|
29
|
+
# Empty list – return a stub document
|
|
30
|
+
if not avro_schema:
|
|
31
|
+
anon_name = f"empty_list_{uuid.uuid4().hex[:8]}"
|
|
32
|
+
return {
|
|
33
|
+
"$schema": "https://json-structure.org/meta/core/v0/#",
|
|
34
|
+
"$id": f"https://example.com/schemas/{anon_name}",
|
|
35
|
+
"name": anon_name,
|
|
36
|
+
"definitions": {},
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
# TEMPORARY: process only first element
|
|
40
|
+
first = avro_schema[0]
|
|
41
|
+
if isinstance(first, dict):
|
|
42
|
+
return self.convert(first, namespace)
|
|
43
|
+
|
|
44
|
+
# First element non-dict → return stub
|
|
45
|
+
bad_name = f"invalid_list_root_{uuid.uuid4().hex[:8]}"
|
|
46
|
+
return {
|
|
47
|
+
"$schema": "https://json-structure.org/meta/core/v0/#",
|
|
48
|
+
"$id": f"https://example.com/schemas/{bad_name}",
|
|
49
|
+
"name": bad_name,
|
|
50
|
+
"definitions": {},
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
# ------------- SINGLE SCHEMA -----------------------------------
|
|
54
|
+
# Reset caches for each top-level conversion
|
|
55
|
+
self.known_types.clear()
|
|
56
|
+
self.reference_stack.clear()
|
|
57
|
+
|
|
58
|
+
current_namespace = avro_schema.get("namespace", namespace)
|
|
59
|
+
name = self.clean_name(
|
|
60
|
+
avro_schema.get("name", f"AnonymousType_{uuid.uuid4().hex}")
|
|
61
|
+
)
|
|
62
|
+
fqn = self.get_fqn(current_namespace, name)
|
|
63
|
+
|
|
64
|
+
doc: Dict[str, Any] = {
|
|
65
|
+
"$schema": "https://json-structure.org/meta/core/v0/#",
|
|
66
|
+
"$id": f"https://example.com/schemas/{fqn}",
|
|
67
|
+
"name": name,
|
|
68
|
+
"$root": f"#/definitions/{fqn}",
|
|
69
|
+
"definitions": {},
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
# Build definitions – do NOT skip root
|
|
73
|
+
self.register_definition(avro_schema, current_namespace, doc["definitions"])
|
|
74
|
+
return doc
|
|
75
|
+
|
|
76
|
+
# ------------------------------------------------------------------ REGISTRATION
|
|
77
|
+
|
|
78
|
+
def register_definition(
|
|
79
|
+
self,
|
|
80
|
+
avro_schema: Dict[str, Any],
|
|
81
|
+
namespace: str | None,
|
|
82
|
+
definitions: Dict[str, Any],
|
|
83
|
+
is_root: bool = False, # retained only for signature compatibility
|
|
84
|
+
) -> None:
|
|
85
|
+
"""
|
|
86
|
+
Ensure `avro_schema` has an entry in `definitions`.
|
|
87
|
+
"""
|
|
88
|
+
|
|
89
|
+
current_namespace = avro_schema.get("namespace", namespace)
|
|
90
|
+
name = self.clean_name(
|
|
91
|
+
avro_schema.get("name", f"AnonymousType_{uuid.uuid4().hex}")
|
|
92
|
+
)
|
|
93
|
+
fqn = self.get_fqn(current_namespace, name)
|
|
94
|
+
|
|
95
|
+
if fqn in self.known_types: # already built / in progress
|
|
96
|
+
return
|
|
97
|
+
|
|
98
|
+
self.known_types.add(fqn)
|
|
99
|
+
|
|
100
|
+
created = self.build_type_definition(avro_schema, current_namespace, definitions)
|
|
101
|
+
|
|
102
|
+
# Remove marker if nothing was actually created
|
|
103
|
+
if fqn not in definitions and created is None:
|
|
104
|
+
self.known_types.discard(fqn)
|
|
105
|
+
|
|
106
|
+
# ------------------------------------------------------------------ BUILD TYPE
|
|
107
|
+
|
|
108
|
+
def build_type_definition(self, avro_schema, namespace, definitions):
|
|
109
|
+
if not isinstance(avro_schema, dict): # Should be a complex type dict
|
|
110
|
+
return None
|
|
111
|
+
|
|
112
|
+
avro_type = avro_schema.get("type")
|
|
113
|
+
# Use the schema's own namespace if provided, otherwise fall back to the passed 'namespace'
|
|
114
|
+
current_schema_namespace = avro_schema.get("namespace", namespace)
|
|
115
|
+
name = self.clean_name(avro_schema.get("name", f"AnonymousType_{uuid.uuid4().hex}"))
|
|
116
|
+
fqn = self.get_fqn(current_schema_namespace, name)
|
|
117
|
+
|
|
118
|
+
if fqn in self.reference_stack:
|
|
119
|
+
# Circular reference during the build of this specific definition.
|
|
120
|
+
# Depending on JSON Structure spec, could return a $ref or handle as error.
|
|
121
|
+
# For now, allowing it to proceed might lead to incomplete recursive definitions
|
|
122
|
+
# if not handled carefully by $ref logic in resolve_avro_type.
|
|
123
|
+
# However, known_types in register_definition should catch completed cycles.
|
|
124
|
+
pass
|
|
125
|
+
|
|
126
|
+
self.reference_stack.add(fqn)
|
|
127
|
+
|
|
128
|
+
# This variable will hold the actual definition content (the value part of the key-value pair)
|
|
129
|
+
type_definition_content = None
|
|
130
|
+
|
|
131
|
+
if avro_type == "record":
|
|
132
|
+
props = {"name": name, "type": "object", "properties": {}, "required": []}
|
|
133
|
+
if "doc" in avro_schema:
|
|
134
|
+
props["description"] = avro_schema["doc"]
|
|
135
|
+
|
|
136
|
+
# Namespace for resolving field types within this record
|
|
137
|
+
record_fields_namespace = avro_schema.get("namespace", namespace)
|
|
138
|
+
|
|
139
|
+
for field in avro_schema.get("fields", []):
|
|
140
|
+
field_name = field["name"]
|
|
141
|
+
field_type_schema = field["type"]
|
|
142
|
+
|
|
143
|
+
resolved_field_type = self.resolve_avro_type(field_type_schema, record_fields_namespace, definitions)
|
|
144
|
+
|
|
145
|
+
if "default" in field:
|
|
146
|
+
resolved_field_type["default"] = self.encode_default_value(field["default"], resolved_field_type.get("type", "unknown"))
|
|
147
|
+
|
|
148
|
+
if not self.is_nullable_union(field_type_schema):
|
|
149
|
+
props["required"].append(field_name)
|
|
150
|
+
|
|
151
|
+
if "doc" in field:
|
|
152
|
+
resolved_field_type["description"] = field["doc"]
|
|
153
|
+
|
|
154
|
+
props["properties"][field_name] = resolved_field_type
|
|
155
|
+
type_definition_content = props
|
|
156
|
+
|
|
157
|
+
elif avro_type == "enum":
|
|
158
|
+
props = {"name": name, "type": "string", "enum": avro_schema["symbols"]}
|
|
159
|
+
if "doc" in avro_schema:
|
|
160
|
+
props["description"] = avro_schema["doc"]
|
|
161
|
+
if "default" in avro_schema: # Avro enum default
|
|
162
|
+
props["default"] = avro_schema["default"]
|
|
163
|
+
type_definition_content = props
|
|
164
|
+
|
|
165
|
+
elif avro_type == "fixed":
|
|
166
|
+
props = {"name": name, "type": "binary", "byteLength": avro_schema["size"]} # Consider "maxLength" or custom prop
|
|
167
|
+
if "doc" in avro_schema:
|
|
168
|
+
props["description"] = avro_schema["doc"]
|
|
169
|
+
type_definition_content = props
|
|
170
|
+
|
|
171
|
+
elif isinstance(avro_type, str) and avro_schema.get("logicalType"):
|
|
172
|
+
# This is a named type that is also a logical type, e.g. a named decimal
|
|
173
|
+
props = self.resolve_logical_type(avro_schema["logicalType"], avro_schema)
|
|
174
|
+
# Ensure name and description from the schema are part of the definition
|
|
175
|
+
if "name" not in props: props["name"] = name
|
|
176
|
+
if "doc" in avro_schema and "description" not in props : props["description"] = avro_schema["doc"]
|
|
177
|
+
type_definition_content = props
|
|
178
|
+
|
|
179
|
+
elif isinstance(avro_type, (list, dict)) and not avro_schema.get("name"):
|
|
180
|
+
# An anonymous complex type (array, map, union) is the schema itself.
|
|
181
|
+
# It needs a generated name (which 'name' variable already holds).
|
|
182
|
+
props = self.resolve_avro_type(avro_schema, current_schema_namespace, definitions)
|
|
183
|
+
if "name" not in props: props["name"] = name # Ensure generated name is part of definition
|
|
184
|
+
type_definition_content = props
|
|
185
|
+
|
|
186
|
+
# else:
|
|
187
|
+
# If avro_type is a primitive string (e.g. "string", "int") or a named type reference string,
|
|
188
|
+
# it doesn't form a new entry in "definitions" by itself.
|
|
189
|
+
# resolve_avro_type handles these cases by returning the primitive type object or a $ref.
|
|
190
|
+
# So, type_definition_content remains None, and nothing is added to definitions here.
|
|
191
|
+
|
|
192
|
+
# If a definition was constructed, add it to the definitions map with proper nesting.
|
|
193
|
+
if type_definition_content is not None:
|
|
194
|
+
parts = fqn.split('/')
|
|
195
|
+
current_level_dict = definitions
|
|
196
|
+
for i, part_name in enumerate(parts):
|
|
197
|
+
if i == len(parts) - 1: # Last part is the type name itself
|
|
198
|
+
current_level_dict[part_name] = type_definition_content
|
|
199
|
+
else: # This is a namespace part
|
|
200
|
+
current_level_dict = current_level_dict.setdefault(part_name, {})
|
|
201
|
+
|
|
202
|
+
self.reference_stack.remove(fqn)
|
|
203
|
+
return type_definition_content # Return the definition object (or None)
|
|
204
|
+
|
|
205
|
+
# ------------------------------------------------------------------ RESOLVE TYPE
|
|
206
|
+
|
|
207
|
+
def resolve_avro_type(
|
|
208
|
+
self,
|
|
209
|
+
avro_type_schema: Any,
|
|
210
|
+
context_namespace: str | None,
|
|
211
|
+
definitions: Dict[str, Any],
|
|
212
|
+
) -> Dict[str, Any]:
|
|
213
|
+
"""
|
|
214
|
+
Convert any Avro type expression into a JSON-Structure node (or $ref).
|
|
215
|
+
"""
|
|
216
|
+
|
|
217
|
+
# ------------------ STRING (primitive or reference) --------------
|
|
218
|
+
if isinstance(avro_type_schema, str):
|
|
219
|
+
if avro_type_schema in self.get_primitive_types():
|
|
220
|
+
return {"type": self.get_primitive_types()[avro_type_schema]}
|
|
221
|
+
# Named type reference
|
|
222
|
+
if "." in avro_type_schema:
|
|
223
|
+
ref_fqn = avro_type_schema.replace(".", "/")
|
|
224
|
+
else:
|
|
225
|
+
ref_fqn = self.get_fqn(context_namespace, self.clean_name(avro_type_schema))
|
|
226
|
+
return {"$ref": f"#/definitions/{ref_fqn}"}
|
|
227
|
+
|
|
228
|
+
# ------------------ UNION ----------------------------------------
|
|
229
|
+
if isinstance(avro_type_schema, list):
|
|
230
|
+
if not self.avro_encoding and "null" in avro_type_schema:
|
|
231
|
+
non_null = [t for t in avro_type_schema if t != "null"]
|
|
232
|
+
if len(non_null) == 1:
|
|
233
|
+
# Optional short-form
|
|
234
|
+
return self.resolve_avro_type(non_null[0], context_namespace, definitions)
|
|
235
|
+
|
|
236
|
+
choices: Dict[str, Any] = {}
|
|
237
|
+
for member in avro_type_schema:
|
|
238
|
+
if isinstance(member, str):
|
|
239
|
+
key = self.clean_name(member)
|
|
240
|
+
elif isinstance(member, dict) and member.get("name"):
|
|
241
|
+
key = self.clean_name(member["name"])
|
|
242
|
+
else:
|
|
243
|
+
key = f"anonymous_{uuid.uuid4().hex[:8]}"
|
|
244
|
+
choices[key] = self.resolve_avro_type(member, context_namespace, definitions)
|
|
245
|
+
|
|
246
|
+
return {"type": "choice", "choices": choices}
|
|
247
|
+
|
|
248
|
+
# ------------------ DICT (complex inline) ------------------------
|
|
249
|
+
if isinstance(avro_type_schema, dict):
|
|
250
|
+
category = avro_type_schema.get("type")
|
|
251
|
+
inline_ns = avro_type_schema.get("namespace", context_namespace)
|
|
252
|
+
|
|
253
|
+
if category in ("record", "enum", "fixed"):
|
|
254
|
+
# Ensure definition exists then reference it
|
|
255
|
+
self.register_definition(avro_type_schema, inline_ns, definitions)
|
|
256
|
+
ref_name = self.clean_name(avro_type_schema["name"])
|
|
257
|
+
ref_fqn = self.get_fqn(inline_ns, ref_name)
|
|
258
|
+
return {"$ref": f"#/definitions/{ref_fqn}"}
|
|
259
|
+
|
|
260
|
+
if category == "array":
|
|
261
|
+
return {
|
|
262
|
+
"type": "array",
|
|
263
|
+
"items": self.resolve_avro_type(
|
|
264
|
+
avro_type_schema["items"], inline_ns, definitions
|
|
265
|
+
),
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
if category == "map":
|
|
269
|
+
return {
|
|
270
|
+
"type": "map",
|
|
271
|
+
"values": self.resolve_avro_type(
|
|
272
|
+
avro_type_schema["values"], inline_ns, definitions
|
|
273
|
+
),
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
logical_type = avro_type_schema.get("logicalType")
|
|
277
|
+
if logical_type:
|
|
278
|
+
return self.resolve_logical_type(logical_type, avro_type_schema)
|
|
279
|
+
|
|
280
|
+
raise ValueError(f"Unsupported Avro type schema: {avro_type_schema}")
|
|
281
|
+
|
|
282
|
+
# ------------------------------------------------------------------ HELPERS
|
|
283
|
+
|
|
284
|
+
def is_nullable_union(self, avro_field_type_schema: Any) -> bool:
|
|
285
|
+
return isinstance(avro_field_type_schema, list) and "null" in avro_field_type_schema
|
|
286
|
+
|
|
287
|
+
def encode_default_value(self, value: Any, json_structure_type: str) -> Any:
|
|
288
|
+
# Minimal – pass through. Extend for binary/base64 etc. if needed.
|
|
289
|
+
return value
|
|
290
|
+
|
|
291
|
+
def resolve_logical_type(self, logical_type: str, schema: Dict[str, Any]) -> Dict[str, Any]:
|
|
292
|
+
"""
|
|
293
|
+
Very small logical-type mapping demo. Extend as required.
|
|
294
|
+
"""
|
|
295
|
+
mapping = {
|
|
296
|
+
"timestamp-micros": {"type": "int64", "logicalType": "timestampMicros"},
|
|
297
|
+
"timestamp-millis": {"type": "int64", "logicalType": "timestampMillis"},
|
|
298
|
+
"date": {"type": "int32", "logicalType": "date"},
|
|
299
|
+
"uuid": {"type": "string", "format": "uuid"},
|
|
300
|
+
}
|
|
301
|
+
return mapping.get(logical_type, {"type": "string"})
|
|
302
|
+
|
|
303
|
+
def clean_name(self, name: str) -> str:
|
|
304
|
+
return name.replace(".", "_")
|
|
305
|
+
|
|
306
|
+
def get_fqn(self, namespace: str | None, name: str) -> str:
|
|
307
|
+
if namespace:
|
|
308
|
+
return f"{namespace.replace('.', '/')}/{name}"
|
|
309
|
+
return name
|
|
310
|
+
|
|
311
|
+
@staticmethod
|
|
312
|
+
def get_primitive_types() -> Dict[str, str]:
|
|
313
|
+
return {
|
|
314
|
+
"string": "string",
|
|
315
|
+
"boolean": "boolean",
|
|
316
|
+
"int": "int32",
|
|
317
|
+
"long": "int64",
|
|
318
|
+
"float": "float",
|
|
319
|
+
"double": "double",
|
|
320
|
+
"bytes": "binary",
|
|
321
|
+
"null": "null",
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
# ---------------------------------------------------------------------- CLI HELPER
|
|
326
|
+
|
|
327
|
+
def convert_avro_to_json_structure(
    avro_schema_file: str,
    json_structure_file: str,
    naming_mode: str = "default",
    avro_encoding: bool = False,
) -> None:
    """
    Read an Avro schema from one file and write its JSON-Structure form to another.

    Args:
        avro_schema_file: path of the JSON file holding the Avro schema.
        json_structure_file: path the JSON-Structure document is written to.
        naming_mode: accepted but not used by this function.
        avro_encoding: forwarded to the `AvroToJsonStructure` constructor.
    """
    # Load the source schema first; both files are handled as UTF-8 text.
    with open(avro_schema_file, "r", encoding="utf-8") as source:
        parsed_schema = json.load(source)

    document = AvroToJsonStructure(avro_encoding=avro_encoding).convert(parsed_schema)

    with open(json_structure_file, "w", encoding="utf-8") as target:
        json.dump(document, target, indent=4)
|