structurize 3.5.8__tar.gz → 3.5.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {structurize-3.5.8/structurize.egg-info → structurize-3.5.9}/PKG-INFO +1 -1
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/_version.py +3 -3
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotocpp.py +6 -3
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotogo.py +4 -1
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotojava.py +8 -4
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotojs.py +4 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotojsons.py +5 -2
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotojstruct.py +4 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotoproto.py +6 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotopython.py +4 -1
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotorust.py +17 -4
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotots.py +4 -1
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotoxsd.py +3 -1
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/common.py +318 -29
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/jsonstoavro.py +3 -1
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/jstructtoavro.py +17 -10
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretocpp.py +43 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretogo.py +3 -1
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretojava.py +2 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretorust.py +2 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretots.py +2 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/xsdtoavro.py +2 -1
- {structurize-3.5.8 → structurize-3.5.9/structurize.egg-info}/PKG-INFO +1 -1
- {structurize-3.5.8 → structurize-3.5.9}/.gitignore +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/LICENSE +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/MANIFEST.in +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/README.md +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/__init__.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/__main__.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/asn1toavro.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotize.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotocsharp.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotocsv.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotodatapackage.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotodb.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotographql.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotoiceberg.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotokusto.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotomd.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotools.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrotoparquet.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrototsml.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/avrovalidator.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/cddltostructure.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/choice_inference.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/commands.json +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/constants.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/csvtoavro.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/datapackagetoavro.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/dependencies/cpp/vcpkg/vcpkg.json +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/dependencies/typescript/node22/package.json +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/dependency_resolver.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/dependency_version.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/jsonstostructure.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/jsontoschema.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/kstructtoavro.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/kustotoavro.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/kustotojstruct.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/mcp_server.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/openapitostructure.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/parquettoavro.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/proto2parser.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/proto3parser.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/prototoavro.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/schema_inference.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/sqltoavro.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretocddl.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretocsharp.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretocsv.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretodatapackage.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretodb.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretographql.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretoiceberg.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretojs.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretojsons.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretokusto.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretomd.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretoproto.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretopython.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretotsml.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/structuretoxsd.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/tmslvalidate.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/validate.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/avrotize/xmltoschema.py +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/build.ps1 +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/build.sh +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/pyproject.toml +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/setup.cfg +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/structurize.egg-info/SOURCES.txt +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/structurize.egg-info/dependency_links.txt +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/structurize.egg-info/entry_points.txt +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/structurize.egg-info/requires.txt +0 -0
- {structurize-3.5.8 → structurize-3.5.9}/structurize.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: structurize
|
|
3
|
-
Version: 3.5.8
|
|
3
|
+
Version: 3.5.9
|
|
4
4
|
Summary: Tools to convert from and to JSON Structure from various other schema languages.
|
|
5
5
|
Author-email: Clemens Vasters <clemensv@microsoft.com>
|
|
6
6
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
|
|
|
18
18
|
commit_id: str | None
|
|
19
19
|
__commit_id__: str | None
|
|
20
20
|
|
|
21
|
-
__version__ = version = '3.5.
|
|
22
|
-
__version_tuple__ = version_tuple = (3, 5,
|
|
21
|
+
__version__ = version = '3.5.9'
|
|
22
|
+
__version_tuple__ = version_tuple = (3, 5, 9)
|
|
23
23
|
|
|
24
|
-
__commit_id__ = commit_id = '
|
|
24
|
+
__commit_id__ = commit_id = 'gb097b347e'
|
|
@@ -5,7 +5,7 @@ import json
|
|
|
5
5
|
import os
|
|
6
6
|
from typing import Dict, List, Union
|
|
7
7
|
|
|
8
|
-
from avrotize.common import is_generic_avro_type, pascal, process_template
|
|
8
|
+
from avrotize.common import is_generic_avro_type, is_any_value_type, pascal, process_template
|
|
9
9
|
|
|
10
10
|
INDENT = ' '
|
|
11
11
|
|
|
@@ -45,6 +45,9 @@ class AvroToCpp:
|
|
|
45
45
|
|
|
46
46
|
def map_primitive_to_cpp(self, avro_type: str, is_optional: bool) -> str:
|
|
47
47
|
"""Maps Avro primitive types to C++ types"""
|
|
48
|
+
# Handle AnyValue (extensible any type) regardless of namespace qualification
|
|
49
|
+
if is_any_value_type(avro_type):
|
|
50
|
+
return 'std::optional<nlohmann::json>' if is_optional else 'nlohmann::json'
|
|
48
51
|
optional_mapping = {
|
|
49
52
|
'null': 'std::optional<std::monostate>',
|
|
50
53
|
'boolean': 'std::optional<bool>',
|
|
@@ -53,7 +56,7 @@ class AvroToCpp:
|
|
|
53
56
|
'float': 'std::optional<float>',
|
|
54
57
|
'double': 'std::optional<double>',
|
|
55
58
|
'bytes': 'std::optional<std::vector<uint8_t>>',
|
|
56
|
-
'string': 'std::optional<std::string>'
|
|
59
|
+
'string': 'std::optional<std::string>',
|
|
57
60
|
}
|
|
58
61
|
required_mapping = {
|
|
59
62
|
'null': 'std::monostate',
|
|
@@ -63,7 +66,7 @@ class AvroToCpp:
|
|
|
63
66
|
'float': 'float',
|
|
64
67
|
'double': 'double',
|
|
65
68
|
'bytes': 'std::vector<uint8_t>',
|
|
66
|
-
'string': 'std::string'
|
|
69
|
+
'string': 'std::string',
|
|
67
70
|
}
|
|
68
71
|
if '.' in avro_type:
|
|
69
72
|
type_name = avro_type.split('.')[-1]
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import os
|
|
3
3
|
from typing import Dict, List, Union, Set
|
|
4
|
-
from avrotize.common import get_longest_namespace_prefix, is_generic_avro_type, pascal, render_template
|
|
4
|
+
from avrotize.common import get_longest_namespace_prefix, is_generic_avro_type, is_any_value_type, pascal, render_template
|
|
5
5
|
|
|
6
6
|
INDENT = ' '
|
|
7
7
|
|
|
@@ -55,6 +55,9 @@ class AvroToGo:
|
|
|
55
55
|
|
|
56
56
|
def map_primitive_to_go(self, avro_type: str, is_optional: bool) -> str:
|
|
57
57
|
"""Maps Avro primitive types to Go types"""
|
|
58
|
+
# Handle AnyValue (extensible any type) regardless of namespace qualification
|
|
59
|
+
if is_any_value_type(avro_type):
|
|
60
|
+
return 'interface{}'
|
|
58
61
|
optional_mapping = {
|
|
59
62
|
'null': 'interface{}',
|
|
60
63
|
'boolean': '*bool',
|
|
@@ -7,7 +7,7 @@ from typing import Dict, List, Tuple, Union
|
|
|
7
7
|
from avrotize.constants import (AVRO_VERSION, JACKSON_ANNOTATIONS_VERSION, JACKSON_VERSION,
|
|
8
8
|
JDK_VERSION, JUNIT_VERSION, MAVEN_COMPILER_VERSION, MAVEN_SUREFIRE_VERSION)
|
|
9
9
|
|
|
10
|
-
from avrotize.common import pascal, camel, is_generic_avro_type, inline_avro_references, build_flat_type_dict
|
|
10
|
+
from avrotize.common import pascal, camel, is_generic_avro_type, is_any_value_type, inline_avro_references, build_flat_type_dict
|
|
11
11
|
|
|
12
12
|
INDENT = ' '
|
|
13
13
|
POM_CONTENT = """<?xml version="1.0" encoding="UTF-8"?>
|
|
@@ -296,6 +296,9 @@ class AvroToJava:
|
|
|
296
296
|
|
|
297
297
|
def map_primitive_to_java(self, avro_type: str, is_optional: bool) -> JavaType:
|
|
298
298
|
"""Maps Avro primitive types to Java types"""
|
|
299
|
+
# Handle AnyValue (extensible any type) regardless of namespace qualification
|
|
300
|
+
if is_any_value_type(avro_type):
|
|
301
|
+
return AvroToJava.JavaType('Object')
|
|
299
302
|
optional_mapping = {
|
|
300
303
|
'null': 'Void',
|
|
301
304
|
'boolean': 'Boolean',
|
|
@@ -1314,9 +1317,10 @@ class AvroToJava:
|
|
|
1314
1317
|
|
|
1315
1318
|
union_variable_name = self.safe_identifier(union_variable_name, class_name)
|
|
1316
1319
|
|
|
1317
|
-
# Constructor for each type
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
+
# Constructor for each type (skip Object to avoid duplicate with the generic Object constructor)
|
|
1321
|
+
if union_type.type_name != 'Object':
|
|
1322
|
+
class_definition_ctors += \
|
|
1323
|
+
f"{INDENT*1}public {union_class_name}({union_type.type_name} {union_variable_name}) {{\n{INDENT*2}this._{camel(union_variable_name)} = {union_variable_name};\n{INDENT*1}}}\n"
|
|
1320
1324
|
|
|
1321
1325
|
# Declarations
|
|
1322
1326
|
class_definition_decls += \
|
|
@@ -5,6 +5,7 @@ import os
|
|
|
5
5
|
from typing import Any, Dict, List, Set, Union
|
|
6
6
|
|
|
7
7
|
from avrotize.common import pascal
|
|
8
|
+
from avrotize.common import is_any_value_type
|
|
8
9
|
|
|
9
10
|
INDENT = ' ' * 4
|
|
10
11
|
|
|
@@ -34,6 +35,9 @@ class AvroToJavaScript:
|
|
|
34
35
|
|
|
35
36
|
def map_primitive_to_javascript(self, avro_type: str) -> str:
|
|
36
37
|
""" Map Avro primitive type to TypeScript type """
|
|
38
|
+
# Handle AnyValue (extensible any type) regardless of namespace qualification
|
|
39
|
+
if is_any_value_type(avro_type):
|
|
40
|
+
return 'any'
|
|
37
41
|
mapping = {
|
|
38
42
|
'null': 'null',
|
|
39
43
|
'boolean': 'boolean',
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import copy
|
|
2
2
|
import json
|
|
3
3
|
from typing import Dict, Any, Union, List
|
|
4
|
-
from avrotize.common import build_tree_hash_list, group_by_hash, is_generic_json_type, NodeHashReference
|
|
4
|
+
from avrotize.common import build_tree_hash_list, group_by_hash, is_any_value_type, is_generic_json_type, NodeHashReference
|
|
5
5
|
from functools import reduce
|
|
6
6
|
import jsonpath_ng
|
|
7
7
|
|
|
@@ -141,8 +141,11 @@ class AvroToJsonSchemaConverter:
|
|
|
141
141
|
'double': {'type': 'number', 'format': 'double'},
|
|
142
142
|
'bytes': {'type': 'string', 'contentEncoding': 'base64'},
|
|
143
143
|
'string': {'type': 'string'},
|
|
144
|
-
'fixed': {'type': 'string'} # Could specify length in a format or a separate attribute
|
|
144
|
+
'fixed': {'type': 'string'}, # Could specify length in a format or a separate attribute
|
|
145
145
|
}
|
|
146
|
+
# Handle AnyValue (extensible any type) regardless of namespace qualification
|
|
147
|
+
if isinstance(avro_type, str) and is_any_value_type(avro_type):
|
|
148
|
+
return {}
|
|
146
149
|
type_ref = mapping.get(avro_type, '') # Defaulting to string type for any unknown types
|
|
147
150
|
if not type_ref:
|
|
148
151
|
raise ValueError(f"Avro schema contains unexpected type {avro_type}")
|
|
@@ -2,6 +2,8 @@ import json
|
|
|
2
2
|
import uuid
|
|
3
3
|
from typing import Any, Dict, List, Union
|
|
4
4
|
|
|
5
|
+
from avrotize.common import is_any_value_type
|
|
6
|
+
|
|
5
7
|
|
|
6
8
|
class AvroToJsonStructure:
|
|
7
9
|
"""
|
|
@@ -220,6 +222,8 @@ class AvroToJsonStructure:
|
|
|
220
222
|
|
|
221
223
|
# ------------------ STRING (primitive or reference) --------------
|
|
222
224
|
if isinstance(avro_type_schema, str):
|
|
225
|
+
if is_any_value_type(avro_type_schema):
|
|
226
|
+
return {"type": "any"}
|
|
223
227
|
if avro_type_schema in self.get_primitive_types():
|
|
224
228
|
return {"type": self.get_primitive_types()[avro_type_schema]}
|
|
225
229
|
# Named type reference
|
|
@@ -4,6 +4,8 @@ import argparse
|
|
|
4
4
|
import os
|
|
5
5
|
from typing import Literal, NamedTuple, Dict, Any, List
|
|
6
6
|
|
|
7
|
+
from avrotize.common import is_any_value_type
|
|
8
|
+
|
|
7
9
|
indent = ' '
|
|
8
10
|
|
|
9
11
|
Comment = NamedTuple('Comment', [('content', str), ('tags', Dict[str, Any])])
|
|
@@ -39,6 +41,10 @@ class AvroToProto:
|
|
|
39
41
|
'bytes': 'bytes',
|
|
40
42
|
'string': 'string',
|
|
41
43
|
}
|
|
44
|
+
# Handle AnyValue (extensible any type) regardless of namespace qualification
|
|
45
|
+
if isinstance(avro_type, str) and is_any_value_type(avro_type):
|
|
46
|
+
dependencies.append('google/protobuf/any.proto')
|
|
47
|
+
return 'google.protobuf.Any'
|
|
42
48
|
# logical types require special handling
|
|
43
49
|
if isinstance(avro_type, dict) and 'logicalType' in avro_type:
|
|
44
50
|
logical_type = avro_type['logicalType']
|
|
@@ -8,7 +8,7 @@ import os
|
|
|
8
8
|
import re
|
|
9
9
|
import random
|
|
10
10
|
from typing import Dict, List, Set, Tuple, Union, Any
|
|
11
|
-
from avrotize.common import fullname, get_typing_args_from_string, is_generic_avro_type, pascal, process_template, build_flat_type_dict, inline_avro_references, is_type_with_alternate, strip_alternate_type
|
|
11
|
+
from avrotize.common import fullname, get_typing_args_from_string, is_generic_avro_type, is_any_value_type, pascal, process_template, build_flat_type_dict, inline_avro_references, is_type_with_alternate, strip_alternate_type
|
|
12
12
|
|
|
13
13
|
INDENT = ' '
|
|
14
14
|
|
|
@@ -145,6 +145,9 @@ class AvroToPython:
|
|
|
145
145
|
}
|
|
146
146
|
if is_generic_avro_type(avro_type):
|
|
147
147
|
return True, 'typing.Any'
|
|
148
|
+
# Handle AnyValue (extensible any type) regardless of namespace qualification
|
|
149
|
+
if isinstance(avro_type, str) and is_any_value_type(avro_type):
|
|
150
|
+
return True, 'typing.Any'
|
|
148
151
|
mapped = mapping.get(avro_type, None)
|
|
149
152
|
if mapped:
|
|
150
153
|
return True, mapped
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import os
|
|
3
3
|
from typing import Dict, List, Union
|
|
4
|
-
from avrotize.common import is_generic_avro_type, render_template, pascal, camel, snake
|
|
4
|
+
from avrotize.common import is_generic_avro_type, is_any_value_type, render_template, pascal, camel, snake
|
|
5
5
|
|
|
6
6
|
INDENT = ' '
|
|
7
7
|
|
|
@@ -44,6 +44,9 @@ class AvroToRust:
|
|
|
44
44
|
|
|
45
45
|
def map_primitive_to_rust(self, avro_fullname: str, is_optional: bool) -> str:
|
|
46
46
|
"""Maps Avro primitive types to Rust types"""
|
|
47
|
+
# Handle AnyValue (extensible any type) regardless of namespace qualification
|
|
48
|
+
if is_any_value_type(avro_fullname):
|
|
49
|
+
return 'Option<serde_json::Value>' if is_optional else 'serde_json::Value'
|
|
47
50
|
optional_mapping = {
|
|
48
51
|
'null': 'None',
|
|
49
52
|
'boolean': 'Option<bool>',
|
|
@@ -279,15 +282,25 @@ class AvroToRust:
|
|
|
279
282
|
|
|
280
283
|
# Track seen predicates to identify structurally identical variants
|
|
281
284
|
seen_predicates: set = set()
|
|
285
|
+
# Track seen variant names to deduplicate
|
|
286
|
+
seen_names: dict = {}
|
|
282
287
|
union_fields = []
|
|
283
288
|
for i, t in enumerate(union_types):
|
|
284
289
|
predicate = self.get_is_json_match_clause(field_name, t, for_union=True)
|
|
285
290
|
# Mark if this is the first variant with this predicate structure
|
|
286
|
-
# Subsequent variants with same predicate can't be distinguished during JSON deserialization
|
|
287
291
|
is_first_with_predicate = predicate not in seen_predicates
|
|
288
292
|
seen_predicates.add(predicate)
|
|
293
|
+
|
|
294
|
+
# Deduplicate variant names
|
|
295
|
+
variant_name = pascal(t.rsplit('::',1)[-1])
|
|
296
|
+
if variant_name in seen_names:
|
|
297
|
+
seen_names[variant_name] += 1
|
|
298
|
+
variant_name = f"{variant_name}{seen_names[variant_name]}"
|
|
299
|
+
else:
|
|
300
|
+
seen_names[variant_name] = 1
|
|
301
|
+
|
|
289
302
|
union_fields.append({
|
|
290
|
-
'name':
|
|
303
|
+
'name': variant_name,
|
|
291
304
|
'type': t,
|
|
292
305
|
'random_value': self.generate_random_value(t),
|
|
293
306
|
'default_value': 'Default::default()',
|
|
@@ -379,7 +392,7 @@ class AvroToRust:
|
|
|
379
392
|
dependencies = []
|
|
380
393
|
if self.serde_annotation or self.avro_annotation:
|
|
381
394
|
dependencies.append('serde = { version = "1.0", features = ["derive"] }')
|
|
382
|
-
|
|
395
|
+
dependencies.append('serde_json = "1.0"')
|
|
383
396
|
dependencies.append('chrono = { version = "0.4", features = ["serde"] }')
|
|
384
397
|
dependencies.append('uuid = { version = "1.11", features = ["serde", "v4"] }')
|
|
385
398
|
if self.avro_annotation or self.serde_annotation:
|
|
@@ -4,7 +4,7 @@ import json
|
|
|
4
4
|
import os
|
|
5
5
|
from typing import Dict, List, Set, Union
|
|
6
6
|
|
|
7
|
-
from avrotize.common import build_flat_type_dict, fullname, inline_avro_references, is_generic_avro_type, is_type_with_alternate, pascal, process_template, strip_alternate_type
|
|
7
|
+
from avrotize.common import build_flat_type_dict, fullname, inline_avro_references, is_generic_avro_type, is_any_value_type, is_type_with_alternate, pascal, process_template, strip_alternate_type
|
|
8
8
|
from numpy import full
|
|
9
9
|
|
|
10
10
|
|
|
@@ -38,6 +38,9 @@ class AvroToTypeScript:
|
|
|
38
38
|
|
|
39
39
|
def map_primitive_to_typescript(self, avro_type: str) -> str:
|
|
40
40
|
"""Map Avro primitive type to TypeScript type."""
|
|
41
|
+
# Handle AnyValue (extensible any type) regardless of namespace qualification
|
|
42
|
+
if is_any_value_type(avro_type):
|
|
43
|
+
return 'any'
|
|
41
44
|
mapping = {
|
|
42
45
|
'null': 'null',
|
|
43
46
|
'boolean': 'boolean',
|
|
@@ -5,7 +5,7 @@ import xml.etree.ElementTree as ET
|
|
|
5
5
|
from xml.etree.ElementTree import Element, SubElement, tostring
|
|
6
6
|
from xml.dom import minidom
|
|
7
7
|
|
|
8
|
-
from avrotize.common import is_generic_avro_type
|
|
8
|
+
from avrotize.common import is_generic_avro_type, is_any_value_type
|
|
9
9
|
|
|
10
10
|
class AvroToXSD:
|
|
11
11
|
def __init__(self, target_namespace: str = ''):
|
|
@@ -83,6 +83,8 @@ class AvroToXSD:
|
|
|
83
83
|
if isinstance(avro_type, dict) and 'logicalType' in avro_type and 'type' in avro_type:
|
|
84
84
|
return avro_type['type'] in {'int', 'long', 'float', 'double', 'bytes', 'string'}
|
|
85
85
|
elif isinstance(avro_type, str):
|
|
86
|
+
if is_any_value_type(avro_type):
|
|
87
|
+
return True
|
|
86
88
|
return avro_type in {'null', 'boolean', 'int', 'long', 'float', 'double', 'bytes', 'string'}
|
|
87
89
|
else:
|
|
88
90
|
return False
|
|
@@ -60,42 +60,293 @@ def avro_namespace(name):
|
|
|
60
60
|
return val
|
|
61
61
|
|
|
62
62
|
|
|
63
|
-
|
|
63
|
+
# Namespace used for all AnyValue record variants.
ANY_VALUE_NAMESPACE = "avrotize"

# Avro record definition for the extensible 'any' type: an empty record that
# lives in ANY_VALUE_NAMESPACE. Defined once, referenced by name thereafter.
ANY_VALUE_RECORD: dict = {
    "type": "record",
    "name": "AnyValue",
    "namespace": ANY_VALUE_NAMESPACE,
    "doc": "Extensible record placeholder for the 'any' type. Add fields via schema evolution.",
    "fields": [],
}

# Fully-qualified name reference for the AnyValue record. Derived from the
# record definition above so the name and the definition cannot drift apart.
ANY_VALUE_NAME = f"{ANY_VALUE_RECORD['namespace']}.{ANY_VALUE_RECORD['name']}"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def any_value_name(record_name: str = "", field_name: str = "") -> str:
    """Build the AnyValue record name for a given parent record / field context.

    The result has the form ``<record>_<field>_AnyValue`` so that separate
    any-typed fields get separately named records. Each contributing argument
    is passed through ``avro_name`` before it is joined.

    Args:
        record_name: Name of the parent record (e.g., 'Order'). Skipped when empty.
        field_name: Name of the field (e.g., 'data'). Skipped when empty or when
            it is equal to ``record_name``.

    Returns:
        str: A name such as 'Order_data_AnyValue', or plain 'AnyValue' when
        neither argument contributes a segment.
    """
    segments = [avro_name(record_name)] if record_name else []
    if field_name and field_name != record_name:
        segments.append(avro_name(field_name))
    # With no context segments the join degenerates to the bare suffix.
    return "_".join([*segments, "AnyValue"])
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def is_any_value_type(avro_type: str) -> bool:
    """Tell whether a type name refers to an AnyValue variant.

    A name matches when it starts with ``'avrotize.'`` or when it ends with
    ``'AnyValue'`` (which also covers the bare name ``'AnyValue'``).

    Args:
        avro_type: The type name to test. Non-string values never match.

    Returns:
        bool: True for a matching string, False otherwise.
    """
    if isinstance(avro_type, str):
        return avro_type.startswith('avrotize.') or avro_type.endswith('AnyValue')
    return False
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def generic_type(*, define_any_value: bool = True, name: str = "AnyValue") -> list[str | dict]:
|
|
64
112
|
"""
|
|
65
|
-
Constructs a generic Avro type
|
|
113
|
+
Constructs a generic Avro type as a union of all primitive types, an extensible
|
|
114
|
+
empty record, and recursive array/map types.
|
|
115
|
+
|
|
116
|
+
The record is an empty record that can be extended via Avro schema
|
|
117
|
+
evolution (adding fields with defaults). Arrays and maps reference it
|
|
118
|
+
by name, enabling infinite nesting.
|
|
119
|
+
|
|
120
|
+
Args:
|
|
121
|
+
define_any_value: If True (default), includes the full record definition.
|
|
122
|
+
Set to False for subsequent uses in the same schema to avoid redefinition errors.
|
|
123
|
+
name: Name for the extensible record. Defaults to "AnyValue".
|
|
124
|
+
Use a unique name per field (e.g., "PayloadAnyValue") to enable
|
|
125
|
+
independent schema evolution of different any-typed fields.
|
|
66
126
|
|
|
67
127
|
Returns:
|
|
68
|
-
list[str | dict]: A
|
|
69
|
-
"""
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
{
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
128
|
+
list[str | dict]: A union type representing 'any'.
|
|
129
|
+
"""
|
|
130
|
+
fqn = f"{ANY_VALUE_NAMESPACE}.{name}"
|
|
131
|
+
record_def: dict = {
|
|
132
|
+
"type": "record",
|
|
133
|
+
"name": name,
|
|
134
|
+
"namespace": ANY_VALUE_NAMESPACE,
|
|
135
|
+
"doc": "Extensible record placeholder for the 'any' type. Add fields via schema evolution.",
|
|
136
|
+
"fields": []
|
|
137
|
+
}
|
|
138
|
+
any_value_entry: str | dict = record_def if define_any_value else fqn
|
|
139
|
+
# Inner union used in array items and map values — references record by name.
|
|
140
|
+
# Record ref comes AFTER array/map so serializers try map before the empty record.
|
|
141
|
+
inner_union: list[str | dict] = [
|
|
142
|
+
"null", "boolean", "int", "long", "float", "double", "bytes", "string",
|
|
143
|
+
{"type": "array", "items": ["null", "boolean", "int", "long", "float", "double", "bytes", "string", fqn]},
|
|
144
|
+
{"type": "map", "values": ["null", "boolean", "int", "long", "float", "double", "bytes", "string", fqn]},
|
|
145
|
+
fqn
|
|
146
|
+
]
|
|
147
|
+
# Outer union — defines record (must come before array/map for schema parsing),
|
|
148
|
+
# then array/map use inner_union which references it by name
|
|
149
|
+
outer_union: list[str | dict] = [
|
|
150
|
+
"null", "boolean", "int", "long", "float", "double", "bytes", "string",
|
|
151
|
+
any_value_entry,
|
|
152
|
+
{"type": "array", "items": inner_union},
|
|
153
|
+
{"type": "map", "values": inner_union}
|
|
154
|
+
]
|
|
155
|
+
return outer_union
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _break_schema_aliasing(schema) -> None:
    """Replace the contents of a list/dict schema, in place, with a JSON round-tripped copy."""
    import json

    if not isinstance(schema, (list, dict)):
        return
    fresh = json.loads(json.dumps(schema))
    schema.clear()
    if isinstance(schema, list):
        schema.extend(fresh)
    else:
        schema.update(fresh)


def deduplicate_any_value_record(schema) -> None:
    """
    Post-process an Avro schema to ensure each AnyValue variant is defined only once.

    Handles both the default 'AnyValue' and per-field named variants (any record
    in the 'avrotize' namespace). Each unique name is kept at first occurrence;
    subsequent occurrences are replaced with name references.

    Also repairs schemas where definitions were lost during union
    merging/flattening by re-inserting definitions at the first reference point.

    Processing order:
      1. If ``_has_any_value_record`` reports AnyValue content, run a first
         de-duplication walk.
      2. Round-trip the schema through JSON so that no two paths share the same
         list/dict object; this keeps the repair step from touching a shared
         structure through more than one path. This is applied to list and dict
         schemas alike on both code paths, because the repair step runs on both.
      3. Re-insert definitions for names that are referenced but not defined.
      4. If step 1 ran, run a final de-duplication walk to clean up after the repair.

    Args:
        schema: The Avro schema (dict, list, or str) to deduplicate in place.
            The top-level object keeps its identity; nested containers are
            replaced by fresh copies. A str schema is left untouched by the
            aliasing step.
    """
    has_any_value = _has_any_value_record(schema)

    if has_any_value:
        # First definition of each name is kept; later ones become references.
        _deduplicate_any_value_walk(schema, set())

    _break_schema_aliasing(schema)

    # Repair: find refs without matching defs and re-insert definitions.
    _repair_missing_definitions(schema)

    if has_any_value:
        # Fresh "seen" set: clean up any duplicates introduced by the repair.
        _deduplicate_any_value_walk(schema, set())
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _is_any_value_record_node(node) -> bool:
    """Tell whether ``node`` is an AnyValue record definition.

    A node qualifies when it is a dict whose ``"type"`` is ``"record"`` and whose
    ``"namespace"`` equals ``ANY_VALUE_NAMESPACE``; the record's name is not inspected.
    """
    if not isinstance(node, dict):
        return False
    if node.get("type") == "record":
        return node.get("namespace") == ANY_VALUE_NAMESPACE
    return False
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def _has_any_value_record(node) -> bool:
    """Report whether the schema contains an AnyValue record definition or reference.

    Strings are tested with ``is_any_value_type``; lists are searched element by
    element; dicts match if they are themselves an AnyValue record definition or
    if any of their list/dict values match.

    NOTE(review): only list and dict values of a dict are descended into, so a
    reference stored directly as a string value of a dict (e.g. ``"type":
    "avrotize.AnyValue"``) is not detected here - confirm this is intended.
    """
    if isinstance(node, str):
        return is_any_value_type(node)

    if isinstance(node, list):
        children = node
    elif isinstance(node, dict):
        if _is_any_value_record_node(node):
            return True
        children = [value for value in node.values() if isinstance(value, (list, dict))]
    else:
        return False

    for child in children:
        if _has_any_value_record(child):
            return True
    return False
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def _replace_all_any_value_defs(node) -> None:
    """Replace ALL AnyValue record definitions with name references, in place.

    Every AnyValue record definition found as a list element or as a dict value
    is replaced by the string ``"<ANY_VALUE_NAMESPACE>.<record name>"``. Other
    lists and dicts are descended into recursively. ``node`` itself is never
    replaced, since there is no parent container to rebind it in.

    Args:
        node: The schema fragment (list, dict, or anything else) to rewrite.
            Values that are neither list nor dict are ignored.

    Raises:
        KeyError: If an AnyValue record definition has no ``"name"`` entry.
    """
    if isinstance(node, list):
        for index, item in enumerate(node):
            if _is_any_value_record_node(item):
                node[index] = f"{ANY_VALUE_NAMESPACE}.{item['name']}"
            else:
                _replace_all_any_value_defs(item)
    elif isinstance(node, dict):
        # Iterate over a snapshot because values are rebound during the loop.
        for key, value in list(node.items()):
            if _is_any_value_record_node(value):
                # A definition held directly as a dict value (e.g. a field's
                # "type") must be replaced too, not only those inside unions.
                node[key] = f"{ANY_VALUE_NAMESPACE}.{value['name']}"
            elif isinstance(value, (list, dict)):
                _replace_all_any_value_defs(value)
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _deduplicate_any_value_walk(node, seen_names: set) -> None:
    """Recursively walk the schema and de-duplicate AnyValue record definitions.

    The first definition met for each fully-qualified name is left in place and
    recorded in ``seen_names``. A later definition of the same name inside a list
    is removed and its name reference is appended to the END of that list, so
    that the entries that followed it (such as map/array types) come before the
    reference.

    NOTE(review): only definitions that are list elements are de-duplicated; a
    definition held directly as a dict value is descended into but is neither
    recorded nor replaced - confirm this is intended.

    Args:
        node: The schema fragment to process in place.
        seen_names: Fully-qualified names already defined; updated by the walk.
    """
    if isinstance(node, dict):
        for child in node.values():
            if isinstance(child, (list, dict)):
                _deduplicate_any_value_walk(child, seen_names)
        return

    if not isinstance(node, list):
        return

    # Manual index because the list is edited while it is being scanned.
    index = 0
    while index < len(node):
        entry = node[index]

        if not _is_any_value_record_node(entry):
            _deduplicate_any_value_walk(entry, seen_names)
            index += 1
            continue

        qualified = f"{ANY_VALUE_NAMESPACE}.{entry['name']}"
        if qualified not in seen_names:
            # First definition of this name: keep it where it is.
            seen_names.add(qualified)
            index += 1
            continue

        # Duplicate definition: drop it and reference it by name at the end.
        # The index is not advanced; the next entry has shifted into this slot.
        del node[index]
        node.append(qualified)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def _repair_missing_definitions(schema) -> None:
    """Give every dangling AnyValue reference an inline definition again.

    All AnyValue definitions and AnyValue name references in ``schema`` are
    collected. For each name that is referenced but never defined, a fresh
    empty placeholder record is built and substituted for the first
    reference to that name. The schema is modified in place; nothing happens
    when every referenced name is already defined.

    Args:
        schema: The schema structure (nested lists/dicts) to repair.
    """
    defined_names: set = set()
    referenced_names: set = set()
    _collect_any_value_names(schema, defined_names, referenced_names)

    namespace_prefix = f"{ANY_VALUE_NAMESPACE}."

    # Names with at least one reference but no definition anywhere.
    for dangling in referenced_names - defined_names:
        placeholder = {
            "type": "record",
            # Strip the leading namespace to recover the bare record name.
            "name": dangling.replace(namespace_prefix, "", 1),
            "namespace": ANY_VALUE_NAMESPACE,
            "doc": "Extensible record placeholder for the 'any' type. Add fields via schema evolution.",
            "fields": []
        }
        _replace_first_ref_with_def(schema, dangling, placeholder)
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def _collect_any_value_names(node, defined: set, referenced: set) -> None:
    """Gather the AnyValue names defined and referenced beneath ``node``.

    A string counts as a reference when it starts with the AnyValue
    namespace prefix and ends with ``"AnyValue"``. A dict counts as a
    definition when it is an AnyValue record node whose ``name`` ends with
    ``"AnyValue"``; its qualified name is added to ``defined``. Lists and
    all dict values are scanned recursively.

    Args:
        node: Schema fragment to scan (str, list, dict, or anything else).
        defined: Output set receiving qualified names of definitions.
        referenced: Output set receiving qualified name references.
    """
    if isinstance(node, str):
        if node.startswith(f"{ANY_VALUE_NAMESPACE}.") and node.endswith("AnyValue"):
            referenced.add(node)
        return

    if isinstance(node, list):
        for element in node:
            _collect_any_value_names(element, defined, referenced)
        return

    if not isinstance(node, dict):
        # Numbers, booleans, None, ... carry no names.
        return

    if _is_any_value_record_node(node) and node.get("name", "").endswith("AnyValue"):
        defined.add(f"{ANY_VALUE_NAMESPACE}.{node['name']}")

    # Non-container, non-string values are ignored by the recursive call.
    for child in node.values():
        _collect_any_value_names(child, defined, referenced)
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def _replace_first_ref_with_def(node, fqn: str, record_def: dict) -> bool:
|
|
318
|
+
"""Replace the first occurrence of a name reference string with a record definition.
|
|
319
|
+
|
|
320
|
+
Returns True if replacement was made, False otherwise.
|
|
321
|
+
"""
|
|
322
|
+
if isinstance(node, list):
|
|
323
|
+
for i, item in enumerate(node):
|
|
324
|
+
if item == fqn:
|
|
325
|
+
node[i] = record_def
|
|
326
|
+
return True
|
|
327
|
+
elif isinstance(item, (list, dict)):
|
|
328
|
+
if _replace_first_ref_with_def(item, fqn, record_def):
|
|
329
|
+
return True
|
|
330
|
+
elif isinstance(node, dict):
|
|
331
|
+
for key, value in node.items():
|
|
332
|
+
if value == fqn:
|
|
333
|
+
node[key] = record_def
|
|
334
|
+
return True
|
|
335
|
+
elif isinstance(value, (list, dict)):
|
|
336
|
+
if _replace_first_ref_with_def(value, fqn, record_def):
|
|
337
|
+
return True
|
|
338
|
+
return False
|
|
339
|
+
|
|
93
340
|
|
|
94
341
|
|
|
95
342
|
def is_generic_avro_type(avro_type: list) -> bool:
|
|
96
343
|
"""
|
|
97
344
|
Check if the given Avro type is a generic type.
|
|
98
345
|
|
|
346
|
+
Recognizes the current AnyValue-based format (with any name in the avrotize
|
|
347
|
+
namespace), the default AnyValue format, and the legacy 2-level nested
|
|
348
|
+
primitives union for backward compatibility.
|
|
349
|
+
|
|
99
350
|
Args:
|
|
100
351
|
avro_type (Union[str, Dict[str, Any]]): The Avro type to check.
|
|
101
352
|
|
|
@@ -104,8 +355,46 @@ def is_generic_avro_type(avro_type: list) -> bool:
|
|
|
104
355
|
"""
|
|
105
356
|
if isinstance(avro_type, str) or isinstance(avro_type, dict):
|
|
106
357
|
return False
|
|
107
|
-
|
|
108
|
-
|
|
358
|
+
# Check current default format (with full definition and with name reference)
|
|
359
|
+
if Compare().check(avro_type, generic_type(define_any_value=True)) == NO_DIFF:
|
|
360
|
+
return True
|
|
361
|
+
if Compare().check(avro_type, generic_type(define_any_value=False)) == NO_DIFF:
|
|
362
|
+
return True
|
|
363
|
+
# Check for per-field named variant: look for any avrotize.* record in the union
|
|
364
|
+
if _is_any_value_union_structure(avro_type):
|
|
365
|
+
return True
|
|
366
|
+
# Check legacy format (2-level nested primitives union without AnyValue)
|
|
367
|
+
if Compare().check(avro_type, _legacy_generic_type()) == NO_DIFF:
|
|
368
|
+
return True
|
|
369
|
+
return False
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def _is_any_value_union_structure(avro_type: list) -> bool:
    """Tell whether a union looks like a generic type built around an AnyValue record.

    The union qualifies when it has at least 11 members, its first eight
    members are exactly the primitive type names in the expected order, and
    at least one later member is either an inline AnyValue record node or a
    string starting with the AnyValue namespace prefix.

    Args:
        avro_type: The union (list of Avro types) to inspect.

    Returns:
        True when the union matches the structure described above.
    """
    leading_primitives = ["null", "boolean", "int", "long", "float", "double", "bytes", "string"]

    # 8 primitives + record + array + map is the minimum size.
    if len(avro_type) < 11 or avro_type[:8] != leading_primitives:
        return False

    # Accept either an inline definition or a name reference in the tail.
    return any(
        (isinstance(member, dict) and _is_any_value_record_node(member))
        or (isinstance(member, str) and member.startswith(f"{ANY_VALUE_NAMESPACE}."))
        for member in avro_type[8:]
    )
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
def _legacy_generic_type() -> list[str | dict]:
|
|
391
|
+
"""Construct the legacy generic Avro type (2-level nested primitives without AnyValue)."""
|
|
392
|
+
simple = ["null", "boolean", "int", "long", "float", "double", "bytes", "string"]
|
|
393
|
+
l2 = simple.copy()
|
|
394
|
+
l2.extend([{"type": "array", "items": simple}, {"type": "map", "values": simple}])
|
|
395
|
+
l1 = simple.copy()
|
|
396
|
+
l1.extend([{"type": "array", "items": l2}, {"type": "map", "values": l2}])
|
|
397
|
+
return l1
|
|
109
398
|
|
|
110
399
|
|
|
111
400
|
def is_generic_json_type(json_type: Dict[str, Any] | List[Dict[str, Any] | str] | str) -> bool:
|
|
@@ -12,7 +12,7 @@ import jsonpointer
|
|
|
12
12
|
from jsonpointer import JsonPointerException
|
|
13
13
|
import requests
|
|
14
14
|
|
|
15
|
-
from avrotize.common import avro_name, avro_namespace, find_schema_node, generic_type, set_schema_node
|
|
15
|
+
from avrotize.common import avro_name, avro_namespace, deduplicate_any_value_record, find_schema_node, generic_type, set_schema_node
|
|
16
16
|
from avrotize.dependency_resolver import inline_dependencies_of, sort_messages_by_dependencies
|
|
17
17
|
|
|
18
18
|
primitive_types = ['null', 'string', 'int',
|
|
@@ -2124,6 +2124,8 @@ class JsonToAvroConverter:
|
|
|
2124
2124
|
# drop the file name from the parsed URL to get the base URI
|
|
2125
2125
|
avro_schema = self.jsons_to_avro(
|
|
2126
2126
|
json_schema, namespace, parsed_url.geturl())
|
|
2127
|
+
# Deduplicate AnyValue record definitions (keep only the first one)
|
|
2128
|
+
deduplicate_any_value_record(avro_schema)
|
|
2127
2129
|
if len(avro_schema) == 1:
|
|
2128
2130
|
avro_schema = avro_schema[0]
|
|
2129
2131
|
|
|
@@ -8,6 +8,8 @@ This is the reverse operation of avrotojstruct.py.
|
|
|
8
8
|
import json
|
|
9
9
|
from typing import Any, Dict, List, Union, Optional
|
|
10
10
|
|
|
11
|
+
from avrotize.common import ANY_VALUE_RECORD, any_value_name, deduplicate_any_value_record, generic_type
|
|
12
|
+
|
|
11
13
|
|
|
12
14
|
class JsonStructureToAvro:
|
|
13
15
|
"""
|
|
@@ -54,9 +56,12 @@ class JsonStructureToAvro:
|
|
|
54
56
|
# Filter out abstract types
|
|
55
57
|
concrete_types = [schema for schema in self.converted_types.values()
|
|
56
58
|
if not (schema.get('type') == 'null' and 'Abstract type' in schema.get('doc', ''))]
|
|
57
|
-
|
|
59
|
+
result = [root_schema] + concrete_types if concrete_types else root_schema
|
|
60
|
+
else:
|
|
61
|
+
result = root_schema
|
|
58
62
|
|
|
59
|
-
|
|
63
|
+
deduplicate_any_value_record(result)
|
|
64
|
+
return result
|
|
60
65
|
|
|
61
66
|
if not root_ref:
|
|
62
67
|
raise ValueError("JSON Structure document must have either 'type' or '$root' property")
|
|
@@ -79,10 +84,13 @@ class JsonStructureToAvro:
|
|
|
79
84
|
|
|
80
85
|
# Return single schema or list depending on how many types were defined
|
|
81
86
|
if len(self.converted_types) == 1:
|
|
82
|
-
|
|
87
|
+
result = root_schema
|
|
83
88
|
else:
|
|
84
89
|
# Return all schemas as a list
|
|
85
|
-
|
|
90
|
+
result = list(self.converted_types.values())
|
|
91
|
+
|
|
92
|
+
deduplicate_any_value_record(result)
|
|
93
|
+
return result
|
|
86
94
|
|
|
87
95
|
def _flatten_definitions(self, definitions: Dict[str, Any], prefix: str = '') -> Dict[str, Dict[str, Any]]:
|
|
88
96
|
"""
|
|
@@ -618,7 +626,7 @@ class JsonStructureToAvro:
|
|
|
618
626
|
return avro_record
|
|
619
627
|
|
|
620
628
|
def _convert_any(self, schema: Dict[str, Any], namespace: Optional[str], name: str) -> Dict[str, Any]:
|
|
621
|
-
"""Convert JSON Structure 'any' type to Avro union of all basic types."""
|
|
629
|
+
"""Convert JSON Structure 'any' type to Avro union of all basic types plus extensible record."""
|
|
622
630
|
avro_record: Dict[str, Any] = {
|
|
623
631
|
'type': 'record',
|
|
624
632
|
'name': name
|
|
@@ -632,11 +640,10 @@ class JsonStructureToAvro:
|
|
|
632
640
|
else:
|
|
633
641
|
avro_record['doc'] = 'Any type'
|
|
634
642
|
|
|
635
|
-
#
|
|
636
|
-
# or as a string containing JSON
|
|
643
|
+
# Use the generic_type() which includes primitives, AnyValue record, arrays, and maps
|
|
637
644
|
avro_record['fields'] = [{
|
|
638
645
|
'name': 'value',
|
|
639
|
-
'type':
|
|
646
|
+
'type': generic_type(name=any_value_name(name, 'value'))
|
|
640
647
|
}]
|
|
641
648
|
|
|
642
649
|
return avro_record
|
|
@@ -731,8 +738,8 @@ class JsonStructureToAvro:
|
|
|
731
738
|
|
|
732
739
|
# Handle any types
|
|
733
740
|
if type_value == 'any':
|
|
734
|
-
# Return union of all basic types
|
|
735
|
-
return
|
|
741
|
+
# Return union of all basic types + extensible AnyValue record + arrays/maps
|
|
742
|
+
return generic_type()
|
|
736
743
|
|
|
737
744
|
if type_value == 'array':
|
|
738
745
|
return {
|
|
@@ -284,6 +284,10 @@ class StructureToCpp:
|
|
|
284
284
|
properties = structure_schema.get('properties', {})
|
|
285
285
|
required_props = structure_schema.get('required', [])
|
|
286
286
|
|
|
287
|
+
# Track fields that need string-based JSON serialization
|
|
288
|
+
string_json_fields = [] # (field_name, original_name, field_type, source_type)
|
|
289
|
+
all_field_info = [] # (field_name, original_name, field_type, source_type)
|
|
290
|
+
|
|
287
291
|
for prop_name, prop_schema in properties.items():
|
|
288
292
|
field_name = self.safe_identifier(prop_name)
|
|
289
293
|
is_required = prop_name in required_props if not isinstance(required_props, list) or len(required_props) == 0 or not isinstance(required_props[0], list) else any(prop_name in req_set for req_set in required_props)
|
|
@@ -291,6 +295,12 @@ class StructureToCpp:
|
|
|
291
295
|
# Convert to C++ type
|
|
292
296
|
field_type = self.convert_structure_type_to_cpp(class_name, field_name, prop_schema, schema_namespace, nullable=not is_required)
|
|
293
297
|
|
|
298
|
+
# Get source type
|
|
299
|
+
source_type = prop_schema.get('type', 'string') if isinstance(prop_schema, dict) and isinstance(prop_schema.get('type'), str) else 'object'
|
|
300
|
+
all_field_info.append((field_name, prop_name, field_type, source_type))
|
|
301
|
+
if source_type in ('int64', 'uint64', 'int128', 'uint128', 'decimal'):
|
|
302
|
+
string_json_fields.append((field_name, prop_name, field_type, source_type))
|
|
303
|
+
|
|
294
304
|
# Add documentation
|
|
295
305
|
if 'description' in prop_schema or 'doc' in prop_schema:
|
|
296
306
|
field_doc = prop_schema.get('description', prop_schema.get('doc', ''))
|
|
@@ -310,6 +320,38 @@ class StructureToCpp:
|
|
|
310
320
|
if self.json_annotation:
|
|
311
321
|
class_definition += self.generate_to_json_method(class_name)
|
|
312
322
|
|
|
323
|
+
# Add custom nlohmann to_json/from_json if we have string-serialized numeric fields
|
|
324
|
+
if self.json_annotation and string_json_fields:
|
|
325
|
+
class_definition += f"\n{INDENT}friend void to_json(nlohmann::json& j, const {class_name}& v) {{\n"
|
|
326
|
+
class_definition += f"{INDENT}{INDENT}j = nlohmann::json::object();\n"
|
|
327
|
+
for fname, orig_name, ftype, stype in all_field_info:
|
|
328
|
+
if stype in ('int64', 'uint64', 'int128', 'uint128'):
|
|
329
|
+
if 'optional' in ftype:
|
|
330
|
+
class_definition += f"{INDENT}{INDENT}if (v.{fname}.has_value()) j[\"{orig_name}\"] = std::to_string(v.{fname}.value()); else j[\"{orig_name}\"] = nullptr;\n"
|
|
331
|
+
else:
|
|
332
|
+
class_definition += f"{INDENT}{INDENT}j[\"{orig_name}\"] = std::to_string(v.{fname});\n"
|
|
333
|
+
elif stype == 'decimal':
|
|
334
|
+
# decimal is already std::string in C++, serializes as string naturally
|
|
335
|
+
class_definition += f"{INDENT}{INDENT}j[\"{orig_name}\"] = v.{fname};\n"
|
|
336
|
+
else:
|
|
337
|
+
class_definition += f"{INDENT}{INDENT}j[\"{orig_name}\"] = v.{fname};\n"
|
|
338
|
+
class_definition += f"{INDENT}}}\n"
|
|
339
|
+
|
|
340
|
+
class_definition += f"\n{INDENT}friend void from_json(const nlohmann::json& j, {class_name}& v) {{\n"
|
|
341
|
+
for fname, orig_name, ftype, stype in all_field_info:
|
|
342
|
+
if stype in ('int64', 'uint64', 'int128', 'uint128'):
|
|
343
|
+
cpp_parse = 'std::stoll' if stype in ('int64',) else 'std::stoull' if stype in ('uint64',) else 'std::stoll'
|
|
344
|
+
if 'optional' in ftype:
|
|
345
|
+
class_definition += f"{INDENT}{INDENT}if (j.contains(\"{orig_name}\") && !j[\"{orig_name}\"].is_null()) v.{fname} = {cpp_parse}(j[\"{orig_name}\"].get<std::string>()); else v.{fname} = std::nullopt;\n"
|
|
346
|
+
else:
|
|
347
|
+
class_definition += f"{INDENT}{INDENT}if (j.contains(\"{orig_name}\")) v.{fname} = {cpp_parse}(j[\"{orig_name}\"].get<std::string>());\n"
|
|
348
|
+
else:
|
|
349
|
+
if 'optional' in ftype:
|
|
350
|
+
class_definition += f"{INDENT}{INDENT}if (j.contains(\"{orig_name}\") && !j[\"{orig_name}\"].is_null()) v.{fname} = j[\"{orig_name}\"].get<{ftype.replace('std::optional<', '').rstrip('>')}>();\n"
|
|
351
|
+
else:
|
|
352
|
+
class_definition += f"{INDENT}{INDENT}if (j.contains(\"{orig_name}\")) j.at(\"{orig_name}\").get_to(v.{fname});\n"
|
|
353
|
+
class_definition += f"{INDENT}}}\n"
|
|
354
|
+
|
|
313
355
|
class_definition += "};\n\n"
|
|
314
356
|
|
|
315
357
|
# Create includes
|
|
@@ -571,6 +613,7 @@ class StructureToCpp:
|
|
|
571
613
|
if "std::optional" in definition:
|
|
572
614
|
file.write("#include <optional>\n")
|
|
573
615
|
file.write("#include <stdexcept>\n")
|
|
616
|
+
file.write("#include <string>\n")
|
|
574
617
|
if "std::chrono" in definition:
|
|
575
618
|
file.write("#include <chrono>\n")
|
|
576
619
|
if "boost::uuid" in definition:
|
|
@@ -329,10 +329,12 @@ class StructureToGo:
|
|
|
329
329
|
if not is_required and not field_type.startswith('*') and not field_type.startswith('[') and not field_type.startswith('map[') and field_type != 'interface{}':
|
|
330
330
|
field_type = f'*{field_type}'
|
|
331
331
|
|
|
332
|
+
source_type = prop_schema.get('type', 'string') if isinstance(prop_schema.get('type'), str) else 'object'
|
|
332
333
|
fields.append({
|
|
333
334
|
'name': pascal(prop_name),
|
|
334
335
|
'type': field_type,
|
|
335
|
-
'original_name': prop_name
|
|
336
|
+
'original_name': prop_name,
|
|
337
|
+
'source_type': source_type
|
|
336
338
|
})
|
|
337
339
|
|
|
338
340
|
# Get imports needed
|
|
@@ -435,10 +435,12 @@ class StructureToJava:
|
|
|
435
435
|
prop_type = prop_type[:-1]
|
|
436
436
|
const_value = self.format_const_value(const_val, prop_type)
|
|
437
437
|
|
|
438
|
+
source_type = prop_schema.get('type', 'string') if isinstance(prop_schema.get('type'), str) else 'object'
|
|
438
439
|
return {
|
|
439
440
|
'name': safe_field_name,
|
|
440
441
|
'original_name': prop_name,
|
|
441
442
|
'type': field_type.type_name,
|
|
443
|
+
'source_type': source_type,
|
|
442
444
|
'docstring': doc,
|
|
443
445
|
'is_const': is_const,
|
|
444
446
|
'const_value': const_value
|
|
@@ -324,10 +324,12 @@ class StructureToRust:
|
|
|
324
324
|
|
|
325
325
|
serde_rename = field_name != original_field_name
|
|
326
326
|
|
|
327
|
+
source_type = prop_schema.get('type', 'string') if isinstance(prop_schema.get('type'), str) else 'object'
|
|
327
328
|
fields.append({
|
|
328
329
|
'original_name': original_field_name,
|
|
329
330
|
'name': field_name,
|
|
330
331
|
'type': prop_type,
|
|
332
|
+
'source_type': source_type,
|
|
331
333
|
'serde_rename': serde_rename,
|
|
332
334
|
'random_value': self.generate_random_value(prop_type)
|
|
333
335
|
})
|
|
@@ -342,11 +342,13 @@ class StructureToTypeScript:
|
|
|
342
342
|
is_enum = True
|
|
343
343
|
break
|
|
344
344
|
|
|
345
|
+
source_type = prop_schema.get('type', 'string') if isinstance(prop_schema, dict) and isinstance(prop_schema.get('type'), str) else 'object'
|
|
345
346
|
fields.append({
|
|
346
347
|
'name': self.safe_name(prop_name),
|
|
347
348
|
'original_name': prop_name,
|
|
348
349
|
'type': field_type,
|
|
349
350
|
'type_no_null': field_type_no_null,
|
|
351
|
+
'source_type': source_type,
|
|
350
352
|
'is_required': is_required,
|
|
351
353
|
'is_optional': is_optional,
|
|
352
354
|
'is_primitive': self.is_typescript_primitive(field_type_no_null.replace('[]', '')),
|
|
@@ -8,7 +8,7 @@ from typing import Dict, List, Tuple
|
|
|
8
8
|
import xml.etree.ElementTree as ET
|
|
9
9
|
import json
|
|
10
10
|
from urllib.parse import urlparse
|
|
11
|
-
from avrotize.common import avro_namespace, generic_type
|
|
11
|
+
from avrotize.common import avro_namespace, deduplicate_any_value_record, generic_type
|
|
12
12
|
|
|
13
13
|
from avrotize.dependency_resolver import inline_dependencies_of, sort_messages_by_dependencies
|
|
14
14
|
|
|
@@ -381,6 +381,7 @@ class XSDToAvro:
|
|
|
381
381
|
element, namespaces))
|
|
382
382
|
|
|
383
383
|
avro_schema = sort_messages_by_dependencies(avro_schema)
|
|
384
|
+
deduplicate_any_value_record(avro_schema)
|
|
384
385
|
if len(avro_schema) == 1:
|
|
385
386
|
return avro_schema[0]
|
|
386
387
|
else:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: structurize
|
|
3
|
-
Version: 3.5.
|
|
3
|
+
Version: 3.5.9
|
|
4
4
|
Summary: Tools to convert from and to JSON Structure from various other schema languages.
|
|
5
5
|
Author-email: Clemens Vasters <clemensv@microsoft.com>
|
|
6
6
|
Classifier: Programming Language :: Python :: 3
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{structurize-3.5.8 → structurize-3.5.9}/avrotize/dependencies/typescript/node22/package.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|