structurize 2.21.0__tar.gz → 2.22.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {structurize-2.21.0/structurize.egg-info → structurize-2.22.0}/PKG-INFO +1 -1
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/_version.py +3 -3
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotogo.py +21 -8
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotojava.py +67 -5
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotopython.py +48 -2
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotorust.py +64 -26
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretocsharp.py +42 -11
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretodb.py +21 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretogo.py +38 -10
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretojava.py +114 -8
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretopython.py +100 -19
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretots.py +8 -5
- {structurize-2.21.0 → structurize-2.22.0/structurize.egg-info}/PKG-INFO +1 -1
- {structurize-2.21.0 → structurize-2.22.0}/.gitignore +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/LICENSE +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/MANIFEST.in +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/README.md +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/__init__.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/__main__.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/asn1toavro.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotize.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotocpp.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotocsharp.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotocsv.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotodatapackage.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotodb.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotographql.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotoiceberg.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotojs.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotojsons.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotojstruct.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotokusto.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotomd.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotools.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotoparquet.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotoproto.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotots.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotoxsd.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/cddltostructure.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/commands.json +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/common.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/constants.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/csvtoavro.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/datapackagetoavro.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/dependencies/cpp/vcpkg/vcpkg.json +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/dependencies/typescript/node22/package.json +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/dependency_resolver.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/dependency_version.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/jsonstoavro.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/jsonstostructure.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/jstructtoavro.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/kstructtoavro.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/kustotoavro.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/openapitostructure.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/parquettoavro.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/proto2parser.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/proto3parser.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/prototoavro.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretocddl.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretocpp.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretocsv.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretodatapackage.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretographql.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretoiceberg.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretojs.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretojsons.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretokusto.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretomd.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretoproto.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretorust.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretoxsd.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/avrotize/xsdtoavro.py +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/build.ps1 +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/build.sh +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/pyproject.toml +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/setup.cfg +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/structurize.egg-info/SOURCES.txt +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/structurize.egg-info/dependency_links.txt +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/structurize.egg-info/entry_points.txt +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/structurize.egg-info/requires.txt +0 -0
- {structurize-2.21.0 → structurize-2.22.0}/structurize.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: structurize
|
|
3
|
-
Version: 2.21.0
|
|
3
|
+
Version: 2.22.0
|
|
4
4
|
Summary: Tools to convert from and to JSON Structure from various other schema languages.
|
|
5
5
|
Author-email: Clemens Vasters <clemensv@microsoft.com>
|
|
6
6
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '2.21.0'
|
|
32
|
-
__version_tuple__ = version_tuple = (2, 21, 0)
|
|
31
|
+
__version__ = version = '2.22.0'
|
|
32
|
+
__version_tuple__ = version_tuple = (2, 22, 0)
|
|
33
33
|
|
|
34
|
-
__commit_id__ = commit_id = '
|
|
34
|
+
__commit_id__ = commit_id = 'g5d3e04df0'
|
|
@@ -10,8 +10,15 @@ JsonNode = Dict[str, 'JsonNode'] | List['JsonNode'] | str | None
|
|
|
10
10
|
class AvroToGo:
|
|
11
11
|
"""Converts Avro schema to Go structs, including JSON and Avro marshalling methods"""
|
|
12
12
|
|
|
13
|
+
# Go reserved keywords that cannot be used as package names
|
|
14
|
+
GO_RESERVED_WORDS = [
|
|
15
|
+
'break', 'default', 'func', 'interface', 'select', 'case', 'defer', 'go', 'map', 'struct', 'chan',
|
|
16
|
+
'else', 'goto', 'package', 'switch', 'const', 'fallthrough', 'if', 'range', 'type', 'continue', 'for',
|
|
17
|
+
'import', 'return', 'var',
|
|
18
|
+
]
|
|
19
|
+
|
|
13
20
|
def __init__(self, base_package: str = '') -> None:
|
|
14
|
-
self.base_package = base_package
|
|
21
|
+
self.base_package = self._safe_package_name(base_package) if base_package else base_package
|
|
15
22
|
self.output_dir = os.getcwd()
|
|
16
23
|
self.generated_types_avro_namespace: Dict[str, str] = {}
|
|
17
24
|
self.generated_types_go_package: Dict[str, str] = {}
|
|
@@ -25,14 +32,15 @@ class AvroToGo:
|
|
|
25
32
|
self.structs = []
|
|
26
33
|
self.enums = []
|
|
27
34
|
|
|
35
|
+
def _safe_package_name(self, name: str) -> str:
|
|
36
|
+
"""Converts a name to a safe Go package name"""
|
|
37
|
+
if name in self.GO_RESERVED_WORDS:
|
|
38
|
+
return f"{name}_"
|
|
39
|
+
return name
|
|
40
|
+
|
|
28
41
|
def safe_identifier(self, name: str) -> str:
|
|
29
42
|
"""Converts a name to a safe Go identifier"""
|
|
30
|
-
|
|
31
|
-
'break', 'default', 'func', 'interface', 'select', 'case', 'defer', 'go', 'map', 'struct', 'chan',
|
|
32
|
-
'else', 'goto', 'package', 'switch', 'const', 'fallthrough', 'if', 'range', 'type', 'continue', 'for',
|
|
33
|
-
'import', 'return', 'var',
|
|
34
|
-
]
|
|
35
|
-
if name in reserved_words:
|
|
43
|
+
if name in self.GO_RESERVED_WORDS:
|
|
36
44
|
return f"{name}_"
|
|
37
45
|
return name
|
|
38
46
|
|
|
@@ -157,6 +165,10 @@ class AvroToGo:
|
|
|
157
165
|
'original_name': field['name']
|
|
158
166
|
} for field in avro_schema.get('fields', [])]
|
|
159
167
|
|
|
168
|
+
# Collect imports from field types
|
|
169
|
+
go_types = [f['type'] for f in fields]
|
|
170
|
+
imports = self.get_imports_for_definition(go_types)
|
|
171
|
+
|
|
160
172
|
context = {
|
|
161
173
|
'doc': avro_schema.get('doc', ''),
|
|
162
174
|
'struct_name': go_struct_name,
|
|
@@ -166,6 +178,7 @@ class AvroToGo:
|
|
|
166
178
|
'avro_annotation': self.avro_annotation,
|
|
167
179
|
'json_match_predicates': [self.get_is_json_match_clause(f['name'], f['type']) for f in fields],
|
|
168
180
|
'base_package': self.base_package,
|
|
181
|
+
'imports': imports,
|
|
169
182
|
}
|
|
170
183
|
|
|
171
184
|
pkg_dir = os.path.join(self.output_dir, 'pkg', self.base_package)
|
|
@@ -430,7 +443,7 @@ class AvroToGo:
|
|
|
430
443
|
def convert(self, avro_schema_path: str, output_dir: str):
|
|
431
444
|
"""Converts Avro schema to Go"""
|
|
432
445
|
if not self.base_package:
|
|
433
|
-
self.base_package = os.path.splitext(os.path.basename(avro_schema_path))[0]
|
|
446
|
+
self.base_package = self._safe_package_name(os.path.splitext(os.path.basename(avro_schema_path))[0])
|
|
434
447
|
|
|
435
448
|
with open(avro_schema_path, 'r', encoding='utf-8') as file:
|
|
436
449
|
schema = json.load(file)
|
|
@@ -1721,6 +1721,51 @@ class AvroToJava:
|
|
|
1721
1721
|
def get_test_imports(self, fields: List) -> List[str]:
|
|
1722
1722
|
""" Gets the necessary imports for the test class """
|
|
1723
1723
|
imports = []
|
|
1724
|
+
|
|
1725
|
+
# Track simple names to detect conflicts
|
|
1726
|
+
# Map: simple_name -> list of FQNs that have that simple name
|
|
1727
|
+
simple_name_to_fqns: Dict[str, List[str]] = {}
|
|
1728
|
+
|
|
1729
|
+
# First pass: collect all custom type FQNs and their simple names
|
|
1730
|
+
for field in fields:
|
|
1731
|
+
inner_types = []
|
|
1732
|
+
if field.field_type.startswith("List<"):
|
|
1733
|
+
inner_type = field.field_type[5:-1]
|
|
1734
|
+
if inner_type.startswith("Map<"):
|
|
1735
|
+
start = inner_type.index('<') + 1
|
|
1736
|
+
end = inner_type.rindex('>')
|
|
1737
|
+
map_types = inner_type[start:end].split(',')
|
|
1738
|
+
if len(map_types) > 1:
|
|
1739
|
+
inner_types.append(map_types[1].strip())
|
|
1740
|
+
else:
|
|
1741
|
+
inner_types.append(inner_type)
|
|
1742
|
+
elif field.field_type.startswith("Map<"):
|
|
1743
|
+
start = field.field_type.index('<') + 1
|
|
1744
|
+
end = field.field_type.rindex('>')
|
|
1745
|
+
map_types = field.field_type[start:end].split(',')
|
|
1746
|
+
if len(map_types) > 1:
|
|
1747
|
+
inner_types.append(map_types[1].strip())
|
|
1748
|
+
if not field.field_type.startswith(("List<", "Map<")):
|
|
1749
|
+
inner_types.append(field.field_type)
|
|
1750
|
+
if hasattr(field, 'java_type_obj') and field.java_type_obj and field.java_type_obj.union_types:
|
|
1751
|
+
for union_member_type in field.java_type_obj.union_types:
|
|
1752
|
+
inner_types.append(union_member_type.type_name)
|
|
1753
|
+
|
|
1754
|
+
for type_to_check in inner_types:
|
|
1755
|
+
if type_to_check in self.generated_types_java_package and '.' in type_to_check:
|
|
1756
|
+
simple_name = type_to_check.split('.')[-1]
|
|
1757
|
+
if simple_name not in simple_name_to_fqns:
|
|
1758
|
+
simple_name_to_fqns[simple_name] = []
|
|
1759
|
+
if type_to_check not in simple_name_to_fqns[simple_name]:
|
|
1760
|
+
simple_name_to_fqns[simple_name].append(type_to_check)
|
|
1761
|
+
|
|
1762
|
+
# Find conflicting simple names (same simple name, different FQNs)
|
|
1763
|
+
conflicting_fqns: set = set()
|
|
1764
|
+
for simple_name, fqns in simple_name_to_fqns.items():
|
|
1765
|
+
if len(fqns) > 1:
|
|
1766
|
+
# This simple name has conflicts - mark all FQNs as conflicting
|
|
1767
|
+
conflicting_fqns.update(fqns)
|
|
1768
|
+
|
|
1724
1769
|
for field in fields:
|
|
1725
1770
|
# Extract inner types from generic collections
|
|
1726
1771
|
inner_types = []
|
|
@@ -1772,7 +1817,8 @@ class AvroToJava:
|
|
|
1772
1817
|
if type_to_check in self.generated_types_java_package:
|
|
1773
1818
|
type_kind = self.generated_types_java_package[type_to_check]
|
|
1774
1819
|
# Only import if it's a fully qualified name with a package
|
|
1775
|
-
|
|
1820
|
+
# Skip imports for types with conflicting simple names - they'll use FQN
|
|
1821
|
+
if '.' in type_to_check and type_to_check not in conflicting_fqns:
|
|
1776
1822
|
import_stmt = f"import {type_to_check};"
|
|
1777
1823
|
if import_stmt not in imports:
|
|
1778
1824
|
imports.append(import_stmt)
|
|
@@ -1809,10 +1855,11 @@ class AvroToJava:
|
|
|
1809
1855
|
if java_qualified_name:
|
|
1810
1856
|
if java_qualified_name in self.generated_types_java_package or java_qualified_name.split('.')[-1] in self.generated_types_java_package:
|
|
1811
1857
|
member_type_kind = self.generated_types_java_package.get(java_qualified_name, self.generated_types_java_package.get(java_qualified_name.split('.')[-1], None))
|
|
1812
|
-
# Import the class/enum
|
|
1813
|
-
|
|
1814
|
-
|
|
1815
|
-
imports
|
|
1858
|
+
# Import the class/enum only if not conflicting
|
|
1859
|
+
if java_qualified_name not in conflicting_fqns:
|
|
1860
|
+
class_import = f"import {java_qualified_name};"
|
|
1861
|
+
if class_import not in imports:
|
|
1862
|
+
imports.append(class_import)
|
|
1816
1863
|
# No longer import test classes - we instantiate classes directly
|
|
1817
1864
|
return imports
|
|
1818
1865
|
|
|
@@ -1920,6 +1967,21 @@ class AvroToJava:
|
|
|
1920
1967
|
'Double': 'Double.valueOf(3.14)',
|
|
1921
1968
|
'byte[]': 'new byte[] { 0x01, 0x02, 0x03 }',
|
|
1922
1969
|
'Object': 'null', # Use null for Object types (Avro unions) to avoid reference equality issues
|
|
1970
|
+
# Java time types - use factory methods, not constructors
|
|
1971
|
+
'Instant': 'java.time.Instant.now()',
|
|
1972
|
+
'java.time.Instant': 'java.time.Instant.now()',
|
|
1973
|
+
'LocalDate': 'java.time.LocalDate.now()',
|
|
1974
|
+
'java.time.LocalDate': 'java.time.LocalDate.now()',
|
|
1975
|
+
'LocalTime': 'java.time.LocalTime.now()',
|
|
1976
|
+
'java.time.LocalTime': 'java.time.LocalTime.now()',
|
|
1977
|
+
'LocalDateTime': 'java.time.LocalDateTime.now()',
|
|
1978
|
+
'java.time.LocalDateTime': 'java.time.LocalDateTime.now()',
|
|
1979
|
+
'Duration': 'java.time.Duration.ofSeconds(42)',
|
|
1980
|
+
'java.time.Duration': 'java.time.Duration.ofSeconds(42)',
|
|
1981
|
+
'UUID': 'java.util.UUID.randomUUID()',
|
|
1982
|
+
'java.util.UUID': 'java.util.UUID.randomUUID()',
|
|
1983
|
+
'BigDecimal': 'new java.math.BigDecimal("42.00")',
|
|
1984
|
+
'java.math.BigDecimal': 'new java.math.BigDecimal("42.00")',
|
|
1923
1985
|
}
|
|
1924
1986
|
|
|
1925
1987
|
# Handle generic types
|
|
@@ -12,6 +12,38 @@ from avrotize.common import fullname, get_typing_args_from_string, is_generic_av
|
|
|
12
12
|
|
|
13
13
|
INDENT = ' '
|
|
14
14
|
|
|
15
|
+
# Python standard library modules that should not be shadowed by package names
|
|
16
|
+
PYTHON_STDLIB_MODULES = {
|
|
17
|
+
'abc', 'aifc', 'argparse', 'array', 'ast', 'asynchat', 'asyncio', 'asyncore',
|
|
18
|
+
'atexit', 'audioop', 'base64', 'bdb', 'binascii', 'binhex', 'bisect', 'builtins',
|
|
19
|
+
'bz2', 'calendar', 'cgi', 'cgitb', 'chunk', 'cmath', 'cmd', 'code', 'codecs',
|
|
20
|
+
'codeop', 'collections', 'colorsys', 'compileall', 'concurrent', 'configparser',
|
|
21
|
+
'contextlib', 'contextvars', 'copy', 'copyreg', 'cProfile', 'crypt', 'csv',
|
|
22
|
+
'ctypes', 'curses', 'dataclasses', 'datetime', 'dbm', 'decimal', 'difflib',
|
|
23
|
+
'dis', 'distutils', 'doctest', 'email', 'encodings', 'enum', 'errno', 'faulthandler',
|
|
24
|
+
'fcntl', 'filecmp', 'fileinput', 'fnmatch', 'fractions', 'ftplib', 'functools',
|
|
25
|
+
'gc', 'getopt', 'getpass', 'gettext', 'glob', 'graphlib', 'grp', 'gzip',
|
|
26
|
+
'hashlib', 'heapq', 'hmac', 'html', 'http', 'imaplib', 'imghdr', 'imp',
|
|
27
|
+
'importlib', 'inspect', 'io', 'ipaddress', 'itertools', 'json', 'keyword',
|
|
28
|
+
'lib2to3', 'linecache', 'locale', 'logging', 'lzma', 'mailbox', 'mailcap',
|
|
29
|
+
'marshal', 'math', 'mimetypes', 'mmap', 'modulefinder', 'multiprocessing',
|
|
30
|
+
'netrc', 'nis', 'nntplib', 'numbers', 'operator', 'optparse', 'os', 'ossaudiodev',
|
|
31
|
+
'pathlib', 'pdb', 'pickle', 'pickletools', 'pipes', 'pkgutil', 'platform',
|
|
32
|
+
'plistlib', 'poplib', 'posix', 'posixpath', 'pprint', 'profile', 'pstats',
|
|
33
|
+
'pty', 'pwd', 'py_compile', 'pyclbr', 'pydoc', 'queue', 'quopri', 'random',
|
|
34
|
+
're', 'readline', 'reprlib', 'resource', 'rlcompleter', 'runpy', 'sched',
|
|
35
|
+
'secrets', 'select', 'selectors', 'shelve', 'shlex', 'shutil', 'signal',
|
|
36
|
+
'site', 'smtpd', 'smtplib', 'sndhdr', 'socket', 'socketserver', 'spwd',
|
|
37
|
+
'sqlite3', 'ssl', 'stat', 'statistics', 'string', 'stringprep', 'struct',
|
|
38
|
+
'subprocess', 'sunau', 'symtable', 'sys', 'sysconfig', 'syslog', 'tabnanny',
|
|
39
|
+
'tarfile', 'telnetlib', 'tempfile', 'termios', 'test', 'textwrap', 'threading',
|
|
40
|
+
'time', 'timeit', 'tkinter', 'token', 'tokenize', 'trace', 'traceback',
|
|
41
|
+
'tracemalloc', 'tty', 'turtle', 'turtledemo', 'types', 'typing', 'unicodedata',
|
|
42
|
+
'unittest', 'urllib', 'uu', 'uuid', 'venv', 'warnings', 'wave', 'weakref',
|
|
43
|
+
'webbrowser', 'winreg', 'winsound', 'wsgiref', 'xdrlib', 'xml', 'xmlrpc',
|
|
44
|
+
'zipapp', 'zipfile', 'zipimport', 'zlib', 'zoneinfo',
|
|
45
|
+
}
|
|
46
|
+
|
|
15
47
|
|
|
16
48
|
def is_python_reserved_word(word: str) -> bool:
|
|
17
49
|
"""Checks if a word is a Python reserved word"""
|
|
@@ -25,6 +57,13 @@ def is_python_reserved_word(word: str) -> bool:
|
|
|
25
57
|
return word in reserved_words
|
|
26
58
|
|
|
27
59
|
|
|
60
|
+
def safe_package_name(name: str) -> str:
|
|
61
|
+
"""Converts a name to a safe Python package name that won't shadow stdlib"""
|
|
62
|
+
if name.lower() in PYTHON_STDLIB_MODULES:
|
|
63
|
+
return f"{name}_types"
|
|
64
|
+
return name
|
|
65
|
+
|
|
66
|
+
|
|
28
67
|
class AvroToPython:
|
|
29
68
|
"""Converts Avro schema to Python data classes"""
|
|
30
69
|
|
|
@@ -167,6 +206,9 @@ class AvroToPython:
|
|
|
167
206
|
enum_ref = self.generate_enum(avro_type, parent_package, write_file=True)
|
|
168
207
|
import_types.add(enum_ref)
|
|
169
208
|
return self.strip_package_from_fully_qualified_name(enum_ref)
|
|
209
|
+
elif avro_type['type'] == 'fixed':
|
|
210
|
+
# Fixed types are represented as bytes in Python
|
|
211
|
+
return 'bytes'
|
|
170
212
|
elif avro_type['type'] == 'array':
|
|
171
213
|
return f"typing.List[{self.convert_avro_type_to_python(avro_type['items'], parent_package, import_types)}]"
|
|
172
214
|
elif avro_type['type'] == 'map':
|
|
@@ -327,7 +369,8 @@ class AvroToPython:
|
|
|
327
369
|
def generate_test_class(self, package_name: str, class_name: str, fields: List[Dict[str, str]], import_types: Set[str]) -> None:
|
|
328
370
|
"""Generates a unit test class for a Python data class"""
|
|
329
371
|
test_class_name = f"Test_{class_name}"
|
|
330
|
-
|
|
372
|
+
flat_package = package_name.replace('.', '_').lower()
|
|
373
|
+
tests_package_name = flat_package if flat_package.startswith('test_') else f"test_{flat_package}"
|
|
331
374
|
test_class_definition = process_template(
|
|
332
375
|
"avrotopython/test_class.jinja",
|
|
333
376
|
package_name=package_name,
|
|
@@ -348,7 +391,8 @@ class AvroToPython:
|
|
|
348
391
|
def generate_test_enum(self, package_name: str, class_name: str, symbols: List[str]) -> None:
|
|
349
392
|
"""Generates a unit test class for a Python enum"""
|
|
350
393
|
test_class_name = f"Test_{class_name}"
|
|
351
|
-
|
|
394
|
+
flat_package = package_name.replace('.', '_').lower()
|
|
395
|
+
tests_package_name = flat_package if flat_package.startswith('test_') else f"test_{flat_package}"
|
|
352
396
|
test_class_definition = process_template(
|
|
353
397
|
"avrotopython/test_enum.jinja",
|
|
354
398
|
package_name=package_name,
|
|
@@ -609,6 +653,7 @@ def convert_avro_to_python(avro_schema_path, py_file_path, package_name='', data
|
|
|
609
653
|
if not package_name:
|
|
610
654
|
package_name = os.path.splitext(os.path.basename(avro_schema_path))[
|
|
611
655
|
0].lower().replace('-', '_')
|
|
656
|
+
package_name = safe_package_name(package_name)
|
|
612
657
|
|
|
613
658
|
avro_to_python = AvroToPython(
|
|
614
659
|
package_name, dataclasses_json_annotation=dataclasses_json_annotation, avro_annotation=avro_annotation)
|
|
@@ -617,6 +662,7 @@ def convert_avro_to_python(avro_schema_path, py_file_path, package_name='', data
|
|
|
617
662
|
|
|
618
663
|
def convert_avro_schema_to_python(avro_schema, py_file_path, package_name='', dataclasses_json_annotation=False, avro_annotation=False):
|
|
619
664
|
"""Converts Avro schema to Python data classes"""
|
|
665
|
+
package_name = safe_package_name(package_name) if package_name else package_name
|
|
620
666
|
avro_to_python = AvroToPython(
|
|
621
667
|
package_name, dataclasses_json_annotation=dataclasses_json_annotation, avro_annotation=avro_annotation)
|
|
622
668
|
if isinstance(avro_schema, dict):
|
|
@@ -144,12 +144,15 @@ class AvroToRust:
|
|
|
144
144
|
field_name = self.safe_identifier(snake(original_field_name))
|
|
145
145
|
field_type = self.convert_avro_type_to_rust(field_name, field['type'], parent_namespace)
|
|
146
146
|
serde_rename = field_name != original_field_name
|
|
147
|
+
# Check if this is a generated type (enum, union, or record) where random values may match default
|
|
148
|
+
is_generated_type = field_type in self.generated_types_rust_package or '::' in field_type
|
|
147
149
|
fields.append({
|
|
148
150
|
'original_name': original_field_name,
|
|
149
151
|
'name': field_name,
|
|
150
152
|
'type': field_type,
|
|
151
153
|
'serde_rename': serde_rename,
|
|
152
|
-
'random_value': self.generate_random_value(field_type)
|
|
154
|
+
'random_value': self.generate_random_value(field_type),
|
|
155
|
+
'is_generated_type': is_generated_type
|
|
153
156
|
})
|
|
154
157
|
|
|
155
158
|
struct_name = self.safe_identifier(pascal(avro_schema['name']))
|
|
@@ -187,28 +190,51 @@ class AvroToRust:
|
|
|
187
190
|
def get_is_json_match_clause(self, field_name: str, field_type: str, for_union=False) -> str:
|
|
188
191
|
"""Generates the is_json_match clause for a field"""
|
|
189
192
|
ref = f'node[\"{field_name}\"]' if not for_union else 'node'
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
return f"{ref}.
|
|
202
|
-
elif
|
|
203
|
-
return f"{ref}.
|
|
204
|
-
elif
|
|
205
|
-
return f"{ref}.
|
|
206
|
-
elif
|
|
207
|
-
return f"{ref}.
|
|
208
|
-
elif
|
|
209
|
-
return f"{ref}.
|
|
193
|
+
|
|
194
|
+
# Check if type is optional - if so, we need to allow null values
|
|
195
|
+
is_optional = field_type.startswith('Option<')
|
|
196
|
+
base_type = field_type[7:-1] if is_optional else field_type
|
|
197
|
+
null_check = f" || {ref}.is_null()" if is_optional else ""
|
|
198
|
+
|
|
199
|
+
# serde_json::Value can be any JSON type, so always return true
|
|
200
|
+
if base_type == 'serde_json::Value':
|
|
201
|
+
return "true"
|
|
202
|
+
|
|
203
|
+
if base_type == 'String':
|
|
204
|
+
return f"({ref}.is_string(){null_check})"
|
|
205
|
+
elif base_type == 'bool':
|
|
206
|
+
return f"({ref}.is_boolean(){null_check})"
|
|
207
|
+
elif base_type == 'i32':
|
|
208
|
+
return f"({ref}.is_i64(){null_check})"
|
|
209
|
+
elif base_type == 'i64':
|
|
210
|
+
return f"({ref}.is_i64(){null_check})"
|
|
211
|
+
elif base_type == 'f32':
|
|
212
|
+
return f"({ref}.is_f64(){null_check})"
|
|
213
|
+
elif base_type == 'f64':
|
|
214
|
+
return f"({ref}.is_f64(){null_check})"
|
|
215
|
+
elif base_type == 'Vec<u8>':
|
|
216
|
+
return f"({ref}.is_array(){null_check})"
|
|
217
|
+
elif base_type == 'std::collections::HashMap<String, String>':
|
|
218
|
+
return f"({ref}.is_object(){null_check})"
|
|
219
|
+
elif base_type.startswith('std::collections::HashMap<String, '):
|
|
220
|
+
return f"({ref}.is_object(){null_check})"
|
|
221
|
+
elif base_type.startswith('Vec<'):
|
|
222
|
+
return f"({ref}.is_array(){null_check})"
|
|
223
|
+
# chrono types - check for string (ISO 8601 format) or number (timestamp)
|
|
224
|
+
elif 'chrono::NaiveDateTime' in base_type or 'NaiveDateTime' in base_type:
|
|
225
|
+
return f"({ref}.is_string() || {ref}.is_i64(){null_check})"
|
|
226
|
+
elif 'chrono::NaiveDate' in base_type or 'NaiveDate' in base_type:
|
|
227
|
+
return f"({ref}.is_string() || {ref}.is_i64(){null_check})"
|
|
228
|
+
elif 'chrono::NaiveTime' in base_type or 'NaiveTime' in base_type:
|
|
229
|
+
return f"({ref}.is_string() || {ref}.is_i64(){null_check})"
|
|
230
|
+
# uuid type - check for string
|
|
231
|
+
elif 'uuid::Uuid' in base_type or 'Uuid' in base_type:
|
|
232
|
+
return f"({ref}.is_string(){null_check})"
|
|
210
233
|
else:
|
|
211
|
-
|
|
234
|
+
# Custom types - call their is_json_match method
|
|
235
|
+
if is_optional:
|
|
236
|
+
return f"({base_type}::is_json_match(&{ref}) || {ref}.is_null())"
|
|
237
|
+
return f"{base_type}::is_json_match(&{ref})"
|
|
212
238
|
|
|
213
239
|
|
|
214
240
|
def generate_enum(self, avro_schema: Dict, parent_namespace: str) -> str:
|
|
@@ -250,17 +276,29 @@ class AvroToRust:
|
|
|
250
276
|
ns = namespace.replace('.', '::').lower()
|
|
251
277
|
union_enum_name = pascal(field_name) + 'Union'
|
|
252
278
|
union_types = [self.convert_avro_type_to_rust(field_name + "Option" + str(i), t, namespace) for i, t in enumerate(avro_type) if t != 'null']
|
|
253
|
-
|
|
254
|
-
|
|
279
|
+
|
|
280
|
+
# Track seen predicates to identify structurally identical variants
|
|
281
|
+
seen_predicates: set = set()
|
|
282
|
+
union_fields = []
|
|
283
|
+
for i, t in enumerate(union_types):
|
|
284
|
+
predicate = self.get_is_json_match_clause(field_name, t, for_union=True)
|
|
285
|
+
# Mark if this is the first variant with this predicate structure
|
|
286
|
+
# Subsequent variants with same predicate can't be distinguished during JSON deserialization
|
|
287
|
+
is_first_with_predicate = predicate not in seen_predicates
|
|
288
|
+
seen_predicates.add(predicate)
|
|
289
|
+
union_fields.append({
|
|
255
290
|
'name': pascal(t.rsplit('::',1)[-1]),
|
|
256
291
|
'type': t,
|
|
257
292
|
'random_value': self.generate_random_value(t),
|
|
258
293
|
'default_value': 'Default::default()',
|
|
259
|
-
'json_match_predicate':
|
|
260
|
-
|
|
294
|
+
'json_match_predicate': predicate,
|
|
295
|
+
'is_first_with_predicate': is_first_with_predicate,
|
|
296
|
+
})
|
|
297
|
+
|
|
261
298
|
qualified_union_enum_name = self.safe_package(self.concat_package(ns, union_enum_name))
|
|
262
299
|
context = {
|
|
263
300
|
'serde_annotation': self.serde_annotation,
|
|
301
|
+
'avro_annotation': self.avro_annotation,
|
|
264
302
|
'union_enum_name': union_enum_name,
|
|
265
303
|
'union_fields': union_fields,
|
|
266
304
|
'json_match_predicates': [self.get_is_json_match_clause(f['name'], f['type'], for_union=True) for f in union_fields]
|
|
@@ -143,6 +143,35 @@ class StructureToCSharp:
|
|
|
143
143
|
]
|
|
144
144
|
return word in reserved_words
|
|
145
145
|
|
|
146
|
+
def safe_identifier(self, name: str, class_name: str = '', fallback_prefix: str = 'field') -> str:
|
|
147
|
+
"""Converts a name to a safe C# identifier.
|
|
148
|
+
|
|
149
|
+
Handles:
|
|
150
|
+
- Reserved words (prepend @)
|
|
151
|
+
- Numeric prefixes (prepend _)
|
|
152
|
+
- Special characters (replace with _)
|
|
153
|
+
- All-special-char names (use fallback_prefix)
|
|
154
|
+
- Class name collision (append _)
|
|
155
|
+
"""
|
|
156
|
+
import re
|
|
157
|
+
# Replace invalid characters with underscores
|
|
158
|
+
safe = re.sub(r'[^a-zA-Z0-9_]', '_', str(name))
|
|
159
|
+
# Remove leading/trailing underscores from sanitization
|
|
160
|
+
safe = safe.strip('_') if safe != name else safe
|
|
161
|
+
# If nothing left after removing special chars, use fallback
|
|
162
|
+
if not safe or not re.match(r'^[a-zA-Z_@]', safe):
|
|
163
|
+
if safe and re.match(r'^[0-9]', safe):
|
|
164
|
+
safe = '_' + safe # Numeric prefix
|
|
165
|
+
else:
|
|
166
|
+
safe = fallback_prefix + '_' + (safe if safe else 'unnamed')
|
|
167
|
+
# Handle reserved words with @ prefix
|
|
168
|
+
if self.is_csharp_reserved_word(safe):
|
|
169
|
+
safe = '@' + safe
|
|
170
|
+
# Handle class name collision
|
|
171
|
+
if class_name and safe == class_name:
|
|
172
|
+
safe = safe + '_'
|
|
173
|
+
return safe
|
|
174
|
+
|
|
146
175
|
def is_csharp_primitive_type(self, csharp_type: str) -> bool:
|
|
147
176
|
""" Checks if a type is a C# primitive type """
|
|
148
177
|
if csharp_type.endswith('?'):
|
|
@@ -416,16 +445,18 @@ class StructureToCSharp:
|
|
|
416
445
|
""" Generates a property for a class """
|
|
417
446
|
property_definition = ''
|
|
418
447
|
|
|
419
|
-
# Resolve property name
|
|
420
|
-
field_name = prop_name
|
|
421
|
-
if self.is_csharp_reserved_word(field_name):
|
|
422
|
-
field_name = f"@{field_name}"
|
|
448
|
+
# Resolve property name using safe_identifier for special chars, numeric prefixes, etc.
|
|
449
|
+
field_name = self.safe_identifier(prop_name, class_name)
|
|
423
450
|
if self.pascal_properties:
|
|
424
|
-
field_name_cs = pascal(field_name)
|
|
451
|
+
field_name_cs = pascal(field_name.lstrip('@'))
|
|
452
|
+
# Re-check for class name collision after pascal casing
|
|
453
|
+
if field_name_cs == class_name:
|
|
454
|
+
field_name_cs += "_"
|
|
425
455
|
else:
|
|
426
456
|
field_name_cs = field_name
|
|
427
|
-
|
|
428
|
-
|
|
457
|
+
|
|
458
|
+
# Track if field name differs from original for JSON annotation
|
|
459
|
+
needs_json_annotation = field_name_cs != prop_name
|
|
429
460
|
|
|
430
461
|
# Check if this is a const field
|
|
431
462
|
if 'const' in prop_schema:
|
|
@@ -442,9 +473,9 @@ class StructureToCSharp:
|
|
|
442
473
|
|
|
443
474
|
# Add JSON property name annotation when property name differs from schema name
|
|
444
475
|
# This is needed for proper JSON serialization/deserialization, especially with pascal_properties
|
|
445
|
-
if
|
|
476
|
+
if needs_json_annotation:
|
|
446
477
|
property_definition += f'{INDENT}[System.Text.Json.Serialization.JsonPropertyName("{prop_name}")]\n'
|
|
447
|
-
if self.newtonsoft_json_annotation and
|
|
478
|
+
if self.newtonsoft_json_annotation and needs_json_annotation:
|
|
448
479
|
property_definition += f'{INDENT}[Newtonsoft.Json.JsonProperty("{prop_name}")]\n'
|
|
449
480
|
|
|
450
481
|
# Add XML element annotation if enabled
|
|
@@ -473,9 +504,9 @@ class StructureToCSharp:
|
|
|
473
504
|
|
|
474
505
|
# Add JSON property name annotation when property name differs from schema name
|
|
475
506
|
# This is needed for proper JSON serialization/deserialization, especially with pascal_properties
|
|
476
|
-
if
|
|
507
|
+
if needs_json_annotation:
|
|
477
508
|
property_definition += f'{INDENT}[System.Text.Json.Serialization.JsonPropertyName("{prop_name}")]\n'
|
|
478
|
-
if self.newtonsoft_json_annotation and
|
|
509
|
+
if self.newtonsoft_json_annotation and needs_json_annotation:
|
|
479
510
|
property_definition += f'{INDENT}[Newtonsoft.Json.JsonProperty("{prop_name}")]\n'
|
|
480
511
|
|
|
481
512
|
# Add XML element annotation if enabled
|
|
@@ -443,6 +443,27 @@ def structure_type_to_sql_type(structure_type: Any, dialect: str) -> str:
|
|
|
443
443
|
struct_type = structure_type.get("type", "string")
|
|
444
444
|
if struct_type in ["array", "set", "map", "object", "choice", "tuple"]:
|
|
445
445
|
return type_map[dialect][struct_type]
|
|
446
|
+
|
|
447
|
+
# Handle string type with maxLength annotation
|
|
448
|
+
if struct_type == "string" and "maxLength" in structure_type:
|
|
449
|
+
max_length = structure_type["maxLength"]
|
|
450
|
+
if dialect == "sqlserver" or dialect == "sqlanywhere":
|
|
451
|
+
return f"NVARCHAR({max_length})"
|
|
452
|
+
elif dialect in ["postgres", "redshift", "db2"]:
|
|
453
|
+
return f"VARCHAR({max_length})"
|
|
454
|
+
elif dialect in ["mysql", "mariadb"]:
|
|
455
|
+
return f"VARCHAR({max_length})"
|
|
456
|
+
elif dialect == "sqlite":
|
|
457
|
+
return f"VARCHAR({max_length})"
|
|
458
|
+
elif dialect == "oracle":
|
|
459
|
+
return f"VARCHAR2({max_length})"
|
|
460
|
+
elif dialect == "bigquery":
|
|
461
|
+
return f"STRING({max_length})"
|
|
462
|
+
elif dialect == "snowflake":
|
|
463
|
+
return f"VARCHAR({max_length})"
|
|
464
|
+
else:
|
|
465
|
+
return f"VARCHAR({max_length})"
|
|
466
|
+
|
|
446
467
|
return structure_type_to_sql_type(struct_type, dialect)
|
|
447
468
|
|
|
448
469
|
return type_map.get(dialect, type_map["postgres"])["string"]
|
|
@@ -16,8 +16,15 @@ INDENT = ' '
|
|
|
16
16
|
class StructureToGo:
|
|
17
17
|
""" Converts JSON Structure schema to Go structs """
|
|
18
18
|
|
|
19
|
+
# Go reserved keywords that cannot be used as package names or identifiers
|
|
20
|
+
GO_RESERVED_WORDS = [
|
|
21
|
+
'break', 'default', 'func', 'interface', 'select', 'case', 'defer', 'go', 'map', 'struct', 'chan',
|
|
22
|
+
'else', 'goto', 'package', 'switch', 'const', 'fallthrough', 'if', 'range', 'type', 'continue', 'for',
|
|
23
|
+
'import', 'return', 'var',
|
|
24
|
+
]
|
|
25
|
+
|
|
19
26
|
def __init__(self, base_package: str = '') -> None:
|
|
20
|
-
self.base_package = base_package
|
|
27
|
+
self.base_package = self._safe_package_name(base_package) if base_package else base_package
|
|
21
28
|
self.output_dir = os.getcwd()
|
|
22
29
|
self.json_annotation = False
|
|
23
30
|
self.avro_annotation = False
|
|
@@ -31,17 +38,37 @@ class StructureToGo:
|
|
|
31
38
|
self.structs: List[Dict] = []
|
|
32
39
|
self.enums: List[Dict] = []
|
|
33
40
|
|
|
34
|
-
def
|
|
35
|
-
"""Converts a name to a safe Go
|
|
36
|
-
|
|
37
|
-
'break', 'default', 'func', 'interface', 'select', 'case', 'defer', 'go', 'map', 'struct', 'chan',
|
|
38
|
-
'else', 'goto', 'package', 'switch', 'const', 'fallthrough', 'if', 'range', 'type', 'continue', 'for',
|
|
39
|
-
'import', 'return', 'var',
|
|
40
|
-
]
|
|
41
|
-
if name in reserved_words:
|
|
41
|
+
def _safe_package_name(self, name: str) -> str:
|
|
42
|
+
"""Converts a name to a safe Go package name"""
|
|
43
|
+
if name in self.GO_RESERVED_WORDS:
|
|
42
44
|
return f"{name}_"
|
|
43
45
|
return name
|
|
44
46
|
|
|
47
|
+
def safe_identifier(self, name: str, fallback_prefix: str = 'field') -> str:
    """Turn an arbitrary schema name into a usable Go identifier.

    Steps, in order:
    - every character outside ``[a-zA-Z0-9_]`` becomes ``_``
    - if that substitution changed the name, leading and trailing
      underscores are trimmed
    - an empty result becomes ``<fallback_prefix>_unnamed``
    - a result starting with a digit gets a leading ``_``
    - a result found in ``GO_RESERVED_WORDS`` gets a trailing ``_``

    Args:
        name: The raw name taken from the schema.
        fallback_prefix: Prefix used when nothing usable is left of ``name``.

    Returns:
        The sanitised identifier.
    """
    import re

    sanitized = re.sub(r'[^a-zA-Z0-9_]', '_', str(name))
    # Only trim when the substitution altered the input, so a name that was
    # already valid keeps its own leading/trailing underscores.
    if sanitized != name:
        sanitized = sanitized.strip('_')

    if not sanitized:
        # Nothing usable survived sanitisation.
        sanitized = fallback_prefix + '_' + 'unnamed'
    elif sanitized[0] in '0123456789':
        # Prefix an underscore so the identifier does not start with a digit.
        sanitized = '_' + sanitized

    if sanitized in self.GO_RESERVED_WORDS:
        return sanitized + '_'
    return sanitized
|
|
71
|
+
|
|
45
72
|
def go_type_name(self, name: str, namespace: str = '') -> str:
|
|
46
73
|
"""Returns a qualified name for a Go struct or enum"""
|
|
47
74
|
if namespace:
|
|
@@ -675,7 +702,8 @@ class StructureToGo:
|
|
|
675
702
|
def convert(self, structure_schema_path: str, output_dir: str):
|
|
676
703
|
"""Converts JSON Structure schema to Go"""
|
|
677
704
|
if not self.base_package:
|
|
678
|
-
|
|
705
|
+
pkg_name = os.path.splitext(os.path.basename(structure_schema_path))[0].replace('-', '_').lower()
|
|
706
|
+
self.base_package = self._safe_package_name(pkg_name)
|
|
679
707
|
|
|
680
708
|
with open(structure_schema_path, 'r', encoding='utf-8') as file:
|
|
681
709
|
schema = json.load(file)
|