structurize 2.21.0__tar.gz → 2.22.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. {structurize-2.21.0/structurize.egg-info → structurize-2.22.0}/PKG-INFO +1 -1
  2. {structurize-2.21.0 → structurize-2.22.0}/avrotize/_version.py +3 -3
  3. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotogo.py +21 -8
  4. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotojava.py +67 -5
  5. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotopython.py +48 -2
  6. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotorust.py +64 -26
  7. {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretocsharp.py +42 -11
  8. {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretodb.py +21 -0
  9. {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretogo.py +38 -10
  10. {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretojava.py +114 -8
  11. {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretopython.py +100 -19
  12. {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretots.py +8 -5
  13. {structurize-2.21.0 → structurize-2.22.0/structurize.egg-info}/PKG-INFO +1 -1
  14. {structurize-2.21.0 → structurize-2.22.0}/.gitignore +0 -0
  15. {structurize-2.21.0 → structurize-2.22.0}/LICENSE +0 -0
  16. {structurize-2.21.0 → structurize-2.22.0}/MANIFEST.in +0 -0
  17. {structurize-2.21.0 → structurize-2.22.0}/README.md +0 -0
  18. {structurize-2.21.0 → structurize-2.22.0}/avrotize/__init__.py +0 -0
  19. {structurize-2.21.0 → structurize-2.22.0}/avrotize/__main__.py +0 -0
  20. {structurize-2.21.0 → structurize-2.22.0}/avrotize/asn1toavro.py +0 -0
  21. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotize.py +0 -0
  22. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotocpp.py +0 -0
  23. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotocsharp.py +0 -0
  24. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotocsv.py +0 -0
  25. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotodatapackage.py +0 -0
  26. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotodb.py +0 -0
  27. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotographql.py +0 -0
  28. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotoiceberg.py +0 -0
  29. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotojs.py +0 -0
  30. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotojsons.py +0 -0
  31. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotojstruct.py +0 -0
  32. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotokusto.py +0 -0
  33. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotomd.py +0 -0
  34. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotools.py +0 -0
  35. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotoparquet.py +0 -0
  36. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotoproto.py +0 -0
  37. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotots.py +0 -0
  38. {structurize-2.21.0 → structurize-2.22.0}/avrotize/avrotoxsd.py +0 -0
  39. {structurize-2.21.0 → structurize-2.22.0}/avrotize/cddltostructure.py +0 -0
  40. {structurize-2.21.0 → structurize-2.22.0}/avrotize/commands.json +0 -0
  41. {structurize-2.21.0 → structurize-2.22.0}/avrotize/common.py +0 -0
  42. {structurize-2.21.0 → structurize-2.22.0}/avrotize/constants.py +0 -0
  43. {structurize-2.21.0 → structurize-2.22.0}/avrotize/csvtoavro.py +0 -0
  44. {structurize-2.21.0 → structurize-2.22.0}/avrotize/datapackagetoavro.py +0 -0
  45. {structurize-2.21.0 → structurize-2.22.0}/avrotize/dependencies/cpp/vcpkg/vcpkg.json +0 -0
  46. {structurize-2.21.0 → structurize-2.22.0}/avrotize/dependencies/typescript/node22/package.json +0 -0
  47. {structurize-2.21.0 → structurize-2.22.0}/avrotize/dependency_resolver.py +0 -0
  48. {structurize-2.21.0 → structurize-2.22.0}/avrotize/dependency_version.py +0 -0
  49. {structurize-2.21.0 → structurize-2.22.0}/avrotize/jsonstoavro.py +0 -0
  50. {structurize-2.21.0 → structurize-2.22.0}/avrotize/jsonstostructure.py +0 -0
  51. {structurize-2.21.0 → structurize-2.22.0}/avrotize/jstructtoavro.py +0 -0
  52. {structurize-2.21.0 → structurize-2.22.0}/avrotize/kstructtoavro.py +0 -0
  53. {structurize-2.21.0 → structurize-2.22.0}/avrotize/kustotoavro.py +0 -0
  54. {structurize-2.21.0 → structurize-2.22.0}/avrotize/openapitostructure.py +0 -0
  55. {structurize-2.21.0 → structurize-2.22.0}/avrotize/parquettoavro.py +0 -0
  56. {structurize-2.21.0 → structurize-2.22.0}/avrotize/proto2parser.py +0 -0
  57. {structurize-2.21.0 → structurize-2.22.0}/avrotize/proto3parser.py +0 -0
  58. {structurize-2.21.0 → structurize-2.22.0}/avrotize/prototoavro.py +0 -0
  59. {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretocddl.py +0 -0
  60. {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretocpp.py +0 -0
  61. {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretocsv.py +0 -0
  62. {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretodatapackage.py +0 -0
  63. {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretographql.py +0 -0
  64. {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretoiceberg.py +0 -0
  65. {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretojs.py +0 -0
  66. {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretojsons.py +0 -0
  67. {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretokusto.py +0 -0
  68. {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretomd.py +0 -0
  69. {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretoproto.py +0 -0
  70. {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretorust.py +0 -0
  71. {structurize-2.21.0 → structurize-2.22.0}/avrotize/structuretoxsd.py +0 -0
  72. {structurize-2.21.0 → structurize-2.22.0}/avrotize/xsdtoavro.py +0 -0
  73. {structurize-2.21.0 → structurize-2.22.0}/build.ps1 +0 -0
  74. {structurize-2.21.0 → structurize-2.22.0}/build.sh +0 -0
  75. {structurize-2.21.0 → structurize-2.22.0}/pyproject.toml +0 -0
  76. {structurize-2.21.0 → structurize-2.22.0}/setup.cfg +0 -0
  77. {structurize-2.21.0 → structurize-2.22.0}/structurize.egg-info/SOURCES.txt +0 -0
  78. {structurize-2.21.0 → structurize-2.22.0}/structurize.egg-info/dependency_links.txt +0 -0
  79. {structurize-2.21.0 → structurize-2.22.0}/structurize.egg-info/entry_points.txt +0 -0
  80. {structurize-2.21.0 → structurize-2.22.0}/structurize.egg-info/requires.txt +0 -0
  81. {structurize-2.21.0 → structurize-2.22.0}/structurize.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: structurize
3
- Version: 2.21.0
3
+ Version: 2.22.0
4
4
  Summary: Tools to convert from and to JSON Structure from various other schema languages.
5
5
  Author-email: Clemens Vasters <clemensv@microsoft.com>
6
6
  Classifier: Programming Language :: Python :: 3
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '2.21.0'
32
- __version_tuple__ = version_tuple = (2, 21, 0)
31
+ __version__ = version = '2.22.0'
32
+ __version_tuple__ = version_tuple = (2, 22, 0)
33
33
 
34
- __commit_id__ = commit_id = 'g1df522119'
34
+ __commit_id__ = commit_id = 'g5d3e04df0'
@@ -10,8 +10,15 @@ JsonNode = Dict[str, 'JsonNode'] | List['JsonNode'] | str | None
10
10
  class AvroToGo:
11
11
  """Converts Avro schema to Go structs, including JSON and Avro marshalling methods"""
12
12
 
13
+ # Go reserved keywords that cannot be used as package names
14
+ GO_RESERVED_WORDS = [
15
+ 'break', 'default', 'func', 'interface', 'select', 'case', 'defer', 'go', 'map', 'struct', 'chan',
16
+ 'else', 'goto', 'package', 'switch', 'const', 'fallthrough', 'if', 'range', 'type', 'continue', 'for',
17
+ 'import', 'return', 'var',
18
+ ]
19
+
13
20
  def __init__(self, base_package: str = '') -> None:
14
- self.base_package = base_package
21
+ self.base_package = self._safe_package_name(base_package) if base_package else base_package
15
22
  self.output_dir = os.getcwd()
16
23
  self.generated_types_avro_namespace: Dict[str, str] = {}
17
24
  self.generated_types_go_package: Dict[str, str] = {}
@@ -25,14 +32,15 @@ class AvroToGo:
25
32
  self.structs = []
26
33
  self.enums = []
27
34
 
35
+ def _safe_package_name(self, name: str) -> str:
36
+ """Converts a name to a safe Go package name"""
37
+ if name in self.GO_RESERVED_WORDS:
38
+ return f"{name}_"
39
+ return name
40
+
28
41
  def safe_identifier(self, name: str) -> str:
29
42
  """Converts a name to a safe Go identifier"""
30
- reserved_words = [
31
- 'break', 'default', 'func', 'interface', 'select', 'case', 'defer', 'go', 'map', 'struct', 'chan',
32
- 'else', 'goto', 'package', 'switch', 'const', 'fallthrough', 'if', 'range', 'type', 'continue', 'for',
33
- 'import', 'return', 'var',
34
- ]
35
- if name in reserved_words:
43
+ if name in self.GO_RESERVED_WORDS:
36
44
  return f"{name}_"
37
45
  return name
38
46
 
@@ -157,6 +165,10 @@ class AvroToGo:
157
165
  'original_name': field['name']
158
166
  } for field in avro_schema.get('fields', [])]
159
167
 
168
+ # Collect imports from field types
169
+ go_types = [f['type'] for f in fields]
170
+ imports = self.get_imports_for_definition(go_types)
171
+
160
172
  context = {
161
173
  'doc': avro_schema.get('doc', ''),
162
174
  'struct_name': go_struct_name,
@@ -166,6 +178,7 @@ class AvroToGo:
166
178
  'avro_annotation': self.avro_annotation,
167
179
  'json_match_predicates': [self.get_is_json_match_clause(f['name'], f['type']) for f in fields],
168
180
  'base_package': self.base_package,
181
+ 'imports': imports,
169
182
  }
170
183
 
171
184
  pkg_dir = os.path.join(self.output_dir, 'pkg', self.base_package)
@@ -430,7 +443,7 @@ class AvroToGo:
430
443
  def convert(self, avro_schema_path: str, output_dir: str):
431
444
  """Converts Avro schema to Go"""
432
445
  if not self.base_package:
433
- self.base_package = os.path.splitext(os.path.basename(avro_schema_path))[0]
446
+ self.base_package = self._safe_package_name(os.path.splitext(os.path.basename(avro_schema_path))[0])
434
447
 
435
448
  with open(avro_schema_path, 'r', encoding='utf-8') as file:
436
449
  schema = json.load(file)
@@ -1721,6 +1721,51 @@ class AvroToJava:
1721
1721
  def get_test_imports(self, fields: List) -> List[str]:
1722
1722
  """ Gets the necessary imports for the test class """
1723
1723
  imports = []
1724
+
1725
+ # Track simple names to detect conflicts
1726
+ # Map: simple_name -> list of FQNs that have that simple name
1727
+ simple_name_to_fqns: Dict[str, List[str]] = {}
1728
+
1729
+ # First pass: collect all custom type FQNs and their simple names
1730
+ for field in fields:
1731
+ inner_types = []
1732
+ if field.field_type.startswith("List<"):
1733
+ inner_type = field.field_type[5:-1]
1734
+ if inner_type.startswith("Map<"):
1735
+ start = inner_type.index('<') + 1
1736
+ end = inner_type.rindex('>')
1737
+ map_types = inner_type[start:end].split(',')
1738
+ if len(map_types) > 1:
1739
+ inner_types.append(map_types[1].strip())
1740
+ else:
1741
+ inner_types.append(inner_type)
1742
+ elif field.field_type.startswith("Map<"):
1743
+ start = field.field_type.index('<') + 1
1744
+ end = field.field_type.rindex('>')
1745
+ map_types = field.field_type[start:end].split(',')
1746
+ if len(map_types) > 1:
1747
+ inner_types.append(map_types[1].strip())
1748
+ if not field.field_type.startswith(("List<", "Map<")):
1749
+ inner_types.append(field.field_type)
1750
+ if hasattr(field, 'java_type_obj') and field.java_type_obj and field.java_type_obj.union_types:
1751
+ for union_member_type in field.java_type_obj.union_types:
1752
+ inner_types.append(union_member_type.type_name)
1753
+
1754
+ for type_to_check in inner_types:
1755
+ if type_to_check in self.generated_types_java_package and '.' in type_to_check:
1756
+ simple_name = type_to_check.split('.')[-1]
1757
+ if simple_name not in simple_name_to_fqns:
1758
+ simple_name_to_fqns[simple_name] = []
1759
+ if type_to_check not in simple_name_to_fqns[simple_name]:
1760
+ simple_name_to_fqns[simple_name].append(type_to_check)
1761
+
1762
+ # Find conflicting simple names (same simple name, different FQNs)
1763
+ conflicting_fqns: set = set()
1764
+ for simple_name, fqns in simple_name_to_fqns.items():
1765
+ if len(fqns) > 1:
1766
+ # This simple name has conflicts - mark all FQNs as conflicting
1767
+ conflicting_fqns.update(fqns)
1768
+
1724
1769
  for field in fields:
1725
1770
  # Extract inner types from generic collections
1726
1771
  inner_types = []
@@ -1772,7 +1817,8 @@ class AvroToJava:
1772
1817
  if type_to_check in self.generated_types_java_package:
1773
1818
  type_kind = self.generated_types_java_package[type_to_check]
1774
1819
  # Only import if it's a fully qualified name with a package
1775
- if '.' in type_to_check:
1820
+ # Skip imports for types with conflicting simple names - they'll use FQN
1821
+ if '.' in type_to_check and type_to_check not in conflicting_fqns:
1776
1822
  import_stmt = f"import {type_to_check};"
1777
1823
  if import_stmt not in imports:
1778
1824
  imports.append(import_stmt)
@@ -1809,10 +1855,11 @@ class AvroToJava:
1809
1855
  if java_qualified_name:
1810
1856
  if java_qualified_name in self.generated_types_java_package or java_qualified_name.split('.')[-1] in self.generated_types_java_package:
1811
1857
  member_type_kind = self.generated_types_java_package.get(java_qualified_name, self.generated_types_java_package.get(java_qualified_name.split('.')[-1], None))
1812
- # Import the class/enum
1813
- class_import = f"import {java_qualified_name};"
1814
- if class_import not in imports:
1815
- imports.append(class_import)
1858
+ # Import the class/enum only if not conflicting
1859
+ if java_qualified_name not in conflicting_fqns:
1860
+ class_import = f"import {java_qualified_name};"
1861
+ if class_import not in imports:
1862
+ imports.append(class_import)
1816
1863
  # No longer import test classes - we instantiate classes directly
1817
1864
  return imports
1818
1865
 
@@ -1920,6 +1967,21 @@ class AvroToJava:
1920
1967
  'Double': 'Double.valueOf(3.14)',
1921
1968
  'byte[]': 'new byte[] { 0x01, 0x02, 0x03 }',
1922
1969
  'Object': 'null', # Use null for Object types (Avro unions) to avoid reference equality issues
1970
+ # Java time types - use factory methods, not constructors
1971
+ 'Instant': 'java.time.Instant.now()',
1972
+ 'java.time.Instant': 'java.time.Instant.now()',
1973
+ 'LocalDate': 'java.time.LocalDate.now()',
1974
+ 'java.time.LocalDate': 'java.time.LocalDate.now()',
1975
+ 'LocalTime': 'java.time.LocalTime.now()',
1976
+ 'java.time.LocalTime': 'java.time.LocalTime.now()',
1977
+ 'LocalDateTime': 'java.time.LocalDateTime.now()',
1978
+ 'java.time.LocalDateTime': 'java.time.LocalDateTime.now()',
1979
+ 'Duration': 'java.time.Duration.ofSeconds(42)',
1980
+ 'java.time.Duration': 'java.time.Duration.ofSeconds(42)',
1981
+ 'UUID': 'java.util.UUID.randomUUID()',
1982
+ 'java.util.UUID': 'java.util.UUID.randomUUID()',
1983
+ 'BigDecimal': 'new java.math.BigDecimal("42.00")',
1984
+ 'java.math.BigDecimal': 'new java.math.BigDecimal("42.00")',
1923
1985
  }
1924
1986
 
1925
1987
  # Handle generic types
@@ -12,6 +12,38 @@ from avrotize.common import fullname, get_typing_args_from_string, is_generic_av
12
12
 
13
13
  INDENT = ' '
14
14
 
15
+ # Python standard library modules that should not be shadowed by package names
16
+ PYTHON_STDLIB_MODULES = {
17
+ 'abc', 'aifc', 'argparse', 'array', 'ast', 'asynchat', 'asyncio', 'asyncore',
18
+ 'atexit', 'audioop', 'base64', 'bdb', 'binascii', 'binhex', 'bisect', 'builtins',
19
+ 'bz2', 'calendar', 'cgi', 'cgitb', 'chunk', 'cmath', 'cmd', 'code', 'codecs',
20
+ 'codeop', 'collections', 'colorsys', 'compileall', 'concurrent', 'configparser',
21
+ 'contextlib', 'contextvars', 'copy', 'copyreg', 'cProfile', 'crypt', 'csv',
22
+ 'ctypes', 'curses', 'dataclasses', 'datetime', 'dbm', 'decimal', 'difflib',
23
+ 'dis', 'distutils', 'doctest', 'email', 'encodings', 'enum', 'errno', 'faulthandler',
24
+ 'fcntl', 'filecmp', 'fileinput', 'fnmatch', 'fractions', 'ftplib', 'functools',
25
+ 'gc', 'getopt', 'getpass', 'gettext', 'glob', 'graphlib', 'grp', 'gzip',
26
+ 'hashlib', 'heapq', 'hmac', 'html', 'http', 'imaplib', 'imghdr', 'imp',
27
+ 'importlib', 'inspect', 'io', 'ipaddress', 'itertools', 'json', 'keyword',
28
+ 'lib2to3', 'linecache', 'locale', 'logging', 'lzma', 'mailbox', 'mailcap',
29
+ 'marshal', 'math', 'mimetypes', 'mmap', 'modulefinder', 'multiprocessing',
30
+ 'netrc', 'nis', 'nntplib', 'numbers', 'operator', 'optparse', 'os', 'ossaudiodev',
31
+ 'pathlib', 'pdb', 'pickle', 'pickletools', 'pipes', 'pkgutil', 'platform',
32
+ 'plistlib', 'poplib', 'posix', 'posixpath', 'pprint', 'profile', 'pstats',
33
+ 'pty', 'pwd', 'py_compile', 'pyclbr', 'pydoc', 'queue', 'quopri', 'random',
34
+ 're', 'readline', 'reprlib', 'resource', 'rlcompleter', 'runpy', 'sched',
35
+ 'secrets', 'select', 'selectors', 'shelve', 'shlex', 'shutil', 'signal',
36
+ 'site', 'smtpd', 'smtplib', 'sndhdr', 'socket', 'socketserver', 'spwd',
37
+ 'sqlite3', 'ssl', 'stat', 'statistics', 'string', 'stringprep', 'struct',
38
+ 'subprocess', 'sunau', 'symtable', 'sys', 'sysconfig', 'syslog', 'tabnanny',
39
+ 'tarfile', 'telnetlib', 'tempfile', 'termios', 'test', 'textwrap', 'threading',
40
+ 'time', 'timeit', 'tkinter', 'token', 'tokenize', 'trace', 'traceback',
41
+ 'tracemalloc', 'tty', 'turtle', 'turtledemo', 'types', 'typing', 'unicodedata',
42
+ 'unittest', 'urllib', 'uu', 'uuid', 'venv', 'warnings', 'wave', 'weakref',
43
+ 'webbrowser', 'winreg', 'winsound', 'wsgiref', 'xdrlib', 'xml', 'xmlrpc',
44
+ 'zipapp', 'zipfile', 'zipimport', 'zlib', 'zoneinfo',
45
+ }
46
+
15
47
 
16
48
  def is_python_reserved_word(word: str) -> bool:
17
49
  """Checks if a word is a Python reserved word"""
@@ -25,6 +57,13 @@ def is_python_reserved_word(word: str) -> bool:
25
57
  return word in reserved_words
26
58
 
27
59
 
60
+ def safe_package_name(name: str) -> str:
61
+ """Converts a name to a safe Python package name that won't shadow stdlib"""
62
+ if name.lower() in PYTHON_STDLIB_MODULES:
63
+ return f"{name}_types"
64
+ return name
65
+
66
+
28
67
  class AvroToPython:
29
68
  """Converts Avro schema to Python data classes"""
30
69
 
@@ -167,6 +206,9 @@ class AvroToPython:
167
206
  enum_ref = self.generate_enum(avro_type, parent_package, write_file=True)
168
207
  import_types.add(enum_ref)
169
208
  return self.strip_package_from_fully_qualified_name(enum_ref)
209
+ elif avro_type['type'] == 'fixed':
210
+ # Fixed types are represented as bytes in Python
211
+ return 'bytes'
170
212
  elif avro_type['type'] == 'array':
171
213
  return f"typing.List[{self.convert_avro_type_to_python(avro_type['items'], parent_package, import_types)}]"
172
214
  elif avro_type['type'] == 'map':
@@ -327,7 +369,8 @@ class AvroToPython:
327
369
  def generate_test_class(self, package_name: str, class_name: str, fields: List[Dict[str, str]], import_types: Set[str]) -> None:
328
370
  """Generates a unit test class for a Python data class"""
329
371
  test_class_name = f"Test_{class_name}"
330
- tests_package_name = "test_"+package_name.replace('.', '_').lower()
372
+ flat_package = package_name.replace('.', '_').lower()
373
+ tests_package_name = flat_package if flat_package.startswith('test_') else f"test_{flat_package}"
331
374
  test_class_definition = process_template(
332
375
  "avrotopython/test_class.jinja",
333
376
  package_name=package_name,
@@ -348,7 +391,8 @@ class AvroToPython:
348
391
  def generate_test_enum(self, package_name: str, class_name: str, symbols: List[str]) -> None:
349
392
  """Generates a unit test class for a Python enum"""
350
393
  test_class_name = f"Test_{class_name}"
351
- tests_package_name = "test_"+package_name.replace('.', '_').lower()
394
+ flat_package = package_name.replace('.', '_').lower()
395
+ tests_package_name = flat_package if flat_package.startswith('test_') else f"test_{flat_package}"
352
396
  test_class_definition = process_template(
353
397
  "avrotopython/test_enum.jinja",
354
398
  package_name=package_name,
@@ -609,6 +653,7 @@ def convert_avro_to_python(avro_schema_path, py_file_path, package_name='', data
609
653
  if not package_name:
610
654
  package_name = os.path.splitext(os.path.basename(avro_schema_path))[
611
655
  0].lower().replace('-', '_')
656
+ package_name = safe_package_name(package_name)
612
657
 
613
658
  avro_to_python = AvroToPython(
614
659
  package_name, dataclasses_json_annotation=dataclasses_json_annotation, avro_annotation=avro_annotation)
@@ -617,6 +662,7 @@ def convert_avro_to_python(avro_schema_path, py_file_path, package_name='', data
617
662
 
618
663
  def convert_avro_schema_to_python(avro_schema, py_file_path, package_name='', dataclasses_json_annotation=False, avro_annotation=False):
619
664
  """Converts Avro schema to Python data classes"""
665
+ package_name = safe_package_name(package_name) if package_name else package_name
620
666
  avro_to_python = AvroToPython(
621
667
  package_name, dataclasses_json_annotation=dataclasses_json_annotation, avro_annotation=avro_annotation)
622
668
  if isinstance(avro_schema, dict):
@@ -144,12 +144,15 @@ class AvroToRust:
144
144
  field_name = self.safe_identifier(snake(original_field_name))
145
145
  field_type = self.convert_avro_type_to_rust(field_name, field['type'], parent_namespace)
146
146
  serde_rename = field_name != original_field_name
147
+ # Check if this is a generated type (enum, union, or record) where random values may match default
148
+ is_generated_type = field_type in self.generated_types_rust_package or '::' in field_type
147
149
  fields.append({
148
150
  'original_name': original_field_name,
149
151
  'name': field_name,
150
152
  'type': field_type,
151
153
  'serde_rename': serde_rename,
152
- 'random_value': self.generate_random_value(field_type)
154
+ 'random_value': self.generate_random_value(field_type),
155
+ 'is_generated_type': is_generated_type
153
156
  })
154
157
 
155
158
  struct_name = self.safe_identifier(pascal(avro_schema['name']))
@@ -187,28 +190,51 @@ class AvroToRust:
187
190
  def get_is_json_match_clause(self, field_name: str, field_type: str, for_union=False) -> str:
188
191
  """Generates the is_json_match clause for a field"""
189
192
  ref = f'node[\"{field_name}\"]' if not for_union else 'node'
190
- if field_type == 'String' or field_type == 'Option<String>':
191
- return f"{ref}.is_string()"
192
- elif field_type == 'bool' or field_type == 'Option<bool>':
193
- return f"{ref}.is_boolean()"
194
- elif field_type == 'i32' or field_type == 'Option<i32>':
195
- return f"{ref}.is_i64()"
196
- elif field_type == 'i64' or field_type == 'Option<i64>':
197
- return f"{ref}.is_i64()"
198
- elif field_type == 'f32' or field_type == 'Option<f32>':
199
- return f"{ref}.is_f64()"
200
- elif field_type == 'f64' or field_type == 'Option<f64>':
201
- return f"{ref}.is_f64()"
202
- elif field_type == 'Vec<u8>' or field_type == 'Option<Vec<u8>>':
203
- return f"{ref}.is_array()"
204
- elif field_type == 'serde_json::Value' or field_type == 'std::collections::HashMap<String, String>':
205
- return f"{ref}.is_object()"
206
- elif field_type.startswith('std::collections::HashMap<String, '):
207
- return f"{ref}.is_object()"
208
- elif field_type.startswith('Vec<'):
209
- return f"{ref}.is_array()"
193
+
194
+ # Check if type is optional - if so, we need to allow null values
195
+ is_optional = field_type.startswith('Option<')
196
+ base_type = field_type[7:-1] if is_optional else field_type
197
+ null_check = f" || {ref}.is_null()" if is_optional else ""
198
+
199
+ # serde_json::Value can be any JSON type, so always return true
200
+ if base_type == 'serde_json::Value':
201
+ return "true"
202
+
203
+ if base_type == 'String':
204
+ return f"({ref}.is_string(){null_check})"
205
+ elif base_type == 'bool':
206
+ return f"({ref}.is_boolean(){null_check})"
207
+ elif base_type == 'i32':
208
+ return f"({ref}.is_i64(){null_check})"
209
+ elif base_type == 'i64':
210
+ return f"({ref}.is_i64(){null_check})"
211
+ elif base_type == 'f32':
212
+ return f"({ref}.is_f64(){null_check})"
213
+ elif base_type == 'f64':
214
+ return f"({ref}.is_f64(){null_check})"
215
+ elif base_type == 'Vec<u8>':
216
+ return f"({ref}.is_array(){null_check})"
217
+ elif base_type == 'std::collections::HashMap<String, String>':
218
+ return f"({ref}.is_object(){null_check})"
219
+ elif base_type.startswith('std::collections::HashMap<String, '):
220
+ return f"({ref}.is_object(){null_check})"
221
+ elif base_type.startswith('Vec<'):
222
+ return f"({ref}.is_array(){null_check})"
223
+ # chrono types - check for string (ISO 8601 format) or number (timestamp)
224
+ elif 'chrono::NaiveDateTime' in base_type or 'NaiveDateTime' in base_type:
225
+ return f"({ref}.is_string() || {ref}.is_i64(){null_check})"
226
+ elif 'chrono::NaiveDate' in base_type or 'NaiveDate' in base_type:
227
+ return f"({ref}.is_string() || {ref}.is_i64(){null_check})"
228
+ elif 'chrono::NaiveTime' in base_type or 'NaiveTime' in base_type:
229
+ return f"({ref}.is_string() || {ref}.is_i64(){null_check})"
230
+ # uuid type - check for string
231
+ elif 'uuid::Uuid' in base_type or 'Uuid' in base_type:
232
+ return f"({ref}.is_string(){null_check})"
210
233
  else:
211
- return f"{field_type}::is_json_match(&{ref})"
234
+ # Custom types - call their is_json_match method
235
+ if is_optional:
236
+ return f"({base_type}::is_json_match(&{ref}) || {ref}.is_null())"
237
+ return f"{base_type}::is_json_match(&{ref})"
212
238
 
213
239
 
214
240
  def generate_enum(self, avro_schema: Dict, parent_namespace: str) -> str:
@@ -250,17 +276,29 @@ class AvroToRust:
250
276
  ns = namespace.replace('.', '::').lower()
251
277
  union_enum_name = pascal(field_name) + 'Union'
252
278
  union_types = [self.convert_avro_type_to_rust(field_name + "Option" + str(i), t, namespace) for i, t in enumerate(avro_type) if t != 'null']
253
- union_fields = [
254
- {
279
+
280
+ # Track seen predicates to identify structurally identical variants
281
+ seen_predicates: set = set()
282
+ union_fields = []
283
+ for i, t in enumerate(union_types):
284
+ predicate = self.get_is_json_match_clause(field_name, t, for_union=True)
285
+ # Mark if this is the first variant with this predicate structure
286
+ # Subsequent variants with same predicate can't be distinguished during JSON deserialization
287
+ is_first_with_predicate = predicate not in seen_predicates
288
+ seen_predicates.add(predicate)
289
+ union_fields.append({
255
290
  'name': pascal(t.rsplit('::',1)[-1]),
256
291
  'type': t,
257
292
  'random_value': self.generate_random_value(t),
258
293
  'default_value': 'Default::default()',
259
- 'json_match_predicate': self.get_is_json_match_clause(field_name, t, for_union=True),
260
- } for i, t in enumerate(union_types)]
294
+ 'json_match_predicate': predicate,
295
+ 'is_first_with_predicate': is_first_with_predicate,
296
+ })
297
+
261
298
  qualified_union_enum_name = self.safe_package(self.concat_package(ns, union_enum_name))
262
299
  context = {
263
300
  'serde_annotation': self.serde_annotation,
301
+ 'avro_annotation': self.avro_annotation,
264
302
  'union_enum_name': union_enum_name,
265
303
  'union_fields': union_fields,
266
304
  'json_match_predicates': [self.get_is_json_match_clause(f['name'], f['type'], for_union=True) for f in union_fields]
@@ -143,6 +143,35 @@ class StructureToCSharp:
143
143
  ]
144
144
  return word in reserved_words
145
145
 
146
+ def safe_identifier(self, name: str, class_name: str = '', fallback_prefix: str = 'field') -> str:
147
+ """Converts a name to a safe C# identifier.
148
+
149
+ Handles:
150
+ - Reserved words (prepend @)
151
+ - Numeric prefixes (prepend _)
152
+ - Special characters (replace with _)
153
+ - All-special-char names (use fallback_prefix)
154
+ - Class name collision (append _)
155
+ """
156
+ import re
157
+ # Replace invalid characters with underscores
158
+ safe = re.sub(r'[^a-zA-Z0-9_]', '_', str(name))
159
+ # Remove leading/trailing underscores from sanitization
160
+ safe = safe.strip('_') if safe != name else safe
161
+ # If nothing left after removing special chars, use fallback
162
+ if not safe or not re.match(r'^[a-zA-Z_@]', safe):
163
+ if safe and re.match(r'^[0-9]', safe):
164
+ safe = '_' + safe # Numeric prefix
165
+ else:
166
+ safe = fallback_prefix + '_' + (safe if safe else 'unnamed')
167
+ # Handle reserved words with @ prefix
168
+ if self.is_csharp_reserved_word(safe):
169
+ safe = '@' + safe
170
+ # Handle class name collision
171
+ if class_name and safe == class_name:
172
+ safe = safe + '_'
173
+ return safe
174
+
146
175
  def is_csharp_primitive_type(self, csharp_type: str) -> bool:
147
176
  """ Checks if a type is a C# primitive type """
148
177
  if csharp_type.endswith('?'):
@@ -416,16 +445,18 @@ class StructureToCSharp:
416
445
  """ Generates a property for a class """
417
446
  property_definition = ''
418
447
 
419
- # Resolve property name
420
- field_name = prop_name
421
- if self.is_csharp_reserved_word(field_name):
422
- field_name = f"@{field_name}"
448
+ # Resolve property name using safe_identifier for special chars, numeric prefixes, etc.
449
+ field_name = self.safe_identifier(prop_name, class_name)
423
450
  if self.pascal_properties:
424
- field_name_cs = pascal(field_name)
451
+ field_name_cs = pascal(field_name.lstrip('@'))
452
+ # Re-check for class name collision after pascal casing
453
+ if field_name_cs == class_name:
454
+ field_name_cs += "_"
425
455
  else:
426
456
  field_name_cs = field_name
427
- if field_name_cs == class_name:
428
- field_name_cs += "_"
457
+
458
+ # Track if field name differs from original for JSON annotation
459
+ needs_json_annotation = field_name_cs != prop_name
429
460
 
430
461
  # Check if this is a const field
431
462
  if 'const' in prop_schema:
@@ -442,9 +473,9 @@ class StructureToCSharp:
442
473
 
443
474
  # Add JSON property name annotation when property name differs from schema name
444
475
  # This is needed for proper JSON serialization/deserialization, especially with pascal_properties
445
- if field_name != field_name_cs:
476
+ if needs_json_annotation:
446
477
  property_definition += f'{INDENT}[System.Text.Json.Serialization.JsonPropertyName("{prop_name}")]\n'
447
- if self.newtonsoft_json_annotation and field_name != field_name_cs:
478
+ if self.newtonsoft_json_annotation and needs_json_annotation:
448
479
  property_definition += f'{INDENT}[Newtonsoft.Json.JsonProperty("{prop_name}")]\n'
449
480
 
450
481
  # Add XML element annotation if enabled
@@ -473,9 +504,9 @@ class StructureToCSharp:
473
504
 
474
505
  # Add JSON property name annotation when property name differs from schema name
475
506
  # This is needed for proper JSON serialization/deserialization, especially with pascal_properties
476
- if field_name != field_name_cs:
507
+ if needs_json_annotation:
477
508
  property_definition += f'{INDENT}[System.Text.Json.Serialization.JsonPropertyName("{prop_name}")]\n'
478
- if self.newtonsoft_json_annotation and field_name != field_name_cs:
509
+ if self.newtonsoft_json_annotation and needs_json_annotation:
479
510
  property_definition += f'{INDENT}[Newtonsoft.Json.JsonProperty("{prop_name}")]\n'
480
511
 
481
512
  # Add XML element annotation if enabled
@@ -443,6 +443,27 @@ def structure_type_to_sql_type(structure_type: Any, dialect: str) -> str:
443
443
  struct_type = structure_type.get("type", "string")
444
444
  if struct_type in ["array", "set", "map", "object", "choice", "tuple"]:
445
445
  return type_map[dialect][struct_type]
446
+
447
+ # Handle string type with maxLength annotation
448
+ if struct_type == "string" and "maxLength" in structure_type:
449
+ max_length = structure_type["maxLength"]
450
+ if dialect == "sqlserver" or dialect == "sqlanywhere":
451
+ return f"NVARCHAR({max_length})"
452
+ elif dialect in ["postgres", "redshift", "db2"]:
453
+ return f"VARCHAR({max_length})"
454
+ elif dialect in ["mysql", "mariadb"]:
455
+ return f"VARCHAR({max_length})"
456
+ elif dialect == "sqlite":
457
+ return f"VARCHAR({max_length})"
458
+ elif dialect == "oracle":
459
+ return f"VARCHAR2({max_length})"
460
+ elif dialect == "bigquery":
461
+ return f"STRING({max_length})"
462
+ elif dialect == "snowflake":
463
+ return f"VARCHAR({max_length})"
464
+ else:
465
+ return f"VARCHAR({max_length})"
466
+
446
467
  return structure_type_to_sql_type(struct_type, dialect)
447
468
 
448
469
  return type_map.get(dialect, type_map["postgres"])["string"]
@@ -16,8 +16,15 @@ INDENT = ' '
16
16
  class StructureToGo:
17
17
  """ Converts JSON Structure schema to Go structs """
18
18
 
19
+ # Go reserved keywords that cannot be used as package names
20
+ GO_RESERVED_WORDS = [
21
+ 'break', 'default', 'func', 'interface', 'select', 'case', 'defer', 'go', 'map', 'struct', 'chan',
22
+ 'else', 'goto', 'package', 'switch', 'const', 'fallthrough', 'if', 'range', 'type', 'continue', 'for',
23
+ 'import', 'return', 'var',
24
+ ]
25
+
19
26
  def __init__(self, base_package: str = '') -> None:
20
- self.base_package = base_package
27
+ self.base_package = self._safe_package_name(base_package) if base_package else base_package
21
28
  self.output_dir = os.getcwd()
22
29
  self.json_annotation = False
23
30
  self.avro_annotation = False
@@ -31,17 +38,37 @@ class StructureToGo:
31
38
  self.structs: List[Dict] = []
32
39
  self.enums: List[Dict] = []
33
40
 
34
- def safe_identifier(self, name: str) -> str:
35
- """Converts a name to a safe Go identifier"""
36
- reserved_words = [
37
- 'break', 'default', 'func', 'interface', 'select', 'case', 'defer', 'go', 'map', 'struct', 'chan',
38
- 'else', 'goto', 'package', 'switch', 'const', 'fallthrough', 'if', 'range', 'type', 'continue', 'for',
39
- 'import', 'return', 'var',
40
- ]
41
- if name in reserved_words:
41
+ def _safe_package_name(self, name: str) -> str:
42
+ """Converts a name to a safe Go package name"""
43
+ if name in self.GO_RESERVED_WORDS:
42
44
  return f"{name}_"
43
45
  return name
44
46
 
47
+ def safe_identifier(self, name: str, fallback_prefix: str = 'field') -> str:
48
+ """Converts a name to a safe Go identifier.
49
+
50
+ Handles:
51
+ - Reserved words (append _)
52
+ - Numeric prefixes (prepend _)
53
+ - Special characters (replace with _)
54
+ - All-special-char names (use fallback_prefix)
55
+ """
56
+ import re
57
+ # Replace invalid characters with underscores
58
+ safe = re.sub(r'[^a-zA-Z0-9_]', '_', str(name))
59
+ # Remove leading/trailing underscores from sanitization
60
+ safe = safe.strip('_') if safe != name else safe
61
+ # If nothing left after removing special chars, use fallback
62
+ if not safe or not re.match(r'^[a-zA-Z_]', safe):
63
+ if safe and re.match(r'^[0-9]', safe):
64
+ safe = '_' + safe # Numeric prefix
65
+ else:
66
+ safe = fallback_prefix + '_' + (safe if safe else 'unnamed')
67
+ # Handle reserved words
68
+ if safe in self.GO_RESERVED_WORDS:
69
+ safe = safe + '_'
70
+ return safe
71
+
45
72
  def go_type_name(self, name: str, namespace: str = '') -> str:
46
73
  """Returns a qualified name for a Go struct or enum"""
47
74
  if namespace:
@@ -675,7 +702,8 @@ class StructureToGo:
675
702
  def convert(self, structure_schema_path: str, output_dir: str):
676
703
  """Converts JSON Structure schema to Go"""
677
704
  if not self.base_package:
678
- self.base_package = os.path.splitext(os.path.basename(structure_schema_path))[0].replace('-', '_').lower()
705
+ pkg_name = os.path.splitext(os.path.basename(structure_schema_path))[0].replace('-', '_').lower()
706
+ self.base_package = self._safe_package_name(pkg_name)
679
707
 
680
708
  with open(structure_schema_path, 'r', encoding='utf-8') as file:
681
709
  schema = json.load(file)