structurize 2.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. avrotize/__init__.py +64 -0
  2. avrotize/__main__.py +6 -0
  3. avrotize/_version.py +34 -0
  4. avrotize/asn1toavro.py +160 -0
  5. avrotize/avrotize.py +152 -0
  6. avrotize/avrotocpp.py +483 -0
  7. avrotize/avrotocsharp.py +1075 -0
  8. avrotize/avrotocsv.py +121 -0
  9. avrotize/avrotodatapackage.py +173 -0
  10. avrotize/avrotodb.py +1383 -0
  11. avrotize/avrotogo.py +476 -0
  12. avrotize/avrotographql.py +197 -0
  13. avrotize/avrotoiceberg.py +210 -0
  14. avrotize/avrotojava.py +2156 -0
  15. avrotize/avrotojs.py +250 -0
  16. avrotize/avrotojsons.py +481 -0
  17. avrotize/avrotojstruct.py +345 -0
  18. avrotize/avrotokusto.py +364 -0
  19. avrotize/avrotomd.py +137 -0
  20. avrotize/avrotools.py +168 -0
  21. avrotize/avrotoparquet.py +208 -0
  22. avrotize/avrotoproto.py +359 -0
  23. avrotize/avrotopython.py +624 -0
  24. avrotize/avrotorust.py +435 -0
  25. avrotize/avrotots.py +598 -0
  26. avrotize/avrotoxsd.py +344 -0
  27. avrotize/cddltostructure.py +1841 -0
  28. avrotize/commands.json +3337 -0
  29. avrotize/common.py +834 -0
  30. avrotize/constants.py +72 -0
  31. avrotize/csvtoavro.py +132 -0
  32. avrotize/datapackagetoavro.py +76 -0
  33. avrotize/dependencies/cpp/vcpkg/vcpkg.json +19 -0
  34. avrotize/dependencies/typescript/node22/package.json +16 -0
  35. avrotize/dependency_resolver.py +348 -0
  36. avrotize/dependency_version.py +432 -0
  37. avrotize/jsonstoavro.py +2167 -0
  38. avrotize/jsonstostructure.py +2642 -0
  39. avrotize/jstructtoavro.py +878 -0
  40. avrotize/kstructtoavro.py +93 -0
  41. avrotize/kustotoavro.py +455 -0
  42. avrotize/parquettoavro.py +157 -0
  43. avrotize/proto2parser.py +498 -0
  44. avrotize/proto3parser.py +403 -0
  45. avrotize/prototoavro.py +382 -0
  46. avrotize/structuretocddl.py +597 -0
  47. avrotize/structuretocpp.py +697 -0
  48. avrotize/structuretocsharp.py +2295 -0
  49. avrotize/structuretocsv.py +365 -0
  50. avrotize/structuretodatapackage.py +659 -0
  51. avrotize/structuretodb.py +1125 -0
  52. avrotize/structuretogo.py +720 -0
  53. avrotize/structuretographql.py +502 -0
  54. avrotize/structuretoiceberg.py +355 -0
  55. avrotize/structuretojava.py +853 -0
  56. avrotize/structuretojsons.py +498 -0
  57. avrotize/structuretokusto.py +639 -0
  58. avrotize/structuretomd.py +322 -0
  59. avrotize/structuretoproto.py +764 -0
  60. avrotize/structuretopython.py +772 -0
  61. avrotize/structuretorust.py +714 -0
  62. avrotize/structuretots.py +653 -0
  63. avrotize/structuretoxsd.py +679 -0
  64. avrotize/xsdtoavro.py +413 -0
  65. structurize-2.19.0.dist-info/METADATA +107 -0
  66. structurize-2.19.0.dist-info/RECORD +70 -0
  67. structurize-2.19.0.dist-info/WHEEL +5 -0
  68. structurize-2.19.0.dist-info/entry_points.txt +2 -0
  69. structurize-2.19.0.dist-info/licenses/LICENSE +201 -0
  70. structurize-2.19.0.dist-info/top_level.txt +1 -0
avrotize/avrotojava.py ADDED
@@ -0,0 +1,2156 @@
1
+ # pylint: disable=too-many-arguments, too-many-locals, too-many-branches, too-many-statements, line-too-long
2
+
3
+ """ Generates Java classes from Avro schema """
4
+ import json
5
+ import os
6
+ from typing import Dict, List, Tuple, Union
7
+ from avrotize.constants import (AVRO_VERSION, JACKSON_VERSION, JDK_VERSION,
8
+ JUNIT_VERSION, MAVEN_COMPILER_VERSION, MAVEN_SUREFIRE_VERSION)
9
+
10
+ from avrotize.common import pascal, camel, is_generic_avro_type, inline_avro_references, build_flat_type_dict
11
+
12
+ INDENT = ' '
13
+ POM_CONTENT = """<?xml version="1.0" encoding="UTF-8"?>
14
+ <project xmlns="http://maven.apache.org/POM/4.0.0"
15
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
16
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
17
+ <modelVersion>4.0.0</modelVersion>
18
+ <groupId>{groupid}</groupId>
19
+ <artifactId>{artifactid}</artifactId>
20
+ <version>1.0-SNAPSHOT</version>
21
+ <properties>
22
+ <maven.compiler.source>{JDK_VERSION}</maven.compiler.source>
23
+ <maven.compiler.target>{JDK_VERSION}</maven.compiler.target>
24
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
25
+ </properties>
26
+ <dependencies>
27
+ <dependency>
28
+ <groupId>org.apache.avro</groupId>
29
+ <artifactId>avro</artifactId>
30
+ <version>{AVRO_VERSION}</version>
31
+ </dependency>
32
+ <dependency>
33
+ <groupId>com.fasterxml.jackson.core</groupId>
34
+ <artifactId>jackson-core</artifactId>
35
+ <version>{JACKSON_VERSION}</version>
36
+ </dependency>
37
+ <dependency>
38
+ <groupId>com.fasterxml.jackson.core</groupId>
39
+ <artifactId>jackson-databind</artifactId>
40
+ <version>{JACKSON_VERSION}</version>
41
+ </dependency>
42
+ <dependency>
43
+ <groupId>com.fasterxml.jackson.core</groupId>
44
+ <artifactId>jackson-annotations</artifactId>
45
+ <version>{JACKSON_VERSION}</version>
46
+ </dependency>
47
+ <dependency>
48
+ <groupId>org.junit.jupiter</groupId>
49
+ <artifactId>junit-jupiter-api</artifactId>
50
+ <version>{JUNIT_VERSION}</version>
51
+ <scope>test</scope>
52
+ </dependency>
53
+ <dependency>
54
+ <groupId>org.junit.jupiter</groupId>
55
+ <artifactId>junit-jupiter-engine</artifactId>
56
+ <version>{JUNIT_VERSION}</version>
57
+ <scope>test</scope>
58
+ </dependency>
59
+ </dependencies>
60
+ <build>
61
+ <plugins>
62
+ <plugin>
63
+ <groupId>org.apache.maven.plugins</groupId>
64
+ <artifactId>maven-compiler-plugin</artifactId>
65
+ <version>{MAVEN_COMPILER_VERSION}</version>
66
+ <configuration>
67
+ <compilerArgs>
68
+ <arg>-Xmaxerrs</arg>
69
+ <arg>1000</arg>
70
+ </compilerArgs>
71
+ </configuration>
72
+ </plugin>
73
+ <plugin>
74
+ <groupId>org.apache.maven.plugins</groupId>
75
+ <artifactId>maven-surefire-plugin</artifactId>
76
+ <version>{MAVEN_SUREFIRE_VERSION}</version>
77
+ <configuration>
78
+ <useSystemClassLoader>false</useSystemClassLoader>
79
+ </configuration>
80
+ </plugin>
81
+ </plugins>
82
+ </build>
83
+ </project>
84
+ """
85
+
86
+ PREAMBLE_TOBYTEARRAY = \
87
+ """
88
+ byte[] result = null;
89
+ String mediaType = contentType.split(";")[0].trim().toLowerCase();
90
+ boolean shouldCompress = mediaType.endsWith("+gzip");
91
+ if (shouldCompress) {
92
+ mediaType = mediaType.substring(0, mediaType.length() - 5);
93
+ }
94
+ """
95
+
96
+
97
+ EPILOGUE_TOBYTEARRAY_COMPRESSION = \
98
+ """
99
+ if (result != null && shouldCompress) {
100
+ try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
101
+ GZIPOutputStream gzipOutputStream = new GZIPOutputStream(byteArrayOutputStream)) {
102
+ gzipOutputStream.write(result);
103
+ gzipOutputStream.finish();
104
+ result = byteArrayOutputStream.toByteArray();
105
+ } catch (IOException e) {
106
+ throw new UnsupportedOperationException("Error compressing data to gzip");
107
+ }
108
+ }
109
+ """
110
+
111
# Last statement of the generated Java toByteArray(String contentType) method.
# It is spliced in after .strip(), so the whitespace around it is irrelevant.
# The message concatenates the method parameter `contentType`: the local
# `mediaType` is only declared when the preamble is emitted (i.e. when Avro or
# Jackson support is enabled), whereas `contentType` is always in scope.
EPILOGUE_TOBYTEARRAY = \
    """
    throw new UnsupportedOperationException("Unsupported media type " + contentType);
    """
115
+
116
+ PREAMBLE_FROMDATA_COMPRESSION = \
117
+ """
118
+ if (mediaType.endsWith("+gzip")) {
119
+ mediaType = mediaType.substring(0, mediaType.length() - 5);
120
+ InputStream stream = null;
121
+
122
+ if (data instanceof InputStream) {
123
+ stream = (InputStream) data;
124
+ } else if (data instanceof byte[]) {
125
+ stream = new ByteArrayInputStream((byte[]) data);
126
+ } else {
127
+ throw new UnsupportedOperationException("Data is not of a supported type for gzip decompression");
128
+ }
129
+
130
+ try (InputStream gzipStream = new GZIPInputStream(stream);
131
+ ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
132
+ byte[] buffer = new byte[1024];
133
+ int bytesRead;
134
+ while ((bytesRead = gzipStream.read(buffer)) != -1) {
135
+ outputStream.write(buffer, 0, bytesRead);
136
+ }
137
+ data = outputStream.toByteArray();
138
+ } catch (IOException e) {
139
+ e.printStackTrace();
140
+ }
141
+ }
142
+ """
143
+
144
+
145
+ JSON_FROMDATA_THROWS = \
146
+ ",JsonProcessingException, IOException"
147
+ JSON_FROMDATA = \
148
+ """
149
+ if ( mediaType.equals("application/json")) {
150
+ if (data instanceof byte[]) {
151
+ ByteArrayInputStream stream = new ByteArrayInputStream((byte[]) data);
152
+ return (new ObjectMapper()).readValue(stream, {typeName}.class);
153
+ }
154
+ else if (data instanceof InputStream) {
155
+ return (new ObjectMapper()).readValue((InputStream)data, {typeName}.class);
156
+ }
157
+ else if (data instanceof JsonNode) {
158
+ return (new ObjectMapper()).readValue(((JsonNode)data).toString(), {typeName}.class);
159
+ }
160
+ else if ( data instanceof String) {
161
+ return (new ObjectMapper()).readValue(((String)data), {typeName}.class);
162
+ }
163
+ throw new UnsupportedOperationException("Data is not of a supported type for JSON conversion to {typeName}");
164
+ }
165
+ """
166
+ JSON_TOBYTEARRAY_THROWS = ",JsonProcessingException"
167
+ JSON_TOBYTEARRAY = \
168
+ """
169
+ if ( mediaType.equals("application/json")) {
170
+ result = new ObjectMapper().writeValueAsBytes(this);
171
+ }
172
+ """
173
+
174
+ AVRO_FROMDATA_THROWS = ",IOException"
175
+ AVRO_FROMDATA = \
176
+ """
177
+ if ( mediaType.equals("avro/binary") || mediaType.equals("application/vnd.apache.avro+avro")) {
178
+ if (data instanceof byte[]) {
179
+ return AVROREADER.read(new {typeName}(), DecoderFactory.get().binaryDecoder((byte[])data, null));
180
+ } else if (data instanceof InputStream) {
181
+ return AVROREADER.read(new {typeName}(), DecoderFactory.get().binaryDecoder((InputStream)data, null));
182
+ }
183
+ throw new UnsupportedOperationException("Data is not of a supported type for Avro conversion to {typeName}");
184
+ } else if ( mediaType.equals("avro/json") || mediaType.equals("application/vnd.apache.avro+json")) {
185
+ if (data instanceof byte[]) {
186
+ return AVROREADER.read(new {typeName}(), DecoderFactory.get().jsonDecoder({typeName}.AVROSCHEMA, new ByteArrayInputStream((byte[])data)));
187
+ } else if (data instanceof InputStream) {
188
+ return AVROREADER.read(new {typeName}(), DecoderFactory.get().jsonDecoder({typeName}.AVROSCHEMA, (InputStream)data));
189
+ } else if (data instanceof String) {
190
+ return AVROREADER.read(new {typeName}(), DecoderFactory.get().jsonDecoder({typeName}.AVROSCHEMA, (String)data));
191
+ }
192
+ throw new UnsupportedOperationException("Data is not of a supported type for Avro conversion to {typeName}");
193
+ }
194
+ """
195
+
196
+
197
+ AVRO_TOBYTEARRAY_THROWS = ",IOException"
198
+ AVRO_TOBYTEARRAY = \
199
+ """
200
+ if ( mediaType.equals("avro/binary") || mediaType.equals("application/vnd.apache.avro+avro")) {
201
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
202
+ Encoder encoder = EncoderFactory.get().binaryEncoder(out, null);
203
+ AVROWRITER.write(this, encoder);
204
+ encoder.flush();
205
+ result = out.toByteArray();
206
+ }
207
+ else if ( mediaType.equals("avro/json") || mediaType.equals("application/vnd.apache.avro+json")) {
208
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
209
+ Encoder encoder = EncoderFactory.get().jsonEncoder({typeName}.AVROSCHEMA, out);
210
+ AVROWRITER.write(this, encoder);
211
+ encoder.flush();
212
+ result = out.toByteArray();
213
+ }
214
+ """
215
+
216
+
217
+ JsonNode = Dict[str, 'JsonNode'] | List['JsonNode'] | str | None
218
+
219
+
220
def flatten_type_name(name: str) -> str:
    """Reduce a qualified and/or generic type name to a bare Pascal-cased name.

    A trailing ``[]`` is replaced by the suffix ``Array``.  Spaces, angle
    brackets and commas are removed, only the text after the last dot is kept,
    and the result is passed through ``pascal``.
    """
    if name.endswith('[]'):
        # 'Foo[]' is treated as 'FooArray'; the new name can no longer end in '[]'
        name = name[:-2] + 'Array'
    without_punctuation = name.translate(str.maketrans('', '', ' <>,'))
    return pascal(without_punctuation.rpartition('.')[2])
226
+
227
+
228
def is_java_reserved_word(word: str) -> bool:
    """Tell whether ``word`` is a Java word that must not be used as an identifier.

    The lookup covers the Java keywords, the literals ``true``, ``false`` and
    ``null``, and the contextual keyword ``record``.  Matching is case-sensitive.
    """
    return word in {
        # declarations and modifiers
        'abstract', 'class', 'const', 'enum', 'extends', 'final', 'implements',
        'import', 'interface', 'native', 'package', 'private', 'protected',
        'public', 'record', 'static', 'strictfp', 'synchronized', 'transient',
        'volatile',
        # primitive types and void
        'boolean', 'byte', 'char', 'double', 'float', 'int', 'long', 'short',
        'void',
        # statements and operators
        'assert', 'break', 'case', 'catch', 'continue', 'default', 'do', 'else',
        'finally', 'for', 'goto', 'if', 'instanceof', 'new', 'return', 'super',
        'switch', 'this', 'throw', 'throws', 'try', 'while',
        # literals
        'true', 'false', 'null',
    }
239
+
240
+
241
+ class AvroToJava:
242
+ """Converts Avro schema to Java classes, including Jackson annotations and Avro SpecificRecord methods"""
243
+
244
+ def __init__(self, base_package: str = '') -> None:
245
+ self.base_package = base_package.replace('.', '/')
246
+ self.output_dir = os.getcwd()
247
+ self.avro_annotation = False
248
+ self.jackson_annotations = False
249
+ self.pascal_properties = False
250
+ self.generated_types_avro_namespace: Dict[str,str] = {}
251
+ self.generated_types_java_package: Dict[str,str] = {}
252
+ self.generated_avro_schemas: Dict[str, Dict] = {}
253
+ self.discriminated_unions: Dict[str, List[Dict]] = {} # Maps union name to list of subtype schemas
254
+
255
def qualified_name(self, package: str, name: str) -> str:
    """Prefix ``name`` with the lower-cased, keyword-safe form of ``package``.

    Each package segment is run through ``safe_package``.  When ``package`` is
    empty, ``name`` is returned unchanged.
    """
    dotted_package = self.safe_package(package.replace('.', '/').lower()).replace('/', '.')
    if package:
        return f"{dotted_package}.{name}"
    return name
261
+
262
def join_packages(self, parent_package: str, package: str) -> str:
    """Combine two package names into one lower-cased dotted name.

    An empty side is skipped, so no leading or trailing dot is produced.
    """
    if not parent_package:
        return package.lower()
    if not package:
        return parent_package.lower()
    return f"{parent_package}.{package}".lower()
269
+
270
+ class JavaType:
271
+ """Java type definition"""
272
+
273
+ def __init__(self, type_name: str, union_types: List['AvroToJava.JavaType'] | None = None, is_class: bool = False, is_enum: bool = False) -> None:
274
+ self.type_name = type_name
275
+ self.union_types = union_types
276
+ self.is_class = is_class
277
+ self.is_enum = is_enum
278
+
279
def safe_identifier(self, name: str, class_name: str = '') -> str:
    """Return ``name`` adjusted so it is usable as a Java identifier.

    A Java reserved word gets a leading underscore.  Otherwise, a name equal
    to a non-empty ``class_name`` gets a trailing underscore.  Any other name
    is returned unchanged.
    """
    if is_java_reserved_word(name):
        return f"_{name}"
    same_as_class = bool(class_name) and name == class_name
    return f"{name}_" if same_as_class else name
286
+
287
def safe_package(self, packageName: str) -> str:
    """Apply ``safe_identifier`` to every ``/``-separated segment of a package path.

    Returns the segments re-joined with ``/``.
    """
    return '/'.join(self.safe_identifier(part) for part in packageName.split('/'))
296
+
297
+ def map_primitive_to_java(self, avro_type: str, is_optional: bool) -> JavaType:
298
+ """Maps Avro primitive types to Java types"""
299
+ optional_mapping = {
300
+ 'null': 'Void',
301
+ 'boolean': 'Boolean',
302
+ 'int': 'Integer',
303
+ 'long': 'Long',
304
+ 'float': 'Float',
305
+ 'double': 'Double',
306
+ 'bytes': 'byte[]',
307
+ 'string': 'String',
308
+ }
309
+ required_mapping = {
310
+ 'null': 'void',
311
+ 'boolean': 'boolean',
312
+ 'int': 'int',
313
+ 'long': 'long',
314
+ 'float': 'float',
315
+ 'double': 'double',
316
+ 'bytes': 'byte[]',
317
+ 'string': 'String',
318
+ }
319
+ if '.' in avro_type:
320
+ type_name = avro_type.split('.')[-1]
321
+ package_name = '.'.join(avro_type.split('.')[:-1]).lower()
322
+ avro_type = self.qualified_name(package_name, type_name)
323
+ if avro_type in self.generated_types_avro_namespace:
324
+ kind = self.generated_types_avro_namespace[avro_type]
325
+ qualified_class_name = self.qualified_name(self.base_package, avro_type)
326
+ return AvroToJava.JavaType(qualified_class_name, is_class=kind=="class", is_enum=kind=="enum")
327
+ else:
328
+ return AvroToJava.JavaType(required_mapping.get(avro_type, avro_type) if not is_optional else optional_mapping.get(avro_type, avro_type))
329
+
330
def is_java_primitive(self, java_type: JavaType) -> bool:
    """Tell whether ``java_type`` is one of the built-in value types.

    Besides the Java primitives and ``void`` this also accepts ``byte[]``,
    ``String`` and the boxed wrapper names.
    """
    builtin_names = {
        'void', 'boolean', 'int', 'long', 'float', 'double',
        'Void', 'Boolean', 'Integer', 'Long', 'Float', 'Double',
        'byte[]', 'String',
    }
    return java_type.type_name in builtin_names
335
+
336
def is_java_optional_type(self, java_type: JavaType) -> bool:
    """Tell whether ``java_type`` is one of the boxed wrapper names (``Void``, ``Boolean``, ``Integer``, ``Long``, ``Float``, ``Double``)."""
    boxed_names = {'Void', 'Boolean', 'Integer', 'Long', 'Float', 'Double'}
    return java_type.type_name in boxed_names
339
+
340
def is_java_numeric_type(self, java_type: JavaType) -> bool:
    """Tell whether ``java_type`` is ``int``, ``long``, ``float`` or ``double``, in unboxed or boxed form."""
    numeric_names = {'int', 'Integer', 'long', 'Long', 'float', 'Float', 'double', 'Double'}
    return java_type.type_name in numeric_names
343
+
344
def is_java_integer_type(self, java_type: JavaType) -> bool:
    """Tell whether ``java_type`` is ``int`` or ``long``, in unboxed or boxed form."""
    integer_names = {'int', 'Integer', 'long', 'Long'}
    return java_type.type_name in integer_names
347
+
348
+ def convert_avro_type_to_java(self, class_name: str, field_name: str, avro_type: Union[str, Dict, List], parent_package: str, nullable: bool = False) -> JavaType:
349
+ """Converts Avro type to Java type"""
350
+ if isinstance(avro_type, str):
351
+ return self.map_primitive_to_java(avro_type, nullable)
352
+ elif isinstance(avro_type, list):
353
+ if (is_generic_avro_type(avro_type)):
354
+ return AvroToJava.JavaType('Object')
355
+ non_null_types = [t for t in avro_type if t != 'null']
356
+ if len(non_null_types) == 1:
357
+ if isinstance(non_null_types[0], str):
358
+ return self.map_primitive_to_java(non_null_types[0], True)
359
+ else:
360
+ return self.convert_avro_type_to_java(class_name, field_name, non_null_types[0], parent_package)
361
+ else:
362
+ if self.jackson_annotations:
363
+ return AvroToJava.JavaType(self.generate_embedded_union_class_jackson(class_name, field_name, non_null_types, parent_package, write_file=True), is_class=True)
364
+ else:
365
+ types: List[AvroToJava.JavaType] = [self.convert_avro_type_to_java(
366
+ class_name, field_name, t, parent_package) for t in non_null_types]
367
+ return AvroToJava.JavaType('Object', types)
368
+ elif isinstance(avro_type, dict):
369
+ if avro_type['type'] in ['record', 'enum']:
370
+ return self.generate_class_or_enum(avro_type, parent_package, write_file=True)
371
+ elif avro_type['type'] == 'fixed':
372
+ if 'logicalType' in avro_type and avro_type['logicalType'] == 'decimal':
373
+ return AvroToJava.JavaType('BigDecimal')
374
+ return AvroToJava.JavaType('byte[]')
375
+ elif avro_type['type'] == 'bytes' and 'logicalType' in avro_type:
376
+ if avro_type['logicalType'] == 'decimal':
377
+ return AvroToJava.JavaType('BigDecimal')
378
+ elif avro_type['type'] == 'array':
379
+ item_java_type = self.convert_avro_type_to_java(class_name, field_name, avro_type['items'], parent_package, nullable=True)
380
+ item_type = item_java_type.type_name
381
+ # Check if item is a union type by name pattern or registered type
382
+ is_union_item = (item_type.endswith("Union") or
383
+ (item_type in self.generated_types_java_package and self.generated_types_java_package[item_type] == "union"))
384
+ if is_union_item:
385
+ return AvroToJava.JavaType(f"List<{item_type}>", union_types=[AvroToJava.JavaType(item_type)])
386
+ return AvroToJava.JavaType(f"List<{item_type}>")
387
+ elif avro_type['type'] == 'map':
388
+ value_java_type = self.convert_avro_type_to_java(class_name, field_name, avro_type['values'], parent_package, nullable=True)
389
+ values_type = value_java_type.type_name
390
+ # Check if value is a union type by name pattern or registered type
391
+ is_union_value = (values_type.endswith("Union") or
392
+ (values_type in self.generated_types_java_package and self.generated_types_java_package[values_type] == "union"))
393
+ if is_union_value:
394
+ return AvroToJava.JavaType(f"Map<String,{values_type}>", union_types=[AvroToJava.JavaType(values_type)])
395
+ return AvroToJava.JavaType(f"Map<String,{values_type}>")
396
+ elif 'logicalType' in avro_type:
397
+ if avro_type['logicalType'] == 'date':
398
+ return AvroToJava.JavaType('LocalDate')
399
+ elif avro_type['logicalType'] == 'time-millis' or avro_type['logicalType'] == 'time-micros':
400
+ return AvroToJava.JavaType('LocalTime')
401
+ elif avro_type['logicalType'] == 'timestamp-millis' or avro_type['logicalType'] == 'timestamp-micros':
402
+ return AvroToJava.JavaType('Instant')
403
+ elif avro_type['logicalType'] == 'local-timestamp-millis' or avro_type['logicalType'] == 'local-timestamp-micros':
404
+ return AvroToJava.JavaType('LocalDateTime')
405
+ elif avro_type['logicalType'] == 'uuid':
406
+ return AvroToJava.JavaType('UUID')
407
+ elif avro_type['logicalType'] == 'duration':
408
+ return AvroToJava.JavaType('Duration')
409
+ return self.convert_avro_type_to_java(class_name, field_name, avro_type['type'], parent_package)
410
+ return 'Object'
411
+
412
+ def generate_class_or_enum(self, avro_schema: Dict, parent_package: str, write_file: bool = True) -> JavaType:
413
+ """ Generates a Java class or enum from an Avro schema """
414
+ if avro_schema['type'] == 'record':
415
+ return self.generate_class(avro_schema, parent_package, write_file)
416
+ elif avro_schema['type'] == 'enum':
417
+ return self.generate_enum(avro_schema, parent_package, write_file)
418
+ return AvroToJava.JavaType('Object')
419
+
420
def generate_create_test_instance_method(self, class_name: str, fields: List[Dict], parent_package: str) -> str:
    """Build the Java source of a static ``createTestInstance()`` factory.

    The emitted method creates an instance and calls one setter per field with
    a value obtained from ``get_test_value``.  Fields carrying a ``const`` key
    are left out.

    Args:
        class_name: Java class the method belongs to.
        fields: Avro field definitions of the record.
        parent_package: Namespace used when resolving field types.

    Returns:
        The method source, starting with a Javadoc block.
    """
    pieces = [
        f"\n{INDENT}/**\n{INDENT} * Creates a test instance with all required fields populated\n{INDENT} * @return a fully initialized test instance\n{INDENT} */\n",
        f"{INDENT}public static {class_name} createTestInstance() {{\n",
        f"{INDENT*2}{class_name} instance = new {class_name}();\n",
    ]

    for field in (candidate for candidate in fields if "const" not in candidate):
        # Same naming rule as the property generator: optionally Pascal-cased
        if self.pascal_properties:
            field_name = pascal(field['name'])
        else:
            field_name = field['name']
        java_type = self.convert_avro_type_to_java(
            class_name, self.safe_identifier(field_name, class_name), field['type'], parent_package)
        sample_value = self.get_test_value(java_type.type_name, parent_package.replace('.', '/'))
        # The setter is always set + Pascal-cased field name
        pieces.append(f"{INDENT*2}instance.set{pascal(field_name)}({sample_value});\n")

    pieces.append(f"{INDENT*2}return instance;\n")
    pieces.append(f"{INDENT}}}\n")
    return ''.join(pieces)
445
+
446
def generate_class(self, avro_schema: Dict, parent_package: str, write_file: bool) -> JavaType:
    """ Generates a Java class from an Avro record schema

    The class is registered in the generator's type registries before its
    members are generated; a record that was already registered is not
    generated again.  Depending on ``avro_annotation`` and
    ``jackson_annotations`` the class additionally receives the Avro
    SpecificRecord members and the ``toByteArray`` / ``fromData`` encoders.

    Args:
        avro_schema: Avro record schema.  If it has no ``namespace`` key, the
            key is added in place with the value of ``parent_package``.
        parent_package: Namespace to use when the schema declares none.
        write_file: When true, the source is passed to ``self.write_to_file``.

    Returns:
        JavaType holding the qualified class name, with ``is_class`` set.
    """

    def java_string_pieces(text: str, continuation_indent: str) -> str:
        """Escape ``text`` for a Java string literal, broken into 80-character pieces.

        The text is cut first and escaped afterwards so that a cut can never
        land inside an escape sequence.  Backslashes are escaped as well as
        quotes: json.dumps emits backslash escapes (for quotes, newlines and
        non-ASCII characters inside strings) which must reach the Java
        runtime string unchanged.
        """
        pieces = [text[i:i+80] for i in range(0, len(text), 80)]
        escaped = [piece.replace('\\', '\\\\').replace('"', '\\"') for piece in pieces]
        return f"\"+\n{continuation_indent}\"".join(escaped)

    class_definition = ''
    if 'doc' in avro_schema:
        class_definition += f"/** {avro_schema['doc']} */\n"
    namespace = avro_schema.get('namespace', parent_package)
    if 'namespace' not in avro_schema:
        avro_schema['namespace'] = namespace
    package = self.join_packages(self.base_package, namespace).replace('.', '/').lower()
    package = self.safe_package(package)
    class_name = self.safe_identifier(avro_schema['name'])
    namespace_qualified_name = self.qualified_name(namespace, avro_schema['name'])
    qualified_class_name = self.qualified_name(package.replace('/', '.'), class_name)
    if namespace_qualified_name in self.generated_types_avro_namespace:
        # Already generated (or generation in progress): only hand back the reference
        return AvroToJava.JavaType(qualified_class_name, is_class=True)
    self.generated_types_avro_namespace[namespace_qualified_name] = "class"
    self.generated_types_java_package[qualified_class_name] = "class"
    self.generated_avro_schemas[qualified_class_name] = avro_schema

    fields = avro_schema.get('fields', [])

    # Track discriminated union subtypes
    if 'union' in avro_schema:
        self.discriminated_unions.setdefault(avro_schema['union'], []).append({
            'schema': avro_schema,
            'class_name': class_name,
            'package': package.replace('/', '.'),
            'qualified_name': qualified_class_name
        })

    fields_str = [self.generate_property(class_name, field, namespace) for field in fields]
    class_body = "\n".join(fields_str)
    class_definition += f"public class {class_name}"

    # Add extends clause if this is a discriminated union subtype
    if 'union' in avro_schema and self.jackson_annotations:
        class_definition += f" extends {avro_schema['union']}"

    if self.avro_annotation:
        class_definition += " implements SpecificRecord"
    class_definition += " {\n"
    class_definition += f"{INDENT}public {class_name}() {{}}\n"
    class_definition += class_body

    if self.avro_annotation:
        # Constructor copying every field from a generic Avro record
        class_definition += f"\n{INDENT}public {class_name}(GenericData.Record record) {{\n"
        class_definition += f"{INDENT*2}for( int i = 0; i < record.getSchema().getFields().size(); i++ ) {{\n"
        class_definition += f"{INDENT*3}this.put(i, record.get(i));\n"
        class_definition += f"{INDENT*2}}}\n"
        class_definition += f"{INDENT}}}\n"

    # Generate createTestInstance() method for testing
    class_definition += self.generate_create_test_instance_method(class_name, fields, namespace)

    if self.avro_annotation:
        # Inline all schema references like C# does - each class has self-contained schema
        local_avro_schema = inline_avro_references(avro_schema.copy(), self.type_dict, '')
        avro_schema_json = json.dumps(local_avro_schema)

        # Java has a limit of 65535 bytes for string constants
        # If the schema is too large, we need to split it into chunks
        MAX_STRING_CONSTANT_LENGTH = 60000  # Leave some margin for safety

        if len(avro_schema_json) > MAX_STRING_CONSTANT_LENGTH:
            # Split into multiple private string methods to avoid the 65535 byte limit
            # Each method returns a part of the schema, concatenated at runtime
            chunks = [avro_schema_json[i:i+MAX_STRING_CONSTANT_LENGTH]
                      for i in range(0, len(avro_schema_json), MAX_STRING_CONSTANT_LENGTH)]

            # Generate a method for each chunk
            for i, chunk in enumerate(chunks):
                chunk_literal = java_string_pieces(chunk, INDENT*2)
                class_definition += f"\n\n{INDENT}private static String getAvroSchemaPart{i}() {{\n"
                class_definition += f"{INDENT*2}return \"{chunk_literal}\";\n"
                class_definition += f"{INDENT}}}"

            # Generate the combining method
            class_definition += f"\n\n{INDENT}private static String getAvroSchemaJson() {{\n"
            class_definition += f"{INDENT*2}return "
            class_definition += " + ".join([f"getAvroSchemaPart{i}()" for i in range(len(chunks))])
            class_definition += ";\n"
            class_definition += f"{INDENT}}}\n"
            class_definition += f"\n{INDENT}public static final Schema AVROSCHEMA = new Schema.Parser().parse(getAvroSchemaJson());"
        else:
            schema_literal = java_string_pieces(avro_schema_json, INDENT)
            class_definition += f"\n\n{INDENT}public static final Schema AVROSCHEMA = new Schema.Parser().parse(\n{INDENT}\"{schema_literal}\");"

        # Store the schema for tracking
        avro_namespace = avro_schema.get('namespace', '')
        schema_full_name = f"{avro_namespace}.{class_name}" if avro_namespace else class_name
        self.generated_types_avro_namespace[schema_full_name] = "class"

        class_definition += f"\n{INDENT}public static final DatumWriter<{class_name}> AVROWRITER = new SpecificDatumWriter<{class_name}>(AVROSCHEMA);"
        class_definition += f"\n{INDENT}public static final DatumReader<{class_name}> AVROREADER = new SpecificDatumReader<{class_name}>(AVROSCHEMA);\n"

        if self.jackson_annotations:
            # keep the Avro schema accessor out of the JSON representation
            class_definition += f"\n{INDENT}@JsonIgnore"
        class_definition += f"\n{INDENT}@Override\n{INDENT}public Schema getSchema(){{ return AVROSCHEMA; }}\n"
        class_definition += self.generate_avro_get_method(class_name, fields, namespace)
        class_definition += self.generate_avro_put_method(class_name, fields, namespace)

    any_encoding = self.jackson_annotations or self.avro_annotation

    # emit toByteArray method
    to_bytes_throws = (JSON_TOBYTEARRAY_THROWS if self.jackson_annotations else '') + \
        (AVRO_TOBYTEARRAY_THROWS if self.avro_annotation else '')
    class_definition += f"\n\n{INDENT}/**\n{INDENT} * Converts the object to a byte array\n{INDENT} * @param contentType the content type of the byte array\n{INDENT} * @return the byte array\n{INDENT} */\n"
    class_definition += f"{INDENT}public byte[] toByteArray(String contentType) throws UnsupportedOperationException{to_bytes_throws} {{"
    if any_encoding:
        class_definition += f'\n{INDENT*2}'.join((PREAMBLE_TOBYTEARRAY).split("\n"))
    if self.avro_annotation:
        class_definition += f'\n{INDENT*2}'+f'\n{INDENT*2}'.join(
            AVRO_TOBYTEARRAY.strip().replace("{typeName}", class_name).split("\n"))
    if self.jackson_annotations:
        class_definition += f'\n{INDENT*2}'+f'\n{INDENT*2}'.join(
            JSON_TOBYTEARRAY.strip().replace("{typeName}", class_name).split("\n"))
    if any_encoding:
        # `result` is declared by the preamble, so these lines depend on it
        class_definition += f'\n{INDENT*2}'.join(EPILOGUE_TOBYTEARRAY_COMPRESSION.split("\n"))
        class_definition += f'\n{INDENT*2}if ( result != null ) {{ return result; }}'
    class_definition += (f'\n{INDENT*2}'.join((EPILOGUE_TOBYTEARRAY.strip()).split("\n")))+f"\n{INDENT}}}"

    # emit fromData factory method
    from_data_throws = (JSON_FROMDATA_THROWS if self.jackson_annotations else '') + \
        (AVRO_FROMDATA_THROWS if self.avro_annotation else '')
    class_definition += f"\n\n{INDENT}/**\n{INDENT} * Converts the data to an object\n{INDENT} * @param data the data to convert\n{INDENT} * @param contentType the content type of the data\n{INDENT} * @return the object\n{INDENT} */\n"
    class_definition += f"{INDENT}public static {class_name} fromData(Object data, String contentType) throws UnsupportedOperationException{from_data_throws} {{"
    class_definition += f'\n{INDENT*2}if ( data instanceof {class_name}) return ({class_name})data;'

    if any_encoding:
        class_definition += f'\n{INDENT*2}String mediaType = contentType.split(";")[0].trim().toLowerCase();'
        class_definition += f'\n{INDENT*2}'.join((PREAMBLE_FROMDATA_COMPRESSION).split("\n"))
    if self.avro_annotation:
        class_definition += f'\n{INDENT*2}'+f'\n{INDENT*2}'.join(
            AVRO_FROMDATA.strip().replace("{typeName}", class_name).split("\n"))
    if self.jackson_annotations:
        class_definition += f'\n{INDENT*2}'+f'\n{INDENT*2}'.join(
            JSON_FROMDATA.strip().replace("{typeName}", class_name).split("\n"))
    class_definition += f"\n{INDENT*2}throw new UnsupportedOperationException(\"Unsupported media type \"+ contentType);\n{INDENT}}}"

    if self.jackson_annotations:
        class_definition += self.create_is_json_match_method(avro_schema, avro_schema.get('namespace', namespace), class_name)

    # Add equals() and hashCode() methods
    class_definition += self.generate_equals_method(class_name, fields, namespace)
    class_definition += self.generate_hashcode_method(class_name, fields, namespace)

    class_definition += "\n}"

    if write_file:
        self.write_to_file(package, class_name, class_definition)
    return AvroToJava.JavaType(qualified_class_name, is_class=True)
605
+
606
def create_is_json_match_method(self, avro_schema, parent_namespace, class_name) -> str:
    """Generate the Java source of the static ``isJsonMatch`` method for a record.

    Every entry of ``avro_schema['fields']`` contributes one boolean clause
    (obtained from ``get_is_json_match_clause``).  The clauses are joined with
    ``&&`` to form the ``return`` expression, and any ``Predicate``
    declarations the clauses rely on are emitted ahead of the ``return``.

    Args:
        avro_schema: Record schema; only its ``fields`` list is read here.
        parent_namespace: Namespace passed on to ``convert_avro_type_to_java``.
        class_name: Name of the Java class the method is generated for.

    Returns:
        The Java method source, beginning with a blank line and its Javadoc.
    """
    clauses = []
    predicate_block = ''
    for field in avro_schema.get('fields', []):
        field_name = field['name']
        # A field named exactly like the class gets a trailing underscore.
        if field_name == class_name:
            field_name += "_"
        field_type = self.convert_avro_type_to_java(class_name, field_name, field['type'], parent_namespace)
        predicate, clause = self.get_is_json_match_clause(class_name, field_name, field_type, field)
        clauses.append(clause)
        if predicate:
            predicate_block += predicate + "\n"

    if clauses:
        match_expression = f" && \n{INDENT*3}".join(clauses)
    else:
        # Without fields the joined expression is empty, which used to emit
        # "return ;" in a boolean method and does not compile.  A record with
        # no fields is still a JSON object, so match on that.
        match_expression = "node.isObject()"

    method_source = f"\n\n{INDENT}/**\n{INDENT} * Checks if the JSON node matches the schema\n{INDENT}"
    method_source += f"\n{INDENT}@param node The JSON node to check */"
    method_source += f"\n{INDENT}public static boolean isJsonMatch(com.fasterxml.jackson.databind.JsonNode node)\n{INDENT}{{"
    if predicate_block:
        method_source += f'\n{INDENT*2}' + f'\n{INDENT*2}'.join(predicate_block.split('\n'))
    method_source += f"\n{INDENT*2}return {match_expression}"
    method_source += f";\n{INDENT}}}"
    return method_source
633
+
634
def get_is_json_match_clause(self, class_name: str, field_name: str, field_type: 'AvroToJava.JavaType', field: Dict = None) -> Tuple[str, str]:
    """Generate the ``isJsonMatch`` clause for a single field using Jackson.

    Args:
        class_name: Name of the Java class that owns the field.
        field_name: Name used both as the JSON property name and as the
            suffix of any generated ``Predicate`` variable.
        field_type: Java type of the field; ``type_name``, ``is_class`` and
            ``is_enum`` are read.
        field: Optional Avro field definition.  Its ``type`` is inspected for
            a union containing ``"null"`` and its ``const`` for a fixed value.

    Returns:
        A ``(predicates, clause)`` tuple.  ``predicates`` holds Java
        ``Predicate<JsonNode>`` declarations required by the clause (empty
        unless the field is a list or map); ``clause`` is a parenthesised
        Java boolean expression over the variable ``node``.
    """
    json_name = field_name
    type_name = field_type.type_name
    getter = f'node.get("{json_name}")'

    # A field is optional when its Avro type is a union containing "null"
    # or when the Java type itself is reported as optional.
    avro_type = field['type'] if field and 'type' in field else None
    is_nullable = isinstance(avro_type, list) and 'null' in avro_type
    is_optional = is_nullable or self.is_java_optional_type(field_type)

    # Const fields (e.g. discriminators) are matched against their exact value.
    has_const = bool(field) and 'const' in field and field['const'] is not None
    const_value = field['const'] if has_const else None

    # Optional fields match when absent or null; required fields must exist.
    if is_optional:
        presence = f'!node.has("{json_name}") || {getter}.isNull() || '
    else:
        presence = f'node.has("{json_name}") && '
    node_check = presence + getter

    scalar_checks = {
        'byte[]': 'isBinary',
        'string': 'isTextual',
        'String': 'isTextual',
        'int': 'canConvertToInt',
        'Integer': 'canConvertToInt',
        'long': 'canConvertToLong',
        'Long': 'canConvertToLong',
        'float': 'isNumber',
        'Float': 'isNumber',
        'double': 'isNumber',
        'Double': 'isNumber',
        'BigDecimal': 'isBigDecimal',
        'boolean': 'isBoolean',
        'Boolean': 'isBoolean',
        'UUID': 'isTextual',
        'LocalDate': 'isTextual',
        'LocalTime': 'isTextual',
        'Instant': 'isTextual',
        'LocalDateTime': 'isTextual',
        'Duration': 'isTextual',
        'Object': 'isObject',
    }

    def element_test(element_type: str) -> str:
        """Return the Java test applied to the first element of a container."""
        first = "n.elements().next()"
        suffix = self.predicate_test(element_type)
        if suffix:
            return first + suffix
        if element_type in self.generated_types_java_package:
            if self.generated_types_java_package[element_type] == "enum":
                # Try to match the incoming text against Avro symbols
                return (f"{first}.isTextual() && java.util.Arrays.stream({element_type}.values())"
                        f".anyMatch(e -> e.avroSymbol().equals({first}.asText()))")
            return f"{element_type}.isJsonMatch({first})"
        return "true"

    def container_clause(shape_test: str, element_type: str) -> Tuple[str, str]:
        """Build the predicate declaration and clause for a list or map."""
        # The element test is parenthesised together with the emptiness test so
        # that it is only evaluated for nodes of the right shape; without the
        # parentheses a non-container node reached elements().next() and threw.
        declaration = (f"Predicate<JsonNode> val{json_name} = (JsonNode n) -> n.{shape_test}() && "
                       f"(!n.elements().hasNext() || {element_test(element_type)});")
        return declaration, f"({presence}val{json_name}.test({getter}))"

    predicates = ''
    if type_name in scalar_checks:
        clause = f"({node_check}.{scalar_checks[type_name]}())"
    elif type_name.startswith("List<"):
        predicates, clause = container_clause("isArray", type_name[5:-1])
    elif type_name.startswith("Map<"):
        # Strip the blank that may follow the comma in "Map<String, V>" so
        # the value type can be recognised.
        value_type = type_name[type_name.find(',') + 1:-1].strip()
        predicates, clause = container_clause("isObject", value_type)
    elif field_type.is_class:
        clause = f"({presence}{type_name}.isJsonMatch({getter}))"
    elif field_type.is_enum:
        if has_const:
            # const_value is the string value from the schema, not the enum
            # qualified name; compare against the raw string.
            raw_const = const_value if isinstance(const_value, str) else str(const_value)
            clause = (f"(node.has(\"{json_name}\") && {getter}.isTextual() && "
                      f"{getter}.asText().equals(\"{raw_const}\"))")
        else:
            # Guard the lookup: JsonNode.get returns null for an absent field,
            # so the unguarded form failed with a NullPointerException.
            clause = (f"({node_check}.isTextual() && java.util.Arrays.stream({type_name}.values())"
                      f".anyMatch(e -> e.avroSymbol().equals({getter}.asText())))")
    else:
        field_union = pascal(field_name) + 'Union'
        # NOTE(review): this compares the JavaType object with a string; it
        # presumably was meant to compare field_type.type_name - confirm.
        if field_type == field_union:
            field_union = class_name + "." + pascal(field_name) + 'Union'
        type_kind = self.generated_types_avro_namespace[field_union] if field_union in self.generated_types_avro_namespace else "class"
        if type_kind == "union":
            clause = f"({node_check}.isObject() && {type_name}.isJsonMatch({getter}))"
        else:
            clause = f"(node.has(\"{json_name}\"))"
    return predicates, clause
750
+
751
def predicate_test(self, items_type):
    """Return the Jackson ``JsonNode`` test suffix for a list/map element type.

    The result is a string such as ``".isTextual()"`` meant to be appended to
    a Java expression yielding a ``JsonNode``.  An empty string is returned
    for element types that are not listed in the table below.
    """
    checks_by_type = (
        (("String",), ".isTextual()"),
        (("int", "Integer"), ".canConvertToInt()"),
        (("long", "Long"), ".canConvertToLong()"),
        (("float", "Float", "double", "Double", "decimal"), ".isNumber()"),
        (("boolean", "Boolean"), ".isBoolean()"),
        (("byte[]",), ".isBinary()"),
        (("UUID", "LocalDate", "LocalTime", "Instant", "LocalDateTime", "Duration"), ".isTextual()"),
        (("Object",), ".isObject()"),
    )
    for type_names, java_check in checks_by_type:
        if items_type in type_names:
            return java_check
    return ""
780
+
781
def get_is_json_match_clause_type(self, element_name: str, class_name: str, field_type: 'AvroToJava.JavaType') -> str:
    """Generate the ``isJsonMatch`` clause for a standalone JSON element.

    Unlike ``get_is_json_match_clause`` the generated expression tests the
    Java variable named ``element_name`` directly instead of looking a
    property up on ``node``.

    Args:
        element_name: Name of the Java ``JsonNode`` variable to test.
        class_name: Name of the enclosing Java class, used to qualify a
            nested union type name.
        field_type: Java type the element must match.

    Returns:
        A Java boolean expression.
    """
    type_name = field_type.type_name
    node_check = f"{element_name}.isMissingNode() == false && {element_name}"
    # Optional handling is switched off in this method, so the null
    # alternative is the constant "false" wherever it appears in the output.
    null_check = "false"

    if type_name == 'byte[]':
        return f"({node_check}.isBinary())"
    if type_name == 'String':
        return f"({node_check}.isTextual())"
    if self.is_java_numeric_type(field_type):
        return f"({node_check}.isNumber())"
    # 'boolean' is the Java primitive name; 'bool' is retained only for
    # backward compatibility with the previous check.
    if type_name in ('bool', 'boolean', 'Boolean'):
        return f"({node_check}.isBoolean())"
    if type_name.startswith("List<"):
        return f"({node_check}.isArray())"
    if type_name.startswith("Map<"):
        return f"({node_check}.isObject())"
    if field_type.is_class:
        return f"({null_check} || {type_name}.isJsonMatch({element_name}))"
    if field_type.is_enum:
        # Try to match the incoming text against Avro symbols
        return (f"({null_check} || ({node_check}.isTextual() && java.util.Arrays.stream({type_name}.values())"
                f".anyMatch(e -> e.avroSymbol().equals({element_name}.asText()))))")

    field_union = pascal(element_name) + 'Union'
    # NOTE(review): this compares the JavaType object with a string and the
    # union clause below formats the JavaType object itself rather than its
    # type_name; both look unintended - confirm before changing.
    if field_type == field_union:
        field_union = class_name + "." + pascal(element_name) + 'Union'
    type_kind = self.generated_types_avro_namespace[field_union] if field_union in self.generated_types_avro_namespace else "class"
    if type_kind == "union":
        return f"({null_check} || {field_type}.isJsonMatch({element_name}))"
    return f"({node_check}.isObject())"
824
+
825
def generate_equals_method(self, class_name: str, fields: List[Dict], parent_package: str) -> str:
    """Generate the Java ``equals`` override for a record class.

    Primitive integral and boolean fields are compared with ``!=``, ``float``
    and ``double`` fields with ``Float.compare`` / ``Double.compare``,
    ``byte[]`` fields with ``java.util.Arrays.equals`` and every other field
    with a null-safe ``equals`` call.

    Args:
        class_name: Name of the Java class; used for the cast of ``obj``.
        fields: Avro field definitions of the record (may be empty).
        parent_package: Passed on to ``convert_avro_type_to_java``.

    Returns:
        The Java method source.
    """
    body_indent = INDENT * 2
    lines = [
        f"\n\n{INDENT}@Override\n{INDENT}public boolean equals(Object obj) {{\n",
        f"{body_indent}if (this == obj) return true;\n",
        f"{body_indent}if (obj == null || getClass() != obj.getClass()) return false;\n",
        f"{body_indent}{class_name} other = ({class_name}) obj;\n",
    ]

    for field in fields or []:
        field_name = pascal(field['name']) if self.pascal_properties else field['name']
        field_name = self.safe_identifier(field_name, class_name)
        field_type = self.convert_avro_type_to_java(class_name, field_name, field['type'], parent_package)
        java_type = field_type.type_name

        if java_type == 'float':
            # '!=' treats 0.0f and -0.0f as equal and NaN as unequal to itself,
            # which disagrees with the Float.floatToIntBits based hashCode.
            mismatch = f"Float.compare(this.{field_name}, other.{field_name}) != 0"
        elif java_type == 'double':
            # Same reasoning as for float; hashCode uses doubleToLongBits.
            mismatch = f"Double.compare(this.{field_name}, other.{field_name}) != 0"
        elif java_type in ('int', 'long', 'boolean', 'byte', 'short', 'char'):
            mismatch = f"this.{field_name} != other.{field_name}"
        elif java_type == 'byte[]':
            mismatch = f"!java.util.Arrays.equals(this.{field_name}, other.{field_name})"
        else:
            mismatch = f"this.{field_name} == null ? other.{field_name} != null : !this.{field_name}.equals(other.{field_name})"
        lines.append(f"{body_indent}if ({mismatch}) return false;\n")

    # Reached when every field compared equal (or there are no fields).
    lines.append(f"{body_indent}return true;\n")
    lines.append(f"{INDENT}}}\n")
    return "".join(lines)
851
+
852
def generate_hashcode_method(self, class_name: str, fields: List[Dict], parent_package: str) -> str:
    """Generate the Java ``hashCode`` override for a record class.

    With no fields the method returns ``0``.  Otherwise it starts from ``1``
    and folds each field in with ``31 * result + <term>``, where the term
    depends on the field's Java type.

    Args:
        class_name: Name of the Java class that owns the fields.
        fields: Avro field definitions of the record (may be empty).
        parent_package: Passed on to ``convert_avro_type_to_java``.

    Returns:
        The Java method source.
    """
    body_indent = INDENT * 2
    pieces = [f"\n{INDENT}@Override\n{INDENT}public int hashCode() {{\n"]

    if fields:
        pieces.append(f"{body_indent}int result = 1;\n")
        doubles_seen = 0
        for field in fields:
            member = pascal(field['name']) if self.pascal_properties else field['name']
            member = self.safe_identifier(member, class_name)
            java_type = self.convert_avro_type_to_java(class_name, member, field['type'], parent_package).type_name
            accessor = f"this.{member}"

            if java_type == 'boolean':
                term = f"({accessor} ? 1 : 0)"
            elif java_type in ('byte', 'short', 'char', 'int'):
                term = accessor
            elif java_type == 'long':
                term = f"(int)({accessor} ^ ({accessor} >>> 32))"
            elif java_type == 'float':
                term = f"Float.floatToIntBits({accessor})"
            elif java_type == 'double':
                # Each double needs its own scratch local: temp, temp1, temp2, ...
                scratch = "temp" if doubles_seen == 0 else f"temp{doubles_seen}"
                doubles_seen += 1
                pieces.append(f"{body_indent}long {scratch} = Double.doubleToLongBits({accessor});\n")
                term = f"(int)({scratch} ^ ({scratch} >>> 32))"
            elif java_type == 'byte[]':
                term = f"java.util.Arrays.hashCode({accessor})"
            else:
                term = f"({accessor} != null ? {accessor}.hashCode() : 0)"
            pieces.append(f"{body_indent}result = 31 * result + {term};\n")
        pieces.append(f"{body_indent}return result;\n")
    else:
        pieces.append(f"{body_indent}return 0;\n")

    pieces.append(f"{INDENT}}}\n")
    return "".join(pieces)
888
+
889
def generate_union_equals_method(self, union_class_name: str, union_types: List['AvroToJava.JavaType']) -> str:
    """Generate the Java ``equals`` override for a union wrapper class.

    The generated method walks the union's member fields in order: for the
    first one that is non-null in ``this`` it returns the comparison with the
    same field of ``other``.  When none is set it returns whether every
    member of ``other`` is null as well.

    Args:
        union_class_name: Name of the Java union class; used for the cast.
        union_types: Java types of the union branches.

    Returns:
        The Java method source.
    """
    def resolve_member(branch):
        """Return the (possibly boxed) branch type and its member field name."""
        # we need the nullable version (wrapper) of all primitive types
        if self.is_java_primitive(branch):
            branch = self.map_primitive_to_java(branch.type_name, True)
        branch_name = branch.type_name
        if branch_name.startswith("Map<") or branch_name.startswith("List<"):
            stem = flatten_type_name(branch_name)
        elif branch_name == "byte[]":
            stem = "Bytes"
        else:
            stem = branch_name.rsplit('.', 1)[-1]
        return branch, f"_{camel(stem)}"

    body_indent = INDENT * 2
    chunks = [
        f"\n{INDENT}@Override\n{INDENT}public boolean equals(Object obj) {{\n",
        f"{body_indent}if (this == obj) return true;\n",
        f"{body_indent}if (obj == null || getClass() != obj.getClass()) return false;\n",
        f"{body_indent}{union_class_name} other = ({union_class_name}) obj;\n",
    ]

    # In a union, only ONE field should be set at a time: find the field set
    # in this object and compare it with the same field of the other object.
    for position, branch in enumerate(union_types):
        boxed, member = resolve_member(branch)
        keyword = "if" if position == 0 else "else if"
        chunks.append(f"{body_indent}{keyword} (this.{member} != null) {{\n")
        if boxed.type_name == 'byte[]':
            chunks.append(f"{INDENT * 3}return java.util.Arrays.equals(this.{member}, other.{member});\n")
        else:
            chunks.append(f"{INDENT * 3}return this.{member}.equals(other.{member});\n")
        chunks.append(f"{body_indent}}}\n")

    # If no field is set in this, check other is also unset
    chunks.append(f"{body_indent}// Both are null/unset - check other is also unset\n")
    unset_checks = []
    for branch in union_types:
        _, member = resolve_member(branch)
        unset_checks.append(f"other.{member} == null")
    chunks.append(f"{body_indent}return " + " && ".join(unset_checks) + ";\n")
    chunks.append(f"{INDENT}}}\n")
    return "".join(chunks)
953
+
954
def generate_union_hashcode_method(self, union_class_name: str, union_types: List['AvroToJava.JavaType']) -> str:
    """Generate the Java ``hashCode`` override for a union wrapper class.

    The generated method returns the hash of the first member field that is
    non-null (``java.util.Arrays.hashCode`` for ``byte[]``) and ``0`` when no
    member is set.

    Args:
        union_class_name: Name of the Java union class (not used in the output).
        union_types: Java types of the union branches.

    Returns:
        The Java method source.
    """
    body_indent = INDENT * 2
    chunks = [f"\n{INDENT}@Override\n{INDENT}public int hashCode() {{\n"]

    # In a union, only ONE field should be set at a time, so the hash of the
    # union is the hash of whichever field is set.
    for position, branch in enumerate(union_types):
        # we need the nullable version (wrapper) of all primitive types
        if self.is_java_primitive(branch):
            branch = self.map_primitive_to_java(branch.type_name, True)

        branch_name = branch.type_name
        if branch_name.startswith("Map<") or branch_name.startswith("List<"):
            stem = flatten_type_name(branch_name)
        elif branch_name == "byte[]":
            stem = "Bytes"
        else:
            stem = branch_name.rsplit('.', 1)[-1]
        member = f"_{camel(stem)}"

        keyword = "if" if position == 0 else "else if"
        chunks.append(f"{body_indent}{keyword} (this.{member} != null) {{\n")
        if branch_name == 'byte[]':
            chunks.append(f"{INDENT * 3}return java.util.Arrays.hashCode(this.{member});\n")
        else:
            chunks.append(f"{INDENT * 3}return this.{member}.hashCode();\n")
        chunks.append(f"{body_indent}}}\n")

    # If no field is set, return 0
    chunks.append(f"{body_indent}return 0;\n")
    chunks.append(f"{INDENT}}}\n")
    return "".join(chunks)
995
+
996
def generate_avro_get_method(self, class_name: str, fields: List[Dict], parent_package: str) -> str:
    """Generate the Java ``get(int)`` override for SpecificRecord.

    The generated method is a ``switch`` over the field index with one
    ``case`` per entry of ``fields`` and a ``default`` that throws
    ``AvroRuntimeException``.

    Args:
        class_name: Name of the Java class that owns the fields.
        fields: Avro field definitions; list position becomes the case label.
        parent_package: Passed on to ``convert_avro_type_to_java``.

    Returns:
        The Java method source.
    """
    case_indent = INDENT * 3
    chunks = [
        f"\n{INDENT}@Override\n{INDENT}public Object get(int field$) {{\n",
        f"{INDENT * 2}switch (field$) {{\n",
    ]

    for position, field in enumerate(fields):
        member = pascal(field['name']) if self.pascal_properties else field['name']
        member = self.safe_identifier(member, class_name)
        field_type = self.convert_avro_type_to_java(class_name, member, field['type'], parent_package)
        java_type = field_type.type_name

        # A field counts as a union when its type is registered as "union" in
        # either type table, or when it is an Object carrying more than one
        # union type (non-Jackson union).
        is_union = java_type in self.generated_types_avro_namespace and self.generated_types_avro_namespace[java_type] == "union"
        is_union = is_union or (java_type in self.generated_types_java_package and self.generated_types_java_package[java_type] == "union")
        is_union = is_union or (java_type == "Object" and field_type.union_types is not None and len(field_type.union_types) > 1)

        # List<Union> / Map<String, Union>
        is_list_of_unions = java_type.startswith("List<") and field_type.union_types and len(field_type.union_types) > 0
        is_map_of_unions = java_type.startswith("Map<") and field_type.union_types and len(field_type.union_types) > 0

        label = f"{case_indent}case {position}: return this.{member}"
        if is_union:
            # Return the unwrapped object via toObject() instead of the
            # custom wrapper class.
            chunks.append(f"{label} != null ? this.{member}.toObject() : null;\n")
        elif is_list_of_unions:
            # Unwrap each element by calling toObject() on it.
            chunks.append(f"{label} != null ? this.{member}.stream().map(u -> u != null ? u.toObject() : null).collect(java.util.stream.Collectors.toList()) : null;\n")
        elif is_map_of_unions:
            # Unwrap each value by calling toObject() on it.
            chunks.append(f"{label} != null ? this.{member}.entrySet().stream().collect(java.util.stream.Collectors.toMap(java.util.Map.Entry::getKey, e -> e.getValue() != null ? e.getValue().toObject() : null)) : null;\n")
        elif field_type.is_enum:
            # Convert to an EnumSymbol built from the enum's SCHEMA and the
            # original Avro symbol name returned by avroSymbol().
            chunks.append(f"{label} != null ? new GenericData.EnumSymbol({java_type}.SCHEMA, this.{member}.avroSymbol()) : null;\n")
        else:
            # For all other field types, return the field as-is.
            chunks.append(f"{label};\n")

    chunks.append(f"{case_indent}default: throw new AvroRuntimeException(\"Bad index: \" + field$);\n")
    chunks.append(f"{INDENT * 2}}}\n{INDENT}}}\n")
    return "".join(chunks)
1039
+
1040
+ def generate_avro_put_method(self, class_name: str, fields: List[Dict], parent_package: str) -> str:
1041
+ """ Generates the put method for SpecificRecord """
1042
+ suppress_unchecked = False
1043
+ put_method = f"\n{INDENT}@Override\n{INDENT}public void put(int field$, Object value$) {{\n"
1044
+ put_method += f"{INDENT * 2}switch (field$) {{\n"
1045
+ for index, field in enumerate(fields):
1046
+ # Skip const fields as they are final and cannot be reassigned
1047
+ if "const" in field:
1048
+ put_method += f"{INDENT * 3}case {index}: break; // const field, cannot be set\n"
1049
+ continue
1050
+
1051
+ field_name = pascal(field['name']) if self.pascal_properties else field['name']
1052
+ field_name = self.safe_identifier(field_name, class_name)
1053
+ field_type = self.convert_avro_type_to_java(class_name, field_name, field['type'], parent_package)
1054
+ if field_type.type_name.startswith("List<") or field_type.type_name.startswith("Map<"):
1055
+ suppress_unchecked = True
1056
+
1057
+ # Check if the field type is a generated type (union, class, or enum)
1058
+ type_kind = None
1059
+ if field_type.type_name in self.generated_types_avro_namespace:
1060
+ type_kind = self.generated_types_avro_namespace[field_type.type_name]
1061
+ elif field_type.type_name in self.generated_types_java_package:
1062
+ type_kind = self.generated_types_java_package[field_type.type_name]
1063
+
1064
+ # Check if this is List<Union> or Map<String, Union>
1065
+ is_list_of_unions = field_type.type_name.startswith("List<") and field_type.union_types and len(field_type.union_types) > 0
1066
+ is_map_of_unions = field_type.type_name.startswith("Map<") and field_type.union_types and len(field_type.union_types) > 0
1067
+
1068
+ if is_list_of_unions:
1069
+ # Extract the union type name from List<UnionType>
1070
+ union_type_match = field_type.type_name[5:-1] # Remove "List<" and ">"
1071
+ # For List<Union>, handle both wrapped List<UnionWrapper> and unwrapped List<Object>
1072
+ # Avro deserialization provides List<Object>, so we need to wrap each element
1073
+ put_method += f"{INDENT * 3}case {index}: {{\n"
1074
+ put_method += f"{INDENT * 4}if (value$ instanceof List<?>) {{\n"
1075
+ put_method += f"{INDENT * 5}List<?> list = (List<?>)value$;\n"
1076
+ put_method += f"{INDENT * 5}if (list.isEmpty() || !(list.get(0) instanceof {union_type_match})) {{\n"
1077
+ put_method += f"{INDENT * 6}// Unwrapped from Avro - need to wrap, handling nulls\n"
1078
+ put_method += f"{INDENT * 6}this.{field_name} = list.stream().map(v -> v != null ? new {union_type_match}(v) : null).collect(java.util.stream.Collectors.toList());\n"
1079
+ put_method += f"{INDENT * 5}}} else {{\n"
1080
+ put_method += f"{INDENT * 6}// Already wrapped\n"
1081
+ put_method += f"{INDENT * 6}this.{field_name} = ({field_type.type_name})value$;\n"
1082
+ put_method += f"{INDENT * 5}}}\n"
1083
+ put_method += f"{INDENT * 4}}}\n"
1084
+ put_method += f"{INDENT * 4}break;\n"
1085
+ put_method += f"{INDENT * 3}}}\n"
1086
+ elif is_map_of_unions:
1087
+ # Extract the union type name from Map<String, UnionType>
1088
+ union_type_match = field_type.type_name.split(",")[1].strip()[:-1] # Remove "Map<String, " and ">"
1089
+ put_method += f"{INDENT * 3}case {index}: {{\n"
1090
+ put_method += f"{INDENT * 4}if (value$ instanceof Map<?,?>) {{\n"
1091
+ put_method += f"{INDENT * 5}Map<?,?> map = (Map<?,?>)value$;\n"
1092
+ put_method += f"{INDENT * 5}if (map.isEmpty() || !(map.values().iterator().next() instanceof {union_type_match})) {{\n"
1093
+ put_method += f"{INDENT * 6}// Unwrapped from Avro - need to wrap, handling nulls\n"
1094
+ put_method += f"{INDENT * 6}this.{field_name} = map.entrySet().stream().collect(java.util.stream.Collectors.toMap(e -> (String)e.getKey(), e -> e.getValue() != null ? new {union_type_match}(e.getValue()) : null));\n"
1095
+ put_method += f"{INDENT * 5}}} else {{\n"
1096
+ put_method += f"{INDENT * 6}// Already wrapped\n"
1097
+ put_method += f"{INDENT * 6}this.{field_name} = ({field_type.type_name})value$;\n"
1098
+ put_method += f"{INDENT * 5}}}\n"
1099
+ put_method += f"{INDENT * 4}}}\n"
1100
+ put_method += f"{INDENT * 4}break;\n"
1101
+ put_method += f"{INDENT * 3}}}\n"
1102
+ elif type_kind == "union":
1103
+ # Unions can contain primitives or records - use the appropriate constructor
1104
+ # If Avro passes a GenericData.Record, use the GenericData.Record constructor
1105
+ # Otherwise use the Object constructor for already-constructed types
1106
+ put_method += f"{INDENT * 3}case {index}: this.{field_name} = value$ instanceof GenericData.Record ? new {field_type.type_name}((GenericData.Record)value$) : new {field_type.type_name}(value$); break;\n"
1107
+ elif type_kind == "class":
1108
+ # Record types need to be converted from GenericData.Record if that's what Avro passes
1109
+ put_method += f"{INDENT * 3}case {index}: this.{field_name} = value$ instanceof GenericData.Record ? new {field_type.type_name}((GenericData.Record)value$) : ({field_type.type_name})value$; break;\n"
1110
+ elif type_kind == "enum":
1111
+ # Enums need to be converted from GenericData.EnumSymbol
1112
+ # Use fromAvroSymbol to match original Avro symbol names
1113
+ put_method += f"{INDENT * 3}case {index}: this.{field_name} = value$ instanceof GenericData.EnumSymbol ? {field_type.type_name}.fromAvroSymbol(value$.toString()) : ({field_type.type_name})value$; break;\n"
1114
+ else:
1115
+ # Check if this is a List<RecordType> or Map<String,RecordType>
1116
+ is_list_of_records = False
1117
+ is_map_of_records = False
1118
+ if field_type.type_name.startswith("List<"):
1119
+ item_type = field_type.type_name[5:-1]
1120
+ if item_type in self.generated_types_java_package and self.generated_types_java_package[item_type] == "class":
1121
+ is_list_of_records = True
1122
+ elif field_type.type_name.startswith("Map<"):
1123
+ # Extract value type from Map<String, ValueType>
1124
+ value_type = field_type.type_name.split(",")[1].strip()[:-1]
1125
+ if value_type in self.generated_types_java_package and self.generated_types_java_package[value_type] == "class":
1126
+ is_map_of_records = True
1127
+
1128
+ if is_list_of_records:
1129
+ item_type = field_type.type_name[5:-1]
1130
+ put_method += f"{INDENT * 3}case {index}: {{\n"
1131
+ put_method += f"{INDENT * 4}if (value$ instanceof List<?>) {{\n"
1132
+ put_method += f"{INDENT * 5}List<?> list = (List<?>)value$;\n"
1133
+ put_method += f"{INDENT * 5}if (list.isEmpty() || !(list.get(0) instanceof {item_type})) {{\n"
1134
+ put_method += f"{INDENT * 6}// Unwrapped from Avro - need to wrap GenericData.Record objects\n"
1135
+ put_method += f"{INDENT * 6}this.{field_name} = list.stream().map(item -> item instanceof GenericData.Record ? new {item_type}((GenericData.Record)item) : ({item_type})item).collect(java.util.stream.Collectors.toList());\n"
1136
+ put_method += f"{INDENT * 5}}} else {{\n"
1137
+ put_method += f"{INDENT * 6}// Already wrapped\n"
1138
+ put_method += f"{INDENT * 6}this.{field_name} = ({field_type.type_name})value$;\n"
1139
+ put_method += f"{INDENT * 5}}}\n"
1140
+ put_method += f"{INDENT * 4}}} else {{\n"
1141
+ put_method += f"{INDENT * 5}// Handle null or other types\n"
1142
+ put_method += f"{INDENT * 5}this.{field_name} = value$ != null ? ({field_type.type_name})value$ : null;\n"
1143
+ put_method += f"{INDENT * 4}}}\n"
1144
+ put_method += f"{INDENT * 4}break;\n"
1145
+ put_method += f"{INDENT * 3}}}\n"
1146
+ elif is_map_of_records:
1147
+ value_type = field_type.type_name.split(",")[1].strip()[:-1]
1148
+ put_method += f"{INDENT * 3}case {index}: {{\n"
1149
+ put_method += f"{INDENT * 4}if (value$ instanceof Map<?,?>) {{\n"
1150
+ put_method += f"{INDENT * 5}Map<?,?> map = (Map<?,?>)value$;\n"
1151
+ put_method += f"{INDENT * 5}if (map.isEmpty() || !(map.values().iterator().next() instanceof {value_type})) {{\n"
1152
+ put_method += f"{INDENT * 6}// Unwrapped from Avro - need to wrap GenericData.Record objects\n"
1153
+ put_method += f"{INDENT * 6}this.{field_name} = map.entrySet().stream().collect(java.util.stream.Collectors.toMap(e -> (String)e.getKey(), e -> e.getValue() instanceof GenericData.Record ? new {value_type}((GenericData.Record)e.getValue()) : ({value_type})e.getValue()));\n"
1154
+ put_method += f"{INDENT * 5}}} else {{\n"
1155
+ put_method += f"{INDENT * 6}// Already wrapped\n"
1156
+ put_method += f"{INDENT * 6}this.{field_name} = ({field_type.type_name})value$;\n"
1157
+ put_method += f"{INDENT * 5}}}\n"
1158
+ put_method += f"{INDENT * 4}}} else {{\n"
1159
+ put_method += f"{INDENT * 5}// Handle null or other types\n"
1160
+ put_method += f"{INDENT * 5}this.{field_name} = value$ != null ? ({field_type.type_name})value$ : null;\n"
1161
+ put_method += f"{INDENT * 4}}}\n"
1162
+ put_method += f"{INDENT * 4}break;\n"
1163
+ put_method += f"{INDENT * 3}}}\n"
1164
+ elif field_type.type_name == 'String':
1165
+ # Handle null values for String fields
1166
+ put_method += f"{INDENT * 3}case {index}: this.{field_name} = value$ != null ? value$.toString() : null; break;\n"
1167
+ elif field_type.type_name.startswith("List<"):
1168
+ # Extract the element type
1169
+ element_type = field_type.type_name[5:-1]
1170
+ # Check if it's a List of enums
1171
+ if element_type in self.generated_types_java_package and self.generated_types_java_package[element_type] == "enum":
1172
+ # For List<Enum>, convert GenericEnumSymbol to actual enum values
1173
+ # Use fromAvroSymbol to match original Avro symbol names
1174
+ put_method += f"{INDENT * 3}case {index}: {{\n"
1175
+ put_method += f"{INDENT * 4}if (value$ instanceof List<?>) {{\n"
1176
+ put_method += f"{INDENT * 5}List<?> list = (List<?>)value$;\n"
1177
+ put_method += f"{INDENT * 5}this.{field_name} = list.stream().map(item -> item instanceof GenericData.EnumSymbol ? {element_type}.fromAvroSymbol(item.toString()) : ({element_type})item).collect(java.util.stream.Collectors.toList());\n"
1178
+ put_method += f"{INDENT * 4}}} else {{\n"
1179
+ put_method += f"{INDENT * 5}this.{field_name} = null;\n"
1180
+ put_method += f"{INDENT * 4}}}\n"
1181
+ put_method += f"{INDENT * 4}break;\n"
1182
+ put_method += f"{INDENT * 3}}}\n"
1183
+ elif element_type == "String":
1184
+ # For List<String>, convert Utf8 to String
1185
+ put_method += f"{INDENT * 3}case {index}: {{\n"
1186
+ put_method += f"{INDENT * 4}if (value$ instanceof List<?>) {{\n"
1187
+ put_method += f"{INDENT * 5}List<?> list = (List<?>)value$;\n"
1188
+ put_method += f"{INDENT * 5}this.{field_name} = list.stream().map(item -> item != null ? item.toString() : null).collect(java.util.stream.Collectors.toList());\n"
1189
+ put_method += f"{INDENT * 4}}} else {{\n"
1190
+ put_method += f"{INDENT * 5}this.{field_name} = null;\n"
1191
+ put_method += f"{INDENT * 4}}}\n"
1192
+ put_method += f"{INDENT * 4}break;\n"
1193
+ put_method += f"{INDENT * 3}}}\n"
1194
+ else:
1195
+ # For other List types, create a defensive copy
1196
+ put_method += f"{INDENT * 3}case {index}: this.{field_name} = value$ instanceof List<?> ? new java.util.ArrayList<>(({field_type.type_name})value$) : null; break;\n"
1197
+ elif field_type.type_name.startswith("Map<"):
1198
+ # For any Map type, create a defensive copy to avoid sharing references
1199
+ put_method += f"{INDENT * 3}case {index}: this.{field_name} = value$ instanceof Map<?,?> ? new java.util.HashMap<>(({field_type.type_name})value$) : null; break;\n"
1200
+ else:
1201
+ put_method += f"{INDENT * 3}case {index}: this.{field_name} = ({field_type.type_name})value$; break;\n"
1202
+ put_method += f"{INDENT * 3}default: throw new AvroRuntimeException(\"Bad index: \" + field$);\n"
1203
+ put_method += f"{INDENT * 2}}}\n{INDENT}}}\n"
1204
+ if suppress_unchecked:
1205
+ put_method = f"\n{INDENT}@SuppressWarnings(\"unchecked\"){put_method}"
1206
+ return put_method
1207
+
1208
def generate_enum(self, avro_schema: Dict, parent_package: str, write_file: bool) -> JavaType:
    """Generate a Java enum from an Avro enum schema.

    Every Avro symbol becomes an upper-cased Java constant that stores its
    original Avro spelling. The generated enum exposes that spelling through
    ``avroSymbol()`` (annotated with Jackson's ``@JsonValue``) and offers a
    ``fromAvroSymbol()`` lookup (annotated with ``@JsonCreator``). When
    ``self.avro_annotation`` is set, a static ``SCHEMA`` constant holding the
    enum's Avro schema is emitted as well.

    Args:
        avro_schema: The Avro enum schema. ``name`` is required; ``symbols``,
            ``namespace`` and ``doc`` are optional.
        parent_package: Namespace used when the schema declares none.
        write_file: When true, the source is written via ``write_to_file``.

    Returns:
        An ``AvroToJava.JavaType`` for the qualified enum name with
        ``is_enum=True``.
    """

    def java_string_body(text: str) -> str:
        # Backslashes are escaped first so the quote escapes added afterwards
        # are not themselves doubled.
        return text.replace('\\', '\\\\').replace('"', '\\"')

    enum_definition = ''
    if 'doc' in avro_schema:
        # A literal "*/" in the doc text would terminate the Javadoc comment.
        doc_text = str(avro_schema['doc']).replace('*/', '*&#47;')
        enum_definition += f"/** {doc_text} */\n"

    avro_namespace = avro_schema.get('namespace', parent_package)
    package = self.join_packages(self.base_package, avro_namespace).replace('.', '/').lower()
    enum_name = self.safe_identifier(avro_schema['name'])
    type_name = self.qualified_name(package.replace('/', '.'), enum_name)

    # Register the enum under both its Avro name and its Java name.
    self.generated_types_avro_namespace[self.qualified_name(avro_namespace, avro_schema['name'])] = "enum"
    self.generated_types_java_package[type_name] = "enum"
    self.generated_avro_schemas[type_name] = avro_schema

    symbols = avro_schema.get('symbols', [])

    # Map each Avro symbol to a Java constant name: '-' and '.' become '_',
    # the name is upper-cased, and a '_' prefix is added when it starts with a
    # digit or when its lower-case form is a Java reserved word.
    symbol_pairs = []  # (java_symbol, avro_symbol)
    for symbol in symbols:
        java_symbol = symbol.replace('-', '_').replace('.', '_').upper()
        if java_symbol and java_symbol[0].isdigit():
            java_symbol = '_' + java_symbol
        if is_java_reserved_word(java_symbol.lower()):
            java_symbol = '_' + java_symbol
        symbol_pairs.append((java_symbol, symbol))

    enum_definition += f"public enum {enum_name} {{\n"
    # Each constant carries its original Avro symbol as a constructor argument.
    enum_constants = [
        f'{java_symbol}("{java_string_body(avro_symbol)}")'
        for java_symbol, avro_symbol in symbol_pairs
    ]
    enum_definition += f"{INDENT}" + ", ".join(enum_constants)

    enum_definition += f";\n\n{INDENT}private final String avroSymbol;\n\n"
    enum_definition += f"{INDENT}{enum_name}(String avroSymbol) {{\n{INDENT*2}this.avroSymbol = avroSymbol;\n{INDENT}}}\n\n"
    # @JsonValue makes Jackson serialize the enum through avroSymbol().
    enum_definition += f"{INDENT}@com.fasterxml.jackson.annotation.JsonValue\n"
    enum_definition += f"{INDENT}public String avroSymbol() {{\n{INDENT*2}return avroSymbol;\n{INDENT}}}\n\n"

    # @JsonCreator makes Jackson deserialize through the Avro symbol lookup.
    enum_definition += f"{INDENT}@com.fasterxml.jackson.annotation.JsonCreator\n"
    enum_definition += f"{INDENT}public static {enum_name} fromAvroSymbol(String symbol) {{\n"
    enum_definition += f"{INDENT*2}for ({enum_name} e : values()) {{\n"
    enum_definition += f"{INDENT*3}if (e.avroSymbol.equals(symbol)) return e;\n"
    enum_definition += f"{INDENT*2}}}\n"
    enum_definition += f"{INDENT*2}throw new IllegalArgumentException(\"Unknown symbol: \" + symbol);\n"
    enum_definition += f"{INDENT}}}\n"

    if self.avro_annotation:
        # Inline schema for the enum, embedded as a Java string constant.
        enum_schema = {
            "type": "enum",
            "name": enum_name,
            "symbols": symbols
        }
        if 'namespace' in avro_schema:
            enum_schema['namespace'] = avro_schema['namespace']
        if 'doc' in avro_schema:
            enum_schema['doc'] = avro_schema['doc']

        # Split the raw JSON into 80-character pieces first and escape each
        # piece afterwards, so a chunk boundary can never fall inside a Java
        # escape sequence and JSON backslash escapes survive the Java literal.
        raw_schema_json = json.dumps(enum_schema)
        schema_chunks = [
            java_string_body(raw_schema_json[start:start + 80])
            for start in range(0, len(raw_schema_json), 80)
        ]
        enum_schema_json = f"\"+\n{INDENT}\"".join(schema_chunks)

        enum_definition += f"\n{INDENT}public static final Schema SCHEMA = new Schema.Parser().parse(\n{INDENT}\"{enum_schema_json}\");\n"

    enum_definition += "}\n"
    if write_file:
        self.write_to_file(package, enum_name, enum_definition)
    return AvroToJava.JavaType(type_name, is_enum=True)
1285
+
1286
def generate_embedded_union_class_jackson(self, class_name: str, field_name: str, avro_type: List, parent_package: str, write_file: bool) -> str:
    """Generate a Jackson-enabled Java wrapper class for an embedded Avro union.

    The generated class is named ``<class_name><PascalFieldName>Union``. For
    every union member it gets a private field with a getter, a typed
    constructor, a branch in the ``(Object obj)`` constructor, a branch in
    ``toObject()``, and branches in the nested Jackson ``Serializer`` and
    ``Deserializer``. When ``self.avro_annotation`` is set it also gets a
    ``GenericData.Record`` constructor.

    Args:
        class_name: Name of the Java class that owns the union field.
        field_name: Name of the union field.
        avro_type: The Avro union, i.e. the list of member schemas.
        parent_package: Avro namespace the union is generated into.
        write_file: When true, the source is written via ``write_to_file``.

    Returns:
        The simple (unqualified) name of the generated union class.
    """
    # Accumulators for the per-member fragments of the generated class.
    class_definition_ctors = class_definition_decls = class_definition_read = class_definition_write = ''
    class_definition_toobject = class_definition_fromobjectctor = class_definition_genericrecordctor = ''

    # Java type name -> (JsonNode test method, JsonNode accessor) used by the
    # deserializer to recognise primitive members.
    primitive_readers = {
        "String": ("isTextual", "asText"),
        "byte[]": ("isBinary", "binaryValue"),
        "int": ("canConvertToInt", "asInt"),
        "Int": ("canConvertToInt", "asInt"),
        "Integer": ("canConvertToInt", "asInt"),
        "long": ("canConvertToLong", "asLong"),
        "Long": ("canConvertToLong", "asLong"),
        "float": ("isFloat", "floatValue"),
        "Float": ("isFloat", "floatValue"),
        "double": ("isDouble", "doubleValue"),
        "Double": ("isDouble", "doubleValue"),
        "decimal": ("isBigDecimal", "decimalValue"),
        "boolean": ("isBoolean", "asBoolean"),
        "Boolean": ("isBoolean", "asBoolean"),
    }

    list_is_json_match: List[str] = []
    union_class_name = class_name + pascal(field_name) + 'Union'
    package = self.join_packages(self.base_package, parent_package).replace('.', '/').lower()
    union_types: List[AvroToJava.JavaType] = [
        self.convert_avro_type_to_java(class_name, field_name + "Option" + str(i), t, parent_package)
        for i, t in enumerate(avro_type)
    ]
    for union_type in union_types:
        # Fields must be nullable, so primitives are replaced by their wrappers.
        if self.is_java_primitive(union_type):
            union_type = self.map_primitive_to_java(union_type.type_name, True)
        java_type = union_type.type_name

        is_dict = java_type.startswith("Map<")
        is_list = not is_dict and java_type.startswith("List<")
        if is_dict or is_list:
            union_variable_name = flatten_type_name(java_type)
        elif java_type == "byte[]":
            union_variable_name = "Bytes"
        else:
            # Drop the package prefix of qualified type names.
            union_variable_name = java_type.rsplit('.', 1)[-1]
        union_variable_name = self.safe_identifier(union_variable_name, class_name)

        camel_name = camel(union_variable_name)
        member = f"_{camel_name}"          # backing field in the generated class
        value_local = f"{camel_name}Value"  # local variable in the serializer

        # Typed constructor
        class_definition_ctors += \
            f"{INDENT*1}public {union_class_name}({java_type} {union_variable_name}) {{\n{INDENT*2}this.{member} = {union_variable_name};\n{INDENT*1}}}\n"

        # Field declaration and getter
        class_definition_decls += \
            f"{INDENT*1}private {java_type} {member};\n" + \
            f"{INDENT*1}public {java_type} get{union_variable_name}() {{ return {member}; }}\n"

        # toObject(): enums are wrapped in GenericData.EnumSymbol using the
        # original Avro symbol. The wrapping needs the enum's SCHEMA constant
        # and the Avro import, which only exist when avro_annotation is on;
        # otherwise the enum value is returned as-is.
        if union_type.is_enum and self.avro_annotation:
            class_definition_toobject += f"{INDENT*2}if ({member} != null) {{\n{INDENT*3}return new GenericData.EnumSymbol({java_type}.SCHEMA, {member}.avroSymbol());\n{INDENT*2}}}\n"
        else:
            class_definition_toobject += f"{INDENT*2}if ({member} != null) {{\n{INDENT*3}return {member};\n{INDENT*2}}}\n"

        # The GenericData.Record constructor only handles record members.
        if self.avro_annotation and union_type.is_class:
            class_definition_genericrecordctor += f"{INDENT*2}if (record.getSchema().getFullName().equals({java_type}.AVROSCHEMA.getFullName())) {{\n"
            class_definition_genericrecordctor += f"{INDENT*3}this.{member} = new {java_type}(record);\n{INDENT*3}return;\n{INDENT*2}}}\n"

        # (Object obj) constructor: list and map members are matched on the raw
        # collection type only (the original author notes a union can hold at
        # most one of each).
        if is_list:
            class_definition_fromobjectctor += f"{INDENT*2}if (obj instanceof List<?>) {{\n{INDENT*3}this.{member} = ({java_type})obj;\n{INDENT*3}return;\n{INDENT*2}}}\n"
        elif is_dict:
            class_definition_fromobjectctor += f"{INDENT*2}if (obj instanceof Map<?,?>) {{\n{INDENT*3}this.{member} = ({java_type})obj;\n{INDENT*3}return;\n{INDENT*2}}}\n"
        else:
            # Records arriving as GenericData.Record are matched by full schema name.
            if self.avro_annotation and union_type.is_class:
                class_definition_fromobjectctor += f"{INDENT*2}if (obj instanceof GenericData.Record) {{\n"
                class_definition_fromobjectctor += f"{INDENT*3}GenericData.Record record = (GenericData.Record)obj;\n"
                class_definition_fromobjectctor += f"{INDENT*3}String recordFullName = record.getSchema().getFullName();\n"
                class_definition_fromobjectctor += f"{INDENT*3}String expectedFullName = {java_type}.AVROSCHEMA.getFullName();\n"
                class_definition_fromobjectctor += f"{INDENT*3}if (recordFullName.equals(expectedFullName)) {{\n"
                class_definition_fromobjectctor += f"{INDENT*4}this.{member} = new {java_type}(record);\n{INDENT*4}return;\n{INDENT*3}}}\n{INDENT*2}}}\n"

            # Avro delivers strings as Utf8.
            if self.avro_annotation and java_type == "String":
                class_definition_fromobjectctor += f"{INDENT*2}if (obj instanceof org.apache.avro.util.Utf8) {{\n{INDENT*3}this.{member} = obj.toString();\n{INDENT*3}return;\n{INDENT*2}}}\n"

            # Avro delivers enums as GenericData.EnumSymbol.
            if self.avro_annotation and union_type.is_enum:
                class_definition_fromobjectctor += f"{INDENT*2}if (obj instanceof GenericData.EnumSymbol) {{\n{INDENT*3}this.{member} = {java_type}.fromAvroSymbol(obj.toString());\n{INDENT*3}return;\n{INDENT*2}}}\n"

            class_definition_fromobjectctor += f"{INDENT*2}if (obj instanceof {java_type}) {{\n{INDENT*3}this.{member} = ({java_type})obj;\n{INDENT*3}return;\n{INDENT*2}}}\n"

        # Deserializer: members are probed in declaration order.
        if is_dict:
            class_definition_read += f"{INDENT*3}if (node.isObject()) {{\n{INDENT*4}{java_type} map = mapper.readValue(node.toString(), new TypeReference<{java_type}>(){{}});\n{INDENT*4}return new {union_class_name}(map);\n{INDENT*3}}}\n"
        elif is_list:
            class_definition_read += f"{INDENT*3}if (node.isArray()) {{\n{INDENT*4}{java_type} list = mapper.readValue(node.toString(), new TypeReference<{java_type}>(){{}});\n{INDENT*4}return new {union_class_name}(list);\n{INDENT*3}}}\n"
        elif self.is_java_primitive(union_type):
            reader = primitive_readers.get(java_type)
            if reader:
                node_test, node_getter = reader
                class_definition_read += f"{INDENT*3}if (node.{node_test}()) {{\n{INDENT*4}return new {union_class_name}(node.{node_getter}());\n{INDENT*3}}}\n"
        else:
            if union_type.is_enum:
                class_definition_read += f"{INDENT*3}if (node.isTextual()) {{\n{INDENT*4}return new {union_class_name}({java_type}.fromAvroSymbol(node.asText()));\n{INDENT*3}}}\n"
            elif union_type.is_class:
                # Records are recognised with isJsonMatch() and read with fromData().
                class_definition_read += f"{INDENT*3}if ({java_type}.isJsonMatch(node)) {{\n{INDENT*4}return new {union_class_name}({java_type}.fromData(node, \"application/json\"));\n{INDENT*3}}}\n"

        # Serializer: write the first member that is set.
        class_definition_write += f"{INDENT*3}{java_type} {value_local} = value.get{union_variable_name}();\n{INDENT*3}if ({value_local} != null) {{\n{INDENT*4}generator.writeObject({value_local});\n{INDENT*4}return;\n{INDENT*3}}}\n"

        # isJsonMatch() clause for this member, if one can be produced.
        gij = self.get_is_json_match_clause_type("node", class_name, union_type)
        if gij:
            list_is_json_match.append(gij)

    # With no clauses at all the join would be empty and render "return ;".
    json_match_expression = " || ".join(list_is_json_match) or "false"

    class_definition = f"@JsonSerialize(using = {union_class_name}.Serializer.class)\n"
    class_definition += f"@JsonDeserialize(using = {union_class_name}.Deserializer.class)\n"
    class_definition += f"public class {union_class_name} {{\n"
    class_definition += class_definition_decls
    class_definition += f"\n{INDENT}public {union_class_name}() {{}}\n"
    if self.avro_annotation:
        class_definition += f"\n{INDENT}public {union_class_name}(GenericData.Record record) {{\n"
        class_definition += class_definition_genericrecordctor
        class_definition += f"{INDENT*2}throw new UnsupportedOperationException(\"No record type is set in the union\");\n"
        class_definition += f"{INDENT}}}\n"
    class_definition += f"\n{INDENT}public {union_class_name}(Object obj) {{\n"
    class_definition += f"{INDENT*2}if (obj == null) {{\n"
    class_definition += f"{INDENT*3}return; // null is valid for unions with null type\n"
    class_definition += f"{INDENT*2}}}\n"
    class_definition += class_definition_fromobjectctor
    class_definition += f"{INDENT*2}throw new UnsupportedOperationException(\"No record type is set in the union\");\n"
    class_definition += f"{INDENT}}}\n"
    class_definition += class_definition_ctors
    class_definition += f"\n{INDENT}public Object toObject() {{\n"
    class_definition += class_definition_toobject
    class_definition += f"{INDENT*2}throw new UnsupportedOperationException(\"No record type is set in the union\");\n"
    class_definition += f"{INDENT}}}\n"
    class_definition += f"\n{INDENT}public static class Serializer extends JsonSerializer<{union_class_name}> {{\n"
    class_definition += f"{INDENT*2}@Override\n"
    class_definition += f"{INDENT*2}public void serialize({union_class_name} value, JsonGenerator generator, SerializerProvider serializers) throws IOException {{\n"
    class_definition += class_definition_write
    class_definition += f"{INDENT*3}throw new UnsupportedOperationException(\"No record type is set in the union\");\n"
    class_definition += f"{INDENT*2}}}\n{INDENT}}}\n"
    class_definition += f"\n{INDENT}public static class Deserializer extends JsonDeserializer<{union_class_name}> {{\n"
    class_definition += f"{INDENT*2}@Override\n"
    class_definition += f"{INDENT*2}public {union_class_name} deserialize(JsonParser p, DeserializationContext ctxt) throws IOException, JsonProcessingException {{\n"
    class_definition += f"{INDENT*3}ObjectMapper mapper = (ObjectMapper) p.getCodec();\n"
    class_definition += f"{INDENT*3}JsonNode node = mapper.readTree(p);\n"
    class_definition += class_definition_read
    class_definition += f"{INDENT*3}throw new UnsupportedOperationException(\"No record type matched the JSON data\");\n"
    class_definition += f"{INDENT*2}}}\n{INDENT}}}\n"
    class_definition += f"\n{INDENT*1}public static boolean isJsonMatch(JsonNode node) {{\n"
    class_definition += f"{INDENT*2}return {json_match_expression};\n"
    class_definition += f"{INDENT*1}}}\n"

    # equals() and hashCode() for the union class
    class_definition += self.generate_union_equals_method(union_class_name, union_types)
    class_definition += self.generate_union_hashcode_method(union_class_name, union_types)
    class_definition += "}\n"

    if write_file:
        self.write_to_file(package, union_class_name, class_definition)

    # Track the union under its simple and its qualified name, together with
    # the member schemas it was generated from.
    qualified_union_name = self.qualified_name(package.replace('/', '.'), union_class_name)
    self.generated_types_avro_namespace[union_class_name] = "union"
    self.generated_types_java_package[union_class_name] = "union"
    self.generated_types_java_package[qualified_union_name] = "union"
    self.generated_avro_schemas[union_class_name] = {"types": avro_type}
    self.generated_avro_schemas[qualified_union_name] = {"types": avro_type}
    return union_class_name
1462
+
1463
+
1464
def generate_property(self, class_name: str, field: Dict, parent_package: str) -> str:
    """Generate the Java field and accessor source for one Avro record field.

    A regular field yields a private field with a getter and a setter. A field
    with a non-null ``const`` yields a ``private final`` field initialised to
    that constant and a getter only; if it is also flagged ``discriminator``
    it gets a typed ``get<Name>Value()`` accessor plus a ``String`` getter that
    returns ``avroSymbol()``. Fields whose Java type carries ``union_types``
    (and is not itself a List/Map) additionally get typed ``As<Type>``
    accessors.

    Args:
        class_name: Name of the Java class that owns the field.
        field: The Avro field definition (``name``, ``type`` and optionally
            ``doc``, ``const``, ``discriminator``).
        parent_package: Avro namespace passed on to the type conversion.

    Returns:
        The Java source fragment for the property.
    """
    field_name = pascal(field['name']) if self.pascal_properties else field['name']
    field_type = self.convert_avro_type_to_java(class_name, field_name, field['type'], parent_package)
    safe_field_name = self.safe_identifier(field_name, class_name)
    java_type = field_type.type_name
    accessor_name = pascal(field_name)

    property_def = ''
    if 'doc' in field:
        # A literal "*/" in the doc text would terminate the Javadoc comment.
        doc_text = str(field['doc']).replace('*/', '*&#47;')
        property_def += f"{INDENT}/** {doc_text} */\n"

    # Discriminator const fields carry @JsonProperty on the getter instead of
    # on the field.
    is_discriminator_const = field.get('discriminator', False) and 'const' in field
    if self.jackson_annotations and not is_discriminator_const:
        property_def += f"{INDENT}@JsonProperty(\"{field['name']}\")\n"

    if 'const' in field and field['const'] is not None:
        const_value = field['const']
        is_discriminator = field.get('discriminator', False)

        # Render the constant as a Java literal that matches the field type.
        if java_type == 'String':
            escaped = (str(const_value)
                       .replace('\\', '\\\\')
                       .replace('"', '\\"')
                       .replace('\n', '\\n')
                       .replace('\r', '\\r'))
            const_value = f'"{escaped}"'
        elif java_type in ('boolean', 'Boolean'):
            # Python spells booleans True/False; Java requires true/false.
            const_value = str(const_value).lower()
        elif java_type in ('long', 'Long'):
            # The suffix is required for values beyond int range and for
            # assigning to the boxed Long type.
            const_value = f'{const_value}L'
        elif java_type in ('float', 'Float'):
            # Without the suffix the literal is a double and does not
            # assign to float.
            const_value = f'{const_value}f'
        elif java_type in ('int', 'Integer', 'double', 'Double'):
            const_value = str(const_value)
        else:
            # Any other type is treated as an enum: derive the constant name
            # with the same rules the enum generator applies to its symbols.
            const_value_upper = str(const_value).replace('-', '_').replace('.', '_').upper()
            if const_value_upper and const_value_upper[0].isdigit():
                const_value_upper = '_' + const_value_upper
            if is_java_reserved_word(const_value_upper.lower()):
                const_value_upper = '_' + const_value_upper
            const_value = f'{java_type}.{const_value_upper}'

        property_def += f"{INDENT}private final {java_type} {safe_field_name} = {const_value};\n"

        if is_discriminator:
            # Typed accessor for the value itself, hidden from Jackson.
            if self.jackson_annotations:
                property_def += f"{INDENT}@JsonIgnore\n"
            property_def += f"{INDENT}public {java_type} get{accessor_name}Value() {{ return {safe_field_name}; }}\n"
            # String getter used for serialization; READ_ONLY because the
            # const field never needs to be deserialized. No @Override, since
            # not every discriminated union variant extends a base class.
            # NOTE(review): avroSymbol() presumes the discriminator field is a
            # generated enum - confirm no String-typed discriminators occur.
            if self.jackson_annotations:
                property_def += f"{INDENT}@JsonProperty(value=\"{field['name']}\", access=JsonProperty.Access.READ_ONLY)\n"
            property_def += f"{INDENT}public String get{accessor_name}() {{ return {safe_field_name}.avroSymbol(); }}\n"
        else:
            property_def += f"{INDENT}public {java_type} get{accessor_name}() {{ return {safe_field_name}; }}\n"
    else:
        property_def += f"{INDENT}private {java_type} {safe_field_name};\n"
        property_def += f"{INDENT}public {java_type} get{accessor_name}() {{ return {safe_field_name}; }}\n"
        property_def += f"{INDENT}public void set{accessor_name}({java_type} {safe_field_name}) {{ this.{safe_field_name} = {safe_field_name}; }}\n"

    # Typed accessors only for direct union fields; for List/Map of unions the
    # field is the collection, not a single union value.
    if field_type.union_types and not java_type.startswith("List<") and not java_type.startswith("Map<"):
        for union_type in field_type.union_types:
            member_type = union_type.type_name
            member_suffix = flatten_type_name(member_type)
            if member_type.startswith("List<") or member_type.startswith("Map<"):
                property_def += f"{INDENT}@SuppressWarnings(\"unchecked\")\n"
            property_def += f"{INDENT}public {member_type} get{accessor_name}As{member_suffix}() {{ return ({member_type}){safe_field_name}; }}\n"
            property_def += f"{INDENT}public void set{accessor_name}As{member_suffix}({member_type} {safe_field_name}) {{ this.{safe_field_name} = {safe_field_name}; }}\n"
    return property_def
1527
+
1528
def write_to_file(self, package: str, name: str, definition: str):
    """Write a generated Java type to ``<name>.java`` below ``self.output_dir``.

    The file consists of the ``package`` declaration (when the package is
    non-empty), the imports selected by substring search over ``definition``,
    a blank line, and ``definition`` itself.

    Args:
        package: Target package, '.'- or '/'-separated; lower-cased and passed
            through ``self.safe_package`` before use.
        name: Simple type name; also the file name without extension.
        definition: Java source of the type, without package/import header.
    """
    # Each entry is (substrings to look for, class to import); an import is
    # emitted when any of its substrings occurs in the definition. The order
    # of the entries is the order of the emitted import lines.
    general_imports = (
        (("List<", "ArrayList<"), "java.util.List"),
        (("ArrayList<", "Arrays.asList"), "java.util.ArrayList"),
        (("Map<", "HashMap<"), "java.util.Map"),
        (("HashMap<",), "java.util.HashMap"),
        (("Predicate<",), "java.util.function.Predicate"),
        (("BigDecimal",), "java.math.BigDecimal"),
        (("LocalDate",), "java.time.LocalDate"),
        (("LocalTime",), "java.time.LocalTime"),
        (("Instant",), "java.time.Instant"),
        (("LocalDateTime",), "java.time.LocalDateTime"),
        (("UUID",), "java.util.UUID"),
        (("Duration",), "java.time.Duration"),
    )
    avro_imports = (
        (("AvroRuntimeException",), "org.apache.avro.AvroRuntimeException"),
        (("Schema",), "org.apache.avro.Schema"),
        (("GenericData",), "org.apache.avro.generic.GenericData"),
        (("DatumReader",), "org.apache.avro.io.DatumReader"),
        (("DatumWriter",), "org.apache.avro.io.DatumWriter"),
        (("DecoderFactory",), "org.apache.avro.io.DecoderFactory"),
        (("EncoderFactory",), "org.apache.avro.io.EncoderFactory"),
        (("SpecificDatumReader",), "org.apache.avro.specific.SpecificDatumReader"),
        (("SpecificDatumWriter",), "org.apache.avro.specific.SpecificDatumWriter"),
        (("SpecificRecord",), "org.apache.avro.specific.SpecificRecord"),
        (("Encoder",), "org.apache.avro.io.Encoder"),
    )
    jackson_imports = (
        (("JsonNode",), "com.fasterxml.jackson.databind.JsonNode"),
        (("ObjectMapper",), "com.fasterxml.jackson.databind.ObjectMapper"),
        (("JsonSerialize",), "com.fasterxml.jackson.databind.annotation.JsonSerialize"),
        (("JsonDeserialize",), "com.fasterxml.jackson.databind.annotation.JsonDeserialize"),
        (("JsonSerializer",), "com.fasterxml.jackson.databind.JsonSerializer"),
        (("SerializerProvider",), "com.fasterxml.jackson.databind.SerializerProvider"),
        (("JsonDeserializer",), "com.fasterxml.jackson.databind.JsonDeserializer"),
        (("DeserializationContext",), "com.fasterxml.jackson.databind.DeserializationContext"),
        (("JsonParser",), "com.fasterxml.jackson.core.JsonParser"),
        (("JsonIgnore",), "com.fasterxml.jackson.annotation.JsonIgnore"),
        (("JsonIgnoreProperties",), "com.fasterxml.jackson.annotation.JsonIgnoreProperties"),
        (("JsonProperty",), "com.fasterxml.jackson.annotation.JsonProperty"),
        (("JsonProcessingException",), "com.fasterxml.jackson.core.JsonProcessingException"),
        (("JsonGenerator",), "com.fasterxml.jackson.core.JsonGenerator"),
        (("TypeReference",), "com.fasterxml.jackson.core.type.TypeReference"),
    )
    stream_imports = (
        (("GZIPOutputStream",), "java.util.zip.GZIPOutputStream"),
        (("GZIPInputStream",), "java.util.zip.GZIPInputStream"),
        (("ByteArrayInputStream",), "java.io.ByteArrayInputStream"),
        (("ByteArrayOutputStream",), "java.io.ByteArrayOutputStream"),
        (("InputStream",), "java.io.InputStream"),
        (("IOException",), "java.io.IOException"),
        (("InflaterInputStream",), "java.util.zip.InflaterInputStream"),
    )

    def import_lines(table):
        # Import statements for every table entry the definition mentions.
        return [
            f"import {qualified_class};\n"
            for needles, qualified_class in table
            if any(needle in definition for needle in needles)
        ]

    java_package = self.safe_package(package.lower())
    target_dir = os.path.join(
        self.output_dir, java_package.replace('.', os.sep).replace('/', os.sep))
    if not os.path.exists(target_dir):
        os.makedirs(target_dir, exist_ok=True)
    target_path = os.path.join(target_dir, f"{name}.java")

    with open(target_path, 'w', encoding='utf-8') as out:
        dotted_package = java_package.replace('/', '.')
        if java_package:
            out.write(f"package {dotted_package};\n\n")

        # A class of the form "public class X extends U", where U is a
        # discriminated union base class generated by this converter, needs
        # an import of U when U lives in another package.
        if " extends " in definition and self.jackson_annotations:
            import re
            extends_match = re.search(r'public class \w+ extends (\w+)', definition)
            if extends_match:
                base_class_name = extends_match.group(1)
                if base_class_name in self.discriminated_unions:
                    union_subtypes = self.discriminated_unions[base_class_name]
                    # The union base class is generated into the package of
                    # its first subtype.
                    if union_subtypes:
                        union_package = union_subtypes[0]['package']
                    else:
                        union_package = self.base_package.replace('/', '.')
                    if union_package != dotted_package:
                        out.write(f"import {union_package}.{base_class_name};\n")

        out.writelines(import_lines(general_imports))
        if self.avro_annotation:
            out.writelines(import_lines(avro_imports))
        if self.jackson_annotations:
            out.writelines(import_lines(jackson_imports))
        if self.avro_annotation or self.jackson_annotations:
            out.writelines(import_lines(stream_imports))

        out.write("\n")
        out.write(definition)
1657
+
1658
def generate_tests(self, base_output_dir: str) -> None:
    """Generate a unit test source file for every generated Java class and enum.

    Test sources are written below ``<base_output_dir>/src/test/java``. Entries
    of ``self.generated_types_java_package`` whose kind is neither ``"class"``
    nor ``"enum"`` are skipped.

    Args:
        base_output_dir: Directory under which ``src/test/java`` is created.
    """
    test_directory_path = os.path.join(base_output_dir, "src", "test", "java")
    # exist_ok=True already tolerates an existing directory, so a separate
    # os.path.exists() probe is unnecessary.
    os.makedirs(test_directory_path, exist_ok=True)

    for class_name, type_kind in self.generated_types_java_package.items():
        if type_kind in ("class", "enum"):
            self.generate_test_class(class_name, type_kind, test_directory_path)
1669
+
1670
def generate_test_class(self, class_name: str, type_kind: str, test_directory_path: str) -> None:
    """Render and write the unit test source file for one generated Java type.

    The test class is named ``<SimpleName>Test`` and is written to the
    directory that corresponds to the type's package below
    ``test_directory_path``.

    Args:
        class_name: Java type name, optionally package-qualified with dots.
        type_kind: ``"class"`` or ``"enum"``. Any other kind is ignored.
        test_directory_path: Root directory for generated test sources.
    """
    if type_kind not in ("class", "enum"):
        # Only these two kinds have a template. Without this guard the write
        # below would fail on an unbound ``test_class_definition``.
        return

    # Imported after the guard so unsupported kinds return without touching
    # the template machinery.
    from avrotize.common import process_template

    avro_schema = self.generated_avro_schemas.get(class_name, {})
    # Split "a.b.Name" into package "a.b" and simple name "Name"; a name
    # without dots yields an empty package.
    package, _, simple_class_name = class_name.rpartition('.')
    test_class_name = f"{simple_class_name}Test"

    if type_kind == "class":
        fields = self.get_class_test_fields(avro_schema, simple_class_name, package)
        imports = self.get_test_imports(fields)
        test_class_definition = process_template(
            "avrotojava/class_test.java.jinja",
            package=package,
            test_class_name=test_class_name,
            class_name=simple_class_name,
            fields=fields,
            imports=imports,
            avro_annotation=self.avro_annotation,
            jackson_annotation=self.jackson_annotations
        )
    else:
        # Convert symbols to Java-safe identifiers in SCREAMING_CASE
        # (same logic as generate_enum).
        java_safe_symbols = []
        for symbol in avro_schema.get('symbols', []):
            java_symbol = symbol.replace('-', '_').replace('.', '_').upper()
            if java_symbol and java_symbol[0].isdigit():
                java_symbol = '_' + java_symbol
            if is_java_reserved_word(java_symbol.lower()):
                java_symbol = '_' + java_symbol
            java_safe_symbols.append(java_symbol)

        test_class_definition = process_template(
            "avrotojava/enum_test.java.jinja",
            package=package,
            test_class_name=test_class_name,
            enum_name=simple_class_name,
            symbols=java_safe_symbols  # Pass converted symbols instead of raw
        )

    # Write the test file into the directory matching the Java package.
    test_file_dir = os.path.join(test_directory_path, package.replace('.', os.sep))
    os.makedirs(test_file_dir, exist_ok=True)
    test_file_path = os.path.join(test_file_dir, f"{test_class_name}.java")
    with open(test_file_path, 'w', encoding='utf-8') as test_file:
        test_file.write(test_class_definition)
1720
+
1721
def get_test_imports(self, fields: List) -> List[str]:
    """Collect the Java import statements required by a generated test class.

    For each field this adds the ``java.util`` collection imports implied by a
    ``List<...>`` or ``Map<...>`` field type, then imports every referenced
    type that is registered in ``self.generated_types_java_package`` under a
    package-qualified name. For generated union types, the union's member
    types (taken from its stored Avro schema) are imported as well.

    Args:
        fields: Objects exposing ``field_type`` (a Java type string) and,
            optionally, ``java_type_obj`` with a ``union_types`` attribute.

    Returns:
        Import statements in first-seen order, without duplicates.
    """
    imports: List[str] = []
    avro_primitives = ('null', 'string', 'int', 'long', 'float', 'double', 'boolean', 'bytes')

    def add_import(statement: str) -> None:
        # Preserve insertion order while suppressing duplicates.
        if statement not in imports:
            imports.append(statement)

    def map_value_type(map_type: str):
        # Return the value type of "Map<K, V>", or None when it cannot be split.
        start = map_type.index('<') + 1
        end = map_type.rindex('>')
        map_types = map_type[start:end].split(',')
        return map_types[1].strip() if len(map_types) > 1 else None

    def qualified_java_name(type_name: str, avro_namespace: str) -> str:
        # Build the full Java qualified name, prefixed with the base package.
        java_package = self.join_packages(self.base_package, avro_namespace).replace('/', '.').lower()
        return java_package + '.' + type_name

    for field in fields:
        field_type = field.field_type
        # Types referenced by this field, including inner types of generics.
        inner_types = []
        if field_type.startswith("List<"):
            add_import("import java.util.List;")
            add_import("import java.util.ArrayList;")
            # Extract the inner type: List<Type> -> Type
            inner_type = field_type[5:-1]
            if inner_type.startswith("Map<"):
                add_import("import java.util.Map;")
                add_import("import java.util.HashMap;")
                value_type = map_value_type(inner_type)
                if value_type is not None:
                    inner_types.append(value_type)
            else:
                inner_types.append(inner_type)
        elif field_type.startswith("Map<"):
            add_import("import java.util.Map;")
            add_import("import java.util.HashMap;")
            value_type = map_value_type(field_type)
            if value_type is not None:
                inner_types.append(value_type)
        else:
            # Non-generic field: the field type itself is the candidate.
            inner_types.append(field_type)

        # An Object field backed by an Avro-style union contributes all of its
        # member types.
        java_type_obj = getattr(field, 'java_type_obj', None)
        if java_type_obj and java_type_obj.union_types:
            for union_member_type in java_type_obj.union_types:
                inner_types.append(union_member_type.type_name)

        for type_to_check in inner_types:
            if type_to_check not in self.generated_types_java_package:
                continue
            type_kind = self.generated_types_java_package[type_to_check]
            # Only a package-qualified name can be imported.
            if '.' in type_to_check:
                add_import(f"import {type_to_check};")
            # Unions are processed even when unqualified, because their
            # members may still need imports.
            if type_kind != "union":
                continue
            avro_schema = self.generated_avro_schemas.get(type_to_check, {})
            if not avro_schema or 'types' not in avro_schema:
                continue
            for union_type in avro_schema['types']:
                java_qualified_name = None
                if isinstance(union_type, dict) and 'name' in union_type:
                    # Inline definition of a named type.
                    if 'namespace' in union_type:
                        java_qualified_name = qualified_java_name(union_type['name'], union_type['namespace'])
                    else:
                        java_qualified_name = union_type['name']
                elif isinstance(union_type, str) and union_type not in avro_primitives:
                    # String reference to a named type, given by its Avro
                    # qualified name.
                    avro_namespace, _, type_name = union_type.rpartition('.')
                    if avro_namespace:
                        java_qualified_name = qualified_java_name(type_name, avro_namespace)
                    else:
                        java_qualified_name = union_type

                if not java_qualified_name:
                    continue
                is_generated = (
                    java_qualified_name in self.generated_types_java_package
                    or java_qualified_name.split('.')[-1] in self.generated_types_java_package
                )
                # Java cannot import a type from the unnamed package, so an
                # unqualified member name is skipped (same rule as above).
                if is_generated and '.' in java_qualified_name:
                    add_import(f"import {java_qualified_name};")
    return imports
1818
+
1819
def get_class_test_fields(self, avro_schema: Dict, class_name: str, package: str) -> List:
    """Build the per-field descriptors used to render a class's unit test.

    Args:
        avro_schema: Avro record schema of the class. A schema without a
            ``fields`` entry (or an empty schema) yields an empty list.
        class_name: Simple Java class name, passed to the type conversion.
        package: Java package of the class, passed on to test value generation.

    Returns:
        A list of ``Field`` objects with the attributes ``field_name``,
        ``field_type``, ``base_type``, ``test_value``, ``is_const``,
        ``is_enum``, ``is_discriminator`` and ``java_type_obj``.
    """

    class Field:
        """Plain value holder describing one field for the test template."""

        def __init__(self, fn: str, ft: str, tv: str, ct: bool, ie: bool = False, java_type_obj: 'AvroToJava.JavaType' = None, is_discrim: bool = False):
            self.field_name = fn
            self.field_type = ft
            # Extract base type for generic types (e.g., List<Object> -> List)
            self.base_type = ft.split('<')[0] if '<' in ft else ft
            self.test_value = tv
            self.is_const = ct
            self.is_enum = ie
            self.is_discriminator = is_discrim
            self.java_type_obj = java_type_obj  # Full JavaType object for union access

    # Java types whose const value is rendered as a literal rather than as an
    # enum constant reference.
    literal_types = ('String', 'int', 'Integer', 'long', 'Long', 'float', 'Float',
                     'double', 'Double', 'boolean', 'Boolean')

    def java_constant_name(raw) -> str:
        # Convert a const value to a Java enum constant name in SCREAMING_CASE.
        name = str(raw).replace('-', '_').replace('.', '_').upper()
        if name and name[0].isdigit():
            name = '_' + name
        if is_java_reserved_word(name.lower()):
            name = '_' + name
        return name

    def java_literal(type_name: str, value) -> str:
        # Render a const as a Java literal matching the field's Java type.
        if type_name in ('boolean', 'Boolean') and isinstance(value, bool):
            return 'true' if value else 'false'
        # bool is a subclass of int, so exclude it from the numeric cases.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            if type_name in ('int', 'Integer') and isinstance(value, int):
                return str(value)
            if type_name in ('long', 'Long') and isinstance(value, int):
                return f'{value}L'
            if type_name in ('float', 'Float'):
                return f'{float(value)}f'
            if type_name in ('double', 'Double'):
                # Force a decimal point so the literal also boxes to Double.
                return str(float(value))
        # Strings, and values whose Python type does not match the Java type,
        # stay quoted; backslashes and quotes are escaped.
        escaped = str(value).replace('\\', '\\\\').replace('"', '\\"')
        return f'"{escaped}"'

    fields: List[Field] = []
    if avro_schema and 'fields' in avro_schema:
        for field in avro_schema['fields']:
            field_name = pascal(field['name']) if self.pascal_properties else field['name']
            field_type = self.convert_avro_type_to_java(class_name, field_name, field['type'], avro_schema.get('namespace', ''))
            # Check if the field type is a generated enum
            is_enum = self.generated_types_java_package.get(field_type.type_name) == "enum"
            is_discriminator = field.get('discriminator', False)
            is_const = "const" in field and field["const"] is not None

            if is_const:
                if is_enum or field_type.type_name not in literal_types:
                    # Qualify the constant with its enum type name.
                    test_value = f'{field_type.type_name}.{java_constant_name(field["const"])}'
                else:
                    test_value = java_literal(field_type.type_name, field["const"])
            else:
                test_value = self.get_test_value_from_field(field['type'], field_type, package)

            fields.append(Field(
                field_name,
                field_type.type_name,
                test_value,
                is_const,
                is_enum,
                field_type,  # Pass the full JavaType object
                is_discriminator
            ))
    return fields
1875
+
1876
def get_test_value_from_field(self, avro_field_type: Union[str, Dict, List], java_type: 'AvroToJava.JavaType', package: str) -> str:
    """Return a Java expression usable as a test value for a field.

    Args:
        avro_field_type: The field's Avro type (name, schema dict, or union list).
        java_type: The converted Java type; ``type_name`` and ``union_types``
            are read.
        package: Java package, passed on to ``get_test_value``.

    Returns:
        Java source text for a value of the field's type.
    """
    type_name = java_type.type_name

    # Object backed by an Avro-style union: use the first member type.
    if type_name == "Object" and java_type.union_types:
        return self.get_test_value(java_type.union_types[0].type_name, package)

    # List<Object> whose items are a union: build the list from the first
    # non-null member.
    if type_name.startswith("List<Object>"):
        # avro_field_type may be the array schema itself, or a union such as
        # ["null", {"type": "array", "items": [...]}] that contains it.
        array_schema = avro_field_type
        if isinstance(avro_field_type, list):
            array_schema = next(
                (t for t in avro_field_type if isinstance(t, dict) and t.get('type') == 'array'),
                avro_field_type,
            )

        if isinstance(array_schema, dict) and array_schema.get('type') == 'array':
            items_type = array_schema.get('items')
            if isinstance(items_type, list):  # Union array
                non_null_types = [t for t in items_type if t != 'null']
                if non_null_types:
                    inner_java_type = self.convert_avro_type_to_java('_test', '_field', non_null_types[0], package)
                    inner_value = self.get_test_value(inner_java_type.type_name, package)
                    # Arrays.asList(null) throws NPE, so use an empty list
                    # (same rule as get_test_value applies to List<...>).
                    if inner_value == 'null':
                        return 'new ArrayList<>()'
                    return f'new ArrayList<>(java.util.Arrays.asList({inner_value}))'

    # Default: derive the value from the Java type name alone.
    return self.get_test_value(type_name, package)
1906
+
1907
def get_test_value(self, java_type: str, package: str) -> str:
    """Return a Java expression that yields a default test value for a Java type.

    Resolution order: generic ``List<...>``/``Map<...>`` types, then types
    registered in ``self.generated_types_java_package`` (enum, class, union),
    then a fixed table of literals, and finally ``new <type>()``.
    """
    # Generic containers are handled before anything else.
    if java_type.startswith("List<"):
        element_value = self.get_test_value(java_type[5:-1], package)
        # Arrays.asList(null) throws NPE, so create empty list for null values
        if element_value == 'null':
            return 'new ArrayList<>()'
        return f'new ArrayList<>(java.util.Arrays.asList({element_value}))'
    if java_type.startswith("Map<"):
        return 'new HashMap<>()'

    generated = self.generated_types_java_package
    if java_type in generated:
        kind = generated[java_type]

        if kind == "class":
            # Fully qualified name avoids conflicts with field names.
            return f'{java_type}.createTestInstance()'

        if kind == "enum":
            symbols = self.generated_avro_schemas.get(java_type, {}).get('symbols', [])
            if not symbols:
                return f'{java_type}.values()[0]'
            # First symbol as a Java identifier in SCREAMING_CASE
            # (same logic as in generate_enum).
            constant = symbols[0].replace('-', '_').replace('.', '_').upper()
            if constant and constant[0].isdigit():
                constant = '_' + constant
            # Reserved words get an underscore prefix.
            if is_java_reserved_word(constant.lower()):
                constant = '_' + constant
            return f'{java_type}.{constant}'

        if kind == "union":
            wrapper_name = java_type.split('.')[-1]
            union_schema = self.generated_avro_schemas.get(java_type, {})
            members = union_schema['types'] if union_schema and 'types' in union_schema else []
            # Wrap a value of the first usable non-null member.
            for member in members:
                if isinstance(member, dict):
                    # Inline definitions without a name cannot be instantiated here.
                    if 'name' not in member:
                        continue
                    member_name = member['name']
                    if 'namespace' in member:
                        # Build full Java qualified name with base package
                        member_package = self.join_packages(self.base_package, member['namespace'])
                        qualified = member_package.replace('/', '.').lower() + '.' + member_name
                    else:
                        qualified = member_name
                    if generated.get(qualified) == "enum":
                        enum_value = self.get_test_value(qualified, package)
                        return f'new {wrapper_name}({enum_value})'
                    # Anything else is treated as a class with createTestInstance().
                    return f'new {wrapper_name}({qualified}.createTestInstance())'
                if isinstance(member, str) and member != 'null':
                    # Avro type name: convert it to its Java type first.
                    converted = self.convert_avro_type_to_java('_test', '_field', member, package)
                    member_value = self.get_test_value(converted.type_name, package)
                    return f'new {wrapper_name}({member_value})'
            # Fallback: create an empty union instance
            return f'new {wrapper_name}()'

    literals = {
        'String': '"test_string"',
        'boolean': 'true',
        'Boolean': 'Boolean.TRUE',
        'int': '42',
        'Integer': 'Integer.valueOf(42)',
        'long': '42L',
        'Long': 'Long.valueOf(42L)',
        'float': '3.14f',
        'Float': 'Float.valueOf(3.14f)',
        'double': '3.14',
        'Double': 'Double.valueOf(3.14)',
        'byte[]': 'new byte[] { 0x01, 0x02, 0x03 }',
        'Object': 'null',  # Use null for Object types (Avro unions) to avoid reference equality issues
    }
    if java_type in literals:
        return literals[java_type]
    return f'new {java_type}()'
1999
+
2000
def generate_discriminated_union_base_classes(self):
    """Write an abstract Jackson-annotated Java base class per discriminated union.

    Does nothing unless Jackson annotations are enabled and
    ``self.discriminated_unions`` is non-empty. Unions without subtypes are
    skipped; unions whose subtypes declare no discriminator field produce a
    warning on stdout and are skipped as well.
    """
    if not (self.jackson_annotations and self.discriminated_unions):
        return

    for union_name, subtypes in self.discriminated_unions.items():
        if not subtypes:
            continue

        # The first subtype decides the package of the base class.
        package = subtypes[0]['package']

        # Look for the field flagged 'discriminator' in each subtype and
        # record its const value per subtype class.
        discriminator_field = None
        discriminator_values = {}
        for info in subtypes:
            flagged = next(
                (candidate for candidate in info['schema'].get('fields', []) if candidate.get('discriminator')),
                None,
            )
            if flagged is None:
                continue
            discriminator_field = flagged['name']
            if 'const' in flagged:
                discriminator_values[info['class_name']] = flagged['const']

        if not discriminator_field:
            print(f"WARN: Could not find discriminator field for union {union_name}")
            continue

        # One @JsonSubTypes.Type entry per subtype; the class name doubles as
        # the discriminator value when no const was declared.
        subtype_entries = []
        for info in subtypes:
            subtype_class = info['class_name']
            subtype_tag = discriminator_values.get(subtype_class, subtype_class)
            subtype_entries.append(
                f'{INDENT}@JsonSubTypes.Type(value = {subtype_class}.class, name = "{subtype_tag}")'
            )

        class_definition = ''.join([
            f"/**\n * Abstract base class for {union_name} discriminated union\n */\n",
            # Jackson @JsonTypeInfo annotation
            '@JsonTypeInfo(\n',
            f'{INDENT}use = JsonTypeInfo.Id.NAME,\n',
            f'{INDENT}include = JsonTypeInfo.As.EXISTING_PROPERTY,\n',
            f'{INDENT}property = "{discriminator_field}",\n',
            f'{INDENT}visible = true\n',
            ')\n',
            # Jackson @JsonSubTypes annotation
            '@JsonSubTypes({\n',
            ',\n'.join(subtype_entries) + '\n',
            '})\n',
            # Abstract class with the abstract discriminator getter
            f'public abstract class {union_name} {{\n',
            f'{INDENT}/**\n{INDENT} * Gets the discriminator value\n{INDENT} * @return the type discriminator\n{INDENT} */\n',
            f'{INDENT}public abstract String get{pascal(discriminator_field)}();\n',
            '}\n',
        ])

        # Write the file into the directory matching the package.
        target_dir = os.path.join(self.output_dir, package.replace('.', os.sep))
        os.makedirs(target_dir, exist_ok=True)
        target_path = os.path.join(target_dir, f"{union_name}.java")

        import_block = (
            'import com.fasterxml.jackson.annotation.JsonSubTypes;\n'
            'import com.fasterxml.jackson.annotation.JsonTypeInfo;\n\n'
        )
        with open(target_path, 'w', encoding='utf-8') as java_file:
            java_file.write(f"package {package};\n\n" + import_block + class_definition)

        print(f"Generated discriminated union base class: {union_name}")
2078
+
2079
def convert_schema(self, schema: JsonNode, output_dir: str):
    """Generate a Java project from an in-memory Avro schema.

    Writes ``pom.xml`` into ``output_dir`` unless one already exists, emits the
    Java sources below ``src/main/java``, then the discriminated union base
    classes, and finally the unit tests.

    Args:
        schema: One Avro schema or a list of schemas; non-dict list entries
            are ignored when generating types.
        output_dir: Root directory of the generated project.
    """
    schemas = schema if isinstance(schema, list) else [schema]

    # Build type dictionary for inline schema resolution (like C# does)
    self.type_dict = build_flat_type_dict(schemas)

    project_root = output_dir  # Kept for test generation after sources are written
    if not os.path.exists(project_root):
        os.makedirs(project_root, exist_ok=True)

    pom_file = os.path.join(project_root, "pom.xml")
    if not os.path.exists(pom_file):
        if self.base_package:
            segments = self.base_package.split('.')
        else:
            segments = ["com", "example"]
        # The last segment is the artifact id, everything before it the group id.
        if len(segments) > 1:
            group_id = '.'.join(segments[:-1])
        else:
            group_id = segments[0]
        with open(pom_file, 'w', encoding='utf-8') as pom:
            pom.write(POM_CONTENT.format(
                groupid=group_id,
                artifactid=segments[-1],
                AVRO_VERSION=AVRO_VERSION,
                JACKSON_VERSION=JACKSON_VERSION,
                JDK_VERSION=JDK_VERSION,
                JUNIT_VERSION=JUNIT_VERSION,
                MAVEN_COMPILER_VERSION=MAVEN_COMPILER_VERSION,
                MAVEN_SUREFIRE_VERSION=MAVEN_SUREFIRE_VERSION,
                PACKAGE=self.base_package))

    sources_root = os.path.join(project_root, "src", "main", "java")
    if not os.path.exists(sources_root):
        os.makedirs(sources_root, exist_ok=True)
    self.output_dir = sources_root

    for entry in schemas:
        if not isinstance(entry, dict):
            continue
        self.generate_class_or_enum(entry, '')
    self.generate_discriminated_union_base_classes()
    self.generate_tests(project_root)
2115
+
2116
def convert(self, avro_schema_path: str, output_dir: str):
    """Read an Avro schema from a JSON file and generate Java code from it.

    Args:
        avro_schema_path: Path of the UTF-8 encoded JSON schema file.
        output_dir: Output directory handed to ``convert_schema``.
    """
    with open(avro_schema_path, 'r', encoding='utf-8') as schema_file:
        parsed_schema = json.loads(schema_file.read())
    self.convert_schema(parsed_schema, output_dir)
2121
+
2122
+
2123
def convert_avro_to_java(avro_schema_path, java_file_path, package_name='', pascal_properties=False, jackson_annotation=False, avro_annotation=False):
    """Convert an Avro schema file to Java classes.

    Args:
        avro_schema_path: Path of the Avro input schema (JSON) file.
        java_file_path: Output directory for the generated Java project.
        package_name: Base Java package. When empty, it is derived from the
            last component of ``java_file_path`` (extension removed, ``-``
            replaced by ``_``, lower-cased).
        pascal_properties: Stored on the generator as ``pascal_properties``.
        jackson_annotation: Stored on the generator as ``jackson_annotations``.
        avro_annotation: Stored on the generator as ``avro_annotation``.
    """
    if not package_name:
        # normpath drops a trailing separator, so "out/my-project/" still
        # yields "my-project" instead of an empty name.
        output_name = os.path.basename(os.path.normpath(java_file_path)) if java_file_path else ''
        package_name = os.path.splitext(output_name)[0].replace('-', '_').lower()
    avrotojava = AvroToJava()
    avrotojava.base_package = package_name
    avrotojava.pascal_properties = pascal_properties
    avrotojava.avro_annotation = avro_annotation
    avrotojava.jackson_annotations = jackson_annotation
    avrotojava.convert(avro_schema_path, java_file_path)
2140
+
2141
+
2142
def convert_avro_schema_to_java(avro_schema: JsonNode, output_dir: str, package_name='', pascal_properties=False, jackson_annotation=False, avro_annotation=False):
    """Convert an in-memory Avro schema to Java classes.

    Args:
        avro_schema: Avro schema as a dictionary or list of dictionaries.
        output_dir: Output directory path.
        package_name: Base Java package assigned to the generator.
        pascal_properties: Stored on the generator as ``pascal_properties``.
        jackson_annotation: Stored on the generator as ``jackson_annotations``.
        avro_annotation: Stored on the generator as ``avro_annotation``.
    """
    generator = AvroToJava()
    # Configure the generator before running the conversion.
    generator.base_package = package_name
    generator.pascal_properties = pascal_properties
    generator.avro_annotation = avro_annotation
    generator.jackson_annotations = jackson_annotation
    generator.convert_schema(avro_schema, output_dir)