structurize 2.16.5__py3-none-any.whl → 2.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
avrotize/avrotojava.py CHANGED
@@ -6,7 +6,7 @@ import os
6
6
  from typing import Dict, List, Tuple, Union
7
7
  from avrotize.constants import AVRO_VERSION, JACKSON_VERSION, JDK_VERSION
8
8
 
9
- from avrotize.common import pascal, camel, is_generic_avro_type
9
+ from avrotize.common import pascal, camel, is_generic_avro_type, inline_avro_references, build_flat_type_dict
10
10
 
11
11
  INDENT = ' '
12
12
  POM_CONTENT = """<?xml version="1.0" encoding="UTF-8"?>
@@ -20,6 +20,7 @@ POM_CONTENT = """<?xml version="1.0" encoding="UTF-8"?>
20
20
  <properties>
21
21
  <maven.compiler.source>{JDK_VERSION}</maven.compiler.source>
22
22
  <maven.compiler.target>{JDK_VERSION}</maven.compiler.target>
23
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
23
24
  </properties>
24
25
  <dependencies>
25
26
  <dependency>
@@ -28,12 +29,56 @@ POM_CONTENT = """<?xml version="1.0" encoding="UTF-8"?>
28
29
  <version>{AVRO_VERSION}</version>
29
30
  </dependency>
30
31
  <dependency>
31
- <groupId>com.fasterxml.jackson</groupId>
32
- <artifactId>jackson-bom</artifactId>
32
+ <groupId>com.fasterxml.jackson.core</groupId>
33
+ <artifactId>jackson-core</artifactId>
33
34
  <version>{JACKSON_VERSION}</version>
34
- <type>pom</type>
35
+ </dependency>
36
+ <dependency>
37
+ <groupId>com.fasterxml.jackson.core</groupId>
38
+ <artifactId>jackson-databind</artifactId>
39
+ <version>{JACKSON_VERSION}</version>
40
+ </dependency>
41
+ <dependency>
42
+ <groupId>com.fasterxml.jackson.core</groupId>
43
+ <artifactId>jackson-annotations</artifactId>
44
+ <version>{JACKSON_VERSION}</version>
45
+ </dependency>
46
+ <dependency>
47
+ <groupId>org.junit.jupiter</groupId>
48
+ <artifactId>junit-jupiter-api</artifactId>
49
+ <version>5.10.0</version>
50
+ <scope>test</scope>
51
+ </dependency>
52
+ <dependency>
53
+ <groupId>org.junit.jupiter</groupId>
54
+ <artifactId>junit-jupiter-engine</artifactId>
55
+ <version>5.10.0</version>
56
+ <scope>test</scope>
35
57
  </dependency>
36
58
  </dependencies>
59
+ <build>
60
+ <plugins>
61
+ <plugin>
62
+ <groupId>org.apache.maven.plugins</groupId>
63
+ <artifactId>maven-compiler-plugin</artifactId>
64
+ <version>3.11.0</version>
65
+ <configuration>
66
+ <compilerArgs>
67
+ <arg>-Xmaxerrs</arg>
68
+ <arg>1000</arg>
69
+ </compilerArgs>
70
+ </configuration>
71
+ </plugin>
72
+ <plugin>
73
+ <groupId>org.apache.maven.plugins</groupId>
74
+ <artifactId>maven-surefire-plugin</artifactId>
75
+ <version>3.0.0-M9</version>
76
+ <configuration>
77
+ <useSystemClassLoader>false</useSystemClassLoader>
78
+ </configuration>
79
+ </plugin>
80
+ </plugins>
81
+ </build>
37
82
  </project>
38
83
  """
39
84
 
@@ -41,12 +86,16 @@ PREAMBLE_TOBYTEARRAY = \
41
86
  """
42
87
  byte[] result = null;
43
88
  String mediaType = contentType.split(";")[0].trim().toLowerCase();
89
+ boolean shouldCompress = mediaType.endsWith("+gzip");
90
+ if (shouldCompress) {
91
+ mediaType = mediaType.substring(0, mediaType.length() - 5);
92
+ }
44
93
  """
45
94
 
46
95
 
47
96
  EPILOGUE_TOBYTEARRAY_COMPRESSION = \
48
97
  """
49
- if (result != null && mediaType.endsWith("+gzip")) {
98
+ if (result != null && shouldCompress) {
50
99
  try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
51
100
  GZIPOutputStream gzipOutputStream = new GZIPOutputStream(byteArrayOutputStream)) {
52
101
  gzipOutputStream.write(result);
@@ -66,6 +115,7 @@ throw new UnsupportedOperationException("Unsupported media type + mediaType");
66
115
  PREAMBLE_FROMDATA_COMPRESSION = \
67
116
  """
68
117
  if (mediaType.endsWith("+gzip")) {
118
+ mediaType = mediaType.substring(0, mediaType.length() - 5);
69
119
  InputStream stream = null;
70
120
 
71
121
  if (data instanceof InputStream) {
@@ -95,7 +145,7 @@ JSON_FROMDATA_THROWS = \
95
145
  ",JsonProcessingException, IOException"
96
146
  JSON_FROMDATA = \
97
147
  """
98
- if ( mediaType == "application/json") {
148
+ if ( mediaType.equals("application/json")) {
99
149
  if (data instanceof byte[]) {
100
150
  ByteArrayInputStream stream = new ByteArrayInputStream((byte[]) data);
101
151
  return (new ObjectMapper()).readValue(stream, {typeName}.class);
@@ -115,7 +165,7 @@ if ( mediaType == "application/json") {
115
165
  JSON_TOBYTEARRAY_THROWS = ",JsonProcessingException"
116
166
  JSON_TOBYTEARRAY = \
117
167
  """
118
- if ( mediaType == "application/json") {
168
+ if ( mediaType.equals("application/json")) {
119
169
  result = new ObjectMapper().writeValueAsBytes(this);
120
170
  }
121
171
  """
@@ -123,14 +173,14 @@ if ( mediaType == "application/json") {
123
173
  AVRO_FROMDATA_THROWS = ",IOException"
124
174
  AVRO_FROMDATA = \
125
175
  """
126
- if ( mediaType == "avro/binary" || mediaType == "application/vnd.apache.avro+avro") {
176
+ if ( mediaType.equals("avro/binary") || mediaType.equals("application/vnd.apache.avro+avro")) {
127
177
  if (data instanceof byte[]) {
128
178
  return AVROREADER.read(new {typeName}(), DecoderFactory.get().binaryDecoder((byte[])data, null));
129
179
  } else if (data instanceof InputStream) {
130
180
  return AVROREADER.read(new {typeName}(), DecoderFactory.get().binaryDecoder((InputStream)data, null));
131
181
  }
132
182
  throw new UnsupportedOperationException("Data is not of a supported type for Avro conversion to {typeName}");
133
- } else if ( mediaType == "avro/json" || mediaType == "application/vnd.apache.avro+json") {
183
+ } else if ( mediaType.equals("avro/json") || mediaType.equals("application/vnd.apache.avro+json")) {
134
184
  if (data instanceof byte[]) {
135
185
  return AVROREADER.read(new {typeName}(), DecoderFactory.get().jsonDecoder({typeName}.AVROSCHEMA, new ByteArrayInputStream((byte[])data)));
136
186
  } else if (data instanceof InputStream) {
@@ -146,14 +196,14 @@ if ( mediaType == "avro/binary" || mediaType == "application/vnd.apache.avro+avr
146
196
  AVRO_TOBYTEARRAY_THROWS = ",IOException"
147
197
  AVRO_TOBYTEARRAY = \
148
198
  """
149
- if ( mediaType == "avro/binary" || mediaType == "application/vnd.apache.avro+avro") {
199
+ if ( mediaType.equals("avro/binary") || mediaType.equals("application/vnd.apache.avro+avro")) {
150
200
  ByteArrayOutputStream out = new ByteArrayOutputStream();
151
201
  Encoder encoder = EncoderFactory.get().binaryEncoder(out, null);
152
202
  AVROWRITER.write(this, encoder);
153
203
  encoder.flush();
154
204
  result = out.toByteArray();
155
205
  }
156
- else if ( mediaType == "avro/json" || mediaType == "application/vnd.apache.avro+json") {
206
+ else if ( mediaType.equals("avro/json") || mediaType.equals("application/vnd.apache.avro+json")) {
157
207
  ByteArrayOutputStream out = new ByteArrayOutputStream();
158
208
  Encoder encoder = EncoderFactory.get().jsonEncoder({typeName}.AVROSCHEMA, out);
159
209
  AVROWRITER.write(this, encoder);
@@ -198,6 +248,8 @@ class AvroToJava:
198
248
  self.pascal_properties = False
199
249
  self.generated_types_avro_namespace: Dict[str,str] = {}
200
250
  self.generated_types_java_package: Dict[str,str] = {}
251
+ self.generated_avro_schemas: Dict[str, Dict] = {}
252
+ self.discriminated_unions: Dict[str, List[Dict]] = {} # Maps union name to list of subtype schemas
201
253
 
202
254
  def qualified_name(self, package: str, name: str) -> str:
203
255
  """Concatenates package and name using a dot separator"""
@@ -323,10 +375,22 @@ class AvroToJava:
323
375
  if avro_type['logicalType'] == 'decimal':
324
376
  return AvroToJava.JavaType('BigDecimal')
325
377
  elif avro_type['type'] == 'array':
326
- item_type = self.convert_avro_type_to_java(class_name, field_name, avro_type['items'], parent_package, nullable=True).type_name
378
+ item_java_type = self.convert_avro_type_to_java(class_name, field_name, avro_type['items'], parent_package, nullable=True)
379
+ item_type = item_java_type.type_name
380
+ # Check if item is a union type by name pattern or registered type
381
+ is_union_item = (item_type.endswith("Union") or
382
+ (item_type in self.generated_types_java_package and self.generated_types_java_package[item_type] == "union"))
383
+ if is_union_item:
384
+ return AvroToJava.JavaType(f"List<{item_type}>", union_types=[AvroToJava.JavaType(item_type)])
327
385
  return AvroToJava.JavaType(f"List<{item_type}>")
328
386
  elif avro_type['type'] == 'map':
329
- values_type = self.convert_avro_type_to_java(class_name, field_name, avro_type['values'], parent_package, nullable=True).type_name
387
+ value_java_type = self.convert_avro_type_to_java(class_name, field_name, avro_type['values'], parent_package, nullable=True)
388
+ values_type = value_java_type.type_name
389
+ # Check if value is a union type by name pattern or registered type
390
+ is_union_value = (values_type.endswith("Union") or
391
+ (values_type in self.generated_types_java_package and self.generated_types_java_package[values_type] == "union"))
392
+ if is_union_value:
393
+ return AvroToJava.JavaType(f"Map<String,{values_type}>", union_types=[AvroToJava.JavaType(values_type)])
330
394
  return AvroToJava.JavaType(f"Map<String,{values_type}>")
331
395
  elif 'logicalType' in avro_type:
332
396
  if avro_type['logicalType'] == 'date':
@@ -352,6 +416,32 @@ class AvroToJava:
352
416
  return self.generate_enum(avro_schema, parent_package, write_file)
353
417
  return AvroToJava.JavaType('Object')
354
418
 
419
+ def generate_create_test_instance_method(self, class_name: str, fields: List[Dict], parent_package: str) -> str:
420
+ """ Generates a static createTestInstance method that creates a fully initialized instance """
421
+ method = f"\n{INDENT}/**\n{INDENT} * Creates a test instance with all required fields populated\n{INDENT} * @return a fully initialized test instance\n{INDENT} */\n"
422
+ method += f"{INDENT}public static {class_name} createTestInstance() {{\n"
423
+ method += f"{INDENT*2}{class_name} instance = new {class_name}();\n"
424
+
425
+ for field in fields:
426
+ # Skip const fields
427
+ if "const" in field:
428
+ continue
429
+
430
+ # Match the logic in generate_property: field_name is already Pascal-cased if needed
431
+ field_name = pascal(field['name']) if self.pascal_properties else field['name']
432
+ safe_field_name = self.safe_identifier(field_name, class_name)
433
+ field_type = self.convert_avro_type_to_java(class_name, safe_field_name, field['type'], parent_package)
434
+
435
+ # Get a test value for this field
436
+ test_value = self.get_test_value(field_type.type_name, parent_package.replace('.', '/'))
437
+
438
+ # Setter name matches generate_property: set{pascal(field_name)} where field_name is already potentially Pascal-cased
439
+ method += f"{INDENT*2}instance.set{pascal(field_name)}({test_value});\n"
440
+
441
+ method += f"{INDENT*2}return instance;\n"
442
+ method += f"{INDENT}}}\n"
443
+ return method
444
+
355
445
  def generate_class(self, avro_schema: Dict, parent_package: str, write_file: bool) -> JavaType:
356
446
  """ Generates a Java class from an Avro record schema """
357
447
  class_definition = ''
@@ -370,9 +460,29 @@ class AvroToJava:
370
460
  return AvroToJava.JavaType(qualified_class_name, is_class=True)
371
461
  self.generated_types_avro_namespace[namespace_qualified_name] = "class"
372
462
  self.generated_types_java_package[qualified_class_name] = "class"
463
+ self.generated_avro_schemas[qualified_class_name] = avro_schema
464
+
465
+ # Track discriminated union subtypes
466
+ if 'union' in avro_schema:
467
+ union_name = avro_schema['union']
468
+ if union_name not in self.discriminated_unions:
469
+ self.discriminated_unions[union_name] = []
470
+ self.discriminated_unions[union_name].append({
471
+ 'schema': avro_schema,
472
+ 'class_name': class_name,
473
+ 'package': package.replace('/', '.'),
474
+ 'qualified_name': qualified_class_name
475
+ })
476
+
373
477
  fields_str = [self.generate_property(class_name, field, namespace) for field in avro_schema.get('fields', [])]
374
478
  class_body = "\n".join(fields_str)
375
479
  class_definition += f"public class {class_name}"
480
+
481
+ # Add extends clause if this is a discriminated union subtype
482
+ if 'union' in avro_schema and self.jackson_annotations:
483
+ union_name = avro_schema['union']
484
+ class_definition += f" extends {union_name}"
485
+
376
486
  if self.avro_annotation:
377
487
  class_definition += " implements SpecificRecord"
378
488
  class_definition += " {\n"
@@ -386,13 +496,54 @@ class AvroToJava:
386
496
  class_definition += f"{INDENT*2}}}\n"
387
497
  class_definition += f"{INDENT}}}\n"
388
498
 
499
+ # Generate createTestInstance() method for testing
500
+ class_definition += self.generate_create_test_instance_method(class_name, avro_schema.get('fields', []), namespace)
501
+
389
502
  if self.avro_annotation:
390
- avro_schema_json = json.dumps(avro_schema)
391
- avro_schema_json = avro_schema_json.replace('"', '§')
392
- avro_schema_json = f"\"+\n{INDENT}\"".join(
393
- [avro_schema_json[i:i+80] for i in range(0, len(avro_schema_json), 80)])
394
- avro_schema_json = avro_schema_json.replace('§', '\\"')
395
- class_definition += f"\n\n{INDENT}public static final Schema AVROSCHEMA = new Schema.Parser().parse(\n{INDENT}\"{avro_schema_json}\");"
503
+ # Inline all schema references like C# does - each class has self-contained schema
504
+ local_avro_schema = inline_avro_references(avro_schema.copy(), self.type_dict, '')
505
+ avro_schema_json = json.dumps(local_avro_schema)
506
+
507
+ # Java has a limit of 65535 bytes for string constants
508
+ # If the schema is too large, we need to split it into chunks
509
+ MAX_STRING_CONSTANT_LENGTH = 60000 # Leave some margin for safety
510
+
511
+ if len(avro_schema_json) > MAX_STRING_CONSTANT_LENGTH:
512
+ # Split into multiple private string methods to avoid the 65535 byte limit
513
+ # Each method returns a part of the schema, concatenated at runtime
514
+ chunk_size = MAX_STRING_CONSTANT_LENGTH
515
+ chunks = [avro_schema_json[i:i+chunk_size] for i in range(0, len(avro_schema_json), chunk_size)]
516
+
517
+ # Generate a method for each chunk
518
+ for i, chunk in enumerate(chunks):
519
+ # Use the same escaping technique as the non-chunked version
520
+ escaped_chunk = chunk.replace('"', '§')
521
+ escaped_chunk = f"\"+\n{INDENT*2}\"".join(
522
+ [escaped_chunk[j:j+80] for j in range(0, len(escaped_chunk), 80)])
523
+ escaped_chunk = escaped_chunk.replace('§', '\\"')
524
+ class_definition += f"\n\n{INDENT}private static String getAvroSchemaPart{i}() {{\n"
525
+ class_definition += f"{INDENT*2}return \"{escaped_chunk}\";\n"
526
+ class_definition += f"{INDENT}}}"
527
+
528
+ # Generate the combining method
529
+ class_definition += f"\n\n{INDENT}private static String getAvroSchemaJson() {{\n"
530
+ class_definition += f"{INDENT*2}return "
531
+ class_definition += " + ".join([f"getAvroSchemaPart{i}()" for i in range(len(chunks))])
532
+ class_definition += ";\n"
533
+ class_definition += f"{INDENT}}}\n"
534
+ class_definition += f"\n{INDENT}public static final Schema AVROSCHEMA = new Schema.Parser().parse(getAvroSchemaJson());"
535
+ else:
536
+ avro_schema_json = avro_schema_json.replace('"', '§')
537
+ avro_schema_json = f"\"+\n{INDENT}\"".join(
538
+ [avro_schema_json[i:i+80] for i in range(0, len(avro_schema_json), 80)])
539
+ avro_schema_json = avro_schema_json.replace('§', '\\"')
540
+ class_definition += f"\n\n{INDENT}public static final Schema AVROSCHEMA = new Schema.Parser().parse(\n{INDENT}\"{avro_schema_json}\");"
541
+
542
+ # Store the schema for tracking
543
+ avro_namespace = avro_schema.get('namespace', '')
544
+ schema_full_name = f"{avro_namespace}.{class_name}" if avro_namespace else class_name
545
+ self.generated_types_avro_namespace[schema_full_name] = "class"
546
+
396
547
  class_definition += f"\n{INDENT}public static final DatumWriter<{class_name}> AVROWRITER = new SpecificDatumWriter<{class_name}>(AVROSCHEMA);"
397
548
  class_definition += f"\n{INDENT}public static final DatumReader<{class_name}> AVROREADER = new SpecificDatumReader<{class_name}>(AVROSCHEMA);\n"
398
549
 
@@ -441,6 +592,10 @@ class AvroToJava:
441
592
  if self.jackson_annotations:
442
593
  class_definition += self.create_is_json_match_method(avro_schema, avro_schema.get('namespace', namespace), class_name)
443
594
 
595
+ # Add equals() and hashCode() methods
596
+ class_definition += self.generate_equals_method(class_name, avro_schema.get('fields', []), namespace)
597
+ class_definition += self.generate_hashcode_method(class_name, avro_schema.get('fields', []), namespace)
598
+
444
599
  class_definition += "\n}"
445
600
 
446
601
  if write_file:
@@ -465,7 +620,7 @@ class AvroToJava:
465
620
  if field_name == class_name:
466
621
  field_name += "_"
467
622
  field_type = self.convert_avro_type_to_java(class_name, field_name, field['type'], parent_namespace)
468
- predicate, clause = self.get_is_json_match_clause(class_name, field_name, field_type)
623
+ predicate, clause = self.get_is_json_match_clause(class_name, field_name, field_type, field)
469
624
  field_defs += clause
470
625
  if predicate:
471
626
  predicates += predicate + "\n"
@@ -475,12 +630,24 @@ class AvroToJava:
475
630
  class_definition += f";\n{INDENT}}}"
476
631
  return class_definition
477
632
 
478
- def get_is_json_match_clause(self, class_name: str, field_name: str, field_type: JavaType) -> Tuple[str, str]:
633
+ def get_is_json_match_clause(self, class_name: str, field_name: str, field_type: JavaType, field: Dict = None) -> Tuple[str, str]:
479
634
  """ Generates the isJsonMatch clause for a field using Jackson """
480
635
  class_definition = ''
481
636
  predicates = ''
482
637
  field_name_js = field_name
483
- is_optional = self.is_java_optional_type(field_type)
638
+
639
+ # Check if field is nullable (Avro union with null)
640
+ is_nullable = False
641
+ if field and 'type' in field:
642
+ avro_type = field['type']
643
+ if isinstance(avro_type, list) and 'null' in avro_type:
644
+ is_nullable = True
645
+
646
+ is_optional = is_nullable or self.is_java_optional_type(field_type)
647
+
648
+ # Check if this is a const field (e.g., discriminator)
649
+ has_const = field and 'const' in field and field['const'] is not None
650
+ const_value = field['const'] if has_const else None
484
651
 
485
652
  if is_optional:
486
653
  node_check = f"!node.has(\"{field_name_js}\") || node.get(\"{field_name_js}\").isNull() || node.get(\"{field_name_js}\")"
@@ -496,9 +663,9 @@ class AvroToJava:
496
663
  elif field_type.type_name == 'long' or field_type.type_name == 'Long':
497
664
  class_definition += f"({node_check}.canConvertToLong())"
498
665
  elif field_type.type_name == 'float' or field_type.type_name == 'Float':
499
- class_definition += f"({node_check}.isFloat())"
666
+ class_definition += f"({node_check}.isNumber())"
500
667
  elif field_type.type_name == 'double' or field_type.type_name == 'Double':
501
- class_definition += f"({node_check}.isDouble())"
668
+ class_definition += f"({node_check}.isNumber())"
502
669
  elif field_type.type_name == 'BigDecimal':
503
670
  class_definition += f"({node_check}.isBigDecimal())"
504
671
  elif field_type.type_name == 'boolean' or field_type.type_name == 'Boolean':
@@ -551,9 +718,19 @@ class AvroToJava:
551
718
  predicates += pred + ";"
552
719
  class_definition += f"(node.has(\"{field_name_js}\") && val{field_name_js}.test(node.get(\"{field_name_js}\")))"
553
720
  elif field_type.is_class:
554
- class_definition += f"(node.has(\"{field_name_js}\") && {field_type.type_name}.isJsonMatch(node.get(\"{field_name_js}\")))"
721
+ if is_optional:
722
+ class_definition += f"(!node.has(\"{field_name_js}\") || node.get(\"{field_name_js}\").isNull() || {field_type.type_name}.isJsonMatch(node.get(\"{field_name_js}\")))"
723
+ else:
724
+ class_definition += f"(node.has(\"{field_name_js}\") && {field_type.type_name}.isJsonMatch(node.get(\"{field_name_js}\")))"
555
725
  elif field_type.is_enum:
556
- class_definition += f"(node.get(\"{field_name_js}\").isTextual() && Enum.valueOf({field_type.type_name}.class, node.get(\"{field_name_js}\").asText()) != null)"
726
+ # For const enum fields (discriminators), check the exact value
727
+ if has_const:
728
+ # const_value is the string value from the schema, not the enum qualified name
729
+ # Ensure we use the raw string value for comparison
730
+ raw_const = const_value if isinstance(const_value, str) else str(const_value)
731
+ class_definition += f"(node.has(\"{field_name_js}\") && node.get(\"{field_name_js}\").isTextual() && node.get(\"{field_name_js}\").asText().equals(\"{raw_const}\"))"
732
+ else:
733
+ class_definition += f"(node.get(\"{field_name_js}\").isTextual() && Enum.valueOf({field_type.type_name}.class, node.get(\"{field_name_js}\").asText()) != null)"
557
734
  else:
558
735
  is_union = False
559
736
  field_union = pascal(field_name) + 'Union'
@@ -640,6 +817,177 @@ class AvroToJava:
640
817
 
641
818
  return class_definition
642
819
 
820
+ def generate_equals_method(self, class_name: str, fields: List[Dict], parent_package: str) -> str:
821
+ """ Generates the equals method for a class """
822
+ equals_method = f"\n\n{INDENT}@Override\n{INDENT}public boolean equals(Object obj) {{\n"
823
+ equals_method += f"{INDENT * 2}if (this == obj) return true;\n"
824
+ equals_method += f"{INDENT * 2}if (obj == null || getClass() != obj.getClass()) return false;\n"
825
+ equals_method += f"{INDENT * 2}{class_name} other = ({class_name}) obj;\n"
826
+
827
+ if not fields:
828
+ equals_method += f"{INDENT * 2}return true;\n"
829
+ else:
830
+ for index, field in enumerate(fields):
831
+ field_name = pascal(field['name']) if self.pascal_properties else field['name']
832
+ field_name = self.safe_identifier(field_name, class_name)
833
+ field_type = self.convert_avro_type_to_java(class_name, field_name, field['type'], parent_package)
834
+
835
+ if field_type.type_name in ['int', 'long', 'float', 'double', 'boolean', 'byte', 'short', 'char']:
836
+ equals_method += f"{INDENT * 2}if (this.{field_name} != other.{field_name}) return false;\n"
837
+ elif field_type.type_name == 'byte[]':
838
+ equals_method += f"{INDENT * 2}if (!java.util.Arrays.equals(this.{field_name}, other.{field_name})) return false;\n"
839
+ else:
840
+ equals_method += f"{INDENT * 2}if (this.{field_name} == null ? other.{field_name} != null : !this.{field_name}.equals(other.{field_name})) return false;\n"
841
+
842
+ equals_method += f"{INDENT * 2}return true;\n"
843
+
844
+ equals_method += f"{INDENT}}}\n"
845
+ return equals_method
846
+
847
+ def generate_hashcode_method(self, class_name: str, fields: List[Dict], parent_package: str) -> str:
848
+ """ Generates the hashCode method for a class """
849
+ hashcode_method = f"\n{INDENT}@Override\n{INDENT}public int hashCode() {{\n"
850
+
851
+ if not fields:
852
+ hashcode_method += f"{INDENT * 2}return 0;\n"
853
+ else:
854
+ hashcode_method += f"{INDENT * 2}int result = 1;\n"
855
+ temp_counter = 0
856
+ for field in fields:
857
+ field_name = pascal(field['name']) if self.pascal_properties else field['name']
858
+ field_name = self.safe_identifier(field_name, class_name)
859
+ field_type = self.convert_avro_type_to_java(class_name, field_name, field['type'], parent_package)
860
+
861
+ if field_type.type_name == 'boolean':
862
+ hashcode_method += f"{INDENT * 2}result = 31 * result + (this.{field_name} ? 1 : 0);\n"
863
+ elif field_type.type_name in ['byte', 'short', 'char', 'int']:
864
+ hashcode_method += f"{INDENT * 2}result = 31 * result + this.{field_name};\n"
865
+ elif field_type.type_name == 'long':
866
+ hashcode_method += f"{INDENT * 2}result = 31 * result + (int)(this.{field_name} ^ (this.{field_name} >>> 32));\n"
867
+ elif field_type.type_name == 'float':
868
+ hashcode_method += f"{INDENT * 2}result = 31 * result + Float.floatToIntBits(this.{field_name});\n"
869
+ elif field_type.type_name == 'double':
870
+ temp_var = f"temp{temp_counter}" if temp_counter > 0 else "temp"
871
+ temp_counter += 1
872
+ hashcode_method += f"{INDENT * 2}long {temp_var} = Double.doubleToLongBits(this.{field_name});\n"
873
+ hashcode_method += f"{INDENT * 2}result = 31 * result + (int)({temp_var} ^ ({temp_var} >>> 32));\n"
874
+ elif field_type.type_name == 'byte[]':
875
+ hashcode_method += f"{INDENT * 2}result = 31 * result + java.util.Arrays.hashCode(this.{field_name});\n"
876
+ else:
877
+ hashcode_method += f"{INDENT * 2}result = 31 * result + (this.{field_name} != null ? this.{field_name}.hashCode() : 0);\n"
878
+
879
+ hashcode_method += f"{INDENT * 2}return result;\n"
880
+
881
+ hashcode_method += f"{INDENT}}}\n"
882
+ return hashcode_method
883
+
884
+ def generate_union_equals_method(self, union_class_name: str, union_types: List['AvroToJava.JavaType']) -> str:
885
+ """ Generates the equals method for a union class """
886
+ equals_method = f"\n{INDENT}@Override\n{INDENT}public boolean equals(Object obj) {{\n"
887
+ equals_method += f"{INDENT * 2}if (this == obj) return true;\n"
888
+ equals_method += f"{INDENT * 2}if (obj == null || getClass() != obj.getClass()) return false;\n"
889
+ equals_method += f"{INDENT * 2}{union_class_name} other = ({union_class_name}) obj;\n"
890
+
891
+ # In a union, only ONE field should be set at a time
892
+ # We need to check if the same field is set in both objects and if the values match
893
+ for i, union_type in enumerate(union_types):
894
+ # we need the nullable version (wrapper) of all primitive types
895
+ if self.is_java_primitive(union_type):
896
+ union_type = self.map_primitive_to_java(union_type.type_name, True)
897
+
898
+ union_variable_name = union_type.type_name
899
+ if union_type.type_name.startswith("Map<"):
900
+ union_variable_name = flatten_type_name(union_type.type_name)
901
+ elif union_type.type_name.startswith("List<"):
902
+ union_variable_name = flatten_type_name(union_type.type_name)
903
+ elif union_type.type_name == "byte[]":
904
+ union_variable_name = "Bytes"
905
+ else:
906
+ union_variable_name = union_type.type_name.rsplit('.', 1)[-1]
907
+
908
+ field_name = f"_{camel(union_variable_name)}"
909
+
910
+ # Check if this field is set in this object
911
+ if i == 0:
912
+ equals_method += f"{INDENT * 2}if (this.{field_name} != null) {{\n"
913
+ else:
914
+ equals_method += f"{INDENT * 2}else if (this.{field_name} != null) {{\n"
915
+
916
+ # If set, check if it's also set in the other object with the same value
917
+ if union_type.type_name == 'byte[]':
918
+ equals_method += f"{INDENT * 3}return java.util.Arrays.equals(this.{field_name}, other.{field_name});\n"
919
+ else:
920
+ equals_method += f"{INDENT * 3}return this.{field_name}.equals(other.{field_name});\n"
921
+
922
+ equals_method += f"{INDENT * 2}}}\n"
923
+
924
+ # If no field is set in this, check other is also unset
925
+ equals_method += f"{INDENT * 2}// Both are null/unset - check other is also unset\n"
926
+ equals_method += f"{INDENT * 2}return "
927
+ for i, union_type in enumerate(union_types):
928
+ # we need the nullable version (wrapper) of all primitive types
929
+ if self.is_java_primitive(union_type):
930
+ union_type = self.map_primitive_to_java(union_type.type_name, True)
931
+
932
+ union_variable_name = union_type.type_name
933
+ if union_type.type_name.startswith("Map<"):
934
+ union_variable_name = flatten_type_name(union_type.type_name)
935
+ elif union_type.type_name.startswith("List<"):
936
+ union_variable_name = flatten_type_name(union_type.type_name)
937
+ elif union_type.type_name == "byte[]":
938
+ union_variable_name = "Bytes"
939
+ else:
940
+ union_variable_name = union_type.type_name.rsplit('.', 1)[-1]
941
+ field_name = f"_{camel(union_variable_name)}"
942
+ if i > 0:
943
+ equals_method += " && "
944
+ equals_method += f"other.{field_name} == null"
945
+ equals_method += ";\n"
946
+ equals_method += f"{INDENT}}}\n"
947
+ return equals_method
948
+
949
+ def generate_union_hashcode_method(self, union_class_name: str, union_types: List['AvroToJava.JavaType']) -> str:
950
+ """ Generates the hashCode method for a union class """
951
+ hashcode_method = f"\n{INDENT}@Override\n{INDENT}public int hashCode() {{\n"
952
+
953
+ # In a union, only ONE field should be set at a time
954
+ # Return the hash of whichever field is set
955
+ for i, union_type in enumerate(union_types):
956
+ # we need the nullable version (wrapper) of all primitive types
957
+ if self.is_java_primitive(union_type):
958
+ union_type = self.map_primitive_to_java(union_type.type_name, True)
959
+
960
+ union_variable_name = union_type.type_name
961
+ if union_type.type_name.startswith("Map<"):
962
+ union_variable_name = flatten_type_name(union_type.type_name)
963
+ elif union_type.type_name.startswith("List<"):
964
+ union_variable_name = flatten_type_name(union_type.type_name)
965
+ elif union_type.type_name == "byte[]":
966
+ union_variable_name = "Bytes"
967
+ else:
968
+ union_variable_name = union_type.type_name.rsplit('.', 1)[-1]
969
+
970
+ field_name = f"_{camel(union_variable_name)}"
971
+
972
+ # Return hash of whichever field is set
973
+ if i == 0:
974
+ hashcode_method += f"{INDENT * 2}if (this.{field_name} != null) {{\n"
975
+ else:
976
+ hashcode_method += f"{INDENT * 2}else if (this.{field_name} != null) {{\n"
977
+
978
+ # Use proper hash calculation based on type
979
+ if union_type.type_name == 'byte[]':
980
+ hashcode_method += f"{INDENT * 3}return java.util.Arrays.hashCode(this.{field_name});\n"
981
+ else:
982
+ hashcode_method += f"{INDENT * 3}return this.{field_name}.hashCode();\n"
983
+
984
+ hashcode_method += f"{INDENT * 2}}}\n"
985
+
986
+ # If no field is set, return 0
987
+ hashcode_method += f"{INDENT * 2}return 0;\n"
988
+ hashcode_method += f"{INDENT}}}\n"
989
+ return hashcode_method
990
+
643
991
  def generate_avro_get_method(self, class_name: str, fields: List[Dict], parent_package: str) -> str:
644
992
  """ Generates the get method for SpecificRecord """
645
993
  get_method = f"\n{INDENT}@Override\n{INDENT}public Object get(int field$) {{\n"
@@ -648,9 +996,37 @@ class AvroToJava:
648
996
  field_name = pascal(field['name']) if self.pascal_properties else field['name']
649
997
  field_name = self.safe_identifier(field_name, class_name)
650
998
  field_type = self.convert_avro_type_to_java(class_name, field_name, field['type'], parent_package)
651
- if field_type.type_name in self.generated_types_avro_namespace and self.generated_types_avro_namespace[field_type.type_name] == "union":
652
- get_method += f"{INDENT * 3}case {index}: return this.{field_name}!=null?this.{field_name}.toObject():null;\n"
999
+
1000
+ # Check if field type is a union
1001
+ is_union = field_type.type_name in self.generated_types_avro_namespace and self.generated_types_avro_namespace[field_type.type_name] == "union"
1002
+ is_union = is_union or (field_type.type_name in self.generated_types_java_package and self.generated_types_java_package[field_type.type_name] == "union")
1003
+ # Also check if it's an Object with union_types (non-Jackson union)
1004
+ is_union = is_union or (field_type.type_name == "Object" and field_type.union_types is not None and len(field_type.union_types) > 1)
1005
+
1006
+ # Check if field is List<Union> or Map<String, Union>
1007
+ is_list_of_unions = field_type.type_name.startswith("List<") and field_type.union_types and len(field_type.union_types) > 0
1008
+ is_map_of_unions = field_type.type_name.startswith("Map<") and field_type.union_types and len(field_type.union_types) > 0
1009
+
1010
+ # For union fields, return the unwrapped object using toObject()
1011
+ # This allows Avro's SpecificDatumWriter to serialize the actual value (String, Integer, etc.)
1012
+ # instead of trying to serialize our custom wrapper class
1013
+ # The put() method will wrap it back using new UnionType(value$)
1014
+ if is_union:
1015
+ get_method += f"{INDENT * 3}case {index}: return this.{field_name} != null ? this.{field_name}.toObject() : null;\n"
1016
+ elif is_list_of_unions:
1017
+ # For List<Union>, unwrap each element by calling toObject() on it
1018
+ # Avro will deserialize this as List<Object> which put() will rewrap
1019
+ get_method += f"{INDENT * 3}case {index}: return this.{field_name} != null ? this.{field_name}.stream().map(u -> u != null ? u.toObject() : null).collect(java.util.stream.Collectors.toList()) : null;\n"
1020
+ elif is_map_of_unions:
1021
+ # For Map<String, Union>, unwrap each value by calling toObject() on it
1022
+ get_method += f"{INDENT * 3}case {index}: return this.{field_name} != null ? this.{field_name}.entrySet().stream().collect(java.util.stream.Collectors.toMap(java.util.Map.Entry::getKey, e -> e.getValue() != null ? e.getValue().toObject() : null)) : null;\n"
1023
+ elif field_type.is_enum:
1024
+ # For enum fields, convert to GenericEnumSymbol for Avro serialization
1025
+ # This allows SpecificDatumWriter to serialize enums inside unions correctly
1026
+ get_method += f"{INDENT * 3}case {index}: return this.{field_name} != null ? new GenericData.EnumSymbol({field_type.type_name}.SCHEMA, this.{field_name}.name()) : null;\n"
653
1027
  else:
1028
+ # For all other field types, return the field as-is
1029
+ # Avro's SpecificDatumWriter will handle serialization internally
654
1030
  get_method += f"{INDENT * 3}case {index}: return this.{field_name};\n"
655
1031
  get_method += f"{INDENT * 3}default: throw new AvroRuntimeException(\"Bad index: \" + field$);\n"
656
1032
  get_method += f"{INDENT * 2}}}\n{INDENT}}}\n"
@@ -662,16 +1038,158 @@ class AvroToJava:
662
1038
  put_method = f"\n{INDENT}@Override\n{INDENT}public void put(int field$, Object value$) {{\n"
663
1039
  put_method += f"{INDENT * 2}switch (field$) {{\n"
664
1040
  for index, field in enumerate(fields):
1041
+ # Skip const fields as they are final and cannot be reassigned
1042
+ if "const" in field:
1043
+ put_method += f"{INDENT * 3}case {index}: break; // const field, cannot be set\n"
1044
+ continue
1045
+
665
1046
  field_name = pascal(field['name']) if self.pascal_properties else field['name']
666
1047
  field_name = self.safe_identifier(field_name, class_name)
667
1048
  field_type = self.convert_avro_type_to_java(class_name, field_name, field['type'], parent_package)
668
1049
  if field_type.type_name.startswith("List<") or field_type.type_name.startswith("Map<"):
669
1050
  suppress_unchecked = True
670
- if field_type.type_name in self.generated_types_avro_namespace and self.generated_types_avro_namespace[field_type.type_name] == "union":
671
- put_method += f"{INDENT * 3}case {index}: this.{field_name} = new {field_type.type_name}((GenericData.Record)value$); break;\n"
1051
+
1052
+ # Check if the field type is a generated type (union, class, or enum)
1053
+ type_kind = None
1054
+ if field_type.type_name in self.generated_types_avro_namespace:
1055
+ type_kind = self.generated_types_avro_namespace[field_type.type_name]
1056
+ elif field_type.type_name in self.generated_types_java_package:
1057
+ type_kind = self.generated_types_java_package[field_type.type_name]
1058
+
1059
+ # Check if this is List<Union> or Map<String, Union>
1060
+ is_list_of_unions = field_type.type_name.startswith("List<") and field_type.union_types and len(field_type.union_types) > 0
1061
+ is_map_of_unions = field_type.type_name.startswith("Map<") and field_type.union_types and len(field_type.union_types) > 0
1062
+
1063
+ if is_list_of_unions:
1064
+ # Extract the union type name from List<UnionType>
1065
+ union_type_match = field_type.type_name[5:-1] # Remove "List<" and ">"
1066
+ # For List<Union>, handle both wrapped List<UnionWrapper> and unwrapped List<Object>
1067
+ # Avro deserialization provides List<Object>, so we need to wrap each element
1068
+ put_method += f"{INDENT * 3}case {index}: {{\n"
1069
+ put_method += f"{INDENT * 4}if (value$ instanceof List<?>) {{\n"
1070
+ put_method += f"{INDENT * 5}List<?> list = (List<?>)value$;\n"
1071
+ put_method += f"{INDENT * 5}if (list.isEmpty() || !(list.get(0) instanceof {union_type_match})) {{\n"
1072
+ put_method += f"{INDENT * 6}// Unwrapped from Avro - need to wrap, handling nulls\n"
1073
+ put_method += f"{INDENT * 6}this.{field_name} = list.stream().map(v -> v != null ? new {union_type_match}(v) : null).collect(java.util.stream.Collectors.toList());\n"
1074
+ put_method += f"{INDENT * 5}}} else {{\n"
1075
+ put_method += f"{INDENT * 6}// Already wrapped\n"
1076
+ put_method += f"{INDENT * 6}this.{field_name} = ({field_type.type_name})value$;\n"
1077
+ put_method += f"{INDENT * 5}}}\n"
1078
+ put_method += f"{INDENT * 4}}}\n"
1079
+ put_method += f"{INDENT * 4}break;\n"
1080
+ put_method += f"{INDENT * 3}}}\n"
1081
+ elif is_map_of_unions:
1082
+ # Extract the union type name from Map<String, UnionType>
1083
+ union_type_match = field_type.type_name.split(",")[1].strip()[:-1] # Remove "Map<String, " and ">"
1084
+ put_method += f"{INDENT * 3}case {index}: {{\n"
1085
+ put_method += f"{INDENT * 4}if (value$ instanceof Map<?,?>) {{\n"
1086
+ put_method += f"{INDENT * 5}Map<?,?> map = (Map<?,?>)value$;\n"
1087
+ put_method += f"{INDENT * 5}if (map.isEmpty() || !(map.values().iterator().next() instanceof {union_type_match})) {{\n"
1088
+ put_method += f"{INDENT * 6}// Unwrapped from Avro - need to wrap, handling nulls\n"
1089
+ put_method += f"{INDENT * 6}this.{field_name} = map.entrySet().stream().collect(java.util.stream.Collectors.toMap(e -> (String)e.getKey(), e -> e.getValue() != null ? new {union_type_match}(e.getValue()) : null));\n"
1090
+ put_method += f"{INDENT * 5}}} else {{\n"
1091
+ put_method += f"{INDENT * 6}// Already wrapped\n"
1092
+ put_method += f"{INDENT * 6}this.{field_name} = ({field_type.type_name})value$;\n"
1093
+ put_method += f"{INDENT * 5}}}\n"
1094
+ put_method += f"{INDENT * 4}}}\n"
1095
+ put_method += f"{INDENT * 4}break;\n"
1096
+ put_method += f"{INDENT * 3}}}\n"
1097
+ elif type_kind == "union":
1098
+ # Unions can contain primitives or records - use the appropriate constructor
1099
+ # If Avro passes a GenericData.Record, use the GenericData.Record constructor
1100
+ # Otherwise use the Object constructor for already-constructed types
1101
+ put_method += f"{INDENT * 3}case {index}: this.{field_name} = value$ instanceof GenericData.Record ? new {field_type.type_name}((GenericData.Record)value$) : new {field_type.type_name}(value$); break;\n"
1102
+ elif type_kind == "class":
1103
+ # Record types need to be converted from GenericData.Record if that's what Avro passes
1104
+ put_method += f"{INDENT * 3}case {index}: this.{field_name} = value$ instanceof GenericData.Record ? new {field_type.type_name}((GenericData.Record)value$) : ({field_type.type_name})value$; break;\n"
1105
+ elif type_kind == "enum":
1106
+ # Enums need to be converted from GenericData.EnumSymbol
1107
+ put_method += f"{INDENT * 3}case {index}: this.{field_name} = value$ instanceof GenericData.EnumSymbol ? {field_type.type_name}.valueOf(value$.toString()) : ({field_type.type_name})value$; break;\n"
672
1108
  else:
673
- if field_type.type_name == 'String':
674
- put_method += f"{INDENT * 3}case {index}: this.{field_name} = value$.toString(); break;\n"
1109
+ # Check if this is a List<RecordType> or Map<String,RecordType>
1110
+ is_list_of_records = False
1111
+ is_map_of_records = False
1112
+ if field_type.type_name.startswith("List<"):
1113
+ item_type = field_type.type_name[5:-1]
1114
+ if item_type in self.generated_types_java_package and self.generated_types_java_package[item_type] == "class":
1115
+ is_list_of_records = True
1116
+ elif field_type.type_name.startswith("Map<"):
1117
+ # Extract value type from Map<String, ValueType>
1118
+ value_type = field_type.type_name.split(",")[1].strip()[:-1]
1119
+ if value_type in self.generated_types_java_package and self.generated_types_java_package[value_type] == "class":
1120
+ is_map_of_records = True
1121
+
1122
+ if is_list_of_records:
1123
+ item_type = field_type.type_name[5:-1]
1124
+ put_method += f"{INDENT * 3}case {index}: {{\n"
1125
+ put_method += f"{INDENT * 4}if (value$ instanceof List<?>) {{\n"
1126
+ put_method += f"{INDENT * 5}List<?> list = (List<?>)value$;\n"
1127
+ put_method += f"{INDENT * 5}if (list.isEmpty() || !(list.get(0) instanceof {item_type})) {{\n"
1128
+ put_method += f"{INDENT * 6}// Unwrapped from Avro - need to wrap GenericData.Record objects\n"
1129
+ put_method += f"{INDENT * 6}this.{field_name} = list.stream().map(item -> item instanceof GenericData.Record ? new {item_type}((GenericData.Record)item) : ({item_type})item).collect(java.util.stream.Collectors.toList());\n"
1130
+ put_method += f"{INDENT * 5}}} else {{\n"
1131
+ put_method += f"{INDENT * 6}// Already wrapped\n"
1132
+ put_method += f"{INDENT * 6}this.{field_name} = ({field_type.type_name})value$;\n"
1133
+ put_method += f"{INDENT * 5}}}\n"
1134
+ put_method += f"{INDENT * 4}}} else {{\n"
1135
+ put_method += f"{INDENT * 5}// Handle null or other types\n"
1136
+ put_method += f"{INDENT * 5}this.{field_name} = value$ != null ? ({field_type.type_name})value$ : null;\n"
1137
+ put_method += f"{INDENT * 4}}}\n"
1138
+ put_method += f"{INDENT * 4}break;\n"
1139
+ put_method += f"{INDENT * 3}}}\n"
1140
+ elif is_map_of_records:
1141
+ value_type = field_type.type_name.split(",")[1].strip()[:-1]
1142
+ put_method += f"{INDENT * 3}case {index}: {{\n"
1143
+ put_method += f"{INDENT * 4}if (value$ instanceof Map<?,?>) {{\n"
1144
+ put_method += f"{INDENT * 5}Map<?,?> map = (Map<?,?>)value$;\n"
1145
+ put_method += f"{INDENT * 5}if (map.isEmpty() || !(map.values().iterator().next() instanceof {value_type})) {{\n"
1146
+ put_method += f"{INDENT * 6}// Unwrapped from Avro - need to wrap GenericData.Record objects\n"
1147
+ put_method += f"{INDENT * 6}this.{field_name} = map.entrySet().stream().collect(java.util.stream.Collectors.toMap(e -> (String)e.getKey(), e -> e.getValue() instanceof GenericData.Record ? new {value_type}((GenericData.Record)e.getValue()) : ({value_type})e.getValue()));\n"
1148
+ put_method += f"{INDENT * 5}}} else {{\n"
1149
+ put_method += f"{INDENT * 6}// Already wrapped\n"
1150
+ put_method += f"{INDENT * 6}this.{field_name} = ({field_type.type_name})value$;\n"
1151
+ put_method += f"{INDENT * 5}}}\n"
1152
+ put_method += f"{INDENT * 4}}} else {{\n"
1153
+ put_method += f"{INDENT * 5}// Handle null or other types\n"
1154
+ put_method += f"{INDENT * 5}this.{field_name} = value$ != null ? ({field_type.type_name})value$ : null;\n"
1155
+ put_method += f"{INDENT * 4}}}\n"
1156
+ put_method += f"{INDENT * 4}break;\n"
1157
+ put_method += f"{INDENT * 3}}}\n"
1158
+ elif field_type.type_name == 'String':
1159
+ # Handle null values for String fields
1160
+ put_method += f"{INDENT * 3}case {index}: this.{field_name} = value$ != null ? value$.toString() : null; break;\n"
1161
+ elif field_type.type_name.startswith("List<"):
1162
+ # Extract the element type
1163
+ element_type = field_type.type_name[5:-1]
1164
+ # Check if it's a List of enums
1165
+ if element_type in self.generated_types_java_package and self.generated_types_java_package[element_type] == "enum":
1166
+ # For List<Enum>, convert GenericEnumSymbol to actual enum values
1167
+ put_method += f"{INDENT * 3}case {index}: {{\n"
1168
+ put_method += f"{INDENT * 4}if (value$ instanceof List<?>) {{\n"
1169
+ put_method += f"{INDENT * 5}List<?> list = (List<?>)value$;\n"
1170
+ put_method += f"{INDENT * 5}this.{field_name} = list.stream().map(item -> item instanceof GenericData.EnumSymbol ? {element_type}.valueOf(item.toString()) : ({element_type})item).collect(java.util.stream.Collectors.toList());\n"
1171
+ put_method += f"{INDENT * 4}}} else {{\n"
1172
+ put_method += f"{INDENT * 5}this.{field_name} = null;\n"
1173
+ put_method += f"{INDENT * 4}}}\n"
1174
+ put_method += f"{INDENT * 4}break;\n"
1175
+ put_method += f"{INDENT * 3}}}\n"
1176
+ elif element_type == "String":
1177
+ # For List<String>, convert Utf8 to String
1178
+ put_method += f"{INDENT * 3}case {index}: {{\n"
1179
+ put_method += f"{INDENT * 4}if (value$ instanceof List<?>) {{\n"
1180
+ put_method += f"{INDENT * 5}List<?> list = (List<?>)value$;\n"
1181
+ put_method += f"{INDENT * 5}this.{field_name} = list.stream().map(item -> item != null ? item.toString() : null).collect(java.util.stream.Collectors.toList());\n"
1182
+ put_method += f"{INDENT * 4}}} else {{\n"
1183
+ put_method += f"{INDENT * 5}this.{field_name} = null;\n"
1184
+ put_method += f"{INDENT * 4}}}\n"
1185
+ put_method += f"{INDENT * 4}break;\n"
1186
+ put_method += f"{INDENT * 3}}}\n"
1187
+ else:
1188
+ # For other List types, create a defensive copy
1189
+ put_method += f"{INDENT * 3}case {index}: this.{field_name} = value$ instanceof List<?> ? new java.util.ArrayList<>(({field_type.type_name})value$) : null; break;\n"
1190
+ elif field_type.type_name.startswith("Map<"):
1191
+ # For any Map type, create a defensive copy to avoid sharing references
1192
+ put_method += f"{INDENT * 3}case {index}: this.{field_name} = value$ instanceof Map<?,?> ? new java.util.HashMap<>(({field_type.type_name})value$) : null; break;\n"
675
1193
  else:
676
1194
  put_method += f"{INDENT * 3}case {index}: this.{field_name} = ({field_type.type_name})value$; break;\n"
677
1195
  put_method += f"{INDENT * 3}default: throw new AvroRuntimeException(\"Bad index: \" + field$);\n"
@@ -690,11 +1208,47 @@ class AvroToJava:
690
1208
  enum_name = self.safe_identifier(avro_schema['name'])
691
1209
  type_name = self.qualified_name(package.replace('/', '.'), enum_name)
692
1210
  self.generated_types_avro_namespace[self.qualified_name(avro_schema.get('namespace', parent_package),avro_schema['name'])] = "enum"
693
- self.generated_types_java_package[type_name] = "enum"
1211
+ self.generated_types_java_package[type_name] = "enum"
1212
+ self.generated_avro_schemas[type_name] = avro_schema
694
1213
  symbols = avro_schema.get('symbols', [])
695
- symbols_str = ', '.join([symbol.upper() for symbol in symbols])
1214
+ # Convert symbols to valid Java identifiers, preserving case
1215
+ # Replace invalid chars, prepend _ if starts with digit or is a reserved word
1216
+ java_symbols = []
1217
+ for symbol in symbols:
1218
+ java_symbol = symbol.replace('-', '_').replace('.', '_')
1219
+ if java_symbol and java_symbol[0].isdigit():
1220
+ java_symbol = '_' + java_symbol
1221
+ # Check if the symbol is a Java reserved word and prefix with underscore
1222
+ if is_java_reserved_word(java_symbol):
1223
+ java_symbol = '_' + java_symbol
1224
+ java_symbols.append(java_symbol)
1225
+ symbols_str = ', '.join(java_symbols)
696
1226
  enum_definition += f"public enum {enum_name} {{\n"
697
- enum_definition += f"{INDENT}{symbols_str};\n"
1227
+ enum_definition += f"{INDENT}{symbols_str}"
1228
+
1229
+ # Add Avro schema if annotations are enabled
1230
+ if self.avro_annotation:
1231
+ # Create inline schema for the enum
1232
+ enum_schema = {
1233
+ "type": "enum",
1234
+ "name": enum_name,
1235
+ "symbols": symbols
1236
+ }
1237
+ if 'namespace' in avro_schema:
1238
+ enum_schema['namespace'] = avro_schema['namespace']
1239
+ if 'doc' in avro_schema:
1240
+ enum_schema['doc'] = avro_schema['doc']
1241
+
1242
+ enum_schema_json = json.dumps(enum_schema)
1243
+ enum_schema_json = enum_schema_json.replace('"', '§')
1244
+ enum_schema_json = f"\"+\n{INDENT}\"".join(
1245
+ [enum_schema_json[i:i+80] for i in range(0, len(enum_schema_json), 80)])
1246
+ enum_schema_json = enum_schema_json.replace('§', '\\"')
1247
+
1248
+ enum_definition += f";\n\n{INDENT}public static final Schema SCHEMA = new Schema.Parser().parse(\n{INDENT}\"{enum_schema_json}\");\n"
1249
+ else:
1250
+ enum_definition += f";\n"
1251
+
698
1252
  enum_definition += "}\n"
699
1253
  if write_file:
700
1254
  self.write_to_file(package, enum_name, enum_definition)
@@ -740,11 +1294,16 @@ class AvroToJava:
740
1294
  f"{INDENT*1}private {union_type.type_name} _{camel(union_variable_name)};\n" + \
741
1295
  f"{INDENT*1}public {union_type.type_name} get{union_variable_name}() {{ return _{camel(union_variable_name)}; }}\n";
742
1296
 
743
- class_definition_toobject += f"{INDENT*2}if (_{camel(union_variable_name)} != null) {{\n{INDENT*3}return _{camel(union_variable_name)};\n{INDENT*2}}}\n"
1297
+ # For toObject(), wrap enums in GenericData.EnumSymbol so Avro can serialize them
1298
+ if union_type.is_enum:
1299
+ class_definition_toobject += f"{INDENT*2}if (_{camel(union_variable_name)} != null) {{\n{INDENT*3}return new GenericData.EnumSymbol({union_type.type_name}.SCHEMA, _{camel(union_variable_name)}.name());\n{INDENT*2}}}\n"
1300
+ else:
1301
+ class_definition_toobject += f"{INDENT*2}if (_{camel(union_variable_name)} != null) {{\n{INDENT*3}return _{camel(union_variable_name)};\n{INDENT*2}}}\n"
744
1302
 
1303
+ # GenericData.Record constructor only handles record types - primitives come through fromObject
745
1304
  if self.avro_annotation and union_type.is_class:
746
- class_definition_genericrecordctor += f"{INDENT*2}if ( {union_type.type_name}.AVROSCHEMA.getName().equals(record.getSchema().getName()) && {union_type.type_name}.AVROSCHEMA.getNamespace().equals(record.getSchema().getNamespace()) ) {{"
747
- class_definition_genericrecordctor += f"\n{INDENT*3}this._{camel(union_variable_name)} = new {union_type.type_name}(record);\n{INDENT*3}return;\n{INDENT*2}}}\n"
1305
+ class_definition_genericrecordctor += f"{INDENT*2}if (record.getSchema().getFullName().equals({union_type.type_name}.AVROSCHEMA.getFullName())) {{\n"
1306
+ class_definition_genericrecordctor += f"{INDENT*3}this._{camel(union_variable_name)} = new {union_type.type_name}(record);\n{INDENT*3}return;\n{INDENT*2}}}\n"
748
1307
 
749
1308
  # there can only be one list and one map in the union, so we don't need to differentiate this any further
750
1309
  if is_list:
@@ -752,9 +1311,27 @@ class AvroToJava:
752
1311
  elif is_dict:
753
1312
  class_definition_fromobjectctor += f"{INDENT*2}if (obj instanceof Map<?,?>) {{\n{INDENT*3}this._{camel(union_variable_name)} = ({union_type.type_name})obj;\n{INDENT*3}return;\n{INDENT*2}}}\n"
754
1313
  else:
1314
+ # For class types, check for GenericData.Record first (Avro deserialization), then typed instance
1315
+ if self.avro_annotation and union_type.is_class:
1316
+ class_definition_fromobjectctor += f"{INDENT*2}if (obj instanceof GenericData.Record) {{\n"
1317
+ class_definition_fromobjectctor += f"{INDENT*3}GenericData.Record record = (GenericData.Record)obj;\n"
1318
+ # Use getFullName() for robust schema comparison instead of separate name + namespace
1319
+ class_definition_fromobjectctor += f"{INDENT*3}String recordFullName = record.getSchema().getFullName();\n"
1320
+ class_definition_fromobjectctor += f"{INDENT*3}String expectedFullName = {union_type.type_name}.AVROSCHEMA.getFullName();\n"
1321
+ class_definition_fromobjectctor += f"{INDENT*3}if (recordFullName.equals(expectedFullName)) {{\n"
1322
+ class_definition_fromobjectctor += f"{INDENT*4}this._{camel(union_variable_name)} = new {union_type.type_name}(record);\n{INDENT*4}return;\n{INDENT*3}}}\n{INDENT*2}}}\n"
1323
+
1324
+ # Handle Avro's Utf8 type for String
1325
+ if self.avro_annotation and union_type.type_name == "String":
1326
+ class_definition_fromobjectctor += f"{INDENT*2}if (obj instanceof org.apache.avro.util.Utf8) {{\n{INDENT*3}this._{camel(union_variable_name)} = obj.toString();\n{INDENT*3}return;\n{INDENT*2}}}\n"
1327
+
1328
+ # Handle Avro's GenericEnumSymbol for enum types
1329
+ if self.avro_annotation and union_type.is_enum:
1330
+ class_definition_fromobjectctor += f"{INDENT*2}if (obj instanceof GenericData.EnumSymbol) {{\n{INDENT*3}this._{camel(union_variable_name)} = {union_type.type_name}.valueOf(obj.toString());\n{INDENT*3}return;\n{INDENT*2}}}\n"
1331
+
755
1332
  class_definition_fromobjectctor += f"{INDENT*2}if (obj instanceof {union_type.type_name}) {{\n{INDENT*3}this._{camel(union_variable_name)} = ({union_type.type_name})obj;\n{INDENT*3}return;\n{INDENT*2}}}\n"
756
1333
 
757
- # Read method logic
1334
+ # Read method logic - test types in order using duck typing (like C# implementation)
758
1335
  if is_dict:
759
1336
  class_definition_read += f"{INDENT*3}if (node.isObject()) {{\n{INDENT*4}{union_type.type_name} map = mapper.readValue(node.toString(), new TypeReference<{union_type.type_name}>(){{}});\n{INDENT*3}return new {union_class_name}(map);\n{INDENT*3}}}\n"
760
1337
  elif is_list:
@@ -764,7 +1341,7 @@ class AvroToJava:
764
1341
  class_definition_read += f"{INDENT*3}if (node.isTextual()) {{\n{INDENT*4}return new {union_class_name}(node.asText());\n{INDENT*3}}}\n"
765
1342
  elif union_type.type_name == "byte[]":
766
1343
  class_definition_read += f"{INDENT*3}if (node.isBinary()) {{\n{INDENT*4}return new {union_class_name}(node.binaryValue());\n{INDENT*3}}}\n"
767
- elif union_type.type_name in ["int", "Int"]:
1344
+ elif union_type.type_name in ["int", "Int", "Integer"]:
768
1345
  class_definition_read += f"{INDENT*3}if (node.canConvertToInt()) {{\n{INDENT*4}return new {union_class_name}(node.asInt());\n{INDENT*3}}}\n"
769
1346
  elif union_type.type_name in ["long", "Long"]:
770
1347
  class_definition_read += f"{INDENT*3}if (node.canConvertToLong()) {{\n{INDENT*4}return new {union_class_name}(node.asLong());\n{INDENT*3}}}\n"
@@ -777,10 +1354,12 @@ class AvroToJava:
777
1354
  elif union_type.type_name in ["boolean", "Boolean"]:
778
1355
  class_definition_read += f"{INDENT*3}if (node.isBoolean()) {{\n{INDENT*4}return new {union_class_name}(node.asBoolean());\n{INDENT*3}}}\n"
779
1356
  else:
1357
+ # For classes and enums, use duck typing with isJsonMatch() (C# pattern)
780
1358
  if union_type.is_enum:
781
1359
  class_definition_read += f"{INDENT*3}if (node.isTextual()) {{\n{INDENT*4}return new {union_class_name}(Enum.valueOf({union_type.type_name}.class, node.asText()));\n{INDENT*3}}}\n"
782
- else:
783
- class_definition_read += f"{INDENT*3}if (node.isObject() && {union_type.type_name}.isJsonMatch(node)) {{\n{INDENT*4}return new {union_class_name}(mapper.readValue(node.toString(), {union_type.type_name}.class));\n{INDENT*3}}}\n"
1360
+ elif union_type.is_class:
1361
+ # Use isJsonMatch() to test if this type matches, then use fromData() to deserialize
1362
+ class_definition_read += f"{INDENT*3}if ({union_type.type_name}.isJsonMatch(node)) {{\n{INDENT*4}return new {union_class_name}({union_type.type_name}.fromData(node, \"application/json\"));\n{INDENT*3}}}\n"
784
1363
 
785
1364
  # Write method logic
786
1365
  class_definition_write += f"{INDENT*3}{union_type.type_name} {camel(union_variable_name)}Value = value.get{union_variable_name}();\n{INDENT*3}if ({camel(union_variable_name)}Value != null) {{\n{INDENT*4}generator.writeObject({camel(union_variable_name)}Value);\n{INDENT*4}return;\n{INDENT*3}}}\n"
@@ -801,6 +1380,9 @@ class AvroToJava:
801
1380
  class_definition += f"{INDENT*2}throw new UnsupportedOperationException(\"No record type is set in the union\");\n"
802
1381
  class_definition += f"{INDENT}}}\n"
803
1382
  class_definition += f"\n{INDENT}public {union_class_name}(Object obj) {{\n"
1383
+ class_definition += f"{INDENT*2}if (obj == null) {{\n"
1384
+ class_definition += f"{INDENT*3}return; // null is valid for unions with null type\n"
1385
+ class_definition += f"{INDENT*2}}}\n"
804
1386
  class_definition += class_definition_fromobjectctor
805
1387
  class_definition += f"{INDENT*2}throw new UnsupportedOperationException(\"No record type is set in the union\");\n"
806
1388
  class_definition += f"{INDENT}}}\n"
@@ -825,12 +1407,25 @@ class AvroToJava:
825
1407
  class_definition += f"{INDENT*2}}}\n{INDENT}}}\n"
826
1408
  class_definition += f"\n{INDENT*1}public static boolean isJsonMatch(JsonNode node) {{\n"
827
1409
  class_definition += f"{INDENT*2}return " + " || ".join(list_is_json_match) + ";\n"
828
- class_definition += f"{INDENT*1}}}\n}}\n"
1410
+ class_definition += f"{INDENT*1}}}\n"
1411
+
1412
+ # Add equals method for union class
1413
+ class_definition += self.generate_union_equals_method(union_class_name, union_types)
1414
+
1415
+ # Add hashCode method for union class
1416
+ class_definition += self.generate_union_hashcode_method(union_class_name, union_types)
1417
+ class_definition += "}\n"
829
1418
 
830
1419
  if write_file:
831
1420
  self.write_to_file(package, union_class_name, class_definition)
1421
+ # Calculate qualified name for the union
1422
+ qualified_union_name = self.qualified_name(package.replace('/', '.'), union_class_name)
832
1423
  self.generated_types_avro_namespace[union_class_name] = "union" # Track union types
833
- self.generated_types_java_package[union_class_name] = "union" # Track union types
1424
+ self.generated_types_java_package[union_class_name] = "union" # Track union types with simple name
1425
+ self.generated_types_java_package[qualified_union_name] = "union" # Also track with qualified name
1426
+ # Store the union schema with the types information
1427
+ self.generated_avro_schemas[union_class_name] = {"types": avro_type}
1428
+ self.generated_avro_schemas[qualified_union_name] = {"types": avro_type}
834
1429
  return union_class_name
835
1430
 
836
1431
 
@@ -842,12 +1437,48 @@ class AvroToJava:
842
1437
  property_def = ''
843
1438
  if 'doc' in field:
844
1439
  property_def += f"{INDENT}/** {field['doc']} */\n"
845
- if self.jackson_annotations:
1440
+
1441
+ # For discriminator const fields, don't put @JsonProperty on the field
1442
+ # The getter will handle JSON serialization/deserialization
1443
+ is_discriminator_const = field.get('discriminator', False) and 'const' in field
1444
+ if self.jackson_annotations and not is_discriminator_const:
846
1445
  property_def += f"{INDENT}@JsonProperty(\"{field['name']}\")\n"
847
- property_def += f"{INDENT}private {field_type.type_name} {safe_field_name};\n"
848
- property_def += f"{INDENT}public {field_type.type_name} get{pascal(field_name)}() {{ return {safe_field_name}; }}\n"
849
- property_def += f"{INDENT}public void set{pascal(field_name)}({field_type.type_name} {safe_field_name}) {{ this.{safe_field_name} = {safe_field_name}; }}\n"
850
- if field_type.union_types:
1446
+
1447
+ # Handle const fields
1448
+ if 'const' in field and field['const'] is not None:
1449
+ const_value = field['const']
1450
+ is_discriminator = field.get('discriminator', False)
1451
+
1452
+ # For enum types, qualify with the enum type name
1453
+ if field_type.type_name not in ('String', 'int', 'Integer', 'long', 'Long', 'double', 'Double', 'boolean', 'Boolean'):
1454
+ const_value = f'{field_type.type_name}.{const_value}'
1455
+ elif field_type.type_name == 'String':
1456
+ const_value = f'"{const_value}"'
1457
+
1458
+ property_def += f"{INDENT}private final {field_type.type_name} {safe_field_name} = {const_value};\n"
1459
+
1460
+ # For discriminator fields, we need both the enum value accessor and String override
1461
+ if is_discriminator:
1462
+ # Provide a typed accessor for the enum value (ignored by Jackson since it's synthetic)
1463
+ if self.jackson_annotations:
1464
+ property_def += f"{INDENT}@JsonIgnore\n"
1465
+ property_def += f"{INDENT}public {field_type.type_name} get{pascal(field_name)}Value() {{ return {safe_field_name}; }}\n"
1466
+ # Generate the getter that returns String (Jackson will use this for serialization)
1467
+ # Use READ_ONLY since this is a const field that doesn't need deserialization
1468
+ # Note: Not using @Override because not all discriminated union variants extend a base class
1469
+ if self.jackson_annotations:
1470
+ property_def += f"{INDENT}@JsonProperty(value=\"{field['name']}\", access=JsonProperty.Access.READ_ONLY)\n"
1471
+ property_def += f"{INDENT}public String get{pascal(field_name)}() {{ return {safe_field_name}.name(); }}\n"
1472
+ else:
1473
+ property_def += f"{INDENT}public {field_type.type_name} get{pascal(field_name)}() {{ return {safe_field_name}; }}\n"
1474
+ else:
1475
+ property_def += f"{INDENT}private {field_type.type_name} {safe_field_name};\n"
1476
+ property_def += f"{INDENT}public {field_type.type_name} get{pascal(field_name)}() {{ return {safe_field_name}; }}\n"
1477
+ property_def += f"{INDENT}public void set{pascal(field_name)}({field_type.type_name} {safe_field_name}) {{ this.{safe_field_name} = {safe_field_name}; }}\n"
1478
+
1479
+ # Generate typed accessors only for direct union fields (not for List/Map<Union>)
1480
+ # For List<Union>, the field IS the list, not a single union value
1481
+ if field_type.union_types and not field_type.type_name.startswith("List<") and not field_type.type_name.startswith("Map<"):
851
1482
  for union_type in field_type.union_types:
852
1483
  if union_type.type_name.startswith("List<") or union_type.type_name.startswith("Map<"):
853
1484
  property_def += f"{INDENT}@SuppressWarnings(\"unchecked\")\n"
@@ -868,10 +1499,34 @@ class AvroToJava:
868
1499
  with open(file_path, 'w', encoding='utf-8') as file:
869
1500
  if package:
870
1501
  file.write(f"package {package.replace('/', '.')};\n\n")
871
- if "List<" in definition:
1502
+
1503
+ # Check if this class extends a discriminated union base class
1504
+ # Pattern: "public class ClassName extends UnionName"
1505
+ if " extends " in definition and self.jackson_annotations:
1506
+ import re
1507
+ match = re.search(r'public class \w+ extends (\w+)', definition)
1508
+ if match:
1509
+ base_class_name = match.group(1)
1510
+ # Check if this base class is a discriminated union we generated
1511
+ for union_name, union_subtypes in self.discriminated_unions.items():
1512
+ if union_name == base_class_name:
1513
+ # Get the package where the union base class is generated
1514
+ # (it's in the same package as the first subtype)
1515
+ union_package = union_subtypes[0]['package'] if union_subtypes else self.base_package.replace('/', '.')
1516
+ # Only import if the union is in a different package
1517
+ current_package = package.replace('/', '.')
1518
+ if union_package != current_package:
1519
+ file.write(f"import {union_package}.{union_name};\n")
1520
+ break
1521
+
1522
+ if "List<" in definition or "ArrayList<" in definition:
872
1523
  file.write("import java.util.List;\n")
873
- if "Map<" in definition:
1524
+ if "ArrayList<" in definition or "Arrays.asList" in definition:
1525
+ file.write("import java.util.ArrayList;\n")
1526
+ if "Map<" in definition or "HashMap<" in definition:
874
1527
  file.write("import java.util.Map;\n")
1528
+ if "HashMap<" in definition:
1529
+ file.write("import java.util.HashMap;\n")
875
1530
  if "Predicate<" in definition:
876
1531
  file.write("import java.util.function.Predicate;\n")
877
1532
  if "BigDecimal" in definition:
@@ -933,6 +1588,8 @@ class AvroToJava:
933
1588
  file.write("import com.fasterxml.jackson.core.JsonParser;\n")
934
1589
  if 'JsonIgnore' in definition:
935
1590
  file.write("import com.fasterxml.jackson.annotation.JsonIgnore;\n")
1591
+ if 'JsonIgnoreProperties' in definition:
1592
+ file.write("import com.fasterxml.jackson.annotation.JsonIgnoreProperties;\n")
936
1593
  if 'JsonProperty' in definition:
937
1594
  file.write("import com.fasterxml.jackson.annotation.JsonProperty;\n")
938
1595
  if 'JsonProcessingException' in definition:
@@ -959,12 +1616,432 @@ class AvroToJava:
959
1616
  file.write("\n")
960
1617
  file.write(definition)
961
1618
 
1619
+ def generate_tests(self, base_output_dir: str) -> None:
1620
+ """ Generates unit tests for all the generated Java classes and enums """
1621
+ from avrotize.common import process_template
1622
+
1623
+ test_directory_path = os.path.join(base_output_dir, "src/test/java")
1624
+ if not os.path.exists(test_directory_path):
1625
+ os.makedirs(test_directory_path, exist_ok=True)
1626
+
1627
+ for class_name, type_kind in self.generated_types_java_package.items():
1628
+ if type_kind in ["class", "enum"]:
1629
+ self.generate_test_class(class_name, type_kind, test_directory_path)
1630
+
1631
+ def generate_test_class(self, class_name: str, type_kind: str, test_directory_path: str) -> None:
1632
+ """ Generates a unit test class for a given Java class or enum """
1633
+ from avrotize.common import process_template
1634
+
1635
+ avro_schema = self.generated_avro_schemas.get(class_name, {})
1636
+ simple_class_name = class_name.split('.')[-1]
1637
+ package = ".".join(class_name.split('.')[:-1])
1638
+ test_class_name = f"{simple_class_name}Test"
1639
+
1640
+ if type_kind == "class":
1641
+ fields = self.get_class_test_fields(avro_schema, simple_class_name, package)
1642
+ imports = self.get_test_imports(fields)
1643
+ test_class_definition = process_template(
1644
+ "avrotojava/class_test.java.jinja",
1645
+ package=package,
1646
+ test_class_name=test_class_name,
1647
+ class_name=simple_class_name,
1648
+ fields=fields,
1649
+ imports=imports,
1650
+ avro_annotation=self.avro_annotation,
1651
+ jackson_annotation=self.jackson_annotations
1652
+ )
1653
+ elif type_kind == "enum":
1654
+ # Convert symbols to Java-safe identifiers (same logic as generate_enum)
1655
+ raw_symbols = avro_schema.get('symbols', [])
1656
+ java_safe_symbols = []
1657
+ for symbol in raw_symbols:
1658
+ java_symbol = symbol.replace('-', '_').replace('.', '_')
1659
+ if java_symbol and java_symbol[0].isdigit():
1660
+ java_symbol = '_' + java_symbol
1661
+ if is_java_reserved_word(java_symbol):
1662
+ java_symbol = '_' + java_symbol
1663
+ java_safe_symbols.append(java_symbol)
1664
+
1665
+ test_class_definition = process_template(
1666
+ "avrotojava/enum_test.java.jinja",
1667
+ package=package,
1668
+ test_class_name=test_class_name,
1669
+ enum_name=simple_class_name,
1670
+ symbols=java_safe_symbols # Pass converted symbols instead of raw
1671
+ )
1672
+
1673
+ # Write test file
1674
+ package_path = package.replace('.', os.sep)
1675
+ test_file_dir = os.path.join(test_directory_path, package_path)
1676
+ if not os.path.exists(test_file_dir):
1677
+ os.makedirs(test_file_dir, exist_ok=True)
1678
+ test_file_path = os.path.join(test_file_dir, f"{test_class_name}.java")
1679
+ with open(test_file_path, 'w', encoding='utf-8') as test_file:
1680
+ test_file.write(test_class_definition)
1681
+
1682
+ def get_test_imports(self, fields: List) -> List[str]:
1683
+ """ Gets the necessary imports for the test class """
1684
+ imports = []
1685
+ for field in fields:
1686
+ # Extract inner types from generic collections
1687
+ inner_types = []
1688
+ if field.field_type.startswith("List<"):
1689
+ if "import java.util.List;" not in imports:
1690
+ imports.append("import java.util.List;")
1691
+ if "import java.util.ArrayList;" not in imports:
1692
+ imports.append("import java.util.ArrayList;")
1693
+ # Extract the inner type: List<Type> -> Type
1694
+ inner_type = field.field_type[5:-1]
1695
+ # Check if inner type is also a Map
1696
+ if inner_type.startswith("Map<"):
1697
+ if "import java.util.Map;" not in imports:
1698
+ imports.append("import java.util.Map;")
1699
+ if "import java.util.HashMap;" not in imports:
1700
+ imports.append("import java.util.HashMap;")
1701
+ # Extract Map value type
1702
+ start = inner_type.index('<') + 1
1703
+ end = inner_type.rindex('>')
1704
+ map_types = inner_type[start:end].split(',')
1705
+ if len(map_types) > 1:
1706
+ inner_types.append(map_types[1].strip())
1707
+ else:
1708
+ inner_types.append(inner_type)
1709
+ elif field.field_type.startswith("Map<"):
1710
+ if "import java.util.Map;" not in imports:
1711
+ imports.append("import java.util.Map;")
1712
+ if "import java.util.HashMap;" not in imports:
1713
+ imports.append("import java.util.HashMap;")
1714
+ # Extract value type from Map<K,V>
1715
+ start = field.field_type.index('<') + 1
1716
+ end = field.field_type.rindex('>')
1717
+ map_types = field.field_type[start:end].split(',')
1718
+ if len(map_types) > 1:
1719
+ inner_types.append(map_types[1].strip())
1720
+
1721
+ # Add the direct field type for non-generic types
1722
+ if not field.field_type.startswith(("List<", "Map<")):
1723
+ inner_types.append(field.field_type)
1724
+
1725
+ # If field is Object with union_types (Avro-style union), add all union member types for imports
1726
+ if hasattr(field, 'java_type_obj') and field.java_type_obj and field.java_type_obj.union_types:
1727
+ for union_member_type in field.java_type_obj.union_types:
1728
+ inner_types.append(union_member_type.type_name)
1729
+
1730
+ # Process each type (including inner types from generics)
1731
+ for type_to_check in inner_types:
1732
+ # Add imports for enum and class types
1733
+ if type_to_check in self.generated_types_java_package:
1734
+ type_kind = self.generated_types_java_package[type_to_check]
1735
+ # Only import if it's a fully qualified name with a package
1736
+ if '.' in type_to_check:
1737
+ import_stmt = f"import {type_to_check};"
1738
+ if import_stmt not in imports:
1739
+ imports.append(import_stmt)
1740
+ # No longer import test classes - we instantiate classes directly
1741
+ # Process unions regardless of whether they're fully qualified
1742
+ # (they might be simple names that need member imports)
1743
+ if type_kind == "union":
1744
+ avro_schema = self.generated_avro_schemas.get(type_to_check, {})
1745
+ if avro_schema and 'types' in avro_schema:
1746
+ for union_type in avro_schema['types']:
1747
+ java_qualified_name = None
1748
+ if isinstance(union_type, dict) and 'name' in union_type:
1749
+ # It's a complex type reference (inline definition)
1750
+ type_name = union_type['name']
1751
+ if 'namespace' in union_type:
1752
+ avro_namespace = union_type['namespace']
1753
+ # Build full Java qualified name with base package
1754
+ java_qualified_name = self.join_packages(self.base_package, avro_namespace).replace('/', '.').lower() + '.' + type_name
1755
+ else:
1756
+ java_qualified_name = type_name
1757
+ elif isinstance(union_type, str) and union_type not in ['null', 'string', 'int', 'long', 'float', 'double', 'boolean', 'bytes']:
1758
+ # It's a string reference to a named type (could be class or enum)
1759
+ # The string is the Avro qualified name, need to convert to Java
1760
+ avro_name_parts = union_type.split('.')
1761
+ if len(avro_name_parts) > 1:
1762
+ # Has namespace
1763
+ type_name = avro_name_parts[-1]
1764
+ avro_namespace = '.'.join(avro_name_parts[:-1])
1765
+ java_qualified_name = self.join_packages(self.base_package, avro_namespace).replace('/', '.').lower() + '.' + type_name
1766
+ else:
1767
+ # No namespace, just a simple name
1768
+ java_qualified_name = union_type
1769
+
1770
+ if java_qualified_name:
1771
+ if java_qualified_name in self.generated_types_java_package or java_qualified_name.split('.')[-1] in self.generated_types_java_package:
1772
+ member_type_kind = self.generated_types_java_package.get(java_qualified_name, self.generated_types_java_package.get(java_qualified_name.split('.')[-1], None))
1773
+ # Import the class/enum
1774
+ class_import = f"import {java_qualified_name};"
1775
+ if class_import not in imports:
1776
+ imports.append(class_import)
1777
+ # No longer import test classes - we instantiate classes directly
1778
+ return imports
1779
+
1780
+ def get_class_test_fields(self, avro_schema: Dict, class_name: str, package: str) -> List:
1781
+ """ Retrieves fields for a given class name """
1782
+
1783
+ class Field:
1784
+ def __init__(self, fn: str, ft: str, tv: str, ct: bool, ie: bool = False, java_type_obj: 'AvroToJava.JavaType' = None, is_discrim: bool = False):
1785
+ self.field_name = fn
1786
+ self.field_type = ft
1787
+ # Extract base type for generic types (e.g., List<Object> -> List)
1788
+ if '<' in ft:
1789
+ self.base_type = ft.split('<')[0]
1790
+ else:
1791
+ self.base_type = ft
1792
+ self.test_value = tv
1793
+ self.is_const = ct
1794
+ self.is_enum = ie
1795
+ self.is_discriminator = is_discrim
1796
+ self.java_type_obj = java_type_obj # Store the full JavaType object for union access
1797
+
1798
+ fields: List[Field] = []
1799
+ if avro_schema and 'fields' in avro_schema:
1800
+ for field in avro_schema['fields']:
1801
+ field_name = pascal(field['name']) if self.pascal_properties else field['name']
1802
+ field_type = self.convert_avro_type_to_java(class_name, field_name, field['type'], avro_schema.get('namespace', ''))
1803
+ # Check if the field type is an enum
1804
+ is_enum = field_type.type_name in self.generated_types_java_package and \
1805
+ self.generated_types_java_package[field_type.type_name] == "enum"
1806
+ is_discriminator = field.get('discriminator', False)
1807
+
1808
+ # Generate test value for the field
1809
+ if "const" in field and field["const"] is not None:
1810
+ const_value = field["const"]
1811
+ # For enum types, qualify with the enum type name
1812
+ if is_enum or (field_type.type_name not in ('String', 'int', 'Integer', 'long', 'Long', 'double', 'Double', 'boolean', 'Boolean')):
1813
+ test_value = f'{field_type.type_name}.{const_value}'
1814
+ else:
1815
+ test_value = f'"{const_value}"'
1816
+ else:
1817
+ test_value = self.get_test_value_from_field(field['type'], field_type, package)
1818
+
1819
+ f = Field(
1820
+ field_name,
1821
+ field_type.type_name,
1822
+ test_value,
1823
+ "const" in field and field["const"] is not None,
1824
+ is_enum,
1825
+ field_type, # Pass the full JavaType object
1826
+ is_discriminator
1827
+ )
1828
+ fields.append(f)
1829
+ return fields
1830
+
1831
+ def get_test_value_from_field(self, avro_field_type: Union[str, Dict, List], java_type: JavaType, package: str) -> str:
1832
+ """Returns a default test value based on the Avro field type and Java type"""
1833
+ # If it's an Object with union_types (Avro-style union), pick a member type
1834
+ if java_type.type_name == "Object" and java_type.union_types is not None and len(java_type.union_types) > 0:
1835
+ # Pick the first union type and generate a test value for it
1836
+ first_union_type = java_type.union_types[0]
1837
+ return self.get_test_value(first_union_type.type_name, package)
1838
+ # For List<Object> where Object is a union, we need to handle it specially
1839
+ elif java_type.type_name.startswith("List<Object>"):
1840
+ # avro_field_type could be: ["null", {"type": "array", "items": [union types]}]
1841
+ # or just: {"type": "array", "items": [union types]}
1842
+ array_schema = avro_field_type
1843
+ if isinstance(avro_field_type, list):
1844
+ # It's a union - find the array type
1845
+ for t in avro_field_type:
1846
+ if isinstance(t, dict) and t.get('type') == 'array':
1847
+ array_schema = t
1848
+ break
1849
+
1850
+ if isinstance(array_schema, dict) and array_schema.get('type') == 'array':
1851
+ items_type = array_schema.get('items')
1852
+ if isinstance(items_type, list): # Union array
1853
+ # Pick the first non-null type
1854
+ non_null_types = [t for t in items_type if t != 'null']
1855
+ if non_null_types:
1856
+ inner_java_type = self.convert_avro_type_to_java('_test', '_field', non_null_types[0], package)
1857
+ inner_value = self.get_test_value(inner_java_type.type_name, package)
1858
+ return f'new ArrayList<>(java.util.Arrays.asList({inner_value}))'
1859
+ # Default: use type name
1860
+ return self.get_test_value(java_type.type_name, package)
1861
+
1862
+ def get_test_value(self, java_type: str, package: str) -> str:
1863
+ """Returns a default test value based on the Java type"""
1864
+ test_values = {
1865
+ 'String': '"test_string"',
1866
+ 'boolean': 'true',
1867
+ 'Boolean': 'Boolean.TRUE',
1868
+ 'int': '42',
1869
+ 'Integer': 'Integer.valueOf(42)',
1870
+ 'long': '42L',
1871
+ 'Long': 'Long.valueOf(42L)',
1872
+ 'float': '3.14f',
1873
+ 'Float': 'Float.valueOf(3.14f)',
1874
+ 'double': '3.14',
1875
+ 'Double': 'Double.valueOf(3.14)',
1876
+ 'byte[]': 'new byte[] { 0x01, 0x02, 0x03 }',
1877
+ 'Object': 'null', # Use null for Object types (Avro unions) to avoid reference equality issues
1878
+ }
1879
+
1880
+ # Handle generic types
1881
+ if java_type.startswith("List<"):
1882
+ inner_type = java_type[5:-1]
1883
+ inner_value = self.get_test_value(inner_type, package)
1884
+ # Arrays.asList(null) throws NPE, so create empty list for null values
1885
+ if inner_value == 'null':
1886
+ return 'new ArrayList<>()'
1887
+ return f'new ArrayList<>(java.util.Arrays.asList({inner_value}))'
1888
+ elif java_type.startswith("Map<"):
1889
+ return 'new HashMap<>()'
1890
+
1891
+ # Check if it's a generated type (enum, class, or union)
1892
+ if java_type in self.generated_types_java_package:
1893
+ type_kind = self.generated_types_java_package[java_type]
1894
+ if type_kind == "enum":
1895
+ # Get the first symbol for the enum
1896
+ avro_schema = self.generated_avro_schemas.get(java_type, {})
1897
+ symbols = avro_schema.get('symbols', [])
1898
+ if symbols:
1899
+ # Convert symbol to valid Java identifier (same logic as in generate_enum)
1900
+ first_symbol = symbols[0].replace('-', '_').replace('.', '_')
1901
+ if first_symbol and first_symbol[0].isdigit():
1902
+ first_symbol = '_' + first_symbol
1903
+ # Check if the symbol is a Java reserved word and prefix with underscore
1904
+ if is_java_reserved_word(first_symbol):
1905
+ first_symbol = '_' + first_symbol
1906
+ # Use fully qualified name to avoid conflicts with field names
1907
+ return f'{java_type}.{first_symbol}'
1908
+ return f'{java_type}.values()[0]'
1909
+ elif type_kind == "class":
1910
+ # Create a new instance using the createTestInstance() method
1911
+ # Use fully qualified name to avoid conflicts with field names
1912
+ return f'{java_type}.createTestInstance()'
1913
+ elif type_kind == "union":
1914
+ # For union types, we need to create an instance with one of the union types set
1915
+ # Get the union's schema to find available types
1916
+ avro_schema = self.generated_avro_schemas.get(java_type, {})
1917
+ if avro_schema and 'types' in avro_schema:
1918
+ # Use the first non-null type from the union
1919
+ for union_type in avro_schema['types']:
1920
+ if union_type != 'null' and isinstance(union_type, dict):
1921
+ # It's a complex type - check if enum or class
1922
+ if 'name' in union_type:
1923
+ type_name = union_type['name']
1924
+ if 'namespace' in union_type:
1925
+ avro_namespace = union_type['namespace']
1926
+ # Build full Java qualified name with base package
1927
+ java_qualified_name = self.join_packages(self.base_package, avro_namespace).replace('/', '.').lower() + '.' + type_name
1928
+ else:
1929
+ java_qualified_name = type_name
1930
+ simple_union_name = java_type.split('.')[-1]
1931
+
1932
+ # Check if this union member is an enum or class
1933
+ member_type_kind = self.generated_types_java_package.get(java_qualified_name)
1934
+ if member_type_kind == "enum":
1935
+ # For enums, use the first enum value
1936
+ member_value = self.get_test_value(java_qualified_name, package)
1937
+ return f'new {simple_union_name}({member_value})'
1938
+ else:
1939
+ # For classes, create a new instance using createTestInstance()
1940
+ # Use fully qualified name to avoid conflicts with field names
1941
+ return f'new {simple_union_name}({java_qualified_name}.createTestInstance())'
1942
+ elif union_type != 'null' and isinstance(union_type, str):
1943
+ # It's a simple type - convert from Avro type to Java type
1944
+ simple_union_name = java_type.split('.')[-1]
1945
+ # Convert Avro primitive type to Java type
1946
+ java_primitive_type = self.convert_avro_type_to_java('_test', '_field', union_type, package)
1947
+ simple_value = self.get_test_value(java_primitive_type.type_name, package)
1948
+ return f'new {simple_union_name}({simple_value})'
1949
+ # Fallback: create an empty union instance
1950
+ simple_name = java_type.split('.')[-1]
1951
+ return f'new {simple_name}()'
1952
+
1953
+ return test_values.get(java_type, f'new {java_type}()')
1954
+
1955
+ def generate_discriminated_union_base_classes(self):
1956
+ """Generate abstract base classes for discriminated unions with Jackson annotations"""
1957
+ if not self.jackson_annotations or not self.discriminated_unions:
1958
+ return
1959
+
1960
+ for union_name, subtypes in self.discriminated_unions.items():
1961
+ if not subtypes:
1962
+ continue
1963
+
1964
+ # Get the first subtype to determine package and discriminator field
1965
+ first_subtype = subtypes[0]
1966
+ package = first_subtype['package']
1967
+
1968
+ # Find the discriminator field (should have 'discriminator': true)
1969
+ discriminator_field = None
1970
+ discriminator_values = {}
1971
+
1972
+ for subtype_info in subtypes:
1973
+ schema = subtype_info['schema']
1974
+ for field in schema.get('fields', []):
1975
+ if field.get('discriminator'):
1976
+ discriminator_field = field['name']
1977
+ if 'const' in field:
1978
+ discriminator_values[subtype_info['class_name']] = field['const']
1979
+ break
1980
+
1981
+ if not discriminator_field:
1982
+ print(f"WARN: Could not find discriminator field for union {union_name}")
1983
+ continue
1984
+
1985
+ # Generate the abstract base class
1986
+ class_definition = f"/**\n * Abstract base class for {union_name} discriminated union\n */\n"
1987
+
1988
+ # Add Jackson @JsonTypeInfo annotation
1989
+ class_definition += f'@JsonTypeInfo(\n'
1990
+ class_definition += f'{INDENT}use = JsonTypeInfo.Id.NAME,\n'
1991
+ class_definition += f'{INDENT}include = JsonTypeInfo.As.EXISTING_PROPERTY,\n'
1992
+ class_definition += f'{INDENT}property = "{discriminator_field}",\n'
1993
+ class_definition += f'{INDENT}visible = true\n'
1994
+ class_definition += f')\n'
1995
+
1996
+ # Add Jackson @JsonSubTypes annotation
1997
+ class_definition += f'@JsonSubTypes({{\n'
1998
+ for i, subtype_info in enumerate(subtypes):
1999
+ class_name = subtype_info['class_name']
2000
+ disc_value = discriminator_values.get(class_name, class_name)
2001
+ comma = ',' if i < len(subtypes) - 1 else ''
2002
+ class_definition += f'{INDENT}@JsonSubTypes.Type(value = {class_name}.class, name = "{disc_value}"){comma}\n'
2003
+ class_definition += f'}})\n'
2004
+
2005
+ # Abstract class declaration
2006
+ class_definition += f'public abstract class {union_name} {{\n'
2007
+
2008
+ # Add the discriminator field getter (abstract)
2009
+ class_definition += f'{INDENT}/**\n{INDENT} * Gets the discriminator value\n{INDENT} * @return the type discriminator\n{INDENT} */\n'
2010
+ class_definition += f'{INDENT}public abstract String get{pascal(discriminator_field)}();\n'
2011
+
2012
+ class_definition += '}\n'
2013
+
2014
+ # Write the file
2015
+ dir_path = os.path.join(self.output_dir, package.replace('.', os.sep))
2016
+ os.makedirs(dir_path, exist_ok=True)
2017
+ file_path = os.path.join(dir_path, f"{union_name}.java")
2018
+
2019
+ # Build the full file content with imports
2020
+ imports = [
2021
+ 'import com.fasterxml.jackson.annotation.JsonSubTypes;',
2022
+ 'import com.fasterxml.jackson.annotation.JsonTypeInfo;'
2023
+ ]
2024
+
2025
+ full_content = f"package {package};\n\n"
2026
+ full_content += '\n'.join(imports) + '\n\n'
2027
+ full_content += class_definition
2028
+
2029
+ with open(file_path, 'w', encoding='utf-8') as file:
2030
+ file.write(full_content)
2031
+
2032
+ print(f"Generated discriminated union base class: {union_name}")
2033
+
962
2034
  def convert_schema(self, schema: JsonNode, output_dir: str):
963
2035
  """Converts Avro schema to Java"""
964
2036
  if not isinstance(schema, list):
965
2037
  schema = [schema]
2038
+
2039
+ # Build type dictionary for inline schema resolution (like C# does)
2040
+ self.type_dict = build_flat_type_dict(schema)
2041
+
966
2042
  if not os.path.exists(output_dir):
967
2043
  os.makedirs(output_dir, exist_ok=True)
2044
+ base_output_dir = output_dir # Store the base directory before changing it
968
2045
  pom_path = os.path.join(output_dir, "pom.xml")
969
2046
  if not os.path.exists(pom_path):
970
2047
  package_elements = self.base_package.split('.') if self.base_package else ["com", "example"]
@@ -979,6 +2056,8 @@ class AvroToJava:
979
2056
  self.output_dir = output_dir
980
2057
  for avro_schema in (x for x in schema if isinstance(x, dict)):
981
2058
  self.generate_class_or_enum(avro_schema, '')
2059
+ self.generate_discriminated_union_base_classes()
2060
+ self.generate_tests(base_output_dir)
982
2061
 
983
2062
  def convert(self, avro_schema_path: str, output_dir: str):
984
2063
  """Converts Avro schema to Java"""