structurize 2.16.5__py3-none-any.whl → 2.17.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avrotize/__init__.py +1 -0
- avrotize/_version.py +3 -3
- avrotize/avrotocsharp.py +74 -10
- avrotize/avrotojava.py +1130 -51
- avrotize/avrotopython.py +4 -2
- avrotize/commands.json +671 -53
- avrotize/common.py +6 -1
- avrotize/jsonstoavro.py +518 -49
- avrotize/structuretocpp.py +697 -0
- avrotize/structuretocsv.py +365 -0
- avrotize/structuretodatapackage.py +659 -0
- avrotize/structuretodb.py +1125 -0
- avrotize/structuretogo.py +720 -0
- avrotize/structuretographql.py +502 -0
- avrotize/structuretoiceberg.py +355 -0
- avrotize/structuretojava.py +853 -0
- avrotize/structuretokusto.py +639 -0
- avrotize/structuretomd.py +322 -0
- avrotize/structuretoproto.py +764 -0
- avrotize/structuretorust.py +714 -0
- avrotize/structuretoxsd.py +679 -0
- structurize-2.17.0.dist-info/METADATA +107 -0
- {structurize-2.16.5.dist-info → structurize-2.17.0.dist-info}/RECORD +27 -14
- structurize-2.16.5.dist-info/METADATA +0 -848
- {structurize-2.16.5.dist-info → structurize-2.17.0.dist-info}/WHEEL +0 -0
- {structurize-2.16.5.dist-info → structurize-2.17.0.dist-info}/entry_points.txt +0 -0
- {structurize-2.16.5.dist-info → structurize-2.17.0.dist-info}/licenses/LICENSE +0 -0
- {structurize-2.16.5.dist-info → structurize-2.17.0.dist-info}/top_level.txt +0 -0
avrotize/avrotojava.py
CHANGED
|
@@ -6,7 +6,7 @@ import os
|
|
|
6
6
|
from typing import Dict, List, Tuple, Union
|
|
7
7
|
from avrotize.constants import AVRO_VERSION, JACKSON_VERSION, JDK_VERSION
|
|
8
8
|
|
|
9
|
-
from avrotize.common import pascal, camel, is_generic_avro_type
|
|
9
|
+
from avrotize.common import pascal, camel, is_generic_avro_type, inline_avro_references, build_flat_type_dict
|
|
10
10
|
|
|
11
11
|
INDENT = ' '
|
|
12
12
|
POM_CONTENT = """<?xml version="1.0" encoding="UTF-8"?>
|
|
@@ -20,6 +20,7 @@ POM_CONTENT = """<?xml version="1.0" encoding="UTF-8"?>
|
|
|
20
20
|
<properties>
|
|
21
21
|
<maven.compiler.source>{JDK_VERSION}</maven.compiler.source>
|
|
22
22
|
<maven.compiler.target>{JDK_VERSION}</maven.compiler.target>
|
|
23
|
+
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
|
23
24
|
</properties>
|
|
24
25
|
<dependencies>
|
|
25
26
|
<dependency>
|
|
@@ -28,12 +29,56 @@ POM_CONTENT = """<?xml version="1.0" encoding="UTF-8"?>
|
|
|
28
29
|
<version>{AVRO_VERSION}</version>
|
|
29
30
|
</dependency>
|
|
30
31
|
<dependency>
|
|
31
|
-
<groupId>com.fasterxml.jackson</groupId>
|
|
32
|
-
<artifactId>jackson-
|
|
32
|
+
<groupId>com.fasterxml.jackson.core</groupId>
|
|
33
|
+
<artifactId>jackson-core</artifactId>
|
|
33
34
|
<version>{JACKSON_VERSION}</version>
|
|
34
|
-
|
|
35
|
+
</dependency>
|
|
36
|
+
<dependency>
|
|
37
|
+
<groupId>com.fasterxml.jackson.core</groupId>
|
|
38
|
+
<artifactId>jackson-databind</artifactId>
|
|
39
|
+
<version>{JACKSON_VERSION}</version>
|
|
40
|
+
</dependency>
|
|
41
|
+
<dependency>
|
|
42
|
+
<groupId>com.fasterxml.jackson.core</groupId>
|
|
43
|
+
<artifactId>jackson-annotations</artifactId>
|
|
44
|
+
<version>{JACKSON_VERSION}</version>
|
|
45
|
+
</dependency>
|
|
46
|
+
<dependency>
|
|
47
|
+
<groupId>org.junit.jupiter</groupId>
|
|
48
|
+
<artifactId>junit-jupiter-api</artifactId>
|
|
49
|
+
<version>5.10.0</version>
|
|
50
|
+
<scope>test</scope>
|
|
51
|
+
</dependency>
|
|
52
|
+
<dependency>
|
|
53
|
+
<groupId>org.junit.jupiter</groupId>
|
|
54
|
+
<artifactId>junit-jupiter-engine</artifactId>
|
|
55
|
+
<version>5.10.0</version>
|
|
56
|
+
<scope>test</scope>
|
|
35
57
|
</dependency>
|
|
36
58
|
</dependencies>
|
|
59
|
+
<build>
|
|
60
|
+
<plugins>
|
|
61
|
+
<plugin>
|
|
62
|
+
<groupId>org.apache.maven.plugins</groupId>
|
|
63
|
+
<artifactId>maven-compiler-plugin</artifactId>
|
|
64
|
+
<version>3.11.0</version>
|
|
65
|
+
<configuration>
|
|
66
|
+
<compilerArgs>
|
|
67
|
+
<arg>-Xmaxerrs</arg>
|
|
68
|
+
<arg>1000</arg>
|
|
69
|
+
</compilerArgs>
|
|
70
|
+
</configuration>
|
|
71
|
+
</plugin>
|
|
72
|
+
<plugin>
|
|
73
|
+
<groupId>org.apache.maven.plugins</groupId>
|
|
74
|
+
<artifactId>maven-surefire-plugin</artifactId>
|
|
75
|
+
<version>3.0.0-M9</version>
|
|
76
|
+
<configuration>
|
|
77
|
+
<useSystemClassLoader>false</useSystemClassLoader>
|
|
78
|
+
</configuration>
|
|
79
|
+
</plugin>
|
|
80
|
+
</plugins>
|
|
81
|
+
</build>
|
|
37
82
|
</project>
|
|
38
83
|
"""
|
|
39
84
|
|
|
@@ -41,12 +86,16 @@ PREAMBLE_TOBYTEARRAY = \
|
|
|
41
86
|
"""
|
|
42
87
|
byte[] result = null;
|
|
43
88
|
String mediaType = contentType.split(";")[0].trim().toLowerCase();
|
|
89
|
+
boolean shouldCompress = mediaType.endsWith("+gzip");
|
|
90
|
+
if (shouldCompress) {
|
|
91
|
+
mediaType = mediaType.substring(0, mediaType.length() - 5);
|
|
92
|
+
}
|
|
44
93
|
"""
|
|
45
94
|
|
|
46
95
|
|
|
47
96
|
EPILOGUE_TOBYTEARRAY_COMPRESSION = \
|
|
48
97
|
"""
|
|
49
|
-
if (result != null &&
|
|
98
|
+
if (result != null && shouldCompress) {
|
|
50
99
|
try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
|
|
51
100
|
GZIPOutputStream gzipOutputStream = new GZIPOutputStream(byteArrayOutputStream)) {
|
|
52
101
|
gzipOutputStream.write(result);
|
|
@@ -66,6 +115,7 @@ throw new UnsupportedOperationException("Unsupported media type + mediaType");
|
|
|
66
115
|
PREAMBLE_FROMDATA_COMPRESSION = \
|
|
67
116
|
"""
|
|
68
117
|
if (mediaType.endsWith("+gzip")) {
|
|
118
|
+
mediaType = mediaType.substring(0, mediaType.length() - 5);
|
|
69
119
|
InputStream stream = null;
|
|
70
120
|
|
|
71
121
|
if (data instanceof InputStream) {
|
|
@@ -95,7 +145,7 @@ JSON_FROMDATA_THROWS = \
|
|
|
95
145
|
",JsonProcessingException, IOException"
|
|
96
146
|
JSON_FROMDATA = \
|
|
97
147
|
"""
|
|
98
|
-
if ( mediaType == "application/json") {
|
|
148
|
+
if ( mediaType.equals("application/json")) {
|
|
99
149
|
if (data instanceof byte[]) {
|
|
100
150
|
ByteArrayInputStream stream = new ByteArrayInputStream((byte[]) data);
|
|
101
151
|
return (new ObjectMapper()).readValue(stream, {typeName}.class);
|
|
@@ -115,7 +165,7 @@ if ( mediaType == "application/json") {
|
|
|
115
165
|
JSON_TOBYTEARRAY_THROWS = ",JsonProcessingException"
|
|
116
166
|
JSON_TOBYTEARRAY = \
|
|
117
167
|
"""
|
|
118
|
-
if ( mediaType == "application/json") {
|
|
168
|
+
if ( mediaType.equals("application/json")) {
|
|
119
169
|
result = new ObjectMapper().writeValueAsBytes(this);
|
|
120
170
|
}
|
|
121
171
|
"""
|
|
@@ -123,14 +173,14 @@ if ( mediaType == "application/json") {
|
|
|
123
173
|
AVRO_FROMDATA_THROWS = ",IOException"
|
|
124
174
|
AVRO_FROMDATA = \
|
|
125
175
|
"""
|
|
126
|
-
if ( mediaType == "avro/binary" || mediaType == "application/vnd.apache.avro+avro") {
|
|
176
|
+
if ( mediaType.equals("avro/binary") || mediaType.equals("application/vnd.apache.avro+avro")) {
|
|
127
177
|
if (data instanceof byte[]) {
|
|
128
178
|
return AVROREADER.read(new {typeName}(), DecoderFactory.get().binaryDecoder((byte[])data, null));
|
|
129
179
|
} else if (data instanceof InputStream) {
|
|
130
180
|
return AVROREADER.read(new {typeName}(), DecoderFactory.get().binaryDecoder((InputStream)data, null));
|
|
131
181
|
}
|
|
132
182
|
throw new UnsupportedOperationException("Data is not of a supported type for Avro conversion to {typeName}");
|
|
133
|
-
} else if ( mediaType
|
|
183
|
+
} else if ( mediaType.equals("avro/json") || mediaType.equals("application/vnd.apache.avro+json")) {
|
|
134
184
|
if (data instanceof byte[]) {
|
|
135
185
|
return AVROREADER.read(new {typeName}(), DecoderFactory.get().jsonDecoder({typeName}.AVROSCHEMA, new ByteArrayInputStream((byte[])data)));
|
|
136
186
|
} else if (data instanceof InputStream) {
|
|
@@ -146,14 +196,14 @@ if ( mediaType == "avro/binary" || mediaType == "application/vnd.apache.avro+avr
|
|
|
146
196
|
AVRO_TOBYTEARRAY_THROWS = ",IOException"
|
|
147
197
|
AVRO_TOBYTEARRAY = \
|
|
148
198
|
"""
|
|
149
|
-
if ( mediaType
|
|
199
|
+
if ( mediaType.equals("avro/binary") || mediaType.equals("application/vnd.apache.avro+avro")) {
|
|
150
200
|
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
|
151
201
|
Encoder encoder = EncoderFactory.get().binaryEncoder(out, null);
|
|
152
202
|
AVROWRITER.write(this, encoder);
|
|
153
203
|
encoder.flush();
|
|
154
204
|
result = out.toByteArray();
|
|
155
205
|
}
|
|
156
|
-
else if ( mediaType
|
|
206
|
+
else if ( mediaType.equals("avro/json") || mediaType.equals("application/vnd.apache.avro+json")) {
|
|
157
207
|
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
|
158
208
|
Encoder encoder = EncoderFactory.get().jsonEncoder({typeName}.AVROSCHEMA, out);
|
|
159
209
|
AVROWRITER.write(this, encoder);
|
|
@@ -198,6 +248,8 @@ class AvroToJava:
|
|
|
198
248
|
self.pascal_properties = False
|
|
199
249
|
self.generated_types_avro_namespace: Dict[str,str] = {}
|
|
200
250
|
self.generated_types_java_package: Dict[str,str] = {}
|
|
251
|
+
self.generated_avro_schemas: Dict[str, Dict] = {}
|
|
252
|
+
self.discriminated_unions: Dict[str, List[Dict]] = {} # Maps union name to list of subtype schemas
|
|
201
253
|
|
|
202
254
|
def qualified_name(self, package: str, name: str) -> str:
|
|
203
255
|
"""Concatenates package and name using a dot separator"""
|
|
@@ -323,10 +375,22 @@ class AvroToJava:
|
|
|
323
375
|
if avro_type['logicalType'] == 'decimal':
|
|
324
376
|
return AvroToJava.JavaType('BigDecimal')
|
|
325
377
|
elif avro_type['type'] == 'array':
|
|
326
|
-
|
|
378
|
+
item_java_type = self.convert_avro_type_to_java(class_name, field_name, avro_type['items'], parent_package, nullable=True)
|
|
379
|
+
item_type = item_java_type.type_name
|
|
380
|
+
# Check if item is a union type by name pattern or registered type
|
|
381
|
+
is_union_item = (item_type.endswith("Union") or
|
|
382
|
+
(item_type in self.generated_types_java_package and self.generated_types_java_package[item_type] == "union"))
|
|
383
|
+
if is_union_item:
|
|
384
|
+
return AvroToJava.JavaType(f"List<{item_type}>", union_types=[AvroToJava.JavaType(item_type)])
|
|
327
385
|
return AvroToJava.JavaType(f"List<{item_type}>")
|
|
328
386
|
elif avro_type['type'] == 'map':
|
|
329
|
-
|
|
387
|
+
value_java_type = self.convert_avro_type_to_java(class_name, field_name, avro_type['values'], parent_package, nullable=True)
|
|
388
|
+
values_type = value_java_type.type_name
|
|
389
|
+
# Check if value is a union type by name pattern or registered type
|
|
390
|
+
is_union_value = (values_type.endswith("Union") or
|
|
391
|
+
(values_type in self.generated_types_java_package and self.generated_types_java_package[values_type] == "union"))
|
|
392
|
+
if is_union_value:
|
|
393
|
+
return AvroToJava.JavaType(f"Map<String,{values_type}>", union_types=[AvroToJava.JavaType(values_type)])
|
|
330
394
|
return AvroToJava.JavaType(f"Map<String,{values_type}>")
|
|
331
395
|
elif 'logicalType' in avro_type:
|
|
332
396
|
if avro_type['logicalType'] == 'date':
|
|
@@ -352,6 +416,32 @@ class AvroToJava:
|
|
|
352
416
|
return self.generate_enum(avro_schema, parent_package, write_file)
|
|
353
417
|
return AvroToJava.JavaType('Object')
|
|
354
418
|
|
|
419
|
+
def generate_create_test_instance_method(self, class_name: str, fields: List[Dict], parent_package: str) -> str:
|
|
420
|
+
""" Generates a static createTestInstance method that creates a fully initialized instance """
|
|
421
|
+
method = f"\n{INDENT}/**\n{INDENT} * Creates a test instance with all required fields populated\n{INDENT} * @return a fully initialized test instance\n{INDENT} */\n"
|
|
422
|
+
method += f"{INDENT}public static {class_name} createTestInstance() {{\n"
|
|
423
|
+
method += f"{INDENT*2}{class_name} instance = new {class_name}();\n"
|
|
424
|
+
|
|
425
|
+
for field in fields:
|
|
426
|
+
# Skip const fields
|
|
427
|
+
if "const" in field:
|
|
428
|
+
continue
|
|
429
|
+
|
|
430
|
+
# Match the logic in generate_property: field_name is already Pascal-cased if needed
|
|
431
|
+
field_name = pascal(field['name']) if self.pascal_properties else field['name']
|
|
432
|
+
safe_field_name = self.safe_identifier(field_name, class_name)
|
|
433
|
+
field_type = self.convert_avro_type_to_java(class_name, safe_field_name, field['type'], parent_package)
|
|
434
|
+
|
|
435
|
+
# Get a test value for this field
|
|
436
|
+
test_value = self.get_test_value(field_type.type_name, parent_package.replace('.', '/'))
|
|
437
|
+
|
|
438
|
+
# Setter name matches generate_property: set{pascal(field_name)} where field_name is already potentially Pascal-cased
|
|
439
|
+
method += f"{INDENT*2}instance.set{pascal(field_name)}({test_value});\n"
|
|
440
|
+
|
|
441
|
+
method += f"{INDENT*2}return instance;\n"
|
|
442
|
+
method += f"{INDENT}}}\n"
|
|
443
|
+
return method
|
|
444
|
+
|
|
355
445
|
def generate_class(self, avro_schema: Dict, parent_package: str, write_file: bool) -> JavaType:
|
|
356
446
|
""" Generates a Java class from an Avro record schema """
|
|
357
447
|
class_definition = ''
|
|
@@ -370,9 +460,29 @@ class AvroToJava:
|
|
|
370
460
|
return AvroToJava.JavaType(qualified_class_name, is_class=True)
|
|
371
461
|
self.generated_types_avro_namespace[namespace_qualified_name] = "class"
|
|
372
462
|
self.generated_types_java_package[qualified_class_name] = "class"
|
|
463
|
+
self.generated_avro_schemas[qualified_class_name] = avro_schema
|
|
464
|
+
|
|
465
|
+
# Track discriminated union subtypes
|
|
466
|
+
if 'union' in avro_schema:
|
|
467
|
+
union_name = avro_schema['union']
|
|
468
|
+
if union_name not in self.discriminated_unions:
|
|
469
|
+
self.discriminated_unions[union_name] = []
|
|
470
|
+
self.discriminated_unions[union_name].append({
|
|
471
|
+
'schema': avro_schema,
|
|
472
|
+
'class_name': class_name,
|
|
473
|
+
'package': package.replace('/', '.'),
|
|
474
|
+
'qualified_name': qualified_class_name
|
|
475
|
+
})
|
|
476
|
+
|
|
373
477
|
fields_str = [self.generate_property(class_name, field, namespace) for field in avro_schema.get('fields', [])]
|
|
374
478
|
class_body = "\n".join(fields_str)
|
|
375
479
|
class_definition += f"public class {class_name}"
|
|
480
|
+
|
|
481
|
+
# Add extends clause if this is a discriminated union subtype
|
|
482
|
+
if 'union' in avro_schema and self.jackson_annotations:
|
|
483
|
+
union_name = avro_schema['union']
|
|
484
|
+
class_definition += f" extends {union_name}"
|
|
485
|
+
|
|
376
486
|
if self.avro_annotation:
|
|
377
487
|
class_definition += " implements SpecificRecord"
|
|
378
488
|
class_definition += " {\n"
|
|
@@ -386,13 +496,54 @@ class AvroToJava:
|
|
|
386
496
|
class_definition += f"{INDENT*2}}}\n"
|
|
387
497
|
class_definition += f"{INDENT}}}\n"
|
|
388
498
|
|
|
499
|
+
# Generate createTestInstance() method for testing
|
|
500
|
+
class_definition += self.generate_create_test_instance_method(class_name, avro_schema.get('fields', []), namespace)
|
|
501
|
+
|
|
389
502
|
if self.avro_annotation:
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
avro_schema_json =
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
503
|
+
# Inline all schema references like C# does - each class has self-contained schema
|
|
504
|
+
local_avro_schema = inline_avro_references(avro_schema.copy(), self.type_dict, '')
|
|
505
|
+
avro_schema_json = json.dumps(local_avro_schema)
|
|
506
|
+
|
|
507
|
+
# Java has a limit of 65535 bytes for string constants
|
|
508
|
+
# If the schema is too large, we need to split it into chunks
|
|
509
|
+
MAX_STRING_CONSTANT_LENGTH = 60000 # Leave some margin for safety
|
|
510
|
+
|
|
511
|
+
if len(avro_schema_json) > MAX_STRING_CONSTANT_LENGTH:
|
|
512
|
+
# Split into multiple private string methods to avoid the 65535 byte limit
|
|
513
|
+
# Each method returns a part of the schema, concatenated at runtime
|
|
514
|
+
chunk_size = MAX_STRING_CONSTANT_LENGTH
|
|
515
|
+
chunks = [avro_schema_json[i:i+chunk_size] for i in range(0, len(avro_schema_json), chunk_size)]
|
|
516
|
+
|
|
517
|
+
# Generate a method for each chunk
|
|
518
|
+
for i, chunk in enumerate(chunks):
|
|
519
|
+
# Use the same escaping technique as the non-chunked version
|
|
520
|
+
escaped_chunk = chunk.replace('"', '§')
|
|
521
|
+
escaped_chunk = f"\"+\n{INDENT*2}\"".join(
|
|
522
|
+
[escaped_chunk[j:j+80] for j in range(0, len(escaped_chunk), 80)])
|
|
523
|
+
escaped_chunk = escaped_chunk.replace('§', '\\"')
|
|
524
|
+
class_definition += f"\n\n{INDENT}private static String getAvroSchemaPart{i}() {{\n"
|
|
525
|
+
class_definition += f"{INDENT*2}return \"{escaped_chunk}\";\n"
|
|
526
|
+
class_definition += f"{INDENT}}}"
|
|
527
|
+
|
|
528
|
+
# Generate the combining method
|
|
529
|
+
class_definition += f"\n\n{INDENT}private static String getAvroSchemaJson() {{\n"
|
|
530
|
+
class_definition += f"{INDENT*2}return "
|
|
531
|
+
class_definition += " + ".join([f"getAvroSchemaPart{i}()" for i in range(len(chunks))])
|
|
532
|
+
class_definition += ";\n"
|
|
533
|
+
class_definition += f"{INDENT}}}\n"
|
|
534
|
+
class_definition += f"\n{INDENT}public static final Schema AVROSCHEMA = new Schema.Parser().parse(getAvroSchemaJson());"
|
|
535
|
+
else:
|
|
536
|
+
avro_schema_json = avro_schema_json.replace('"', '§')
|
|
537
|
+
avro_schema_json = f"\"+\n{INDENT}\"".join(
|
|
538
|
+
[avro_schema_json[i:i+80] for i in range(0, len(avro_schema_json), 80)])
|
|
539
|
+
avro_schema_json = avro_schema_json.replace('§', '\\"')
|
|
540
|
+
class_definition += f"\n\n{INDENT}public static final Schema AVROSCHEMA = new Schema.Parser().parse(\n{INDENT}\"{avro_schema_json}\");"
|
|
541
|
+
|
|
542
|
+
# Store the schema for tracking
|
|
543
|
+
avro_namespace = avro_schema.get('namespace', '')
|
|
544
|
+
schema_full_name = f"{avro_namespace}.{class_name}" if avro_namespace else class_name
|
|
545
|
+
self.generated_types_avro_namespace[schema_full_name] = "class"
|
|
546
|
+
|
|
396
547
|
class_definition += f"\n{INDENT}public static final DatumWriter<{class_name}> AVROWRITER = new SpecificDatumWriter<{class_name}>(AVROSCHEMA);"
|
|
397
548
|
class_definition += f"\n{INDENT}public static final DatumReader<{class_name}> AVROREADER = new SpecificDatumReader<{class_name}>(AVROSCHEMA);\n"
|
|
398
549
|
|
|
@@ -441,6 +592,10 @@ class AvroToJava:
|
|
|
441
592
|
if self.jackson_annotations:
|
|
442
593
|
class_definition += self.create_is_json_match_method(avro_schema, avro_schema.get('namespace', namespace), class_name)
|
|
443
594
|
|
|
595
|
+
# Add equals() and hashCode() methods
|
|
596
|
+
class_definition += self.generate_equals_method(class_name, avro_schema.get('fields', []), namespace)
|
|
597
|
+
class_definition += self.generate_hashcode_method(class_name, avro_schema.get('fields', []), namespace)
|
|
598
|
+
|
|
444
599
|
class_definition += "\n}"
|
|
445
600
|
|
|
446
601
|
if write_file:
|
|
@@ -465,7 +620,7 @@ class AvroToJava:
|
|
|
465
620
|
if field_name == class_name:
|
|
466
621
|
field_name += "_"
|
|
467
622
|
field_type = self.convert_avro_type_to_java(class_name, field_name, field['type'], parent_namespace)
|
|
468
|
-
predicate, clause = self.get_is_json_match_clause(class_name, field_name, field_type)
|
|
623
|
+
predicate, clause = self.get_is_json_match_clause(class_name, field_name, field_type, field)
|
|
469
624
|
field_defs += clause
|
|
470
625
|
if predicate:
|
|
471
626
|
predicates += predicate + "\n"
|
|
@@ -475,12 +630,24 @@ class AvroToJava:
|
|
|
475
630
|
class_definition += f";\n{INDENT}}}"
|
|
476
631
|
return class_definition
|
|
477
632
|
|
|
478
|
-
def get_is_json_match_clause(self, class_name: str, field_name: str, field_type: JavaType) -> Tuple[str, str]:
|
|
633
|
+
def get_is_json_match_clause(self, class_name: str, field_name: str, field_type: JavaType, field: Dict = None) -> Tuple[str, str]:
|
|
479
634
|
""" Generates the isJsonMatch clause for a field using Jackson """
|
|
480
635
|
class_definition = ''
|
|
481
636
|
predicates = ''
|
|
482
637
|
field_name_js = field_name
|
|
483
|
-
|
|
638
|
+
|
|
639
|
+
# Check if field is nullable (Avro union with null)
|
|
640
|
+
is_nullable = False
|
|
641
|
+
if field and 'type' in field:
|
|
642
|
+
avro_type = field['type']
|
|
643
|
+
if isinstance(avro_type, list) and 'null' in avro_type:
|
|
644
|
+
is_nullable = True
|
|
645
|
+
|
|
646
|
+
is_optional = is_nullable or self.is_java_optional_type(field_type)
|
|
647
|
+
|
|
648
|
+
# Check if this is a const field (e.g., discriminator)
|
|
649
|
+
has_const = field and 'const' in field and field['const'] is not None
|
|
650
|
+
const_value = field['const'] if has_const else None
|
|
484
651
|
|
|
485
652
|
if is_optional:
|
|
486
653
|
node_check = f"!node.has(\"{field_name_js}\") || node.get(\"{field_name_js}\").isNull() || node.get(\"{field_name_js}\")"
|
|
@@ -496,9 +663,9 @@ class AvroToJava:
|
|
|
496
663
|
elif field_type.type_name == 'long' or field_type.type_name == 'Long':
|
|
497
664
|
class_definition += f"({node_check}.canConvertToLong())"
|
|
498
665
|
elif field_type.type_name == 'float' or field_type.type_name == 'Float':
|
|
499
|
-
class_definition += f"({node_check}.
|
|
666
|
+
class_definition += f"({node_check}.isNumber())"
|
|
500
667
|
elif field_type.type_name == 'double' or field_type.type_name == 'Double':
|
|
501
|
-
class_definition += f"({node_check}.
|
|
668
|
+
class_definition += f"({node_check}.isNumber())"
|
|
502
669
|
elif field_type.type_name == 'BigDecimal':
|
|
503
670
|
class_definition += f"({node_check}.isBigDecimal())"
|
|
504
671
|
elif field_type.type_name == 'boolean' or field_type.type_name == 'Boolean':
|
|
@@ -551,9 +718,19 @@ class AvroToJava:
|
|
|
551
718
|
predicates += pred + ";"
|
|
552
719
|
class_definition += f"(node.has(\"{field_name_js}\") && val{field_name_js}.test(node.get(\"{field_name_js}\")))"
|
|
553
720
|
elif field_type.is_class:
|
|
554
|
-
|
|
721
|
+
if is_optional:
|
|
722
|
+
class_definition += f"(!node.has(\"{field_name_js}\") || node.get(\"{field_name_js}\").isNull() || {field_type.type_name}.isJsonMatch(node.get(\"{field_name_js}\")))"
|
|
723
|
+
else:
|
|
724
|
+
class_definition += f"(node.has(\"{field_name_js}\") && {field_type.type_name}.isJsonMatch(node.get(\"{field_name_js}\")))"
|
|
555
725
|
elif field_type.is_enum:
|
|
556
|
-
|
|
726
|
+
# For const enum fields (discriminators), check the exact value
|
|
727
|
+
if has_const:
|
|
728
|
+
# const_value is the string value from the schema, not the enum qualified name
|
|
729
|
+
# Ensure we use the raw string value for comparison
|
|
730
|
+
raw_const = const_value if isinstance(const_value, str) else str(const_value)
|
|
731
|
+
class_definition += f"(node.has(\"{field_name_js}\") && node.get(\"{field_name_js}\").isTextual() && node.get(\"{field_name_js}\").asText().equals(\"{raw_const}\"))"
|
|
732
|
+
else:
|
|
733
|
+
class_definition += f"(node.get(\"{field_name_js}\").isTextual() && Enum.valueOf({field_type.type_name}.class, node.get(\"{field_name_js}\").asText()) != null)"
|
|
557
734
|
else:
|
|
558
735
|
is_union = False
|
|
559
736
|
field_union = pascal(field_name) + 'Union'
|
|
@@ -640,6 +817,177 @@ class AvroToJava:
|
|
|
640
817
|
|
|
641
818
|
return class_definition
|
|
642
819
|
|
|
820
|
+
def generate_equals_method(self, class_name: str, fields: List[Dict], parent_package: str) -> str:
|
|
821
|
+
""" Generates the equals method for a class """
|
|
822
|
+
equals_method = f"\n\n{INDENT}@Override\n{INDENT}public boolean equals(Object obj) {{\n"
|
|
823
|
+
equals_method += f"{INDENT * 2}if (this == obj) return true;\n"
|
|
824
|
+
equals_method += f"{INDENT * 2}if (obj == null || getClass() != obj.getClass()) return false;\n"
|
|
825
|
+
equals_method += f"{INDENT * 2}{class_name} other = ({class_name}) obj;\n"
|
|
826
|
+
|
|
827
|
+
if not fields:
|
|
828
|
+
equals_method += f"{INDENT * 2}return true;\n"
|
|
829
|
+
else:
|
|
830
|
+
for index, field in enumerate(fields):
|
|
831
|
+
field_name = pascal(field['name']) if self.pascal_properties else field['name']
|
|
832
|
+
field_name = self.safe_identifier(field_name, class_name)
|
|
833
|
+
field_type = self.convert_avro_type_to_java(class_name, field_name, field['type'], parent_package)
|
|
834
|
+
|
|
835
|
+
if field_type.type_name in ['int', 'long', 'float', 'double', 'boolean', 'byte', 'short', 'char']:
|
|
836
|
+
equals_method += f"{INDENT * 2}if (this.{field_name} != other.{field_name}) return false;\n"
|
|
837
|
+
elif field_type.type_name == 'byte[]':
|
|
838
|
+
equals_method += f"{INDENT * 2}if (!java.util.Arrays.equals(this.{field_name}, other.{field_name})) return false;\n"
|
|
839
|
+
else:
|
|
840
|
+
equals_method += f"{INDENT * 2}if (this.{field_name} == null ? other.{field_name} != null : !this.{field_name}.equals(other.{field_name})) return false;\n"
|
|
841
|
+
|
|
842
|
+
equals_method += f"{INDENT * 2}return true;\n"
|
|
843
|
+
|
|
844
|
+
equals_method += f"{INDENT}}}\n"
|
|
845
|
+
return equals_method
|
|
846
|
+
|
|
847
|
+
def generate_hashcode_method(self, class_name: str, fields: List[Dict], parent_package: str) -> str:
|
|
848
|
+
""" Generates the hashCode method for a class """
|
|
849
|
+
hashcode_method = f"\n{INDENT}@Override\n{INDENT}public int hashCode() {{\n"
|
|
850
|
+
|
|
851
|
+
if not fields:
|
|
852
|
+
hashcode_method += f"{INDENT * 2}return 0;\n"
|
|
853
|
+
else:
|
|
854
|
+
hashcode_method += f"{INDENT * 2}int result = 1;\n"
|
|
855
|
+
temp_counter = 0
|
|
856
|
+
for field in fields:
|
|
857
|
+
field_name = pascal(field['name']) if self.pascal_properties else field['name']
|
|
858
|
+
field_name = self.safe_identifier(field_name, class_name)
|
|
859
|
+
field_type = self.convert_avro_type_to_java(class_name, field_name, field['type'], parent_package)
|
|
860
|
+
|
|
861
|
+
if field_type.type_name == 'boolean':
|
|
862
|
+
hashcode_method += f"{INDENT * 2}result = 31 * result + (this.{field_name} ? 1 : 0);\n"
|
|
863
|
+
elif field_type.type_name in ['byte', 'short', 'char', 'int']:
|
|
864
|
+
hashcode_method += f"{INDENT * 2}result = 31 * result + this.{field_name};\n"
|
|
865
|
+
elif field_type.type_name == 'long':
|
|
866
|
+
hashcode_method += f"{INDENT * 2}result = 31 * result + (int)(this.{field_name} ^ (this.{field_name} >>> 32));\n"
|
|
867
|
+
elif field_type.type_name == 'float':
|
|
868
|
+
hashcode_method += f"{INDENT * 2}result = 31 * result + Float.floatToIntBits(this.{field_name});\n"
|
|
869
|
+
elif field_type.type_name == 'double':
|
|
870
|
+
temp_var = f"temp{temp_counter}" if temp_counter > 0 else "temp"
|
|
871
|
+
temp_counter += 1
|
|
872
|
+
hashcode_method += f"{INDENT * 2}long {temp_var} = Double.doubleToLongBits(this.{field_name});\n"
|
|
873
|
+
hashcode_method += f"{INDENT * 2}result = 31 * result + (int)({temp_var} ^ ({temp_var} >>> 32));\n"
|
|
874
|
+
elif field_type.type_name == 'byte[]':
|
|
875
|
+
hashcode_method += f"{INDENT * 2}result = 31 * result + java.util.Arrays.hashCode(this.{field_name});\n"
|
|
876
|
+
else:
|
|
877
|
+
hashcode_method += f"{INDENT * 2}result = 31 * result + (this.{field_name} != null ? this.{field_name}.hashCode() : 0);\n"
|
|
878
|
+
|
|
879
|
+
hashcode_method += f"{INDENT * 2}return result;\n"
|
|
880
|
+
|
|
881
|
+
hashcode_method += f"{INDENT}}}\n"
|
|
882
|
+
return hashcode_method
|
|
883
|
+
|
|
884
|
+
def generate_union_equals_method(self, union_class_name: str, union_types: List['AvroToJava.JavaType']) -> str:
|
|
885
|
+
""" Generates the equals method for a union class """
|
|
886
|
+
equals_method = f"\n{INDENT}@Override\n{INDENT}public boolean equals(Object obj) {{\n"
|
|
887
|
+
equals_method += f"{INDENT * 2}if (this == obj) return true;\n"
|
|
888
|
+
equals_method += f"{INDENT * 2}if (obj == null || getClass() != obj.getClass()) return false;\n"
|
|
889
|
+
equals_method += f"{INDENT * 2}{union_class_name} other = ({union_class_name}) obj;\n"
|
|
890
|
+
|
|
891
|
+
# In a union, only ONE field should be set at a time
|
|
892
|
+
# We need to check if the same field is set in both objects and if the values match
|
|
893
|
+
for i, union_type in enumerate(union_types):
|
|
894
|
+
# we need the nullable version (wrapper) of all primitive types
|
|
895
|
+
if self.is_java_primitive(union_type):
|
|
896
|
+
union_type = self.map_primitive_to_java(union_type.type_name, True)
|
|
897
|
+
|
|
898
|
+
union_variable_name = union_type.type_name
|
|
899
|
+
if union_type.type_name.startswith("Map<"):
|
|
900
|
+
union_variable_name = flatten_type_name(union_type.type_name)
|
|
901
|
+
elif union_type.type_name.startswith("List<"):
|
|
902
|
+
union_variable_name = flatten_type_name(union_type.type_name)
|
|
903
|
+
elif union_type.type_name == "byte[]":
|
|
904
|
+
union_variable_name = "Bytes"
|
|
905
|
+
else:
|
|
906
|
+
union_variable_name = union_type.type_name.rsplit('.', 1)[-1]
|
|
907
|
+
|
|
908
|
+
field_name = f"_{camel(union_variable_name)}"
|
|
909
|
+
|
|
910
|
+
# Check if this field is set in this object
|
|
911
|
+
if i == 0:
|
|
912
|
+
equals_method += f"{INDENT * 2}if (this.{field_name} != null) {{\n"
|
|
913
|
+
else:
|
|
914
|
+
equals_method += f"{INDENT * 2}else if (this.{field_name} != null) {{\n"
|
|
915
|
+
|
|
916
|
+
# If set, check if it's also set in the other object with the same value
|
|
917
|
+
if union_type.type_name == 'byte[]':
|
|
918
|
+
equals_method += f"{INDENT * 3}return java.util.Arrays.equals(this.{field_name}, other.{field_name});\n"
|
|
919
|
+
else:
|
|
920
|
+
equals_method += f"{INDENT * 3}return this.{field_name}.equals(other.{field_name});\n"
|
|
921
|
+
|
|
922
|
+
equals_method += f"{INDENT * 2}}}\n"
|
|
923
|
+
|
|
924
|
+
# If no field is set in this, check other is also unset
|
|
925
|
+
equals_method += f"{INDENT * 2}// Both are null/unset - check other is also unset\n"
|
|
926
|
+
equals_method += f"{INDENT * 2}return "
|
|
927
|
+
for i, union_type in enumerate(union_types):
|
|
928
|
+
# we need the nullable version (wrapper) of all primitive types
|
|
929
|
+
if self.is_java_primitive(union_type):
|
|
930
|
+
union_type = self.map_primitive_to_java(union_type.type_name, True)
|
|
931
|
+
|
|
932
|
+
union_variable_name = union_type.type_name
|
|
933
|
+
if union_type.type_name.startswith("Map<"):
|
|
934
|
+
union_variable_name = flatten_type_name(union_type.type_name)
|
|
935
|
+
elif union_type.type_name.startswith("List<"):
|
|
936
|
+
union_variable_name = flatten_type_name(union_type.type_name)
|
|
937
|
+
elif union_type.type_name == "byte[]":
|
|
938
|
+
union_variable_name = "Bytes"
|
|
939
|
+
else:
|
|
940
|
+
union_variable_name = union_type.type_name.rsplit('.', 1)[-1]
|
|
941
|
+
field_name = f"_{camel(union_variable_name)}"
|
|
942
|
+
if i > 0:
|
|
943
|
+
equals_method += " && "
|
|
944
|
+
equals_method += f"other.{field_name} == null"
|
|
945
|
+
equals_method += ";\n"
|
|
946
|
+
equals_method += f"{INDENT}}}\n"
|
|
947
|
+
return equals_method
|
|
948
|
+
|
|
949
|
+
def generate_union_hashcode_method(self, union_class_name: str, union_types: List['AvroToJava.JavaType']) -> str:
    """Build the Java ``hashCode()`` override for a generated union class.

    The emitted Java is an ``if`` / ``else if`` chain with one branch per
    union member, in the order given by ``union_types``. The first backing
    field that is non-null supplies the hash; when no field is set the
    generated method returns ``0``.

    Args:
        union_class_name: Name of the union class being generated. It is
            accepted for signature parity but not used when building the text.
        union_types: Java types of the union members, in declaration order.

    Returns:
        The Java source text of the ``hashCode()`` method, starting with a
        blank line and the ``@Override`` annotation.
    """
    fragments = [f"\n{INDENT}@Override\n{INDENT}public int hashCode() {{\n"]

    for position, member in enumerate(union_types):
        # Primitive members are swapped for the type returned by
        # map_primitive_to_java(..., True) so the backing field can be
        # compared against null in the generated code.
        if self.is_java_primitive(member):
            member = self.map_primitive_to_java(member.type_name, True)

        java_type = member.type_name

        # Derive the name of the private backing field for this member.
        if java_type == "byte[]":
            variable_name = "Bytes"
        elif java_type.startswith(("Map<", "List<")):
            variable_name = flatten_type_name(java_type)
        else:
            # Drop any package qualifier and keep the simple type name.
            variable_name = java_type.rsplit('.', 1)[-1]
        backing_field = f"_{camel(variable_name)}"

        # The first member opens the chain; every later one continues it.
        guard = (
            f"{INDENT * 2}if (this.{backing_field} != null) {{\n"
            if position == 0
            else f"{INDENT * 2}else if (this.{backing_field} != null) {{\n"
        )
        fragments.append(guard)

        # byte[] is hashed via java.util.Arrays.hashCode; every other
        # member type uses its own hashCode().
        if java_type == 'byte[]':
            fragments.append(f"{INDENT * 3}return java.util.Arrays.hashCode(this.{backing_field});\n")
        else:
            fragments.append(f"{INDENT * 3}return this.{backing_field}.hashCode();\n")

        fragments.append(f"{INDENT * 2}}}\n")

    # Fallback when no member field is set.
    fragments.append(f"{INDENT * 2}return 0;\n")
    fragments.append(f"{INDENT}}}\n")
    return "".join(fragments)
|
|
990
|
+
|
|
643
991
|
def generate_avro_get_method(self, class_name: str, fields: List[Dict], parent_package: str) -> str:
|
|
644
992
|
""" Generates the get method for SpecificRecord """
|
|
645
993
|
get_method = f"\n{INDENT}@Override\n{INDENT}public Object get(int field$) {{\n"
|
|
@@ -648,9 +996,37 @@ class AvroToJava:
|
|
|
648
996
|
field_name = pascal(field['name']) if self.pascal_properties else field['name']
|
|
649
997
|
field_name = self.safe_identifier(field_name, class_name)
|
|
650
998
|
field_type = self.convert_avro_type_to_java(class_name, field_name, field['type'], parent_package)
|
|
651
|
-
|
|
652
|
-
|
|
999
|
+
|
|
1000
|
+
# Check if field type is a union
|
|
1001
|
+
is_union = field_type.type_name in self.generated_types_avro_namespace and self.generated_types_avro_namespace[field_type.type_name] == "union"
|
|
1002
|
+
is_union = is_union or (field_type.type_name in self.generated_types_java_package and self.generated_types_java_package[field_type.type_name] == "union")
|
|
1003
|
+
# Also check if it's an Object with union_types (non-Jackson union)
|
|
1004
|
+
is_union = is_union or (field_type.type_name == "Object" and field_type.union_types is not None and len(field_type.union_types) > 1)
|
|
1005
|
+
|
|
1006
|
+
# Check if field is List<Union> or Map<String, Union>
|
|
1007
|
+
is_list_of_unions = field_type.type_name.startswith("List<") and field_type.union_types and len(field_type.union_types) > 0
|
|
1008
|
+
is_map_of_unions = field_type.type_name.startswith("Map<") and field_type.union_types and len(field_type.union_types) > 0
|
|
1009
|
+
|
|
1010
|
+
# For union fields, return the unwrapped object using toObject()
|
|
1011
|
+
# This allows Avro's SpecificDatumWriter to serialize the actual value (String, Integer, etc.)
|
|
1012
|
+
# instead of trying to serialize our custom wrapper class
|
|
1013
|
+
# The put() method will wrap it back using new UnionType(value$)
|
|
1014
|
+
if is_union:
|
|
1015
|
+
get_method += f"{INDENT * 3}case {index}: return this.{field_name} != null ? this.{field_name}.toObject() : null;\n"
|
|
1016
|
+
elif is_list_of_unions:
|
|
1017
|
+
# For List<Union>, unwrap each element by calling toObject() on it
|
|
1018
|
+
# Avro will deserialize this as List<Object> which put() will rewrap
|
|
1019
|
+
get_method += f"{INDENT * 3}case {index}: return this.{field_name} != null ? this.{field_name}.stream().map(u -> u != null ? u.toObject() : null).collect(java.util.stream.Collectors.toList()) : null;\n"
|
|
1020
|
+
elif is_map_of_unions:
|
|
1021
|
+
# For Map<String, Union>, unwrap each value by calling toObject() on it
|
|
1022
|
+
get_method += f"{INDENT * 3}case {index}: return this.{field_name} != null ? this.{field_name}.entrySet().stream().collect(java.util.stream.Collectors.toMap(java.util.Map.Entry::getKey, e -> e.getValue() != null ? e.getValue().toObject() : null)) : null;\n"
|
|
1023
|
+
elif field_type.is_enum:
|
|
1024
|
+
# For enum fields, convert to GenericEnumSymbol for Avro serialization
|
|
1025
|
+
# This allows SpecificDatumWriter to serialize enums inside unions correctly
|
|
1026
|
+
get_method += f"{INDENT * 3}case {index}: return this.{field_name} != null ? new GenericData.EnumSymbol({field_type.type_name}.SCHEMA, this.{field_name}.name()) : null;\n"
|
|
653
1027
|
else:
|
|
1028
|
+
# For all other field types, return the field as-is
|
|
1029
|
+
# Avro's SpecificDatumWriter will handle serialization internally
|
|
654
1030
|
get_method += f"{INDENT * 3}case {index}: return this.{field_name};\n"
|
|
655
1031
|
get_method += f"{INDENT * 3}default: throw new AvroRuntimeException(\"Bad index: \" + field$);\n"
|
|
656
1032
|
get_method += f"{INDENT * 2}}}\n{INDENT}}}\n"
|
|
@@ -662,16 +1038,158 @@ class AvroToJava:
|
|
|
662
1038
|
put_method = f"\n{INDENT}@Override\n{INDENT}public void put(int field$, Object value$) {{\n"
|
|
663
1039
|
put_method += f"{INDENT * 2}switch (field$) {{\n"
|
|
664
1040
|
for index, field in enumerate(fields):
|
|
1041
|
+
# Skip const fields as they are final and cannot be reassigned
|
|
1042
|
+
if "const" in field:
|
|
1043
|
+
put_method += f"{INDENT * 3}case {index}: break; // const field, cannot be set\n"
|
|
1044
|
+
continue
|
|
1045
|
+
|
|
665
1046
|
field_name = pascal(field['name']) if self.pascal_properties else field['name']
|
|
666
1047
|
field_name = self.safe_identifier(field_name, class_name)
|
|
667
1048
|
field_type = self.convert_avro_type_to_java(class_name, field_name, field['type'], parent_package)
|
|
668
1049
|
if field_type.type_name.startswith("List<") or field_type.type_name.startswith("Map<"):
|
|
669
1050
|
suppress_unchecked = True
|
|
670
|
-
|
|
671
|
-
|
|
1051
|
+
|
|
1052
|
+
# Check if the field type is a generated type (union, class, or enum)
|
|
1053
|
+
type_kind = None
|
|
1054
|
+
if field_type.type_name in self.generated_types_avro_namespace:
|
|
1055
|
+
type_kind = self.generated_types_avro_namespace[field_type.type_name]
|
|
1056
|
+
elif field_type.type_name in self.generated_types_java_package:
|
|
1057
|
+
type_kind = self.generated_types_java_package[field_type.type_name]
|
|
1058
|
+
|
|
1059
|
+
# Check if this is List<Union> or Map<String, Union>
|
|
1060
|
+
is_list_of_unions = field_type.type_name.startswith("List<") and field_type.union_types and len(field_type.union_types) > 0
|
|
1061
|
+
is_map_of_unions = field_type.type_name.startswith("Map<") and field_type.union_types and len(field_type.union_types) > 0
|
|
1062
|
+
|
|
1063
|
+
if is_list_of_unions:
|
|
1064
|
+
# Extract the union type name from List<UnionType>
|
|
1065
|
+
union_type_match = field_type.type_name[5:-1] # Remove "List<" and ">"
|
|
1066
|
+
# For List<Union>, handle both wrapped List<UnionWrapper> and unwrapped List<Object>
|
|
1067
|
+
# Avro deserialization provides List<Object>, so we need to wrap each element
|
|
1068
|
+
put_method += f"{INDENT * 3}case {index}: {{\n"
|
|
1069
|
+
put_method += f"{INDENT * 4}if (value$ instanceof List<?>) {{\n"
|
|
1070
|
+
put_method += f"{INDENT * 5}List<?> list = (List<?>)value$;\n"
|
|
1071
|
+
put_method += f"{INDENT * 5}if (list.isEmpty() || !(list.get(0) instanceof {union_type_match})) {{\n"
|
|
1072
|
+
put_method += f"{INDENT * 6}// Unwrapped from Avro - need to wrap, handling nulls\n"
|
|
1073
|
+
put_method += f"{INDENT * 6}this.{field_name} = list.stream().map(v -> v != null ? new {union_type_match}(v) : null).collect(java.util.stream.Collectors.toList());\n"
|
|
1074
|
+
put_method += f"{INDENT * 5}}} else {{\n"
|
|
1075
|
+
put_method += f"{INDENT * 6}// Already wrapped\n"
|
|
1076
|
+
put_method += f"{INDENT * 6}this.{field_name} = ({field_type.type_name})value$;\n"
|
|
1077
|
+
put_method += f"{INDENT * 5}}}\n"
|
|
1078
|
+
put_method += f"{INDENT * 4}}}\n"
|
|
1079
|
+
put_method += f"{INDENT * 4}break;\n"
|
|
1080
|
+
put_method += f"{INDENT * 3}}}\n"
|
|
1081
|
+
elif is_map_of_unions:
|
|
1082
|
+
# Extract the union type name from Map<String, UnionType>
|
|
1083
|
+
union_type_match = field_type.type_name.split(",")[1].strip()[:-1] # Remove "Map<String, " and ">"
|
|
1084
|
+
put_method += f"{INDENT * 3}case {index}: {{\n"
|
|
1085
|
+
put_method += f"{INDENT * 4}if (value$ instanceof Map<?,?>) {{\n"
|
|
1086
|
+
put_method += f"{INDENT * 5}Map<?,?> map = (Map<?,?>)value$;\n"
|
|
1087
|
+
put_method += f"{INDENT * 5}if (map.isEmpty() || !(map.values().iterator().next() instanceof {union_type_match})) {{\n"
|
|
1088
|
+
put_method += f"{INDENT * 6}// Unwrapped from Avro - need to wrap, handling nulls\n"
|
|
1089
|
+
put_method += f"{INDENT * 6}this.{field_name} = map.entrySet().stream().collect(java.util.stream.Collectors.toMap(e -> (String)e.getKey(), e -> e.getValue() != null ? new {union_type_match}(e.getValue()) : null));\n"
|
|
1090
|
+
put_method += f"{INDENT * 5}}} else {{\n"
|
|
1091
|
+
put_method += f"{INDENT * 6}// Already wrapped\n"
|
|
1092
|
+
put_method += f"{INDENT * 6}this.{field_name} = ({field_type.type_name})value$;\n"
|
|
1093
|
+
put_method += f"{INDENT * 5}}}\n"
|
|
1094
|
+
put_method += f"{INDENT * 4}}}\n"
|
|
1095
|
+
put_method += f"{INDENT * 4}break;\n"
|
|
1096
|
+
put_method += f"{INDENT * 3}}}\n"
|
|
1097
|
+
elif type_kind == "union":
|
|
1098
|
+
# Unions can contain primitives or records - use the appropriate constructor
|
|
1099
|
+
# If Avro passes a GenericData.Record, use the GenericData.Record constructor
|
|
1100
|
+
# Otherwise use the Object constructor for already-constructed types
|
|
1101
|
+
put_method += f"{INDENT * 3}case {index}: this.{field_name} = value$ instanceof GenericData.Record ? new {field_type.type_name}((GenericData.Record)value$) : new {field_type.type_name}(value$); break;\n"
|
|
1102
|
+
elif type_kind == "class":
|
|
1103
|
+
# Record types need to be converted from GenericData.Record if that's what Avro passes
|
|
1104
|
+
put_method += f"{INDENT * 3}case {index}: this.{field_name} = value$ instanceof GenericData.Record ? new {field_type.type_name}((GenericData.Record)value$) : ({field_type.type_name})value$; break;\n"
|
|
1105
|
+
elif type_kind == "enum":
|
|
1106
|
+
# Enums need to be converted from GenericData.EnumSymbol
|
|
1107
|
+
put_method += f"{INDENT * 3}case {index}: this.{field_name} = value$ instanceof GenericData.EnumSymbol ? {field_type.type_name}.valueOf(value$.toString()) : ({field_type.type_name})value$; break;\n"
|
|
672
1108
|
else:
|
|
673
|
-
if
|
|
674
|
-
|
|
1109
|
+
# Check if this is a List<RecordType> or Map<String,RecordType>
|
|
1110
|
+
is_list_of_records = False
|
|
1111
|
+
is_map_of_records = False
|
|
1112
|
+
if field_type.type_name.startswith("List<"):
|
|
1113
|
+
item_type = field_type.type_name[5:-1]
|
|
1114
|
+
if item_type in self.generated_types_java_package and self.generated_types_java_package[item_type] == "class":
|
|
1115
|
+
is_list_of_records = True
|
|
1116
|
+
elif field_type.type_name.startswith("Map<"):
|
|
1117
|
+
# Extract value type from Map<String, ValueType>
|
|
1118
|
+
value_type = field_type.type_name.split(",")[1].strip()[:-1]
|
|
1119
|
+
if value_type in self.generated_types_java_package and self.generated_types_java_package[value_type] == "class":
|
|
1120
|
+
is_map_of_records = True
|
|
1121
|
+
|
|
1122
|
+
if is_list_of_records:
|
|
1123
|
+
item_type = field_type.type_name[5:-1]
|
|
1124
|
+
put_method += f"{INDENT * 3}case {index}: {{\n"
|
|
1125
|
+
put_method += f"{INDENT * 4}if (value$ instanceof List<?>) {{\n"
|
|
1126
|
+
put_method += f"{INDENT * 5}List<?> list = (List<?>)value$;\n"
|
|
1127
|
+
put_method += f"{INDENT * 5}if (list.isEmpty() || !(list.get(0) instanceof {item_type})) {{\n"
|
|
1128
|
+
put_method += f"{INDENT * 6}// Unwrapped from Avro - need to wrap GenericData.Record objects\n"
|
|
1129
|
+
put_method += f"{INDENT * 6}this.{field_name} = list.stream().map(item -> item instanceof GenericData.Record ? new {item_type}((GenericData.Record)item) : ({item_type})item).collect(java.util.stream.Collectors.toList());\n"
|
|
1130
|
+
put_method += f"{INDENT * 5}}} else {{\n"
|
|
1131
|
+
put_method += f"{INDENT * 6}// Already wrapped\n"
|
|
1132
|
+
put_method += f"{INDENT * 6}this.{field_name} = ({field_type.type_name})value$;\n"
|
|
1133
|
+
put_method += f"{INDENT * 5}}}\n"
|
|
1134
|
+
put_method += f"{INDENT * 4}}} else {{\n"
|
|
1135
|
+
put_method += f"{INDENT * 5}// Handle null or other types\n"
|
|
1136
|
+
put_method += f"{INDENT * 5}this.{field_name} = value$ != null ? ({field_type.type_name})value$ : null;\n"
|
|
1137
|
+
put_method += f"{INDENT * 4}}}\n"
|
|
1138
|
+
put_method += f"{INDENT * 4}break;\n"
|
|
1139
|
+
put_method += f"{INDENT * 3}}}\n"
|
|
1140
|
+
elif is_map_of_records:
|
|
1141
|
+
value_type = field_type.type_name.split(",")[1].strip()[:-1]
|
|
1142
|
+
put_method += f"{INDENT * 3}case {index}: {{\n"
|
|
1143
|
+
put_method += f"{INDENT * 4}if (value$ instanceof Map<?,?>) {{\n"
|
|
1144
|
+
put_method += f"{INDENT * 5}Map<?,?> map = (Map<?,?>)value$;\n"
|
|
1145
|
+
put_method += f"{INDENT * 5}if (map.isEmpty() || !(map.values().iterator().next() instanceof {value_type})) {{\n"
|
|
1146
|
+
put_method += f"{INDENT * 6}// Unwrapped from Avro - need to wrap GenericData.Record objects\n"
|
|
1147
|
+
put_method += f"{INDENT * 6}this.{field_name} = map.entrySet().stream().collect(java.util.stream.Collectors.toMap(e -> (String)e.getKey(), e -> e.getValue() instanceof GenericData.Record ? new {value_type}((GenericData.Record)e.getValue()) : ({value_type})e.getValue()));\n"
|
|
1148
|
+
put_method += f"{INDENT * 5}}} else {{\n"
|
|
1149
|
+
put_method += f"{INDENT * 6}// Already wrapped\n"
|
|
1150
|
+
put_method += f"{INDENT * 6}this.{field_name} = ({field_type.type_name})value$;\n"
|
|
1151
|
+
put_method += f"{INDENT * 5}}}\n"
|
|
1152
|
+
put_method += f"{INDENT * 4}}} else {{\n"
|
|
1153
|
+
put_method += f"{INDENT * 5}// Handle null or other types\n"
|
|
1154
|
+
put_method += f"{INDENT * 5}this.{field_name} = value$ != null ? ({field_type.type_name})value$ : null;\n"
|
|
1155
|
+
put_method += f"{INDENT * 4}}}\n"
|
|
1156
|
+
put_method += f"{INDENT * 4}break;\n"
|
|
1157
|
+
put_method += f"{INDENT * 3}}}\n"
|
|
1158
|
+
elif field_type.type_name == 'String':
|
|
1159
|
+
# Handle null values for String fields
|
|
1160
|
+
put_method += f"{INDENT * 3}case {index}: this.{field_name} = value$ != null ? value$.toString() : null; break;\n"
|
|
1161
|
+
elif field_type.type_name.startswith("List<"):
|
|
1162
|
+
# Extract the element type
|
|
1163
|
+
element_type = field_type.type_name[5:-1]
|
|
1164
|
+
# Check if it's a List of enums
|
|
1165
|
+
if element_type in self.generated_types_java_package and self.generated_types_java_package[element_type] == "enum":
|
|
1166
|
+
# For List<Enum>, convert GenericEnumSymbol to actual enum values
|
|
1167
|
+
put_method += f"{INDENT * 3}case {index}: {{\n"
|
|
1168
|
+
put_method += f"{INDENT * 4}if (value$ instanceof List<?>) {{\n"
|
|
1169
|
+
put_method += f"{INDENT * 5}List<?> list = (List<?>)value$;\n"
|
|
1170
|
+
put_method += f"{INDENT * 5}this.{field_name} = list.stream().map(item -> item instanceof GenericData.EnumSymbol ? {element_type}.valueOf(item.toString()) : ({element_type})item).collect(java.util.stream.Collectors.toList());\n"
|
|
1171
|
+
put_method += f"{INDENT * 4}}} else {{\n"
|
|
1172
|
+
put_method += f"{INDENT * 5}this.{field_name} = null;\n"
|
|
1173
|
+
put_method += f"{INDENT * 4}}}\n"
|
|
1174
|
+
put_method += f"{INDENT * 4}break;\n"
|
|
1175
|
+
put_method += f"{INDENT * 3}}}\n"
|
|
1176
|
+
elif element_type == "String":
|
|
1177
|
+
# For List<String>, convert Utf8 to String
|
|
1178
|
+
put_method += f"{INDENT * 3}case {index}: {{\n"
|
|
1179
|
+
put_method += f"{INDENT * 4}if (value$ instanceof List<?>) {{\n"
|
|
1180
|
+
put_method += f"{INDENT * 5}List<?> list = (List<?>)value$;\n"
|
|
1181
|
+
put_method += f"{INDENT * 5}this.{field_name} = list.stream().map(item -> item != null ? item.toString() : null).collect(java.util.stream.Collectors.toList());\n"
|
|
1182
|
+
put_method += f"{INDENT * 4}}} else {{\n"
|
|
1183
|
+
put_method += f"{INDENT * 5}this.{field_name} = null;\n"
|
|
1184
|
+
put_method += f"{INDENT * 4}}}\n"
|
|
1185
|
+
put_method += f"{INDENT * 4}break;\n"
|
|
1186
|
+
put_method += f"{INDENT * 3}}}\n"
|
|
1187
|
+
else:
|
|
1188
|
+
# For other List types, create a defensive copy
|
|
1189
|
+
put_method += f"{INDENT * 3}case {index}: this.{field_name} = value$ instanceof List<?> ? new java.util.ArrayList<>(({field_type.type_name})value$) : null; break;\n"
|
|
1190
|
+
elif field_type.type_name.startswith("Map<"):
|
|
1191
|
+
# For any Map type, create a defensive copy to avoid sharing references
|
|
1192
|
+
put_method += f"{INDENT * 3}case {index}: this.{field_name} = value$ instanceof Map<?,?> ? new java.util.HashMap<>(({field_type.type_name})value$) : null; break;\n"
|
|
675
1193
|
else:
|
|
676
1194
|
put_method += f"{INDENT * 3}case {index}: this.{field_name} = ({field_type.type_name})value$; break;\n"
|
|
677
1195
|
put_method += f"{INDENT * 3}default: throw new AvroRuntimeException(\"Bad index: \" + field$);\n"
|
|
@@ -690,11 +1208,47 @@ class AvroToJava:
|
|
|
690
1208
|
enum_name = self.safe_identifier(avro_schema['name'])
|
|
691
1209
|
type_name = self.qualified_name(package.replace('/', '.'), enum_name)
|
|
692
1210
|
self.generated_types_avro_namespace[self.qualified_name(avro_schema.get('namespace', parent_package),avro_schema['name'])] = "enum"
|
|
693
|
-
self.generated_types_java_package[type_name] = "enum"
|
|
1211
|
+
self.generated_types_java_package[type_name] = "enum"
|
|
1212
|
+
self.generated_avro_schemas[type_name] = avro_schema
|
|
694
1213
|
symbols = avro_schema.get('symbols', [])
|
|
695
|
-
|
|
1214
|
+
# Convert symbols to valid Java identifiers, preserving case
|
|
1215
|
+
# Replace invalid chars, prepend _ if starts with digit or is a reserved word
|
|
1216
|
+
java_symbols = []
|
|
1217
|
+
for symbol in symbols:
|
|
1218
|
+
java_symbol = symbol.replace('-', '_').replace('.', '_')
|
|
1219
|
+
if java_symbol and java_symbol[0].isdigit():
|
|
1220
|
+
java_symbol = '_' + java_symbol
|
|
1221
|
+
# Check if the symbol is a Java reserved word and prefix with underscore
|
|
1222
|
+
if is_java_reserved_word(java_symbol):
|
|
1223
|
+
java_symbol = '_' + java_symbol
|
|
1224
|
+
java_symbols.append(java_symbol)
|
|
1225
|
+
symbols_str = ', '.join(java_symbols)
|
|
696
1226
|
enum_definition += f"public enum {enum_name} {{\n"
|
|
697
|
-
enum_definition += f"{INDENT}{symbols_str}
|
|
1227
|
+
enum_definition += f"{INDENT}{symbols_str}"
|
|
1228
|
+
|
|
1229
|
+
# Add Avro schema if annotations are enabled
|
|
1230
|
+
if self.avro_annotation:
|
|
1231
|
+
# Create inline schema for the enum
|
|
1232
|
+
enum_schema = {
|
|
1233
|
+
"type": "enum",
|
|
1234
|
+
"name": enum_name,
|
|
1235
|
+
"symbols": symbols
|
|
1236
|
+
}
|
|
1237
|
+
if 'namespace' in avro_schema:
|
|
1238
|
+
enum_schema['namespace'] = avro_schema['namespace']
|
|
1239
|
+
if 'doc' in avro_schema:
|
|
1240
|
+
enum_schema['doc'] = avro_schema['doc']
|
|
1241
|
+
|
|
1242
|
+
enum_schema_json = json.dumps(enum_schema)
|
|
1243
|
+
enum_schema_json = enum_schema_json.replace('"', '§')
|
|
1244
|
+
enum_schema_json = f"\"+\n{INDENT}\"".join(
|
|
1245
|
+
[enum_schema_json[i:i+80] for i in range(0, len(enum_schema_json), 80)])
|
|
1246
|
+
enum_schema_json = enum_schema_json.replace('§', '\\"')
|
|
1247
|
+
|
|
1248
|
+
enum_definition += f";\n\n{INDENT}public static final Schema SCHEMA = new Schema.Parser().parse(\n{INDENT}\"{enum_schema_json}\");\n"
|
|
1249
|
+
else:
|
|
1250
|
+
enum_definition += f";\n"
|
|
1251
|
+
|
|
698
1252
|
enum_definition += "}\n"
|
|
699
1253
|
if write_file:
|
|
700
1254
|
self.write_to_file(package, enum_name, enum_definition)
|
|
@@ -740,11 +1294,16 @@ class AvroToJava:
|
|
|
740
1294
|
f"{INDENT*1}private {union_type.type_name} _{camel(union_variable_name)};\n" + \
|
|
741
1295
|
f"{INDENT*1}public {union_type.type_name} get{union_variable_name}() {{ return _{camel(union_variable_name)}; }}\n";
|
|
742
1296
|
|
|
743
|
-
|
|
1297
|
+
# For toObject(), wrap enums in GenericData.EnumSymbol so Avro can serialize them
|
|
1298
|
+
if union_type.is_enum:
|
|
1299
|
+
class_definition_toobject += f"{INDENT*2}if (_{camel(union_variable_name)} != null) {{\n{INDENT*3}return new GenericData.EnumSymbol({union_type.type_name}.SCHEMA, _{camel(union_variable_name)}.name());\n{INDENT*2}}}\n"
|
|
1300
|
+
else:
|
|
1301
|
+
class_definition_toobject += f"{INDENT*2}if (_{camel(union_variable_name)} != null) {{\n{INDENT*3}return _{camel(union_variable_name)};\n{INDENT*2}}}\n"
|
|
744
1302
|
|
|
1303
|
+
# GenericData.Record constructor only handles record types - primitives come through fromObject
|
|
745
1304
|
if self.avro_annotation and union_type.is_class:
|
|
746
|
-
class_definition_genericrecordctor += f"{INDENT*2}if (
|
|
747
|
-
class_definition_genericrecordctor += f"
|
|
1305
|
+
class_definition_genericrecordctor += f"{INDENT*2}if (record.getSchema().getFullName().equals({union_type.type_name}.AVROSCHEMA.getFullName())) {{\n"
|
|
1306
|
+
class_definition_genericrecordctor += f"{INDENT*3}this._{camel(union_variable_name)} = new {union_type.type_name}(record);\n{INDENT*3}return;\n{INDENT*2}}}\n"
|
|
748
1307
|
|
|
749
1308
|
# there can only be one list and one map in the union, so we don't need to differentiate this any further
|
|
750
1309
|
if is_list:
|
|
@@ -752,9 +1311,27 @@ class AvroToJava:
|
|
|
752
1311
|
elif is_dict:
|
|
753
1312
|
class_definition_fromobjectctor += f"{INDENT*2}if (obj instanceof Map<?,?>) {{\n{INDENT*3}this._{camel(union_variable_name)} = ({union_type.type_name})obj;\n{INDENT*3}return;\n{INDENT*2}}}\n"
|
|
754
1313
|
else:
|
|
1314
|
+
# For class types, check for GenericData.Record first (Avro deserialization), then typed instance
|
|
1315
|
+
if self.avro_annotation and union_type.is_class:
|
|
1316
|
+
class_definition_fromobjectctor += f"{INDENT*2}if (obj instanceof GenericData.Record) {{\n"
|
|
1317
|
+
class_definition_fromobjectctor += f"{INDENT*3}GenericData.Record record = (GenericData.Record)obj;\n"
|
|
1318
|
+
# Use getFullName() for robust schema comparison instead of separate name + namespace
|
|
1319
|
+
class_definition_fromobjectctor += f"{INDENT*3}String recordFullName = record.getSchema().getFullName();\n"
|
|
1320
|
+
class_definition_fromobjectctor += f"{INDENT*3}String expectedFullName = {union_type.type_name}.AVROSCHEMA.getFullName();\n"
|
|
1321
|
+
class_definition_fromobjectctor += f"{INDENT*3}if (recordFullName.equals(expectedFullName)) {{\n"
|
|
1322
|
+
class_definition_fromobjectctor += f"{INDENT*4}this._{camel(union_variable_name)} = new {union_type.type_name}(record);\n{INDENT*4}return;\n{INDENT*3}}}\n{INDENT*2}}}\n"
|
|
1323
|
+
|
|
1324
|
+
# Handle Avro's Utf8 type for String
|
|
1325
|
+
if self.avro_annotation and union_type.type_name == "String":
|
|
1326
|
+
class_definition_fromobjectctor += f"{INDENT*2}if (obj instanceof org.apache.avro.util.Utf8) {{\n{INDENT*3}this._{camel(union_variable_name)} = obj.toString();\n{INDENT*3}return;\n{INDENT*2}}}\n"
|
|
1327
|
+
|
|
1328
|
+
# Handle Avro's GenericEnumSymbol for enum types
|
|
1329
|
+
if self.avro_annotation and union_type.is_enum:
|
|
1330
|
+
class_definition_fromobjectctor += f"{INDENT*2}if (obj instanceof GenericData.EnumSymbol) {{\n{INDENT*3}this._{camel(union_variable_name)} = {union_type.type_name}.valueOf(obj.toString());\n{INDENT*3}return;\n{INDENT*2}}}\n"
|
|
1331
|
+
|
|
755
1332
|
class_definition_fromobjectctor += f"{INDENT*2}if (obj instanceof {union_type.type_name}) {{\n{INDENT*3}this._{camel(union_variable_name)} = ({union_type.type_name})obj;\n{INDENT*3}return;\n{INDENT*2}}}\n"
|
|
756
1333
|
|
|
757
|
-
# Read method logic
|
|
1334
|
+
# Read method logic - test types in order using duck typing (like C# implementation)
|
|
758
1335
|
if is_dict:
|
|
759
1336
|
class_definition_read += f"{INDENT*3}if (node.isObject()) {{\n{INDENT*4}{union_type.type_name} map = mapper.readValue(node.toString(), new TypeReference<{union_type.type_name}>(){{}});\n{INDENT*3}return new {union_class_name}(map);\n{INDENT*3}}}\n"
|
|
760
1337
|
elif is_list:
|
|
@@ -764,7 +1341,7 @@ class AvroToJava:
|
|
|
764
1341
|
class_definition_read += f"{INDENT*3}if (node.isTextual()) {{\n{INDENT*4}return new {union_class_name}(node.asText());\n{INDENT*3}}}\n"
|
|
765
1342
|
elif union_type.type_name == "byte[]":
|
|
766
1343
|
class_definition_read += f"{INDENT*3}if (node.isBinary()) {{\n{INDENT*4}return new {union_class_name}(node.binaryValue());\n{INDENT*3}}}\n"
|
|
767
|
-
elif union_type.type_name in ["int", "Int"]:
|
|
1344
|
+
elif union_type.type_name in ["int", "Int", "Integer"]:
|
|
768
1345
|
class_definition_read += f"{INDENT*3}if (node.canConvertToInt()) {{\n{INDENT*4}return new {union_class_name}(node.asInt());\n{INDENT*3}}}\n"
|
|
769
1346
|
elif union_type.type_name in ["long", "Long"]:
|
|
770
1347
|
class_definition_read += f"{INDENT*3}if (node.canConvertToLong()) {{\n{INDENT*4}return new {union_class_name}(node.asLong());\n{INDENT*3}}}\n"
|
|
@@ -777,10 +1354,12 @@ class AvroToJava:
|
|
|
777
1354
|
elif union_type.type_name in ["boolean", "Boolean"]:
|
|
778
1355
|
class_definition_read += f"{INDENT*3}if (node.isBoolean()) {{\n{INDENT*4}return new {union_class_name}(node.asBoolean());\n{INDENT*3}}}\n"
|
|
779
1356
|
else:
|
|
1357
|
+
# For classes and enums, use duck typing with isJsonMatch() (C# pattern)
|
|
780
1358
|
if union_type.is_enum:
|
|
781
1359
|
class_definition_read += f"{INDENT*3}if (node.isTextual()) {{\n{INDENT*4}return new {union_class_name}(Enum.valueOf({union_type.type_name}.class, node.asText()));\n{INDENT*3}}}\n"
|
|
782
|
-
|
|
783
|
-
|
|
1360
|
+
elif union_type.is_class:
|
|
1361
|
+
# Use isJsonMatch() to test if this type matches, then use fromData() to deserialize
|
|
1362
|
+
class_definition_read += f"{INDENT*3}if ({union_type.type_name}.isJsonMatch(node)) {{\n{INDENT*4}return new {union_class_name}({union_type.type_name}.fromData(node, \"application/json\"));\n{INDENT*3}}}\n"
|
|
784
1363
|
|
|
785
1364
|
# Write method logic
|
|
786
1365
|
class_definition_write += f"{INDENT*3}{union_type.type_name} {camel(union_variable_name)}Value = value.get{union_variable_name}();\n{INDENT*3}if ({camel(union_variable_name)}Value != null) {{\n{INDENT*4}generator.writeObject({camel(union_variable_name)}Value);\n{INDENT*4}return;\n{INDENT*3}}}\n"
|
|
@@ -801,6 +1380,9 @@ class AvroToJava:
|
|
|
801
1380
|
class_definition += f"{INDENT*2}throw new UnsupportedOperationException(\"No record type is set in the union\");\n"
|
|
802
1381
|
class_definition += f"{INDENT}}}\n"
|
|
803
1382
|
class_definition += f"\n{INDENT}public {union_class_name}(Object obj) {{\n"
|
|
1383
|
+
class_definition += f"{INDENT*2}if (obj == null) {{\n"
|
|
1384
|
+
class_definition += f"{INDENT*3}return; // null is valid for unions with null type\n"
|
|
1385
|
+
class_definition += f"{INDENT*2}}}\n"
|
|
804
1386
|
class_definition += class_definition_fromobjectctor
|
|
805
1387
|
class_definition += f"{INDENT*2}throw new UnsupportedOperationException(\"No record type is set in the union\");\n"
|
|
806
1388
|
class_definition += f"{INDENT}}}\n"
|
|
@@ -825,12 +1407,25 @@ class AvroToJava:
|
|
|
825
1407
|
class_definition += f"{INDENT*2}}}\n{INDENT}}}\n"
|
|
826
1408
|
class_definition += f"\n{INDENT*1}public static boolean isJsonMatch(JsonNode node) {{\n"
|
|
827
1409
|
class_definition += f"{INDENT*2}return " + " || ".join(list_is_json_match) + ";\n"
|
|
828
|
-
class_definition += f"{INDENT*1}}}\n
|
|
1410
|
+
class_definition += f"{INDENT*1}}}\n"
|
|
1411
|
+
|
|
1412
|
+
# Add equals method for union class
|
|
1413
|
+
class_definition += self.generate_union_equals_method(union_class_name, union_types)
|
|
1414
|
+
|
|
1415
|
+
# Add hashCode method for union class
|
|
1416
|
+
class_definition += self.generate_union_hashcode_method(union_class_name, union_types)
|
|
1417
|
+
class_definition += "}\n"
|
|
829
1418
|
|
|
830
1419
|
if write_file:
|
|
831
1420
|
self.write_to_file(package, union_class_name, class_definition)
|
|
1421
|
+
# Calculate qualified name for the union
|
|
1422
|
+
qualified_union_name = self.qualified_name(package.replace('/', '.'), union_class_name)
|
|
832
1423
|
self.generated_types_avro_namespace[union_class_name] = "union" # Track union types
|
|
833
|
-
self.generated_types_java_package[union_class_name] = "union" # Track union types
|
|
1424
|
+
self.generated_types_java_package[union_class_name] = "union" # Track union types with simple name
|
|
1425
|
+
self.generated_types_java_package[qualified_union_name] = "union" # Also track with qualified name
|
|
1426
|
+
# Store the union schema with the types information
|
|
1427
|
+
self.generated_avro_schemas[union_class_name] = {"types": avro_type}
|
|
1428
|
+
self.generated_avro_schemas[qualified_union_name] = {"types": avro_type}
|
|
834
1429
|
return union_class_name
|
|
835
1430
|
|
|
836
1431
|
|
|
@@ -842,12 +1437,48 @@ class AvroToJava:
|
|
|
842
1437
|
property_def = ''
|
|
843
1438
|
if 'doc' in field:
|
|
844
1439
|
property_def += f"{INDENT}/** {field['doc']} */\n"
|
|
845
|
-
|
|
1440
|
+
|
|
1441
|
+
# For discriminator const fields, don't put @JsonProperty on the field
|
|
1442
|
+
# The getter will handle JSON serialization/deserialization
|
|
1443
|
+
is_discriminator_const = field.get('discriminator', False) and 'const' in field
|
|
1444
|
+
if self.jackson_annotations and not is_discriminator_const:
|
|
846
1445
|
property_def += f"{INDENT}@JsonProperty(\"{field['name']}\")\n"
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
1446
|
+
|
|
1447
|
+
# Handle const fields
|
|
1448
|
+
if 'const' in field and field['const'] is not None:
|
|
1449
|
+
const_value = field['const']
|
|
1450
|
+
is_discriminator = field.get('discriminator', False)
|
|
1451
|
+
|
|
1452
|
+
# For enum types, qualify with the enum type name
|
|
1453
|
+
if field_type.type_name not in ('String', 'int', 'Integer', 'long', 'Long', 'double', 'Double', 'boolean', 'Boolean'):
|
|
1454
|
+
const_value = f'{field_type.type_name}.{const_value}'
|
|
1455
|
+
elif field_type.type_name == 'String':
|
|
1456
|
+
const_value = f'"{const_value}"'
|
|
1457
|
+
|
|
1458
|
+
property_def += f"{INDENT}private final {field_type.type_name} {safe_field_name} = {const_value};\n"
|
|
1459
|
+
|
|
1460
|
+
# For discriminator fields, we need both the enum value accessor and String override
|
|
1461
|
+
if is_discriminator:
|
|
1462
|
+
# Provide a typed accessor for the enum value (ignored by Jackson since it's synthetic)
|
|
1463
|
+
if self.jackson_annotations:
|
|
1464
|
+
property_def += f"{INDENT}@JsonIgnore\n"
|
|
1465
|
+
property_def += f"{INDENT}public {field_type.type_name} get{pascal(field_name)}Value() {{ return {safe_field_name}; }}\n"
|
|
1466
|
+
# Generate the getter that returns String (Jackson will use this for serialization)
|
|
1467
|
+
# Use READ_ONLY since this is a const field that doesn't need deserialization
|
|
1468
|
+
# Note: Not using @Override because not all discriminated union variants extend a base class
|
|
1469
|
+
if self.jackson_annotations:
|
|
1470
|
+
property_def += f"{INDENT}@JsonProperty(value=\"{field['name']}\", access=JsonProperty.Access.READ_ONLY)\n"
|
|
1471
|
+
property_def += f"{INDENT}public String get{pascal(field_name)}() {{ return {safe_field_name}.name(); }}\n"
|
|
1472
|
+
else:
|
|
1473
|
+
property_def += f"{INDENT}public {field_type.type_name} get{pascal(field_name)}() {{ return {safe_field_name}; }}\n"
|
|
1474
|
+
else:
|
|
1475
|
+
property_def += f"{INDENT}private {field_type.type_name} {safe_field_name};\n"
|
|
1476
|
+
property_def += f"{INDENT}public {field_type.type_name} get{pascal(field_name)}() {{ return {safe_field_name}; }}\n"
|
|
1477
|
+
property_def += f"{INDENT}public void set{pascal(field_name)}({field_type.type_name} {safe_field_name}) {{ this.{safe_field_name} = {safe_field_name}; }}\n"
|
|
1478
|
+
|
|
1479
|
+
# Generate typed accessors only for direct union fields (not for List/Map<Union>)
|
|
1480
|
+
# For List<Union>, the field IS the list, not a single union value
|
|
1481
|
+
if field_type.union_types and not field_type.type_name.startswith("List<") and not field_type.type_name.startswith("Map<"):
|
|
851
1482
|
for union_type in field_type.union_types:
|
|
852
1483
|
if union_type.type_name.startswith("List<") or union_type.type_name.startswith("Map<"):
|
|
853
1484
|
property_def += f"{INDENT}@SuppressWarnings(\"unchecked\")\n"
|
|
@@ -868,10 +1499,34 @@ class AvroToJava:
|
|
|
868
1499
|
with open(file_path, 'w', encoding='utf-8') as file:
|
|
869
1500
|
if package:
|
|
870
1501
|
file.write(f"package {package.replace('/', '.')};\n\n")
|
|
871
|
-
|
|
1502
|
+
|
|
1503
|
+
# Check if this class extends a discriminated union base class
|
|
1504
|
+
# Pattern: "public class ClassName extends UnionName"
|
|
1505
|
+
if " extends " in definition and self.jackson_annotations:
|
|
1506
|
+
import re
|
|
1507
|
+
match = re.search(r'public class \w+ extends (\w+)', definition)
|
|
1508
|
+
if match:
|
|
1509
|
+
base_class_name = match.group(1)
|
|
1510
|
+
# Check if this base class is a discriminated union we generated
|
|
1511
|
+
for union_name, union_subtypes in self.discriminated_unions.items():
|
|
1512
|
+
if union_name == base_class_name:
|
|
1513
|
+
# Get the package where the union base class is generated
|
|
1514
|
+
# (it's in the same package as the first subtype)
|
|
1515
|
+
union_package = union_subtypes[0]['package'] if union_subtypes else self.base_package.replace('/', '.')
|
|
1516
|
+
# Only import if the union is in a different package
|
|
1517
|
+
current_package = package.replace('/', '.')
|
|
1518
|
+
if union_package != current_package:
|
|
1519
|
+
file.write(f"import {union_package}.{union_name};\n")
|
|
1520
|
+
break
|
|
1521
|
+
|
|
1522
|
+
if "List<" in definition or "ArrayList<" in definition:
|
|
872
1523
|
file.write("import java.util.List;\n")
|
|
873
|
-
if "
|
|
1524
|
+
if "ArrayList<" in definition or "Arrays.asList" in definition:
|
|
1525
|
+
file.write("import java.util.ArrayList;\n")
|
|
1526
|
+
if "Map<" in definition or "HashMap<" in definition:
|
|
874
1527
|
file.write("import java.util.Map;\n")
|
|
1528
|
+
if "HashMap<" in definition:
|
|
1529
|
+
file.write("import java.util.HashMap;\n")
|
|
875
1530
|
if "Predicate<" in definition:
|
|
876
1531
|
file.write("import java.util.function.Predicate;\n")
|
|
877
1532
|
if "BigDecimal" in definition:
|
|
@@ -933,6 +1588,8 @@ class AvroToJava:
|
|
|
933
1588
|
file.write("import com.fasterxml.jackson.core.JsonParser;\n")
|
|
934
1589
|
if 'JsonIgnore' in definition:
|
|
935
1590
|
file.write("import com.fasterxml.jackson.annotation.JsonIgnore;\n")
|
|
1591
|
+
if 'JsonIgnoreProperties' in definition:
|
|
1592
|
+
file.write("import com.fasterxml.jackson.annotation.JsonIgnoreProperties;\n")
|
|
936
1593
|
if 'JsonProperty' in definition:
|
|
937
1594
|
file.write("import com.fasterxml.jackson.annotation.JsonProperty;\n")
|
|
938
1595
|
if 'JsonProcessingException' in definition:
|
|
@@ -959,12 +1616,432 @@ class AvroToJava:
|
|
|
959
1616
|
file.write("\n")
|
|
960
1617
|
file.write(definition)
|
|
961
1618
|
|
|
1619
|
+
def generate_tests(self, base_output_dir: str) -> None:
    """Generate a unit-test source file for every generated Java class and enum.

    Test sources are placed below ``<base_output_dir>/src/test/java``. Entries
    of ``self.generated_types_java_package`` whose kind is neither ``"class"``
    nor ``"enum"`` are skipped.

    Args:
        base_output_dir: Root directory of the generated Java project.
    """
    test_directory_path = os.path.join(base_output_dir, "src/test/java")
    # exist_ok=True already tolerates an existing directory, so no separate
    # (race-prone) existence check is needed.
    os.makedirs(test_directory_path, exist_ok=True)

    for class_name, type_kind in self.generated_types_java_package.items():
        if type_kind in ("class", "enum"):
            self.generate_test_class(class_name, type_kind, test_directory_path)
|
|
1630
|
+
|
|
1631
|
+
def generate_test_class(self, class_name: str, type_kind: str, test_directory_path: str) -> None:
    """Render and write the unit-test source file for one generated Java type.

    The test is rendered from ``avrotojava/class_test.java.jinja`` for classes
    and from ``avrotojava/enum_test.java.jinja`` for enums, then written to
    ``<test_directory_path>/<package as path>/<SimpleName>Test.java``.

    Args:
        class_name: Qualified Java name of the generated type; everything
            before the last ``.`` is treated as the package.
        type_kind: ``"class"`` or ``"enum"``. Any other kind has no template
            and is ignored.
        test_directory_path: Root directory for generated test sources.
    """
    from avrotize.common import process_template

    # Only classes and enums have a test template. Returning early avoids
    # reaching the write step with no rendered definition.
    if type_kind not in ("class", "enum"):
        return

    avro_schema = self.generated_avro_schemas.get(class_name, {})
    package, _, simple_class_name = class_name.rpartition('.')
    test_class_name = f"{simple_class_name}Test"

    if type_kind == "class":
        fields = self.get_class_test_fields(avro_schema, simple_class_name, package)
        imports = self.get_test_imports(fields)
        test_class_definition = process_template(
            "avrotojava/class_test.java.jinja",
            package=package,
            test_class_name=test_class_name,
            class_name=simple_class_name,
            fields=fields,
            imports=imports,
            avro_annotation=self.avro_annotation,
            jackson_annotation=self.jackson_annotations
        )
    else:
        # Convert symbols to Java-safe identifiers (same logic as generate_enum)
        java_safe_symbols = []
        for symbol in avro_schema.get('symbols', []):
            java_symbol = symbol.replace('-', '_').replace('.', '_')
            if java_symbol and java_symbol[0].isdigit():
                java_symbol = '_' + java_symbol
            if is_java_reserved_word(java_symbol):
                java_symbol = '_' + java_symbol
            java_safe_symbols.append(java_symbol)

        test_class_definition = process_template(
            "avrotojava/enum_test.java.jinja",
            package=package,
            test_class_name=test_class_name,
            enum_name=simple_class_name,
            symbols=java_safe_symbols  # Pass converted symbols instead of raw
        )

    # Write test file
    test_file_dir = os.path.join(test_directory_path, package.replace('.', os.sep))
    os.makedirs(test_file_dir, exist_ok=True)
    test_file_path = os.path.join(test_file_dir, f"{test_class_name}.java")
    with open(test_file_path, 'w', encoding='utf-8') as test_file:
        test_file.write(test_class_definition)
|
|
1681
|
+
|
|
1682
|
+
def get_test_imports(self, fields: List) -> List[str]:
    """Collect the Java import statements needed by a generated test class.

    For each field this adds the ``java.util`` collection imports implied by a
    ``List<...>`` / ``Map<...>`` field type, then an import for every referenced
    type that is registered in ``self.generated_types_java_package`` and has a
    dotted (package-qualified) name. For registered types of kind ``"union"``
    the member types listed under ``types`` in the union's stored Avro schema
    are imported as well, when they are registered too.

    Args:
        fields: Objects exposing ``field_type`` (Java type name) and,
            optionally, ``java_type_obj`` with a ``union_types`` list.

    Returns:
        Import statements in first-seen order, without duplicates.
    """
    # Avro primitive names that never map to a generated type.
    avro_primitives = ('null', 'string', 'int', 'long', 'float', 'double', 'boolean', 'bytes')
    known_types = self.generated_types_java_package
    imports: List[str] = []

    def add_import(statement: str) -> None:
        # Preserve insertion order while suppressing duplicates.
        if statement not in imports:
            imports.append(statement)

    def add_map_imports() -> None:
        add_import("import java.util.Map;")
        add_import("import java.util.HashMap;")

    def map_value_type(map_type: str):
        # Second comma-separated entry between the outer '<' and '>' of a
        # Map<K,V> type name, or None when there is no comma.
        inner = map_type[map_type.index('<') + 1:map_type.rindex('>')]
        parts = inner.split(',')
        return parts[1].strip() if len(parts) > 1 else None

    def qualify(avro_namespace: str, type_name: str) -> str:
        # Build full Java qualified name with base package
        java_package = self.join_packages(self.base_package, avro_namespace).replace('/', '.').lower()
        return java_package + '.' + type_name

    def resolve_union_member(union_type):
        # Map one entry of an Avro union's 'types' list to a Java name,
        # or None for primitives and anonymous entries.
        if isinstance(union_type, dict) and 'name' in union_type:
            # It's a complex type reference (inline definition)
            if 'namespace' in union_type:
                return qualify(union_type['namespace'], union_type['name'])
            return union_type['name']
        if isinstance(union_type, str) and union_type not in avro_primitives:
            # String reference to a named type: the Avro qualified name.
            avro_namespace, dot, type_name = union_type.rpartition('.')
            if dot:
                return qualify(avro_namespace, type_name)
            return union_type
        return None

    for field in fields:
        declared_type = field.field_type
        candidate_types = []

        if declared_type.startswith("List<"):
            add_import("import java.util.List;")
            add_import("import java.util.ArrayList;")
            # Extract the inner type: List<Type> -> Type
            element_type = declared_type[5:-1]
            if element_type.startswith("Map<"):
                add_map_imports()
                value_type = map_value_type(element_type)
                if value_type is not None:
                    candidate_types.append(value_type)
            else:
                candidate_types.append(element_type)
        elif declared_type.startswith("Map<"):
            add_map_imports()
            value_type = map_value_type(declared_type)
            if value_type is not None:
                candidate_types.append(value_type)
        else:
            # Non-generic field: the declared type itself is the candidate.
            candidate_types.append(declared_type)

        # If the field carries union_types (Avro-style union), consider every
        # union member type for import as well.
        java_type_obj = getattr(field, 'java_type_obj', None)
        if java_type_obj and java_type_obj.union_types:
            candidate_types.extend(member.type_name for member in java_type_obj.union_types)

        for type_to_check in candidate_types:
            if type_to_check not in known_types:
                continue
            # Only import if it's a fully qualified name with a package
            if '.' in type_to_check:
                add_import(f"import {type_to_check};")
            # Unions are expanded regardless of whether their own name is
            # qualified: their members may still need imports.
            if known_types[type_to_check] != "union":
                continue
            avro_schema = self.generated_avro_schemas.get(type_to_check, {})
            if not avro_schema or 'types' not in avro_schema:
                continue
            for union_type in avro_schema['types']:
                java_qualified_name = resolve_union_member(union_type)
                if not java_qualified_name:
                    continue
                simple_name = java_qualified_name.split('.')[-1]
                if java_qualified_name in known_types or simple_name in known_types:
                    add_import(f"import {java_qualified_name};")
    return imports
|
|
1779
|
+
|
|
1780
|
+
def get_class_test_fields(self, avro_schema: Dict, class_name: str, package: str) -> List:
    """Build the per-field descriptors used when rendering a class test.

    Each entry of ``avro_schema['fields']`` becomes one descriptor carrying
    the (optionally Pascal-cased) field name, the Java type name, its base
    type (the text before the first ``<``), a Java expression usable as a test
    value, and flags for const, enum and discriminator fields.

    Args:
        avro_schema: Avro record schema of the class; may be empty.
        class_name: Simple Java class name, forwarded to type conversion.
        package: Java package, forwarded to test-value generation.

    Returns:
        A list of descriptor objects; empty when the schema has no ``fields``.
    """

    class Field:
        def __init__(self, fn: str, ft: str, tv: str, ct: bool, ie: bool = False, java_type_obj: 'AvroToJava.JavaType' = None, is_discrim: bool = False):
            self.field_name = fn
            self.field_type = ft
            # Base type of a generic (e.g. List<Object> -> List); splitting a
            # name without '<' yields the name itself.
            self.base_type = ft.split('<')[0]
            self.test_value = tv
            self.is_const = ct
            self.is_enum = ie
            self.is_discriminator = is_discrim
            # Full JavaType object, kept for access to union members.
            self.java_type_obj = java_type_obj

    if not avro_schema or 'fields' not in avro_schema:
        return []

    # Type names whose const values are emitted as a quoted literal rather
    # than as a member of the type.
    literal_types = ('String', 'int', 'Integer', 'long', 'Long', 'double', 'Double', 'boolean', 'Boolean')
    namespace = avro_schema.get('namespace', '')
    descriptors = []

    for schema_field in avro_schema['fields']:
        raw_name = schema_field['name']
        field_name = pascal(raw_name) if self.pascal_properties else raw_name
        java_type = self.convert_avro_type_to_java(class_name, field_name, schema_field['type'], namespace)
        type_name = java_type.type_name

        is_enum = self.generated_types_java_package.get(type_name) == "enum"
        is_discriminator = schema_field.get('discriminator', False)
        has_const = schema_field.get("const") is not None

        if not has_const:
            test_value = self.get_test_value_from_field(schema_field['type'], java_type, package)
        elif is_enum or type_name not in literal_types:
            # Enum (or other non-literal) const: qualify with the type name.
            test_value = f'{type_name}.{schema_field["const"]}'
        else:
            test_value = f'"{schema_field["const"]}"'

        descriptors.append(Field(
            fn=field_name,
            ft=type_name,
            tv=test_value,
            ct=has_const,
            ie=is_enum,
            java_type_obj=java_type,
            is_discrim=is_discriminator,
        ))
    return descriptors
|
|
1830
|
+
|
|
1831
|
+
def get_test_value_from_field(self, avro_field_type: Union[str, Dict, List], java_type: JavaType, package: str) -> str:
    """Return a Java expression usable as a test value for one field.

    Two shapes get special handling before falling back to
    ``get_test_value`` on the Java type name:

    * ``Object`` with ``union_types``: the value for the first union member.
    * ``List<Object>`` backed by an Avro array whose ``items`` is a union:
      a one-element ``ArrayList`` holding a value for the first non-null item
      type.

    Args:
        avro_field_type: Avro type of the field (name, schema dict or union list).
        java_type: Converted Java type of the field.
        package: Java package, forwarded to the value generators.
    """
    type_name = java_type.type_name

    # Avro-style union surfaced as Object: use its first member type.
    if type_name == "Object" and java_type.union_types:
        return self.get_test_value(java_type.union_types[0].type_name, package)

    if type_name.startswith("List<Object>"):
        # avro_field_type could be: ["null", {"type": "array", "items": [union types]}]
        # or just: {"type": "array", "items": [union types]}
        array_schema = avro_field_type
        if isinstance(avro_field_type, list):
            array_schema = next(
                (entry for entry in avro_field_type
                 if isinstance(entry, dict) and entry.get('type') == 'array'),
                avro_field_type,
            )

        if isinstance(array_schema, dict) and array_schema.get('type') == 'array':
            items_type = array_schema.get('items')
            if isinstance(items_type, list):  # Union array
                for item_type in items_type:
                    if item_type == 'null':
                        continue
                    # First non-null item type decides the element value.
                    element_java_type = self.convert_avro_type_to_java('_test', '_field', item_type, package)
                    element_value = self.get_test_value(element_java_type.type_name, package)
                    return f'new ArrayList<>(java.util.Arrays.asList({element_value}))'

    # Default: derive the value from the Java type name alone.
    return self.get_test_value(type_name, package)
|
|
1861
|
+
|
|
1862
|
+
def get_test_value(self, java_type: str, package: str) -> str:
    """Return a Java expression usable as a default test value for ``java_type``.

    Resolution order: ``List<...>`` (recursing into the element type),
    ``Map<...>``, types registered in ``self.generated_types_java_package``
    (enum, class, union), then a table of primitive/boxed defaults, and
    finally ``new <java_type>()``.

    Args:
        java_type: Java type name, possibly generic or package-qualified.
        package: Java package, forwarded to recursive calls and type conversion.
    """
    if java_type.startswith("List<"):
        element_value = self.get_test_value(java_type[5:-1], package)
        # Arrays.asList(null) throws NPE, so create empty list for null values
        if element_value == 'null':
            return 'new ArrayList<>()'
        return f'new ArrayList<>(java.util.Arrays.asList({element_value}))'
    if java_type.startswith("Map<"):
        return 'new HashMap<>()'

    generated_kind = self.generated_types_java_package.get(java_type)

    if generated_kind == "enum":
        symbols = self.generated_avro_schemas.get(java_type, {}).get('symbols', [])
        if not symbols:
            return f'{java_type}.values()[0]'
        # Convert symbol to valid Java identifier (same logic as in generate_enum)
        constant = symbols[0].replace('-', '_').replace('.', '_')
        if constant and constant[0].isdigit():
            constant = '_' + constant
        if is_java_reserved_word(constant):
            constant = '_' + constant
        # Use fully qualified name to avoid conflicts with field names
        return f'{java_type}.{constant}'

    if generated_kind == "class":
        # Use fully qualified name to avoid conflicts with field names
        return f'{java_type}.createTestInstance()'

    if generated_kind == "union":
        wrapper_name = java_type.split('.')[-1]
        union_schema = self.generated_avro_schemas.get(java_type, {})
        members = union_schema['types'] if union_schema and 'types' in union_schema else []
        # The first usable (non-null, named or string) member decides the value.
        for member in members:
            if isinstance(member, dict):
                if 'name' not in member:
                    continue
                if 'namespace' in member:
                    # Build full Java qualified name with base package
                    member_java_name = self.join_packages(self.base_package, member['namespace']).replace('/', '.').lower() + '.' + member['name']
                else:
                    member_java_name = member['name']
                if self.generated_types_java_package.get(member_java_name) == "enum":
                    enum_value = self.get_test_value(member_java_name, package)
                    return f'new {wrapper_name}({enum_value})'
                # Anything that is not a registered enum is instantiated via
                # its createTestInstance() factory.
                return f'new {wrapper_name}({member_java_name}.createTestInstance())'
            if isinstance(member, str) and member != 'null':
                # Simple type name: convert from Avro type to Java type first.
                converted = self.convert_avro_type_to_java('_test', '_field', member, package)
                simple_value = self.get_test_value(converted.type_name, package)
                return f'new {wrapper_name}({simple_value})'
        # Fallback: create an empty union instance
        return f'new {wrapper_name}()'

    primitive_defaults = {
        'String': '"test_string"',
        'boolean': 'true',
        'Boolean': 'Boolean.TRUE',
        'int': '42',
        'Integer': 'Integer.valueOf(42)',
        'long': '42L',
        'Long': 'Long.valueOf(42L)',
        'float': '3.14f',
        'Float': 'Float.valueOf(3.14f)',
        'double': '3.14',
        'Double': 'Double.valueOf(3.14)',
        'byte[]': 'new byte[] { 0x01, 0x02, 0x03 }',
        'Object': 'null',  # Use null for Object types (Avro unions) to avoid reference equality issues
    }
    return primitive_defaults.get(java_type, f'new {java_type}()')
|
|
1954
|
+
|
|
1955
|
+
def generate_discriminated_union_base_classes(self):
    """Write an abstract Jackson-annotated base class per discriminated union.

    Does nothing unless Jackson annotations are enabled and at least one
    discriminated union was recorded. For every union with subtypes, the
    discriminator is the field flagged ``discriminator`` in the subtype
    schemas; each subtype's ``const`` on that field becomes its
    ``@JsonSubTypes`` name, falling back to the class name. The file is
    written into the package directory of the first subtype below
    ``self.output_dir``. Unions without a discriminator field are skipped
    with a warning on stdout.
    """
    if not (self.jackson_annotations and self.discriminated_unions):
        return

    for union_name, subtypes in self.discriminated_unions.items():
        if not subtypes:
            continue

        # The base class lives in the package of the first subtype.
        package = subtypes[0]['package']

        # Find the discriminator field (flagged 'discriminator') and the
        # per-subtype const values. The last subtype that has such a field
        # determines the discriminator name.
        discriminator_field = None
        discriminator_values = {}
        for info in subtypes:
            flagged = next(
                (candidate for candidate in info['schema'].get('fields', [])
                 if candidate.get('discriminator')),
                None,
            )
            if flagged is None:
                continue
            discriminator_field = flagged['name']
            if 'const' in flagged:
                discriminator_values[info['class_name']] = flagged['const']

        if not discriminator_field:
            print(f"WARN: Could not find discriminator field for union {union_name}")
            continue

        # One @JsonSubTypes.Type entry per subtype, comma-separated.
        subtype_entries = []
        for info in subtypes:
            subtype_class = info['class_name']
            subtype_tag = discriminator_values.get(subtype_class, subtype_class)
            subtype_entries.append(
                f'{INDENT}@JsonSubTypes.Type(value = {subtype_class}.class, name = "{subtype_tag}")'
            )

        pieces = [
            f"package {package};\n\n",
            'import com.fasterxml.jackson.annotation.JsonSubTypes;\n',
            'import com.fasterxml.jackson.annotation.JsonTypeInfo;\n\n',
            f"/**\n * Abstract base class for {union_name} discriminated union\n */\n",
            # Jackson @JsonTypeInfo annotation
            '@JsonTypeInfo(\n',
            f'{INDENT}use = JsonTypeInfo.Id.NAME,\n',
            f'{INDENT}include = JsonTypeInfo.As.EXISTING_PROPERTY,\n',
            f'{INDENT}property = "{discriminator_field}",\n',
            f'{INDENT}visible = true\n',
            ')\n',
            # Jackson @JsonSubTypes annotation
            '@JsonSubTypes({\n',
            ',\n'.join(subtype_entries) + '\n',
            '})\n',
            # Abstract class with the abstract discriminator getter
            f'public abstract class {union_name} {{\n',
            f'{INDENT}/**\n{INDENT} * Gets the discriminator value\n{INDENT} * @return the type discriminator\n{INDENT} */\n',
            f'{INDENT}public abstract String get{pascal(discriminator_field)}();\n',
            '}\n',
        ]
        full_content = ''.join(pieces)

        # Write the file
        target_dir = os.path.join(self.output_dir, package.replace('.', os.sep))
        os.makedirs(target_dir, exist_ok=True)
        target_path = os.path.join(target_dir, f"{union_name}.java")
        with open(target_path, 'w', encoding='utf-8') as out:
            out.write(full_content)

        print(f"Generated discriminated union base class: {union_name}")
|
|
2033
|
+
|
|
962
2034
|
def convert_schema(self, schema: JsonNode, output_dir: str):
|
|
963
2035
|
"""Converts Avro schema to Java"""
|
|
964
2036
|
if not isinstance(schema, list):
|
|
965
2037
|
schema = [schema]
|
|
2038
|
+
|
|
2039
|
+
# Build type dictionary for inline schema resolution (like C# does)
|
|
2040
|
+
self.type_dict = build_flat_type_dict(schema)
|
|
2041
|
+
|
|
966
2042
|
if not os.path.exists(output_dir):
|
|
967
2043
|
os.makedirs(output_dir, exist_ok=True)
|
|
2044
|
+
base_output_dir = output_dir # Store the base directory before changing it
|
|
968
2045
|
pom_path = os.path.join(output_dir, "pom.xml")
|
|
969
2046
|
if not os.path.exists(pom_path):
|
|
970
2047
|
package_elements = self.base_package.split('.') if self.base_package else ["com", "example"]
|
|
@@ -979,6 +2056,8 @@ class AvroToJava:
|
|
|
979
2056
|
self.output_dir = output_dir
|
|
980
2057
|
for avro_schema in (x for x in schema if isinstance(x, dict)):
|
|
981
2058
|
self.generate_class_or_enum(avro_schema, '')
|
|
2059
|
+
self.generate_discriminated_union_base_classes()
|
|
2060
|
+
self.generate_tests(base_output_dir)
|
|
982
2061
|
|
|
983
2062
|
def convert(self, avro_schema_path: str, output_dir: str):
|
|
984
2063
|
"""Converts Avro schema to Java"""
|