structurize-2.19.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avrotize/__init__.py +64 -0
- avrotize/__main__.py +6 -0
- avrotize/_version.py +34 -0
- avrotize/asn1toavro.py +160 -0
- avrotize/avrotize.py +152 -0
- avrotize/avrotocpp.py +483 -0
- avrotize/avrotocsharp.py +1075 -0
- avrotize/avrotocsv.py +121 -0
- avrotize/avrotodatapackage.py +173 -0
- avrotize/avrotodb.py +1383 -0
- avrotize/avrotogo.py +476 -0
- avrotize/avrotographql.py +197 -0
- avrotize/avrotoiceberg.py +210 -0
- avrotize/avrotojava.py +2156 -0
- avrotize/avrotojs.py +250 -0
- avrotize/avrotojsons.py +481 -0
- avrotize/avrotojstruct.py +345 -0
- avrotize/avrotokusto.py +364 -0
- avrotize/avrotomd.py +137 -0
- avrotize/avrotools.py +168 -0
- avrotize/avrotoparquet.py +208 -0
- avrotize/avrotoproto.py +359 -0
- avrotize/avrotopython.py +624 -0
- avrotize/avrotorust.py +435 -0
- avrotize/avrotots.py +598 -0
- avrotize/avrotoxsd.py +344 -0
- avrotize/cddltostructure.py +1841 -0
- avrotize/commands.json +3337 -0
- avrotize/common.py +834 -0
- avrotize/constants.py +72 -0
- avrotize/csvtoavro.py +132 -0
- avrotize/datapackagetoavro.py +76 -0
- avrotize/dependencies/cpp/vcpkg/vcpkg.json +19 -0
- avrotize/dependencies/typescript/node22/package.json +16 -0
- avrotize/dependency_resolver.py +348 -0
- avrotize/dependency_version.py +432 -0
- avrotize/jsonstoavro.py +2167 -0
- avrotize/jsonstostructure.py +2642 -0
- avrotize/jstructtoavro.py +878 -0
- avrotize/kstructtoavro.py +93 -0
- avrotize/kustotoavro.py +455 -0
- avrotize/parquettoavro.py +157 -0
- avrotize/proto2parser.py +498 -0
- avrotize/proto3parser.py +403 -0
- avrotize/prototoavro.py +382 -0
- avrotize/structuretocddl.py +597 -0
- avrotize/structuretocpp.py +697 -0
- avrotize/structuretocsharp.py +2295 -0
- avrotize/structuretocsv.py +365 -0
- avrotize/structuretodatapackage.py +659 -0
- avrotize/structuretodb.py +1125 -0
- avrotize/structuretogo.py +720 -0
- avrotize/structuretographql.py +502 -0
- avrotize/structuretoiceberg.py +355 -0
- avrotize/structuretojava.py +853 -0
- avrotize/structuretojsons.py +498 -0
- avrotize/structuretokusto.py +639 -0
- avrotize/structuretomd.py +322 -0
- avrotize/structuretoproto.py +764 -0
- avrotize/structuretopython.py +772 -0
- avrotize/structuretorust.py +714 -0
- avrotize/structuretots.py +653 -0
- avrotize/structuretoxsd.py +679 -0
- avrotize/xsdtoavro.py +413 -0
- structurize-2.19.0.dist-info/METADATA +107 -0
- structurize-2.19.0.dist-info/RECORD +70 -0
- structurize-2.19.0.dist-info/WHEEL +5 -0
- structurize-2.19.0.dist-info/entry_points.txt +2 -0
- structurize-2.19.0.dist-info/licenses/LICENSE +201 -0
- structurize-2.19.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,2642 @@
|
|
|
1
|
+
""" JSON Schema to JSON Structure converter. """
|
|
2
|
+
|
|
3
|
+
# pylint: disable=too-many-lines, line-too-long, too-many-branches, too-many-statements, too-many-locals, too-many-nested-blocks, too-many-arguments, too-many-instance-attributes, too-many-public-methods, too-many-boolean-expressions
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import copy
|
|
8
|
+
import re
|
|
9
|
+
import urllib.parse
|
|
10
|
+
from urllib.parse import ParseResult, urlparse, unquote
|
|
11
|
+
from typing import Any, Dict, List, Tuple, Union, Optional
|
|
12
|
+
import jsonpointer
|
|
13
|
+
from jsonpointer import JsonPointerException
|
|
14
|
+
import requests
|
|
15
|
+
|
|
16
|
+
from avrotize.common import avro_name, avro_name_with_altname, avro_namespace, find_schema_node, generic_type, set_schema_node
|
|
17
|
+
from avrotize.dependency_resolver import inline_dependencies_of, sort_messages_by_dependencies
|
|
18
|
+
|
|
19
|
+
# JSON Structure primitive types
# NOTE(review): despite the name, this list also includes compound kinds
# ('set', 'map', 'object', 'choice') — it is used as the closed set of
# built-in JSON Structure type names, not strictly primitives.
structure_primitive_types = [
    'null', 'string', 'int8', 'int16', 'int32', 'int64',
    'uint8', 'uint16', 'uint32', 'uint64', 'float', 'double',
    'decimal', 'boolean', 'bytes', 'date', 'time', 'datetime',
    'duration', 'uuid', 'set', 'map', 'object', 'choice'
]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class JsonToStructureConverter:
|
|
29
|
+
"""
|
|
30
|
+
Converts JSON Schema documents to JSON Structure format.
|
|
31
|
+
|
|
32
|
+
Attributes:
|
|
33
|
+
imported_types: A dictionary of imported type schemas.
|
|
34
|
+
root_namespace: The namespace for the root schema.
|
|
35
|
+
max_recursion_depth: The maximum recursion depth.
|
|
36
|
+
types_with_unmerged_types: A list of types with unmerged types.
|
|
37
|
+
content_cache: A dictionary for caching fetched URLs.
|
|
38
|
+
utility_namespace: The namespace for utility types.
|
|
39
|
+
preserve_composition: Flag to preserve composition keywords.
|
|
40
|
+
detect_inheritance: Flag to detect inheritance patterns. detect_discriminators: Flag to detect OpenAPI discriminator patterns.
|
|
41
|
+
convert_empty_objects_to_maps: Flag to convert objects with only additionalProperties to maps.
|
|
42
|
+
split_top_level_records: Flag to split top-level records.
|
|
43
|
+
root_class_name: The name of the root class.
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
def __init__(self) -> None:
|
|
47
|
+
self.imported_types: Dict[Any, Any] = {}
|
|
48
|
+
self.root_namespace = 'example.com'
|
|
49
|
+
self.max_recursion_depth = 40
|
|
50
|
+
self.types_with_unmerged_types: List[dict] = []
|
|
51
|
+
self.content_cache: Dict[str, str] = {}
|
|
52
|
+
self.utility_namespace = 'utility.vasters.com'
|
|
53
|
+
self.split_top_level_records = False
|
|
54
|
+
self.root_class_name = 'document'
|
|
55
|
+
self.type_registry: Dict[str, str] = {} # Track type definitions for reference resolution
|
|
56
|
+
# JSON Structure specific configuration
|
|
57
|
+
self.preserve_composition = False # Resolve composition keywords by default for JSON Structure compliance
|
|
58
|
+
self.detect_inheritance = True
|
|
59
|
+
self.detect_discriminators = True
|
|
60
|
+
self.convert_empty_objects_to_maps = True
|
|
61
|
+
|
|
62
|
+
def is_empty_type(self, structure_type):
|
|
63
|
+
"""
|
|
64
|
+
Check if the JSON Structure type is an empty type.
|
|
65
|
+
|
|
66
|
+
Parameters:
|
|
67
|
+
structure_type (any): The JSON Structure type to check.
|
|
68
|
+
|
|
69
|
+
Returns:
|
|
70
|
+
bool: True if the type is empty, False otherwise.
|
|
71
|
+
"""
|
|
72
|
+
if len(structure_type) == 0:
|
|
73
|
+
return True
|
|
74
|
+
if isinstance(structure_type, list):
|
|
75
|
+
return all(self.is_empty_type(t) for t in structure_type)
|
|
76
|
+
if isinstance(structure_type, dict):
|
|
77
|
+
if not 'type' in structure_type:
|
|
78
|
+
return True
|
|
79
|
+
if (structure_type['type'] == 'object' and (not 'properties' in structure_type or len(structure_type['properties']) == 0)) or \
|
|
80
|
+
(structure_type['type'] == 'choice' and (not 'choices' in structure_type or len(structure_type['choices']) == 0)) or \
|
|
81
|
+
(structure_type['type'] == 'set' and (not 'items' in structure_type or not structure_type['items'])) or \
|
|
82
|
+
(structure_type['type'] == 'map' and (not 'values' in structure_type or not structure_type['values'])):
|
|
83
|
+
return True
|
|
84
|
+
return False
|
|
85
|
+
|
|
86
|
+
def is_empty_json_type(self, json_type):
|
|
87
|
+
"""
|
|
88
|
+
Check if the JSON type is an empty type.
|
|
89
|
+
|
|
90
|
+
Parameters:
|
|
91
|
+
json_type (any): The JSON type to check.
|
|
92
|
+
|
|
93
|
+
Returns:
|
|
94
|
+
bool: True if the JSON type is empty, False otherwise.
|
|
95
|
+
"""
|
|
96
|
+
if len(json_type) == 0:
|
|
97
|
+
return True
|
|
98
|
+
if isinstance(json_type, list):
|
|
99
|
+
return all(self.is_empty_json_type(t) for t in json_type)
|
|
100
|
+
elif isinstance(json_type, dict):
|
|
101
|
+
if not 'type' in json_type:
|
|
102
|
+
return True
|
|
103
|
+
return False
|
|
104
|
+
|
|
105
|
+
def detect_numeric_type(self, schema: dict) -> str:
|
|
106
|
+
"""
|
|
107
|
+
Analyze schema constraints to determine the appropriate numeric type.
|
|
108
|
+
|
|
109
|
+
Args:
|
|
110
|
+
schema (dict): The JSON schema object
|
|
111
|
+
|
|
112
|
+
Returns:
|
|
113
|
+
str: The appropriate JSON Structure numeric type
|
|
114
|
+
"""
|
|
115
|
+
# Check for format hints first
|
|
116
|
+
format_hint = schema.get('format')
|
|
117
|
+
if format_hint:
|
|
118
|
+
format_mapping = {
|
|
119
|
+
'int8': 'int32', # Use int32 instead of int8 for better compatibility
|
|
120
|
+
'int16': 'int32', # Use int32 instead of int16 for better compatibility
|
|
121
|
+
'int32': 'int32',
|
|
122
|
+
'int64': 'int64',
|
|
123
|
+
'uint8': 'int32', # Use int32 instead of uint8 for better compatibility
|
|
124
|
+
'uint16': 'int32', # Use int32 instead of uint16 for better compatibility
|
|
125
|
+
'uint32': 'int64', # Use int64 instead of uint32 for better compatibility
|
|
126
|
+
'uint64': 'int64', # Use int64 instead of uint64 for better compatibility
|
|
127
|
+
'float': 'float',
|
|
128
|
+
'double': 'double'
|
|
129
|
+
}
|
|
130
|
+
if format_hint in format_mapping:
|
|
131
|
+
return format_mapping[format_hint]
|
|
132
|
+
|
|
133
|
+
# Analyze constraints for integer types
|
|
134
|
+
if schema.get('type') == 'integer':
|
|
135
|
+
minimum = schema.get('minimum', schema.get('exclusiveMinimum'))
|
|
136
|
+
maximum = schema.get('maximum', schema.get('exclusiveMaximum'))
|
|
137
|
+
|
|
138
|
+
# For integers with constraints, use conservative type mapping
|
|
139
|
+
if minimum is not None and maximum is not None:
|
|
140
|
+
# Both bounds specified
|
|
141
|
+
if minimum >= -2147483648 and maximum <= 2147483647:
|
|
142
|
+
return 'int32'
|
|
143
|
+
else:
|
|
144
|
+
return 'int64'
|
|
145
|
+
elif minimum is not None and minimum >= 0:
|
|
146
|
+
# Non-negative integers - use int32 for reasonable ranges
|
|
147
|
+
if maximum is None or maximum <= 2147483647:
|
|
148
|
+
return 'int32' # Conservative choice for age-like fields
|
|
149
|
+
else:
|
|
150
|
+
return 'int64'
|
|
151
|
+
else:
|
|
152
|
+
# General integers or negative minimum
|
|
153
|
+
return 'int32' # Conservative default
|
|
154
|
+
|
|
155
|
+
# For number type, check for decimal indicators
|
|
156
|
+
elif schema.get('type') == 'number':
|
|
157
|
+
if 'multipleOf' in schema:
|
|
158
|
+
multiple_of = schema['multipleOf']
|
|
159
|
+
if isinstance(multiple_of, float) or '.' in str(multiple_of):
|
|
160
|
+
return 'decimal'
|
|
161
|
+
|
|
162
|
+
# Check for precision/scale hints in description or custom properties
|
|
163
|
+
if 'precision' in schema or 'scale' in schema:
|
|
164
|
+
return 'decimal'
|
|
165
|
+
|
|
166
|
+
return 'double' # Default for floating point
|
|
167
|
+
|
|
168
|
+
return 'double' # Default fallback
|
|
169
|
+
|
|
170
|
+
def detect_temporal_type(self, schema: dict) -> str:
|
|
171
|
+
"""
|
|
172
|
+
Detect temporal types based on format.
|
|
173
|
+
|
|
174
|
+
Args:
|
|
175
|
+
schema (dict): The JSON schema object
|
|
176
|
+
|
|
177
|
+
Returns:
|
|
178
|
+
str: The appropriate JSON Structure temporal type
|
|
179
|
+
"""
|
|
180
|
+
format_hint = schema.get('format')
|
|
181
|
+
if format_hint:
|
|
182
|
+
temporal_mapping = {
|
|
183
|
+
'date': 'date',
|
|
184
|
+
'time': 'time',
|
|
185
|
+
'date-time': 'datetime',
|
|
186
|
+
'duration': 'duration'
|
|
187
|
+
}
|
|
188
|
+
return temporal_mapping.get(format_hint, 'string')
|
|
189
|
+
return 'string'
|
|
190
|
+
|
|
191
|
+
def detect_collection_type(self, schema: dict) -> str:
|
|
192
|
+
"""
|
|
193
|
+
Determine if array should be 'set' based on uniqueItems.
|
|
194
|
+
|
|
195
|
+
Args:
|
|
196
|
+
schema (dict): The JSON schema array object
|
|
197
|
+
|
|
198
|
+
Returns:
|
|
199
|
+
str: Either 'set' or array (for list) """
|
|
200
|
+
if schema.get('type') == 'array' and schema.get('uniqueItems', False):
|
|
201
|
+
return 'set'
|
|
202
|
+
return 'array'
|
|
203
|
+
|
|
204
|
+
def should_convert_to_map(self, json_object: dict) -> bool:
|
|
205
|
+
"""
|
|
206
|
+
Determine if object should be converted to map type.
|
|
207
|
+
|
|
208
|
+
Args:
|
|
209
|
+
json_object (dict): The JSON schema object
|
|
210
|
+
|
|
211
|
+
Returns:
|
|
212
|
+
bool: True if should be converted to map
|
|
213
|
+
"""
|
|
214
|
+
if not self.convert_empty_objects_to_maps:
|
|
215
|
+
return False
|
|
216
|
+
|
|
217
|
+
# Convert if object has only additionalProperties and no properties
|
|
218
|
+
if ('additionalProperties' in json_object and
|
|
219
|
+
(not 'properties' in json_object or len(json_object['properties']) == 0) and
|
|
220
|
+
(not 'patternProperties' in json_object or len(json_object['patternProperties']) == 0)):
|
|
221
|
+
return True
|
|
222
|
+
|
|
223
|
+
# Convert if object has only patternProperties and no properties or additionalProperties
|
|
224
|
+
if ('patternProperties' in json_object and
|
|
225
|
+
(not 'properties' in json_object or len(json_object['properties']) == 0) and
|
|
226
|
+
'additionalProperties' not in json_object):
|
|
227
|
+
return True
|
|
228
|
+
|
|
229
|
+
return False
|
|
230
|
+
|
|
231
|
+
def detect_discriminator_pattern(self, schema: dict) -> dict | None:
|
|
232
|
+
"""
|
|
233
|
+
Detect OpenAPI discriminator patterns for choice type.
|
|
234
|
+
|
|
235
|
+
Args:
|
|
236
|
+
schema (dict): The JSON schema object
|
|
237
|
+
|
|
238
|
+
Returns:
|
|
239
|
+
dict | None: Discriminator info if detected, None otherwise
|
|
240
|
+
"""
|
|
241
|
+
if not self.detect_discriminators:
|
|
242
|
+
return None
|
|
243
|
+
|
|
244
|
+
# Check for OpenAPI discriminator
|
|
245
|
+
if 'discriminator' in schema:
|
|
246
|
+
discriminator = schema['discriminator']
|
|
247
|
+
if isinstance(discriminator, dict) and 'propertyName' in discriminator:
|
|
248
|
+
return {
|
|
249
|
+
'propertyName': discriminator['propertyName'],
|
|
250
|
+
'mapping': discriminator.get('mapping', {})
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
return None
|
|
254
|
+
|
|
255
|
+
# Check for oneOf with discriminator-like pattern
|
|
256
|
+
if 'oneOf' in schema:
|
|
257
|
+
# Look for common property across all oneOf options that could be a discriminator
|
|
258
|
+
oneof_options = schema['oneOf']
|
|
259
|
+
if len(oneof_options) > 1:
|
|
260
|
+
common_props = None
|
|
261
|
+
for option in oneof_options:
|
|
262
|
+
if '$ref' in option:
|
|
263
|
+
continue # Skip refs for now
|
|
264
|
+
if 'properties' in option:
|
|
265
|
+
props = set(option['properties'].keys())
|
|
266
|
+
if common_props is None:
|
|
267
|
+
common_props = props
|
|
268
|
+
else:
|
|
269
|
+
common_props = common_props.intersection(props)
|
|
270
|
+
|
|
271
|
+
# If there's exactly one common property, it might be a discriminator
|
|
272
|
+
if common_props and len(common_props) == 1:
|
|
273
|
+
prop_name = list(common_props)[0]
|
|
274
|
+
return {
|
|
275
|
+
'property': prop_name,
|
|
276
|
+
'mapping': {} # Would need more analysis to populate
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
return None
|
|
280
|
+
|
|
281
|
+
def detect_inheritance_pattern(self, schema: dict, type_name: str = '') -> dict | None:
|
|
282
|
+
"""
|
|
283
|
+
Detect simple inheritance patterns in allOf schemas.
|
|
284
|
+
|
|
285
|
+
Only detects patterns with exactly 2 items where one is a $ref and the other
|
|
286
|
+
contains properties/required/other object schema keywords.
|
|
287
|
+
Excludes self-referential patterns.
|
|
288
|
+
|
|
289
|
+
Args:
|
|
290
|
+
schema (dict): The JSON schema object
|
|
291
|
+
type_name (str): The name of the current type (to detect self-references)
|
|
292
|
+
|
|
293
|
+
Returns:
|
|
294
|
+
dict | None: Inheritance info if detected, None otherwise
|
|
295
|
+
"""
|
|
296
|
+
if not self.detect_inheritance or 'allOf' not in schema:
|
|
297
|
+
return None
|
|
298
|
+
|
|
299
|
+
allof_items = schema['allOf']
|
|
300
|
+
|
|
301
|
+
# Only handle simple 2-item inheritance patterns
|
|
302
|
+
if len(allof_items) != 2:
|
|
303
|
+
return None
|
|
304
|
+
|
|
305
|
+
# Look for pattern: [{"$ref": "..."}, {"properties": {...}}] or similar
|
|
306
|
+
ref_item = None
|
|
307
|
+
extension_item = None
|
|
308
|
+
|
|
309
|
+
for item in allof_items:
|
|
310
|
+
if '$ref' in item and len(item) == 1: # Pure reference, no other properties
|
|
311
|
+
ref_item = item
|
|
312
|
+
elif ('type' in item or 'properties' in item or 'required' in item or
|
|
313
|
+
'additionalProperties' in item) and '$ref' not in item: # Pure extension, no ref
|
|
314
|
+
extension_item = item
|
|
315
|
+
|
|
316
|
+
# Only return inheritance info for simple base + extension pattern
|
|
317
|
+
if ref_item and extension_item:
|
|
318
|
+
base_ref = ref_item['$ref']
|
|
319
|
+
|
|
320
|
+
# Check for self-referential patterns
|
|
321
|
+
if base_ref.startswith('#/definitions/'):
|
|
322
|
+
ref_type_name = base_ref[14:] # Remove '#/definitions/'
|
|
323
|
+
if ref_type_name == type_name:
|
|
324
|
+
# Self-referential pattern - don't convert to inheritance
|
|
325
|
+
return None
|
|
326
|
+
|
|
327
|
+
return {
|
|
328
|
+
'base_ref': base_ref,
|
|
329
|
+
'extension': extension_item
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
return None
|
|
333
|
+
|
|
334
|
+
def json_schema_primitive_to_structure_type(self, json_primitive: Optional[str | list], format: Optional[str], enum: Optional[list], record_name: str, field_name: str, namespace: str, dependencies: list, schema: dict) -> str | dict[str, Any] | list:
|
|
335
|
+
"""
|
|
336
|
+
Convert a JSON Schema primitive type to JSON Structure primitive type.
|
|
337
|
+
|
|
338
|
+
Args:
|
|
339
|
+
json_primitive (str | list): The JSON Schema primitive type to be converted.
|
|
340
|
+
format (str | None): The format of the JSON primitive type, if applicable.
|
|
341
|
+
enum (list | None): The list of enum values, if applicable.
|
|
342
|
+
record_name (str): The name of the record.
|
|
343
|
+
field_name (str): The name of the field.
|
|
344
|
+
namespace (str): The namespace of the type.
|
|
345
|
+
dependencies (list): The list of dependencies.
|
|
346
|
+
schema (dict): The complete schema object for analysis. Returns:
|
|
347
|
+
str | dict[str,Any] | list: The converted JSON Structure primitive type. """
|
|
348
|
+
|
|
349
|
+
if isinstance(json_primitive, list):
|
|
350
|
+
if enum:
|
|
351
|
+
# Handle enum with multiple types (convert to string enum)
|
|
352
|
+
return {
|
|
353
|
+
'type': 'string',
|
|
354
|
+
'enum': list(enum)
|
|
355
|
+
}
|
|
356
|
+
else:
|
|
357
|
+
# Handle union types
|
|
358
|
+
union_types = []
|
|
359
|
+
for item in json_primitive:
|
|
360
|
+
if isinstance(item, str):
|
|
361
|
+
converted = self.json_schema_primitive_to_structure_type(
|
|
362
|
+
item, format, None, record_name, field_name, namespace, dependencies, schema)
|
|
363
|
+
union_types.append(converted)
|
|
364
|
+
elif isinstance(item, dict):
|
|
365
|
+
item_format = item.get('format', format)
|
|
366
|
+
item_enum = item.get('enum', enum)
|
|
367
|
+
item_type = item.get('type', item)
|
|
368
|
+
converted = self.json_schema_primitive_to_structure_type(
|
|
369
|
+
item_type, item_format, item_enum, record_name, field_name, namespace, dependencies, item)
|
|
370
|
+
union_types.append(converted)
|
|
371
|
+
# Always wrap as {"type": [ ... ]} for unions
|
|
372
|
+
return {"type": self.flatten_union(union_types, None, field_name)}
|
|
373
|
+
# ...existing code...
|
|
374
|
+
structure_type = None
|
|
375
|
+
|
|
376
|
+
if json_primitive == 'string':
|
|
377
|
+
if format:
|
|
378
|
+
if format in ('date', 'time', 'date-time', 'duration'):
|
|
379
|
+
structure_type = self.detect_temporal_type({'type': 'string', 'format': format})
|
|
380
|
+
elif format == 'uuid':
|
|
381
|
+
structure_type = 'uuid'
|
|
382
|
+
elif format == 'byte':
|
|
383
|
+
structure_type = 'string' # Map bytes to string in JSON Structure elif format == 'binary':
|
|
384
|
+
structure_type = 'string' # Map binary to string in JSON Structure
|
|
385
|
+
else:
|
|
386
|
+
structure_type = 'string'
|
|
387
|
+
else:
|
|
388
|
+
structure_type = 'string'
|
|
389
|
+
|
|
390
|
+
elif json_primitive == 'integer':
|
|
391
|
+
structure_type = self.detect_numeric_type({'type': 'integer', 'format': format, **schema})
|
|
392
|
+
|
|
393
|
+
elif json_primitive == 'number':
|
|
394
|
+
structure_type = self.detect_numeric_type({'type': 'number', 'format': format, **schema})
|
|
395
|
+
|
|
396
|
+
elif json_primitive == 'boolean':
|
|
397
|
+
structure_type = 'boolean'
|
|
398
|
+
|
|
399
|
+
elif json_primitive == 'null':
|
|
400
|
+
structure_type = 'null'
|
|
401
|
+
|
|
402
|
+
else:
|
|
403
|
+
# Handle case where type is not specified but enum is present
|
|
404
|
+
if json_primitive is None and enum is not None:
|
|
405
|
+
# Default to string type for enums without explicit type
|
|
406
|
+
structure_type = 'string'
|
|
407
|
+
else: # Unknown type, keep as string reference
|
|
408
|
+
if isinstance(json_primitive, str):
|
|
409
|
+
dependencies.append(json_primitive)
|
|
410
|
+
structure_type = json_primitive or 'string' # Ensure we never return None # Always return proper schema objects, not simple strings
|
|
411
|
+
if isinstance(structure_type, str):
|
|
412
|
+
result: dict[str, Any] = {'type': structure_type}
|
|
413
|
+
|
|
414
|
+
# Ensure map and set types are complete
|
|
415
|
+
if structure_type == 'map':
|
|
416
|
+
result['values'] = {'type': 'any'} # Default values type per user instruction
|
|
417
|
+
elif structure_type == 'set':
|
|
418
|
+
result['items'] = {'type': 'any'} # Default items type per user instruction
|
|
419
|
+
|
|
420
|
+
# Handle enums
|
|
421
|
+
if enum is not None:
|
|
422
|
+
result['enum'] = list(enum)
|
|
423
|
+
|
|
424
|
+
# Add constraints for string types
|
|
425
|
+
if structure_type == 'string' and isinstance(schema, dict):
|
|
426
|
+
if 'maxLength' in schema:
|
|
427
|
+
result['maxLength'] = schema['maxLength']
|
|
428
|
+
if 'minLength' in schema:
|
|
429
|
+
result['minLength'] = schema['minLength']
|
|
430
|
+
if 'pattern' in schema:
|
|
431
|
+
result['pattern'] = schema['pattern']
|
|
432
|
+
|
|
433
|
+
# Add precision/scale for decimal types
|
|
434
|
+
elif structure_type == 'decimal' and isinstance(schema, dict):
|
|
435
|
+
if 'multipleOf' in schema:
|
|
436
|
+
# Try to infer precision/scale from multipleOf
|
|
437
|
+
multiple_str = str(schema['multipleOf'])
|
|
438
|
+
if '.' in multiple_str:
|
|
439
|
+
scale = len(multiple_str.split('.')[1])
|
|
440
|
+
result['scale'] = str(scale)
|
|
441
|
+
|
|
442
|
+
return result
|
|
443
|
+
|
|
444
|
+
# If already a dict or other complex type, return as-is
|
|
445
|
+
return structure_type
|
|
446
|
+
|
|
447
|
+
def _hoist_definition(self, schema, structure_schema, name_hint):
|
|
448
|
+
"""
|
|
449
|
+
Hoist a compound schema to the top-level definitions and return a $ref.
|
|
450
|
+
"""
|
|
451
|
+
if 'definitions' not in structure_schema:
|
|
452
|
+
structure_schema['definitions'] = {}
|
|
453
|
+
# Generate a unique name
|
|
454
|
+
base = avro_name(name_hint or 'UnionType')
|
|
455
|
+
idx = 1
|
|
456
|
+
name = base
|
|
457
|
+
while name in structure_schema['definitions']:
|
|
458
|
+
idx += 1
|
|
459
|
+
name = f"{base}{idx}"
|
|
460
|
+
schema = dict(schema) # Copy
|
|
461
|
+
schema['name'] = name
|
|
462
|
+
structure_schema['definitions'][name] = schema
|
|
463
|
+
return {'$ref': f"#/definitions/{name}"}
|
|
464
|
+
|
|
465
|
+
    def _ensure_schema_object(self, value, structure_schema=None, name_hint=None, force_hoist_in_union=False):
        """
        Ensure that a value is wrapped as a proper JSON Structure schema object.

        Args:
            value: The value to wrap (dict, str, or list).
            structure_schema: The root structure schema, used for hoisting definitions.
            name_hint: Hint for naming hoisted definitions.
            force_hoist_in_union: Whether the value appears in a union context,
                where primitives stay as bare type strings and complex types
                are hoisted to definitions.

        Returns:
            A proper schema object with 'type' or '$ref' (or, in union
            contexts, a bare primitive type-name string).
        """
        # Always return a schema object (dict with at least 'type' or '$ref')
        # for use in properties/items/values.
        if isinstance(value, dict):
            # Special handling for $ref
            if '$ref' in value and len(value) == 1:  # Pure $ref reference
                if force_hoist_in_union:
                    # In union contexts, $ref is returned as-is (not wrapped in type).
                    return value
                else:
                    # In property contexts, wrap in a type field for JSON
                    # Structure compliance.
                    return {'type': value}

            # Dicts with composition keywords are preserved as-is for JSON
            # Structure conditional composition.
            if any(key in value for key in ['anyOf', 'oneOf', 'allOf']):
                return value

            # Ensure map types have values and set types have items.
            if value.get('type') == 'map' and 'values' not in value:
                value = dict(value)  # Copy to avoid modifying the caller's dict
                value['values'] = {'type': 'any'}  # Default values type
            elif value.get('type') == 'set' and 'items' not in value:
                value = dict(value)  # Copy to avoid modifying the caller's dict
                value['items'] = {'type': 'any'}  # Default items type
            # In union contexts, a simple primitive wrapper collapses to its
            # bare type string; anything more complex is hoisted.
            if force_hoist_in_union and ('$ref' not in value):
                # Simple primitive wrapper like {"type": "int32"}.
                if (len(value) == 1 and 'type' in value and
                        value['type'] in ['string', 'boolean', 'integer', 'number', 'null', 'int32', 'int64', 'float', 'double', 'decimal', 'uuid', 'date', 'time', 'datetime', 'duration', 'bytes']):
                    # Return the primitive type string directly for JSON
                    # Structure compliance.
                    return value['type']
                elif structure_schema is not None:
                    # For complex types, hoist to definitions.
                    return self._hoist_definition(value, structure_schema, name_hint or 'UnionType')
            return value
        elif isinstance(value, str):
            # Bare strings may denote incomplete complex types; complete them.
            if value == 'map':
                # Convert incomplete map type to a complete structure.
                schema_obj = {'type': 'map', 'values': {'type': 'any'}}
            elif value == 'set':
                # Convert incomplete set type to a complete structure.
                schema_obj = {'type': 'set', 'items': {'type': 'any'}}
            else:
                schema_obj = {'type': value}

            # For JSON Structure unions, primitive types should be direct
            # type strings, not hoisted.
            if force_hoist_in_union:
                # Only actual primitives stay bare; complex types like
                # map/set need their completed schema object.
                if value in ['string', 'boolean', 'integer', 'number', 'null', 'int32', 'int64', 'float', 'double', 'decimal', 'uuid', 'date', 'time', 'datetime', 'duration', 'bytes']:
                    return value
                else:
                    return schema_obj
            return schema_obj
        elif isinstance(value, list):
            # A list is a union: primitives stay as direct strings, complex
            # members are hoisted to definitions and referenced via $ref.
            result = []
            for idx, v in enumerate(value):
                if isinstance(v, str):
                    result.append(v)
                else:
                    obj = self._ensure_schema_object(v, structure_schema, f"{name_hint}_option_{idx}" if name_hint else None, force_hoist_in_union=True)
                    result.append(obj)
            return {"type": result}
        else:
            # Fallback for unexpected inputs (e.g. numbers/booleans):
            # default to string.
            return {'type': 'string'}
|
|
545
|
+
|
|
546
|
+
def _scan_for_uses(self, structure_schema: dict) -> list:
|
|
547
|
+
"""
|
|
548
|
+
Scan the structure schema for extension feature usage and return the list of required $uses.
|
|
549
|
+
"""
|
|
550
|
+
uses = set()
|
|
551
|
+
def scan(obj):
|
|
552
|
+
if isinstance(obj, dict):
|
|
553
|
+
for k, v in obj.items():
|
|
554
|
+
if k == 'altnames':
|
|
555
|
+
uses.add('JSONStructureAlternateNames')
|
|
556
|
+
if k in {'unit', 'currency', 'symbol'}:
|
|
557
|
+
uses.add('JSONStructureUnits')
|
|
558
|
+
if k in {'pattern', 'minLength', 'maxLength', 'minimum', 'maximum', 'exclusiveMinimum', 'exclusiveMaximum', 'multipleOf', 'const', 'enum', 'required', 'propertyNames', 'keyNames'}:
|
|
559
|
+
uses.add('JSONStructureValidation')
|
|
560
|
+
if k in {'if', 'then', 'else', 'dependentRequired', 'dependentSchemas', 'anyOf', 'allOf', 'oneOf', 'not'}:
|
|
561
|
+
uses.add('JSONStructureConditionalComposition')
|
|
562
|
+
scan(v)
|
|
563
|
+
elif isinstance(obj, list):
|
|
564
|
+
for item in obj:
|
|
565
|
+
scan(item)
|
|
566
|
+
scan(structure_schema)
|
|
567
|
+
return sorted(uses)
|
|
568
|
+
|
|
569
|
+
def _ensure_validation_extension_in_structure_schema(self, structure_schema) -> None:
|
|
570
|
+
"""
|
|
571
|
+
Ensure that the JSONStructureValidation extension is included in the $uses array.
|
|
572
|
+
This is handled automatically by the _scan_for_uses method when propertyNames or keyNames are detected.
|
|
573
|
+
|
|
574
|
+
Args:
|
|
575
|
+
structure_schema: The structure schema to update (dict or list)
|
|
576
|
+
"""
|
|
577
|
+
# No action needed - the _scan_for_uses method automatically detects
|
|
578
|
+
# propertyNames and keyNames and adds JSONStructureValidation to $uses
|
|
579
|
+
pass
|
|
580
|
+
|
|
581
|
+
def create_structure_object(self, properties: dict, required: list, record_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int = 1, original_schema: dict | None = None) -> dict:
    """
    Create a JSON Structure object type from properties.

    Args:
        properties (dict): The properties of the object
        required (list): List of required property names
        record_name (str): Name of the record
        namespace (str): Namespace
        dependencies (list): Dependencies list
        json_schema (dict): The full JSON schema
        base_uri (str): Base URI
        structure_schema (dict): Structure schema that nested definitions are hoisted into
        record_stack (list): Record stack for recursion detection
        recursion_depth (int): Current recursion depth
        original_schema (dict): The original JSON schema object containing
            additionalProperties/patternProperties, if available

    Returns:
        dict: JSON Structure object definition; for the multi-patternProperties
        special case, the result of create_pattern_union_maps is returned instead
    """
    # Create the basic structure object
    structure_obj = {
        'type': 'object'
    }

    # Add required field if it's not empty
    if required:
        structure_obj['required'] = required

    # Add name if provided
    if record_name:
        structure_obj['name'] = avro_name(record_name)

    # Initialize properties dict only if we have properties to add
    has_properties = bool(properties)
    if has_properties:
        structure_obj['properties'] = {}

        # Process regular properties
        for prop_name, prop_schema in properties.items():
            prop_type = self.json_type_to_structure_type(
                prop_schema, record_name, prop_name, namespace, dependencies,
                json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
            )
            # Normalize property name if needed
            if not self.is_valid_identifier(prop_name):
                normalized_name = self.normalize_identifier(prop_name)
                prop_entry = self._ensure_schema_object(prop_type, structure_schema, normalized_name)
                # Always create a new dict to add altnames
                new_entry = {}
                if isinstance(prop_entry, dict):
                    new_entry.update(prop_entry)
                else:
                    new_entry['type'] = prop_entry
                # Keep the original JSON property name reachable via altnames
                new_entry['altnames'] = {'json': prop_name}
                structure_obj['properties'][normalized_name] = new_entry
            else:
                structure_obj['properties'][prop_name] = self._ensure_schema_object(prop_type, structure_schema, prop_name)

    # Handle patternProperties and additionalProperties
    has_additional_schema = False
    if original_schema:
        # Check for patternProperties that coexist with properties/additionalProperties
        pattern_properties = original_schema.get('patternProperties')
        additional_props = original_schema.get('additionalProperties')
        # Special case: multiple patternProperties with no properties
        # Should create a type union of maps, not a single object with anyOf
        # This applies whether additionalProperties is false OR a schema
        # NOTE(review): this early return happens after the properties loop above
        # may already have hoisted entries into structure_schema — confirm that
        # is harmless when the union path is taken.
        if (pattern_properties and len(pattern_properties) > 1 and
                (not 'properties' in original_schema or not original_schema['properties'])):
            # Return type union of maps instead of object
            return self.create_pattern_union_maps(
                pattern_properties, additional_props, record_name, namespace, dependencies,
                json_schema, base_uri, structure_schema, record_stack, recursion_depth
            )

        # Merge patternProperties into additionalProperties if both exist
        if pattern_properties and ('properties' in original_schema or additional_props is not None):
            # patternProperties coexists with properties/additionalProperties -
            # merge into additionalProperties.
            # Get the pattern schema for values (merge all pattern schemas)
            if len(pattern_properties) == 1:
                pattern_schema = list(pattern_properties.values())[0]
            else:
                # Multiple patterns - create a union type instead of anyOf
                schemas = list(pattern_properties.values())
                # Convert each schema and create a proper union
                converted_schemas = []
                for idx, schema in enumerate(schemas):
                    converted_schema = self.json_type_to_structure_type(
                        schema, record_name, f'pattern_{idx}', namespace, dependencies,
                        json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
                    )
                    converted_schemas.append(converted_schema)

                if len(converted_schemas) == 1:
                    pattern_schema = converted_schemas[0]
                else:
                    # Create union type array - hoist compound types if needed
                    hoisted_schemas = []
                    for idx, schema in enumerate(converted_schemas):
                        hoisted_schema = self._ensure_schema_object(schema, structure_schema, f'pattern_{idx}', force_hoist_in_union=True)
                        hoisted_schemas.append(hoisted_schema)
                    pattern_schema = {'type': hoisted_schemas}

            if additional_props is False:
                # Override false additionalProperties with pattern schema
                merged_additional = pattern_schema
            elif additional_props is True:
                # Keep true (allow any additional properties)
                merged_additional = True
            elif isinstance(additional_props, dict):
                # Merge both schemas using a union type instead of anyOf
                additional_converted = self.json_type_to_structure_type(
                    additional_props, record_name, 'additional', namespace, dependencies,
                    json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
                )

                # Create union of additional props and pattern schema
                additional_hoisted = self._ensure_schema_object(additional_converted, structure_schema, 'additional', force_hoist_in_union=True)
                pattern_hoisted = self._ensure_schema_object(pattern_schema, structure_schema, 'pattern', force_hoist_in_union=True)

                merged_additional = {
                    'type': [additional_hoisted, pattern_hoisted]
                }
            elif additional_props is None:
                # No additionalProperties, use pattern schema
                merged_additional = pattern_schema
            else:
                # Any other (unexpected) additionalProperties value also falls back
                # to the pattern schema; intentionally identical to the None case.
                merged_additional = pattern_schema
            # Convert merged schema to structure type
            if merged_additional is not True and isinstance(merged_additional, dict):
                additional_type = self.json_type_to_structure_type(
                    merged_additional, record_name, 'additionalProperty', namespace, dependencies,
                    json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
                )
                structure_obj['additionalProperties'] = self._ensure_schema_object(additional_type, structure_schema, 'additionalProperty', force_hoist_in_union=True)
                has_additional_schema = True
            elif merged_additional is True:
                structure_obj['additionalProperties'] = True
                has_additional_schema = True
            # Add propertyNames validation for the patterns
            patterns = list(pattern_properties.keys())
            if len(patterns) == 1:
                # Single pattern - use it directly
                pattern = patterns[0]
                structure_obj['propertyNames'] = {
                    "type": "string",
                    "pattern": pattern
                }
            else:
                # Multiple patterns - in JSON Structure, we cannot use anyOf for propertyNames
                # Skip propertyNames validation when there are multiple patterns
                # The patterns are already handled via the merged additionalProperties schema
                pass

            # Ensure $uses includes JSONStructureValidation
            self._ensure_validation_extension_in_structure_schema(structure_schema)

        elif additional_props is not None and additional_props is not False:
            # Handle additionalProperties without patternProperties
            if isinstance(additional_props, dict):
                # Convert the additionalProperties schema to JSON Structure type
                additional_type = self.json_type_to_structure_type(
                    additional_props, record_name, 'additionalProperty', namespace, dependencies,
                    json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
                )
                structure_obj['additionalProperties'] = self._ensure_schema_object(additional_type, structure_schema, 'additionalProperty')
                has_additional_schema = True
            elif additional_props is True:
                # True means any additional properties are allowed with any type
                structure_obj['additionalProperties'] = True
                has_additional_schema = True

    # For JSON Structure compliance: If we have no properties and no additionalProperties/extension,
    # add a default additionalProperties to make the object schema valid
    if not has_properties and not has_additional_schema and '$extends' not in structure_obj:
        # Add default additionalProperties to make the object valid per JSON Structure spec
        structure_obj['additionalProperties'] = True

    return structure_obj
|
|
756
|
+
|
|
757
|
+
def create_structure_choice(self, discriminator_info: dict, oneof_options: list, record_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int = 1) -> dict:
    """
    Build a JSON Structure 'choice' type from a discriminated oneOf pattern.

    Args:
        discriminator_info (dict): Discriminator information; the 'property' key
            names the discriminating field
        oneof_options (list): The oneOf alternatives from the JSON schema
        record_name (str): Name of the record
        namespace (str): Namespace
        dependencies (list): Dependencies list
        json_schema (dict): The full JSON schema
        base_uri (str): Base URI
        structure_schema (dict): Structure schema list
        record_stack (list): Record stack for recursion detection
        recursion_depth (int): Current recursion depth

    Returns:
        dict: JSON Structure choice definition
    """
    result = {
        'type': 'choice',
        'discriminator': discriminator_info['property'],
        'choices': {}
    }

    if record_name:
        result['name'] = avro_name(record_name)

    # One entry per alternative; keys are synthetic ("option_<i>") since the
    # discriminator mapping is not consulted here.
    for index, option in enumerate(oneof_options):
        key = f"option_{index}"
        if '$ref' in option:
            # References pass through untouched.
            result['choices'][key] = {'$ref': option['$ref']}
        else:
            # Inline alternatives are converted recursively.
            result['choices'][key] = self.json_type_to_structure_type(
                option, record_name, f"choice_{index}", namespace, dependencies,
                json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
            )

    return result
|
|
801
|
+
|
|
802
|
+
def create_structure_map(self, values_schema: dict, record_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int = 1) -> dict:
    """
    Build a JSON Structure 'map' definition from a value schema.

    Args:
        values_schema (dict): Schema for map values
        record_name (str): Name of the record
        namespace (str): Namespace
        dependencies (list): Dependencies list
        json_schema (dict): The full JSON schema
        base_uri (str): Base URI
        structure_schema (dict): Structure schema list
        record_stack (list): Record stack for recursion detection
        recursion_depth (int): Current recursion depth

    Returns:
        dict: JSON Structure map definition
    """
    value_type = self.json_type_to_structure_type(
        values_schema, record_name, 'value', namespace, dependencies,
        json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
    )
    # NOTE(review): unlike create_structure_array_or_set, the converted value
    # type is deliberately NOT passed through _ensure_schema_object here —
    # confirm whether this asymmetry is intended.
    return {
        'type': 'map',
        'values': value_type
    }
|
|
828
|
+
|
|
829
|
+
def create_structure_map_with_pattern(self, values_schema: dict, pattern_properties: dict, record_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int = 1) -> dict:
    """
    Build a JSON Structure map whose keys are validated via keyNames.

    Args:
        values_schema (dict): Schema for map values
        pattern_properties (dict): patternProperties object; keys are regex patterns
        record_name (str): Name of the record
        namespace (str): Namespace
        dependencies (list): Dependencies list
        json_schema (dict): The full JSON schema
        base_uri (str): Base URI
        structure_schema (dict): Structure schema list
        record_stack (list): Record stack for recursion detection
        recursion_depth (int): Current recursion depth

    Returns:
        dict: JSON Structure map definition, with a keyNames constraint when
        at least one key pattern is present
    """
    result = self.create_structure_map(
        values_schema, record_name, namespace, dependencies,
        json_schema, base_uri, structure_schema, record_stack, recursion_depth
    )

    if pattern_properties:
        key_patterns = list(pattern_properties.keys())
        if len(key_patterns) == 1:
            # One pattern: attach it directly as the keyNames constraint.
            result['keyNames'] = {
                "type": "string",
                "pattern": key_patterns[0]
            }
        else:
            # Several patterns: combine them under anyOf.
            result['keyNames'] = {
                "anyOf": [{"type": "string", "pattern": p} for p in key_patterns]
            }
        # Declare the JSONStructureValidation extension in $uses.
        self._ensure_validation_extension_in_structure_schema(structure_schema)

    return result
|
|
880
|
+
|
|
881
|
+
def create_structure_array_or_set(self, items_schema: dict, is_set: bool, record_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int = 1) -> dict:
    """
    Build a JSON Structure 'array' or 'set' definition.

    Args:
        items_schema (dict): Schema for array/set items
        is_set (bool): True for set, False for array
        record_name (str): Name of the record
        namespace (str): Namespace
        dependencies (list): Dependencies list
        json_schema (dict): The full JSON schema
        base_uri (str): Base URI
        structure_schema (dict): Structure schema list
        record_stack (list): Record stack for recursion detection
        recursion_depth (int): Current recursion depth

    Returns:
        dict: JSON Structure array/set definition
    """
    converted_items = self.json_type_to_structure_type(
        items_schema, record_name, 'item', namespace, dependencies,
        json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
    )
    # The item type is always wrapped/hoisted into a proper schema object.
    wrapped_items = self._ensure_schema_object(converted_items, structure_schema, 'item')
    container_kind = 'set' if is_set else 'array'
    return {
        'type': container_kind,
        'items': wrapped_items
    }
|
|
910
|
+
|
|
911
|
+
def add_alternate_names(self, structure: dict, original_name: str) -> dict:
    """
    Attach naming-convention variants for a property/type name via altnames.

    A snake_case name gains a camelCase variant; a camelCase name gains a
    snake_case variant. Names matching neither convention are left untouched.

    Args:
        structure (dict): The structure definition (or a bare type string)
        original_name (str): The original property/type name

    Returns:
        dict: Structure with altnames added (wrapped in a dict if needed)
    """
    if not original_name:
        return structure

    variants = {}
    if '_' in original_name:
        # snake_case input -> derive camelCase (first piece kept verbatim)
        pieces = original_name.split('_')
        variants['camelCase'] = pieces[0] + ''.join(p.capitalize() for p in pieces[1:])
    elif any(ch.isupper() for ch in original_name):
        # camelCase input -> derive snake_case
        variants['snake_case'] = re.sub('([a-z0-9])([A-Z])', r'\1_\2', original_name).lower()

    if variants:
        if not isinstance(structure, dict):
            # Promote a bare type string to a schema object so altnames fits.
            structure = {'type': structure}
        structure['altnames'] = variants

    return structure
|
|
943
|
+
|
|
944
|
+
def add_validation_constraints(self, structure: dict, schema: dict) -> dict:
    """
    Copy JSON Schema validation constraints onto a JSON Structure definition.

    Args:
        structure (dict): The structure definition (or a bare type string)
        schema (dict): The original JSON schema

    Returns:
        dict: Structure with validation constraints added
    """
    if not isinstance(structure, dict):
        # Promote a bare type string to a schema object first.
        structure = {'type': structure}

    constraint_names = (
        'minimum', 'maximum', 'exclusiveMinimum', 'exclusiveMaximum',
        'minLength', 'maxLength', 'pattern', 'minItems', 'maxItems',
        'const', 'enum',
    )
    range_names = ('minimum', 'maximum', 'exclusiveMinimum', 'exclusiveMaximum')

    # int64 numeric bounds are serialized as strings (precision-safe in JSON).
    int64_target = structure.get('type') == 'int64'

    for name in constraint_names:
        if name not in schema:
            continue
        value = schema[name]
        if int64_target and name in range_names and isinstance(value, (int, float)):
            structure[name] = str(int(value))
        else:
            structure[name] = value

    return structure
|
|
978
|
+
|
|
979
|
+
def ensure_object_compliance(self, structure: dict) -> dict:
    """
    Make an 'object' definition valid per the JSON Structure spec.

    An object that declares an empty 'properties' map but has neither
    additionalProperties nor $extends gets additionalProperties: true so
    it remains a valid schema.

    Args:
        structure (dict): The structure definition

    Returns:
        dict: Structure with JSON Structure compliance ensured
    """
    # Non-dicts and non-object types pass through unchanged.
    if not isinstance(structure, dict) or structure.get('type') != 'object':
        return structure

    props_declared = 'properties' in structure
    props_populated = props_declared and structure['properties']

    if (props_declared and not props_populated
            and 'additionalProperties' not in structure
            and '$extends' not in structure):
        structure['additionalProperties'] = True

    return structure
|
|
1003
|
+
|
|
1004
|
+
def flatten_union(self, type_list: list, structure_schema=None, name_hint=None) -> list:
    """
    Flatten a (possibly nested) union type list into a single de-duplicated list.

    Bare single-key primitive wrappers like {"type": "boolean"} collapse to
    their type string; everything else is normalized via _ensure_schema_object
    with hoisting forced for union members.

    Args:
        type_list (list): The list of types in a union.
        structure_schema: Hoisting target passed through to _ensure_schema_object.
        name_hint: Optional base name used to derive hoisted type names.

    Returns:
        list: The flattened list of types.
    """
    primitives = ['string', 'boolean', 'integer', 'number', 'null']
    result = []

    def _append_unique(entry):
        # Preserve first-seen order while dropping duplicates.
        if entry not in result:
            result.append(entry)

    for position, member in enumerate(type_list):
        hint = f"{name_hint}_option_{position}" if name_hint else None
        if isinstance(member, list):
            # Recursively flatten nested unions, then normalize each member.
            for nested in self.flatten_union(member, structure_schema, name_hint):
                _append_unique(self._ensure_schema_object(nested, structure_schema, hint, force_hoist_in_union=True))
        elif isinstance(member, dict) and len(member) == 1 and member.get('type') in primitives:
            # Simple primitive wrapper -> just the type string.
            _append_unique(member['type'])
        else:
            _append_unique(self._ensure_schema_object(member, structure_schema, hint, force_hoist_in_union=True))

    return result
|
|
1034
|
+
|
|
1035
|
+
def merge_structure_schemas(self, schemas: list, structure_schemas: list, type_name: str | None = None, deps: List[str] = []) -> str | list | dict:
|
|
1036
|
+
"""Merge multiple JSON Structure type schemas into one."""
|
|
1037
|
+
|
|
1038
|
+
if len(schemas) == 1:
|
|
1039
|
+
return schemas[0]
|
|
1040
|
+
|
|
1041
|
+
merged_schema: dict = {}
|
|
1042
|
+
if type_name:
|
|
1043
|
+
merged_schema['name'] = type_name
|
|
1044
|
+
|
|
1045
|
+
for schema in schemas:
|
|
1046
|
+
schema = copy.deepcopy(schema)
|
|
1047
|
+
if isinstance(schema, dict) and 'dependencies' in schema:
|
|
1048
|
+
deps1: List[str] = merged_schema.get('dependencies', [])
|
|
1049
|
+
deps1.extend(schema['dependencies'])
|
|
1050
|
+
merged_schema['dependencies'] = deps1
|
|
1051
|
+
|
|
1052
|
+
if isinstance(schema, str):
|
|
1053
|
+
# Simple type reference
|
|
1054
|
+
if 'type' not in merged_schema:
|
|
1055
|
+
merged_schema['type'] = schema
|
|
1056
|
+
elif merged_schema['type'] != schema:
|
|
1057
|
+
# Type conflict, create union
|
|
1058
|
+
if not isinstance(merged_schema['type'], list):
|
|
1059
|
+
merged_schema['type'] = [merged_schema['type']]
|
|
1060
|
+
if schema not in merged_schema['type']:
|
|
1061
|
+
merged_schema['type'].append(schema)
|
|
1062
|
+
|
|
1063
|
+
elif isinstance(schema, dict):
|
|
1064
|
+
# Merge object schemas
|
|
1065
|
+
for key, value in schema.items():
|
|
1066
|
+
if key == 'properties' and 'properties' in merged_schema:
|
|
1067
|
+
# Merge properties
|
|
1068
|
+
for prop_name, prop_schema in value.items():
|
|
1069
|
+
if prop_name in merged_schema['properties']:
|
|
1070
|
+
# Property exists, merge types
|
|
1071
|
+
existing = merged_schema['properties'][prop_name]
|
|
1072
|
+
merged_schema['properties'][prop_name] = self.merge_structure_schemas(
|
|
1073
|
+
[existing, prop_schema], structure_schemas, None, deps)
|
|
1074
|
+
else:
|
|
1075
|
+
merged_schema['properties'][prop_name] = prop_schema
|
|
1076
|
+
elif key == 'required' and 'required' in merged_schema:
|
|
1077
|
+
# Merge required arrays
|
|
1078
|
+
merged_schema['required'] = list(set(merged_schema['required'] + value))
|
|
1079
|
+
else:
|
|
1080
|
+
merged_schema[key] = value
|
|
1081
|
+
|
|
1082
|
+
return merged_schema
|
|
1083
|
+
|
|
1084
|
+
def json_type_to_structure_type(self, json_type: str | dict, record_name: str, field_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth=1) -> dict | list | str:
|
|
1085
|
+
"""Convert a JSON Schema type to JSON Structure type."""
|
|
1086
|
+
|
|
1087
|
+
try:
|
|
1088
|
+
if recursion_depth >= self.max_recursion_depth:
|
|
1089
|
+
print(f'WARNING: Maximum recursion depth reached for {record_name} at field {field_name}')
|
|
1090
|
+
return 'string' # Fallback to string instead of generic_type() structure_type: dict = {}
|
|
1091
|
+
local_name = avro_name(field_name if field_name else record_name)
|
|
1092
|
+
|
|
1093
|
+
if isinstance(json_type, str):
|
|
1094
|
+
# Simple type reference
|
|
1095
|
+
return self.json_schema_primitive_to_structure_type(
|
|
1096
|
+
json_type, None, None, record_name, field_name, namespace, dependencies, {})
|
|
1097
|
+
|
|
1098
|
+
if isinstance(json_type, dict): # Handle inheritance pattern first (only if inheritance detection is enabled)
|
|
1099
|
+
inheritance_info = self.detect_inheritance_pattern(json_type, record_name)
|
|
1100
|
+
if inheritance_info:
|
|
1101
|
+
base_ref = inheritance_info['base_ref']
|
|
1102
|
+
extension = inheritance_info['extension']
|
|
1103
|
+
# Create abstract base type name
|
|
1104
|
+
if base_ref.startswith('#/definitions/'):
|
|
1105
|
+
base_type_name = base_ref[14:] # Remove '#/definitions/'
|
|
1106
|
+
abstract_base_name = avro_name(f"{base_type_name}Base")
|
|
1107
|
+
else:
|
|
1108
|
+
# Handle external references or other formats
|
|
1109
|
+
abstract_base_name = avro_name(f"{record_name}Base")
|
|
1110
|
+
|
|
1111
|
+
# Ensure the abstract base type exists
|
|
1112
|
+
self._ensure_abstract_base_type(base_ref, abstract_base_name, structure_schema, json_schema, base_uri)
|
|
1113
|
+
|
|
1114
|
+
structure_type = {
|
|
1115
|
+
'type': 'object',
|
|
1116
|
+
'name': record_name,
|
|
1117
|
+
'$extends': f"#/definitions/{abstract_base_name}"
|
|
1118
|
+
}
|
|
1119
|
+
if 'properties' in extension and extension['properties']:
|
|
1120
|
+
structure_type['properties'] = {}
|
|
1121
|
+
for prop_name, prop_schema in extension['properties'].items():
|
|
1122
|
+
prop_type = self.json_type_to_structure_type(
|
|
1123
|
+
prop_schema, record_name, prop_name, namespace, dependencies,
|
|
1124
|
+
json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
|
|
1125
|
+
)
|
|
1126
|
+
# Normalize property name if needed
|
|
1127
|
+
if not self.is_valid_identifier(prop_name):
|
|
1128
|
+
normalized_name = self.normalize_identifier(prop_name)
|
|
1129
|
+
prop_entry = self._ensure_schema_object(prop_type, structure_schema, normalized_name)
|
|
1130
|
+
# Always create a new dict to add altnames
|
|
1131
|
+
new_entry = {}
|
|
1132
|
+
if isinstance(prop_entry, dict):
|
|
1133
|
+
new_entry.update(prop_entry)
|
|
1134
|
+
else:
|
|
1135
|
+
new_entry['type'] = prop_entry
|
|
1136
|
+
new_entry['altnames'] = {'json': prop_name}
|
|
1137
|
+
structure_type['properties'][normalized_name] = new_entry
|
|
1138
|
+
else:
|
|
1139
|
+
structure_type['properties'][prop_name] = self._ensure_schema_object(prop_type, structure_schema, prop_name)
|
|
1140
|
+
|
|
1141
|
+
if 'required' in extension:
|
|
1142
|
+
structure_type['required'] = extension['required']
|
|
1143
|
+
|
|
1144
|
+
# Copy other extension properties (validation constraints, etc.)
|
|
1145
|
+
for key, value in extension.items():
|
|
1146
|
+
if key not in ['properties', 'required', 'type']:
|
|
1147
|
+
structure_type[key] = value
|
|
1148
|
+
|
|
1149
|
+
# Apply any remaining validation constraints from the original schema
|
|
1150
|
+
structure_type = self.add_validation_constraints(structure_type, json_type)
|
|
1151
|
+
|
|
1152
|
+
return structure_type
|
|
1153
|
+
|
|
1154
|
+
# Handle discriminator pattern
|
|
1155
|
+
discriminator_info = self.detect_discriminator_pattern(json_type)
|
|
1156
|
+
if discriminator_info and 'oneOf' in json_type:
|
|
1157
|
+
return self.create_structure_choice(
|
|
1158
|
+
discriminator_info, json_type['oneOf'], record_name, namespace,
|
|
1159
|
+
dependencies, json_schema, base_uri, structure_schema, record_stack, recursion_depth
|
|
1160
|
+
) # Handle $ref first (before checking for type)
|
|
1161
|
+
if '$ref' in json_type:
|
|
1162
|
+
ref = json_type['$ref']
|
|
1163
|
+
# Normalize references to use definitions instead of $defs
|
|
1164
|
+
if ref.startswith('#/$defs/'):
|
|
1165
|
+
ref = ref.replace('#/$defs/', '#/definitions/')
|
|
1166
|
+
elif ref.startswith('#/definitions/'):
|
|
1167
|
+
# Already correct format
|
|
1168
|
+
pass
|
|
1169
|
+
|
|
1170
|
+
# Handle nested JSON Pointer references like #/definitions/pipelineCommon/execution
|
|
1171
|
+
if '/' in ref.split('#/definitions/')[-1] and ref.startswith('#/definitions/'):
|
|
1172
|
+
try:
|
|
1173
|
+
# Resolve the nested JSON Pointer reference
|
|
1174
|
+
resolved_schema, _ = self.resolve_reference(json_type, base_uri, json_schema)
|
|
1175
|
+
if resolved_schema != json_type:
|
|
1176
|
+
# We successfully resolved a nested reference, process the resolved schema # Create a new definition name based on the nested path
|
|
1177
|
+
ref_parts = ref.split('/')
|
|
1178
|
+
if len(ref_parts) >= 4: # ['#', 'definitions', 'parent', 'child', ...]
|
|
1179
|
+
parent_name = ref_parts[2]
|
|
1180
|
+
child_path = '/'.join(ref_parts[3:])
|
|
1181
|
+
new_def_name = avro_name(f"{parent_name}_{child_path.replace('/', '_')}")
|
|
1182
|
+
|
|
1183
|
+
# Process the resolved schema recursively
|
|
1184
|
+
converted_schema = self.json_type_to_structure_type(
|
|
1185
|
+
resolved_schema, new_def_name, field_name, namespace, dependencies,
|
|
1186
|
+
json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
|
|
1187
|
+
)
|
|
1188
|
+
|
|
1189
|
+
# Check if we need to add to definitions
|
|
1190
|
+
if isinstance(converted_schema, dict) and converted_schema.get('type') in ['object', 'array', 'choice']:
|
|
1191
|
+
# Set name for the definition and add to structure_schema definitions
|
|
1192
|
+
converted_schema['name'] = new_def_name
|
|
1193
|
+
if 'definitions' not in structure_schema:
|
|
1194
|
+
structure_schema['definitions'] = {}
|
|
1195
|
+
structure_schema['definitions'][new_def_name] = converted_schema
|
|
1196
|
+
return {'$ref': f"#/definitions/{new_def_name}"}
|
|
1197
|
+
else:
|
|
1198
|
+
# For simple types, return the converted schema directly
|
|
1199
|
+
return converted_schema
|
|
1200
|
+
except Exception as e:
|
|
1201
|
+
# If resolution fails, fall back to original reference
|
|
1202
|
+
print(f"Failed to resolve nested reference {ref}: {e}")
|
|
1203
|
+
pass
|
|
1204
|
+
|
|
1205
|
+
# Check if we need to use type registry to normalize the reference
|
|
1206
|
+
# Extract the definition name from the reference
|
|
1207
|
+
if ref.startswith('#/definitions/'):
|
|
1208
|
+
def_name = ref[14:] # Remove '#/definitions/' prefix
|
|
1209
|
+
if def_name in self.type_registry:
|
|
1210
|
+
# Use the normalized reference from the registry
|
|
1211
|
+
ref = self.type_registry[def_name]
|
|
1212
|
+
|
|
1213
|
+
return {'$ref': ref}
|
|
1214
|
+
|
|
1215
|
+
# Handle schemas without explicit type
|
|
1216
|
+
if json_type.get('type') is None:
|
|
1217
|
+
if 'enum' in json_type:
|
|
1218
|
+
# Enum-only schema - default to string type
|
|
1219
|
+
enum_values = json_type.get('enum')
|
|
1220
|
+
structure_type = self.json_schema_primitive_to_structure_type(
|
|
1221
|
+
'string', json_type.get('format'), enum_values, record_name, field_name, namespace, dependencies, json_type
|
|
1222
|
+
)
|
|
1223
|
+
if isinstance(structure_type, dict):
|
|
1224
|
+
structure_type = self.add_validation_constraints(structure_type, json_type)
|
|
1225
|
+
return structure_type
|
|
1226
|
+
elif 'properties' in json_type or 'additionalProperties' in json_type or 'patternProperties' in json_type:
|
|
1227
|
+
# Object schema without explicit type - treat as object
|
|
1228
|
+
# Apply constraint composition conversion if applicable
|
|
1229
|
+
effective_schema = self._convert_constraint_composition_to_required(json_type)
|
|
1230
|
+
properties = effective_schema.get('properties', {})
|
|
1231
|
+
required = effective_schema.get('required', [])
|
|
1232
|
+
return self.create_structure_object(
|
|
1233
|
+
properties, required, record_name, namespace, dependencies,
|
|
1234
|
+
json_schema, base_uri, structure_schema, record_stack, recursion_depth, effective_schema
|
|
1235
|
+
)
|
|
1236
|
+
elif self.has_composition_keywords(json_type):
|
|
1237
|
+
# Handle composition keywords without explicit type
|
|
1238
|
+
# Continue to composition handling below instead of returning empty object
|
|
1239
|
+
pass
|
|
1240
|
+
else:
|
|
1241
|
+
# Other schema without type - default to generic object
|
|
1242
|
+
# Create a generic object that allows any properties
|
|
1243
|
+
return {
|
|
1244
|
+
'type': 'object',
|
|
1245
|
+
'properties': {} }
|
|
1246
|
+
|
|
1247
|
+
if json_type.get('type') and isinstance(json_type['type'], str):
|
|
1248
|
+
# Check if this schema also has composition keywords that should be preserved
|
|
1249
|
+
if self.preserve_composition and self.has_composition_keywords(json_type):
|
|
1250
|
+
# Skip primitive handling and continue to composition handling below
|
|
1251
|
+
pass
|
|
1252
|
+
else:
|
|
1253
|
+
format_hint = json_type.get('format')
|
|
1254
|
+
enum_values = json_type.get('enum')
|
|
1255
|
+
# Special handling for objects
|
|
1256
|
+
if json_type['type'] == 'object':
|
|
1257
|
+
# Check if should convert to map
|
|
1258
|
+
if self.should_convert_to_map(json_type):
|
|
1259
|
+
# Handle patternProperties conversion to map
|
|
1260
|
+
pattern_properties = json_type.get('patternProperties')
|
|
1261
|
+
if pattern_properties:
|
|
1262
|
+
# Get the pattern schema for values (merge all pattern schemas)
|
|
1263
|
+
if len(pattern_properties) == 1:
|
|
1264
|
+
pattern_schema = list(pattern_properties.values())[0]
|
|
1265
|
+
else:
|
|
1266
|
+
# Multiple patterns - merge schemas using anyOf
|
|
1267
|
+
schemas = list(pattern_properties.values())
|
|
1268
|
+
pattern_schema = {'anyOf': schemas}
|
|
1269
|
+
|
|
1270
|
+
# Convert patternProperties to map with keyNames validation
|
|
1271
|
+
return self.create_structure_map_with_pattern(
|
|
1272
|
+
pattern_schema, pattern_properties, record_name, namespace, dependencies,
|
|
1273
|
+
json_schema, base_uri, structure_schema, record_stack, recursion_depth
|
|
1274
|
+
)
|
|
1275
|
+
else:
|
|
1276
|
+
# Handle additionalProperties conversion to map
|
|
1277
|
+
additional_props = json_type.get('additionalProperties', True)
|
|
1278
|
+
if isinstance(additional_props, dict):
|
|
1279
|
+
return self.create_structure_map(
|
|
1280
|
+
additional_props, record_name, namespace, dependencies,
|
|
1281
|
+
json_schema, base_uri, structure_schema, record_stack, recursion_depth
|
|
1282
|
+
)
|
|
1283
|
+
else: return {
|
|
1284
|
+
'type': 'map',
|
|
1285
|
+
'values': {'type': 'string'} # Default for boolean additionalProperties
|
|
1286
|
+
}
|
|
1287
|
+
else:
|
|
1288
|
+
# Regular object - first check for discriminated union patterns
|
|
1289
|
+
if 'oneOf' in json_type:
|
|
1290
|
+
choice_info = self.detect_discriminated_union_pattern(json_type)
|
|
1291
|
+
if choice_info:
|
|
1292
|
+
# Convert to JSON Structure choice type
|
|
1293
|
+
choice_result = {
|
|
1294
|
+
'type': 'choice',
|
|
1295
|
+
'choices': choice_info['choices']
|
|
1296
|
+
}
|
|
1297
|
+
|
|
1298
|
+
# Add selector if specified (for tagged unions)
|
|
1299
|
+
if choice_info.get('selector'):
|
|
1300
|
+
choice_result['selector'] = choice_info['selector']
|
|
1301
|
+
|
|
1302
|
+
# Add name if we have one
|
|
1303
|
+
if record_name:
|
|
1304
|
+
choice_result['name'] = avro_name(record_name)
|
|
1305
|
+
|
|
1306
|
+
return choice_result
|
|
1307
|
+
# Regular object without discriminated union
|
|
1308
|
+
# Check if this is a bare object type that should be converted to "any"
|
|
1309
|
+
if (not json_type.get('properties') and
|
|
1310
|
+
not json_type.get('additionalProperties') and
|
|
1311
|
+
not json_type.get('patternProperties') and
|
|
1312
|
+
not json_type.get('required') and
|
|
1313
|
+
not json_type.get('$extends') and
|
|
1314
|
+
not any(k in json_type for k in ['allOf', 'anyOf', 'oneOf', 'if', 'then', 'else'])):
|
|
1315
|
+
# This is a bare "type": "object" which means "any object" in JSON Schema
|
|
1316
|
+
# Convert to "any" type in JSON Structure
|
|
1317
|
+
return {'type': 'any'}
|
|
1318
|
+
|
|
1319
|
+
# Apply constraint composition conversion if applicable
|
|
1320
|
+
effective_schema = self._convert_constraint_composition_to_required(json_type)
|
|
1321
|
+
properties = effective_schema.get('properties', {})
|
|
1322
|
+
required = effective_schema.get('required', [])
|
|
1323
|
+
return self.create_structure_object(
|
|
1324
|
+
properties, required, record_name, namespace, dependencies,
|
|
1325
|
+
json_schema, base_uri, structure_schema, record_stack, recursion_depth, json_type
|
|
1326
|
+
)
|
|
1327
|
+
# Special handling for arrays
|
|
1328
|
+
elif json_type['type'] == 'array':
|
|
1329
|
+
items_schema = json_type.get('items', {'type': 'string'})
|
|
1330
|
+
is_set = self.detect_collection_type(json_type) == 'set'
|
|
1331
|
+
return self.create_structure_array_or_set(
|
|
1332
|
+
items_schema, is_set, record_name, namespace, dependencies,
|
|
1333
|
+
json_schema, base_uri, structure_schema, record_stack, recursion_depth
|
|
1334
|
+
)
|
|
1335
|
+
|
|
1336
|
+
# Special handling for maps
|
|
1337
|
+
elif json_type['type'] == 'map':
|
|
1338
|
+
values_schema = json_type.get('values', {'type': 'string'})
|
|
1339
|
+
return self.create_structure_map(
|
|
1340
|
+
values_schema, record_name, namespace, dependencies,
|
|
1341
|
+
json_schema, base_uri, structure_schema, record_stack, recursion_depth
|
|
1342
|
+
)
|
|
1343
|
+
|
|
1344
|
+
# Special handling for sets
|
|
1345
|
+
elif json_type['type'] == 'set':
|
|
1346
|
+
items_schema = json_type.get('items', {'type': 'string'})
|
|
1347
|
+
return self.create_structure_array_or_set(
|
|
1348
|
+
items_schema, True, record_name, namespace, dependencies,
|
|
1349
|
+
json_schema, base_uri, structure_schema, record_stack, recursion_depth
|
|
1350
|
+
)
|
|
1351
|
+
|
|
1352
|
+
else:
|
|
1353
|
+
# Primitive type
|
|
1354
|
+
structure_type = self.json_schema_primitive_to_structure_type(
|
|
1355
|
+
json_type['type'], format_hint, enum_values, record_name, field_name, namespace, dependencies, json_type
|
|
1356
|
+
)
|
|
1357
|
+
# Add validation constraints
|
|
1358
|
+
if isinstance(structure_type, str):
|
|
1359
|
+
structure_type = self.add_validation_constraints({'type': structure_type}, json_type)
|
|
1360
|
+
if len(structure_type) == 1:
|
|
1361
|
+
structure_type = structure_type['type']
|
|
1362
|
+
elif isinstance(structure_type, dict):
|
|
1363
|
+
structure_type = self.add_validation_constraints(structure_type, json_type)
|
|
1364
|
+
return structure_type
|
|
1365
|
+
|
|
1366
|
+
# Handle composition keywords - resolve when preserve_composition is False
|
|
1367
|
+
if not self.preserve_composition and self.has_composition_keywords(json_type):
|
|
1368
|
+
return self.resolve_composition_keywords(
|
|
1369
|
+
json_type, record_name, field_name, namespace, dependencies,
|
|
1370
|
+
json_schema, base_uri, structure_schema, record_stack, recursion_depth
|
|
1371
|
+
)
|
|
1372
|
+
|
|
1373
|
+
# Handle composition keywords
|
|
1374
|
+
if self.preserve_composition:
|
|
1375
|
+
if 'allOf' in json_type and not inheritance_info:
|
|
1376
|
+
# Non-inheritance allOf - keep as-is or merge based on configuration
|
|
1377
|
+
allof_schemas = []
|
|
1378
|
+
for allof_item in json_type['allOf']:
|
|
1379
|
+
converted = self.json_type_to_structure_type(
|
|
1380
|
+
allof_item, record_name, field_name, namespace, dependencies,
|
|
1381
|
+
json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
|
|
1382
|
+
)
|
|
1383
|
+
allof_schemas.append(converted)
|
|
1384
|
+
# For now, merge them - JSON Structure supports allOf natively
|
|
1385
|
+
return {
|
|
1386
|
+
'allOf': allof_schemas
|
|
1387
|
+
}
|
|
1388
|
+
|
|
1389
|
+
if 'oneOf' in json_type and not discriminator_info: # Check if this is a discriminated union pattern
|
|
1390
|
+
choice_info = self.detect_discriminated_union_pattern(json_type)
|
|
1391
|
+
if choice_info:
|
|
1392
|
+
# Convert to JSON Structure choice type (tagged union)
|
|
1393
|
+
choice_result = {
|
|
1394
|
+
'type': 'choice',
|
|
1395
|
+
'choices': choice_info['choices']
|
|
1396
|
+
}
|
|
1397
|
+
|
|
1398
|
+
# Add selector if specified (for tagged unions)
|
|
1399
|
+
if choice_info.get('selector'):
|
|
1400
|
+
choice_result['selector'] = choice_info['selector']
|
|
1401
|
+
|
|
1402
|
+
# Add name if we have one
|
|
1403
|
+
if record_name:
|
|
1404
|
+
choice_result['name'] = avro_name(record_name)
|
|
1405
|
+
|
|
1406
|
+
return choice_result
|
|
1407
|
+
|
|
1408
|
+
# Regular oneOf without discriminator
|
|
1409
|
+
oneof_schemas = []
|
|
1410
|
+
for oneof_item in json_type['oneOf']:
|
|
1411
|
+
# For constraint-only schemas, preserve them but add type: object
|
|
1412
|
+
if self._is_constraint_only_schema(oneof_item):
|
|
1413
|
+
preserved_item = dict(oneof_item)
|
|
1414
|
+
preserved_item['type'] = 'object'
|
|
1415
|
+
# Add properties for required fields to make it valid JSON Structure
|
|
1416
|
+
if 'required' in preserved_item and 'properties' not in preserved_item:
|
|
1417
|
+
preserved_item['properties'] = {}
|
|
1418
|
+
for req_field in preserved_item['required']:
|
|
1419
|
+
preserved_item['properties'][req_field] = {'type': 'any'}
|
|
1420
|
+
# Allow additional properties since this is a constraint-only schema
|
|
1421
|
+
preserved_item['additionalProperties'] = True
|
|
1422
|
+
oneof_schemas.append(preserved_item)
|
|
1423
|
+
else:
|
|
1424
|
+
converted = self.json_type_to_structure_type(
|
|
1425
|
+
oneof_item, record_name, field_name, namespace, dependencies,
|
|
1426
|
+
json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
|
|
1427
|
+
)
|
|
1428
|
+
oneof_schemas.append(converted)
|
|
1429
|
+
return {
|
|
1430
|
+
'oneOf': oneof_schemas
|
|
1431
|
+
}
|
|
1432
|
+
|
|
1433
|
+
if 'anyOf' in json_type: # Check if this is a constraint-only anyOf pattern that should be converted to permutations
|
|
1434
|
+
anyof_items = json_type['anyOf']
|
|
1435
|
+
constraint_only = all(
|
|
1436
|
+
self._is_constraint_only_schema(item) and 'required' in item
|
|
1437
|
+
for item in anyof_items
|
|
1438
|
+
)
|
|
1439
|
+
|
|
1440
|
+
if constraint_only:
|
|
1441
|
+
# Convert constraint-only anyOf to permutations and return as object with required
|
|
1442
|
+
converted_schema = self._convert_constraint_anyof_to_permutations(json_type)
|
|
1443
|
+
|
|
1444
|
+
# Use create_structure_object to properly handle patternProperties
|
|
1445
|
+
result = self.create_structure_object(
|
|
1446
|
+
json_type.get('properties', {}),
|
|
1447
|
+
converted_schema.get('required', []),
|
|
1448
|
+
record_name, namespace, dependencies,
|
|
1449
|
+
json_schema, base_uri, structure_schema,
|
|
1450
|
+
record_stack, recursion_depth, json_type
|
|
1451
|
+
)
|
|
1452
|
+
# Add other properties from the original schema (except structural properties)
|
|
1453
|
+
for key, value in json_type.items():
|
|
1454
|
+
if key not in ['anyOf', 'type', 'properties', 'required', 'patternProperties', 'additionalProperties']:
|
|
1455
|
+
result[key] = value
|
|
1456
|
+
return result
|
|
1457
|
+
else:
|
|
1458
|
+
# Regular anyOf composition - preserve original structure when preserve_composition=True
|
|
1459
|
+
anyof_schemas = []
|
|
1460
|
+
for anyof_item in anyof_items:
|
|
1461
|
+
converted = self.json_type_to_structure_type(
|
|
1462
|
+
anyof_item, record_name, field_name, namespace, dependencies,
|
|
1463
|
+
json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
|
|
1464
|
+
)
|
|
1465
|
+
anyof_schemas.append(converted)
|
|
1466
|
+
|
|
1467
|
+
return {
|
|
1468
|
+
'anyOf': anyof_schemas
|
|
1469
|
+
}
|
|
1470
|
+
|
|
1471
|
+
# Handle conditional schemas (if/then/else)
|
|
1472
|
+
if 'if' in json_type:
|
|
1473
|
+
# Preserve conditional schemas as-is in JSON Structure
|
|
1474
|
+
result = {}
|
|
1475
|
+
|
|
1476
|
+
# Process if clause
|
|
1477
|
+
if_schema = self.json_type_to_structure_type(
|
|
1478
|
+
json_type['if'], record_name, field_name, namespace, dependencies,
|
|
1479
|
+
json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
|
|
1480
|
+
)
|
|
1481
|
+
result['if'] = if_schema
|
|
1482
|
+
|
|
1483
|
+
# Process then clause if present
|
|
1484
|
+
if 'then' in json_type:
|
|
1485
|
+
then_schema = self.json_type_to_structure_type(
|
|
1486
|
+
json_type['then'], record_name, field_name, namespace, dependencies,
|
|
1487
|
+
json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
|
|
1488
|
+
)
|
|
1489
|
+
result['then'] = then_schema
|
|
1490
|
+
|
|
1491
|
+
# Process else clause if present
|
|
1492
|
+
if 'else' in json_type:
|
|
1493
|
+
else_schema = self.json_type_to_structure_type(
|
|
1494
|
+
json_type['else'], record_name, field_name, namespace, dependencies,
|
|
1495
|
+
json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
|
|
1496
|
+
)
|
|
1497
|
+
result['else'] = else_schema
|
|
1498
|
+
# Add any other properties from the original schema
|
|
1499
|
+
for key, value in json_type.items():
|
|
1500
|
+
if key not in ['if', 'then', 'else']:
|
|
1501
|
+
if key in ['properties', 'required', 'type']:
|
|
1502
|
+
# Handle structural properties
|
|
1503
|
+
if key == 'properties':
|
|
1504
|
+
converted_props = {}
|
|
1505
|
+
for prop_name, prop_schema in value.items():
|
|
1506
|
+
prop_type = self.json_type_to_structure_type(
|
|
1507
|
+
prop_schema, record_name, prop_name, namespace, dependencies,
|
|
1508
|
+
json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
|
|
1509
|
+
)
|
|
1510
|
+
converted_props[prop_name] = self._ensure_schema_object(prop_type, structure_schema, prop_name)
|
|
1511
|
+
result[key] = converted_props
|
|
1512
|
+
else:
|
|
1513
|
+
result[key] = value
|
|
1514
|
+
else:
|
|
1515
|
+
# Copy validation and other properties as-is
|
|
1516
|
+
result[key] = value
|
|
1517
|
+
|
|
1518
|
+
# Ensure conditional object schemas are valid JSON Structure
|
|
1519
|
+
if result.get('type') == 'object':
|
|
1520
|
+
# If we have no properties but we're an object type, add additionalProperties
|
|
1521
|
+
if 'properties' not in result and '$extends' not in result:
|
|
1522
|
+
result['additionalProperties'] = True
|
|
1523
|
+
# If we have empty properties, remove it and add additionalProperties instead
|
|
1524
|
+
elif 'properties' in result and not result['properties']:
|
|
1525
|
+
del result['properties']
|
|
1526
|
+
if 'additionalProperties' not in result:
|
|
1527
|
+
result['additionalProperties'] = True
|
|
1528
|
+
|
|
1529
|
+
return result
|
|
1530
|
+
|
|
1531
|
+
# Handle const
|
|
1532
|
+
if 'const' in json_type:
|
|
1533
|
+
const_value = json_type['const']
|
|
1534
|
+
if isinstance(const_value, str):
|
|
1535
|
+
return {
|
|
1536
|
+
'type': 'string',
|
|
1537
|
+
'const': const_value
|
|
1538
|
+
}
|
|
1539
|
+
elif isinstance(const_value, (int, float)):
|
|
1540
|
+
return {
|
|
1541
|
+
'type': 'int32' if isinstance(const_value, int) else 'double',
|
|
1542
|
+
'const': const_value
|
|
1543
|
+
}
|
|
1544
|
+
elif isinstance(const_value, bool):
|
|
1545
|
+
return {
|
|
1546
|
+
'type': 'boolean',
|
|
1547
|
+
'const': const_value
|
|
1548
|
+
}# Fallback for unhandled cases
|
|
1549
|
+
if 'properties' in json_type: # Treat as object even without explicit type
|
|
1550
|
+
properties = json_type['properties']
|
|
1551
|
+
required = json_type.get('required', [])
|
|
1552
|
+
return self.create_structure_object(
|
|
1553
|
+
properties, required, record_name, namespace, dependencies, json_schema, base_uri, structure_schema, record_stack, recursion_depth, json_type
|
|
1554
|
+
)
|
|
1555
|
+
|
|
1556
|
+
# Fallback
|
|
1557
|
+
return 'string'
|
|
1558
|
+
|
|
1559
|
+
except Exception as e:
|
|
1560
|
+
print(f'ERROR: Failed to convert type for {record_name}.{field_name}: {e}')
|
|
1561
|
+
return 'string'
|
|
1562
|
+
|
|
1563
|
+
def fetch_content(self, url: str | ParseResult):
|
|
1564
|
+
"""
|
|
1565
|
+
Fetches the content from the specified URL.
|
|
1566
|
+
|
|
1567
|
+
Args:
|
|
1568
|
+
url (str or ParseResult): The URL to fetch the content from.
|
|
1569
|
+
|
|
1570
|
+
Returns:
|
|
1571
|
+
str: The fetched content.
|
|
1572
|
+
|
|
1573
|
+
Raises:
|
|
1574
|
+
requests.RequestException: If there is an error while making the HTTP request.
|
|
1575
|
+
Exception: If there is an error while reading the file.
|
|
1576
|
+
"""
|
|
1577
|
+
# Parse the URL to determine the scheme
|
|
1578
|
+
if isinstance(url, str):
|
|
1579
|
+
parsed_url = urlparse(url)
|
|
1580
|
+
else:
|
|
1581
|
+
parsed_url = url
|
|
1582
|
+
|
|
1583
|
+
if parsed_url.geturl() in self.content_cache:
|
|
1584
|
+
return self.content_cache[parsed_url.geturl()]
|
|
1585
|
+
scheme = parsed_url.scheme
|
|
1586
|
+
|
|
1587
|
+
# Handle HTTP and HTTPS URLs
|
|
1588
|
+
if scheme in ['http', 'https']:
|
|
1589
|
+
response = requests.get(url if isinstance(
|
|
1590
|
+
url, str) else parsed_url.geturl(), timeout=30)
|
|
1591
|
+
# Raises an HTTPError if the response status code is 4XX/5XX
|
|
1592
|
+
response.raise_for_status()
|
|
1593
|
+
self.content_cache[parsed_url.geturl()] = response.text
|
|
1594
|
+
return response.text
|
|
1595
|
+
|
|
1596
|
+
# Handle file URLs
|
|
1597
|
+
elif scheme == 'file':
|
|
1598
|
+
# Remove the leading 'file://' from the path for compatibility
|
|
1599
|
+
file_path = parsed_url.netloc
|
|
1600
|
+
if not file_path:
|
|
1601
|
+
file_path = parsed_url.path
|
|
1602
|
+
# On Windows, a file URL might start with a '/' but it's not part of the actual path
|
|
1603
|
+
if os.name == 'nt' and file_path.startswith('/'):
|
|
1604
|
+
file_path = file_path[1:]
|
|
1605
|
+
with open(file_path, 'r', encoding='utf-8') as file:
|
|
1606
|
+
text = file.read()
|
|
1607
|
+
self.content_cache[parsed_url.geturl()] = text
|
|
1608
|
+
return text
|
|
1609
|
+
else:
|
|
1610
|
+
raise NotImplementedError(f'Unsupported URL scheme: {scheme}')
|
|
1611
|
+
|
|
1612
|
+
def resolve_reference(self, json_type: dict, base_uri: str, json_doc: dict) -> Tuple[Union[dict, Any], dict]:
    """
    Resolve a JSON Pointer reference or a JSON $ref reference.

    Args:
        json_type (dict): The JSON type containing the reference under '$ref'.
        base_uri (str): The base URI of the JSON document.
        json_doc (dict): The JSON document containing the reference.

    Returns:
        Tuple[Union[dict, Any], dict]: The resolved JSON schema and the
        document it was resolved in. If the reference cannot be resolved
        (empty ref, no matching fragment), the original (json_type, json_doc)
        pair is returned unchanged.

    Raises:
        Exception: If there is an error decoding JSON from the reference,
            or an error resolving the JSON Pointer reference. The original
            exception is attached as the cause.
    """
    try:
        ref = json_type['$ref']
        content = None
        url = urlparse(ref)
        if url.scheme:
            # Absolute reference: fetch the target document directly.
            content = self.fetch_content(ref)
        elif url.path:
            # Relative reference: resolve against the base URI first.
            file_uri = self.compose_uri(base_uri, url)
            content = self.fetch_content(file_uri)
        if content:
            try:
                json_schema_doc = json_schema = json.loads(content)
                # resolve the JSON Pointer reference, if any
                if url.fragment:
                    json_schema = jsonpointer.resolve_pointer(
                        json_schema, url.fragment)
                return json_schema, json_schema_doc
            except json.JSONDecodeError as decode_error:
                # BUG FIX: chain the cause so the original parse error
                # (position, message) is not lost from the traceback.
                raise Exception(f'Error decoding JSON from {ref}') from decode_error

        if url.fragment:
            # Fragment-only reference: resolve within the current document.
            json_pointer = unquote(url.fragment)
            ref_schema = jsonpointer.resolve_pointer(
                json_doc, json_pointer)
            if ref_schema:
                return ref_schema, json_doc
    except JsonPointerException as e:
        # BUG FIX: chain the original pointer error instead of discarding it.
        raise Exception(
            f'Error resolving JSON Pointer reference for {base_uri}') from e
    # Unresolvable reference: hand back the inputs untouched.
    return json_type, json_doc
def compose_uri(self, base_uri, url):
    """Compose an absolute URI from a base URI and a (possibly relative) URL.

    Args:
        base_uri (str): The absolute base URI of the current document.
        url (str or ParseResult): The URL to resolve against the base.

    Returns:
        str: The composed absolute URI.
    """
    if isinstance(url, str):
        url = urlparse(url)
    if url.scheme:
        # Already absolute - use it as-is.
        return url.geturl()
    if not url.path and not url.netloc:
        # Fragment-only / empty reference resolves to the base document itself.
        return base_uri
    if base_uri.startswith('file'):
        parsed_file_uri = urlparse(base_uri)
        # Some file URIs carry the path in netloc; fall back to path.
        # (Renamed from 'dir' to avoid shadowing the builtin.)
        base_dir = os.path.dirname(
            parsed_file_uri.netloc if parsed_file_uri.netloc else parsed_file_uri.path)
        filename = os.path.join(base_dir, url.path)
        # BUG FIX: the computed filename was never interpolated into the
        # URI (the f-string contained a literal placeholder), so every
        # relative file reference resolved to the same bogus location.
        file_uri = f'file://{filename}'
    else:
        # combine the base URI with the URL
        file_uri = urllib.parse.urljoin(base_uri, url.geturl())
    return file_uri
def convert_schema(self, json_schema_path: str, output_path: str | None = None):
    """
    Convert a JSON Schema file on disk to a JSON Structure file.

    Args:
        json_schema_path (str): Path to the input JSON Schema file.
        output_path (str | None): Path for the output JSON Structure file;
            defaults to the input path with a '.structure.json' suffix.

    Returns:
        dict: The converted (unsorted) JSON Structure schema.
    """
    # Load the source JSON Schema.
    with open(json_schema_path, 'r', encoding='utf-8') as source:
        schema = json.load(source)

    # Run the actual conversion.
    structure_schema = self.convert_json_schema_to_structure(schema, json_schema_path)

    # Derive the output location when none was given.
    if not output_path:
        root = os.path.splitext(json_schema_path)[0]
        output_path = f"{root}.structure.json"

    # Persist the result with its properties sorted for stable output.
    with open(output_path, 'w', encoding='utf-8') as target:
        sorted_schema = self._sort_json_structure_properties(structure_schema)
        json.dump(sorted_schema, target, indent=2)

    print(f"Converted {json_schema_path} to {output_path}")
    return structure_schema
def _mark_abstract_types(self, structure_schema: dict) -> None:
|
|
1709
|
+
"""
|
|
1710
|
+
Mark abstract types in the structure schema.
|
|
1711
|
+
|
|
1712
|
+
Args:
|
|
1713
|
+
structure_schema (dict): The structure schema to mark
|
|
1714
|
+
"""
|
|
1715
|
+
if 'definitions' in structure_schema:
|
|
1716
|
+
for def_name, def_schema in structure_schema['definitions'].items():
|
|
1717
|
+
if isinstance(def_schema, dict):
|
|
1718
|
+
# Mark types with only inheritance as abstract
|
|
1719
|
+
if ('$extends' in def_schema and
|
|
1720
|
+
('properties' not in def_schema or len(def_schema['properties']) == 0)):
|
|
1721
|
+
def_schema['abstract'] = True
|
|
1722
|
+
|
|
1723
|
+
# Mark choice types with discriminators as abstract
|
|
1724
|
+
if (def_schema.get('type') == 'choice' and
|
|
1725
|
+
'discriminator' in def_schema):
|
|
1726
|
+
def_schema['abstract'] = True
|
|
1727
|
+
|
|
1728
|
+
def jsons_to_structure(self, json_schema: Union[dict, list], namespace: str, base_uri: str) -> dict:
    """
    Convert a JSON Schema to JSON Structure format.

    Processes top-level definitions ('$defs' / 'definitions') in two passes
    (registry population, then conversion), converts the root-level schema,
    copies schema metadata ($id, description), and marks abstract types.

    Args:
        json_schema (dict | list): The JSON Schema to convert
        namespace (str): The target namespace
        base_uri (str): Base URI for reference resolution

    Returns:
        dict: The converted JSON Structure schema
    """
    # Clear type registry for new conversion
    self.type_registry.clear()

    structure_schema: Dict[str, Any] = {
        "$schema": "https://json-structure.org/meta/extended/v0/#"
    }
    # Do NOT set $uses here; it will be set after scanning for actual usage

    # Handle schema with definitions/defs
    if isinstance(json_schema, dict) and ('definitions' in json_schema or '$defs' in json_schema):
        # Process definitions
        defs_key = '$defs' if '$defs' in json_schema else 'definitions'
        json_schema_defs = json_schema[defs_key]
        if json_schema_defs:
            structure_schema['definitions'] = {}
            # First pass: populate type registry for reference resolution,
            # so forward references between definitions resolve by name.
            for def_name in json_schema_defs.keys():
                normalized_def_name = avro_name(def_name)
                self.type_registry[def_name] = f"#/definitions/{normalized_def_name}"
            # Second pass: convert each definition
            for def_name, def_schema in json_schema_defs.items():
                # Skip empty definitions or ones that are just plain values/strings
                if not isinstance(def_schema, dict) or not def_schema:
                    continue

                # Check if this is a pure container definition (only contains nested schemas, no actual schema keywords)
                schema_keywords = {'type', 'properties', 'items', 'additionalProperties', 'patternProperties',
                                   'oneOf', 'anyOf', 'allOf', '$ref', 'required', 'enum', 'const', 'minimum',
                                   'maximum', 'minLength', 'maxLength', 'pattern', 'format', 'if', 'then', 'else'}
                has_schema_keywords = any(key in def_schema for key in schema_keywords)
                # If it only contains nested object definitions (no schema keywords), handle as container
                # These are typically namespace containers like "resourceTypes" that only organize other types
                if not has_schema_keywords:
                    # Strip meta keys ($-prefixed, title/description/examples)
                    # before deciding whether this is a pure container.
                    non_meta_items = {k: v for k, v in def_schema.items()
                                      if not k.startswith('$') and k not in ['title', 'description', 'examples']}
                    if non_meta_items and all(isinstance(value, dict) for value in non_meta_items.values()):
                        # This looks like a pure container - but check if any references point to it
                        ref_target = f"#/definitions/{def_name}"
                        ref_target_normalized = f"#/definitions/{avro_name(def_name)}"

                        # Search the entire schema for references to this definition.
                        # NOTE(review): substring match over the serialized schema is
                        # a heuristic; it can match refs embedded in longer names.
                        schema_str = json.dumps(json_schema)
                        if ref_target in schema_str or ref_target_normalized in schema_str:
                            # This container is being referenced, so we need to keep it as a valid object
                            # Create a minimal valid object type
                            dependencies = []
                            normalized_def_name, original_name = avro_name_with_altname(def_name)
                            container_def = {
                                'type': 'object',
                                'name': normalized_def_name,
                                'additionalProperties': True  # Allow any properties to make it valid
                            }
                            if original_name is not None:
                                container_def['altnames'] = {'json': original_name}
                            structure_schema['definitions'][normalized_def_name] = container_def
                            continue
                        else:
                            # Skip pure container definitions that aren't referenced
                            continue
                # Process all dictionary definitions - this includes schemas with only descriptions
                # NOTE(review): 'dependencies' is populated by the converter but not
                # consumed here - presumably collected for side effects; confirm.
                dependencies = []
                normalized_def_name, original_name = avro_name_with_altname(def_name)
                converted_def = self.json_type_to_structure_type(
                    def_schema, def_name, '', namespace, dependencies,
                    json_schema, base_uri, structure_schema, [], 1
                )
                if isinstance(converted_def, dict):
                    converted_def['name'] = normalized_def_name
                    # Add alternate name if the original was different
                    if original_name is not None:
                        if 'altnames' not in converted_def:
                            converted_def['altnames'] = {}
                        converted_def['altnames']['json'] = original_name
                    structure_schema['definitions'][normalized_def_name] = converted_def
                else:
                    # Primitive result (e.g. 'string') - wrap it in a named definition object.
                    definition_obj = {
                        'type': converted_def,
                        'name': normalized_def_name
                    }
                    # Add alternate name if the original was different
                    if original_name is not None:
                        definition_obj['altnames'] = {'json': original_name}
                    structure_schema['definitions'][normalized_def_name] = definition_obj

    # Handle root-level schema type
    root_type_keys = ['type', 'properties', 'items', 'additionalProperties', 'oneOf', 'anyOf', 'allOf']
    has_root_type = any(key in json_schema for key in root_type_keys)
    if has_root_type and isinstance(json_schema, dict):
        dependencies = []
        root_converted = self.json_type_to_structure_type(
            json_schema, self.root_class_name, '', namespace, dependencies,
            json_schema, base_uri, structure_schema, [], 1
        )

        # Merge root type properties into schema (existing keys such as
        # $schema and definitions take precedence over the converted root).
        if isinstance(root_converted, dict):
            for key, value in root_converted.items():
                if key not in structure_schema:
                    structure_schema[key] = value
        else:
            structure_schema['type'] = root_converted
    # Handle schema metadata
    if isinstance(json_schema, dict):
        if '$id' in json_schema:
            structure_schema['$id'] = json_schema['$id']
        elif 'id' in json_schema:
            # Draft-04 style 'id' keyword.
            structure_schema['$id'] = json_schema['id']
        else:
            # Generate default $id if missing
            structure_schema['$id'] = f"https://example.com/{namespace.replace('.', '/')}.schema.json"

        # Add description if present, or map title to description if no description exists
        if 'description' in json_schema:
            structure_schema['description'] = json_schema['description']
        elif 'title' in json_schema:
            structure_schema['description'] = json_schema['title']
    else:
        # Generate default $id for non-dict schemas
        structure_schema['$id'] = f"https://example.com/{namespace.replace('.', '/')}.schema.json"

    # Mark abstract types
    self._mark_abstract_types(structure_schema)

    return structure_schema
def convert_json_schema_to_structure(self, json_schema: Union[dict, list], base_uri: str = "") -> dict:
    """
    Convert a JSON Schema dictionary to JSON Structure format.

    Entry point that delegates conversion to ``jsons_to_structure`` and then
    post-processes the result: names the root type, records the optional
    features actually used (``$uses``), and normalizes the schema for JSON
    Structure compliance.

    Args:
        json_schema (dict | list): The JSON Schema to convert
        base_uri (str): Base URI for reference resolution

    Returns:
        dict: The converted JSON Structure schema

    Raises:
        TypeError: If the input is neither a dict nor a list
        ValueError: If the input is an empty dict, or if conversion fails
            (any exception raised during conversion is wrapped in ValueError)
    """
    if not isinstance(json_schema, (dict, list)):
        raise TypeError(f"Expected dict or list, got {type(json_schema)}")

    if isinstance(json_schema, dict) and not json_schema:
        raise ValueError("Empty schema dictionary provided")

    try:
        structure_schema = self.jsons_to_structure(json_schema, self.root_namespace, base_uri)
        # Always add a name to the root if it has a type and no name
        if 'type' in structure_schema and 'name' not in structure_schema:
            structure_schema['name'] = avro_name(self.root_class_name)
        # Only add $uses if the feature is actually used; otherwise drop a
        # stale $uses left over from an earlier pass.
        used = self._scan_for_uses(structure_schema)
        if used:
            structure_schema['$uses'] = used
        elif '$uses' in structure_schema:
            del structure_schema['$uses']

        # Validate and fix JSON Structure compliance
        structure_schema = self._validate_and_fix_json_structure_type(structure_schema)

        return structure_schema
    except Exception as e:
        raise ValueError(f"Failed to convert JSON Schema to JSON Structure: {e}") from e
|
|
1906
|
+
|
|
1907
|
+
def is_valid_identifier(self, name: str) -> bool:
    """
    Check whether *name* is a valid identifier (for property names, etc.).

    Valid identifiers start with an ASCII letter or underscore, followed by
    ASCII letters, digits, or underscores. Non-string, empty, and falsy
    inputs are rejected.

    Args:
        name (str): Candidate identifier.

    Returns:
        bool: True if *name* is a valid identifier, False otherwise.
    """
    import re
    if not name or not isinstance(name, str):
        return False
    # fullmatch anchors the pattern over the whole string, replacing the
    # previous explicit ^...$ anchors.
    return re.fullmatch(r'[a-zA-Z_][a-zA-Z0-9_]*', name) is not None
|
|
1915
|
+
|
|
1916
|
+
def normalize_identifier(self, name: str) -> str:
    """
    Coerce *name* into a valid identifier.

    Invalid characters become underscores, a leading digit is prefixed with
    ``prop_``, and anything unusable (empty, None, non-string) falls back to
    the literal name ``'property'``.
    """
    import re
    if not isinstance(name, str) or not name:
        return 'property'

    # Replace every character outside [a-zA-Z0-9_] with an underscore.
    cleaned = re.sub(r'[^a-zA-Z0-9_]', '_', name)

    # Identifiers must not begin with a digit.
    if cleaned and cleaned[0].isdigit():
        cleaned = 'prop_' + cleaned

    # Guard against an empty result.
    return cleaned or 'property'
|
|
1935
|
+
|
|
1936
|
+
def _validate_and_fix_json_structure_type(self, structure_type: Any) -> Any:
    """
    Validate and fix a JSON Structure type to ensure compliance.

    This method post-processes generated JSON Structure schemas to fix common issues:
    - Converts "integer" type to "number" (JSON Structure doesn't support integer)
    - Ensures arrays have "items"
    - Ensures objects have "properties"
    - Ensures map/set "values"/"items" are schema objects, not strings
    - Recursively fixes nested schemas

    Non-dict inputs (e.g. bare type-name strings) are returned unchanged.
    Only the top-level dict is copied; nested values that are recursed into
    get fresh copies from the recursive calls.
    """
    if not isinstance(structure_type, dict):
        return structure_type

    # Create a copy to avoid modifying the original
    structure_type = structure_type.copy()

    # Fix invalid types
    if structure_type.get('type') == 'integer':
        structure_type['type'] = 'number'  # JSON Structure doesn't have integer type

    # Ensure arrays have items
    elif structure_type.get('type') == 'array' and 'items' not in structure_type:
        structure_type['items'] = {'type': 'object'}  # Default to object items

    # Ensure objects have properties (unless they extend another type)
    elif (structure_type.get('type') == 'object' and
            'properties' not in structure_type and
            '$ref' not in structure_type and
            '$extends' not in structure_type):
        structure_type['properties'] = {}  # Default to empty properties

    # Ensure map values are schema objects (wrap bare type-name strings)
    elif structure_type.get('type') == 'map' and 'values' in structure_type:
        values = structure_type['values']
        if isinstance(values, str):
            structure_type['values'] = {'type': values}
        elif isinstance(values, dict):
            structure_type['values'] = self._validate_and_fix_json_structure_type(values)

    # Ensure set items are schema objects (wrap bare type-name strings)
    elif structure_type.get('type') == 'set' and 'items' in structure_type:
        items = structure_type['items']
        if isinstance(items, str):
            structure_type['items'] = {'type': items}
        elif isinstance(items, dict):
            structure_type['items'] = self._validate_and_fix_json_structure_type(items)

    # Recursively validate nested structures - comprehensive approach.
    # NOTE(review): the elif chain means only the first present composition
    # keyword is recursed into; schemas normally carry only one of these.
    if 'anyOf' in structure_type:
        structure_type['anyOf'] = [
            self._validate_and_fix_json_structure_type(item)
            for item in structure_type['anyOf']
        ]
    elif 'oneOf' in structure_type:
        structure_type['oneOf'] = [
            self._validate_and_fix_json_structure_type(item)
            for item in structure_type['oneOf']
        ]
    elif 'allOf' in structure_type:
        structure_type['allOf'] = [
            self._validate_and_fix_json_structure_type(item)
            for item in structure_type['allOf']
        ]

    # Handle nested schemas in various contexts (again, first match only)
    if 'items' in structure_type and isinstance(structure_type['items'], dict):
        structure_type['items'] = self._validate_and_fix_json_structure_type(structure_type['items'])
    elif 'values' in structure_type and isinstance(structure_type['values'], dict):
        structure_type['values'] = self._validate_and_fix_json_structure_type(structure_type['values'])
    elif 'properties' in structure_type and isinstance(structure_type['properties'], dict):
        structure_type['properties'] = {
            k: self._validate_and_fix_json_structure_type(v)
            for k, v in structure_type['properties'].items()
        }
    # Handle additionalProperties
    if 'additionalProperties' in structure_type and isinstance(structure_type['additionalProperties'], dict):
        structure_type['additionalProperties'] = self._validate_and_fix_json_structure_type(structure_type['additionalProperties'])

    # Handle definitions
    if 'definitions' in structure_type and isinstance(structure_type['definitions'], dict):
        structure_type['definitions'] = {
            k: self._validate_and_fix_json_structure_type(v)
            for k, v in structure_type['definitions'].items()
        }

    # Handle $defs (JSON Schema 2019-09+)
    if '$defs' in structure_type and isinstance(structure_type['$defs'], dict):
        structure_type['$defs'] = {
            k: self._validate_and_fix_json_structure_type(v)
            for k, v in structure_type['$defs'].items()
        }

    return structure_type
|
|
2030
|
+
|
|
2031
|
+
def _sort_json_structure_properties(self, schema: Any) -> Any:
|
|
2032
|
+
"""
|
|
2033
|
+
Recursively sort properties in a JSON Structure schema for consistent output.
|
|
2034
|
+
"""
|
|
2035
|
+
if not isinstance(schema, dict):
|
|
2036
|
+
return schema
|
|
2037
|
+
|
|
2038
|
+
result = {}
|
|
2039
|
+
|
|
2040
|
+
# Sort keys, putting common keys first
|
|
2041
|
+
|
|
2042
|
+
key_order = ['$schema', 'type', 'title', 'description', 'properties', 'required', 'items', 'values', 'anyOf', 'oneOf', 'allOf', '$ref', '$extends']
|
|
2043
|
+
sorted_keys = []
|
|
2044
|
+
|
|
2045
|
+
# Add keys in preferred order
|
|
2046
|
+
for key in key_order:
|
|
2047
|
+
if key in schema:
|
|
2048
|
+
sorted_keys.append(key)
|
|
2049
|
+
|
|
2050
|
+
# Add remaining keys alphabetically
|
|
2051
|
+
remaining_keys = sorted([k for k in schema.keys() if k not in key_order])
|
|
2052
|
+
sorted_keys.extend(remaining_keys)
|
|
2053
|
+
|
|
2054
|
+
# Build result with sorted keys
|
|
2055
|
+
for key in sorted_keys:
|
|
2056
|
+
value = schema[key]
|
|
2057
|
+
if key == 'properties' and isinstance(value, dict):
|
|
2058
|
+
# Sort properties alphabetically
|
|
2059
|
+
result[key] = {k: self._sort_json_structure_properties(v) for k, v in sorted(value.items())}
|
|
2060
|
+
elif key in ['anyOf', 'oneOf', 'allOf'] and isinstance(value, list):
|
|
2061
|
+
# Recursively sort composition schemas
|
|
2062
|
+
result[key] = [self._sort_json_structure_properties(item) for item in value]
|
|
2063
|
+
elif key in ['items', 'values'] and isinstance(value, dict):
|
|
2064
|
+
# Recursively sort nested schemas
|
|
2065
|
+
result[key] = self._sort_json_structure_properties(value)
|
|
2066
|
+
else:
|
|
2067
|
+
result[key] = value
|
|
2068
|
+
|
|
2069
|
+
return result
|
|
2070
|
+
|
|
2071
|
+
def _convert_constraint_composition_to_required(self, json_type: dict) -> dict:
|
|
2072
|
+
"""
|
|
2073
|
+
Convert constraint-only composition (anyOf with property requirements) to a simple required array.
|
|
2074
|
+
This is used when anyOf items only add constraints without changing the structure.
|
|
2075
|
+
"""
|
|
2076
|
+
try:
|
|
2077
|
+
# Check if this is constraint-only anyOf
|
|
2078
|
+
if 'anyOf' not in json_type:
|
|
2079
|
+
return json_type
|
|
2080
|
+
|
|
2081
|
+
# Gather all required properties from anyOf items
|
|
2082
|
+
all_required = set()
|
|
2083
|
+
base_properties = json_type.get('properties', {})
|
|
2084
|
+
|
|
2085
|
+
for anyof_item in json_type['anyOf']:
|
|
2086
|
+
if isinstance(anyof_item, dict) and 'required' in anyof_item:
|
|
2087
|
+
# Only consider it constraint-only if it doesn't define new properties
|
|
2088
|
+
item_properties = anyof_item.get('properties', {})
|
|
2089
|
+
if not item_properties or all(prop in base_properties for prop in item_properties):
|
|
2090
|
+
all_required.update(anyof_item['required'])
|
|
2091
|
+
|
|
2092
|
+
# Create simplified schema
|
|
2093
|
+
result = {k: v for k, v in json_type.items() if k != 'anyOf'}
|
|
2094
|
+
if all_required:
|
|
2095
|
+
result['required'] = sorted(list(all_required))
|
|
2096
|
+
|
|
2097
|
+
return result
|
|
2098
|
+
|
|
2099
|
+
except Exception as e:
|
|
2100
|
+
# If conversion fails, return original
|
|
2101
|
+
return json_type
|
|
2102
|
+
|
|
2103
|
+
def _is_constraint_only_schema(self, json_type: dict) -> bool:
|
|
2104
|
+
"""
|
|
2105
|
+
Check if a schema contains only constraints (no structural elements).
|
|
2106
|
+
Used to determine if anyOf items are constraint-only.
|
|
2107
|
+
"""
|
|
2108
|
+
if not isinstance(json_type, dict):
|
|
2109
|
+
return False
|
|
2110
|
+
|
|
2111
|
+
# Constraint-only keys
|
|
2112
|
+
constraint_keys = {'required', 'minProperties', 'maxProperties', 'dependencies', 'dependentRequired', 'dependentSchemas'}
|
|
2113
|
+
|
|
2114
|
+
# Structural keys that would make it not constraint-only
|
|
2115
|
+
structural_keys = {'type', 'properties', 'additionalProperties', 'patternProperties', 'items', 'anyOf', 'oneOf', 'allOf'}
|
|
2116
|
+
|
|
2117
|
+
schema_keys = set(json_type.keys()) - {'title', 'description', '$id', '$schema'}
|
|
2118
|
+
|
|
2119
|
+
# It's constraint-only if it has only constraint keys and no structural keys
|
|
2120
|
+
return bool(schema_keys & constraint_keys) and not bool(schema_keys & structural_keys)
|
|
2121
|
+
|
|
2122
|
+
def _convert_constraint_anyof_to_permutations(self, json_type: dict) -> dict:
    """
    Convert constraint-only anyOf to a schema with required properties that
    represent the union of all constraint requirements.

    Branches that are not constraint-only (see _is_constraint_only_schema)
    are ignored. Returns {} when nothing applies or on any failure.
    """
    try:
        if 'anyOf' not in json_type:
            return json_type

        # Union of required-property names contributed by constraint-only branches
        required_union = set()
        for candidate in json_type['anyOf']:
            if not isinstance(candidate, dict):
                continue
            if self._is_constraint_only_schema(candidate) and 'required' in candidate:
                required_union.update(candidate['required'])

        if required_union:
            return {'required': sorted(required_union)}
        return {}

    except Exception:
        # Best-effort: malformed input yields an empty schema
        return {}
|
|
2145
|
+
|
|
2146
|
+
def has_composition_keywords(self, json_type: dict) -> bool:
    """
    Check if a JSON schema has composition keywords (anyOf, oneOf, allOf) or
    conditional keywords (if/then/else). Non-dict inputs never do.
    """
    if not isinstance(json_type, dict):
        return False
    composition_markers = ('anyOf', 'oneOf', 'allOf', 'if', 'then', 'else')
    return any(marker in json_type for marker in composition_markers)
|
|
2153
|
+
|
|
2154
|
+
def resolve_composition_keywords(self, json_type: dict, record_name: str, field_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int) -> dict:
    """
    Resolve composition keywords in JSON schema by flattening them.
    This is a simple implementation that merges composition schemas.

    Handling per keyword:
    - allOf: converts every branch and shallow-merges the results
      ('properties' dicts are merged, 'required' lists are unioned,
      other keys are last-writer-wins).
    - anyOf: converts every branch and emits a JSON Structure type union
      ({'type': [branch, ...]}).
    - oneOf: emits a 'choice' type when a discriminated-union pattern is
      detected; otherwise converts the first branch as a fallback.
    - Anything else (e.g. if/then/else) falls through to a permissive
      map-of-any.

    Any exception during resolution is swallowed and the permissive
    map-of-any fallback is returned instead.
    """
    try:
        if 'allOf' in json_type:
            # Merge all schemas in allOf
            merged = {}
            for schema in json_type['allOf']:
                converted = self.json_type_to_structure_type(
                    schema, record_name, field_name, namespace, dependencies,
                    json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
                )
                if isinstance(converted, dict):
                    # Simple merge - in real scenarios this would be more complex
                    for key, value in converted.items():
                        if key == 'properties' and key in merged:
                            merged[key].update(value)
                        elif key == 'required' and key in merged:
                            merged[key] = list(set(merged[key] + value))
                        else:
                            merged[key] = value
            return merged

        elif 'anyOf' in json_type:
            # For anyOf, convert to JSON Structure type union
            anyof_schemas = []
            for anyof_item in json_type['anyOf']:
                converted = self.json_type_to_structure_type(
                    anyof_item, record_name, field_name, namespace, dependencies,
                    json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
                )
                anyof_schemas.append(converted)

            return {
                'type': anyof_schemas
            }

        elif 'oneOf' in json_type:
            # First check if this is a discriminated union pattern
            choice_info = self.detect_discriminated_union_pattern(json_type)
            if choice_info:
                # Convert to JSON Structure choice type
                choice_result = {
                    'type': 'choice',
                    'choices': choice_info['choices']
                }

                # Add selector if specified (for tagged unions)
                if choice_info.get('selector'):
                    choice_result['selector'] = choice_info['selector']

                # Add name if we have one
                if record_name:
                    choice_result['name'] = avro_name(record_name)

                return choice_result

            # For oneOf without discriminated union, return the first option as a fallback
            if json_type['oneOf']:
                return self.json_type_to_structure_type(
                    json_type['oneOf'][0], record_name, field_name, namespace, dependencies,
                    json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
                )

        # Fallback to map type with any values (no recognized keyword, or empty oneOf)
        return {'type': 'map', 'values': {'type': 'any'}}

    except Exception as e:
        # If resolution fails, return a basic map type
        return {'type': 'map', 'values': {'type': 'any'}}
|
|
2223
|
+
|
|
2224
|
+
def detect_discriminated_union_pattern(self, json_type: dict) -> dict | None:
    """
    Detect discriminated union patterns in oneOf schemas that should be
    converted to a choice type.

    A candidate must have a oneOf with at least two branches, each a dict
    describing an object (type == 'object' or carrying 'properties'). Three
    patterns are tried in order:
    1. Tagged union: a shared discriminator property with distinct
       enum/const values per branch.
    2. Simple discriminated union: exactly one unique required property per
       branch.
    3. Complex discriminated union: a unique combination of required
       properties per branch.

    Args:
        json_type (dict): The JSON schema object with oneOf

    Returns:
        dict | None: Choice type configuration or None if not a
        discriminated union.
    """
    if 'oneOf' not in json_type:
        return None

    variants = json_type['oneOf']
    if len(variants) < 2:
        return None

    # All branches must be dicts describing object structures.
    for variant in variants:
        if not isinstance(variant, dict):
            return None
        if variant.get('type') != 'object' and 'properties' not in variant:
            return None

    # Try the detectors from most specific to most general.
    for detector in (self._detect_tagged_union_pattern,
                     self._detect_simple_discriminated_union,
                     self._detect_complex_discriminated_union):
        match = detector(variants)
        if match:
            return match

    return None
|
|
2279
|
+
|
|
2280
|
+
def _detect_tagged_union_pattern(self, oneof_items: list) -> dict | None:
|
|
2281
|
+
"""
|
|
2282
|
+
Detect tagged union pattern where all schemas have the same discriminator property
|
|
2283
|
+
with different enum/const values.
|
|
2284
|
+
"""
|
|
2285
|
+
discriminator_props = {}
|
|
2286
|
+
common_discriminator = None
|
|
2287
|
+
|
|
2288
|
+
for item in oneof_items:
|
|
2289
|
+
properties = item.get('properties', {})
|
|
2290
|
+
|
|
2291
|
+
# Look for a property with enum or const value
|
|
2292
|
+
discriminator_found = False
|
|
2293
|
+
for prop_name, prop_schema in properties.items():
|
|
2294
|
+
if 'enum' in prop_schema and len(prop_schema['enum']) == 1:
|
|
2295
|
+
# Single enum value acts as discriminator
|
|
2296
|
+
disc_value = prop_schema['enum'][0]
|
|
2297
|
+
if common_discriminator is None:
|
|
2298
|
+
common_discriminator = prop_name
|
|
2299
|
+
elif common_discriminator != prop_name:
|
|
2300
|
+
return None # Different discriminator properties
|
|
2301
|
+
discriminator_props[str(disc_value)] = {'type': 'object'}
|
|
2302
|
+
discriminator_found = True
|
|
2303
|
+
break
|
|
2304
|
+
elif 'const' in prop_schema:
|
|
2305
|
+
# Const value acts as discriminator
|
|
2306
|
+
disc_value = prop_schema['const']
|
|
2307
|
+
if common_discriminator is None:
|
|
2308
|
+
common_discriminator = prop_name
|
|
2309
|
+
elif common_discriminator != prop_name:
|
|
2310
|
+
return None # Different discriminator properties
|
|
2311
|
+
discriminator_props[str(disc_value)] = {'type': 'object'}
|
|
2312
|
+
discriminator_found = True
|
|
2313
|
+
break
|
|
2314
|
+
|
|
2315
|
+
if not discriminator_found:
|
|
2316
|
+
return None
|
|
2317
|
+
|
|
2318
|
+
if common_discriminator and len(discriminator_props) == len(oneof_items):
|
|
2319
|
+
return {
|
|
2320
|
+
'type': 'choice',
|
|
2321
|
+
'choices': discriminator_props,
|
|
2322
|
+
'selector': common_discriminator # Tagged union with explicit selector
|
|
2323
|
+
}
|
|
2324
|
+
|
|
2325
|
+
return None
|
|
2326
|
+
|
|
2327
|
+
def _detect_simple_discriminated_union(self, oneof_items: list) -> dict | None:
|
|
2328
|
+
"""
|
|
2329
|
+
Detect simple discriminated union where each schema has exactly one unique required property.
|
|
2330
|
+
"""
|
|
2331
|
+
choice_mapping = {}
|
|
2332
|
+
|
|
2333
|
+
for item in oneof_items:
|
|
2334
|
+
if 'properties' not in item or 'required' not in item:
|
|
2335
|
+
return None
|
|
2336
|
+
|
|
2337
|
+
required = item['required']
|
|
2338
|
+
if not isinstance(required, list) or len(required) != 1:
|
|
2339
|
+
return None # Must have exactly one required property
|
|
2340
|
+
|
|
2341
|
+
required_prop = required[0]
|
|
2342
|
+
|
|
2343
|
+
# Check if this property name is already used by another choice
|
|
2344
|
+
if required_prop in choice_mapping:
|
|
2345
|
+
return None # Properties must be mutually exclusive
|
|
2346
|
+
|
|
2347
|
+
# Ensure the required property exists in the properties
|
|
2348
|
+
if required_prop not in item['properties']:
|
|
2349
|
+
return None # Required property must exist in properties
|
|
2350
|
+
|
|
2351
|
+
# Store the choice information - use 'any' type for discriminated unions
|
|
2352
|
+
choice_mapping[required_prop] = {
|
|
2353
|
+
'type': 'any',
|
|
2354
|
+
'description': f'Choice variant with {required_prop} property'
|
|
2355
|
+
}
|
|
2356
|
+
|
|
2357
|
+
if len(choice_mapping) == len(oneof_items):
|
|
2358
|
+
return {
|
|
2359
|
+
'type': 'choice',
|
|
2360
|
+
'choices': choice_mapping,
|
|
2361
|
+
'selector': None # Inline choice without explicit selector property
|
|
2362
|
+
}
|
|
2363
|
+
|
|
2364
|
+
return None
|
|
2365
|
+
|
|
2366
|
+
def _detect_complex_discriminated_union(self, oneof_items: list) -> dict | None:
|
|
2367
|
+
"""
|
|
2368
|
+
Detect complex discriminated union where each schema has a unique combination of required properties.
|
|
2369
|
+
"""
|
|
2370
|
+
required_sets = []
|
|
2371
|
+
choice_mapping = {}
|
|
2372
|
+
|
|
2373
|
+
for i, item in enumerate(oneof_items):
|
|
2374
|
+
if 'properties' not in item:
|
|
2375
|
+
return None
|
|
2376
|
+
|
|
2377
|
+
required = set(item.get('required', []))
|
|
2378
|
+
|
|
2379
|
+
# Check if this combination of required properties is unique
|
|
2380
|
+
for existing_set in required_sets:
|
|
2381
|
+
if required == existing_set:
|
|
2382
|
+
return None # Non-unique required property combination
|
|
2383
|
+
# Check for overlap - if sets overlap significantly, it's not a clean discriminated union
|
|
2384
|
+
overlap = required & existing_set
|
|
2385
|
+
if len(overlap) > 0 and (len(overlap) / len(required | existing_set)) > 0.5:
|
|
2386
|
+
return None # Too much overlap
|
|
2387
|
+
|
|
2388
|
+
required_sets.append(required)
|
|
2389
|
+
|
|
2390
|
+
# Create a choice name based on the required properties
|
|
2391
|
+
if len(required) == 0:
|
|
2392
|
+
choice_name = f'variant_{i}'
|
|
2393
|
+
elif len(required) == 1:
|
|
2394
|
+
choice_name = list(required)[0]
|
|
2395
|
+
else:
|
|
2396
|
+
# Sort for consistent naming
|
|
2397
|
+
sorted_props = sorted(required)
|
|
2398
|
+
choice_name = '_'.join(sorted_props[:2]) # Use first two properties for name
|
|
2399
|
+
if len(sorted_props) > 2:
|
|
2400
|
+
choice_name += '_etc'
|
|
2401
|
+
|
|
2402
|
+
# Use 'any' type for discriminated unions
|
|
2403
|
+
choice_mapping[choice_name] = {
|
|
2404
|
+
'type': 'any',
|
|
2405
|
+
'description': f'Choice variant requiring: {", ".join(sorted(required))}'
|
|
2406
|
+
}
|
|
2407
|
+
|
|
2408
|
+
if len(choice_mapping) == len(oneof_items) and len(choice_mapping) >= 2:
|
|
2409
|
+
return {
|
|
2410
|
+
'type': 'choice',
|
|
2411
|
+
'choices': choice_mapping,
|
|
2412
|
+
'selector': None # Inline choice without explicit selector property
|
|
2413
|
+
}
|
|
2414
|
+
|
|
2415
|
+
return None
|
|
2416
|
+
|
|
2417
|
+
def _ensure_abstract_base_type(self, base_ref: str, abstract_base_name: str, structure_schema: dict, json_schema: dict, base_uri: str) -> None:
    """
    Ensure that an abstract base type exists for inheritance patterns.

    Resolves *base_ref*, converts the referenced schema with inheritance
    detection and composition preservation temporarily disabled (to avoid
    infinite recursion), marks the result ``abstract``, and stores it under
    ``structure_schema['definitions'][abstract_base_name]``. Idempotent: a
    pre-existing definition is left untouched. On a circular reference or a
    resolution failure, a minimal empty abstract object is stored instead
    (with a warning printed), so callers can still reference the name.

    Args:
        base_ref (str): The original $ref to the base type
        abstract_base_name (str): The name for the abstract base type
        structure_schema (dict): The structure schema being built
        json_schema (dict): The original JSON schema
        base_uri (str): The base URI for resolving references
    """
    # Ensure definitions section exists
    if 'definitions' not in structure_schema:
        structure_schema['definitions'] = {}

    # If abstract base type already exists, don't recreate it
    if abstract_base_name in structure_schema['definitions']:
        return

    # Guard against recursive abstract base type creation; the guard set is
    # created lazily on first use and shared across calls on this instance.
    if not hasattr(self, '_creating_abstract_bases'):
        self._creating_abstract_bases = set()

    if abstract_base_name in self._creating_abstract_bases:
        print(f"WARNING: Circular reference detected while creating abstract base type {abstract_base_name}")
        # Create a minimal abstract type to break the cycle
        structure_schema['definitions'][abstract_base_name] = {
            'type': 'object',
            'abstract': True,
            'name': abstract_base_name,
            'properties': {}
        }
        return

    # Add to the guard set
    self._creating_abstract_bases.add(abstract_base_name)

    try:
        # Resolve the original base type reference
        base_schema, _ = self.resolve_reference({'$ref': base_ref}, base_uri, json_schema)

        # Convert the base type to structure format, but without triggering inheritance conversion
        # to avoid infinite recursion
        old_detect_inheritance = self.detect_inheritance
        old_preserve_composition = self.preserve_composition
        self.detect_inheritance = False  # Temporarily disable inheritance detection
        self.preserve_composition = False  # Force composition flattening for base types

        try:
            base_structure = self.json_type_to_structure_type(
                base_schema, abstract_base_name, '', '', [], json_schema, base_uri,
                structure_schema, [], 1
            )

            # Mark it as abstract
            if isinstance(base_structure, dict):
                base_structure['abstract'] = True
                base_structure['name'] = abstract_base_name

                # Store the abstract base type
                structure_schema['definitions'][abstract_base_name] = base_structure
            elif isinstance(base_structure, str):
                # If the base resolves to a simple type, create an object wrapper
                structure_schema['definitions'][abstract_base_name] = {
                    'type': 'object',
                    'abstract': True,
                    'name': abstract_base_name,
                    'properties': {}
                }

        finally:
            # Restore inheritance detection and composition settings
            self.detect_inheritance = old_detect_inheritance
            self.preserve_composition = old_preserve_composition

    except Exception as e:
        # If we can't resolve the base type, create a minimal abstract type
        print(f"WARNING: Failed to create abstract base type {abstract_base_name}: {e}")
        structure_schema['definitions'][abstract_base_name] = {
            'type': 'object',
            'abstract': True,
            'name': abstract_base_name,
            'properties': {}
        }
    finally:
        # Remove from the guard set
        self._creating_abstract_bases.discard(abstract_base_name)
|
|
2504
|
+
|
|
2505
|
+
def create_pattern_union_maps(self, pattern_properties: dict, additional_props, record_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int = 1) -> dict:
    """
    Build a JSON Structure type union of maps from multiple patternProperties,
    optionally extended by an additionalProperties fallback map.

    Each pattern yields its own map constrained by a single keyNames pattern.
    When additionalProperties is a schema, a fallback map is appended; when it
    is True, a catch-all map with "any" values is appended. Every compound map
    is hoisted into /definitions and referenced via $ref, and the result is
    expressed with the JSON Structure union syntax {"type": [ref1, ref2, ...]}.

    Args:
        pattern_properties (dict): patternProperties object keyed by pattern.
        additional_props: additionalProperties value (False, True, or schema dict).
        record_name (str): Name of the record.
        namespace (str): Namespace.
        dependencies (list): Dependencies list.
        json_schema (dict): The full JSON schema.
        base_uri (str): Base URI.
        structure_schema (dict): Structure schema list.
        record_stack (list): Record stack for recursion detection.
        recursion_depth (int): Current recursion depth.

    Returns:
        dict: JSON Structure type union of hoisted map references.
    """
    # Make sure there is a /definitions section to hoist into.
    structure_schema.setdefault('definitions', {})

    hoisted_refs = []
    # One map per pattern, each validated by its own keyNames pattern.
    for index, (pattern, value_schema) in enumerate(pattern_properties.items()):
        pattern_map = self.create_structure_map(
            value_schema, record_name, namespace, dependencies,
            json_schema, base_uri, structure_schema, record_stack, recursion_depth
        )
        pattern_map['keyNames'] = {
            "type": "string",
            "pattern": pattern
        }
        # Hoist into /definitions under a name derived from the pattern.
        sanitized_pattern = re.sub(r'[^a-zA-Z0-9_]', '_', pattern)
        hint = f"{record_name}_PatternMap_{sanitized_pattern}_{index}"
        hoisted_refs.append(self._hoist_definition(pattern_map, structure_schema, hint))

    # Fallback map for additionalProperties (nothing added for False/None).
    if isinstance(additional_props, dict):
        fallback_map = self.create_structure_map(
            additional_props, record_name, namespace, dependencies,
            json_schema, base_uri, structure_schema, record_stack, recursion_depth
        )
        # Catch-all keys: any string is allowed.
        fallback_map['keyNames'] = {
            "type": "string"
        }
        hoisted_refs.append(
            self._hoist_definition(fallback_map, structure_schema, f"{record_name}_AdditionalMap")
        )
    elif additional_props is True:
        # additionalProperties: true means values of any type.
        catch_all_map = {
            "type": "map",
            "keyNames": {
                "type": "string"
            },
            "values": "any"
        }
        hoisted_refs.append(
            self._hoist_definition(catch_all_map, structure_schema, f"{record_name}_AnyMap")
        )

    # The pattern constraints rely on the JSONStructureValidation extension.
    self._ensure_validation_extension_in_structure_schema(structure_schema)

    # JSON Structure union syntax with hoisted references.
    return {
        "type": hoisted_refs
    }
|
|
2593
|
+
|
|
2594
|
+
def convert_json_schema_to_structure(input_data: str, root_namespace: str = 'example.com', base_uri: str = '') -> str:
    """
    Convert a JSON Schema document to JSON Structure format.

    Args:
        input_data (str): The JSON Schema document as a string.
        root_namespace (str): Namespace for the root schema. Defaults to 'example.com'.
        base_uri (str): Base URI for resolving references. Defaults to ''.

    Returns:
        str: The converted JSON Structure document, pretty-printed as a string.
    """
    converter = JsonToStructureConverter()
    converter.root_namespace = root_namespace

    parsed_schema = json.loads(input_data)

    # Perform the actual schema-to-structure conversion.
    structure = converter.jsons_to_structure(parsed_schema, root_namespace, base_uri)

    return json.dumps(structure, indent=2)
|
|
2615
|
+
|
|
2616
|
+
def convert_json_schema_to_structure_files(
    json_schema_file_path: str,
    structure_schema_path: str,
    root_namespace = None
) -> None:
    """
    Convert a JSON Schema file on disk to a JSON Structure file.

    Args:
        json_schema_file_path (str): Path to the input JSON Schema file.
        structure_schema_path (str): Path to the output JSON Structure file.
        root_namespace (str): Namespace for the root schema; 'example.com' when None.
    """
    # Fall back to the default namespace when the caller passed None.
    effective_namespace = 'example.com' if root_namespace is None else root_namespace

    # Load the schema text from disk.
    with open(json_schema_file_path, 'r', encoding='utf-8') as source:
        raw_schema = source.read()

    converted = convert_json_schema_to_structure(raw_schema, effective_namespace)

    # Persist the converted JSON Structure document.
    with open(structure_schema_path, 'w', encoding='utf-8') as target:
        target.write(converted)