structurize 2.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. avrotize/__init__.py +64 -0
  2. avrotize/__main__.py +6 -0
  3. avrotize/_version.py +34 -0
  4. avrotize/asn1toavro.py +160 -0
  5. avrotize/avrotize.py +152 -0
  6. avrotize/avrotocpp.py +483 -0
  7. avrotize/avrotocsharp.py +1075 -0
  8. avrotize/avrotocsv.py +121 -0
  9. avrotize/avrotodatapackage.py +173 -0
  10. avrotize/avrotodb.py +1383 -0
  11. avrotize/avrotogo.py +476 -0
  12. avrotize/avrotographql.py +197 -0
  13. avrotize/avrotoiceberg.py +210 -0
  14. avrotize/avrotojava.py +2156 -0
  15. avrotize/avrotojs.py +250 -0
  16. avrotize/avrotojsons.py +481 -0
  17. avrotize/avrotojstruct.py +345 -0
  18. avrotize/avrotokusto.py +364 -0
  19. avrotize/avrotomd.py +137 -0
  20. avrotize/avrotools.py +168 -0
  21. avrotize/avrotoparquet.py +208 -0
  22. avrotize/avrotoproto.py +359 -0
  23. avrotize/avrotopython.py +624 -0
  24. avrotize/avrotorust.py +435 -0
  25. avrotize/avrotots.py +598 -0
  26. avrotize/avrotoxsd.py +344 -0
  27. avrotize/cddltostructure.py +1841 -0
  28. avrotize/commands.json +3337 -0
  29. avrotize/common.py +834 -0
  30. avrotize/constants.py +72 -0
  31. avrotize/csvtoavro.py +132 -0
  32. avrotize/datapackagetoavro.py +76 -0
  33. avrotize/dependencies/cpp/vcpkg/vcpkg.json +19 -0
  34. avrotize/dependencies/typescript/node22/package.json +16 -0
  35. avrotize/dependency_resolver.py +348 -0
  36. avrotize/dependency_version.py +432 -0
  37. avrotize/jsonstoavro.py +2167 -0
  38. avrotize/jsonstostructure.py +2642 -0
  39. avrotize/jstructtoavro.py +878 -0
  40. avrotize/kstructtoavro.py +93 -0
  41. avrotize/kustotoavro.py +455 -0
  42. avrotize/parquettoavro.py +157 -0
  43. avrotize/proto2parser.py +498 -0
  44. avrotize/proto3parser.py +403 -0
  45. avrotize/prototoavro.py +382 -0
  46. avrotize/structuretocddl.py +597 -0
  47. avrotize/structuretocpp.py +697 -0
  48. avrotize/structuretocsharp.py +2295 -0
  49. avrotize/structuretocsv.py +365 -0
  50. avrotize/structuretodatapackage.py +659 -0
  51. avrotize/structuretodb.py +1125 -0
  52. avrotize/structuretogo.py +720 -0
  53. avrotize/structuretographql.py +502 -0
  54. avrotize/structuretoiceberg.py +355 -0
  55. avrotize/structuretojava.py +853 -0
  56. avrotize/structuretojsons.py +498 -0
  57. avrotize/structuretokusto.py +639 -0
  58. avrotize/structuretomd.py +322 -0
  59. avrotize/structuretoproto.py +764 -0
  60. avrotize/structuretopython.py +772 -0
  61. avrotize/structuretorust.py +714 -0
  62. avrotize/structuretots.py +653 -0
  63. avrotize/structuretoxsd.py +679 -0
  64. avrotize/xsdtoavro.py +413 -0
  65. structurize-2.19.0.dist-info/METADATA +107 -0
  66. structurize-2.19.0.dist-info/RECORD +70 -0
  67. structurize-2.19.0.dist-info/WHEEL +5 -0
  68. structurize-2.19.0.dist-info/entry_points.txt +2 -0
  69. structurize-2.19.0.dist-info/licenses/LICENSE +201 -0
  70. structurize-2.19.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2642 @@
1
+ """ JSON Schema to JSON Structure converter. """
2
+
3
+ # pylint: disable=too-many-lines, line-too-long, too-many-branches, too-many-statements, too-many-locals, too-many-nested-blocks, too-many-arguments, too-many-instance-attributes, too-many-public-methods, too-many-boolean-expressions
4
+
5
+ import json
6
+ import os
7
+ import copy
8
+ import re
9
+ import urllib.parse
10
+ from urllib.parse import ParseResult, urlparse, unquote
11
+ from typing import Any, Dict, List, Tuple, Union, Optional
12
+ import jsonpointer
13
+ from jsonpointer import JsonPointerException
14
+ import requests
15
+
16
+ from avrotize.common import avro_name, avro_name_with_altname, avro_namespace, find_schema_node, generic_type, set_schema_node
17
+ from avrotize.dependency_resolver import inline_dependencies_of, sort_messages_by_dependencies
18
+
19
+ # JSON Structure primitive types
20
# Type names this converter treats as built-in JSON Structure types.
# The order is kept exactly as originally declared.
structure_primitive_types = [
    # null and text
    'null',
    'string',
    # signed integer names
    'int8',
    'int16',
    'int32',
    'int64',
    # unsigned integer names
    'uint8',
    'uint16',
    'uint32',
    'uint64',
    # floating point and decimal names
    'float',
    'double',
    'decimal',
    'boolean',
    'bytes',
    # date/time related names
    'date',
    'time',
    'datetime',
    'duration',
    'uuid',
    # container-like names
    'set',
    'map',
    'object',
    'choice',
]
26
+
27
+
28
+ class JsonToStructureConverter:
29
+ """
30
+ Converts JSON Schema documents to JSON Structure format.
31
+
32
+ Attributes:
33
+ imported_types: A dictionary of imported type schemas.
34
+ root_namespace: The namespace for the root schema.
35
+ max_recursion_depth: The maximum recursion depth.
36
+ types_with_unmerged_types: A list of types with unmerged types.
37
+ content_cache: A dictionary for caching fetched URLs.
38
+ utility_namespace: The namespace for utility types.
39
+ preserve_composition: Flag to preserve composition keywords.
40
+ detect_inheritance: Flag to detect inheritance patterns. detect_discriminators: Flag to detect OpenAPI discriminator patterns.
41
+ convert_empty_objects_to_maps: Flag to convert objects with only additionalProperties to maps.
42
+ split_top_level_records: Flag to split top-level records.
43
+ root_class_name: The name of the root class.
44
+ """
45
+
46
+ def __init__(self) -> None:
47
+ self.imported_types: Dict[Any, Any] = {}
48
+ self.root_namespace = 'example.com'
49
+ self.max_recursion_depth = 40
50
+ self.types_with_unmerged_types: List[dict] = []
51
+ self.content_cache: Dict[str, str] = {}
52
+ self.utility_namespace = 'utility.vasters.com'
53
+ self.split_top_level_records = False
54
+ self.root_class_name = 'document'
55
+ self.type_registry: Dict[str, str] = {} # Track type definitions for reference resolution
56
+ # JSON Structure specific configuration
57
+ self.preserve_composition = False # Resolve composition keywords by default for JSON Structure compliance
58
+ self.detect_inheritance = True
59
+ self.detect_discriminators = True
60
+ self.convert_empty_objects_to_maps = True
61
+
62
+ def is_empty_type(self, structure_type):
63
+ """
64
+ Check if the JSON Structure type is an empty type.
65
+
66
+ Parameters:
67
+ structure_type (any): The JSON Structure type to check.
68
+
69
+ Returns:
70
+ bool: True if the type is empty, False otherwise.
71
+ """
72
+ if len(structure_type) == 0:
73
+ return True
74
+ if isinstance(structure_type, list):
75
+ return all(self.is_empty_type(t) for t in structure_type)
76
+ if isinstance(structure_type, dict):
77
+ if not 'type' in structure_type:
78
+ return True
79
+ if (structure_type['type'] == 'object' and (not 'properties' in structure_type or len(structure_type['properties']) == 0)) or \
80
+ (structure_type['type'] == 'choice' and (not 'choices' in structure_type or len(structure_type['choices']) == 0)) or \
81
+ (structure_type['type'] == 'set' and (not 'items' in structure_type or not structure_type['items'])) or \
82
+ (structure_type['type'] == 'map' and (not 'values' in structure_type or not structure_type['values'])):
83
+ return True
84
+ return False
85
+
86
+ def is_empty_json_type(self, json_type):
87
+ """
88
+ Check if the JSON type is an empty type.
89
+
90
+ Parameters:
91
+ json_type (any): The JSON type to check.
92
+
93
+ Returns:
94
+ bool: True if the JSON type is empty, False otherwise.
95
+ """
96
+ if len(json_type) == 0:
97
+ return True
98
+ if isinstance(json_type, list):
99
+ return all(self.is_empty_json_type(t) for t in json_type)
100
+ elif isinstance(json_type, dict):
101
+ if not 'type' in json_type:
102
+ return True
103
+ return False
104
+
105
def detect_numeric_type(self, schema: dict) -> str:
    """
    Analyze schema constraints to determine the appropriate numeric type.

    Resolution order:
      1. A recognised 'format' hint wins. Narrow and unsigned formats are
         widened to int32/int64 for compatibility.
      2. For type 'integer', 'int64' is chosen as soon as any declared
         bound lies outside the signed 32-bit range; otherwise 'int32'.
      3. For type 'number', 'decimal' is chosen when 'multipleOf' is
         fractional or 'precision'/'scale' is present; otherwise 'double'.
      4. Anything else falls back to 'double'.

    Args:
        schema (dict): The JSON schema object

    Returns:
        str: The appropriate JSON Structure numeric type
    """
    int32_min, int32_max = -2147483648, 2147483647

    format_mapping = {
        'int8': 'int32',     # Use int32 instead of int8 for better compatibility
        'int16': 'int32',    # Use int32 instead of int16 for better compatibility
        'int32': 'int32',
        'int64': 'int64',
        'uint8': 'int32',    # Use int32 instead of uint8 for better compatibility
        'uint16': 'int32',   # Use int32 instead of uint16 for better compatibility
        'uint32': 'int64',   # Use int64 instead of uint32 for better compatibility
        'uint64': 'int64',   # Use int64 instead of uint64 for better compatibility
        'float': 'float',
        'double': 'double'
    }
    format_hint = schema.get('format')
    if isinstance(format_hint, str) and format_hint in format_mapping:
        return format_mapping[format_hint]

    schema_type = schema.get('type')

    if schema_type == 'integer':
        def bound(inclusive_key, exclusive_key):
            """Return the numeric bound, or None when absent or not a number."""
            value = schema.get(inclusive_key, schema.get(exclusive_key))
            # Draft-04 schemas use boolean exclusiveMinimum/exclusiveMaximum;
            # a boolean is a modifier, not a bound.
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                return None
            return value

        bounds = (
            bound('minimum', 'exclusiveMinimum'),
            bound('maximum', 'exclusiveMaximum'),
        )
        # Any single bound outside the int32 range forces int64, even when
        # the opposite bound is missing.
        if any(b is not None and not int32_min <= b <= int32_max for b in bounds):
            return 'int64'
        return 'int32'  # Conservative default

    if schema_type == 'number':
        if 'multipleOf' in schema:
            multiple_of = schema['multipleOf']
            if isinstance(multiple_of, float) or '.' in str(multiple_of):
                return 'decimal'
        # Custom precision/scale keywords indicate a fixed-point value
        if 'precision' in schema or 'scale' in schema:
            return 'decimal'
        return 'double'  # Default for floating point

    return 'double'  # Default fallback
169
+
170
def detect_temporal_type(self, schema: dict) -> str:
    """
    Map the schema's 'format' keyword to a JSON Structure temporal type.

    Args:
        schema (dict): The JSON schema object

    Returns:
        str: 'date', 'time', 'datetime' or 'duration' for the matching
        format; 'string' when the format is missing, empty or not temporal.
    """
    fmt = schema.get('format')
    if not fmt:
        return 'string'

    known_formats = {
        'date': 'date',
        'time': 'time',
        'date-time': 'datetime',
        'duration': 'duration'
    }
    return known_formats.get(fmt, 'string')
190
+
191
def detect_collection_type(self, schema: dict) -> str:
    """
    Decide whether an array schema maps to 'set' or 'array'.

    Args:
        schema (dict): The JSON schema array object

    Returns:
        str: 'set' when the schema is of type 'array' and has a truthy
        'uniqueItems'; 'array' in every other case.
    """
    wants_unique = schema.get('type') == 'array' and schema.get('uniqueItems', False)
    return 'set' if wants_unique else 'array'
203
+
204
+ def should_convert_to_map(self, json_object: dict) -> bool:
205
+ """
206
+ Determine if object should be converted to map type.
207
+
208
+ Args:
209
+ json_object (dict): The JSON schema object
210
+
211
+ Returns:
212
+ bool: True if should be converted to map
213
+ """
214
+ if not self.convert_empty_objects_to_maps:
215
+ return False
216
+
217
+ # Convert if object has only additionalProperties and no properties
218
+ if ('additionalProperties' in json_object and
219
+ (not 'properties' in json_object or len(json_object['properties']) == 0) and
220
+ (not 'patternProperties' in json_object or len(json_object['patternProperties']) == 0)):
221
+ return True
222
+
223
+ # Convert if object has only patternProperties and no properties or additionalProperties
224
+ if ('patternProperties' in json_object and
225
+ (not 'properties' in json_object or len(json_object['properties']) == 0) and
226
+ 'additionalProperties' not in json_object):
227
+ return True
228
+
229
+ return False
230
+
231
+ def detect_discriminator_pattern(self, schema: dict) -> dict | None:
232
+ """
233
+ Detect OpenAPI discriminator patterns for choice type.
234
+
235
+ Args:
236
+ schema (dict): The JSON schema object
237
+
238
+ Returns:
239
+ dict | None: Discriminator info if detected, None otherwise
240
+ """
241
+ if not self.detect_discriminators:
242
+ return None
243
+
244
+ # Check for OpenAPI discriminator
245
+ if 'discriminator' in schema:
246
+ discriminator = schema['discriminator']
247
+ if isinstance(discriminator, dict) and 'propertyName' in discriminator:
248
+ return {
249
+ 'propertyName': discriminator['propertyName'],
250
+ 'mapping': discriminator.get('mapping', {})
251
+ }
252
+
253
+ return None
254
+
255
+ # Check for oneOf with discriminator-like pattern
256
+ if 'oneOf' in schema:
257
+ # Look for common property across all oneOf options that could be a discriminator
258
+ oneof_options = schema['oneOf']
259
+ if len(oneof_options) > 1:
260
+ common_props = None
261
+ for option in oneof_options:
262
+ if '$ref' in option:
263
+ continue # Skip refs for now
264
+ if 'properties' in option:
265
+ props = set(option['properties'].keys())
266
+ if common_props is None:
267
+ common_props = props
268
+ else:
269
+ common_props = common_props.intersection(props)
270
+
271
+ # If there's exactly one common property, it might be a discriminator
272
+ if common_props and len(common_props) == 1:
273
+ prop_name = list(common_props)[0]
274
+ return {
275
+ 'property': prop_name,
276
+ 'mapping': {} # Would need more analysis to populate
277
+ }
278
+
279
+ return None
280
+
281
+ def detect_inheritance_pattern(self, schema: dict, type_name: str = '') -> dict | None:
282
+ """
283
+ Detect simple inheritance patterns in allOf schemas.
284
+
285
+ Only detects patterns with exactly 2 items where one is a $ref and the other
286
+ contains properties/required/other object schema keywords.
287
+ Excludes self-referential patterns.
288
+
289
+ Args:
290
+ schema (dict): The JSON schema object
291
+ type_name (str): The name of the current type (to detect self-references)
292
+
293
+ Returns:
294
+ dict | None: Inheritance info if detected, None otherwise
295
+ """
296
+ if not self.detect_inheritance or 'allOf' not in schema:
297
+ return None
298
+
299
+ allof_items = schema['allOf']
300
+
301
+ # Only handle simple 2-item inheritance patterns
302
+ if len(allof_items) != 2:
303
+ return None
304
+
305
+ # Look for pattern: [{"$ref": "..."}, {"properties": {...}}] or similar
306
+ ref_item = None
307
+ extension_item = None
308
+
309
+ for item in allof_items:
310
+ if '$ref' in item and len(item) == 1: # Pure reference, no other properties
311
+ ref_item = item
312
+ elif ('type' in item or 'properties' in item or 'required' in item or
313
+ 'additionalProperties' in item) and '$ref' not in item: # Pure extension, no ref
314
+ extension_item = item
315
+
316
+ # Only return inheritance info for simple base + extension pattern
317
+ if ref_item and extension_item:
318
+ base_ref = ref_item['$ref']
319
+
320
+ # Check for self-referential patterns
321
+ if base_ref.startswith('#/definitions/'):
322
+ ref_type_name = base_ref[14:] # Remove '#/definitions/'
323
+ if ref_type_name == type_name:
324
+ # Self-referential pattern - don't convert to inheritance
325
+ return None
326
+
327
+ return {
328
+ 'base_ref': base_ref,
329
+ 'extension': extension_item
330
+ }
331
+
332
+ return None
333
+
334
+ def json_schema_primitive_to_structure_type(self, json_primitive: Optional[str | list], format: Optional[str], enum: Optional[list], record_name: str, field_name: str, namespace: str, dependencies: list, schema: dict) -> str | dict[str, Any] | list:
335
+ """
336
+ Convert a JSON Schema primitive type to JSON Structure primitive type.
337
+
338
+ Args:
339
+ json_primitive (str | list): The JSON Schema primitive type to be converted.
340
+ format (str | None): The format of the JSON primitive type, if applicable.
341
+ enum (list | None): The list of enum values, if applicable.
342
+ record_name (str): The name of the record.
343
+ field_name (str): The name of the field.
344
+ namespace (str): The namespace of the type.
345
+ dependencies (list): The list of dependencies.
346
+ schema (dict): The complete schema object for analysis. Returns:
347
+ str | dict[str,Any] | list: The converted JSON Structure primitive type. """
348
+
349
+ if isinstance(json_primitive, list):
350
+ if enum:
351
+ # Handle enum with multiple types (convert to string enum)
352
+ return {
353
+ 'type': 'string',
354
+ 'enum': list(enum)
355
+ }
356
+ else:
357
+ # Handle union types
358
+ union_types = []
359
+ for item in json_primitive:
360
+ if isinstance(item, str):
361
+ converted = self.json_schema_primitive_to_structure_type(
362
+ item, format, None, record_name, field_name, namespace, dependencies, schema)
363
+ union_types.append(converted)
364
+ elif isinstance(item, dict):
365
+ item_format = item.get('format', format)
366
+ item_enum = item.get('enum', enum)
367
+ item_type = item.get('type', item)
368
+ converted = self.json_schema_primitive_to_structure_type(
369
+ item_type, item_format, item_enum, record_name, field_name, namespace, dependencies, item)
370
+ union_types.append(converted)
371
+ # Always wrap as {"type": [ ... ]} for unions
372
+ return {"type": self.flatten_union(union_types, None, field_name)}
373
+ # ...existing code...
374
+ structure_type = None
375
+
376
+ if json_primitive == 'string':
377
+ if format:
378
+ if format in ('date', 'time', 'date-time', 'duration'):
379
+ structure_type = self.detect_temporal_type({'type': 'string', 'format': format})
380
+ elif format == 'uuid':
381
+ structure_type = 'uuid'
382
+ elif format == 'byte':
383
+ structure_type = 'string' # Map bytes to string in JSON Structure elif format == 'binary':
384
+ structure_type = 'string' # Map binary to string in JSON Structure
385
+ else:
386
+ structure_type = 'string'
387
+ else:
388
+ structure_type = 'string'
389
+
390
+ elif json_primitive == 'integer':
391
+ structure_type = self.detect_numeric_type({'type': 'integer', 'format': format, **schema})
392
+
393
+ elif json_primitive == 'number':
394
+ structure_type = self.detect_numeric_type({'type': 'number', 'format': format, **schema})
395
+
396
+ elif json_primitive == 'boolean':
397
+ structure_type = 'boolean'
398
+
399
+ elif json_primitive == 'null':
400
+ structure_type = 'null'
401
+
402
+ else:
403
+ # Handle case where type is not specified but enum is present
404
+ if json_primitive is None and enum is not None:
405
+ # Default to string type for enums without explicit type
406
+ structure_type = 'string'
407
+ else: # Unknown type, keep as string reference
408
+ if isinstance(json_primitive, str):
409
+ dependencies.append(json_primitive)
410
+ structure_type = json_primitive or 'string' # Ensure we never return None # Always return proper schema objects, not simple strings
411
+ if isinstance(structure_type, str):
412
+ result: dict[str, Any] = {'type': structure_type}
413
+
414
+ # Ensure map and set types are complete
415
+ if structure_type == 'map':
416
+ result['values'] = {'type': 'any'} # Default values type per user instruction
417
+ elif structure_type == 'set':
418
+ result['items'] = {'type': 'any'} # Default items type per user instruction
419
+
420
+ # Handle enums
421
+ if enum is not None:
422
+ result['enum'] = list(enum)
423
+
424
+ # Add constraints for string types
425
+ if structure_type == 'string' and isinstance(schema, dict):
426
+ if 'maxLength' in schema:
427
+ result['maxLength'] = schema['maxLength']
428
+ if 'minLength' in schema:
429
+ result['minLength'] = schema['minLength']
430
+ if 'pattern' in schema:
431
+ result['pattern'] = schema['pattern']
432
+
433
+ # Add precision/scale for decimal types
434
+ elif structure_type == 'decimal' and isinstance(schema, dict):
435
+ if 'multipleOf' in schema:
436
+ # Try to infer precision/scale from multipleOf
437
+ multiple_str = str(schema['multipleOf'])
438
+ if '.' in multiple_str:
439
+ scale = len(multiple_str.split('.')[1])
440
+ result['scale'] = str(scale)
441
+
442
+ return result
443
+
444
+ # If already a dict or other complex type, return as-is
445
+ return structure_type
446
+
447
def _hoist_definition(self, schema, structure_schema, name_hint):
    """
    Register a copy of `schema` under a unique name in
    structure_schema['definitions'] and return a $ref pointing at it.

    The name is derived from `name_hint` (or 'UnionType' when the hint is
    falsy) via `avro_name`; on collision a numeric suffix starting at 2 is
    appended. The stored copy is shallow and receives a 'name' entry.
    """
    definitions = structure_schema.setdefault('definitions', {})

    stem = avro_name(name_hint or 'UnionType')
    candidate = stem
    suffix = 1
    while candidate in definitions:
        suffix += 1
        candidate = f"{stem}{suffix}"

    hoisted = dict(schema)  # Copy so the caller's schema is not renamed
    hoisted['name'] = candidate
    definitions[candidate] = hoisted
    return {'$ref': f"#/definitions/{candidate}"}
464
+
465
+ def _ensure_schema_object(self, value, structure_schema=None, name_hint=None, force_hoist_in_union=False):
466
+ """
467
+ Ensure that a value is wrapped as a proper JSON Structure schema object.
468
+
469
+ Args:
470
+ value: The value to wrap
471
+ structure_schema: The structure schema for hoisting definitions
472
+ name_hint: Hint for naming hoisted definitions
473
+ force_hoist_in_union: Whether to hoist complex types in union contexts
474
+
475
+ Returns:
476
+ A proper schema object with type or $ref
477
+ """ # Always return a schema object (dict with at least 'type' or '$ref') for use in properties/items/values
478
+ if isinstance(value, dict):
479
+ # Special handling for $ref
480
+ if '$ref' in value and len(value) == 1: # Pure $ref reference
481
+ if force_hoist_in_union:
482
+ # In union contexts, $ref should be returned as-is (not wrapped in type)
483
+ return value
484
+ else:
485
+ # In property contexts, wrap in type field for JSON Structure compliance
486
+ return {'type': value}
487
+
488
+ # Check if this dict has composition keywords - preserve as-is for JSON Structure conditional composition
489
+ if any(key in value for key in ['anyOf', 'oneOf', 'allOf']):
490
+ return value
491
+
492
+ # Ensure map types have values and set types have items
493
+ if value.get('type') == 'map' and 'values' not in value:
494
+ value = dict(value) # Create a copy to avoid modifying original
495
+ value['values'] = {'type': 'any'} # Default to 'any' as per user instruction
496
+ elif value.get('type') == 'set' and 'items' not in value:
497
+ value = dict(value) # Create a copy to avoid modifying original
498
+ value['items'] = {'type': 'any'} # Default to 'any' as per user instruction
499
+ # If force_hoist_in_union, check if this is a simple primitive type that should be extracted
500
+ if force_hoist_in_union and ('$ref' not in value):
501
+ # Check if this is a simple primitive type like {"type": "int32"}
502
+ if (len(value) == 1 and 'type' in value and
503
+ value['type'] in ['string', 'boolean', 'integer', 'number', 'null', 'int32', 'int64', 'float', 'double', 'decimal', 'uuid', 'date', 'time', 'datetime', 'duration', 'bytes']):
504
+ # Return the primitive type string directly for JSON Structure compliance
505
+ return value['type']
506
+ elif structure_schema is not None:
507
+ # For complex types, hoist to definitions
508
+ return self._hoist_definition(value, structure_schema, name_hint or 'UnionType')
509
+ return value
510
+ elif isinstance(value, str):
511
+ # Handle special cases where string primitives represent incomplete complex types
512
+ if value == 'map':
513
+ # Convert incomplete map type to complete structure
514
+ schema_obj = {'type': 'map', 'values': {'type': 'any'}}
515
+ elif value == 'set':
516
+ # Convert incomplete set type to complete structure
517
+ schema_obj = {'type': 'set', 'items': {'type': 'any'}}
518
+ else:
519
+ schema_obj = {'type': value}
520
+
521
+ # For JSON Structure unions, primitive types should be direct type strings, not hoisted
522
+ if force_hoist_in_union:
523
+ # Return the primitive type string directly for JSON Structure compliance
524
+ # But only for actual primitives, not for complex types like map/set
525
+ if value in ['string', 'boolean', 'integer', 'number', 'null', 'int32', 'int64', 'float', 'double', 'decimal', 'uuid', 'date', 'time', 'datetime', 'duration', 'bytes']:
526
+ return value
527
+ else:
528
+ # For complex types like map/set, return the complete schema object
529
+ return schema_obj
530
+ return schema_obj
531
+ elif isinstance(value, list):
532
+ # For unions, process each type appropriately
533
+ result = []
534
+ for idx, v in enumerate(value):
535
+ if isinstance(v, str):
536
+ # Primitive types in unions should be direct strings
537
+ result.append(v)
538
+ else:
539
+ # Complex types should be hoisted to definitions and referenced via $ref
540
+ obj = self._ensure_schema_object(v, structure_schema, f"{name_hint}_option_{idx}" if name_hint else None, force_hoist_in_union=True)
541
+ result.append(obj)
542
+ return {"type": result}
543
+ else:
544
+ return {'type': 'string'}
545
+
546
+ def _scan_for_uses(self, structure_schema: dict) -> list:
547
+ """
548
+ Scan the structure schema for extension feature usage and return the list of required $uses.
549
+ """
550
+ uses = set()
551
+ def scan(obj):
552
+ if isinstance(obj, dict):
553
+ for k, v in obj.items():
554
+ if k == 'altnames':
555
+ uses.add('JSONStructureAlternateNames')
556
+ if k in {'unit', 'currency', 'symbol'}:
557
+ uses.add('JSONStructureUnits')
558
+ if k in {'pattern', 'minLength', 'maxLength', 'minimum', 'maximum', 'exclusiveMinimum', 'exclusiveMaximum', 'multipleOf', 'const', 'enum', 'required', 'propertyNames', 'keyNames'}:
559
+ uses.add('JSONStructureValidation')
560
+ if k in {'if', 'then', 'else', 'dependentRequired', 'dependentSchemas', 'anyOf', 'allOf', 'oneOf', 'not'}:
561
+ uses.add('JSONStructureConditionalComposition')
562
+ scan(v)
563
+ elif isinstance(obj, list):
564
+ for item in obj:
565
+ scan(item)
566
+ scan(structure_schema)
567
+ return sorted(uses)
568
+
569
+ def _ensure_validation_extension_in_structure_schema(self, structure_schema) -> None:
570
+ """
571
+ Ensure that the JSONStructureValidation extension is included in the $uses array.
572
+ This is handled automatically by the _scan_for_uses method when propertyNames or keyNames are detected.
573
+
574
+ Args:
575
+ structure_schema: The structure schema to update (dict or list)
576
+ """
577
+ # No action needed - the _scan_for_uses method automatically detects
578
+ # propertyNames and keyNames and adds JSONStructureValidation to $uses
579
+ pass
580
+
581
+ def create_structure_object(self, properties: dict, required: list, record_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int = 1, original_schema: dict | None = None) -> dict:
582
+ """
583
+ Create a JSON Structure object type from properties.
584
+
585
+ Args:
586
+ properties (dict): The properties of the object
587
+ required (list): List of required property names
588
+ record_name (str): Name of the record
589
+ namespace (str): Namespace
590
+ dependencies (list): Dependencies list
591
+ json_schema (dict): The full JSON schema
592
+ base_uri (str): Base URI
593
+ structure_schema (list): Structure schema list
594
+ record_stack (list): Record stack for recursion detection
595
+ recursion_depth (int): Current recursion depth
596
+ original_schema (dict): The original JSON schema object containing additionalProperties
597
+
598
+ Returns:
599
+ dict: JSON Structure object definition """ # Create the basic structure object
600
+ structure_obj = {
601
+ 'type': 'object'
602
+ }
603
+
604
+ # Add required field if it's not empty
605
+ if required:
606
+ structure_obj['required'] = required
607
+
608
+ # Add name if provided
609
+ if record_name:
610
+ structure_obj['name'] = avro_name(record_name)
611
+
612
+ # Initialize properties dict only if we have properties to add
613
+ has_properties = bool(properties)
614
+ if has_properties:
615
+ structure_obj['properties'] = {}
616
+
617
+ # Process regular properties
618
+ for prop_name, prop_schema in properties.items():
619
+ prop_type = self.json_type_to_structure_type(
620
+ prop_schema, record_name, prop_name, namespace, dependencies,
621
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
622
+ )
623
+ # Normalize property name if needed
624
+ if not self.is_valid_identifier(prop_name):
625
+ normalized_name = self.normalize_identifier(prop_name)
626
+ prop_entry = self._ensure_schema_object(prop_type, structure_schema, normalized_name)
627
+ # Always create a new dict to add altnames
628
+ new_entry = {}
629
+ if isinstance(prop_entry, dict):
630
+ new_entry.update(prop_entry)
631
+ else:
632
+ new_entry['type'] = prop_entry
633
+ new_entry['altnames'] = {'json': prop_name}
634
+ structure_obj['properties'][normalized_name] = new_entry
635
+ else:
636
+ structure_obj['properties'][prop_name] = self._ensure_schema_object(prop_type, structure_schema, prop_name) # Handle patternProperties and additionalProperties
637
+ has_additional_schema = False
638
+ if original_schema:
639
+ # Check for patternProperties that coexist with properties/additionalProperties
640
+ pattern_properties = original_schema.get('patternProperties')
641
+ additional_props = original_schema.get('additionalProperties')
642
+ # Special case: multiple patternProperties with no properties
643
+ # Should create a type union of maps, not a single object with anyOf
644
+ # This applies whether additionalProperties is false OR a schema
645
+ if (pattern_properties and len(pattern_properties) > 1 and
646
+ (not 'properties' in original_schema or not original_schema['properties'])):
647
+ # Return type union of maps instead of object
648
+ return self.create_pattern_union_maps(
649
+ pattern_properties, additional_props, record_name, namespace, dependencies,
650
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth
651
+ )
652
+
653
+ # Merge patternProperties into additionalProperties if both exist
654
+ if pattern_properties and ('properties' in original_schema or additional_props is not None):
655
+ # patternProperties coexists with properties/additionalProperties - merge into additionalProperties # Get the pattern schema for values (merge all pattern schemas)
656
+ if len(pattern_properties) == 1:
657
+ pattern_schema = list(pattern_properties.values())[0]
658
+ else:
659
+ # Multiple patterns - create a union type instead of anyOf
660
+ schemas = list(pattern_properties.values())
661
+ # Convert each schema and create a proper union
662
+ converted_schemas = []
663
+ for idx, schema in enumerate(schemas):
664
+ converted_schema = self.json_type_to_structure_type(
665
+ schema, record_name, f'pattern_{idx}', namespace, dependencies,
666
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
667
+ )
668
+ converted_schemas.append(converted_schema)
669
+
670
+ if len(converted_schemas) == 1:
671
+ pattern_schema = converted_schemas[0]
672
+ else:
673
+ # Create union type array - hoist compound types if needed
674
+ hoisted_schemas = []
675
+ for idx, schema in enumerate(converted_schemas):
676
+ hoisted_schema = self._ensure_schema_object(schema, structure_schema, f'pattern_{idx}', force_hoist_in_union=True)
677
+ hoisted_schemas.append(hoisted_schema)
678
+ pattern_schema = {'type': hoisted_schemas}
679
+
680
+ if additional_props is False:
681
+ # Override false additionalProperties with pattern schema
682
+ merged_additional = pattern_schema
683
+ elif additional_props is True:
684
+ # Keep true (allow any additional properties)
685
+ merged_additional = True
686
+ elif isinstance(additional_props, dict):
687
+ # Merge both schemas using a union type instead of anyOf
688
+ additional_converted = self.json_type_to_structure_type(
689
+ additional_props, record_name, 'additional', namespace, dependencies,
690
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
691
+ )
692
+
693
+ # Create union of additional props and pattern schema
694
+ additional_hoisted = self._ensure_schema_object(additional_converted, structure_schema, 'additional', force_hoist_in_union=True)
695
+ pattern_hoisted = self._ensure_schema_object(pattern_schema, structure_schema, 'pattern', force_hoist_in_union=True)
696
+
697
+ merged_additional = {
698
+ 'type': [additional_hoisted, pattern_hoisted]
699
+ }
700
+ elif additional_props is None:
701
+ # No additionalProperties, use pattern schema
702
+ merged_additional = pattern_schema
703
+ else:
704
+ merged_additional = pattern_schema
705
+ # Convert merged schema to structure type
706
+ if merged_additional is not True and isinstance(merged_additional, dict):
707
+ additional_type = self.json_type_to_structure_type(
708
+ merged_additional, record_name, 'additionalProperty', namespace, dependencies,
709
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
710
+ )
711
+ structure_obj['additionalProperties'] = self._ensure_schema_object(additional_type, structure_schema, 'additionalProperty', force_hoist_in_union=True)
712
+ has_additional_schema = True
713
+ elif merged_additional is True:
714
+ structure_obj['additionalProperties'] = True
715
+ has_additional_schema = True
716
+ # Add propertyNames validation for the patterns
717
+ patterns = list(pattern_properties.keys())
718
+ if len(patterns) == 1:
719
+ # Single pattern - use it directly
720
+ pattern = patterns[0]
721
+ structure_obj['propertyNames'] = {
722
+ "type": "string",
723
+ "pattern": pattern
724
+ }
725
+ else:
726
+ # Multiple patterns - in JSON Structure, we cannot use anyOf for propertyNames
727
+ # Skip propertyNames validation when there are multiple patterns
728
+ # The patterns are already handled via the merged additionalProperties schema
729
+ pass
730
+
731
+ # Ensure $uses includes JSONStructureValidation
732
+ self._ensure_validation_extension_in_structure_schema(structure_schema)
733
+
734
+ elif additional_props is not None and additional_props is not False:
735
+ # Handle additionalProperties without patternProperties
736
+ if isinstance(additional_props, dict):
737
+ # Convert the additionalProperties schema to JSON Structure type
738
+ additional_type = self.json_type_to_structure_type(
739
+ additional_props, record_name, 'additionalProperty', namespace, dependencies,
740
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
741
+ )
742
+ structure_obj['additionalProperties'] = self._ensure_schema_object(additional_type, structure_schema, 'additionalProperty')
743
+ has_additional_schema = True
744
+ elif additional_props is True:
745
+ # True means any additional properties are allowed with any type
746
+ structure_obj['additionalProperties'] = True
747
+ has_additional_schema = True
748
+
749
+ # For JSON Structure compliance: If we have no properties and no additionalProperties/extension,
750
+ # add a default additionalProperties to make the object schema valid
751
+ if not has_properties and not has_additional_schema and '$extends' not in structure_obj:
752
+ # Add default additionalProperties to make the object valid per JSON Structure spec
753
+ structure_obj['additionalProperties'] = True
754
+
755
+ return structure_obj
756
+
757
def create_structure_choice(self, discriminator_info: dict, oneof_options: list, record_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int = 1) -> dict:
    """
    Build a JSON Structure ``choice`` definition from a discriminated ``oneOf``.

    Every option is stored under a positional key ``option_<index>``.
    Options carrying a ``$ref`` are emitted as a bare reference (the
    reference string is copied unchanged); all other options are converted
    through ``json_type_to_structure_type``.

    Args:
        discriminator_info (dict): Discriminator information; its
            ``'property'`` entry becomes the choice's ``discriminator``.
        oneof_options (list): List of oneOf options
        record_name (str): Name of the record; when non-empty it is passed
            through ``avro_name`` and stored as the choice's ``name``.
        namespace (str): Namespace
        dependencies (list): Dependencies list
        json_schema (dict): The full JSON schema
        base_uri (str): Base URI
        structure_schema (dict): Structure schema being built
        record_stack (list): Record stack for recursion detection
        recursion_depth (int): Current recursion depth

    Returns:
        dict: JSON Structure choice definition
    """
    branches: dict = {}
    result = {
        'type': 'choice',
        'discriminator': discriminator_info['property'],
        'choices': branches,
    }

    if record_name:
        result['name'] = avro_name(record_name)

    for position, candidate in enumerate(oneof_options):
        # Keys are positional; no discriminator mapping is consulted here.
        key = f"option_{position}"
        if '$ref' not in candidate:
            # Inline option: convert it one recursion level deeper.
            branches[key] = self.json_type_to_structure_type(
                candidate, record_name, f"choice_{position}", namespace, dependencies,
                json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
            )
            continue
        # Referenced option: keep only the reference itself.
        branches[key] = {'$ref': candidate['$ref']}

    return result
801
+
802
def create_structure_map(self, values_schema: dict, record_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int = 1) -> dict:
    """
    Create a JSON Structure map type.

    The value schema is converted with ``json_type_to_structure_type`` and
    then passed through ``_ensure_schema_object``, mirroring what
    ``create_structure_array_or_set`` does for ``items``.

    Args:
        values_schema (dict): Schema for map values
        record_name (str): Name of the record
        namespace (str): Namespace
        dependencies (list): Dependencies list
        json_schema (dict): The full JSON schema
        base_uri (str): Base URI
        structure_schema (dict): Structure schema being built
        record_stack (list): Record stack for recursion detection
        recursion_depth (int): Current recursion depth

    Returns:
        dict: JSON Structure map definition
    """
    values_type = self.json_type_to_structure_type(
        values_schema, record_name, 'value', namespace, dependencies,
        json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
    )
    # Always wrap as schema object.
    # NOTE: this statement had been fused onto the end of the comment line
    # above and was therefore never executed; it is restored here so map
    # values are normalized the same way array/set items are.
    values_type = self._ensure_schema_object(values_type, structure_schema, 'value')
    return {
        'type': 'map',
        'values': values_type
    }
828
+
829
def create_structure_map_with_pattern(self, values_schema: dict, pattern_properties: dict, record_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int = 1) -> dict:
    """
    Create a JSON Structure map whose keys are constrained via ``keyNames``.

    The map itself comes from ``create_structure_map``. When
    ``pattern_properties`` is non-empty, its keys (the regular expressions)
    are attached as a ``keyNames`` schema and the validation extension is
    registered on ``structure_schema``.

    Args:
        values_schema (dict): Schema for map values
        pattern_properties (dict): The patternProperties object with patterns as keys
        record_name (str): Name of the record
        namespace (str): Namespace
        dependencies (list): Dependencies list
        json_schema (dict): The full JSON schema
        base_uri (str): Base URI
        structure_schema (dict): Structure schema being built
        record_stack (list): Record stack for recursion detection
        recursion_depth (int): Current recursion depth

    Returns:
        dict: JSON Structure map definition with keyNames validation
    """
    mapping = self.create_structure_map(
        values_schema, record_name, namespace, dependencies,
        json_schema, base_uri, structure_schema, record_stack, recursion_depth
    )

    # Nothing to constrain: hand back the plain map untouched.
    if not pattern_properties:
        return mapping

    key_patterns = list(pattern_properties)
    if len(key_patterns) == 1:
        # A lone pattern is used directly as the key constraint.
        key_schema = {"type": "string", "pattern": key_patterns[0]}
    else:
        # Several patterns: a key may match any one of them.
        key_schema = {
            "anyOf": [{"type": "string", "pattern": expr} for expr in key_patterns]
        }
    mapping['keyNames'] = key_schema

    # Ensure $uses includes JSONStructureValidation
    self._ensure_validation_extension_in_structure_schema(structure_schema)

    return mapping
880
+
881
def create_structure_array_or_set(self, items_schema: dict, is_set: bool, record_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int = 1) -> dict:
    """
    Create a JSON Structure ``array`` or ``set`` definition.

    The item schema is converted one recursion level deeper and then passed
    through ``_ensure_schema_object`` before being stored under ``items``.

    Args:
        items_schema (dict): Schema for array/set items
        is_set (bool): True for set, False for array
        record_name (str): Name of the record
        namespace (str): Namespace
        dependencies (list): Dependencies list
        json_schema (dict): The full JSON schema
        base_uri (str): Base URI
        structure_schema (dict): Structure schema being built
        record_stack (list): Record stack for recursion detection
        recursion_depth (int): Current recursion depth

    Returns:
        dict: JSON Structure array/set definition
    """
    element = self.json_type_to_structure_type(
        items_schema, record_name, 'item', namespace, dependencies,
        json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
    )
    # Always wrap as schema object
    element = self._ensure_schema_object(element, structure_schema, 'item')

    if is_set:
        container_kind = 'set'
    else:
        container_kind = 'array'
    return {'type': container_kind, 'items': element}
910
+
911
def add_alternate_names(self, structure: dict, original_name: str) -> dict:
    """
    Attach an ``altnames`` entry describing the name in another convention.

    A name containing an underscore gets a ``camelCase`` alternative;
    otherwise a name containing an uppercase character gets a
    ``snake_case`` alternative. Names matching neither rule (and empty
    names) leave ``structure`` untouched. A non-dict ``structure`` is
    wrapped as ``{'type': structure}`` before ``altnames`` is added; a dict
    ``structure`` is updated in place.

    Args:
        structure (dict): The structure definition
        original_name (str): The original property/type name

    Returns:
        dict: Structure with altnames added
    """
    if not original_name:
        return structure

    if '_' in original_name:
        # First segment is kept verbatim; later segments are capitalized
        # (str.capitalize also lower-cases the rest of each segment).
        head, *tail = original_name.split('_')
        alternates = {'camelCase': head + ''.join(part.capitalize() for part in tail)}
    elif any(ch.isupper() for ch in original_name):
        # Insert an underscore at each lower/digit -> upper boundary.
        underscored = re.sub('([a-z0-9])([A-Z])', r'\1_\2', original_name)
        alternates = {'snake_case': underscored.lower()}
    else:
        return structure

    target = structure if isinstance(structure, dict) else {'type': structure}
    target['altnames'] = alternates
    return target
943
+
944
def add_validation_constraints(self, structure: dict, schema: dict) -> dict:
    """
    Convert JSON Schema validation constraints to JSON Structure format.

    Copies a fixed set of validation keywords from ``schema`` onto
    ``structure``. A non-dict ``structure`` is first wrapped as
    ``{'type': structure}``; a dict ``structure`` is updated in place.

    For ``int64`` structures, numeric range bounds are emitted as decimal
    strings. Boolean bounds (the draft-04 form of ``exclusiveMinimum`` /
    ``exclusiveMaximum``) are copied unchanged: ``bool`` is a subclass of
    ``int`` in Python, and stringifying ``True`` would otherwise turn the
    flag into the bogus bound ``"1"``.

    Args:
        structure (dict): The structure definition
        schema (dict): The original JSON schema

    Returns:
        dict: Structure with validation constraints added
    """
    if not isinstance(structure, dict):
        structure = {'type': structure}

    # Keywords that are carried over verbatim (apart from the int64 rule below)
    validation_keys = (
        'minimum', 'maximum', 'exclusiveMinimum', 'exclusiveMaximum',
        'minLength', 'maxLength', 'pattern', 'minItems', 'maxItems',
        'const', 'enum',
    )
    range_bound_keys = ('minimum', 'maximum', 'exclusiveMinimum', 'exclusiveMaximum')

    # Check if this is an int64 type
    is_int64 = structure.get('type') == 'int64'

    for key in validation_keys:
        if key not in schema:
            continue
        value = schema[key]
        # bool must be excluded explicitly because isinstance(True, int) is True.
        is_real_number = isinstance(value, (int, float)) and not isinstance(value, bool)
        if is_int64 and key in range_bound_keys and is_real_number:
            # For int64 types, numeric bounds are represented as strings
            structure[key] = str(int(value))
        else:
            structure[key] = value

    return structure
978
+
979
def ensure_object_compliance(self, structure: dict) -> dict:
    """
    Make an empty-bodied object definition valid for JSON Structure.

    Only dicts whose ``type`` is ``'object'`` are considered. If such an
    object has a ``properties`` key that is empty, and declares neither
    ``additionalProperties`` nor ``$extends``, then
    ``additionalProperties: True`` is added in place. Anything else is
    returned unchanged.

    Args:
        structure (dict): The structure definition

    Returns:
        dict: Structure with JSON Structure compliance ensured
    """
    if isinstance(structure, dict) and structure.get('type') == 'object':
        # 'properties' is present but holds nothing usable.
        declares_empty_properties = 'properties' in structure and not structure['properties']
        # Either keyword already makes the object definition acceptable.
        already_open_or_derived = any(
            keyword in structure for keyword in ('additionalProperties', '$extends')
        )
        if declares_empty_properties and not already_open_or_derived:
            structure['additionalProperties'] = True

    return structure
1003
+
1004
def flatten_union(self, type_list: list, structure_schema=None, name_hint=None) -> list:
    """
    Flatten a (possibly nested) union type list into one de-duplicated list.

    Nested lists are flattened recursively. A dict of the exact form
    ``{'type': <primitive>}`` (no other keys) is reduced to its type
    string; every other member, and every element coming out of a nested
    list, is passed through ``_ensure_schema_object`` with
    ``force_hoist_in_union=True``. Entries already present in the result
    are not added again.

    Args:
        type_list (list): The list of types in a union.
        structure_schema: Forwarded to ``_ensure_schema_object``.
        name_hint: Optional prefix; when truthy, members are processed with
            the hint ``"<name_hint>_option_<index>"``, otherwise ``None``.

    Returns:
        list: The flattened list of types.
    """
    primitive_names = ['string', 'boolean', 'integer', 'number', 'null']
    collected = []

    def remember(entry):
        # Order-preserving de-duplication by equality.
        if entry not in collected:
            collected.append(entry)

    for position, member in enumerate(type_list):
        option_hint = f"{name_hint}_option_{position}" if name_hint else None

        if isinstance(member, list):
            # Flatten the nested union first, then normalize each element
            # under the hint of the nested list's own position.
            for nested in self.flatten_union(member, structure_schema, name_hint):
                remember(self._ensure_schema_object(
                    nested, structure_schema, option_hint, force_hoist_in_union=True))
            continue

        is_bare_primitive = (
            isinstance(member, dict)
            and 'type' in member
            and member['type'] in primitive_names
            and len(member) == 1
        )
        if is_bare_primitive:
            # A simple primitive type object like {"type": "boolean"} - keep only the type string
            remember(member['type'])
        else:
            # For complex types, use the normal processing
            remember(self._ensure_schema_object(
                member, structure_schema, option_hint, force_hoist_in_union=True))

    return collected
1034
+
1035
+ def merge_structure_schemas(self, schemas: list, structure_schemas: list, type_name: str | None = None, deps: List[str] = []) -> str | list | dict:
1036
+ """Merge multiple JSON Structure type schemas into one."""
1037
+
1038
+ if len(schemas) == 1:
1039
+ return schemas[0]
1040
+
1041
+ merged_schema: dict = {}
1042
+ if type_name:
1043
+ merged_schema['name'] = type_name
1044
+
1045
+ for schema in schemas:
1046
+ schema = copy.deepcopy(schema)
1047
+ if isinstance(schema, dict) and 'dependencies' in schema:
1048
+ deps1: List[str] = merged_schema.get('dependencies', [])
1049
+ deps1.extend(schema['dependencies'])
1050
+ merged_schema['dependencies'] = deps1
1051
+
1052
+ if isinstance(schema, str):
1053
+ # Simple type reference
1054
+ if 'type' not in merged_schema:
1055
+ merged_schema['type'] = schema
1056
+ elif merged_schema['type'] != schema:
1057
+ # Type conflict, create union
1058
+ if not isinstance(merged_schema['type'], list):
1059
+ merged_schema['type'] = [merged_schema['type']]
1060
+ if schema not in merged_schema['type']:
1061
+ merged_schema['type'].append(schema)
1062
+
1063
+ elif isinstance(schema, dict):
1064
+ # Merge object schemas
1065
+ for key, value in schema.items():
1066
+ if key == 'properties' and 'properties' in merged_schema:
1067
+ # Merge properties
1068
+ for prop_name, prop_schema in value.items():
1069
+ if prop_name in merged_schema['properties']:
1070
+ # Property exists, merge types
1071
+ existing = merged_schema['properties'][prop_name]
1072
+ merged_schema['properties'][prop_name] = self.merge_structure_schemas(
1073
+ [existing, prop_schema], structure_schemas, None, deps)
1074
+ else:
1075
+ merged_schema['properties'][prop_name] = prop_schema
1076
+ elif key == 'required' and 'required' in merged_schema:
1077
+ # Merge required arrays
1078
+ merged_schema['required'] = list(set(merged_schema['required'] + value))
1079
+ else:
1080
+ merged_schema[key] = value
1081
+
1082
+ return merged_schema
1083
+
1084
+ def json_type_to_structure_type(self, json_type: str | dict, record_name: str, field_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth=1) -> dict | list | str:
1085
+ """Convert a JSON Schema type to JSON Structure type."""
1086
+
1087
+ try:
1088
+ if recursion_depth >= self.max_recursion_depth:
1089
+ print(f'WARNING: Maximum recursion depth reached for {record_name} at field {field_name}')
1090
+ return 'string' # Fallback to string instead of generic_type() structure_type: dict = {}
1091
+ local_name = avro_name(field_name if field_name else record_name)
1092
+
1093
+ if isinstance(json_type, str):
1094
+ # Simple type reference
1095
+ return self.json_schema_primitive_to_structure_type(
1096
+ json_type, None, None, record_name, field_name, namespace, dependencies, {})
1097
+
1098
+ if isinstance(json_type, dict): # Handle inheritance pattern first (only if inheritance detection is enabled)
1099
+ inheritance_info = self.detect_inheritance_pattern(json_type, record_name)
1100
+ if inheritance_info:
1101
+ base_ref = inheritance_info['base_ref']
1102
+ extension = inheritance_info['extension']
1103
+ # Create abstract base type name
1104
+ if base_ref.startswith('#/definitions/'):
1105
+ base_type_name = base_ref[14:] # Remove '#/definitions/'
1106
+ abstract_base_name = avro_name(f"{base_type_name}Base")
1107
+ else:
1108
+ # Handle external references or other formats
1109
+ abstract_base_name = avro_name(f"{record_name}Base")
1110
+
1111
+ # Ensure the abstract base type exists
1112
+ self._ensure_abstract_base_type(base_ref, abstract_base_name, structure_schema, json_schema, base_uri)
1113
+
1114
+ structure_type = {
1115
+ 'type': 'object',
1116
+ 'name': record_name,
1117
+ '$extends': f"#/definitions/{abstract_base_name}"
1118
+ }
1119
+ if 'properties' in extension and extension['properties']:
1120
+ structure_type['properties'] = {}
1121
+ for prop_name, prop_schema in extension['properties'].items():
1122
+ prop_type = self.json_type_to_structure_type(
1123
+ prop_schema, record_name, prop_name, namespace, dependencies,
1124
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
1125
+ )
1126
+ # Normalize property name if needed
1127
+ if not self.is_valid_identifier(prop_name):
1128
+ normalized_name = self.normalize_identifier(prop_name)
1129
+ prop_entry = self._ensure_schema_object(prop_type, structure_schema, normalized_name)
1130
+ # Always create a new dict to add altnames
1131
+ new_entry = {}
1132
+ if isinstance(prop_entry, dict):
1133
+ new_entry.update(prop_entry)
1134
+ else:
1135
+ new_entry['type'] = prop_entry
1136
+ new_entry['altnames'] = {'json': prop_name}
1137
+ structure_type['properties'][normalized_name] = new_entry
1138
+ else:
1139
+ structure_type['properties'][prop_name] = self._ensure_schema_object(prop_type, structure_schema, prop_name)
1140
+
1141
+ if 'required' in extension:
1142
+ structure_type['required'] = extension['required']
1143
+
1144
+ # Copy other extension properties (validation constraints, etc.)
1145
+ for key, value in extension.items():
1146
+ if key not in ['properties', 'required', 'type']:
1147
+ structure_type[key] = value
1148
+
1149
+ # Apply any remaining validation constraints from the original schema
1150
+ structure_type = self.add_validation_constraints(structure_type, json_type)
1151
+
1152
+ return structure_type
1153
+
1154
+ # Handle discriminator pattern
1155
+ discriminator_info = self.detect_discriminator_pattern(json_type)
1156
+ if discriminator_info and 'oneOf' in json_type:
1157
+ return self.create_structure_choice(
1158
+ discriminator_info, json_type['oneOf'], record_name, namespace,
1159
+ dependencies, json_schema, base_uri, structure_schema, record_stack, recursion_depth
1160
+ ) # Handle $ref first (before checking for type)
1161
+ if '$ref' in json_type:
1162
+ ref = json_type['$ref']
1163
+ # Normalize references to use definitions instead of $defs
1164
+ if ref.startswith('#/$defs/'):
1165
+ ref = ref.replace('#/$defs/', '#/definitions/')
1166
+ elif ref.startswith('#/definitions/'):
1167
+ # Already correct format
1168
+ pass
1169
+
1170
+ # Handle nested JSON Pointer references like #/definitions/pipelineCommon/execution
1171
+ if '/' in ref.split('#/definitions/')[-1] and ref.startswith('#/definitions/'):
1172
+ try:
1173
+ # Resolve the nested JSON Pointer reference
1174
+ resolved_schema, _ = self.resolve_reference(json_type, base_uri, json_schema)
1175
+ if resolved_schema != json_type:
1176
+ # We successfully resolved a nested reference, process the resolved schema # Create a new definition name based on the nested path
1177
+ ref_parts = ref.split('/')
1178
+ if len(ref_parts) >= 4: # ['#', 'definitions', 'parent', 'child', ...]
1179
+ parent_name = ref_parts[2]
1180
+ child_path = '/'.join(ref_parts[3:])
1181
+ new_def_name = avro_name(f"{parent_name}_{child_path.replace('/', '_')}")
1182
+
1183
+ # Process the resolved schema recursively
1184
+ converted_schema = self.json_type_to_structure_type(
1185
+ resolved_schema, new_def_name, field_name, namespace, dependencies,
1186
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
1187
+ )
1188
+
1189
+ # Check if we need to add to definitions
1190
+ if isinstance(converted_schema, dict) and converted_schema.get('type') in ['object', 'array', 'choice']:
1191
+ # Set name for the definition and add to structure_schema definitions
1192
+ converted_schema['name'] = new_def_name
1193
+ if 'definitions' not in structure_schema:
1194
+ structure_schema['definitions'] = {}
1195
+ structure_schema['definitions'][new_def_name] = converted_schema
1196
+ return {'$ref': f"#/definitions/{new_def_name}"}
1197
+ else:
1198
+ # For simple types, return the converted schema directly
1199
+ return converted_schema
1200
+ except Exception as e:
1201
+ # If resolution fails, fall back to original reference
1202
+ print(f"Failed to resolve nested reference {ref}: {e}")
1203
+ pass
1204
+
1205
+ # Check if we need to use type registry to normalize the reference
1206
+ # Extract the definition name from the reference
1207
+ if ref.startswith('#/definitions/'):
1208
+ def_name = ref[14:] # Remove '#/definitions/' prefix
1209
+ if def_name in self.type_registry:
1210
+ # Use the normalized reference from the registry
1211
+ ref = self.type_registry[def_name]
1212
+
1213
+ return {'$ref': ref}
1214
+
1215
+ # Handle schemas without explicit type
1216
+ if json_type.get('type') is None:
1217
+ if 'enum' in json_type:
1218
+ # Enum-only schema - default to string type
1219
+ enum_values = json_type.get('enum')
1220
+ structure_type = self.json_schema_primitive_to_structure_type(
1221
+ 'string', json_type.get('format'), enum_values, record_name, field_name, namespace, dependencies, json_type
1222
+ )
1223
+ if isinstance(structure_type, dict):
1224
+ structure_type = self.add_validation_constraints(structure_type, json_type)
1225
+ return structure_type
1226
+ elif 'properties' in json_type or 'additionalProperties' in json_type or 'patternProperties' in json_type:
1227
+ # Object schema without explicit type - treat as object
1228
+ # Apply constraint composition conversion if applicable
1229
+ effective_schema = self._convert_constraint_composition_to_required(json_type)
1230
+ properties = effective_schema.get('properties', {})
1231
+ required = effective_schema.get('required', [])
1232
+ return self.create_structure_object(
1233
+ properties, required, record_name, namespace, dependencies,
1234
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth, effective_schema
1235
+ )
1236
+ elif self.has_composition_keywords(json_type):
1237
+ # Handle composition keywords without explicit type
1238
+ # Continue to composition handling below instead of returning empty object
1239
+ pass
1240
+ else:
1241
+ # Other schema without type - default to generic object
1242
+ # Create a generic object that allows any properties
1243
+ return {
1244
+ 'type': 'object',
1245
+ 'properties': {} }
1246
+
1247
+ if json_type.get('type') and isinstance(json_type['type'], str):
1248
+ # Check if this schema also has composition keywords that should be preserved
1249
+ if self.preserve_composition and self.has_composition_keywords(json_type):
1250
+ # Skip primitive handling and continue to composition handling below
1251
+ pass
1252
+ else:
1253
+ format_hint = json_type.get('format')
1254
+ enum_values = json_type.get('enum')
1255
+ # Special handling for objects
1256
+ if json_type['type'] == 'object':
1257
+ # Check if should convert to map
1258
+ if self.should_convert_to_map(json_type):
1259
+ # Handle patternProperties conversion to map
1260
+ pattern_properties = json_type.get('patternProperties')
1261
+ if pattern_properties:
1262
+ # Get the pattern schema for values (merge all pattern schemas)
1263
+ if len(pattern_properties) == 1:
1264
+ pattern_schema = list(pattern_properties.values())[0]
1265
+ else:
1266
+ # Multiple patterns - merge schemas using anyOf
1267
+ schemas = list(pattern_properties.values())
1268
+ pattern_schema = {'anyOf': schemas}
1269
+
1270
+ # Convert patternProperties to map with keyNames validation
1271
+ return self.create_structure_map_with_pattern(
1272
+ pattern_schema, pattern_properties, record_name, namespace, dependencies,
1273
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth
1274
+ )
1275
+ else:
1276
+ # Handle additionalProperties conversion to map
1277
+ additional_props = json_type.get('additionalProperties', True)
1278
+ if isinstance(additional_props, dict):
1279
+ return self.create_structure_map(
1280
+ additional_props, record_name, namespace, dependencies,
1281
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth
1282
+ )
1283
+ else: return {
1284
+ 'type': 'map',
1285
+ 'values': {'type': 'string'} # Default for boolean additionalProperties
1286
+ }
1287
+ else:
1288
+ # Regular object - first check for discriminated union patterns
1289
+ if 'oneOf' in json_type:
1290
+ choice_info = self.detect_discriminated_union_pattern(json_type)
1291
+ if choice_info:
1292
+ # Convert to JSON Structure choice type
1293
+ choice_result = {
1294
+ 'type': 'choice',
1295
+ 'choices': choice_info['choices']
1296
+ }
1297
+
1298
+ # Add selector if specified (for tagged unions)
1299
+ if choice_info.get('selector'):
1300
+ choice_result['selector'] = choice_info['selector']
1301
+
1302
+ # Add name if we have one
1303
+ if record_name:
1304
+ choice_result['name'] = avro_name(record_name)
1305
+
1306
+ return choice_result
1307
+ # Regular object without discriminated union
1308
+ # Check if this is a bare object type that should be converted to "any"
1309
+ if (not json_type.get('properties') and
1310
+ not json_type.get('additionalProperties') and
1311
+ not json_type.get('patternProperties') and
1312
+ not json_type.get('required') and
1313
+ not json_type.get('$extends') and
1314
+ not any(k in json_type for k in ['allOf', 'anyOf', 'oneOf', 'if', 'then', 'else'])):
1315
+ # This is a bare "type": "object" which means "any object" in JSON Schema
1316
+ # Convert to "any" type in JSON Structure
1317
+ return {'type': 'any'}
1318
+
1319
+ # Apply constraint composition conversion if applicable
1320
+ effective_schema = self._convert_constraint_composition_to_required(json_type)
1321
+ properties = effective_schema.get('properties', {})
1322
+ required = effective_schema.get('required', [])
1323
+ return self.create_structure_object(
1324
+ properties, required, record_name, namespace, dependencies,
1325
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth, json_type
1326
+ )
1327
+ # Special handling for arrays
1328
+ elif json_type['type'] == 'array':
1329
+ items_schema = json_type.get('items', {'type': 'string'})
1330
+ is_set = self.detect_collection_type(json_type) == 'set'
1331
+ return self.create_structure_array_or_set(
1332
+ items_schema, is_set, record_name, namespace, dependencies,
1333
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth
1334
+ )
1335
+
1336
+ # Special handling for maps
1337
+ elif json_type['type'] == 'map':
1338
+ values_schema = json_type.get('values', {'type': 'string'})
1339
+ return self.create_structure_map(
1340
+ values_schema, record_name, namespace, dependencies,
1341
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth
1342
+ )
1343
+
1344
+ # Special handling for sets
1345
+ elif json_type['type'] == 'set':
1346
+ items_schema = json_type.get('items', {'type': 'string'})
1347
+ return self.create_structure_array_or_set(
1348
+ items_schema, True, record_name, namespace, dependencies,
1349
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth
1350
+ )
1351
+
1352
+ else:
1353
+ # Primitive type
1354
+ structure_type = self.json_schema_primitive_to_structure_type(
1355
+ json_type['type'], format_hint, enum_values, record_name, field_name, namespace, dependencies, json_type
1356
+ )
1357
+ # Add validation constraints
1358
+ if isinstance(structure_type, str):
1359
+ structure_type = self.add_validation_constraints({'type': structure_type}, json_type)
1360
+ if len(structure_type) == 1:
1361
+ structure_type = structure_type['type']
1362
+ elif isinstance(structure_type, dict):
1363
+ structure_type = self.add_validation_constraints(structure_type, json_type)
1364
+ return structure_type
1365
+
1366
+ # Handle composition keywords - resolve when preserve_composition is False
1367
+ if not self.preserve_composition and self.has_composition_keywords(json_type):
1368
+ return self.resolve_composition_keywords(
1369
+ json_type, record_name, field_name, namespace, dependencies,
1370
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth
1371
+ )
1372
+
1373
+ # Handle composition keywords
1374
+ if self.preserve_composition:
1375
+ if 'allOf' in json_type and not inheritance_info:
1376
+ # Non-inheritance allOf - keep as-is or merge based on configuration
1377
+ allof_schemas = []
1378
+ for allof_item in json_type['allOf']:
1379
+ converted = self.json_type_to_structure_type(
1380
+ allof_item, record_name, field_name, namespace, dependencies,
1381
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
1382
+ )
1383
+ allof_schemas.append(converted)
1384
+ # For now, merge them - JSON Structure supports allOf natively
1385
+ return {
1386
+ 'allOf': allof_schemas
1387
+ }
1388
+
1389
+ if 'oneOf' in json_type and not discriminator_info: # Check if this is a discriminated union pattern
1390
+ choice_info = self.detect_discriminated_union_pattern(json_type)
1391
+ if choice_info:
1392
+ # Convert to JSON Structure choice type (tagged union)
1393
+ choice_result = {
1394
+ 'type': 'choice',
1395
+ 'choices': choice_info['choices']
1396
+ }
1397
+
1398
+ # Add selector if specified (for tagged unions)
1399
+ if choice_info.get('selector'):
1400
+ choice_result['selector'] = choice_info['selector']
1401
+
1402
+ # Add name if we have one
1403
+ if record_name:
1404
+ choice_result['name'] = avro_name(record_name)
1405
+
1406
+ return choice_result
1407
+
1408
+ # Regular oneOf without discriminator
1409
+ oneof_schemas = []
1410
+ for oneof_item in json_type['oneOf']:
1411
+ # For constraint-only schemas, preserve them but add type: object
1412
+ if self._is_constraint_only_schema(oneof_item):
1413
+ preserved_item = dict(oneof_item)
1414
+ preserved_item['type'] = 'object'
1415
+ # Add properties for required fields to make it valid JSON Structure
1416
+ if 'required' in preserved_item and 'properties' not in preserved_item:
1417
+ preserved_item['properties'] = {}
1418
+ for req_field in preserved_item['required']:
1419
+ preserved_item['properties'][req_field] = {'type': 'any'}
1420
+ # Allow additional properties since this is a constraint-only schema
1421
+ preserved_item['additionalProperties'] = True
1422
+ oneof_schemas.append(preserved_item)
1423
+ else:
1424
+ converted = self.json_type_to_structure_type(
1425
+ oneof_item, record_name, field_name, namespace, dependencies,
1426
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
1427
+ )
1428
+ oneof_schemas.append(converted)
1429
+ return {
1430
+ 'oneOf': oneof_schemas
1431
+ }
1432
+
1433
+ if 'anyOf' in json_type: # Check if this is a constraint-only anyOf pattern that should be converted to permutations
1434
+ anyof_items = json_type['anyOf']
1435
+ constraint_only = all(
1436
+ self._is_constraint_only_schema(item) and 'required' in item
1437
+ for item in anyof_items
1438
+ )
1439
+
1440
+ if constraint_only:
1441
+ # Convert constraint-only anyOf to permutations and return as object with required
1442
+ converted_schema = self._convert_constraint_anyof_to_permutations(json_type)
1443
+
1444
+ # Use create_structure_object to properly handle patternProperties
1445
+ result = self.create_structure_object(
1446
+ json_type.get('properties', {}),
1447
+ converted_schema.get('required', []),
1448
+ record_name, namespace, dependencies,
1449
+ json_schema, base_uri, structure_schema,
1450
+ record_stack, recursion_depth, json_type
1451
+ )
1452
+ # Add other properties from the original schema (except structural properties)
1453
+ for key, value in json_type.items():
1454
+ if key not in ['anyOf', 'type', 'properties', 'required', 'patternProperties', 'additionalProperties']:
1455
+ result[key] = value
1456
+ return result
1457
+ else:
1458
+ # Regular anyOf composition - preserve original structure when preserve_composition=True
1459
+ anyof_schemas = []
1460
+ for anyof_item in anyof_items:
1461
+ converted = self.json_type_to_structure_type(
1462
+ anyof_item, record_name, field_name, namespace, dependencies,
1463
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
1464
+ )
1465
+ anyof_schemas.append(converted)
1466
+
1467
+ return {
1468
+ 'anyOf': anyof_schemas
1469
+ }
1470
+
1471
+ # Handle conditional schemas (if/then/else)
1472
+ if 'if' in json_type:
1473
+ # Preserve conditional schemas as-is in JSON Structure
1474
+ result = {}
1475
+
1476
+ # Process if clause
1477
+ if_schema = self.json_type_to_structure_type(
1478
+ json_type['if'], record_name, field_name, namespace, dependencies,
1479
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
1480
+ )
1481
+ result['if'] = if_schema
1482
+
1483
+ # Process then clause if present
1484
+ if 'then' in json_type:
1485
+ then_schema = self.json_type_to_structure_type(
1486
+ json_type['then'], record_name, field_name, namespace, dependencies,
1487
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
1488
+ )
1489
+ result['then'] = then_schema
1490
+
1491
+ # Process else clause if present
1492
+ if 'else' in json_type:
1493
+ else_schema = self.json_type_to_structure_type(
1494
+ json_type['else'], record_name, field_name, namespace, dependencies,
1495
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
1496
+ )
1497
+ result['else'] = else_schema
1498
+ # Add any other properties from the original schema
1499
+ for key, value in json_type.items():
1500
+ if key not in ['if', 'then', 'else']:
1501
+ if key in ['properties', 'required', 'type']:
1502
+ # Handle structural properties
1503
+ if key == 'properties':
1504
+ converted_props = {}
1505
+ for prop_name, prop_schema in value.items():
1506
+ prop_type = self.json_type_to_structure_type(
1507
+ prop_schema, record_name, prop_name, namespace, dependencies,
1508
+ json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
1509
+ )
1510
+ converted_props[prop_name] = self._ensure_schema_object(prop_type, structure_schema, prop_name)
1511
+ result[key] = converted_props
1512
+ else:
1513
+ result[key] = value
1514
+ else:
1515
+ # Copy validation and other properties as-is
1516
+ result[key] = value
1517
+
1518
+ # Ensure conditional object schemas are valid JSON Structure
1519
+ if result.get('type') == 'object':
1520
+ # If we have no properties but we're an object type, add additionalProperties
1521
+ if 'properties' not in result and '$extends' not in result:
1522
+ result['additionalProperties'] = True
1523
+ # If we have empty properties, remove it and add additionalProperties instead
1524
+ elif 'properties' in result and not result['properties']:
1525
+ del result['properties']
1526
+ if 'additionalProperties' not in result:
1527
+ result['additionalProperties'] = True
1528
+
1529
+ return result
1530
+
1531
+ # Handle const
1532
+ if 'const' in json_type:
1533
+ const_value = json_type['const']
1534
+ if isinstance(const_value, str):
1535
+ return {
1536
+ 'type': 'string',
1537
+ 'const': const_value
1538
+ }
1539
+ elif isinstance(const_value, (int, float)):
1540
+ return {
1541
+ 'type': 'int32' if isinstance(const_value, int) else 'double',
1542
+ 'const': const_value
1543
+ }
1544
+ elif isinstance(const_value, bool):
1545
+ return {
1546
+ 'type': 'boolean',
1547
+ 'const': const_value
1548
+ }# Fallback for unhandled cases
1549
+ if 'properties' in json_type: # Treat as object even without explicit type
1550
+ properties = json_type['properties']
1551
+ required = json_type.get('required', [])
1552
+ return self.create_structure_object(
1553
+ properties, required, record_name, namespace, dependencies, json_schema, base_uri, structure_schema, record_stack, recursion_depth, json_type
1554
+ )
1555
+
1556
+ # Fallback
1557
+ return 'string'
1558
+
1559
+ except Exception as e:
1560
+ print(f'ERROR: Failed to convert type for {record_name}.{field_name}: {e}')
1561
+ return 'string'
1562
+
1563
+ def fetch_content(self, url: str | ParseResult):
1564
+ """
1565
+ Fetches the content from the specified URL.
1566
+
1567
+ Args:
1568
+ url (str or ParseResult): The URL to fetch the content from.
1569
+
1570
+ Returns:
1571
+ str: The fetched content.
1572
+
1573
+ Raises:
1574
+ requests.RequestException: If there is an error while making the HTTP request.
1575
+ Exception: If there is an error while reading the file.
1576
+ """
1577
+ # Parse the URL to determine the scheme
1578
+ if isinstance(url, str):
1579
+ parsed_url = urlparse(url)
1580
+ else:
1581
+ parsed_url = url
1582
+
1583
+ if parsed_url.geturl() in self.content_cache:
1584
+ return self.content_cache[parsed_url.geturl()]
1585
+ scheme = parsed_url.scheme
1586
+
1587
+ # Handle HTTP and HTTPS URLs
1588
+ if scheme in ['http', 'https']:
1589
+ response = requests.get(url if isinstance(
1590
+ url, str) else parsed_url.geturl(), timeout=30)
1591
+ # Raises an HTTPError if the response status code is 4XX/5XX
1592
+ response.raise_for_status()
1593
+ self.content_cache[parsed_url.geturl()] = response.text
1594
+ return response.text
1595
+
1596
+ # Handle file URLs
1597
+ elif scheme == 'file':
1598
+ # Remove the leading 'file://' from the path for compatibility
1599
+ file_path = parsed_url.netloc
1600
+ if not file_path:
1601
+ file_path = parsed_url.path
1602
+ # On Windows, a file URL might start with a '/' but it's not part of the actual path
1603
+ if os.name == 'nt' and file_path.startswith('/'):
1604
+ file_path = file_path[1:]
1605
+ with open(file_path, 'r', encoding='utf-8') as file:
1606
+ text = file.read()
1607
+ self.content_cache[parsed_url.geturl()] = text
1608
+ return text
1609
+ else:
1610
+ raise NotImplementedError(f'Unsupported URL scheme: {scheme}')
1611
+
1612
def resolve_reference(self, json_type: dict, base_uri: str, json_doc: dict) -> Tuple[Union[dict, Any], dict]:
    """
    Resolve a JSON Pointer reference or a JSON $ref reference.

    A ``$ref`` with a scheme is fetched directly. A ``$ref`` with only a path
    is first combined with ``base_uri`` via ``compose_uri``. A ``$ref`` that
    is only a fragment is resolved against ``json_doc``.

    Args:
        json_type (dict): The JSON type containing the reference.
        base_uri (str): The base URI of the JSON document.
        json_doc (dict): The JSON document containing the reference.

    Returns:
        Tuple[Union[dict, Any], dict]: The resolved schema and the document it
        was resolved in. When nothing could be resolved (or a local pointer
        resolves to a falsy value), ``(json_type, json_doc)`` is returned
        unchanged.

    Raises:
        Exception: If there is an error decoding JSON from the reference.
        Exception: If there is an error resolving the JSON Pointer reference.
    """
    try:
        ref = json_type['$ref']
        content = None
        url = urlparse(ref)
        if url.scheme:
            content = self.fetch_content(ref)
        elif url.path:
            file_uri = self.compose_uri(base_uri, url)
            content = self.fetch_content(file_uri)
        if content:
            try:
                json_schema_doc = json_schema = json.loads(content)
            except json.JSONDecodeError as e:
                raise Exception(f'Error decoding JSON from {ref}') from e
            # resolve the JSON Pointer reference, if any; the fragment is
            # percent-decoded first, exactly as for local references below
            if url.fragment:
                json_schema = jsonpointer.resolve_pointer(
                    json_schema, unquote(url.fragment))
            return json_schema, json_schema_doc

        if url.fragment:
            json_pointer = unquote(url.fragment)
            ref_schema = jsonpointer.resolve_pointer(
                json_doc, json_pointer)
            if ref_schema:
                return ref_schema, json_doc
    except JsonPointerException as e:
        raise Exception(
            f'Error resolving JSON Pointer reference for {base_uri}') from e
    return json_type, json_doc
1658
+
1659
def compose_uri(self, base_uri, url):
    """
    Compose a URI from a base URI and a relative URL.

    Args:
        base_uri (str): URI of the document the reference appears in.
        url (str or ParseResult): The (possibly relative) reference.

    Returns:
        str: ``url`` itself when it carries a scheme, ``base_uri`` when the
        reference has neither path nor netloc, otherwise the reference
        resolved against ``base_uri``.
    """
    target = urlparse(url) if isinstance(url, str) else url

    # Absolute references need no composition.
    if target.scheme:
        return target.geturl()
    # Fragment-only (or empty) references point into the base document.
    if not (target.path or target.netloc):
        return base_uri

    if not base_uri.startswith('file'):
        # combine the base URI with the URL
        return urllib.parse.urljoin(base_uri, target.geturl())

    # File-style base: resolve relative to the directory of the base file.
    base_parts = urlparse(base_uri)
    base_location = base_parts.netloc or base_parts.path
    base_dir = os.path.dirname(base_location)
    return f'file://{os.path.join(base_dir, target.path)}'
1677
+
1678
+ def convert_schema(self, json_schema_path: str, output_path: str | None = None):
1679
+ """
1680
+ Convert a JSON Schema file to JSON Structure format.
1681
+
1682
+ Args:
1683
+ json_schema_path (str): Path to the input JSON Schema file
1684
+ output_path (str): Path for the output JSON Structure file (optional)
1685
+ """
1686
+ # Read the JSON Schema
1687
+ with open(json_schema_path, 'r', encoding='utf-8') as file:
1688
+ json_schema = json.load(file)
1689
+
1690
+ # Convert to JSON Structure
1691
+ structure_schema = self.convert_json_schema_to_structure(json_schema, json_schema_path)
1692
+
1693
+ # Determine output path
1694
+ if not output_path:
1695
+ base_name = os.path.splitext(json_schema_path)[0]
1696
+ output_path = f"{base_name}.structure.json"
1697
+ # Write the result
1698
+ with open(output_path, 'w', encoding='utf-8') as file:
1699
+ # Sort properties before writing
1700
+ # Sort properties before writing
1701
+ sorted_schema = self._sort_json_structure_properties(structure_schema)
1702
+ json.dump(sorted_schema, file, indent=2)
1703
+
1704
+ print(f"Converted {json_schema_path} to {output_path}")
1705
+ return structure_schema
1706
+
1707
+
1708
+ def _mark_abstract_types(self, structure_schema: dict) -> None:
1709
+ """
1710
+ Mark abstract types in the structure schema.
1711
+
1712
+ Args:
1713
+ structure_schema (dict): The structure schema to mark
1714
+ """
1715
+ if 'definitions' in structure_schema:
1716
+ for def_name, def_schema in structure_schema['definitions'].items():
1717
+ if isinstance(def_schema, dict):
1718
+ # Mark types with only inheritance as abstract
1719
+ if ('$extends' in def_schema and
1720
+ ('properties' not in def_schema or len(def_schema['properties']) == 0)):
1721
+ def_schema['abstract'] = True
1722
+
1723
+ # Mark choice types with discriminators as abstract
1724
+ if (def_schema.get('type') == 'choice' and
1725
+ 'discriminator' in def_schema):
1726
+ def_schema['abstract'] = True
1727
+
1728
def jsons_to_structure(self, json_schema: Union[dict, list], namespace: str, base_uri: str) -> dict:
    """
    Convert a JSON Schema to JSON Structure format.

    The conversion runs in stages: register every definition name in
    ``self.type_registry``, convert each definition, convert the root-level
    type (if the schema has one), copy ``$id``/description metadata, and
    finally mark abstract types.

    Args:
        json_schema (dict | list): The JSON Schema to convert
        namespace (str): The target namespace
        base_uri (str): Base URI for reference resolution

    Returns:
        dict: The converted JSON Structure schema
    """
    # Clear type registry for new conversion
    self.type_registry.clear()

    structure_schema: Dict[str, Any] = {
        "$schema": "https://json-structure.org/meta/extended/v0/#"
    }
    # Do NOT set $uses here; it will be set after scanning for actual usage

    # Handle schema with definitions/defs
    if isinstance(json_schema, dict) and ('definitions' in json_schema or '$defs' in json_schema):
        defs_key = '$defs' if '$defs' in json_schema else 'definitions'
        json_schema_defs = json_schema[defs_key]
        if json_schema_defs:
            structure_schema['definitions'] = {}

            # First pass: populate type registry for reference resolution
            for def_name in json_schema_defs.keys():
                normalized_def_name = avro_name(def_name)
                self.type_registry[def_name] = f"#/definitions/{normalized_def_name}"

            schema_keywords = {'type', 'properties', 'items', 'additionalProperties', 'patternProperties',
                               'oneOf', 'anyOf', 'allOf', '$ref', 'required', 'enum', 'const', 'minimum',
                               'maximum', 'minLength', 'maxLength', 'pattern', 'format', 'if', 'then', 'else'}
            # Serialized schema used for the "is this container referenced"
            # search; built lazily, once, because it is loop-invariant.
            schema_str = None

            # Second pass: convert each definition
            for def_name, def_schema in json_schema_defs.items():
                # Skip empty definitions or ones that are just plain values/strings
                if not isinstance(def_schema, dict) or not def_schema:
                    continue

                # A definition without any schema keyword whose non-meta
                # members are all dicts is treated as a pure container
                # (a namespace such as "resourceTypes" that only organizes
                # other types).
                has_schema_keywords = any(key in def_schema for key in schema_keywords)
                if not has_schema_keywords:
                    non_meta_items = {k: v for k, v in def_schema.items()
                                      if not k.startswith('$') and k not in ['title', 'description', 'examples']}
                    if non_meta_items and all(isinstance(value, dict) for value in non_meta_items.values()):
                        if schema_str is None:
                            schema_str = json.dumps(json_schema)
                        # References may use the normalized name, the original
                        # name, or the source keyword ('$defs') as pointer root.
                        ref_targets = {
                            f"#/definitions/{def_name}",
                            f"#/definitions/{avro_name(def_name)}",
                            f"#/{defs_key}/{def_name}",
                        }
                        if any(target in schema_str for target in ref_targets):
                            # This container is being referenced, so keep it
                            # as a minimal valid object type.
                            normalized_def_name, original_name = avro_name_with_altname(def_name)
                            container_def = {
                                'type': 'object',
                                'name': normalized_def_name,
                                'additionalProperties': True  # Allow any properties to make it valid
                            }
                            if original_name is not None:
                                container_def['altnames'] = {'json': original_name}
                            structure_schema['definitions'][normalized_def_name] = container_def
                        # Referenced or not, a pure container is never converted further.
                        continue

                # Process all dictionary definitions - this includes schemas with only descriptions
                dependencies = []
                normalized_def_name, original_name = avro_name_with_altname(def_name)
                converted_def = self.json_type_to_structure_type(
                    def_schema, def_name, '', namespace, dependencies,
                    json_schema, base_uri, structure_schema, [], 1
                )
                if isinstance(converted_def, dict):
                    converted_def['name'] = normalized_def_name
                    # Add alternate name if the original was different
                    if original_name is not None:
                        if 'altnames' not in converted_def:
                            converted_def['altnames'] = {}
                        converted_def['altnames']['json'] = original_name
                    structure_schema['definitions'][normalized_def_name] = converted_def
                else:
                    # Non-dict results (e.g. a bare type name) are wrapped in a definition object
                    definition_obj = {
                        'type': converted_def,
                        'name': normalized_def_name
                    }
                    # Add alternate name if the original was different
                    if original_name is not None:
                        definition_obj['altnames'] = {'json': original_name}
                    structure_schema['definitions'][normalized_def_name] = definition_obj

    # Handle root-level schema type
    root_type_keys = ['type', 'properties', 'items', 'additionalProperties', 'oneOf', 'anyOf', 'allOf']
    if isinstance(json_schema, dict) and any(key in json_schema for key in root_type_keys):
        dependencies = []
        root_converted = self.json_type_to_structure_type(
            json_schema, self.root_class_name, '', namespace, dependencies,
            json_schema, base_uri, structure_schema, [], 1
        )

        # Merge root type properties into schema without overwriting keys already set
        if isinstance(root_converted, dict):
            for key, value in root_converted.items():
                if key not in structure_schema:
                    structure_schema[key] = value
        else:
            structure_schema['type'] = root_converted

    # Handle schema metadata
    default_id = f"https://example.com/{namespace.replace('.', '/')}.schema.json"
    if isinstance(json_schema, dict):
        if '$id' in json_schema:
            structure_schema['$id'] = json_schema['$id']
        elif 'id' in json_schema:
            structure_schema['$id'] = json_schema['id']
        else:
            # Generate default $id if missing
            structure_schema['$id'] = default_id

        # Add description if present, or map title to description if no description exists
        if 'description' in json_schema:
            structure_schema['description'] = json_schema['description']
        elif 'title' in json_schema:
            structure_schema['description'] = json_schema['title']
    else:
        # Generate default $id for non-dict schemas
        structure_schema['$id'] = default_id

    # Mark abstract types
    self._mark_abstract_types(structure_schema)

    return structure_schema
1862
+
1863
def convert_json_schema_to_structure(self, json_schema: Union[dict, list], base_uri: str = "") -> dict:
    """
    Convert a JSON Schema dictionary to JSON Structure format.

    Wraps ``jsons_to_structure`` with input checks, root naming, ``$uses``
    bookkeeping and a final compliance fix-up pass.

    Args:
        json_schema (dict): The JSON Schema to convert
        base_uri (str): Base URI for reference resolution

    Returns:
        dict: The converted JSON Structure schema

    Raises:
        TypeError: If the input is neither a dict nor a list
        ValueError: If the input is an empty dict, or if any step of the
            conversion raises (the original error is chained as the cause)
    """
    if not isinstance(json_schema, (dict, list)):
        raise TypeError(f"Expected dict or list, got {type(json_schema)}")

    if isinstance(json_schema, dict) and not json_schema:
        raise ValueError("Empty schema dictionary provided")

    try:
        converted = self.jsons_to_structure(json_schema, self.root_namespace, base_uri)

        # Always add a name to the root if it has a type and no name
        if 'type' in converted and 'name' not in converted:
            converted['name'] = avro_name(self.root_class_name)

        # Only keep $uses if a feature is actually used
        features = self._scan_for_uses(converted)
        if features:
            converted['$uses'] = features
        else:
            converted.pop('$uses', None)

        # Validate and fix JSON Structure compliance
        return self._validate_and_fix_json_structure_type(converted)
    except Exception as e:
        raise ValueError(f"Failed to convert JSON Schema to JSON Structure: {e}") from e
1906
+
1907
def is_valid_identifier(self, name: str) -> bool:
    """
    Check if a name is a valid identifier (for property names, etc.).

    A valid identifier is a non-empty string made of ASCII letters, digits
    and underscores that does not start with a digit.

    Args:
        name (str): The candidate name.

    Returns:
        bool: True if the whole string is a valid identifier.
    """
    if not name or not isinstance(name, str):
        return False
    import re
    # fullmatch instead of match with a '$' anchor: '$' also matches just
    # before a trailing newline, which would let "name\n" through.
    return re.fullmatch(r'[a-zA-Z_][a-zA-Z0-9_]*', name) is not None
1915
+
1916
def normalize_identifier(self, name: str) -> str:
    """
    Normalize a name to be a valid identifier.

    Every character outside ``[a-zA-Z0-9_]`` becomes an underscore and a
    leading digit is shielded with a ``prop_`` prefix. Empty or non-string
    input yields ``'property'``.
    """
    if not (name and isinstance(name, str)):
        return 'property'

    import re
    # Replace invalid characters with underscores
    cleaned = re.sub(r'[^a-zA-Z0-9_]', '_', name)

    # Ensure it doesn't start with a number
    if cleaned and cleaned[0].isdigit():
        cleaned = 'prop_' + cleaned

    # Ensure it's not empty
    return cleaned or 'property'
1935
+
1936
+ def _validate_and_fix_json_structure_type(self, structure_type: Any) -> Any:
1937
+ """
1938
+ Validate and fix a JSON Structure type to ensure compliance.
1939
+
1940
+ This method post-processes generated JSON Structure schemas to fix common issues:
1941
+ - Converts "integer" type to "number" (JSON Structure doesn't support integer)
1942
+ - Ensures arrays have "items"
1943
+ - Ensures objects have "properties"
1944
+ - Ensures map/set "values"/"items" are schema objects, not strings
1945
+ - Recursively fixes nested schemas
1946
+ """
1947
+ if not isinstance(structure_type, dict):
1948
+ return structure_type
1949
+
1950
+ # Create a copy to avoid modifying the original
1951
+ structure_type = structure_type.copy()
1952
+
1953
+ # Fix invalid types
1954
+ if structure_type.get('type') == 'integer':
1955
+ structure_type['type'] = 'number' # JSON Structure doesn't have integer type
1956
+
1957
+ # Ensure arrays have items
1958
+ elif structure_type.get('type') == 'array' and 'items' not in structure_type:
1959
+ structure_type['items'] = {'type': 'object'} # Default to object items
1960
+
1961
+ # Ensure objects have properties (unless they extend another type)
1962
+ elif (structure_type.get('type') == 'object' and
1963
+ 'properties' not in structure_type and
1964
+ '$ref' not in structure_type and
1965
+ '$extends' not in structure_type):
1966
+ structure_type['properties'] = {} # Default to empty properties
1967
+
1968
+ # Ensure map values are schema objects
1969
+ elif structure_type.get('type') == 'map' and 'values' in structure_type:
1970
+ values = structure_type['values']
1971
+ if isinstance(values, str):
1972
+ structure_type['values'] = {'type': values}
1973
+ elif isinstance(values, dict):
1974
+ structure_type['values'] = self._validate_and_fix_json_structure_type(values)
1975
+
1976
+ # Ensure set items are schema objects
1977
+ elif structure_type.get('type') == 'set' and 'items' in structure_type:
1978
+ items = structure_type['items']
1979
+ if isinstance(items, str):
1980
+ structure_type['items'] = {'type': items}
1981
+ elif isinstance(items, dict):
1982
+ structure_type['items'] = self._validate_and_fix_json_structure_type(items)
1983
+
1984
+ # Recursively validate nested structures - comprehensive approach
1985
+ if 'anyOf' in structure_type:
1986
+ structure_type['anyOf'] = [
1987
+ self._validate_and_fix_json_structure_type(item)
1988
+ for item in structure_type['anyOf']
1989
+ ]
1990
+ elif 'oneOf' in structure_type:
1991
+ structure_type['oneOf'] = [
1992
+ self._validate_and_fix_json_structure_type(item)
1993
+ for item in structure_type['oneOf']
1994
+ ]
1995
+ elif 'allOf' in structure_type:
1996
+ structure_type['allOf'] = [
1997
+ self._validate_and_fix_json_structure_type(item)
1998
+ for item in structure_type['allOf']
1999
+ ]
2000
+
2001
+ # Handle nested schemas in various contexts
2002
+ if 'items' in structure_type and isinstance(structure_type['items'], dict):
2003
+ structure_type['items'] = self._validate_and_fix_json_structure_type(structure_type['items'])
2004
+ elif 'values' in structure_type and isinstance(structure_type['values'], dict):
2005
+ structure_type['values'] = self._validate_and_fix_json_structure_type(structure_type['values'])
2006
+ elif 'properties' in structure_type and isinstance(structure_type['properties'], dict):
2007
+ structure_type['properties'] = {
2008
+ k: self._validate_and_fix_json_structure_type(v)
2009
+ for k, v in structure_type['properties'].items()
2010
+ }
2011
+ # Handle additionalProperties
2012
+ if 'additionalProperties' in structure_type and isinstance(structure_type['additionalProperties'], dict):
2013
+ structure_type['additionalProperties'] = self._validate_and_fix_json_structure_type(structure_type['additionalProperties'])
2014
+
2015
+ # Handle definitions
2016
+ if 'definitions' in structure_type and isinstance(structure_type['definitions'], dict):
2017
+ structure_type['definitions'] = {
2018
+ k: self._validate_and_fix_json_structure_type(v)
2019
+ for k, v in structure_type['definitions'].items()
2020
+ }
2021
+
2022
+ # Handle $defs (JSON Schema 2019-09+)
2023
+ if '$defs' in structure_type and isinstance(structure_type['$defs'], dict):
2024
+ structure_type['$defs'] = {
2025
+ k: self._validate_and_fix_json_structure_type(v)
2026
+ for k, v in structure_type['$defs'].items()
2027
+ }
2028
+
2029
+ return structure_type
2030
+
2031
+ def _sort_json_structure_properties(self, schema: Any) -> Any:
2032
+ """
2033
+ Recursively sort properties in a JSON Structure schema for consistent output.
2034
+ """
2035
+ if not isinstance(schema, dict):
2036
+ return schema
2037
+
2038
+ result = {}
2039
+
2040
+ # Sort keys, putting common keys first
2041
+
2042
+ key_order = ['$schema', 'type', 'title', 'description', 'properties', 'required', 'items', 'values', 'anyOf', 'oneOf', 'allOf', '$ref', '$extends']
2043
+ sorted_keys = []
2044
+
2045
+ # Add keys in preferred order
2046
+ for key in key_order:
2047
+ if key in schema:
2048
+ sorted_keys.append(key)
2049
+
2050
+ # Add remaining keys alphabetically
2051
+ remaining_keys = sorted([k for k in schema.keys() if k not in key_order])
2052
+ sorted_keys.extend(remaining_keys)
2053
+
2054
+ # Build result with sorted keys
2055
+ for key in sorted_keys:
2056
+ value = schema[key]
2057
+ if key == 'properties' and isinstance(value, dict):
2058
+ # Sort properties alphabetically
2059
+ result[key] = {k: self._sort_json_structure_properties(v) for k, v in sorted(value.items())}
2060
+ elif key in ['anyOf', 'oneOf', 'allOf'] and isinstance(value, list):
2061
+ # Recursively sort composition schemas
2062
+ result[key] = [self._sort_json_structure_properties(item) for item in value]
2063
+ elif key in ['items', 'values'] and isinstance(value, dict):
2064
+ # Recursively sort nested schemas
2065
+ result[key] = self._sort_json_structure_properties(value)
2066
+ else:
2067
+ result[key] = value
2068
+
2069
+ return result
2070
+
2071
+ def _convert_constraint_composition_to_required(self, json_type: dict) -> dict:
2072
+ """
2073
+ Convert constraint-only composition (anyOf with property requirements) to a simple required array.
2074
+ This is used when anyOf items only add constraints without changing the structure.
2075
+ """
2076
+ try:
2077
+ # Check if this is constraint-only anyOf
2078
+ if 'anyOf' not in json_type:
2079
+ return json_type
2080
+
2081
+ # Gather all required properties from anyOf items
2082
+ all_required = set()
2083
+ base_properties = json_type.get('properties', {})
2084
+
2085
+ for anyof_item in json_type['anyOf']:
2086
+ if isinstance(anyof_item, dict) and 'required' in anyof_item:
2087
+ # Only consider it constraint-only if it doesn't define new properties
2088
+ item_properties = anyof_item.get('properties', {})
2089
+ if not item_properties or all(prop in base_properties for prop in item_properties):
2090
+ all_required.update(anyof_item['required'])
2091
+
2092
+ # Create simplified schema
2093
+ result = {k: v for k, v in json_type.items() if k != 'anyOf'}
2094
+ if all_required:
2095
+ result['required'] = sorted(list(all_required))
2096
+
2097
+ return result
2098
+
2099
+ except Exception as e:
2100
+ # If conversion fails, return original
2101
+ return json_type
2102
+
2103
+ def _is_constraint_only_schema(self, json_type: dict) -> bool:
2104
+ """
2105
+ Check if a schema contains only constraints (no structural elements).
2106
+ Used to determine if anyOf items are constraint-only.
2107
+ """
2108
+ if not isinstance(json_type, dict):
2109
+ return False
2110
+
2111
+ # Constraint-only keys
2112
+ constraint_keys = {'required', 'minProperties', 'maxProperties', 'dependencies', 'dependentRequired', 'dependentSchemas'}
2113
+
2114
+ # Structural keys that would make it not constraint-only
2115
+ structural_keys = {'type', 'properties', 'additionalProperties', 'patternProperties', 'items', 'anyOf', 'oneOf', 'allOf'}
2116
+
2117
+ schema_keys = set(json_type.keys()) - {'title', 'description', '$id', '$schema'}
2118
+
2119
+ # It's constraint-only if it has only constraint keys and no structural keys
2120
+ return bool(schema_keys & constraint_keys) and not bool(schema_keys & structural_keys)
2121
+
2122
+ def _convert_constraint_anyof_to_permutations(self, json_type: dict) -> dict:
2123
+ """
2124
+ Convert constraint-only anyOf to a schema with required properties that represent
2125
+ the union of all constraint requirements.
2126
+ """
2127
+ try:
2128
+ if 'anyOf' not in json_type:
2129
+ return json_type
2130
+
2131
+ # Collect all required properties from constraint-only anyOf items
2132
+ all_required = set()
2133
+
2134
+ for anyof_item in json_type['anyOf']:
2135
+ if isinstance(anyof_item, dict) and self._is_constraint_only_schema(anyof_item):
2136
+ if 'required' in anyof_item:
2137
+ all_required.update(anyof_item['required'])
2138
+ # Return schema with union of required properties
2139
+ result = {'required': sorted(list(all_required))} if all_required else {}
2140
+ return result
2141
+
2142
+ except Exception as e:
2143
+ # If conversion fails, return empty schema
2144
+ return {}
2145
+
2146
def has_composition_keywords(self, json_type: dict) -> bool:
    """
    Report whether a JSON schema uses a composition keyword (anyOf, oneOf,
    allOf) or a conditional keyword (if/then/else). Non-dict input yields False.
    """
    if not isinstance(json_type, dict):
        return False
    keywords = {'anyOf', 'oneOf', 'allOf', 'if', 'then', 'else'}
    return not keywords.isdisjoint(json_type)
2153
+
2154
def resolve_composition_keywords(self, json_type: dict, record_name: str, field_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int) -> dict:
    """
    Resolve composition keywords in a JSON schema by flattening them.

    Handling, in priority order:
      * allOf - every subschema is converted and the dict results are merged:
        ``properties`` are combined, ``required`` lists are concatenated and
        de-duplicated in first-seen order, any other key is overwritten by the
        later subschema.
      * anyOf - every subschema is converted and returned as a type union
        ``{'type': [...]}``.
      * oneOf - a discriminated union becomes a ``choice`` type; otherwise the
        first alternative is converted and returned.

    Args:
        json_type (dict): Schema fragment carrying the composition keyword.
        record_name (str): Name passed to nested conversions; also used
            (through ``avro_name``) as the name of a generated choice.
        field_name (str): Field name passed to nested conversions.
        namespace (str): Namespace passed to nested conversions.
        dependencies (list): Dependency list passed to nested conversions.
        json_schema (dict): The full JSON schema document.
        base_uri (str): Base URI for reference resolution.
        structure_schema (dict): The structure schema being built.
        record_stack (list): Record stack passed to nested conversions.
        recursion_depth (int): Current depth; nested calls receive depth + 1.

    Returns:
        dict: The resolved type. If nothing applies, or any step raises, the
        fallback ``{'type': 'map', 'values': {'type': 'any'}}`` is returned.
    """
    def convert(schema):
        # Every nested conversion shares the same context, one level deeper
        return self.json_type_to_structure_type(
            schema, record_name, field_name, namespace, dependencies,
            json_schema, base_uri, structure_schema, record_stack, recursion_depth + 1
        )

    try:
        if 'allOf' in json_type:
            merged = {}
            for schema in json_type['allOf']:
                converted = convert(schema)
                if not isinstance(converted, dict):
                    continue
                # Simple merge - in real scenarios this would be more complex
                for key, value in converted.items():
                    if key == 'properties' and key in merged:
                        merged[key].update(value)
                    elif key == 'required' and key in merged:
                        # dict.fromkeys de-duplicates while keeping first-seen
                        # order, so the output is stable across runs
                        merged[key] = list(dict.fromkeys(merged[key] + value))
                    elif key == 'properties' and isinstance(value, dict):
                        # Copy so later update() calls never mutate the
                        # converted subschema's own properties dict
                        merged[key] = dict(value)
                    else:
                        merged[key] = value
            return merged

        elif 'anyOf' in json_type:
            # For anyOf, convert to JSON Structure type union
            return {
                'type': [convert(anyof_item) for anyof_item in json_type['anyOf']]
            }

        elif 'oneOf' in json_type:
            # First check if this is a discriminated union pattern
            choice_info = self.detect_discriminated_union_pattern(json_type)
            if choice_info:
                choice_result = {
                    'type': 'choice',
                    'choices': choice_info['choices']
                }

                # Add selector if specified (for tagged unions)
                if choice_info.get('selector'):
                    choice_result['selector'] = choice_info['selector']

                # Add name if we have one
                if record_name:
                    choice_result['name'] = avro_name(record_name)

                return choice_result

            # For oneOf without discriminated union, return the first option as a fallback
            if json_type['oneOf']:
                return convert(json_type['oneOf'][0])

        # Fallback to map type with any values
        return {'type': 'map', 'values': {'type': 'any'}}

    except Exception:
        # Deliberately best-effort: if resolution fails, return a basic map type
        return {'type': 'map', 'values': {'type': 'any'}}
2223
+
2224
+ def detect_discriminated_union_pattern(self, json_type: dict) -> dict | None:
2225
+ """
2226
+ Detect discriminated union patterns in oneOf schemas that should be converted to choice type.
2227
+
2228
+ A discriminated union pattern is identified when:
2229
+ 1. Schema has oneOf with multiple object schemas
2230
+ 2. Each schema has a distinct set of required properties (mutually exclusive)
2231
+ 3. The schemas define object structures (have properties)
2232
+
2233
+ Patterns supported:
2234
+ - Simple discriminated unions: Each schema has exactly one unique required property
2235
+ - Complex discriminated unions: Each schema has a unique combination of required properties
2236
+ - Tagged unions: Each schema has a discriminator property with different enum/const values
2237
+
2238
+ Args:
2239
+ json_type (dict): The JSON schema object with oneOf
2240
+
2241
+ Returns:
2242
+ dict | None: Choice type configuration or None if not a discriminated union
2243
+ """
2244
+ if 'oneOf' not in json_type:
2245
+ return None
2246
+
2247
+ oneof_items = json_type['oneOf']
2248
+ if len(oneof_items) < 2:
2249
+ return None
2250
+
2251
+ # Check if all items are object schemas with properties
2252
+ all_schemas_are_objects = True
2253
+ for item in oneof_items:
2254
+ if not isinstance(item, dict):
2255
+ return None
2256
+ if not (item.get('type') == 'object' or 'properties' in item):
2257
+ all_schemas_are_objects = False
2258
+ break
2259
+
2260
+ if not all_schemas_are_objects:
2261
+ return None
2262
+
2263
+ # Pattern 1: Check for tagged unions with discriminator property
2264
+ discriminator_result = self._detect_tagged_union_pattern(oneof_items)
2265
+ if discriminator_result:
2266
+ return discriminator_result
2267
+
2268
+ # Pattern 2: Check for simple discriminated unions (each schema has exactly one unique required property)
2269
+ simple_result = self._detect_simple_discriminated_union(oneof_items)
2270
+ if simple_result:
2271
+ return simple_result
2272
+
2273
+ # Pattern 3: Check for complex discriminated unions (unique combinations of required properties)
2274
+ complex_result = self._detect_complex_discriminated_union(oneof_items)
2275
+ if complex_result:
2276
+ return complex_result
2277
+
2278
+ return None
2279
+
2280
+ def _detect_tagged_union_pattern(self, oneof_items: list) -> dict | None:
2281
+ """
2282
+ Detect tagged union pattern where all schemas have the same discriminator property
2283
+ with different enum/const values.
2284
+ """
2285
+ discriminator_props = {}
2286
+ common_discriminator = None
2287
+
2288
+ for item in oneof_items:
2289
+ properties = item.get('properties', {})
2290
+
2291
+ # Look for a property with enum or const value
2292
+ discriminator_found = False
2293
+ for prop_name, prop_schema in properties.items():
2294
+ if 'enum' in prop_schema and len(prop_schema['enum']) == 1:
2295
+ # Single enum value acts as discriminator
2296
+ disc_value = prop_schema['enum'][0]
2297
+ if common_discriminator is None:
2298
+ common_discriminator = prop_name
2299
+ elif common_discriminator != prop_name:
2300
+ return None # Different discriminator properties
2301
+ discriminator_props[str(disc_value)] = {'type': 'object'}
2302
+ discriminator_found = True
2303
+ break
2304
+ elif 'const' in prop_schema:
2305
+ # Const value acts as discriminator
2306
+ disc_value = prop_schema['const']
2307
+ if common_discriminator is None:
2308
+ common_discriminator = prop_name
2309
+ elif common_discriminator != prop_name:
2310
+ return None # Different discriminator properties
2311
+ discriminator_props[str(disc_value)] = {'type': 'object'}
2312
+ discriminator_found = True
2313
+ break
2314
+
2315
+ if not discriminator_found:
2316
+ return None
2317
+
2318
+ if common_discriminator and len(discriminator_props) == len(oneof_items):
2319
+ return {
2320
+ 'type': 'choice',
2321
+ 'choices': discriminator_props,
2322
+ 'selector': common_discriminator # Tagged union with explicit selector
2323
+ }
2324
+
2325
+ return None
2326
+
2327
+ def _detect_simple_discriminated_union(self, oneof_items: list) -> dict | None:
2328
+ """
2329
+ Detect simple discriminated union where each schema has exactly one unique required property.
2330
+ """
2331
+ choice_mapping = {}
2332
+
2333
+ for item in oneof_items:
2334
+ if 'properties' not in item or 'required' not in item:
2335
+ return None
2336
+
2337
+ required = item['required']
2338
+ if not isinstance(required, list) or len(required) != 1:
2339
+ return None # Must have exactly one required property
2340
+
2341
+ required_prop = required[0]
2342
+
2343
+ # Check if this property name is already used by another choice
2344
+ if required_prop in choice_mapping:
2345
+ return None # Properties must be mutually exclusive
2346
+
2347
+ # Ensure the required property exists in the properties
2348
+ if required_prop not in item['properties']:
2349
+ return None # Required property must exist in properties
2350
+
2351
+ # Store the choice information - use 'any' type for discriminated unions
2352
+ choice_mapping[required_prop] = {
2353
+ 'type': 'any',
2354
+ 'description': f'Choice variant with {required_prop} property'
2355
+ }
2356
+
2357
+ if len(choice_mapping) == len(oneof_items):
2358
+ return {
2359
+ 'type': 'choice',
2360
+ 'choices': choice_mapping,
2361
+ 'selector': None # Inline choice without explicit selector property
2362
+ }
2363
+
2364
+ return None
2365
+
2366
+ def _detect_complex_discriminated_union(self, oneof_items: list) -> dict | None:
2367
+ """
2368
+ Detect complex discriminated union where each schema has a unique combination of required properties.
2369
+ """
2370
+ required_sets = []
2371
+ choice_mapping = {}
2372
+
2373
+ for i, item in enumerate(oneof_items):
2374
+ if 'properties' not in item:
2375
+ return None
2376
+
2377
+ required = set(item.get('required', []))
2378
+
2379
+ # Check if this combination of required properties is unique
2380
+ for existing_set in required_sets:
2381
+ if required == existing_set:
2382
+ return None # Non-unique required property combination
2383
+ # Check for overlap - if sets overlap significantly, it's not a clean discriminated union
2384
+ overlap = required & existing_set
2385
+ if len(overlap) > 0 and (len(overlap) / len(required | existing_set)) > 0.5:
2386
+ return None # Too much overlap
2387
+
2388
+ required_sets.append(required)
2389
+
2390
+ # Create a choice name based on the required properties
2391
+ if len(required) == 0:
2392
+ choice_name = f'variant_{i}'
2393
+ elif len(required) == 1:
2394
+ choice_name = list(required)[0]
2395
+ else:
2396
+ # Sort for consistent naming
2397
+ sorted_props = sorted(required)
2398
+ choice_name = '_'.join(sorted_props[:2]) # Use first two properties for name
2399
+ if len(sorted_props) > 2:
2400
+ choice_name += '_etc'
2401
+
2402
+ # Use 'any' type for discriminated unions
2403
+ choice_mapping[choice_name] = {
2404
+ 'type': 'any',
2405
+ 'description': f'Choice variant requiring: {", ".join(sorted(required))}'
2406
+ }
2407
+
2408
+ if len(choice_mapping) == len(oneof_items) and len(choice_mapping) >= 2:
2409
+ return {
2410
+ 'type': 'choice',
2411
+ 'choices': choice_mapping,
2412
+ 'selector': None # Inline choice without explicit selector property
2413
+ }
2414
+
2415
+ return None
2416
+
2417
+ def _ensure_abstract_base_type(self, base_ref: str, abstract_base_name: str, structure_schema: dict, json_schema: dict, base_uri: str) -> None:
2418
+ """
2419
+ Ensure that an abstract base type exists for inheritance patterns.
2420
+
2421
+ Args:
2422
+ base_ref (str): The original $ref to the base type
2423
+ abstract_base_name (str): The name for the abstract base type
2424
+ structure_schema (dict): The structure schema being built
2425
+ json_schema (dict): The original JSON schema
2426
+ base_uri (str): The base URI for resolving references
2427
+ """
2428
+ # Ensure definitions section exists
2429
+ if 'definitions' not in structure_schema:
2430
+ structure_schema['definitions'] = {}
2431
+
2432
+ # If abstract base type already exists, don't recreate it
2433
+ if abstract_base_name in structure_schema['definitions']:
2434
+ return
2435
+
2436
+ # Guard against recursive abstract base type creation
2437
+ if not hasattr(self, '_creating_abstract_bases'):
2438
+ self._creating_abstract_bases = set()
2439
+
2440
+ if abstract_base_name in self._creating_abstract_bases:
2441
+ print(f"WARNING: Circular reference detected while creating abstract base type {abstract_base_name}")
2442
+ # Create a minimal abstract type to break the cycle
2443
+ structure_schema['definitions'][abstract_base_name] = {
2444
+ 'type': 'object',
2445
+ 'abstract': True,
2446
+ 'name': abstract_base_name,
2447
+ 'properties': {}
2448
+ }
2449
+ return
2450
+
2451
+ # Add to the guard set
2452
+ self._creating_abstract_bases.add(abstract_base_name)
2453
+
2454
+ try:
2455
+ # Resolve the original base type reference
2456
+ base_schema, _ = self.resolve_reference({'$ref': base_ref}, base_uri, json_schema)
2457
+
2458
+ # Convert the base type to structure format, but without triggering inheritance conversion
2459
+ # to avoid infinite recursion
2460
+ old_detect_inheritance = self.detect_inheritance
2461
+ old_preserve_composition = self.preserve_composition
2462
+ self.detect_inheritance = False # Temporarily disable inheritance detection
2463
+ self.preserve_composition = False # Force composition flattening for base types
2464
+
2465
+ try:
2466
+ base_structure = self.json_type_to_structure_type(
2467
+ base_schema, abstract_base_name, '', '', [], json_schema, base_uri,
2468
+ structure_schema, [], 1
2469
+ )
2470
+
2471
+ # Mark it as abstract
2472
+ if isinstance(base_structure, dict):
2473
+ base_structure['abstract'] = True
2474
+ base_structure['name'] = abstract_base_name
2475
+
2476
+ # Store the abstract base type
2477
+ structure_schema['definitions'][abstract_base_name] = base_structure
2478
+ elif isinstance(base_structure, str):
2479
+ # If the base resolves to a simple type, create an object wrapper
2480
+ structure_schema['definitions'][abstract_base_name] = {
2481
+ 'type': 'object',
2482
+ 'abstract': True,
2483
+ 'name': abstract_base_name,
2484
+ 'properties': {}
2485
+ }
2486
+
2487
+ finally:
2488
+ # Restore inheritance detection and composition settings
2489
+ self.detect_inheritance = old_detect_inheritance
2490
+ self.preserve_composition = old_preserve_composition
2491
+
2492
+ except Exception as e:
2493
+ # If we can't resolve the base type, create a minimal abstract type
2494
+ print(f"WARNING: Failed to create abstract base type {abstract_base_name}: {e}")
2495
+ structure_schema['definitions'][abstract_base_name] = {
2496
+ 'type': 'object',
2497
+ 'abstract': True,
2498
+ 'name': abstract_base_name,
2499
+ 'properties': {}
2500
+ }
2501
+ finally:
2502
+ # Remove from the guard set
2503
+ self._creating_abstract_bases.discard(abstract_base_name)
2504
+
2505
def create_pattern_union_maps(self, pattern_properties: dict, additional_props, record_name: str, namespace: str, dependencies: list, json_schema: dict, base_uri: str, structure_schema: dict, record_stack: list, recursion_depth: int = 1) -> dict:
    """
    Build a type union of maps, one per patternProperties entry.

    Each pattern yields a map whose ``keyNames`` carries that pattern. A
    dict-valued additionalProperties adds a map with unconstrained string
    keys, and ``additionalProperties: true`` adds a map of ``any`` values.
    Every map is handed to ``self._hoist_definition`` and the union is built
    from the values that call returns.

    Args:
        pattern_properties (dict): The patternProperties object with patterns as keys
        additional_props: The additionalProperties value (False, True, or schema dict)
        record_name (str): Name of the record, used as prefix of the hoisted names
        namespace (str): Namespace
        dependencies (list): Dependencies list
        json_schema (dict): The full JSON schema
        base_uri (str): Base URI
        structure_schema (dict): Structure schema being built (mutated)
        record_stack (list): Record stack for recursion detection
        recursion_depth (int): Current recursion depth

    Returns:
        dict: JSON Structure type union of maps using {"type": [...]} syntax
    """
    # Hoisting needs a definitions section to write into
    if 'definitions' not in structure_schema:
        structure_schema['definitions'] = {}

    union_members = []

    # One map per pattern, in declaration order
    for position, (pattern, values_schema) in enumerate(pattern_properties.items()):
        pattern_map = self.create_structure_map(
            values_schema, record_name, namespace, dependencies,
            json_schema, base_uri, structure_schema, record_stack, recursion_depth
        )
        pattern_map['keyNames'] = {
            "type": "string",
            "pattern": pattern
        }

        # The hint keeps only identifier characters from the pattern
        sanitized = re.sub(r'[^a-zA-Z0-9_]', '_', pattern)
        hint = f"{record_name}_PatternMap_{sanitized}_{position}"
        union_members.append(self._hoist_definition(pattern_map, structure_schema, hint))

    # Fallback map for keys not covered by the patterns; False and None add nothing
    if isinstance(additional_props, dict):
        fallback_map = self.create_structure_map(
            additional_props, record_name, namespace, dependencies,
            json_schema, base_uri, structure_schema, record_stack, recursion_depth
        )
        # Catch-all: any string key
        fallback_map['keyNames'] = {
            "type": "string"
        }
        union_members.append(
            self._hoist_definition(fallback_map, structure_schema, f"{record_name}_AdditionalMap")
        )
    elif additional_props is True:
        # additionalProperties: true means any type - a map with any values
        any_map = {
            "type": "map",
            "keyNames": {
                "type": "string"
            },
            "values": "any"
        }
        union_members.append(
            self._hoist_definition(any_map, structure_schema, f"{record_name}_AnyMap")
        )

    # Ensure $uses includes JSONStructureValidation
    self._ensure_validation_extension_in_structure_schema(structure_schema)

    return {
        "type": union_members
    }
2593
+
2594
def convert_json_schema_to_structure(input_data: str, root_namespace: str = 'example.com', base_uri: str = '') -> str:
    """
    Convert a JSON Schema document, given as text, to JSON Structure text.

    Args:
        input_data (str): The JSON Schema document as a JSON string.
        root_namespace (str): Namespace assigned to the converter and passed to
            the conversion. Defaults to 'example.com'.
        base_uri (str): Base URI passed to the conversion. Defaults to ''.

    Returns:
        str: The conversion result serialized as JSON with a two-space indent.
    """
    # The converter is created and configured before the input is parsed
    structure_converter = JsonToStructureConverter()
    structure_converter.root_namespace = root_namespace

    parsed_schema = json.loads(input_data)
    structure_document = structure_converter.jsons_to_structure(
        parsed_schema, root_namespace, base_uri
    )

    return json.dumps(structure_document, indent=2)
2615
+
2616
def convert_json_schema_to_structure_files(
    json_schema_file_path: str,
    structure_schema_path: str,
    root_namespace = None
) -> None:
    """
    Read a JSON Schema file and write its JSON Structure conversion.

    Both files are handled as UTF-8 text.

    Args:
        json_schema_file_path (str): Path to the input JSON Schema file
        structure_schema_path (str): Path to the output JSON Structure file
        root_namespace (str): The namespace for the root schema; None selects
            'example.com'
    """
    # Only None triggers the default; any other value is passed through as-is
    effective_namespace = 'example.com' if root_namespace is None else root_namespace

    with open(json_schema_file_path, 'r', encoding='utf-8') as source_file:
        raw_schema = source_file.read()

    converted_text = convert_json_schema_to_structure(raw_schema, effective_namespace)

    with open(structure_schema_path, 'w', encoding='utf-8') as target_file:
        target_file.write(converted_text)