structurize 2.18.2__py3-none-any.whl → 2.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,597 @@
1
+ """
2
+ JSON Structure to CDDL (Concise Data Definition Language) converter.
3
+
4
+ CDDL is defined in RFC 8610 and is a schema language primarily used for
5
+ expressing CBOR and JSON data structures.
6
+
7
+ RFC 8610 Compliance Summary:
8
+ ============================
9
+
10
+ Generated CDDL Features:
11
+ - Primitive types: uint, int, float16/32/64, tstr, bstr, bool, nil, any
12
+ - Maps/objects with member keys and values
13
+ - Arrays with homogeneous and heterogeneous (tuple) items
14
+ - Choice/union types (type1 / type2)
15
+ - Occurrence indicators: ? (optional), * (zero-or-more), + (one-or-more)
16
+ - Literal values (strings, integers, floats, booleans)
17
+ - Range constraints (min..max)
18
+ - Type references
19
+ - Groups and group composition
20
+ - Comments from descriptions
21
+ - Unwrap operator (~) for $extends references
22
+
23
+ Control Operators Generated:
24
+ - .size: From minLength/maxLength for strings (array minItems/maxItems are emitted as occurrence indicators such as 2*5, not .size)
25
+ - .regexp: From pattern validation
26
+ - .default: From default value
27
+ - .lt, .le, .gt, .ge: From minimum/maximum/exclusiveMinimum/exclusiveMaximum
28
+
29
+ Notes on Type Mapping:
30
+ - JSON Structure binary maps to bstr
31
+ - JSON Structure string maps to tstr
32
+ - Integer types map to int or uint based on constraints
33
+ - Float types map to float16/32/64 based on precision
34
+ """
35
+
36
+ import json
37
+ import logging
38
+ import os
39
+ from typing import Any, Dict, List, Optional, Set, Tuple, Union
40
+
41
+ from avrotize.common import avro_name
42
+
43
+
44
+ # Configure module logger
45
logger = logging.getLogger(__name__)

# Whitespace unit prepended to each nested line of generated CDDL
INDENT = ' '

# Ceiling on nested _convert_type calls; going past it raises
# StructureToCddlError instead of overflowing the interpreter stack
MAX_CONVERSION_DEPTH = 100
52
+
53
+
54
class StructureToCddlError(Exception):
    """
    Base error for failures while converting JSON Structure to CDDL.

    The text passed to ``Exception`` is ``message``, with
    `` (context: <context>)`` appended when a non-empty context is given.

    Attributes:
        message: Human-readable error description (without the context suffix)
        context: Optional description of where the error occurred
        cause: Optional underlying exception; stored only, not chained
    """

    def __init__(self, message: str, context: Optional[str] = None,
                 cause: Optional[Exception] = None) -> None:
        rendered = f"{message} (context: {context})" if context else message
        super().__init__(rendered)
        self.message = message
        self.context = context
        self.cause = cause
73
+
74
+
75
class StructureToCddlCycleError(StructureToCddlError):
    """
    Error describing a circular chain of type references.

    Attributes:
        cycle_path: Type names in the order they form the cycle; they are
            joined with ' -> ' in the error message
    """

    def __init__(self, cycle_path: List[str]) -> None:
        rendered_path = ' -> '.join(cycle_path)
        self.cycle_path = cycle_path
        super().__init__(f"Circular type reference detected: {rendered_path}")
87
+
88
+
89
+ # JSON Structure primitive types to CDDL type mapping
90
STRUCTURE_TO_CDDL_TYPES: Dict[str, str] = {
    # Integers: signed widths collapse to int, unsigned widths to uint
    "int8": "int",
    "uint8": "uint",
    "int16": "int",
    "uint16": "uint",
    "int32": "int",
    "uint32": "uint",
    "int64": "int",
    "uint64": "uint",
    "int128": "int",
    "uint128": "uint",
    "integer": "int",

    # Floating point
    "float": "float32",
    "float8": "float16",
    "float16": "float16",
    "float32": "float32",
    "float64": "float64",
    "double": "float64",
    "binary32": "float32",
    "binary64": "float64",
    "decimal": "float64",  # CDDL has no decimal type; float64 is the fallback
    "number": "float64",

    # Text and byte strings
    "string": "tstr",
    "binary": "bstr",
    "bytes": "bstr",

    # Boolean / null
    "boolean": "bool",
    "null": "nil",

    # Wildcard
    "any": "any",

    # Temporal types are carried as text because CDDL lacks direct equivalents
    "date": "tstr",
    "time": "tstr",
    "datetime": "tstr",
    "timestamp": "tstr",
    "duration": "tstr",

    # Remaining string-shaped types
    "uuid": "tstr",
    "uri": "tstr",
    "jsonpointer": "tstr",
}
140
+
141
+
142
class StructureToCddlConverter:
    """
    Converts JSON Structure schema documents to CDDL format.

    An instance keeps per-run state (known definitions, names already emitted,
    accumulated output lines, recursion depth). That state is reset at the
    start of every ``convert_structure_schema_to_cddl`` call, so an instance
    can be reused for several schemas one after another.
    """

    def __init__(self) -> None:
        self.definitions: Dict[str, Dict[str, Any]] = {}
        self.schema_registry: Dict[str, Dict] = {}
        self.converted_types: Set[str] = set()
        self.output_lines: List[str] = []
        self._conversion_stack: List[str] = []
        self._conversion_depth: int = 0

    @staticmethod
    def _quote(text: Any) -> str:
        """
        Render a value as a double-quoted CDDL text literal.

        JSON string escaping is used so that embedded quotes, backslashes and
        control characters cannot terminate or corrupt the literal.
        """
        return json.dumps(text if isinstance(text, str) else str(text), ensure_ascii=False)

    @staticmethod
    def _is_integer(value: Any) -> bool:
        """Return True for int values, excluding bool (a subclass of int)."""
        return isinstance(value, int) and not isinstance(value, bool)

    def _to_cddl_name(self, name: str) -> str:
        """
        Convert a name to CDDL style by replacing underscores with hyphens.

        An explicit CDDL name stored under ``altnames`` is not looked at here;
        ``_get_original_name`` handles that before falling back to this method.
        """
        return name.replace('_', '-')

    def _get_original_name(self, schema: Dict[str, Any], default_name: str) -> str:
        """
        Return ``schema['altnames']['cddl']`` when present, otherwise the
        hyphenated form of ``default_name``.
        """
        altnames = schema.get('altnames', {})
        if isinstance(altnames, dict) and 'cddl' in altnames:
            return altnames['cddl']
        return self._to_cddl_name(default_name)

    def _format_comment(self, description: Optional[str], indent_level: int = 0) -> str:
        """
        Format a description as CDDL ``;`` comment lines.

        Blank lines in the description are dropped and every remaining line is
        stripped. Returns an empty string when there is nothing to emit.
        """
        if not description:
            return ''

        indent = INDENT * indent_level
        lines = description.split('\n')
        return '\n'.join(f"{indent}; {line.strip()}" for line in lines if line.strip())

    def convert_structure_to_cddl(self, structure_content: str) -> str:
        """
        Convert JSON Structure content to CDDL format.

        Args:
            structure_content: The JSON Structure schema as a JSON string

        Returns:
            CDDL schema as a string

        Raises:
            json.JSONDecodeError: If ``structure_content`` is not valid JSON
        """
        schema = json.loads(structure_content)
        return self.convert_structure_schema_to_cddl(schema)

    def convert_structure_schema_to_cddl(self, schema: Dict[str, Any]) -> str:
        """
        Convert a JSON Structure schema dict to CDDL format.

        The root type (when the schema has both ``name`` and ``type``) is
        emitted first, followed by every entry of ``definitions`` not already
        emitted.

        Args:
            schema: The JSON Structure schema as a dict

        Returns:
            CDDL schema as a string
        """
        # Reset per-run state; `or {}` tolerates an explicit "definitions": null
        self.definitions = schema.get('definitions') or {}
        self.schema_registry.clear()
        self.converted_types.clear()
        self.output_lines.clear()
        self._conversion_stack.clear()
        self._conversion_depth = 0

        # Header comment naming the source schema
        schema_id = schema.get('$id', '')
        if schema_id:
            self.output_lines.append(f"; Generated from {schema_id}")
            self.output_lines.append("")

        root_name = schema.get('name')
        root_type = schema.get('type')
        if root_name and root_type:
            # The root document itself is an inline type definition
            self._convert_definition(root_name, schema)

        for def_name, def_schema in self.definitions.items():
            if def_name not in self.converted_types:
                self._convert_definition(def_name, def_schema)

        return '\n'.join(self.output_lines)

    def _convert_definition(self, name: str, schema: Dict[str, Any]) -> None:
        """
        Emit one ``name = type`` rule (preceded by its description comment).

        Names already present in ``converted_types`` are skipped, so each
        definition is written at most once per run.

        Args:
            name: The name of the type
            schema: The JSON Structure schema for the type
        """
        if name in self.converted_types:
            return
        self.converted_types.add(name)

        cddl_name = self._get_original_name(schema, name)

        comment = self._format_comment(schema.get('description'))
        if comment:
            self.output_lines.append(comment)

        type_def = self._convert_type(schema, cddl_name)

        self.output_lines.append(f"{cddl_name} = {type_def}")
        self.output_lines.append("")

    def _convert_type(self, schema: Dict[str, Any], context_name: str = '') -> str:
        """
        Convert a JSON Structure type to a CDDL type expression.

        Args:
            schema: The JSON Structure type schema (a dict), or a bare type
                name string; any other value converts to ``any``
            context_name: Name context for nested types (used in error text)

        Returns:
            CDDL type expression as a string

        Raises:
            StructureToCddlError: If nesting exceeds MAX_CONVERSION_DEPTH
        """
        if not isinstance(schema, dict):
            if isinstance(schema, str):
                # Bare type name
                return self._map_primitive_type(schema)
            return 'any'

        self._conversion_depth += 1
        try:
            if self._conversion_depth > MAX_CONVERSION_DEPTH:
                logger.warning("Maximum conversion depth exceeded for context: %s", context_name)
                raise StructureToCddlError(
                    f"Maximum conversion depth ({MAX_CONVERSION_DEPTH}) exceeded",
                    context=context_name
                )

            if '$ref' in schema:
                return self._convert_ref(schema['$ref'])

            # enum is checked before type because a schema may carry both
            if 'enum' in schema:
                return self._convert_enum(schema['enum'])

            type_value = schema.get('type')

            # A list-valued type is a union
            if isinstance(type_value, list):
                return self._convert_union(type_value, schema, context_name)

            # An object-valued type is a nested schema, most commonly a
            # reference such as {"type": {"$ref": "#/definitions/X"}}.
            # Treating it as a primitive name would fail on the dict lookup.
            if isinstance(type_value, dict):
                return self._convert_type(type_value, context_name)

            if type_value == 'object':
                return self._convert_object(schema, context_name)
            if type_value == 'array':
                return self._convert_array(schema, context_name)
            if type_value == 'tuple':
                return self._convert_tuple(schema, context_name)
            if type_value == 'map':
                return self._convert_map(schema, context_name)
            if type_value is not None:
                return self._convert_primitive(type_value, schema)

            if 'const' in schema:
                return self._convert_const(schema['const'])

            return 'any'
        finally:
            # Always unwind the depth counter, including on the error path
            self._conversion_depth -= 1

    def _map_primitive_type(self, type_name: str) -> str:
        """
        Map a JSON Structure primitive type to its CDDL type.

        Names missing from STRUCTURE_TO_CDDL_TYPES are returned unchanged.
        """
        return STRUCTURE_TO_CDDL_TYPES.get(type_name, type_name)

    def _convert_ref(self, ref: str) -> str:
        """
        Convert a $ref to a CDDL type reference.

        ``#/definitions/<name>`` resolves through ``self.definitions`` so an
        ``altnames.cddl`` override is honoured; other ``#/`` references use
        their last path segment; anything else is hyphenated as a whole.
        """
        prefix = '#/definitions/'
        if ref.startswith(prefix):
            type_name = ref[len(prefix):]
            if type_name in self.definitions:
                return self._get_original_name(self.definitions[type_name], type_name)
            return self._to_cddl_name(type_name)
        if ref.startswith('#/'):
            # Other local reference
            return self._to_cddl_name(ref.split('/')[-1])
        # External reference - use as-is
        return self._to_cddl_name(ref)

    def _convert_union(self, types: List[Any], schema: Dict[str, Any], context_name: str) -> str:
        """
        Convert a union type to a CDDL choice (``a / b / c``).

        String members are mapped as primitive names, dict members are
        converted recursively, and anything else is stringified.
        """
        cddl_types = []
        for member in types:
            if isinstance(member, str):
                cddl_types.append(self._map_primitive_type(member))
            elif isinstance(member, dict):
                cddl_types.append(self._convert_type(member, context_name))
            else:
                cddl_types.append(str(member))

        return ' / '.join(cddl_types)

    def _convert_object(self, schema: Dict[str, Any], context_name: str) -> str:
        """
        Convert an object type to a CDDL map.

        ``$extends`` targets are emitted first as ``~name,`` entries, followed
        by one entry per property; properties not listed in ``required`` get
        the ``?`` occurrence marker. An object without properties or
        ``$extends`` becomes ``{ }``.
        """
        properties = schema.get('properties', {})
        required = set(schema.get('required', []))
        extends = schema.get('$extends')

        if not properties and not extends:
            return '{ }'

        lines = ['{']

        if extends:
            for ext_ref in (extends if isinstance(extends, list) else [extends]):
                if isinstance(ext_ref, str):
                    target = ext_ref
                elif isinstance(ext_ref, dict) and '$ref' in ext_ref:
                    target = ext_ref['$ref']
                else:
                    # Unrecognised $extends entry: skip it
                    continue
                lines.append(f"{INDENT}~{self._convert_ref(target)},")

        entries = []
        for prop_name, prop_schema in properties.items():
            if isinstance(prop_schema, dict):
                member_key = self._get_original_name(prop_schema, prop_name)
            else:
                member_key = self._to_cddl_name(prop_name)
            prop_type = self._convert_type(prop_schema, f"{context_name}.{prop_name}")
            occurrence = '' if prop_name in required else '? '
            entries.append(f"{INDENT}{occurrence}{member_key}: {prop_type}")

        # Every entry except the last is comma-terminated
        lines.extend(f"{entry}," for entry in entries[:-1])
        lines.extend(entries[-1:])
        lines.append('}')

        return '\n'.join(lines)

    def _convert_array(self, schema: Dict[str, Any], context_name: str) -> str:
        """
        Convert an array type to a CDDL array.

        ``minItems``/``maxItems`` become an occurrence indicator on the item
        type: ``n*m``, ``+`` (exactly minItems == 1 with no maximum), ``n*``,
        ``*m`` or plain ``*``.
        """
        items = schema.get('items', {'type': 'any'})
        items_type = self._convert_type(items, f"{context_name}[]")

        min_items = schema.get('minItems')
        max_items = schema.get('maxItems')

        if min_items is not None and max_items is not None:
            return f"[{min_items}*{max_items} {items_type}]"
        if min_items is not None and min_items > 0:
            if min_items == 1:
                return f"[+ {items_type}]"
            return f"[{min_items}* {items_type}]"
        if max_items is not None:
            return f"[*{max_items} {items_type}]"
        return f"[* {items_type}]"

    def _convert_tuple(self, schema: Dict[str, Any], context_name: str) -> str:
        """
        Convert a tuple type to a CDDL array with positional elements.

        Element order comes from the schema's ``tuple`` list; each name is
        looked up in ``properties`` and defaults to ``any`` when missing.
        """
        tuple_order = schema.get('tuple', [])
        properties = schema.get('properties', {})

        if not tuple_order:
            return '[ ]'

        elements = [
            self._convert_type(properties.get(prop_name, {'type': 'any'}),
                               f"{context_name}[{idx}]")
            for idx, prop_name in enumerate(tuple_order)
        ]
        return f"[{', '.join(elements)}]"

    def _convert_map(self, schema: Dict[str, Any], context_name: str) -> str:
        """
        Convert a map type to ``{ * key => value }``.

        Keys default to ``string`` and values to ``any`` when not specified.
        """
        keys = schema.get('keys', {'type': 'string'})
        values = schema.get('values', {'type': 'any'})

        key_type = self._convert_type(keys, f"{context_name}.keys")
        value_type = self._convert_type(values, f"{context_name}.values")

        return f"{{ * {key_type} => {value_type} }}"

    def _range_expression(self, schema: Dict[str, Any]) -> str:
        """
        Build a CDDL range for a schema that has both a lower and upper bound.

        Returns an empty string unless one lower bound (minimum or
        exclusiveMinimum) and one upper bound (maximum or exclusiveMaximum)
        are present. Integer exclusive bounds are tightened by one, as before.
        Non-integer bounds are never shifted by one: an exclusive upper bound
        uses the ``...`` operator, and an exclusive lower bound is kept as the
        range start because CDDL has no exclusive-start operator (the result
        then admits that single endpoint, but rejects no valid value).
        """
        minimum = schema.get('minimum')
        maximum = schema.get('maximum')
        exclusive_min = schema.get('exclusiveMinimum')
        exclusive_max = schema.get('exclusiveMaximum')

        if minimum is not None and maximum is not None:
            return f"{minimum}..{maximum}"
        if exclusive_min is not None and exclusive_max is not None:
            lower, lower_exclusive = exclusive_min, True
            upper, upper_exclusive = exclusive_max, True
        elif minimum is not None and exclusive_max is not None:
            lower, lower_exclusive = minimum, False
            upper, upper_exclusive = exclusive_max, True
        elif exclusive_min is not None and maximum is not None:
            lower, lower_exclusive = exclusive_min, True
            upper, upper_exclusive = maximum, False
        else:
            return ''

        if lower_exclusive and self._is_integer(lower):
            lower = lower + 1

        if not upper_exclusive:
            return f"{lower}..{upper}"
        if self._is_integer(upper):
            return f"{lower}..{upper - 1}"
        return f"{lower}...{upper}"

    def _convert_primitive(self, type_name: str, schema: Dict[str, Any]) -> str:
        """
        Convert a primitive type with optional constraints.

        A two-sided numeric bound is rendered as a range (which replaces the
        type name); otherwise the mapped type is followed by at most one
        control operator from ``_get_constraints``.
        """
        cddl_type = self._map_primitive_type(type_name)

        range_expr = self._range_expression(schema)
        if range_expr:
            return range_expr

        constraints = self._get_constraints(schema)
        if constraints:
            return f"{cddl_type} {constraints}"
        return cddl_type

    def _get_constraints(self, schema: Dict[str, Any]) -> str:
        """
        Get the CDDL control operator for a schema's constraints.

        Only one control operator is emitted per type. When several
        constraints are present the first match wins, in this order:
        pattern > size > numeric > default > const.

        Returns:
            The control operator text (e.g. ``.size (1..5)``) or ''
        """
        # Pattern constraint takes priority for strings
        if 'pattern' in schema:
            return f".regexp {self._quote(schema['pattern'])}"

        # String length constraints
        if 'minLength' in schema or 'maxLength' in schema:
            min_len = schema.get('minLength', 0)
            max_len = schema.get('maxLength')
            if max_len is not None:
                if min_len == max_len:
                    return f".size {max_len}"
                return f".size ({min_len}..{max_len})"
            if min_len > 0:
                # NOTE(review): RFC 8610 ranges appear to need both ends, so
                # this open-ended form may not parse - confirm with a validator
                return f".size ({min_len}..)"

        # Single-bound numeric constraints (two-sided bounds use range syntax)
        minimum = schema.get('minimum')
        maximum = schema.get('maximum')
        exclusive_min = schema.get('exclusiveMinimum')
        exclusive_max = schema.get('exclusiveMaximum')

        if minimum is not None and maximum is None and exclusive_max is None:
            return f".ge {minimum}"
        if maximum is not None and minimum is None and exclusive_min is None:
            return f".le {maximum}"
        if exclusive_min is not None and exclusive_max is None and maximum is None:
            return f".gt {exclusive_min}"
        if exclusive_max is not None and exclusive_min is None and minimum is None:
            return f".lt {exclusive_max}"

        if 'default' in schema:
            return f".default {self._literal(schema['default'])}"

        if 'const' in schema:
            return f".eq {self._literal(schema['const'])}"

        return ''

    def _literal(self, value: Any) -> str:
        """
        Render the operand of ``.default`` / ``.eq``.

        Strings are quoted and escaped, booleans become true/false, None
        becomes nil, and any other value is interpolated as-is.
        """
        if isinstance(value, str):
            return self._quote(value)
        if isinstance(value, bool):
            return 'true' if value else 'false'
        if value is None:
            return 'nil'
        return f"{value}"

    def _convert_const(self, value: Any) -> str:
        """
        Convert a const value to a CDDL literal.

        bool is tested before the numeric branch because bool is a subclass of
        int. Values that are not str/bool/None/number are stringified and
        quoted.
        """
        if isinstance(value, str):
            return self._quote(value)
        if isinstance(value, bool):
            return 'true' if value else 'false'
        if value is None:
            return 'nil'
        if isinstance(value, (int, float)):
            return str(value)
        return self._quote(str(value))

    def _convert_enum(self, values: List[Any]) -> str:
        """Convert an enum to a CDDL choice of literals joined by ' / '."""
        return ' / '.join(self._convert_const(v) for v in values)
557
+
558
+
559
def convert_structure_to_cddl(structure_content: str) -> str:
    """
    Convert a JSON Structure document to CDDL text.

    A fresh StructureToCddlConverter is created for each call.

    Args:
        structure_content: JSON Structure schema as a JSON string

    Returns:
        CDDL schema as a string
    """
    return StructureToCddlConverter().convert_structure_to_cddl(structure_content)
571
+
572
+
573
def convert_structure_to_cddl_files(
    structure_schema_path: str,
    cddl_file_path: Optional[str] = None
) -> str:
    """
    Read a JSON Structure schema file and convert it to CDDL.

    Args:
        structure_schema_path: Path to the JSON Structure schema file (UTF-8)
        cddl_file_path: Optional output path; when given, the CDDL text is
            written there as UTF-8 and missing parent directories are created

    Returns:
        CDDL schema as a string
    """
    with open(structure_schema_path, 'r', encoding='utf-8') as source:
        raw_schema = source.read()
    cddl_content = convert_structure_to_cddl(raw_schema)

    if not cddl_file_path:
        return cddl_content

    # A bare filename has no directory part; fall back to the current directory
    target_dir = os.path.dirname(cddl_file_path) or '.'
    os.makedirs(target_dir, exist_ok=True)
    with open(cddl_file_path, 'w', encoding='utf-8') as sink:
        sink.write(cddl_content)

    return cddl_content