struct2ui 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
struct2ui/schema.py ADDED
@@ -0,0 +1,1118 @@
1
+ """Schema layer: parse cfg_t/*.json into a typed tree of Field nodes.
2
+
3
+ Pure data, no Qt. Single responsibility: turn JSON files into an in-memory
4
+ schema that the UI layer can render without re-walking JSON.
5
+
6
+ Field hierarchy:
7
+ Field (abstract base; common metadata)
8
+ +-- ScalarField (bool / int / float / str)
9
+ +-- EnumField (named items dict)
10
+ +-- StructField (ordered list of children Fields)
11
+ +-- ArrayField (count + element-Field; element can be any Field)
12
+
13
+ Design notes:
14
+ - Resolution is eager: when we build the schema for a top-level type, we
15
+ follow nested struct/enum/array references *now*, so the renderer never
16
+ needs to know about cfg_t directories or files.
17
+ - Cycles are guarded by a stack of currently-resolving type names.
18
+ - Optional metadata (min/max/step/unit/tip/when/widget/render/value) are passed
19
+ through verbatim. We do not enforce them here.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import difflib
25
+ import json
26
+ import os
27
+ from dataclasses import dataclass, field
28
+ from typing import Any, Dict, List, Optional
29
+
30
+
31
# Per-field metadata keys. The tuple order is the order in which these keys
# are looked up when a field's meta dict is assembled.
OPTIONAL_KEYS = ('widget', 'render', 'min', 'max', 'step', 'unit', 'tip', 'when')


# --------------------------------------------------------------------------- #
# Keyword vocabulary
# --------------------------------------------------------------------------- #
# Keys accepted inside one entry of a struct's `items` list (a field spec):
# the four structural keys plus every optional metadata key. Any other key in
# that position is reported as an unknown keyword. Top-level keys of a
# cfg_t/*.json file are deliberately NOT validated against this set: they are
# user-chosen type/const names mixed with reserved keys such as `typedefs`.
FIELD_SPEC_KEYWORDS = frozenset(('name', 'type', 'value', 'count') + OPTIONAL_KEYS)

# Keys accepted inside a top-level type-definition block, looked up by the
# block's own `type` value. Both struct and enum blocks carry `type` and
# `items`; anything else at that level is an error.
BLOCK_KEYWORDS_BY_TYPE: Dict[str, frozenset] = {
    block_kind: frozenset(('type', 'items'))
    for block_kind in ('struct', 'enum')
}

# Free-form, human-oriented metadata keys: accepted at any level and ignored.
IGNORED_KEYWORDS = frozenset((
    'version', 'description', 'author', 'comment', 'note',
))

# Top-level keys that carry special meaning and are therefore not user types.
TOP_LEVEL_RESERVED = IGNORED_KEYWORDS | frozenset(('typedefs', 'flow'))
65
+
66
+
67
+ # --------------------------------------------------------------------------- #
68
+ # Load report
69
+ # --------------------------------------------------------------------------- #
70
+
71
@dataclass
class LoadIssue:
    """A single problem recorded while loading or validating a JSON file.

    Attributes:
        severity: 'error' or 'warning'.
        file: readable file path or label.
        path: readable JSON path, e.g. 'a_cfg_t -> items[1]'.
        message: description of the problem.
        suggestion: optional hint, e.g. "Did you mean 'tip'?".
    """

    severity: str
    file: str
    path: str
    message: str
    suggestion: Optional[str] = None
78
+
79
+
80
class LoadReport:
    """Accumulates the errors and warnings raised by JSON loading/validation.

    Errors and warnings are kept in two separate lists, each in the order
    the issues were reported.
    """

    def __init__(self) -> None:
        self.errors: List[LoadIssue] = []
        self.warnings: List[LoadIssue] = []

    def error(self, file: str, path: str, message: str,
              suggestion: Optional[str] = None) -> None:
        """Record one issue with severity 'error'."""
        issue = LoadIssue('error', file, path, message, suggestion)
        self.errors.append(issue)

    def warning(self, file: str, path: str, message: str,
                suggestion: Optional[str] = None) -> None:
        """Record one issue with severity 'warning'."""
        issue = LoadIssue('warning', file, path, message, suggestion)
        self.warnings.append(issue)

    @property
    def has_errors(self) -> bool:
        """True when at least one error has been recorded."""
        return len(self.errors) > 0

    @property
    def has_warnings(self) -> bool:
        """True when at least one warning has been recorded."""
        return len(self.warnings) > 0

    def extend(self, other: 'LoadReport') -> None:
        """Append every issue of `other` to this report, keeping its order."""
        for own, incoming in ((self.errors, other.errors),
                              (self.warnings, other.warnings)):
            own.extend(incoming)

    def format(self) -> str:
        """Render the report as text ending in a newline.

        An empty report yields the header followed by an '[OK] no issues'
        line; otherwise the errors section (if any) precedes the warnings
        section (if any), and issues are numbered from 1 within each section.
        """
        header = '=== Configuration Load Report ==='
        if not (self.errors or self.warnings):
            return header + '\n[OK] no issues\n'
        rendered: List[str] = [header]
        sections = (('Errors', self.errors), ('Warnings', self.warnings))
        for title, issues in sections:
            if not issues:
                continue
            rendered.append(f'[{title}: {len(issues)}]')
            for number, issue in enumerate(issues, 1):
                rendered.extend(self._format_issue(number, issue))
        return '\n'.join(rendered) + '\n'

    @staticmethod
    def _format_issue(idx: int, it: LoadIssue) -> List[str]:
        """Return the text lines for one issue; path and suggestion are optional."""
        candidates = (
            (True, f' [{idx}] {it.file}'),
            (bool(it.path), f' at {it.path}'),
            (True, f' {it.message}'),
            (bool(it.suggestion), f' {it.suggestion}'),
        )
        return [text for wanted, text in candidates if wanted]
131
+
132
+
133
def _suggest_keyword(unknown: str, vocabulary) -> Optional[str]:
    """Build a "Did you mean 'X'?" hint for a misspelled keyword.

    `vocabulary` is any iterable of known keywords. The single best difflib
    match with a similarity of at least 0.6 is used; when nothing is close
    enough the result is None.
    """
    closest = difflib.get_close_matches(
        unknown, list(vocabulary), n=1, cutoff=0.6)
    return f"Did you mean '{closest[0]}'?" if closest else None
139
+
140
+
141
+ # --------------------------------------------------------------------------- #
142
+ # Field hierarchy
143
+ # --------------------------------------------------------------------------- #
144
+
145
@dataclass
class Field:
    """Base node of the schema tree, holding metadata shared by all fields.

    Attributes:
        name: field name.
        c_type: C type name of the field.
        default: default value, or None when none is set.
        meta: free-form metadata dict (a fresh dict per instance).
    """

    name: str
    c_type: str
    default: Any = None
    meta: Dict[str, Any] = field(default_factory=dict)

    @property
    def kind(self) -> str:
        """Name of the concrete class of this node, e.g. 'ScalarField'."""
        return self.__class__.__name__
155
+
156
+
157
@dataclass
class ScalarField(Field):
    """Leaf field holding a single value.

    Attributes:
        ui_type: one of 'bool', 'int', 'float', 'str'; defaults to 'str'.
    """

    ui_type: str = 'str'
161
+
162
+
163
@dataclass
class EnumField(Field):
    """Field whose value is chosen from a dict of named items.

    Attributes:
        items: mapping of item name to its value (a fresh dict per instance).
    """

    items: Dict[str, Any] = field(default_factory=dict)
166
+
167
+
168
@dataclass
class StructField(Field):
    """Composite field made of an ordered list of child fields.

    Attributes:
        children: the member fields, in order (a fresh list per instance).
    """

    children: List[Field] = field(default_factory=list)
171
+
172
+
173
@dataclass
class ArrayField(Field):
    """Field repeating one element description `count` times.

    Attributes:
        count: number of elements; defaults to 0.
        element: the field describing one element, or None when unset.
    """

    count: int = 0
    element: Optional[Field] = None
177
+
178
+
179
def array_dims_suffix(f: 'ArrayField') -> str:
    """Return the bracketed dimension suffix for an array field.

    When meta['shape'] is a non-empty list or tuple, each entry becomes one
    bracket pair, so a flat storage of 12 with shape [3, 4] is shown as
    '[3][4]'. Without usable shape metadata the flat '[count]' form is
    returned instead.
    """
    dims = f.meta.get('shape')
    if not (isinstance(dims, (list, tuple)) and dims):
        return f'[{f.count}]'
    brackets = []
    for dim in dims:
        brackets.append(f'[{int(dim)}]')
    return ''.join(brackets)
190
+
191
+
192
+ # --------------------------------------------------------------------------- #
193
+ # SchemaRegistry: loads cfg_t directory and builds Field trees on demand
194
+ # --------------------------------------------------------------------------- #
195
+
196
class SchemaRegistry:
    """Loads all *.json files under cfg_dir and resolves type references.

    Usage:
        reg = SchemaRegistry('cfg_t')
        root = reg.build('a_cfg_t')     # -> StructField

    Problems found while loading (unreadable or malformed files, unknown
    keywords, bad min/max/step/count declarations) are collected in
    ``self.report`` instead of being raised.
    """

    def __init__(self, cfg_dir: str):
        self.cfg_dir = cfg_dir
        self._raw_files: Dict[str, Dict[str, Any]] = {}     # base -> file json
        self._enum_owner: Dict[str, str] = {}               # enum name -> file base
        self._struct_owner: Dict[str, str] = {}             # struct name -> file base
        self._alias: Dict[str, str] = {}                    # file base -> actual struct name (legacy)
        self._consts: Dict[str, Any] = {}                   # global int/float consts
        self._typedefs: Dict[str, str] = {}                 # typedef alias -> base C type
        self.report: LoadReport = LoadReport()
        self._load_all()
        # Step 3 (S1/S2/S4): semantic checks on declared min/max/step/count
        # in cfg_t. These read raw JSON directly so all known structs are
        # covered (lazy build only visits structs actually referenced from
        # abc.json). S3 (instance value vs declared range) lives in
        # FlowValidator._check_scalar where the value is in scope.
        self._audit_semantics()

    # ---- raw file loading ------------------------------------------------ #
    def _load_all(self) -> None:
        """Read every *.json file in cfg_dir (sorted by name) and index it.

        A file that cannot be read, decoded or parsed, or whose top level is
        not an object, is reported as an error and skipped. A missing
        cfg_dir is silently treated as empty.
        """
        if not os.path.isdir(self.cfg_dir):
            return
        for fn in sorted(os.listdir(self.cfg_dir)):
            if not fn.endswith('.json'):
                continue
            base = fn[:-5]
            file_label = os.path.join(self.cfg_dir, fn)
            try:
                with open(file_label, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except json.JSONDecodeError as e:
                self.report.error(file_label, '',
                                  f'JSON parse error: {e.msg} (line {e.lineno}, col {e.colno})')
                continue
            except UnicodeDecodeError as e:
                # Raised while reading a file that is not valid UTF-8. It is
                # neither a JSONDecodeError nor an OSError, so without this
                # clause one badly encoded file would abort the whole load.
                self.report.error(file_label, '', f'file is not valid UTF-8: {e}')
                continue
            except OSError as e:
                self.report.error(file_label, '', f'cannot read file: {e}')
                continue
            if not isinstance(data, dict):
                self.report.error(file_label, '',
                                  f'top-level must be an object, got {type(data).__name__}')
                continue
            self._raw_files[base] = data
            self._scan_keywords(file_label, data)
            self._index_file(base, data)

    def _index_file(self, base: str, data: Dict[str, Any]) -> None:
        """Populate _enum_owner / _struct_owner / _consts / _alias from one file.

        The first file to define a name wins (setdefault).

        Aliasing rule: if the file's basename does not itself name a struct
        (e.g. file `iir_cfg_t.json` defines struct `IIR_CFG_T`), point the
        basename at the file's first struct so callers can refer to either
        the basename or the actual struct name.
        """
        first_struct: Optional[str] = None
        for k, v in data.items():
            if k == 'typedefs' and isinstance(v, dict):
                for alias, target in v.items():
                    if isinstance(alias, str) and isinstance(target, str):
                        self._typedefs.setdefault(alias, target)
                continue
            if isinstance(v, dict):
                t = v.get('type')
                if t == 'enum':
                    self._enum_owner.setdefault(k, base)
                elif t == 'struct':
                    self._struct_owner.setdefault(k, base)
                    if first_struct is None:
                        first_struct = k
            elif isinstance(v, (int, float)):
                self._consts.setdefault(k, v)

        if base not in self._struct_owner and first_struct is not None:
            self._struct_owner[base] = base
            self._alias[base] = first_struct

    # ---- keyword scanning ----------------------------------------------- #
    def _scan_keywords(self, file_label: str, data: Dict[str, Any]) -> None:
        """Walk one cfg_t file and report unknown keywords.

        Two layers are checked:
          1. Block-level keys inside each top-level type-definition dict
             (struct / enum). E.g. `items` mistyped as `i2tems`.
          2. Field-spec keys inside a struct's `items` list entries.
             E.g. `tip` mistyped as `tlp`.

        Top-level dict keys themselves are NOT checked, because they are
        user-defined type/const names mixed with reserved keys like
        `typedefs`. Enum item dicts are NOT checked, because their keys
        are user-defined enumerator names.
        """
        for top_key, top_val in data.items():
            if not isinstance(top_val, dict):
                continue
            if top_key in TOP_LEVEL_RESERVED:
                continue
            block_type = top_val.get('type')
            # Only a string can name a block type. Guarding here also keeps
            # an unhashable JSON value (list / object) under `type` from
            # raising TypeError in the dict lookup; it is reported below as
            # an unknown block type instead.
            allowed = (BLOCK_KEYWORDS_BY_TYPE.get(block_type)
                       if isinstance(block_type, str) else None)
            if allowed is None:
                # The block looks like a type definition (dict under a
                # user-named top-level key) but lacks a recognizable `type`.
                # Most likely `type` itself is mistyped, or the block-type
                # value is unknown. Report and still try to scan for other
                # problems (e.g. `items` mistyped) using a permissive
                # whitelist so users see all issues at once.
                type_lookalike = self._report_bad_block_type(
                    file_label, top_key, top_val)
                permissive = (frozenset({'type'})
                              | BLOCK_KEYWORDS_BY_TYPE['struct']
                              | BLOCK_KEYWORDS_BY_TYPE['enum'])
                self._check_block_keys(file_label, top_key, top_val,
                                       permissive,
                                       skip_keys={type_lookalike} if type_lookalike else None,
                                       label='malformed')
                # In a malformed block we cannot trust block['items'].
                # Scan every list-valued sub-key as a candidate items list,
                # so typos like `items2` still get their field specs checked.
                for sub_key, sub_val in top_val.items():
                    if isinstance(sub_val, list):
                        self._scan_field_specs(file_label, top_key, sub_key,
                                               sub_val)
                continue
            self._check_block_keys(file_label, top_key, top_val, allowed,
                                   label=block_type)
            if block_type == 'struct':
                self._check_struct_items(file_label, top_key, top_val)

    def _report_bad_block_type(self, file_label: str, top_key: str,
                               block: Dict[str, Any]) -> Optional[str]:
        """Report a malformed type-definition block.

        Returns the key (if any) that was identified as a `type` lookalike,
        so the caller can avoid reporting it twice in the keyword scan.
        """
        if 'type' not in block:
            matches = difflib.get_close_matches(
                'type',
                [k for k in block.keys() if isinstance(k, str)],
                n=1, cutoff=0.6)
            lookalike = matches[0] if matches else None
            if lookalike:
                hint = f"Found similar key {lookalike!r}; did you mean 'type'?"
            else:
                hint = "Add a 'type' key like \"type\":\"struct\" or \"type\":\"enum\"."
            self.report.error(
                file_label,
                top_key,
                "type-definition block missing 'type' key",
                hint,
            )
            return lookalike
        bad_value = block.get('type')
        known = sorted(BLOCK_KEYWORDS_BY_TYPE.keys())
        suggestion = None
        if isinstance(bad_value, str):
            suggestion = _suggest_keyword(bad_value, known)
        if suggestion is None:
            suggestion = f"Expected one of: {', '.join(known)}."
        self.report.error(
            file_label,
            f'{top_key} -> type',
            f"unknown block type {bad_value!r}",
            suggestion,
        )
        return None

    def _check_block_keys(self, file_label: str, top_key: str,
                          block: Dict[str, Any], allowed: frozenset,
                          skip_keys: Optional[set] = None,
                          label: Optional[str] = None) -> None:
        """Report every key of `block` that is neither allowed, ignored nor skipped."""
        block_label = label or block.get('type') or 'block'
        for k in block.keys():
            if not isinstance(k, str):
                continue
            if k in allowed or k in IGNORED_KEYWORDS:
                continue
            if skip_keys and k in skip_keys:
                continue
            suggestion = _suggest_keyword(k, allowed | IGNORED_KEYWORDS)
            self.report.error(
                file_label,
                f'{top_key} -> {k}',
                f"unknown keyword '{k}' in {block_label} block",
                suggestion,
            )

    def _check_struct_items(self, file_label: str, top_key: str,
                            block: Dict[str, Any]) -> None:
        """Require a struct block's `items` to be a list, then scan its entries."""
        items = block.get('items')
        if items is None:
            self.report.error(
                file_label,
                f'{top_key} -> items',
                "struct block is missing 'items' list",
            )
            return
        if not isinstance(items, list):
            self.report.error(
                file_label,
                f'{top_key} -> items',
                f"struct 'items' must be a list, got {type(items).__name__}",
            )
            return
        self._scan_field_specs(file_label, top_key, 'items', items)

    def _scan_field_specs(self, file_label: str, top_key: str,
                          list_key: str, items: List[Any]) -> None:
        """Validate keys inside each entry of a (suspected) items list.

        `list_key` is the actual JSON key the list lives under, which may
        be the canonical 'items' or a typo like 'items2'. Either way, the
        list entries should be field-spec dicts and their keys are checked
        against FIELD_SPEC_KEYWORDS.
        """
        for idx, spec in enumerate(items):
            path_prefix = f'{top_key} -> {list_key}[{idx}]'
            if not isinstance(spec, dict):
                self.report.error(
                    file_label,
                    path_prefix,
                    f"field spec must be an object, got {type(spec).__name__}",
                )
                continue
            for k in spec.keys():
                if not isinstance(k, str):
                    continue
                if k in FIELD_SPEC_KEYWORDS or k in IGNORED_KEYWORDS:
                    continue
                suggestion = _suggest_keyword(
                    k, FIELD_SPEC_KEYWORDS | IGNORED_KEYWORDS)
                self.report.error(
                    file_label,
                    f'{path_prefix} -> {k}',
                    f"unknown keyword '{k}' in field spec",
                    suggestion,
                )

    # ---- semantic audit (Step 3: S1 / S2 / S4) -------------------------- #
    def _audit_semantics(self) -> None:
        """Walk every cfg_t file's raw JSON and check declared semantics.

        S1: min <= max when both are present
        S2: step > 0 when present
        S4: count > 0 (every dim, when count is a list)
        """
        for base, data in self._raw_files.items():
            file_label = os.path.join(self.cfg_dir, base + '.json')
            for top_key, top_val in data.items():
                if (not isinstance(top_val, dict)
                        or top_val.get('type') != 'struct'):
                    continue
                items = top_val.get('items')
                if not isinstance(items, list):
                    continue
                for idx, spec in enumerate(items):
                    if not isinstance(spec, dict):
                        continue
                    fname = spec.get('name') or f'<item {idx}>'
                    path = f'{top_key} -> {fname}'
                    self._audit_field_spec(file_label, path, spec)

    def _audit_field_spec(self, file_label: str, path: str,
                          spec: Dict[str, Any]) -> None:
        """Run the S4 check, then S1/S2 for numeric fields, on one field spec."""
        # S4: count > 0 (per dim)
        if 'count' in spec:
            for dim in self._iter_count_dims(spec.get('count')):
                resolved = self._safe_resolve_const(dim)
                if resolved is None:
                    continue
                if resolved <= 0:
                    self.report.error(
                        file_label, f'{path} -> count',
                        f"array count must be > 0, got {resolved}")

        # S1 / S2 only apply to numeric fields. Use the field's ui_type
        # (resolved through typedefs) to filter.
        ftype = spec.get('type')
        if not isinstance(ftype, str):
            return
        ui = _ctype_to_ui(self.resolve_alias(ftype))
        if ui not in ('int', 'float'):
            return

        mn = spec.get('min')
        mx = spec.get('max')
        # bool is a subclass of int; it is not accepted as a numeric bound.
        mn_num = mn if isinstance(mn, (int, float)) and not isinstance(mn, bool) else None
        mx_num = mx if isinstance(mx, (int, float)) and not isinstance(mx, bool) else None

        # S1: min <= max when both present and numeric.
        if mn_num is not None and mx_num is not None and mn_num > mx_num:
            self.report.error(
                file_label, path,
                f"min ({mn_num}) must be <= max ({mx_num})")

        # S2: step > 0 when present and numeric.
        if 'step' in spec:
            st = spec.get('step')
            if isinstance(st, bool) or not isinstance(st, (int, float)):
                self.report.error(
                    file_label, f'{path} -> step',
                    f"step must be a number, got {type(st).__name__}")
            elif st <= 0:
                self.report.error(
                    file_label, f'{path} -> step',
                    f"step must be > 0, got {st}")

    @staticmethod
    def _iter_count_dims(raw_count: Any):
        """Yield each dimension of a count: its entries if a list/tuple, else itself."""
        if isinstance(raw_count, (list, tuple)):
            for d in raw_count:
                yield d
        else:
            yield raw_count

    def _safe_resolve_const(self, ref: Any) -> Optional[int]:
        """Like resolve_const, but returns None on unresolvable input
        instead of raising. Used by audits that want to skip silently
        when a constant is unknown."""
        if isinstance(ref, bool):
            return None
        if isinstance(ref, int):
            return ref
        if isinstance(ref, str) and ref in self._consts:
            v = self._consts[ref]
            return v if isinstance(v, int) else None
        return None

    # ---- public API ------------------------------------------------------ #
    def has(self, type_name: str) -> bool:
        """True when `type_name` is a known enum or struct."""
        return type_name in self._enum_owner or type_name in self._struct_owner

    def is_enum(self, type_name: str) -> bool:
        """True when `type_name` is a known enum."""
        return type_name in self._enum_owner

    def is_struct(self, type_name: str) -> bool:
        """True when `type_name` is a known struct (or an aliased file basename)."""
        return type_name in self._struct_owner

    def known_struct_names(self) -> List[str]:
        """All struct cfg type names known to this registry (sorted)."""
        return sorted(self._struct_owner.keys())

    def resolve_const(self, ref: Any) -> int:
        """Resolve a count-like reference to an integer.

        An int is returned as-is; a string naming a known constant yields
        that constant converted with int(); anything else is converted with
        int(). Any value that cannot be converted yields 0.
        """
        if isinstance(ref, int):
            return ref
        if isinstance(ref, str) and ref in self._consts:
            try:
                return int(self._consts[ref])
            except (TypeError, ValueError, OverflowError):
                return 0
        try:
            return int(ref)
        except (TypeError, ValueError, OverflowError):
            return 0

    def resolve_alias(self, type_name: str) -> str:
        """Follow typedef alias chain to the base C type.

        E.g. with `typedefs = {"gain_t":"int32_t"}`, resolve_alias("gain_t")
        returns "int32_t". Non-aliases pass through unchanged. Cycles short-
        circuit at the first repeat.
        """
        seen = set()
        cur = type_name
        while isinstance(cur, str) and cur in self._typedefs and cur not in seen:
            seen.add(cur)
            cur = self._typedefs[cur]
        return cur

    def get_enum_items(self, enum_name: str) -> Dict[str, Any]:
        """Return the `items` dict of a known enum, or {} when unavailable.

        An unknown enum, a missing `items` key, or an `items` value that is
        not an object all yield an empty dict, so callers can rely on the
        declared return type.
        """
        owner = self._enum_owner.get(enum_name)
        if not owner:
            return {}
        items = self._raw_files[owner].get(enum_name, {}).get('items', {})
        return items if isinstance(items, dict) else {}

    def build(self, type_name: str) -> 'StructField':
        """Build the full Field tree rooted at the given struct type."""
        return self._build_struct(type_name, name=type_name, stack=[])

    # ---- internal builders ---------------------------------------------- #
    def _build_struct(self, type_name: str, name: str, stack: List[str]) -> 'StructField':
        """Build a StructField for `type_name`, labelled `name`.

        `stack` holds the struct types currently being resolved. A type
        already on the stack, or one unknown to the registry, yields a
        StructField without children.
        """
        if type_name in stack:
            # Cycle guard: emit empty struct rather than recursing forever.
            return StructField(name=name, c_type=type_name, children=[])
        owner = self._struct_owner.get(type_name)
        if not owner:
            return StructField(name=name, c_type=type_name, children=[])
        # If type_name itself is a file basename aliased to an actual struct, follow it.
        actual_name = self._alias.get(type_name, type_name)
        struct_def = self._raw_files[owner].get(actual_name, {})
        if not isinstance(struct_def, dict) or struct_def.get('type') != 'struct':
            # Fallback: scan file for first struct (defensive).
            for k, v in self._raw_files[owner].items():
                if isinstance(v, dict) and v.get('type') == 'struct':
                    struct_def = v
                    break
        items = struct_def.get('items', []) if isinstance(struct_def, dict) else []
        if not isinstance(items, list):
            # A malformed `items` (null, number, object, ...) is reported by
            # the keyword scan; here it just means "no children" rather
            # than a TypeError while iterating.
            items = []
        children: List[Field] = []
        new_stack = stack + [type_name]
        for raw in items:
            if not isinstance(raw, dict):
                continue
            child = self._build_field(raw, new_stack)
            if child is not None:
                children.append(child)
        return StructField(name=name, c_type=type_name, children=children)

    def _build_field(self, raw: Dict[str, Any], stack: List[str]) -> Optional['Field']:
        """Build the Field described by one field-spec dict.

        Returns None when the spec has no usable `name` or `type` (missing,
        empty, or a `type` that is not a string), or when an array's element
        cannot be built.
        """
        fname = raw.get('name')
        ftype = raw.get('type')
        # A non-string type (number, list, object) cannot be resolved and
        # would otherwise fail later on ftype.lower() or on a dict lookup
        # with an unhashable key; treat it like a missing type.
        if not fname or not ftype or not isinstance(ftype, str):
            return None
        meta = {k: raw[k] for k in OPTIONAL_KEYS if k in raw}
        default = raw.get('value')

        # Resolve typedef alias on the way in (e.g. gain_t -> int32_t).
        # We do this before any branch so 'count' + alias still works.
        ftype = self.resolve_alias(ftype)

        # Multi-dim count: count:[a, b] is treated as a flat 1D array of
        # length a*b (matching the C ABI: float m[3][4] is 12 contiguous
        # floats). meta.shape carries the original dims so the widget can
        # render line-broken text for readability. Storage stays 1D.
        raw_count = raw.get('count')
        flat_n = None
        shape = None
        if isinstance(raw_count, (list, tuple)):
            dims = [self.resolve_const(x) for x in raw_count]
            flat_n = 1
            for d in dims:
                flat_n *= d
            shape = dims

        # char[N] is conventionally a C string, not a byte array.
        # int8_t[N] keeps array semantics. The 'widget' meta can override either way.
        # Multi-dim char (e.g. char[3][16]) does NOT collapse to a string -
        # it stays a 1D array of (a*b) chars; keep it as plain array.
        if ('count' in raw and ftype.lower() == 'char'
                and 'widget' not in meta and shape is None):
            n = self.resolve_const(raw_count)
            if default is None:
                default = ''
            scalar_meta = dict(meta)
            scalar_meta['maxlen'] = n
            return ScalarField(name=fname, c_type=f'char[{n}]', default=default,
                               meta=scalar_meta, ui_type='str')

        # Array? -> ArrayField wrapping the element field
        if 'count' in raw:
            n = flat_n if shape is not None else self.resolve_const(raw_count)
            elem_raw = {k: v for k, v in raw.items() if k != 'count'}
            elem_raw['type'] = ftype
            element = self._build_field(elem_raw, stack)
            if element is None:
                return None
            arr_meta = dict(meta)
            if shape is not None:
                arr_meta['shape'] = shape
            return ArrayField(name=fname, c_type=ftype, default=default,
                              meta=arr_meta, count=n, element=element)

        # Struct?
        if self.is_struct(ftype):
            sub = self._build_struct(ftype, name=fname, stack=stack)
            sub.default = default
            sub.meta = meta
            return sub

        # Enum?
        if self.is_enum(ftype):
            items = self.get_enum_items(ftype)
            if default is None and items:
                # No explicit default: fall back to the first enumerator.
                default = next(iter(items.keys()))
            return EnumField(name=fname, c_type=ftype, default=default,
                             meta=meta, items=items)

        # Scalar
        ui_type = _ctype_to_ui(ftype)
        if default is None:
            default = _ui_zero(ui_type)
        return ScalarField(name=fname, c_type=ftype, default=default,
                           meta=meta, ui_type=ui_type)
683
+
684
+
685
+ # --------------------------------------------------------------------------- #
686
+ # Type helpers
687
+ # --------------------------------------------------------------------------- #
688
+
689
def _ctype_to_ui(c_type: str) -> str:
    """Map a C type name to a UI type: 'bool', 'int', 'float' or 'str'.

    Matching is case-insensitive. Names starting with 'int' or 'uint', plus
    'size_t', 'long' and 'short', are integers; 'float' and 'double' are
    floats; 'bool' and '_bool' are booleans. Everything else, including an
    empty or missing name, is 'str'.
    """
    lowered = c_type.lower() if c_type else ''
    if lowered == 'bool' or lowered == '_bool':
        return 'bool'
    looks_integral = (lowered.startswith('int')
                      or lowered.startswith('uint')
                      or lowered in ('size_t', 'long', 'short'))
    if looks_integral:
        return 'int'
    return 'float' if lowered in ('float', 'double') else 'str'
698
+
699
+
700
def _ui_zero(ui_type: str) -> Any:
    """Return the zero value for a UI type.

    'bool' -> False, 'int' -> 0, 'float' -> 0.0; any other value -> ''.
    """
    zero_values = (('bool', False), ('int', 0), ('float', 0.0))
    for known_type, zero in zero_values:
        if ui_type == known_type:
            return zero
    return ''
708
+
709
+
710
+ # --------------------------------------------------------------------------- #
711
+ # Instance values: merge user-provided dict into the schema tree
712
+ # --------------------------------------------------------------------------- #
713
+
714
def merge_instance(field_node: Field, provided: Any) -> Any:
    """Compute the effective value of `field_node` with `provided` laid over defaults.

    - ArrayField: a list of exactly `count` merged elements. Positions not
      covered by a provided list are merged against None. Exception: with
      meta widget == 'file' the array is a single path string, so the
      provided value (or the field default when it is None) is returned
      untouched for the GUI's file widget.
    - StructField: a dict of child-name -> merged child value. A provided
      value that is not a dict is ignored.
    - ScalarField / EnumField: the provided value, or the field default
      when nothing was provided.
    """
    if isinstance(field_node, ArrayField):
        if field_node.meta.get('widget') == 'file':
            if provided is None:
                return field_node.default
            return provided
        length = field_node.count
        supplied = provided if isinstance(provided, list) else []
        available = len(supplied)
        return [
            merge_instance(field_node.element,
                           supplied[pos] if pos < available else None)
            for pos in range(length)
        ]

    if isinstance(field_node, StructField):
        supplied = provided if isinstance(provided, dict) else {}
        return {
            child.name: merge_instance(child, supplied.get(child.name))
            for child in field_node.children
        }

    # scalar / enum
    return field_node.default if provided is None else provided
746
+
747
+
748
+ # --------------------------------------------------------------------------- #
749
+ # Flow validator: cross-checks abc.json against a SchemaRegistry
750
+ # --------------------------------------------------------------------------- #
751
+
752
# Structural keywords of a stage block in abc.json. Any other key in a stage
# block is reported as an unknown keyword. (The values inside `items` are
# user data rather than keywords; L2/L3/L4 deal with those.)
STAGE_KEYWORDS = frozenset(('type', 'items'))

# Group-level keys of abc.json that are reserved, i.e. never stage names.
GROUP_RESERVED = IGNORED_KEYWORDS | frozenset(('Flow',))
759
+
760
+
761
+ class FlowValidator:
762
+ """Validate an abc.json flow file against a loaded SchemaRegistry.
763
+
764
+ Layers (collect-then-display, never stop on first error):
765
+ A : structural keyword spelling in stage blocks (type / items)
766
+ A2 : Flow list shape
767
+ L1 : each stage.type names a struct cfg_t known to the registry
768
+ L2 : each items key is a real field of that struct
769
+ L3 : each items value is type-compatible with the field's c_type
770
+ (C-style coercion: bool<->int, int+0.0 ok, float<-int ok, etc.)
771
+ L4 : enum-typed values must be a known enumerator string
772
+ L5 : Flow listed stages must be defined; defined stages should appear
773
+ in Flow (warning if not, since orphan stages are recoverable)
774
+ """
775
+
776
def __init__(self, registry: 'SchemaRegistry', flow_file: str):
    """Remember the registry and flow-file path, and start an empty report."""
    self.registry, self.flow_file = registry, flow_file
    self.report = LoadReport()
780
+
781
+ # ---- entrypoint ----------------------------------------------------- #
782
def validate(self) -> 'LoadReport':
    """Load the flow file and validate every group in it.

    Returns self.report in all cases. A missing, unreadable, badly encoded
    or unparsable file, or a top level that is not an object, is recorded
    as a single error and validation stops there. Otherwise each top-level
    value must be an object and is passed to _validate_group; a non-object
    group is reported and the remaining groups are still checked.
    """
    if not os.path.exists(self.flow_file):
        self.report.error(self.flow_file, '', 'flow file not found')
        return self.report
    try:
        with open(self.flow_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        self.report.error(
            self.flow_file, '',
            f'JSON parse error: {e.msg} (line {e.lineno}, col {e.colno})')
        return self.report
    except UnicodeDecodeError as e:
        # Raised while reading a file that is not valid UTF-8. It is
        # neither a JSONDecodeError nor an OSError, so it must be caught
        # separately to be reported instead of propagating to the caller.
        self.report.error(self.flow_file, '', f'file is not valid UTF-8: {e}')
        return self.report
    except OSError as e:
        self.report.error(self.flow_file, '', f'cannot read file: {e}')
        return self.report
    if not isinstance(data, dict):
        self.report.error(
            self.flow_file, '',
            f'top-level must be an object, got {type(data).__name__}')
        return self.report

    for group_key, group in data.items():
        if not isinstance(group, dict):
            self.report.error(
                self.flow_file, group_key,
                f"group must be an object, got {type(group).__name__}")
            continue
        self._validate_group(group_key, group)
    return self.report
811
+
812
+ # ---- group / Flow / stages ----------------------------------------- #
813
def _find_flow_lookalike(self, group: Dict[str, Any]) -> Optional[str]:
    """Return the first key of `group` that looks like a misspelled 'Flow'.

    Non-string keys, the exact key 'Flow' and ignored metadata keys are
    never candidates. A candidate matches when its lowercased form equals
    'flow' (case variants such as 'flow' or 'FLOW'), or when difflib rates
    it at least 0.6 similar to 'flow' (transposed, inserted or dropped
    characters such as 'Flwo' or 'F1low'). Returns None when no key
    matches.
    """
    for key in group:
        if not isinstance(key, str):
            continue
        if key == 'Flow' or key in IGNORED_KEYWORDS:
            continue
        folded = key.lower()
        if folded == 'flow':
            return key
        if difflib.get_close_matches(folded, ['flow'], n=1, cutoff=0.6):
            return key
    return None
828
+
829
+ def _validate_group(self, group_key: str, group: Dict[str, Any]) -> None:
830
+ # A2: Flow shape. flow_present marks whether we have a trustworthy
831
+ # Flow list to compare stages against; if not, suppress the
832
+ # "defined but not listed in Flow" warning entirely (it would
833
+ # otherwise fire for every stage and add noise on top of the real
834
+ # error).
835
+ flow_val = group.get('Flow')
836
+ flow_list: List[str]
837
+ flow_present: bool
838
+ if flow_val is None:
839
+ # Look for a near-miss key (e.g. 'F1low', 'flow', 'Flwo') so the
840
+ # user gets one actionable error instead of two unrelated ones.
841
+ misspelled = self._find_flow_lookalike(group)
842
+ if misspelled is not None:
843
+ self.report.error(
844
+ self.flow_file, f'{group_key} -> {misspelled}',
845
+ f"unknown key '{misspelled}'", "Did you mean 'Flow'?")
846
+ else:
847
+ self.report.error(self.flow_file, group_key,
848
+ "group is missing 'Flow' list")
849
+ flow_list = []
850
+ flow_present = False
851
+ elif not isinstance(flow_val, list):
852
+ self.report.error(
853
+ self.flow_file, f'{group_key} -> Flow',
854
+ f"Flow must be a list, got {type(flow_val).__name__}")
855
+ flow_list = []
856
+ flow_present = False
857
+ else:
858
+ flow_list = [s for s in flow_val if isinstance(s, str)]
859
+ flow_present = True
860
+
861
+ # Stage candidates: every key that is not a group-reserved key,
862
+ # excluding any 'Flow' look-alike already reported above (so it
863
+ # does not get re-flagged as a malformed stage).
864
+ misspelled_flow = None
865
+ if not flow_present and flow_val is None:
866
+ misspelled_flow = self._find_flow_lookalike(group)
867
+ stage_keys = [k for k in group.keys()
868
+ if k not in GROUP_RESERVED and k != misspelled_flow]
869
+
870
+ # L5 - part 1: every stage in Flow must be defined.
871
+ for stage_name in flow_list:
872
+ if stage_name not in stage_keys:
873
+ self.report.error(
874
+ self.flow_file,
875
+ f'{group_key} -> Flow',
876
+ f"Flow references stage '{stage_name}' but no such "
877
+ f"stage block is defined")
878
+
879
+ # L5 - part 2: every defined stage should appear in Flow (warning).
880
+ # Skip when Flow is missing/malformed - the comparison baseline is
881
+ # untrustworthy and would only generate noise.
882
+ if flow_present:
883
+ flow_set = set(flow_list)
884
+ for stage_name in stage_keys:
885
+ if stage_name not in flow_set:
886
+ self.report.warning(
887
+ self.flow_file,
888
+ f'{group_key} -> {stage_name}',
889
+ f"stage '{stage_name}' is defined but not listed in Flow")
890
+
891
+ for stage_name in stage_keys:
892
+ stage = group[stage_name]
893
+ if not isinstance(stage, dict):
894
+ self.report.error(
895
+ self.flow_file,
896
+ f'{group_key} -> {stage_name}',
897
+ f"stage must be an object, got {type(stage).__name__}")
898
+ continue
899
+ self._validate_stage(group_key, stage_name, stage)
900
+
901
def _validate_stage(self, group_key: str, stage_name: str,
                    stage: Dict[str, Any]) -> None:
    """Validate a single stage block inside a group.

    Steps, each recording its findings on ``self.report``:
      1. Flag every key that is neither a stage keyword nor an ignored
         keyword, suggesting the closest known word when one exists.
      2. Require 'type' to be a string naming a registered struct type.
      3. Require 'items' to be an object, then compare it against the
         schema built for that type via ``_check_struct_values``.

    If a key was already flagged as a probable typo of 'type' or
    'items', the matching "stage is missing ..." error is suppressed so
    one mistake yields one error. Validation of the stage stops at the
    first failing step from 2 onwards.
    """
    where = f'{group_key} -> {stage_name}'
    source = self.flow_file
    fail = self.report.error

    # Step 1: keyword spelling.
    known_words = STAGE_KEYWORDS | IGNORED_KEYWORDS
    candidates = list(known_words)
    typo_targets: set = set()
    for key in stage:
        if not isinstance(key, str) or key in known_words:
            continue
        close = difflib.get_close_matches(key, candidates, n=1, cutoff=0.6)
        hint = None
        if close:
            best = close[0]
            hint = f"Did you mean '{best}'?"
            if best in STAGE_KEYWORDS:
                typo_targets.add(best)
        fail(source, f'{where} -> {key}',
             f"unknown keyword '{key}' in stage block", hint)

    # Step 2 (L1): 'type' must name a known struct cfg type.
    type_name = stage.get('type')
    type_path = f'{where} -> type'
    if type_name is None:
        if 'type' not in typo_targets:
            fail(source, where, "stage is missing 'type'")
        return
    if not isinstance(type_name, str):
        fail(source, type_path,
             f"'type' must be a string, got {type(type_name).__name__}")
        return
    if not self.registry.has(type_name):
        known = self.registry.known_struct_names()
        hint = None
        if known:
            hint = _suggest_keyword(type_name, known)
        fail(source, type_path, f"unknown cfg type '{type_name}'", hint)
        return
    if not self.registry.is_struct(type_name):
        fail(source, type_path,
             f"stage cfg type '{type_name}' must be a struct, not "
             f"an enum")
        return

    # Step 3 (L2~L4): walk the schema tree against the provided values.
    provided = stage.get('items')
    if provided is None:
        if 'items' not in typo_targets:
            fail(source, where, "stage is missing 'items'")
        return
    if not isinstance(provided, dict):
        fail(source, f'{where} -> items',
             f"'items' must be an object, got {type(provided).__name__}")
        return
    self._check_struct_values(
        where, self.registry.build(type_name), provided)
969
+
970
+ # ---- value-vs-schema recursion -------------------------------------- #
971
+ def _check_struct_values(self, path: str, struct: 'StructField',
972
+ provided: Dict[str, Any]) -> None:
973
+ children_by_name = {ch.name: ch for ch in struct.children}
974
+ for key, val in provided.items():
975
+ sub_path = f'{path} -> {key}'
976
+ child = children_by_name.get(key)
977
+ if child is None:
978
+ # L2
979
+ suggestion = _suggest_keyword(key, list(children_by_name))
980
+ self.report.error(
981
+ self.flow_file, sub_path,
982
+ f"unknown field '{key}' (cfg type {struct.c_type})",
983
+ suggestion,
984
+ )
985
+ continue
986
+ self._check_field_value(sub_path, child, val)
987
+
988
def _check_field_value(self, path: str, field_node: 'Field',
                       value: Any) -> None:
    """Check one provided value against the schema node that describes it.

    Dispatches on the node kind:
      * struct: value must be an object; its entries are checked
        recursively through ``_check_struct_values``.
      * array: with ``widget == 'file'`` the value must be a single
        string; otherwise it must be a list, a length different from
        the declared count is a warning, and each element is checked
        recursively against the element node.
      * enum (L4): value must be a string naming one of the enum items.
      * scalar: delegated to ``_check_scalar``.
    Any other node kind is left unchecked.
    """
    got = type(value).__name__
    source = self.flow_file

    if isinstance(field_node, StructField):
        if isinstance(value, dict):
            self._check_struct_values(path, field_node, value)
        else:
            self.report.error(
                source, path,
                f"expected object for struct field '{field_node.name}', "
                f"got {got}")
    elif isinstance(field_node, ArrayField):
        if field_node.meta.get('widget') == 'file':
            # widget=file: the array is represented as one path string.
            if not isinstance(value, str):
                self.report.error(
                    source, path,
                    f"expected file path string for '{field_node.name}', "
                    f"got {got}")
        elif not isinstance(value, list):
            self.report.error(
                source, path,
                f"expected list for array field '{field_node.name}' "
                f"(count={field_node.count}), got {got}")
        else:
            declared = field_node.count
            if len(value) != declared:
                self.report.warning(
                    source, path,
                    f"array length {len(value)} does not match "
                    f"declared count {declared}")
            for index, item in enumerate(value):
                self._check_field_value(
                    f'{path}[{index}]', field_node.element, item)
    elif isinstance(field_node, EnumField):
        if not isinstance(value, str):
            self.report.error(
                source, path,
                f"expected enumerator string for '{field_node.name}' "
                f"(enum {field_node.c_type}), got {got}")
        elif value not in field_node.items:
            hint = _suggest_keyword(value, list(field_node.items))
            self.report.error(
                source, path,
                f"unknown enumerator '{value}' for enum "
                f"{field_node.c_type}", hint)
    elif isinstance(field_node, ScalarField):
        self._check_scalar(path, field_node, value)
1040
+
1041
+ def _check_scalar(self, path: str, field_node: 'ScalarField',
1042
+ value: Any) -> None:
1043
+ ui = field_node.ui_type
1044
+ # bool: accepts bool or 0/1 ints. No range check (only two values).
1045
+ if ui == 'bool':
1046
+ if isinstance(value, bool):
1047
+ return
1048
+ if isinstance(value, int) and value in (0, 1):
1049
+ return
1050
+ self.report.error(
1051
+ self.flow_file, path,
1052
+ f"expected bool for '{field_node.name}' "
1053
+ f"(c_type {field_node.c_type}), got "
1054
+ f"{type(value).__name__}")
1055
+ return
1056
+ # int: accepts bool (treated as 0/1), int, or float with no
1057
+ # fractional part. After type check passes, S3 range check runs.
1058
+ if ui == 'int':
1059
+ if isinstance(value, bool):
1060
+ self._check_range(path, field_node, int(value))
1061
+ return
1062
+ if isinstance(value, int):
1063
+ self._check_range(path, field_node, value)
1064
+ return
1065
+ if isinstance(value, float) and value.is_integer():
1066
+ self._check_range(path, field_node, int(value))
1067
+ return
1068
+ self.report.error(
1069
+ self.flow_file, path,
1070
+ f"expected integer for '{field_node.name}' "
1071
+ f"(c_type {field_node.c_type}), got "
1072
+ f"{type(value).__name__}"
1073
+ + (f" with fractional part {value!r}"
1074
+ if isinstance(value, float) else ''))
1075
+ return
1076
+ # float: accepts bool/int/float; S3 range check after type passes.
1077
+ if ui == 'float':
1078
+ if isinstance(value, bool):
1079
+ self._check_range(path, field_node, float(int(value)))
1080
+ return
1081
+ if isinstance(value, (int, float)):
1082
+ self._check_range(path, field_node, float(value))
1083
+ return
1084
+ self.report.error(
1085
+ self.flow_file, path,
1086
+ f"expected number for '{field_node.name}' "
1087
+ f"(c_type {field_node.c_type}), got "
1088
+ f"{type(value).__name__}")
1089
+ return
1090
+ # str / char[N]: accepts strings only.
1091
+ if ui == 'str':
1092
+ if isinstance(value, str):
1093
+ return
1094
+ self.report.error(
1095
+ self.flow_file, path,
1096
+ f"expected string for '{field_node.name}' "
1097
+ f"(c_type {field_node.c_type}), got "
1098
+ f"{type(value).__name__}")
1099
+ return
1100
+
1101
+ def _check_range(self, path: str, field_node: 'ScalarField',
1102
+ value) -> None:
1103
+ """S3: numeric value must fall in [min, max] when declared."""
1104
+ meta = field_node.meta
1105
+ mn = meta.get('min')
1106
+ mx = meta.get('max')
1107
+ mn_ok = isinstance(mn, (int, float)) and not isinstance(mn, bool)
1108
+ mx_ok = isinstance(mx, (int, float)) and not isinstance(mx, bool)
1109
+ if mn_ok and value < mn:
1110
+ self.report.error(
1111
+ self.flow_file, path,
1112
+ f"value {value} is below declared min {mn}")
1113
+ return
1114
+ if mx_ok and value > mx:
1115
+ self.report.error(
1116
+ self.flow_file, path,
1117
+ f"value {value} exceeds declared max {mx}")
1118
+ return