tablecodec 0.0.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tablecodec/__init__.py ADDED
@@ -0,0 +1,29 @@
1
+ """tablecodec — neutral IR + codec registry for image-based table datasets.
2
+
3
+ Public API (M1):
4
+
5
+ - :class:`tablecodec.ir.BBox`, :class:`GridCell`, :class:`TableSample`
6
+ - :func:`validate` and :data:`profiles`
7
+ - :class:`ValidationError`
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from tablecodec.ir import BBox, GridCell, TableSample
13
+ from tablecodec.loss import LossReport, analyze_loss
14
+ from tablecodec.validate import Profile, ValidationError, profiles, validate
15
+
16
+ __all__ = [
17
+ "BBox",
18
+ "GridCell",
19
+ "LossReport",
20
+ "Profile",
21
+ "TableSample",
22
+ "ValidationError",
23
+ "__version__",
24
+ "analyze_loss",
25
+ "profiles",
26
+ "validate",
27
+ ]
28
+
29
+ __version__: str = "0.0.18"
@@ -0,0 +1,311 @@
1
+ """SPEC §5.2 invariants I-01..I-07 as independent check functions.
2
+
3
+ Each ``check_iXX`` returns a list of :class:`ValidationError` describing every
4
+ violation it found (empty list = pass). Functions never raise on data; they
5
+ raise only on programmer error (e.g. wrong type passed in).
6
+
7
+ Functions are pure and read-only. They never mutate the input.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass
13
+
14
+ from tablecodec.ir import TableSample
15
+
16
+ __all__ = [
17
+ "ValidationError",
18
+ "check_i01_nrows_ncols_positive",
19
+ "check_i02_cell_in_bounds",
20
+ "check_i03_span_in_bounds",
21
+ "check_i04_grid_exact_cover",
22
+ "check_i05_bbox_well_formed",
23
+ "check_i06_header_contiguous_top",
24
+ "check_i07_tokens_is_tuple",
25
+ ]
26
+
27
+ # Cap how many gap coordinates I-04 enumerates in its error message,
28
+ # so a totally empty grid does not produce a multi-megabyte string.
29
+ _GAP_PREVIEW_LIMIT = 5
30
+
31
+
32
+ @dataclass(frozen=True, slots=True)
33
+ class ValidationError:
34
+ """A single invariant violation.
35
+
36
+ Attributes:
37
+ invariant: The SPEC §5.2 id (``"I-01"`` ... ``"I-07"``).
38
+ message: Human-readable description of the violation.
39
+ cell_index: Index into ``TableSample.cells`` if applicable, else
40
+ ``None`` (e.g. grid-level invariants like I-01, I-04).
41
+ """
42
+
43
+ invariant: str
44
+ message: str
45
+ cell_index: int | None = None
46
+
47
+
48
+ # ---------- I-01: nrows >= 1 and ncols >= 1 ----------
49
+
50
+
51
def check_i01_nrows_ncols_positive(sample: TableSample) -> list[ValidationError]:
    """I-01: both grid dimensions must be at least 1.

    Returns one grid-level error (``cell_index`` left as ``None``) per
    offending dimension, ``nrows`` first, or an empty list when both are
    valid.
    """
    dimensions = (("nrows", sample.nrows), ("ncols", sample.ncols))
    return [
        ValidationError(invariant="I-01", message=f"{axis} must be >= 1, got {size}")
        for axis, size in dimensions
        if size < 1
    ]
62
+
63
+
64
+ # ---------- I-02: 0 <= row < nrows, 0 <= col < ncols ----------
65
+
66
+
67
def check_i02_cell_in_bounds(sample: TableSample) -> list[ValidationError]:
    """I-02: every cell anchor must lie inside the grid.

    A cell passes when ``0 <= row < nrows`` and ``0 <= col < ncols``. The
    row and column are checked independently, so a cell that is out of
    range on both axes yields two errors (row first, then column).
    """
    found: list[ValidationError] = []
    for position, anchor in enumerate(sample.cells):
        axes = (
            ("row", anchor.row, sample.nrows),
            ("col", anchor.col, sample.ncols),
        )
        for label, value, limit in axes:
            if value < 0 or value >= limit:
                found.append(
                    ValidationError(
                        invariant="I-02",
                        message=f"{label} {value} out of [0, {limit}) at cell index {position}",
                        cell_index=position,
                    )
                )
    return found
87
+
88
+
89
+ # ---------- I-03: row + rowspan <= nrows, col + colspan <= ncols ----------
90
+
91
+
92
def check_i03_span_in_bounds(sample: TableSample) -> list[ValidationError]:
    """I-03: spans must be positive and must not run past the grid edge.

    Four independent conditions are tested per cell, and reported in this
    order: ``rowspan >= 1``, ``colspan >= 1`` (both from SPEC §5.1),
    ``row + rowspan <= nrows`` and ``col + colspan <= ncols``.
    """
    found: list[ValidationError] = []
    for position, anchor in enumerate(sample.cells):
        row_end = anchor.row + anchor.rowspan
        col_end = anchor.col + anchor.colspan
        # (violated?, message) pairs, kept in the order errors are reported.
        candidates = (
            (
                anchor.rowspan < 1,
                f"rowspan must be >= 1, got {anchor.rowspan} at cell index {position}",
            ),
            (
                anchor.colspan < 1,
                f"colspan must be >= 1, got {anchor.colspan} at cell index {position}",
            ),
            (
                row_end > sample.nrows,
                f"row+rowspan = {row_end} exceeds nrows {sample.nrows} at cell index {position}",
            ),
            (
                col_end > sample.ncols,
                f"col+colspan = {col_end} exceeds ncols {sample.ncols} at cell index {position}",
            ),
        )
        found.extend(
            ValidationError(invariant="I-03", message=text, cell_index=position)
            for violated, text in candidates
            if violated
        )
    return found
135
+
136
+
137
+ # ---------- I-04: union of cell footprints exactly covers the grid ----------
138
+
139
+
140
def check_i04_grid_exact_cover(sample: TableSample) -> list[ValidationError]:
    """I-04: cell footprints must tile the ``nrows × ncols`` grid exactly.

    A per-position hit counter is kept for the whole grid. Each cell bumps
    the counter of every position in its footprint; a counter going above
    one is an overlap (reported once per extra hit, tagged with the index
    of the cell that caused it), and a counter left at zero is a gap.

    Footprints are clipped to the grid and non-positive spans are treated
    as 1, so out-of-range cells (the business of I-02/I-03) never make
    this check raise; they simply contribute less coverage. If the grid
    itself is degenerate (I-01 violated) nothing is reported here.

    All gaps are folded into a single grid-level error that lists at most
    ``_GAP_PREVIEW_LIMIT`` coordinates.

    Cost is proportional to ``nrows * ncols`` plus the total clipped
    footprint area.
    """
    height, width = sample.nrows, sample.ncols
    if height < 1 or width < 1:
        # I-01 owns this failure; stay silent rather than double-report.
        return []

    found: list[ValidationError] = []
    hits = [[0] * width for _ in range(height)]

    for position, anchor in enumerate(sample.cells):
        # Clip the footprint so indexing below is always safe.
        top = max(0, anchor.row)
        left = max(0, anchor.col)
        bottom = min(height, anchor.row + max(1, anchor.rowspan))
        right = min(width, anchor.col + max(1, anchor.colspan))

        for r in range(top, bottom):
            line = hits[r]
            for c in range(left, right):
                line[c] += 1
                if line[c] <= 1:
                    continue
                found.append(
                    ValidationError(
                        invariant="I-04",
                        message=(
                            f"overlap at (row={r}, col={c}); cell index {position} "
                            f"overlaps a previously placed cell"
                        ),
                        cell_index=position,
                    )
                )

    holes: list[tuple[int, int]] = []
    for r, line in enumerate(hits):
        holes.extend((r, c) for c, count in enumerate(line) if count == 0)

    if holes:
        # One summary error keeps the report small even for an empty grid.
        shown = holes[:_GAP_PREVIEW_LIMIT]
        hidden = len(holes) - _GAP_PREVIEW_LIMIT
        tail = f" (+{hidden} more)" if hidden > 0 else ""
        found.append(
            ValidationError(
                invariant="I-04",
                message=f"gap(s) in grid coverage at {shown}{tail}",
            )
        )

    return found
202
+
203
+
204
+ # ---------- I-05: bbox well-formed when set ----------
205
+
206
+
207
def _is_content_empty(tokens: tuple[str, ...]) -> bool:
    """Whether a cell's tokens localize no content.

    A cell is content-empty when its tokens, concatenated, contain no
    non-whitespace character (`tokens == ()`, `("",)`, `(" ",)`, ...).
    Markup-bearing tokens (e.g. `("<sup>",)`) are NOT empty: the core IR
    does not model HTML, so it treats any non-whitespace token as content
    (spec §5.2, ADR 0010).

    Malformed tokens (``None``, or a sequence holding non-strings) cannot
    be joined. They are not provably empty, so this returns ``False``
    instead of raising; reporting the malformed value is left to I-07.
    """
    try:
        return "".join(tokens).strip() == ""
    except TypeError:
        # ``str.join`` rejects non-iterables and non-str items. Checks must
        # never raise on data, and an unreadable cell must not silently
        # suppress the bbox geometry check.
        return False


def check_i05_bbox_well_formed(sample: TableSample) -> list[ValidationError]:
    """I-05: a bbox on a content-bearing cell must have positive extent.

    Cells with no bbox are skipped, as are content-empty cells (see
    ``_is_content_empty``). For every remaining cell the bbox is unpacked
    as ``(x0, y0, x1, y1)`` and must satisfy ``x0 < x1`` and ``y0 < y1``;
    each failed axis yields its own error tagged with the cell index.

    Cells whose ``tokens`` are malformed are treated as content-bearing,
    so their bbox is still checked and this function does not raise.
    """
    errors: list[ValidationError] = []
    for idx, cell in enumerate(sample.cells):
        bbox = cell.bbox
        if bbox is None:
            continue
        if _is_content_empty(cell.tokens):
            # I-05 guards a box that *localizes content*. An empty cell
            # localizes nothing and datasets routinely give it a zero-area
            # placeholder box, so its geometry is out of scope (spec §5.2,
            # ADR 0010, refining ADR 0007). The bbox itself is still kept
            # on the IR.
            continue
        x0, y0, x1, y1 = bbox
        if x0 >= x1:
            errors.append(
                ValidationError(
                    invariant="I-05",
                    message=(f"bbox x0 >= x1 ({x0} >= {x1}) at cell index {idx}"),
                    cell_index=idx,
                )
            )
        if y0 >= y1:
            errors.append(
                ValidationError(
                    invariant="I-05",
                    message=(f"bbox y0 >= y1 ({y0} >= {y1}) at cell index {idx}"),
                    cell_index=idx,
                )
            )
    return errors
250
+
251
+
252
+ # ---------- I-06: header cells form a contiguous top region ----------
253
+
254
+
255
def check_i06_header_contiguous_top(sample: TableSample) -> list[ValidationError]:
    """I-06: header cells must sit strictly above the first body row.

    "Contiguous top region" is interpreted through anchor rows: let ``H``
    be the smallest ``row`` of any cell whose role is ``"body"``. Every
    cell whose role is ``"header"`` must then be anchored at ``row < H``.
    A table with no header cells, or with no body cells, passes trivially.
    """
    cells = sample.cells
    body_rows = [c.row for c in cells if c.role == "body"]
    has_header = any(c.role == "header" for c in cells)
    if not has_header or not body_rows:
        # Nothing to compare: either no headers at all, or headers only.
        return []

    boundary = min(body_rows)
    return [
        ValidationError(
            invariant="I-06",
            message=(
                f"header cell at row {c.row} (cell index {position}) appears "
                f"at or below the first body row {boundary}"
            ),
            cell_index=position,
        )
        for position, c in enumerate(cells)
        if c.role == "header" and c.row >= boundary
    ]
289
+
290
+
291
+ # ---------- I-07: tokens is a tuple (never None) ----------
292
+
293
+
294
def check_i07_tokens_is_tuple(sample: TableSample) -> list[ValidationError]:
    """I-07: every cell's ``tokens`` must be a tuple (an empty one is fine).

    The static type already says so, but callers can sidestep it with
    ``object.__setattr__`` and plant ``None`` or a list. SPEC §5.2 I-07
    requires that to be reported, hence the deliberate runtime check.
    """
    return [
        ValidationError(
            invariant="I-07",
            message=(
                f"tokens must be a tuple (possibly empty) at cell index "
                f"{position}, got {type(c.tokens).__name__}"
            ),
            cell_index=position,
        )
        for position, c in enumerate(sample.cells)
        if not isinstance(c.tokens, tuple)  # pyright: ignore[reportUnnecessaryIsInstance]
    ]
tablecodec/cli.py ADDED
@@ -0,0 +1,314 @@
1
+ """Click-based command line interface (SPEC §12).
2
+
3
+ Optional: requires the ``[cli]`` extra (``pip install "tablecodec[cli]"``).
4
+ Importing this module without click installed will fail with a clear
5
+ ``ImportError`` — by design, since the rest of the package must run
6
+ without click.
7
+
8
+ Subcommands implemented for M6:
9
+
10
+ - ``validate`` — run a profile against every record in a file.
11
+ - ``convert`` — re-encode a file from one codec to another.
12
+ - ``stats`` — print sample / cell / span counts.
13
+ - ``diff`` — record-by-record diff of two same-codec files.
14
+ - ``analyze-loss`` — static loss report for a codec pair.
15
+ - ``codecs list`` — list registered codec names.
16
+
17
+ All commands stream their input. Exit codes:
18
+
19
+ - ``0`` success / no findings.
20
+ - ``1`` validation failures, diff mismatches, or recoverable errors.
21
+ - ``2`` argument / usage error (click default).
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import dataclasses
27
+ import json
28
+ import sys
29
+ from collections.abc import Iterable, Iterator
30
+ from pathlib import Path
31
+ from typing import Any
32
+
33
+ import click
34
+
35
+ from tablecodec import codecs
36
+ from tablecodec import io as tio
37
+ from tablecodec.codecs._base import Codec
38
+ from tablecodec.codecs.builtins import BUILTIN_CODECS
39
+ from tablecodec.ir import TableSample
40
+ from tablecodec.loss import analyze_loss
41
+ from tablecodec.validate import Profile, profiles, validate
42
+
43
+ _PROFILE_NAMES = ["LENIENT", "DEFAULT", "PUBTABNET_2_0", "TABLEFORMER", "STRICT"]
44
+
45
+
46
def _ensure_builtins_registered() -> None:
    """Populate the codec registry with built-ins, then load plugins.

    Built-in codecs whose name is already registered are left alone, so
    calling this more than once does not re-register them.
    """
    already_known = set(codecs.list_codecs())
    for builtin in BUILTIN_CODECS:
        if builtin.name in already_known:
            continue
        codecs.register(builtin)
    # SPEC §6.2: also pick up third-party codecs that self-register via the
    # `tablecodec.codecs` entry-point group (idempotent / no-op if none).
    codecs.load_plugins()
54
+
55
+
56
def _resolve_codec_name(name: str) -> Codec:
    """Look up a registered codec by name.

    Raises:
        click.UsageError: if the registry lookup raises ``KeyError``; the
            message points the user at ``tablecodec codecs list``.
    """
    try:
        resolved = codecs.get(name)
    except KeyError as exc:
        raise click.UsageError(
            f"unknown codec {name!r}. Run `tablecodec codecs list` to see registered names."
        ) from exc
    return resolved
62
+
63
+
64
def _resolve_profile(name: str) -> Profile:
    """Map a case-insensitive profile name to its ``Profile`` object.

    The name is upper-cased and must be one of ``_PROFILE_NAMES``; the
    profile is then read as an attribute of ``profiles``.

    Raises:
        click.UsageError: if the name is not a known profile.
    """
    key = name.upper()
    if key in _PROFILE_NAMES:
        resolved: Profile = getattr(profiles, key)
        return resolved
    raise click.UsageError(
        f"unknown profile {name!r}. Available: {', '.join(_PROFILE_NAMES)}."
    )
71
+
72
+
73
@click.group()
@click.version_option(package_name="tablecodec")
def main() -> None:
    """tablecodec command-line interface."""
    # Group callback: fill the codec registry here so that every subcommand
    # can resolve codec names without registering anything itself.
    _ensure_builtins_registered()
78
+
79
+
80
+ # ---------- validate ----------
81
+
82
+
83
@main.command("validate")
@click.argument("source", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option(
    "--profile",
    "profile_name",
    default="DEFAULT",
    show_default=True,
    help="Validation profile (see SPEC §8).",
)
@click.option(
    "--codec",
    "codec_name",
    default=None,
    help="Codec name; if omitted, auto-detect from the file.",
)
@click.option("--json", "as_json", is_flag=True, help="Emit machine-readable JSON.")
def validate_cmd(source: Path, profile_name: str, codec_name: str | None, as_json: bool) -> None:
    """Validate every record in SOURCE against the chosen profile."""
    # Findings are collected for the whole file and printed at the end, either
    # as one JSON document or as one line per finding plus a summary line.
    # The process exits with status 1 when at least one finding was recorded.
    profile = _resolve_profile(profile_name)
    codec = None
    if codec_name:
        codec = _resolve_codec_name(codec_name)

    findings: list[dict[str, Any]] = []
    sample_count = 0
    for sample_index, sample in enumerate(tio.open(source, codec=codec)):
        sample_count = sample_index + 1
        findings.extend(
            {
                "record": sample_index,
                "filename": sample.filename,
                "invariant": err.invariant,
                "message": err.message,
                "cell_index": err.cell_index,
            }
            for err in validate(sample, profile=profile)
        )

    if as_json:
        document = {
            "source": str(source),
            "profile": profile.name,
            "sample_count": sample_count,
            "findings": findings,
        }
        click.echo(json.dumps(document, ensure_ascii=False))
    else:
        for finding in findings:
            click.echo(f"record={finding['record']} {finding['invariant']}: {finding['message']}")
        click.echo(f"checked {sample_count} record(s), {len(findings)} finding(s)")

    if findings:
        sys.exit(1)
135
+
136
+
137
+ # ---------- convert ----------
138
+
139
+
140
@main.command("convert")
@click.argument("input_path", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.argument("output_path", type=click.Path(dir_okay=False, path_type=Path))
@click.option("--from", "from_codec", required=True, help="Source codec name.")
@click.option("--to", "to_codec", required=True, help="Target codec name.")
@click.option(
    "--dry-run",
    is_flag=True,
    help="Do not read the input; print the static analyze_loss report and exit.",
)
def convert_cmd(
    input_path: Path,
    output_path: Path,
    from_codec: str,
    to_codec: str,
    dry_run: bool,
) -> None:
    """Re-encode INPUT_PATH from --from to --to, writing OUTPUT_PATH."""
    # Both names are resolved before branching, so an unknown codec is a
    # usage error under --dry-run as well.
    src = _resolve_codec_name(from_codec)
    tgt = _resolve_codec_name(to_codec)

    if not dry_run:
        written = _stream_convert(input_path, output_path, src, tgt)
        click.echo(f"wrote {written} record(s) to {output_path}")
        return

    # Dry run: no file is touched, only the static loss report is printed.
    report = analyze_loss(source=from_codec, target=to_codec)
    payload = dataclasses.asdict(_serialize_report(report))
    click.echo(json.dumps(payload))
166
+
167
+
168
def _stream_convert(input_path: Path, output_path: Path, src: Codec, tgt: Codec) -> int:
    """Stream records from ``input_path`` through ``src`` into ``tgt``.

    The input is read lazily: ``tgt.write`` pulls samples from a generator
    that wraps ``src.read``, so samples are decoded only as the writer
    asks for them.

    Args:
        input_path: File decoded with ``src`` (opened as UTF-8 text).
        output_path: File encoded with ``tgt`` (opened as UTF-8 text,
            truncating any existing content).
        src: Codec used to read the input.
        tgt: Codec used to write the output.

    Returns:
        The number of samples the writer pulled from the reader.

    Raises:
        click.UsageError: if ``output_path`` refers to the same file as
            ``input_path``.
    """
    # The sink is opened with "w" (truncate) before the reader has touched the
    # input, so converting a file onto itself would wipe the data first and
    # then "convert" an empty file. Refuse instead of silently losing data.
    if output_path.exists() and output_path.samefile(input_path):
        msg = f"input and output are the same file: {input_path}"
        raise click.UsageError(msg)

    written = 0

    def _consume() -> Iterator[TableSample]:
        nonlocal written
        with input_path.open(encoding="utf-8") as handle:
            for sample in src.read(handle):
                written += 1
                yield sample

    with output_path.open("w", encoding="utf-8") as sink:
        tgt.write(_consume(), sink)
    return written
181
+
182
+
183
@dataclasses.dataclass(frozen=True, slots=True)
class _SerializableReport:
    """JSON-friendly snapshot of a loss report.

    Built by ``_serialize_report``; the two field collections are stored
    as sorted lists so the result can go through ``dataclasses.asdict``
    and ``json.dumps`` with a stable ordering.
    """

    source: str
    target: str
    source_fields_dropped_on_read: list[str]
    ir_fields_unrepresentable_in_target: list[str]
    round_trip_classification: str
190
+
191
+
192
def _serialize_report(report: Any) -> _SerializableReport:
    """Copy a loss report into a ``_SerializableReport``.

    The two field collections are passed through ``sorted`` so they become
    plain lists in a deterministic order; the other attributes are copied
    unchanged.
    """
    dropped = sorted(report.source_fields_dropped_on_read)
    unrepresentable = sorted(report.ir_fields_unrepresentable_in_target)
    return _SerializableReport(
        source=report.source,
        target=report.target,
        source_fields_dropped_on_read=dropped,
        ir_fields_unrepresentable_in_target=unrepresentable,
        round_trip_classification=report.round_trip_classification,
    )
200
+
201
+
202
+ # ---------- stats ----------
203
+
204
+
205
@main.command("stats")
@click.argument("source", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option(
    "--codec",
    "codec_name",
    default=None,
    help="Codec name; if omitted, auto-detect.",
)
@click.option("--json", "as_json", is_flag=True)
def stats_cmd(source: Path, codec_name: str | None, as_json: bool) -> None:
    """Print sample / cell / spanned-cell counts for SOURCE."""
    # A cell counts as "spanned" when its rowspan or its colspan is not 1.
    codec = None
    if codec_name:
        codec = _resolve_codec_name(codec_name)

    totals = {"samples": 0, "cells": 0, "spanned_cells": 0}
    for sample in tio.open(source, codec=codec):
        totals["samples"] += 1
        totals["cells"] += len(sample.cells)
        totals["spanned_cells"] += len(
            [c for c in sample.cells if c.rowspan != 1 or c.colspan != 1]
        )

    if as_json:
        click.echo(json.dumps({"source": str(source), **totals}))
        return
    click.echo(f"samples: {totals['samples']}")
    click.echo(f"cells: {totals['cells']}")
    click.echo(f"spanned cells: {totals['spanned_cells']}")
236
+
237
+
238
+ # ---------- diff ----------
239
+
240
+
241
@main.command("diff")
@click.argument("a_path", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.argument("b_path", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--codec", "codec_name", default=None)
def diff_cmd(a_path: Path, b_path: Path, codec_name: str | None) -> None:
    """Record-by-record diff of A_PATH and B_PATH (same codec on both sides)."""
    # Records are paired by position. One line is printed per difference,
    # followed by a count; the process exits with status 1 if any were found.
    codec = None
    if codec_name:
        codec = _resolve_codec_name(codec_name)

    diffs = list(_iter_diffs(tio.open(a_path, codec=codec), tio.open(b_path, codec=codec)))

    # Any side label other than the two listed here is reported as "only in B".
    templates = {
        "both": "differ @ record {index}: {sample}",
        "left-only": "only in A @ record {index}: filename={sample}",
    }
    fallback = "only in B @ record {index}: filename={sample}"
    for index, side, sample in diffs:
        click.echo(templates.get(side, fallback).format(index=index, sample=sample))

    click.echo(f"{len(diffs)} difference(s)")
    if diffs:
        sys.exit(1)
261
+
262
+
263
def _iter_diffs(
    a: Iterable[TableSample], b: Iterable[TableSample]
) -> Iterator[tuple[int, str, str]]:
    """Walk two sample streams in lockstep and yield their differences.

    Yields ``(index, side, detail)`` tuples where ``index`` is the
    zero-based record position and ``side`` is one of:

    - ``"both"``: both streams have a record here and they compare
      unequal; ``detail`` is ``"<a filename> != <b filename>"``.
    - ``"left-only"``: only ``a`` still has records; ``detail`` is that
      record's filename.
    - ``"right-only"``: only ``b`` still has records; ``detail`` is that
      record's filename.

    Equal record pairs yield nothing. Iteration ends once both streams
    are exhausted.
    """
    left, right = iter(a), iter(b)
    position = 0
    while True:
        lhs = next(left, None)
        rhs = next(right, None)
        if lhs is None:
            if rhs is None:
                return
            yield position, "right-only", rhs.filename
        elif rhs is None:
            yield position, "left-only", lhs.filename
        elif lhs != rhs:
            yield position, "both", f"{lhs.filename} != {rhs.filename}"
        position += 1
282
+
283
+
284
+ # ---------- analyze-loss ----------
285
+
286
+
287
@main.command("analyze-loss")
@click.option("--from", "from_codec", required=True)
@click.option("--to", "to_codec", required=True)
def analyze_loss_cmd(from_codec: str, to_codec: str) -> None:
    """Static loss report for the FROM -> TO codec pair."""
    # The lookups are done only for their side effect: an unknown codec name
    # becomes a usage error before any analysis runs.
    for requested in (from_codec, to_codec):
        _resolve_codec_name(requested)
    report = analyze_loss(source=from_codec, target=to_codec)
    payload = dataclasses.asdict(_serialize_report(report))
    click.echo(json.dumps(payload))
296
+
297
+
298
+ # ---------- codecs list ----------
299
+
300
+
301
@main.group("codecs")
def codecs_group() -> None:
    """Inspect the in-process codec registry."""
    # Intentionally empty: this group only namespaces its subcommands.
304
+
305
+
306
@codecs_group.command("list")
def codecs_list_cmd() -> None:
    """List registered codec names."""
    # One name per line, in whatever order the registry returns them.
    for registered in codecs.list_codecs():
        click.echo(registered)
311
+
312
+
313
+ if __name__ == "__main__": # pragma: no cover
314
+ main()