trace-digitiser 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. trace_digitiser-0.1.0/PKG-INFO +176 -0
  2. trace_digitiser-0.1.0/README.md +157 -0
  3. trace_digitiser-0.1.0/pyproject.toml +42 -0
  4. trace_digitiser-0.1.0/setup.cfg +4 -0
  5. trace_digitiser-0.1.0/src/trace_digitiser/__init__.py +222 -0
  6. trace_digitiser-0.1.0/src/trace_digitiser/calibration.py +283 -0
  7. trace_digitiser-0.1.0/src/trace_digitiser/cli.py +123 -0
  8. trace_digitiser-0.1.0/src/trace_digitiser/diagnostics.py +144 -0
  9. trace_digitiser-0.1.0/src/trace_digitiser/digitise.py +85 -0
  10. trace_digitiser-0.1.0/src/trace_digitiser/geometry.py +61 -0
  11. trace_digitiser-0.1.0/src/trace_digitiser/io.py +74 -0
  12. trace_digitiser-0.1.0/src/trace_digitiser/line_detection.py +182 -0
  13. trace_digitiser-0.1.0/src/trace_digitiser/models.py +148 -0
  14. trace_digitiser-0.1.0/src/trace_digitiser/ocr.py +240 -0
  15. trace_digitiser-0.1.0/src/trace_digitiser/panel_detection.py +816 -0
  16. trace_digitiser-0.1.0/src/trace_digitiser/summarise.py +68 -0
  17. trace_digitiser-0.1.0/src/trace_digitiser/synthetic.py +206 -0
  18. trace_digitiser-0.1.0/src/trace_digitiser/trace_detection.py +337 -0
  19. trace_digitiser-0.1.0/src/trace_digitiser/x_calibration.py +228 -0
  20. trace_digitiser-0.1.0/src/trace_digitiser.egg-info/PKG-INFO +176 -0
  21. trace_digitiser-0.1.0/src/trace_digitiser.egg-info/SOURCES.txt +27 -0
  22. trace_digitiser-0.1.0/src/trace_digitiser.egg-info/dependency_links.txt +1 -0
  23. trace_digitiser-0.1.0/src/trace_digitiser.egg-info/entry_points.txt +2 -0
  24. trace_digitiser-0.1.0/src/trace_digitiser.egg-info/requires.txt +11 -0
  25. trace_digitiser-0.1.0/src/trace_digitiser.egg-info/top_level.txt +1 -0
  26. trace_digitiser-0.1.0/tests/test_geometry.py +45 -0
  27. trace_digitiser-0.1.0/tests/test_integration.py +250 -0
  28. trace_digitiser-0.1.0/tests/test_panel_detection.py +74 -0
  29. trace_digitiser-0.1.0/tests/test_trace_detection.py +35 -0
@@ -0,0 +1,176 @@
1
+ Metadata-Version: 2.4
2
+ Name: trace-digitiser
3
+ Version: 0.1.0
4
+ Summary: Template-free computer-vision digitisation pipeline for raster scientific line plots.
5
+ Author: Trace Digitiser Contributors
6
+ License: MIT
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: opencv-python-headless>=4.8
10
+ Requires-Dist: pytesseract>=0.3.10
11
+ Requires-Dist: Pillow>=10.0
12
+ Requires-Dist: pandas>=2.0
13
+ Requires-Dist: numpy>=1.24
14
+ Requires-Dist: matplotlib>=3.7
15
+ Provides-Extra: dev
16
+ Requires-Dist: pytest>=7.4; extra == "dev"
17
+ Requires-Dist: pytest-cov>=4.1; extra == "dev"
18
+ Requires-Dist: ruff>=0.4; extra == "dev"
19
+
20
+ # trace-digitiser
21
+
22
+ Template-free computer-vision digitisation pipeline for raster scientific line plots.
23
+
24
+ Given a raster image of a scientific figure (PNG, JPEG, etc.), this tool detects plot panels, reads y-axis tick labels via OCR, segments coloured traces, and exports digitised data as CSV files — all without requiring manual calibration points, known trace colours, or panel coordinates. The user supplies only a high-level layout hint (e.g. "stacked", "horizontal", "grid").
25
+
26
+ ## Installation
27
+
28
+ Requires Python 3.10+ and [Tesseract OCR](https://github.com/tesseract-ocr/tesseract).
29
+
30
+ ```bash
31
+ # Install Tesseract (Ubuntu/Debian)
32
+ sudo apt-get install tesseract-ocr
33
+
34
+ # Install Tesseract (macOS)
35
+ brew install tesseract
36
+
37
+ # Install the package
38
+ pip install -e .
39
+
40
+ # With development dependencies
41
+ pip install -e ".[dev]"
42
+ ```
43
+
44
+ ## Quick start
45
+
46
+ ### Python API
47
+
48
+ ```python
49
+ from trace_digitiser import digitise
50
+
51
+ result = digitise(
52
+ "figure.jpg",
53
+ layout_mode="stacked",
54
+ expected_rows=2,
55
+ expected_cols=1,
56
+ output_dir="outputs",
57
+ )
58
+
59
+ # Digitised traces as a DataFrame
60
+ print(result.trace_data.head())
61
+
62
+ # Panel metadata
63
+ for panel in result.panels:
64
+ print(f"Panel {panel.panel_id}: {panel.width}×{panel.height}px, "
65
+ f"calibration={panel.calibration.scale_type}")
66
+ ```
67
+
68
+ ### Command line
69
+
70
+ ```bash
71
+ # Single image
72
+ trace-digitiser figure.jpg --layout stacked --rows 2 --cols 1 -o outputs/
73
+
74
+ # Batch processing
75
+ trace-digitiser figures/*.jpg --layout auto -o results/
76
+
77
+ # Generate synthetic test figures
78
+ trace-digitiser --generate-test-figures -o test_figures/
79
+ ```
80
+
81
+ ### Layout modes
82
+
83
+ | Mode | Use case | Example |
84
+ |------|----------|---------|
85
+ | `single` | One plot panel | `--layout single` |
86
+ | `stacked` | Vertically stacked panels | `--layout stacked --rows 2 --cols 1` |
87
+ | `horizontal` | Side-by-side panels | `--layout horizontal --rows 1 --cols 3` |
88
+ | `grid` | Row×column subplot grid | `--layout grid --rows 2 --cols 2` |
89
+ | `auto` | Unconstrained detection | `--layout auto` |
90
+
91
+ ## Output files
92
+
93
+ For input `figure.jpg` with default settings:
94
+
95
+ | File | Description |
96
+ |------|-------------|
97
+ | `figure_automated_digitised_trace_by_column.csv` | One row per x-pixel column per trace, with `center_estimate`, `upper_envelope`, `lower_envelope` |
98
+ | `figure_automated_panel_metadata.csv` | Panel coordinates, calibration parameters, detected x-labels |
99
+ | `figure_automated_digitised_summary_by_label.csv` | Summary statistics per detected x-label interval (written only if labels are found) |
100
+
101
+ ## Project structure
102
+
103
+ ```
104
+ trace_digitiser/
105
+ ├── pyproject.toml
106
+ ├── src/trace_digitiser/
107
+ │ ├── __init__.py # Public API: digitise()
108
+ │ ├── models.py # Dataclasses: Panel, Calibration, Trace, DigitiserResult
109
+ │ ├── io.py # Image loading, CSV output
110
+ │ ├── geometry.py # Box area/IoU/containment helpers
111
+ │ ├── line_detection.py # Horizontal and vertical line detectors
112
+ │ ├── panel_detection.py # Candidate generation, deduplication, layout selection
113
+ │ ├── calibration.py # Y-axis OCR calibration (linear + log) and cross-row propagation
114
+ │ ├── x_calibration.py # X-axis OCR calibration (numeric ticks)
115
+ │ ├── ocr.py # Tesseract OCR wrappers for tick and x-label reading
116
+ │ ├── trace_detection.py # HSV/CIELAB colour segmentation + achromatic trace detection
117
+ │ ├── digitise.py # Column-by-column trace digitisation
118
+ │ ├── summarise.py # Interval-label summarisation
119
+ │ ├── diagnostics.py # Debug overlays and diagnostic file output
120
+ │ ├── synthetic.py # Synthetic test-figure generation
121
+ │ └── cli.py # Command-line interface
122
+ └── tests/
123
+ ├── conftest.py # Shared fixtures (synthetic images)
124
+ ├── test_geometry.py
125
+ ├── test_panel_detection.py
126
+ ├── test_trace_detection.py
127
+ └── test_integration.py # Panel count, calibration quality, trace RMSE
128
+ ```
129
+
130
+ ## Development
131
+
132
+ ```bash
133
+ # Run tests
134
+ pytest
135
+
136
+ # Run tests with coverage
137
+ pytest --cov=trace_digitiser
138
+
139
+ # Lint
140
+ ruff check src/ tests/
141
+ ```
142
+
143
+ ## Limitations
144
+
145
+ This tool digitises visible pixels from raster images. It does not recover original raw data. Key limitations:
146
+
147
+ - **Only linear and log y-axes are supported** — broken, dual, and other nonlinear axes are not supported yet.
148
+ - **X-axis calibration is best-effort** — numeric x-ticks are OCR'd when possible; otherwise x-values are normalised 0–1 or labelled by interval.
149
+ - **Black/grey traces partially supported** — achromatic trace detection is available but works best when traces have enough contrast against the background.
150
+ - **Requires visible structure** — axes, borders, or gridlines must be present for panel detection.
151
+ - **OCR fragility** — small, rotated, or low-contrast tick labels may fail.
152
+
153
+ ## Changelog
154
+
155
+ ### v0.1.0
156
+
157
+ **Refactored from notebook to installable package** with 15 modules, dataclasses, CLI, and test suite.
158
+
159
+ **Panel detection improvements:**
160
+ - Hint-aware `split_y_clusters` — uses the user's `expected_rows` to split evenly-spaced gridlines at the largest gaps, even when the inter-panel gap is only marginally wider than intra-panel gaps.
161
+ - Post-hoc panel subdivision — `apply_layout_hint` can split oversized candidates at their gridlines when initial detection yields fewer panels than expected.
162
+ - Improved grid detection — properly cross-matches row/column structure.
163
+
164
+ **Trace detection improvements:**
165
+ - CIELAB clustering mode (`use_lab=True`) — clusters non-background pixels in perceptually uniform colour space using mini-batch k-means.
166
+ - Achromatic trace detection — a separate pass detects black/grey traces by looking for low-saturation, horizontally continuous structures distinct from grid/axis lines.
167
+
168
+ **Calibration improvements:**
169
+ - Log-scale y-axis detection — when OCR'd tick values are better explained by `log10(y_value) = a * y_pixel + b`, the calibration uses log scale automatically.
170
+ - X-axis calibration — OCRs numeric x-axis ticks, fits `x_value = a * x_pixel + b`, and adds `x_value` column to the trace CSV.
171
+
172
+ **Infrastructure:**
173
+ - Diagnostic overlays save to files via `save_diagnostics=True`.
174
+ - Tesseract error handling — graceful recovery from OCR crashes on degenerate crops.
175
+ - No Colab dependency.
176
+ - Quantitative test suite with panel count, calibration quality, and trace RMSE metrics against synthetic ground truth.
@@ -0,0 +1,157 @@
1
+ # trace-digitiser
2
+
3
+ Template-free computer-vision digitisation pipeline for raster scientific line plots.
4
+
5
+ Given a raster image of a scientific figure (PNG, JPEG, etc.), this tool detects plot panels, reads y-axis tick labels via OCR, segments coloured traces, and exports digitised data as CSV files — all without requiring manual calibration points, known trace colours, or panel coordinates. The user supplies only a high-level layout hint (e.g. "stacked", "horizontal", "grid").
6
+
7
+ ## Installation
8
+
9
+ Requires Python 3.10+ and [Tesseract OCR](https://github.com/tesseract-ocr/tesseract).
10
+
11
+ ```bash
12
+ # Install Tesseract (Ubuntu/Debian)
13
+ sudo apt-get install tesseract-ocr
14
+
15
+ # Install Tesseract (macOS)
16
+ brew install tesseract
17
+
18
+ # Install the package
19
+ pip install -e .
20
+
21
+ # With development dependencies
22
+ pip install -e ".[dev]"
23
+ ```
24
+
25
+ ## Quick start
26
+
27
+ ### Python API
28
+
29
+ ```python
30
+ from trace_digitiser import digitise
31
+
32
+ result = digitise(
33
+ "figure.jpg",
34
+ layout_mode="stacked",
35
+ expected_rows=2,
36
+ expected_cols=1,
37
+ output_dir="outputs",
38
+ )
39
+
40
+ # Digitised traces as a DataFrame
41
+ print(result.trace_data.head())
42
+
43
+ # Panel metadata
44
+ for panel in result.panels:
45
+ print(f"Panel {panel.panel_id}: {panel.width}×{panel.height}px, "
46
+ f"calibration={panel.calibration.scale_type}")
47
+ ```
48
+
49
+ ### Command line
50
+
51
+ ```bash
52
+ # Single image
53
+ trace-digitiser figure.jpg --layout stacked --rows 2 --cols 1 -o outputs/
54
+
55
+ # Batch processing
56
+ trace-digitiser figures/*.jpg --layout auto -o results/
57
+
58
+ # Generate synthetic test figures
59
+ trace-digitiser --generate-test-figures -o test_figures/
60
+ ```
61
+
62
+ ### Layout modes
63
+
64
+ | Mode | Use case | Example |
65
+ |------|----------|---------|
66
+ | `single` | One plot panel | `--layout single` |
67
+ | `stacked` | Vertically stacked panels | `--layout stacked --rows 2 --cols 1` |
68
+ | `horizontal` | Side-by-side panels | `--layout horizontal --rows 1 --cols 3` |
69
+ | `grid` | Row×column subplot grid | `--layout grid --rows 2 --cols 2` |
70
+ | `auto` | Unconstrained detection | `--layout auto` |
71
+
72
+ ## Output files
73
+
74
+ For input `figure.jpg` with default settings:
75
+
76
+ | File | Description |
77
+ |------|-------------|
78
+ | `figure_automated_digitised_trace_by_column.csv` | One row per x-pixel column per trace, with `center_estimate`, `upper_envelope`, `lower_envelope` |
79
+ | `figure_automated_panel_metadata.csv` | Panel coordinates, calibration parameters, detected x-labels |
80
+ | `figure_automated_digitised_summary_by_label.csv` | Summary statistics per detected x-label interval (written only if labels are found) |
81
+
82
+ ## Project structure
83
+
84
+ ```
85
+ trace_digitiser/
86
+ ├── pyproject.toml
87
+ ├── src/trace_digitiser/
88
+ │ ├── __init__.py # Public API: digitise()
89
+ │ ├── models.py # Dataclasses: Panel, Calibration, Trace, DigitiserResult
90
+ │ ├── io.py # Image loading, CSV output
91
+ │ ├── geometry.py # Box area/IoU/containment helpers
92
+ │ ├── line_detection.py # Horizontal and vertical line detectors
93
+ │ ├── panel_detection.py # Candidate generation, deduplication, layout selection
94
+ │ ├── calibration.py # Y-axis OCR calibration (linear + log) and cross-row propagation
95
+ │ ├── x_calibration.py # X-axis OCR calibration (numeric ticks)
96
+ │ ├── ocr.py # Tesseract OCR wrappers for tick and x-label reading
97
+ │ ├── trace_detection.py # HSV/CIELAB colour segmentation + achromatic trace detection
98
+ │ ├── digitise.py # Column-by-column trace digitisation
99
+ │ ├── summarise.py # Interval-label summarisation
100
+ │ ├── diagnostics.py # Debug overlays and diagnostic file output
101
+ │ ├── synthetic.py # Synthetic test-figure generation
102
+ │ └── cli.py # Command-line interface
103
+ └── tests/
104
+ ├── conftest.py # Shared fixtures (synthetic images)
105
+ ├── test_geometry.py
106
+ ├── test_panel_detection.py
107
+ ├── test_trace_detection.py
108
+ └── test_integration.py # Panel count, calibration quality, trace RMSE
109
+ ```
110
+
111
+ ## Development
112
+
113
+ ```bash
114
+ # Run tests
115
+ pytest
116
+
117
+ # Run tests with coverage
118
+ pytest --cov=trace_digitiser
119
+
120
+ # Lint
121
+ ruff check src/ tests/
122
+ ```
123
+
124
+ ## Limitations
125
+
126
+ This tool digitises visible pixels from raster images. It does not recover original raw data. Key limitations:
127
+
128
+ - **Only linear and log y-axes are supported** — broken, dual, and other nonlinear axes are not supported yet.
129
+ - **X-axis calibration is best-effort** — numeric x-ticks are OCR'd when possible; otherwise x-values are normalised 0–1 or labelled by interval.
130
+ - **Black/grey traces partially supported** — achromatic trace detection is available but works best when traces have enough contrast against the background.
131
+ - **Requires visible structure** — axes, borders, or gridlines must be present for panel detection.
132
+ - **OCR fragility** — small, rotated, or low-contrast tick labels may fail.
133
+
134
+ ## Changelog
135
+
136
+ ### v0.1.0
137
+
138
+ **Refactored from notebook to installable package** with 15 modules, dataclasses, CLI, and test suite.
139
+
140
+ **Panel detection improvements:**
141
+ - Hint-aware `split_y_clusters` — uses the user's `expected_rows` to split evenly-spaced gridlines at the largest gaps, even when the inter-panel gap is only marginally wider than intra-panel gaps.
142
+ - Post-hoc panel subdivision — `apply_layout_hint` can split oversized candidates at their gridlines when initial detection yields fewer panels than expected.
143
+ - Improved grid detection — properly cross-matches row/column structure.
144
+
145
+ **Trace detection improvements:**
146
+ - CIELAB clustering mode (`use_lab=True`) — clusters non-background pixels in perceptually uniform colour space using mini-batch k-means.
147
+ - Achromatic trace detection — a separate pass detects black/grey traces by looking for low-saturation, horizontally continuous structures distinct from grid/axis lines.
148
+
149
+ **Calibration improvements:**
150
+ - Log-scale y-axis detection — when OCR'd tick values are better explained by `log10(y_value) = a * y_pixel + b`, the calibration uses log scale automatically.
151
+ - X-axis calibration — OCRs numeric x-axis ticks, fits `x_value = a * x_pixel + b`, and adds `x_value` column to the trace CSV.
152
+
153
+ **Infrastructure:**
154
+ - Diagnostic overlays save to files via `save_diagnostics=True`.
155
+ - Tesseract error handling — graceful recovery from OCR crashes on degenerate crops.
156
+ - No Colab dependency.
157
+ - Quantitative test suite with panel count, calibration quality, and trace RMSE metrics against synthetic ground truth.
@@ -0,0 +1,42 @@
1
+ [build-system]
2
+ requires = ["setuptools"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "trace-digitiser"
7
+ version = "0.1.0"
8
+ description = "Template-free computer-vision digitisation pipeline for raster scientific line plots."
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.10"
12
+ authors = [{ name = "Trace Digitiser Contributors" }]
13
+
14
+ dependencies = [
15
+ "opencv-python-headless>=4.8",
16
+ "pytesseract>=0.3.10",
17
+ "Pillow>=10.0",
18
+ "pandas>=2.0",
19
+ "numpy>=1.24",
20
+ "matplotlib>=3.7",
21
+ ]
22
+
23
+ [project.optional-dependencies]
24
+ dev = [
25
+ "pytest>=7.4",
26
+ "pytest-cov>=4.1",
27
+ "ruff>=0.4",
28
+ ]
29
+
30
+ [project.scripts]
31
+ trace-digitiser = "trace_digitiser.cli:main"
32
+
33
+ [tool.setuptools.packages.find]
34
+ where = ["src"]
35
+
36
+ [tool.pytest.ini_options]
37
+ testpaths = ["tests"]
38
+ addopts = "-v --tb=short"
39
+
40
+ [tool.ruff]
41
+ line-length = 120
42
+ target-version = "py310"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,222 @@
1
+ """trace_digitiser — template-free digitisation of raster scientific line plots.
2
+
3
+ Quick start::
4
+
5
+ from trace_digitiser import digitise
6
+
7
+ result = digitise(
8
+ "figure.jpg",
9
+ layout_mode="stacked",
10
+ expected_rows=2,
11
+ expected_cols=1,
12
+ output_dir="outputs",
13
+ )
14
+
15
+ print(result.trace_data.head())
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from pathlib import Path
21
+ from typing import Optional
22
+
23
+ import pandas as pd
24
+
25
+ from .calibration import propagate_y_calibration_across_rows, robust_y_calibration
26
+ from .diagnostics import draw_digitised_trace, draw_panel_overlay, draw_trace_mask, print_panel_summary
27
+ from .digitise import digitise_trace_mask
28
+ from .io import build_panel_metadata, load_image, save_outputs
29
+ from .models import Calibration, DigitiserResult, Panel, Trace, XLabel
30
+ from .ocr import detect_x_labels
31
+ from .panel_detection import find_plot_panels
32
+ from .summarise import summarise_by_detected_labels
33
+ from .trace_detection import detect_trace_masks
34
+ from .x_calibration import calibrate_x_axis
35
+
36
+ __all__ = [
37
+ "digitise",
38
+ "Calibration",
39
+ "DigitiserResult",
40
+ "Panel",
41
+ "Trace",
42
+ "XLabel",
43
+ ]
44
+
45
+
46
def digitise(
    image_path: str | Path,
    *,
    layout_mode: str = "auto",
    expected_rows: Optional[int] = None,
    expected_cols: Optional[int] = None,
    expected_panels: Optional[int] = None,
    output_dir: Optional[str | Path] = None,
    output_prefix: Optional[str] = None,
    show_debug: bool = False,
    save_diagnostics: bool = False,
) -> DigitiserResult:
    """Run the full digitisation pipeline on one raster figure.

    The stages run in a fixed order: panel detection, y-axis calibration
    (per panel, then propagated across rows), x-axis calibration, trace
    detection and digitisation, x-label OCR, interval summarisation,
    optional diagnostics, and finally writing the outputs via
    ``save_outputs``.

    Parameters
    ----------
    image_path : str or Path
        Location of the raster image to digitise.
    layout_mode : str
        Layout hint forwarded to panel detection: ``"auto"``,
        ``"single"``, ``"stacked"``, ``"horizontal"``, or ``"grid"``.
    expected_rows, expected_cols, expected_panels : int, optional
        Further layout hints forwarded to panel detection.
    output_dir : str or Path, optional
        Destination directory, forwarded unchanged to ``save_outputs``.
        When omitted, diagnostics are written relative to ``"."``.
    output_prefix : str, optional
        Filename prefix for the outputs; the image stem is used when
        this is ``None``.
    show_debug : bool
        Print progress information and ask the diagnostic helpers to
        display their plots.
    save_diagnostics : bool
        Create a ``diagnostics`` subdirectory under *output_dir* and hand
        it to the diagnostic helpers as their output location.

    Returns
    -------
    DigitiserResult
        The loaded image, the detected panels as ``Panel`` objects, the
        trace / summary / metadata tables, and the paths returned by
        ``save_outputs``.
    """
    image_path = Path(image_path)
    rgb = load_image(image_path)

    output_prefix = image_path.stem if output_prefix is None else output_prefix

    # Diagnostic overlays get their own subdirectory, created only on request.
    diagnostics_dir: Optional[Path] = None
    if save_diagnostics:
        diagnostics_dir = Path(output_dir or ".") / "diagnostics"
        diagnostics_dir.mkdir(parents=True, exist_ok=True)

    # Either flag is enough to trigger the diagnostic helpers.
    want_diagnostics = show_debug or save_diagnostics

    if show_debug:
        print("Processing:", image_path)
        print("Image size:", rgb.shape[1], "×", rgb.shape[0])

    # --- Stage 1: panel detection -------------------------------------
    panels, h_lines, v_lines = find_plot_panels(
        rgb,
        layout_mode=layout_mode,
        expected_rows=expected_rows,
        expected_cols=expected_cols,
        expected_panels=expected_panels,
    )

    if want_diagnostics:
        print_panel_summary(panels, h_lines, v_lines, layout_mode)
        draw_panel_overlay(rgb, panels, h_lines, v_lines, output_dir=diagnostics_dir, show=show_debug)

    # --- Stage 2: y-axis calibration ----------------------------------
    for panel in panels:
        panel["y_calibration"] = robust_y_calibration(rgb, panel, verbose=show_debug).to_dict()

    panels = propagate_y_calibration_across_rows(panels, verbose=show_debug)

    # --- Stage 2b: x-axis calibration (numeric x ticks) ---------------
    for panel in panels:
        x_fit = calibrate_x_axis(rgb, panel, verbose=show_debug)
        if x_fit is None:
            # No calibration came back; leave the key unset.
            continue
        panel["x_calibration"] = x_fit

    # --- Stage 3: trace detection and digitisation --------------------
    frames: list[pd.DataFrame] = []
    mask_pairs: list[tuple[dict, dict]] = []  # (panel, trace) pairs kept for diagnostics

    for panel in panels:
        detected = detect_trace_masks(rgb, panel)
        if show_debug:
            print(f"Panel {panel['panel_id']}: detected {len(detected)} coloured trace(s)")

        for trace in detected:
            if show_debug:
                # Skip the "mask" entry when echoing the trace record.
                print(" ", {key: value for key, value in trace.items() if key != "mask"})
            frames.append(digitise_trace_mask(panel, trace))
            mask_pairs.append((panel, trace))

    if frames:
        trace_data = pd.concat(frames, ignore_index=True)
    else:
        trace_data = pd.DataFrame()

    # --- Stage 4: x-label OCR -----------------------------------------
    for panel in panels:
        panel["x_labels"] = detect_x_labels(rgb, panel)
        if not show_debug:
            continue
        print(f"Panel {panel['panel_id']} x labels:")
        for label in panel["x_labels"]:
            print(f" {label['text']:>8s} x={label['x']:.1f} conf={label['conf']:.1f}")

    # --- Stage 5: interval summaries ----------------------------------
    summary_by_label = summarise_by_detected_labels(trace_data, panels)

    # --- Stage 6: diagnostics -----------------------------------------
    if want_diagnostics:
        for panel, trace in mask_pairs:
            draw_trace_mask(rgb, panel, trace, output_dir=diagnostics_dir, show=show_debug)

        if not trace_data.empty:
            grouped = trace_data.groupby(["panel_id", "trace_id"])
            for (panel_id, trace_id), _rows in grouped:
                draw_digitised_trace(trace_data, panel_id, trace_id, output_dir=diagnostics_dir, show=show_debug)

    # --- Stage 7: save outputs ----------------------------------------
    panel_metadata = build_panel_metadata(panels)
    saved_paths = save_outputs(trace_data, panel_metadata, summary_by_label, output_prefix, output_dir)
    trace_csv, meta_csv, summary_csv = saved_paths

    if show_debug:
        print("Wrote:")
        print(" -", trace_csv)
        print(" -", meta_csv)
        if summary_csv:
            print(" -", summary_csv)
        else:
            print(" - no label summary; fewer than two labels detected")

    # --- Stage 8: structured result -----------------------------------
    # Convert the working dicts into the public dataclasses.
    panel_models = []
    for panel in panels:
        panel_models.append(
            Panel(
                panel_id=panel["panel_id"],
                x0=panel["x0"],
                x1=panel["x1"],
                y_top=panel["y_top"],
                y_bottom=panel["y_bottom"],
                gridline_y=panel["gridline_y"],
                source=panel["source"],
                score=panel["score"],
                layout_mode=panel.get("layout_mode", layout_mode),
                calibration=Calibration(**panel["y_calibration"]) if "y_calibration" in panel else None,
                x_labels=[XLabel(**label) for label in panel.get("x_labels", [])],
            )
        )

    return DigitiserResult(
        image_path=image_path,
        rgb=rgb,
        panels=panel_models,
        trace_data=trace_data,
        summary_by_label=summary_by_label,
        panel_metadata=panel_metadata,
        trace_csv_path=trace_csv,
        summary_csv_path=summary_csv,
        metadata_csv_path=meta_csv,
    )