uprightkit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- uprightkit/__init__.py +40 -0
- uprightkit/api.py +244 -0
- uprightkit/cli.py +66 -0
- uprightkit/core.py +166 -0
- uprightkit-0.1.0.dist-info/METADATA +114 -0
- uprightkit-0.1.0.dist-info/RECORD +10 -0
- uprightkit-0.1.0.dist-info/WHEEL +5 -0
- uprightkit-0.1.0.dist-info/entry_points.txt +2 -0
- uprightkit-0.1.0.dist-info/licenses/LICENSE +21 -0
- uprightkit-0.1.0.dist-info/top_level.txt +1 -0
uprightkit/__init__.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""
|
|
2
|
+
uprightKit — lightweight, pure-OpenCV document deskewing and orientation
|
|
3
|
+
correction. No deep learning, no heavy model downloads.
|
|
4
|
+
|
|
5
|
+
Quick start
|
|
6
|
+
-----------
|
|
7
|
+
import uprightkit
|
|
8
|
+
|
|
9
|
+
# Single image, write result to disk
|
|
10
|
+
uprightkit.fix_image("scan.jpg", output_path="scan_fixed.jpg")
|
|
11
|
+
|
|
12
|
+
# PDF, page by page, write result to disk (requires: pip install uprightkit[pdf])
|
|
13
|
+
uprightkit.fix_pdf("doc.pdf", output_path="doc_fixed.pdf")
|
|
14
|
+
|
|
15
|
+
# Already have a numpy array (e.g. from cv2.imread)?
|
|
16
|
+
fixed = uprightkit.fix_array(image)
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from .api import (
|
|
20
|
+
InvalidImageError,
|
|
21
|
+
PdfSupportMissingError,
|
|
22
|
+
UprightKitError,
|
|
23
|
+
fix_array,
|
|
24
|
+
fix_image,
|
|
25
|
+
fix_pdf,
|
|
26
|
+
)
|
|
27
|
+
from .core import fix_skew_and_orientation
|
|
28
|
+
|
|
29
|
+
__version__ = "0.1.0"
|
|
30
|
+
|
|
31
|
+
__all__ = [
|
|
32
|
+
"fix_array",
|
|
33
|
+
"fix_image",
|
|
34
|
+
"fix_pdf",
|
|
35
|
+
"fix_skew_and_orientation",
|
|
36
|
+
"UprightKitError",
|
|
37
|
+
"InvalidImageError",
|
|
38
|
+
"PdfSupportMissingError",
|
|
39
|
+
"__version__",
|
|
40
|
+
]
|
uprightkit/api.py
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
"""
|
|
2
|
+
High-level, user-facing functions for uprightKit.
|
|
3
|
+
|
|
4
|
+
These are the functions most users should import:
|
|
5
|
+
|
|
6
|
+
import uprightkit
|
|
7
|
+
|
|
8
|
+
uprightkit.fix_image("scan.jpg", output_path="scan_fixed.jpg")
|
|
9
|
+
uprightkit.fix_pdf("doc.pdf", output_path="doc_fixed.pdf")
|
|
10
|
+
fixed_array = uprightkit.fix_array(my_numpy_image)
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import List, Optional, Union
|
|
18
|
+
|
|
19
|
+
import cv2
|
|
20
|
+
import numpy as np
|
|
21
|
+
|
|
22
|
+
from .core import InvalidImageError, UprightKitError, fix_skew_and_orientation
|
|
23
|
+
|
|
24
|
+
PathLike = Union[str, "os.PathLike[str]"]
|
|
25
|
+
|
|
26
|
+
__all__ = [
|
|
27
|
+
"fix_array",
|
|
28
|
+
"fix_image",
|
|
29
|
+
"fix_pdf",
|
|
30
|
+
"UprightKitError",
|
|
31
|
+
"InvalidImageError",
|
|
32
|
+
"PdfSupportMissingError",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class PdfSupportMissingError(UprightKitError):
    """Signal that a PDF feature was requested but PyMuPDF is not installed.

    The exception takes no arguments: it always carries the same message,
    which names the missing dependency and the ``pip`` command that
    installs the optional ``pdf`` extra.
    """

    def __init__(self) -> None:
        install_hint = (
            "PDF support requires the optional 'pymupdf' dependency. "
            "Install it with: pip install uprightkit[pdf]"
        )
        super().__init__(install_hint)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def fix_array(image: np.ndarray, **kwargs) -> np.ndarray:
    """
    Deskew and re-orient an image that is already loaded as a numpy array.

    Nothing is read from or written to disk: the array is handed straight to
    ``uprightkit.core.fix_skew_and_orientation`` and its result is returned.
    Use this entry point from inside an existing OpenCV pipeline.

    Parameters
    ----------
    image:
        BGR image array (e.g. the result of ``cv2.imread``/``cv2.imdecode``).
    **kwargs:
        Passed through unchanged to ``fix_skew_and_orientation``; see that
        function for the accepted tuning options (``correct_180_flip``,
        ``skew_angle_tolerance``, etc).

    Returns
    -------
    np.ndarray
        The straightened image, as a new array (input is not modified).
    """
    straightened = fix_skew_and_orientation(image, **kwargs)
    return straightened
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def fix_image(
    input_path: PathLike,
    output_path: Optional[PathLike] = None,
    *,
    jpeg_quality: int = 95,
    **kwargs,
) -> np.ndarray:
    """
    Load one image file, straighten it, and optionally save the result.

    Parameters
    ----------
    input_path:
        Location of the source image. Any format OpenCV can read is
        accepted (JPEG, PNG, BMP, TIFF, ...).
    output_path:
        Where to save the corrected image. The extension selects the output
        format (e.g. ``.png``, ``.jpg``). Leave as ``None`` to skip writing
        and just get the array back.
    jpeg_quality:
        Quality setting (0-100) applied only when ``output_path`` ends in
        ``.jpg``/``.jpeg``; it has no effect for other formats.
    **kwargs:
        Tuning options handed to ``fix_skew_and_orientation`` (e.g.
        ``correct_180_flip``, ``skew_angle_tolerance``,
        ``flip_density_ratio``).

    Returns
    -------
    np.ndarray
        The corrected image as a BGR numpy array.

    Raises
    ------
    FileNotFoundError
        If ``input_path`` is not an existing file.
    InvalidImageError
        If the file exists but OpenCV cannot decode it as an image.
    """
    source = Path(input_path)
    if not source.is_file():
        raise FileNotFoundError(f"no such file: {source}")

    # cv2.imread signals a decode failure by returning None, not by raising.
    decoded = cv2.imread(str(source), cv2.IMREAD_COLOR)
    if decoded is None:
        raise InvalidImageError(
            f"could not decode '{source}' as an image "
            "(unsupported format or corrupt file)"
        )

    result = fix_skew_and_orientation(decoded, **kwargs)
    if output_path is None:
        return result

    _write_image(result, Path(output_path), jpeg_quality=jpeg_quality)
    return result
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def fix_pdf(
    input_path: PathLike,
    output_path: Optional[PathLike] = None,
    *,
    dpi: int = 200,
    **kwargs,
) -> List[np.ndarray]:
    """
    Straighten every page of a PDF, page by page.

    Each page is rasterized to an image at the given DPI, run through the
    same skew/orientation correction used for single images, and (if
    ``output_path`` is given) reassembled into a new PDF whose pages keep
    the physical size implied by ``dpi`` (pixels are converted back to
    PDF points, 72 per inch).

    Parameters
    ----------
    input_path:
        Path to the source PDF.
    output_path:
        If given, a new PDF with all pages corrected is written here.
        If omitted, only the in-memory list of corrected page images is
        returned (nothing is written to disk).
    dpi:
        Resolution used when rasterizing each PDF page before processing.
        Must be greater than zero. Higher values preserve more detail but
        are slower and use more memory. 150-300 is a reasonable range for
        most documents.
    **kwargs:
        Forwarded to ``fix_skew_and_orientation`` for every page (e.g.
        ``correct_180_flip``, ``skew_angle_tolerance``).

    Returns
    -------
    list[np.ndarray]
        One corrected BGR image array per page, in page order.

    Raises
    ------
    ValueError
        If ``dpi`` is zero or negative.
    PdfSupportMissingError
        If PyMuPDF (``pymupdf``) is not installed.
    FileNotFoundError
        If ``input_path`` does not exist.
    InvalidImageError
        If the PDF has no pages, or a page rasterizes to an unexpected
        channel count.
    """
    # Validate before doing any work: a non-positive zoom factor cannot
    # produce a usable raster, and failing here gives a clear message.
    if dpi <= 0:
        raise ValueError(f"dpi must be a positive number, got {dpi!r}")

    try:
        import fitz  # PyMuPDF
    except ImportError as exc:
        raise PdfSupportMissingError() from exc

    input_path = Path(input_path)
    if not input_path.is_file():
        raise FileNotFoundError(f"no such file: {input_path}")

    zoom = dpi / 72.0  # PyMuPDF's base render resolution is 72 DPI.
    matrix = fitz.Matrix(zoom, zoom)

    fixed_pages: List[np.ndarray] = []

    with fitz.open(str(input_path)) as pdf:
        if pdf.page_count == 0:
            raise InvalidImageError(f"'{input_path}' has no pages")

        for page_index in range(pdf.page_count):
            pix = pdf.load_page(page_index).get_pixmap(matrix=matrix)
            img = _pixmap_to_bgr(pix, page_index)
            fixed_pages.append(fix_skew_and_orientation(img, **kwargs))

    if output_path is not None:
        # Pass the DPI along so the output pages are sized in points, not pixels.
        _write_pdf(fixed_pages, Path(output_path), dpi=dpi)

    return fixed_pages


def _pixmap_to_bgr(pix, page_index: int) -> np.ndarray:
    """Convert a rendered PyMuPDF pixmap into a BGR array for OpenCV.

    ``page_index`` is only used to label the error raised for an
    unsupported channel count.
    """
    # PyMuPDF gives RGB(A); OpenCV expects BGR.
    img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
        pix.height, pix.width, pix.n
    )
    if pix.n == 4:
        return cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
    if pix.n == 3:
        return cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    if pix.n == 1:
        return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    raise InvalidImageError(
        f"page {page_index} has unexpected channel count: {pix.n}"
    )


def _write_image(image: np.ndarray, output_path: Path, *, jpeg_quality: int) -> None:
    """Write ``image`` to ``output_path``, creating parent directories.

    The format is chosen by OpenCV from the file extension;
    ``jpeg_quality`` is applied only for ``.jpg``/``.jpeg`` outputs.

    Raises
    ------
    UprightKitError
        If OpenCV reports failure, either by returning ``False`` or by
        raising ``cv2.error`` (e.g. no writer for the extension).
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    ext = output_path.suffix.lower()

    params = []
    if ext in (".jpg", ".jpeg"):
        params = [cv2.IMWRITE_JPEG_QUALITY, int(jpeg_quality)]

    try:
        ok = cv2.imwrite(str(output_path), image, params)
    except cv2.error as exc:
        # Surface OpenCV failures through the package's own exception type so
        # callers (and the CLI) can handle every write failure the same way.
        raise UprightKitError(
            f"failed to write output image to '{output_path}': {exc}"
        ) from exc
    if not ok:
        raise UprightKitError(f"failed to write output image to '{output_path}'")


def _write_pdf(pages: List[np.ndarray], output_path: Path, *, dpi: float = 72.0) -> None:
    """Assemble page images into a PDF at ``output_path``.

    Each image becomes one page, embedded as PNG and stretched over the
    whole page rectangle.

    Parameters
    ----------
    pages:
        Page images in output order.
    output_path:
        Destination file; parent directories are created if needed.
    dpi:
        Resolution the images were rasterized at. Page dimensions are
        ``pixels * 72 / dpi`` points, so the default of 72 makes one pixel
        equal one point.

    Raises
    ------
    PdfSupportMissingError
        If PyMuPDF is not installed.
    UprightKitError
        If a page image cannot be PNG-encoded.
    """
    try:
        import fitz  # PyMuPDF
    except ImportError as exc:
        raise PdfSupportMissingError() from exc

    output_path.parent.mkdir(parents=True, exist_ok=True)

    # PDF page sizes are expressed in points (1/72 inch), not pixels.
    points_per_pixel = 72.0 / dpi

    doc = fitz.open()
    try:
        for page_img in pages:
            ok, encoded = cv2.imencode(".png", page_img)
            if not ok:
                raise UprightKitError("failed to encode a page while building output PDF")

            h, w = page_img.shape[:2]
            page = doc.new_page(
                width=w * points_per_pixel,
                height=h * points_per_pixel,
            )
            page.insert_image(page.rect, stream=encoded.tobytes())

        doc.save(str(output_path))
    finally:
        # Always release the in-memory document, even if encoding or saving failed.
        doc.close()
|
uprightkit/cli.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Command-line interface for uprightKit.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
uprightkit scan.jpg fixed.jpg
|
|
6
|
+
uprightkit document.pdf fixed.pdf
|
|
7
|
+
uprightkit scan.jpg fixed.jpg --no-flip-fix
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import argparse
|
|
13
|
+
import sys
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
from .api import InvalidImageError, PdfSupportMissingError, UprightKitError, fix_image, fix_pdf
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def main(argv: list[str] | None = None) -> int:
    """
    Run the ``uprightkit`` command-line tool.

    The input is treated as a PDF when its extension is ``.pdf``
    (case-insensitive) and as an image otherwise.

    Parameters
    ----------
    argv:
        Arguments to parse, without the program name. ``None`` lets
        argparse read them from ``sys.argv``.

    Returns
    -------
    int
        ``0`` when the corrected output was written, ``1`` when processing
        failed and an error was printed to stderr. Usage errors (including
        a non-positive ``--dpi``) exit through argparse with status 2.
    """
    parser = argparse.ArgumentParser(
        prog="uprightkit",
        description="Straighten a scanned image or PDF (fix skew + 180-degree flips).",
    )
    parser.add_argument("input", help="path to the input image or PDF")
    parser.add_argument("output", help="path to write the corrected output")
    parser.add_argument(
        "--no-flip-fix",
        action="store_true",
        help="only fix skew angle; skip the 180-degree upside-down check",
    )
    parser.add_argument(
        "--dpi",
        type=_positive_int,
        default=200,
        help="rasterization DPI for PDF pages (default: 200, ignored for images)",
    )

    args = parser.parse_args(argv)

    input_path = Path(args.input)
    is_pdf = input_path.suffix.lower() == ".pdf"

    try:
        if is_pdf:
            fix_pdf(
                input_path,
                output_path=args.output,
                dpi=args.dpi,
                correct_180_flip=not args.no_flip_fix,
            )
        else:
            fix_image(
                input_path,
                output_path=args.output,
                correct_180_flip=not args.no_flip_fix,
            )
    except (OSError, UprightKitError) as exc:
        # OSError covers FileNotFoundError plus failures such as an
        # unwritable output directory; UprightKitError is the base class of
        # InvalidImageError and PdfSupportMissingError.
        print(f"uprightkit: error: {exc}", file=sys.stderr)
        return 1

    print(f"uprightkit: wrote corrected output to {args.output}")
    return 0


def _positive_int(text: str) -> int:
    """argparse ``type=`` callable that accepts only integers greater than zero."""
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
    if value <= 0:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {value}")
    return value


if __name__ == "__main__":
    sys.exit(main())
|
uprightkit/core.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core image-straightening logic for uprightKit.
|
|
3
|
+
|
|
4
|
+
This module contains zero web-framework code on purpose: it is meant to be
|
|
5
|
+
imported and called directly from any Python project (scripts, notebooks,
|
|
6
|
+
other web frameworks, preprocessing pipelines, etc).
|
|
7
|
+
|
|
8
|
+
Algorithm (unchanged from the original prototype):
|
|
9
|
+
1. Detect fine skew angle using a probabilistic Hough transform over
|
|
10
|
+
Canny edges, then rotate to straighten it.
|
|
11
|
+
2. Detect gross 180-degree (upside-down) flips by comparing the
|
|
12
|
+
"ink density" of dilated text edges in the top half vs bottom half
|
|
13
|
+
of the image, then rotate 180 degrees if the bottom is much denser.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import math
|
|
19
|
+
from typing import Optional
|
|
20
|
+
|
|
21
|
+
import cv2
|
|
22
|
+
import numpy as np
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class UprightKitError(Exception):
    """Common base class for the exceptions that uprightKit defines itself."""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class InvalidImageError(UprightKitError):
    """Signal that an image could not be read, or is empty or otherwise invalid."""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def fix_skew_and_orientation(
    image: np.ndarray,
    *,
    hough_threshold: int = 100,
    min_line_length: int = 100,
    max_line_gap: int = 10,
    skew_angle_tolerance: float = 0.5,
    flip_density_ratio: float = 1.15,
    correct_180_flip: bool = True,
) -> np.ndarray:
    """
    Straighten a document image: fix small skew angle and 180-degree flips.

    Parameters
    ----------
    image:
        A BGR image as a numpy array (the format OpenCV uses natively,
        e.g. from ``cv2.imread`` or ``cv2.imdecode``). A 2D (grayscale)
        array is also accepted.
    hough_threshold, min_line_length, max_line_gap:
        Passed through to ``cv2.HoughLinesP`` for line detection. Increase
        ``hough_threshold`` if you get false-positive rotations on noisy
        images; decrease it if skew isn't being detected at all.
    skew_angle_tolerance:
        Minimum |angle| in degrees before a rotation is applied. Avoids
        introducing blur from micro-rotations on already-straight images.
    flip_density_ratio:
        How much denser the bottom half's text edges must be than the top
        half's before the image is flagged as upside-down. Tune this if
        your documents don't have a "text-heavy top" layout (see Notes).
    correct_180_flip:
        Set to False to skip the upside-down detection step entirely and
        only correct fine skew. Useful for document types where the
        top/bottom density heuristic doesn't apply (e.g. photos, forms
        with a centered logo, mostly-blank-top layouts).

    Returns
    -------
    np.ndarray
        A new image with the same width and height as the input,
        rotated/straightened. The input array is not modified in place.

    Raises
    ------
    InvalidImageError
        If ``image`` is None, empty, or not a valid 2D/3D array.

    Notes
    -----
    The 180-degree flip check assumes text/content is denser near the top
    of a correctly-oriented page (true for many ID cards, forms, and
    letters, but not universally true for all document types). If you're
    getting incorrect flips on your document type, pass
    ``correct_180_flip=False`` and handle orientation separately.
    """
    if image is None or not isinstance(image, np.ndarray) or image.size == 0:
        raise InvalidImageError("image is None, empty, or not a valid numpy array")

    if image.ndim not in (2, 3):
        raise InvalidImageError(f"expected a 2D or 3D image array, got shape {image.shape}")

    # Work on a copy so callers' original arrays are never mutated.
    image = image.copy()
    # size != 0 was checked above, so neither dimension can be zero here.
    h, w = image.shape[:2]

    edges = _detect_edges(image)

    # --- 1. Fine skew correction via Hough transform ---
    lines = cv2.HoughLinesP(
        edges,
        rho=1,
        theta=np.pi / 180,
        threshold=hough_threshold,
        minLineLength=min_line_length,
        maxLineGap=max_line_gap,
    )

    median_angle = _median_skew_angle(lines)

    if median_angle is not None and abs(median_angle) > skew_angle_tolerance:
        M = cv2.getRotationMatrix2D((w // 2, h // 2), median_angle, 1.0)
        image = cv2.warpAffine(
            image, M, (w, h),
            flags=cv2.INTER_CUBIC,
            borderMode=cv2.BORDER_REPLICATE,
        )
        # The edge map is stale after rotating, but only the flip check
        # below reads it, so skip the recomputation when that step is off.
        if correct_180_flip:
            edges = _detect_edges(image)

    # --- 2. Gross 180-degree flip correction via density profiling ---
    if correct_180_flip:
        # A wide, short kernel merges the edges of neighbouring characters
        # into horizontal blobs before counting.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 3))
        dilated = cv2.dilate(edges, kernel, iterations=2)

        mid_y = h // 2
        top_density = cv2.countNonZero(dilated[0:mid_y, :])
        bottom_density = cv2.countNonZero(dilated[mid_y:h, :])

        if bottom_density > top_density * flip_density_ratio:
            image = cv2.rotate(image, cv2.ROTATE_180)

    return image


def _detect_edges(image: np.ndarray) -> np.ndarray:
    """Return the Canny edge map of ``image`` after grayscale conversion and a 5x5 Gaussian blur."""
    gray = _to_gray(image)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    return cv2.Canny(blur, 50, 150)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _to_gray(image: np.ndarray) -> np.ndarray:
    """Return a grayscale version of ``image``.

    A 2-D array is treated as already grayscale and returned unchanged (the
    same object, not a copy); any other array is converted with OpenCV's
    BGR-to-gray conversion.
    """
    needs_conversion = image.ndim != 2
    if needs_conversion:
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return image
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _median_skew_angle(lines: Optional[np.ndarray]) -> Optional[float]:
    """Compute the median line angle, ignoring near-vertical lines.

    Parameters
    ----------
    lines:
        Line segments in the layout returned by ``cv2.HoughLinesP``: each
        entry holds one ``(x1, y1, x2, y2)`` row. ``None`` means no lines
        were detected.

    Returns
    -------
    float or None
        Median angle in degrees, within [-45, 45], of all segments that are
        closer to horizontal than to vertical. ``None`` if ``lines`` is
        ``None`` or no segment passes the filter.
    """
    if lines is None:
        return None

    angles = []
    for line in lines:
        x1, y1, x2, y2 = line[0]
        angle = math.degrees(math.atan2(y2 - y1, x2 - x1))

        # A segment has no direction: (x1, y1)->(x2, y2) and the reverse are
        # the same line. Fold the angle into [-90, 90] so that a segment
        # reported right-to-left (angle near +/-180) counts as the
        # near-horizontal line it is instead of skewing the median.
        if angle > 90:
            angle -= 180
        elif angle < -90:
            angle += 180

        # Filter out extreme vertical lines (e.g. card/page borders).
        if abs(angle) <= 45:
            angles.append(angle)

    if not angles:
        return None

    return float(np.median(angles))
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: uprightkit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Lightweight, pure-OpenCV document deskewing and orientation correction — no deep learning required.
|
|
5
|
+
Author-email: Girija Geddavalasa <thatsgirijag@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/yourusername/uprightkit
|
|
8
|
+
Project-URL: Issues, https://github.com/yourusername/uprightkit/issues
|
|
9
|
+
Keywords: opencv,deskew,image-processing,document-scanner,ocr-preprocessing,pdf
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Multimedia :: Graphics :: Capture :: Scanners
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Image Processing
|
|
22
|
+
Requires-Python: >=3.8
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE
|
|
25
|
+
Requires-Dist: opencv-python-headless>=4.5
|
|
26
|
+
Requires-Dist: numpy>=1.20
|
|
27
|
+
Provides-Extra: pdf
|
|
28
|
+
Requires-Dist: pymupdf>=1.23; extra == "pdf"
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
31
|
+
Requires-Dist: build; extra == "dev"
|
|
32
|
+
Requires-Dist: twine; extra == "dev"
|
|
33
|
+
Dynamic: license-file
|
|
34
|
+
|
|
35
|
+
# uprightKit
|
|
36
|
+
|
|
37
|
+
Lightweight, pure-OpenCV document deskewing and orientation correction.
|
|
38
|
+
No deep learning, no model downloads, no GPU — just classical computer
|
|
39
|
+
vision (Hough transform + edge-density profiling).
|
|
40
|
+
|
|
41
|
+
It fixes two common problems with scanned/photographed documents:
|
|
42
|
+
|
|
43
|
+
1. **Fine skew** — the page is tilted a few degrees.
|
|
44
|
+
2. **180-degree flips** — the page was scanned/photographed upside down.
|
|
45
|
+
|
|
46
|
+
## Install
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install uprightkit
|
|
50
|
+
|
|
51
|
+
# with PDF support:
|
|
52
|
+
pip install uprightkit[pdf]
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Usage
|
|
56
|
+
|
|
57
|
+
### Single image
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
import uprightkit
|
|
61
|
+
|
|
62
|
+
uprightkit.fix_image("scan.jpg", output_path="scan_fixed.jpg")
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### PDF (page by page)
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
import uprightkit
|
|
69
|
+
|
|
70
|
+
uprightkit.fix_pdf("document.pdf", output_path="document_fixed.pdf")
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Already have a numpy array (e.g. in an existing OpenCV pipeline)?
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
import cv2
|
|
77
|
+
import uprightkit
|
|
78
|
+
|
|
79
|
+
image = cv2.imread("scan.jpg")
|
|
80
|
+
fixed = uprightkit.fix_array(image) # returns a numpy array, BGR
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Command line
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
uprightkit scan.jpg scan_fixed.jpg
|
|
87
|
+
uprightkit document.pdf document_fixed.pdf
|
|
88
|
+
uprightkit scan.jpg scan_fixed.jpg --no-flip-fix # skip the 180-degree check
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Tuning
|
|
92
|
+
|
|
93
|
+
The 180-degree flip detector assumes a correctly-oriented page has more
|
|
94
|
+
text/content density near the top half than the bottom half. This holds for
|
|
95
|
+
many ID cards, forms, and letters — but not for every document layout. If you
|
|
96
|
+
get incorrect flips on your documents, disable it:
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
uprightkit.fix_image("scan.jpg", output_path="out.jpg", correct_180_flip=False)
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Other tunable parameters (`skew_angle_tolerance`, `flip_density_ratio`,
|
|
103
|
+
`hough_threshold`, `min_line_length`, `max_line_gap`) are documented in the
|
|
104
|
+
docstring of `uprightkit.fix_skew_and_orientation`.
|
|
105
|
+
|
|
106
|
+
## Why no deep learning?
|
|
107
|
+
|
|
108
|
+
This is intentionally a fast, dependency-light tool meant for preprocessing
|
|
109
|
+
pipelines (e.g. before OCR) where you don't want to pull in a multi-hundred-MB
|
|
110
|
+
model or pay GPU inference cost just to straighten a page.
|
|
111
|
+
|
|
112
|
+
## License
|
|
113
|
+
|
|
114
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
uprightkit/__init__.py,sha256=8hBgriWY8H6aTFmLgLdPbmcLA56riL8fIFzCld2rvPM,939
|
|
2
|
+
uprightkit/api.py,sha256=L9gU4YsWdajc6lb7mSBwsES_8oHnW6IG9Fq6tryBhUo,7467
|
|
3
|
+
uprightkit/cli.py,sha256=5UBz8665mACzu0q4Eix6a9RbqvyxN5Bdg6ahT4TMQqI,1868
|
|
4
|
+
uprightkit/core.py,sha256=3EqLh8zSBFT_2rC7sU7HX5IuEsRSHH2ZBKPHN6TzRC8,5752
|
|
5
|
+
uprightkit-0.1.0.dist-info/licenses/LICENSE,sha256=cqbuOPC-kxXJwX4q6wE-ZeZ6wXgwGIaN_HQy_NO2-Sc,1068
|
|
6
|
+
uprightkit-0.1.0.dist-info/METADATA,sha256=e2Iunjut1vTTy_xuwbFB9KjgfMsRqS2BV9b4rSjAo4Y,3596
|
|
7
|
+
uprightkit-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
8
|
+
uprightkit-0.1.0.dist-info/entry_points.txt,sha256=LQ54r0cp-2mSgq8k4UzmrL3ykJE_7wcYqpKTDDNQkco,51
|
|
9
|
+
uprightkit-0.1.0.dist-info/top_level.txt,sha256=UYe3-PS_k8QktjAId_Dl7ISye-uu6lRX54IpzncufaM,11
|
|
10
|
+
uprightkit-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Girija Geddavalasa
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
uprightkit
|