smoothify 0.2.3__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. {smoothify-0.2.3 → smoothify-0.3.1}/.gitignore +3 -0
  2. {smoothify-0.2.3 → smoothify-0.3.1}/.vscode/settings.json +1 -0
  3. smoothify-0.3.1/CHANGELOG.md +67 -0
  4. {smoothify-0.2.3 → smoothify-0.3.1}/PKG-INFO +19 -10
  5. {smoothify-0.2.3 → smoothify-0.3.1}/README.md +18 -9
  6. smoothify-0.3.1/benchmarks/bench_water.py +93 -0
  7. smoothify-0.3.1/examples/Water_Smoothed.gpkg +0 -0
  8. smoothify-0.3.1/examples/merge_holes_examples.ipynb +388 -0
  9. smoothify-0.3.1/examples/real_world_water_example.ipynb +773 -0
  10. smoothify-0.3.1/examples/smoothify_vs_shapely_comparison.ipynb +683 -0
  11. smoothify-0.3.1/examples/usage_examples.ipynb +837 -0
  12. smoothify-0.3.1/fuzz/__init__.py +27 -0
  13. smoothify-0.3.1/fuzz/generators.py +193 -0
  14. smoothify-0.3.1/fuzz/oracle.py +93 -0
  15. smoothify-0.3.1/fuzz/runner.py +177 -0
  16. smoothify-0.3.1/images/generate_pipeline_graphic.py +168 -0
  17. smoothify-0.3.1/images/pipeline_steps.png +0 -0
  18. {smoothify-0.2.3 → smoothify-0.3.1}/pytest.ini +1 -0
  19. smoothify-0.3.1/scripts/fuzz_run.py +143 -0
  20. smoothify-0.3.1/scripts/fuzz_visualize.py +89 -0
  21. {smoothify-0.2.3 → smoothify-0.3.1}/smoothify/_version.py +3 -3
  22. {smoothify-0.2.3 → smoothify-0.3.1}/smoothify/coordinator.py +11 -0
  23. {smoothify-0.2.3 → smoothify-0.3.1}/smoothify/geometry_ops.py +158 -29
  24. {smoothify-0.2.3 → smoothify-0.3.1}/smoothify/smoothify_core.py +176 -36
  25. {smoothify-0.2.3 → smoothify-0.3.1}/smoothify.egg-info/PKG-INFO +19 -10
  26. {smoothify-0.2.3 → smoothify-0.3.1}/smoothify.egg-info/SOURCES.txt +19 -0
  27. smoothify-0.3.1/tests/test_congruence_dedup.py +68 -0
  28. smoothify-0.3.1/tests/test_convexity_artifacts.py +66 -0
  29. smoothify-0.3.1/tests/test_corner_rounding.py +75 -0
  30. smoothify-0.3.1/tests/test_data/convex_pixel_rectangle.gpkg +0 -0
  31. smoothify-0.3.1/tests/test_fuzz.py +54 -0
  32. smoothify-0.3.1/tests/test_merge_holes.py +230 -0
  33. smoothify-0.3.1/tests/test_self_intersecting_variant.py +86 -0
  34. smoothify-0.3.1/tests/test_synthetic_blob_fuzz.py +51 -0
  35. smoothify-0.3.1/tests/test_water_quality_sweep.py +103 -0
  36. smoothify-0.2.3/CHANGELOG.md +0 -40
  37. smoothify-0.2.3/examples/Water_Smoothed.gpkg +0 -0
  38. smoothify-0.2.3/examples/real_world_water_example.ipynb +0 -773
  39. smoothify-0.2.3/examples/smoothify_vs_shapely_comparison.ipynb +0 -683
  40. smoothify-0.2.3/examples/usage_examples.ipynb +0 -830
  41. {smoothify-0.2.3 → smoothify-0.3.1}/.github/workflows/ci.yml +0 -0
  42. {smoothify-0.2.3 → smoothify-0.3.1}/.github/workflows/publish.yml +0 -0
  43. {smoothify-0.2.3 → smoothify-0.3.1}/.pre-commit-config.yaml +0 -0
  44. {smoothify-0.2.3 → smoothify-0.3.1}/.python-version +0 -0
  45. {smoothify-0.2.3 → smoothify-0.3.1}/LICENSE +0 -0
  46. {smoothify-0.2.3 → smoothify-0.3.1}/RELEASING.md +0 -0
  47. {smoothify-0.2.3 → smoothify-0.3.1}/examples/Water.gpkg +0 -0
  48. {smoothify-0.2.3 → smoothify-0.3.1}/images/example_1_polygon.png +0 -0
  49. {smoothify-0.2.3 → smoothify-0.3.1}/images/example_2_linestring.png +0 -0
  50. {smoothify-0.2.3 → smoothify-0.3.1}/images/example_3_iterations.png +0 -0
  51. {smoothify-0.2.3 → smoothify-0.3.1}/images/example_4_merging.png +0 -0
  52. {smoothify-0.2.3 → smoothify-0.3.1}/images/generate_example_images.ipynb +0 -0
  53. {smoothify-0.2.3 → smoothify-0.3.1}/images/generate_readme_image.ipynb +0 -0
  54. {smoothify-0.2.3 → smoothify-0.3.1}/images/smoothify_hero.png +0 -0
  55. {smoothify-0.2.3 → smoothify-0.3.1}/images/smoothify_logo.png +0 -0
  56. {smoothify-0.2.3 → smoothify-0.3.1}/pyproject.toml +0 -0
  57. {smoothify-0.2.3 → smoothify-0.3.1}/setup.cfg +0 -0
  58. {smoothify-0.2.3 → smoothify-0.3.1}/smoothify/__init__.py +0 -0
  59. {smoothify-0.2.3 → smoothify-0.3.1}/smoothify/py.typed +0 -0
  60. {smoothify-0.2.3 → smoothify-0.3.1}/smoothify.egg-info/dependency_links.txt +0 -0
  61. {smoothify-0.2.3 → smoothify-0.3.1}/smoothify.egg-info/requires.txt +0 -0
  62. {smoothify-0.2.3 → smoothify-0.3.1}/smoothify.egg-info/top_level.txt +0 -0
  63. {smoothify-0.2.3 → smoothify-0.3.1}/tests/README.md +0 -0
  64. {smoothify-0.2.3 → smoothify-0.3.1}/tests/__init__.py +0 -0
  65. {smoothify-0.2.3 → smoothify-0.3.1}/tests/conftest.py +0 -0
  66. {smoothify-0.2.3 → smoothify-0.3.1}/tests/test_all_geometry_types.py +0 -0
  67. {smoothify-0.2.3 → smoothify-0.3.1}/tests/test_area_tolerance.py +0 -0
  68. {smoothify-0.2.3 → smoothify-0.3.1}/tests/test_auto_segment_length.py +0 -0
  69. {smoothify-0.2.3 → smoothify-0.3.1}/tests/test_chaikin.py +0 -0
  70. {smoothify-0.2.3 → smoothify-0.3.1}/tests/test_data/naip_owm_water_bodies.geojson +0 -0
  71. {smoothify-0.2.3 → smoothify-0.3.1}/tests/test_edge_cases_coverage.py +0 -0
  72. {smoothify-0.2.3 → smoothify-0.3.1}/tests/test_geometry_types.py +0 -0
  73. {smoothify-0.2.3 → smoothify-0.3.1}/tests/test_invalid_polygon.py +0 -0
  74. {smoothify-0.2.3 → smoothify-0.3.1}/tests/test_real_world_data.py +0 -0
  75. {smoothify-0.2.3 → smoothify-0.3.1}/tests/test_smoothify_api.py +0 -0
  76. {smoothify-0.2.3 → smoothify-0.3.1}/tests/test_smoothify_core.py +0 -0
  77. {smoothify-0.2.3 → smoothify-0.3.1}/tests/visual_tests.ipynb +0 -0
@@ -184,3 +184,6 @@ smoothify/_version.py
184
184
 
185
185
  # uv lockfile (not tracked — library, resolve fresh)
186
186
  uv.lock
187
+
188
+ # Benchmarks
189
+ benchmarks/baseline_output.gpkg
@@ -12,6 +12,7 @@
12
12
  "geodataframe",
13
13
  "geopandas",
14
14
  "gpkg",
15
+ "hausdorffs",
15
16
  "ipykernel",
16
17
  "joblib",
17
18
  "lightgreen",
@@ -0,0 +1,67 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ ## [Unreleased]
6
+
7
+ ## [0.3.1] - 2026-06-15
8
+
9
+ ### Fixed
10
+ - Fixed an `AssertionError` crash (`Resulting geometry must be Polygon or LineString. Got <GeometryCollection>`) when smoothing certain valid thin/staircase polygons only a few `segment_length`s across. A rotated start-point variant could self-intersect after simplify + Chaikin where it rounds a neck about one `segment_length` wide; `make_valid()` then repaired that variant into a polygon *plus* a zero-area line filament at the pinch, and `unary_union` kept the 1-D debris, so the variant union became a `GeometryCollection` the rest of the pipeline could not handle. Zero-area debris is now stripped from each repaired variant before the union, so the result stays a clean, valid `Polygon`.
11
+ - Improved the sharp-concave-fold repair (introduced in 0.3.0) for thin features that shed significant area during smoothing. The closing (dilate-erode) seal now operates on the area-preserved result and restores area again afterwards, instead of sealing the area-deficient pre-buffer variant union — which the area-preservation buffer could then re-expand into the same cusp. This reduces residual folds on narrow arms.
12
+
13
+ ## [0.3.0] - 2026-06-12
14
+
15
+ ### Changed
16
+ - Single-core smoothing is ~3.5x faster on typical raster-derived data (benchmarked on `examples/Water.gpkg`: 6.8s → 1.9s, including the cost of the new fold-repair check below). Output differences are sub-pixel (bounded by the algorithm's own start-point noise floor); area preservation accuracy is unchanged or slightly better. The main changes:
17
+ - Removed the reversed-direction smoothing variants for polygons: Chaikin corner cutting is direction-invariant on closed rings, so they duplicated the forward variants bit-for-bit and only inflated the variant union (output unchanged).
18
+ - Holes are now subtracted in a single `difference` call against their union instead of one `intersection` + `difference` pair per hole (output unchanged).
19
+ - The tiny merge/dissolve buffer now uses mitre joins, which keep corners as single vertices instead of adding ~8 arc vertices per corner (boundary differences at the millimetre scale of the buffer itself).
20
+ - Pre-union Chaikin smoothing of the start-point variants is capped at 2 iterations; detail beyond that was erased by the post-union simplify anyway, while doubling the vertex count entering the expensive union. The final smoothing pass still runs the full `smooth_iterations`.
21
+ - The area-preservation root finder brackets the root from one side using its linear estimate, caches evaluations, and uses a step tolerance derived from the area tolerance via the perimeter, roughly halving the number of buffer operations.
22
+ - Congruent geometries (translated copies of the same shape, common in raster-derived data) are now smoothed once and the result translated to each occurrence; this also reduces work dispatched to parallel workers.
23
+
24
+ ### Added
25
+ - New `merge_holes` option (default `True`): holes that touch or nearly touch (e.g. diagonally adjacent raster cells) are joined before smoothing, so they smooth into one coherent opening instead of separate overlapping shapes leaving a fake land bridge. Pass `merge_holes=False` for the previous per-hole behaviour. Mirrors what `merge_multipolygons` does for shells.
26
+ - `examples/merge_holes_examples.ipynb`: worked examples of `merge_holes` and its interplay with `merge_collection` (touching holes, overlapping donuts, holes split across features).
27
+ - `benchmarks/bench_water.py`: single-core timing/profiling benchmark with baseline output comparison.
28
+
29
+ ### Fixed
30
+ - Fixed sharp concave folds in smoothed output on shapes with features about one `segment_length` wide (e.g. a hole with a one-pixel-wide arm, as produced by `merge_holes` joining a small hole to a larger one). The start-point smoothing variants can disagree about such features, leaving a forked slit in their union that the area-preservation shrink sharpens into a cusp. The final result is now checked for sharp concave turns and, when one is found, recomputed from the variant union sealed with a small closing (dilate-erode at `segment_length / 4`).
31
+ - Fixed sharp cusps where a smoothed hole crosses the independently smoothed exterior and gets clipped by the hole subtraction (previously up to a 180-degree fold at the tangential crossing points). When detected, the result is repaired with a small opening + closing (at `segment_length / 4`), which removes hair-thin material needles and seals thin slits without visibly moving the boundary.
32
+ - Fixed shapes with long straight edges being massively over-rounded (e.g. a large square with a small `segment_length` collapsed into a circle). The simplify steps strip all collinear vertices from straight edges, and Chaikin's corner cuts scale with segment length, so corner rounding grew with edge length instead of `segment_length`. Geometries are now re-segmentized after each simplify step, capping corner rounding at roughly `segment_length` while still smoothing raster staircase artifacts into curves. Applies to all geometry types. Note: outputs are somewhat denser (more vertices) and smoothing is ~20% slower on typical raster-derived data.
33
+
34
+ ## [0.2.3] - 2026-06-02
35
+
36
+ ### Added
37
+ - The package now ships inline type hints and a `py.typed` marker, so type checkers (mypy, Pyright/Pylance) pick up `smoothify()`'s signatures and overloads in downstream code.
38
+
39
+ ### Changed
40
+ - Invalid geometries (e.g. self-intersecting polygons) are now returned unchanged with a warning instead of crashing with a cryptic error or silently collapsing to an empty geometry. Behaviour is consistent across single geometries, lists/collections, and GeoDataFrames (and regardless of `merge_collection`). Repair them with shapely's `make_valid()` first if you want them smoothed.
41
+
42
+ ## [0.2.2] - 2026-03-24
43
+
44
+ ### Fixed
45
+ - Fixed LineString smoothing: self-intersecting lines are no longer run through the polygon-only `make_valid`/`unary_union` step, which could split them into a MultiLineString at crossing points
46
+ - Preserve coordinate dimensionality during Chaikin corner cutting instead of forcing 2D output, so smoothed geometries keep their original dimensions
47
+
48
+ ## [0.2.1] - 2026-02-25
49
+
50
+ ### Fixed
51
+ - Fixed `TopologyException` crash on thin/elongated polygons by validating smoothed variants before union
52
+ - Fixed crash when hole subtraction splits a polygon into a MultiPolygon (e.g. tiny holes relative to segment length)
53
+
54
+ ## [0.2.0] - 2026-02-25
55
+
56
+ ### Fixed
57
+ - `smooth_iterations=0` now returns the original input unchanged instead of running the geometry through the segmentize/simplify pipeline without smoothing
58
+
59
+ ## [0.1.0] - 2025-11-25
60
+
61
+ ### Added
62
+ - Initial public release
63
+ - Core smoothing functionality using Chaikin's corner-cutting algorithm
64
+ - Support for all Shapely geometry types (Polygon, LineString, MultiPolygon, etc.)
65
+ - Automatic segment length detection
66
+ - Parallel processing support
67
+ - Area preservation for polygons
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: smoothify
3
- Version: 0.2.3
3
+ Version: 0.3.1
4
4
  Summary: Transform pixelated geometries from raster data into smooth natural looking features
5
5
  Author-email: Nick Wright <nicholas.wright@dpird.wa.gov.au>
6
6
  License-Expression: MIT
@@ -61,7 +61,7 @@ Polygons and lines derived from classified raster data (e.g., ML model predictio
61
61
  Smoothify applies an optimized implementation of Chaikin's corner-cutting algorithm along with other geometric processing to create smooth, natural-looking features while:
62
62
 
63
63
  - Preserving the general shape and area of polygons
64
- - Supporting all shapley geometry types
64
+ - Supporting all shapely geometry types
65
65
  - Handling shapes with interior holes
66
66
  - Efficiently processing large datasets with multiprocessing
67
67
 
@@ -118,6 +118,7 @@ Example notebooks:
118
118
  - [Usage examples](https://github.com/DPIRD-DMA/Smoothify/blob/main/examples/usage_examples.ipynb)
119
119
  - [Smoothify vs. Shapely comparison](https://github.com/DPIRD-DMA/Smoothify/blob/main/examples/smoothify_vs_shapely_comparison.ipynb)
120
120
  - [Real-world water example](https://github.com/DPIRD-DMA/Smoothify/blob/main/examples/real_world_water_example.ipynb)
121
+ - [Merging holes](https://github.com/DPIRD-DMA/Smoothify/blob/main/examples/merge_holes_examples.ipynb)
121
122
 
122
123
 
123
124
  ### Basic Polygon Smoothing
@@ -222,6 +223,7 @@ smoothed = smoothify(
222
223
  | `merge_collection` | bool | True | Whether to merge/dissolve adjacent geometries in collections before smoothing |
223
224
  | `merge_field` | str | None | **GeoDataFrame only**: Column name to use for dissolving geometries. Only valid when `merge_collection=True`. If None, dissolves all geometries together. If specified, dissolves geometries grouped by the column values |
224
225
  | `merge_multipolygons` | bool | True | Whether to merge adjacent polygons within MultiPolygons before smoothing |
226
+ | `merge_holes` | bool | True | Whether to join holes that touch or nearly touch (e.g. diagonally adjacent raster cells) before smoothing, so they smooth into one coherent opening instead of separate overlapping shapes |
225
227
  | `preserve_area` | bool | True | Whether to restore original area after smoothing via buffering (applies to Polygons only) |
226
228
  | `area_tolerance` | float | 0.01 | Percentage of original area allowed as error (e.g., 0.01 = 0.01% error = 99.99% preservation). Only affects Polygons when preserve_area=True |
227
229
 
@@ -229,14 +231,20 @@ smoothed = smoothify(
229
231
 
230
232
  Smoothify uses an advanced multi-step smoothing pipeline:
231
233
 
234
+ <p align="left">
235
+ <img src="https://raw.githubusercontent.com/DPIRD-DMA/Smoothify/main/images/pipeline_steps.png" alt="Smoothify pipeline steps" width="800">
236
+ </p>
237
+
232
238
 
233
- 1. Adds intermediate vertices along line segments (segmentize)
234
- 2. Generates multiple rotated variants (for Polygons) to avoid artifacts
235
- 3. Simplifies each variant to remove noise
236
- 4. Applies Chaikin corner cutting to smooth
237
- 5. Merges all variants via union to eliminate start-point artifacts
238
- 6. Applies final smoothing pass
239
- 7. Optionally restores original area via buffering (for Polygons)
239
+ 1. Joins touching holes (for Polygons, when `merge_holes=True`) so they smooth as one opening
240
+ 2. Adds intermediate vertices along line segments (segmentize)
241
+ 3. Generates multiple rotated variants (for Polygons) to avoid artifacts
242
+ 4. Simplifies each variant to remove noise
243
+ 5. Applies Chaikin corner cutting to smooth
244
+ 6. Merges all variants via union to eliminate start-point artifacts
245
+ 7. Applies final smoothing pass
246
+ 8. Optionally restores original area via buffering (for Polygons)
247
+ 9. Detects and repairs any sharp folds left by features near the smoothing scale (e.g. one-pixel-wide arms), using a small morphological opening/closing bounded at `segment_length / 4`
240
248
 
241
249
  ## Invalid Geometries
242
250
 
@@ -257,9 +265,10 @@ smoothed = smoothify(make_valid(polygon), segment_length=1.0)
257
265
  ## Performance Considerations
258
266
 
259
267
  - **Parallel Processing**: For large GeoDataFrames or collections, use `num_cores` = 0 to enable parallel processing
268
+ - **Duplicate Shapes**: Geometries that are translated copies of the same shape (common in raster-derived data, e.g. single-pixel polygons) are automatically smoothed once and the result reused
260
269
  - **Smoothing Iterations**: Values of 3-5 typically provide good results. Higher values create smoother output but increase processing time and vertex count
261
270
  - **Memory Usage**: Scales with geometry complexity. The algorithm creates multiple variants during smoothing
262
- - **Optimal segment_length**: Should match the original raster cell size (pixel size) or be slightly larger for best results
271
+ - **Optimal segment_length**: Anything from about half the original raster pixel size and up should produce reasonable output — larger values produce more rounded output, smaller values stay more faithful to the original geometry
263
272
 
264
273
  ## Running the Tests
265
274
 
@@ -28,7 +28,7 @@ Polygons and lines derived from classified raster data (e.g., ML model predictio
28
28
  Smoothify applies an optimized implementation of Chaikin's corner-cutting algorithm along with other geometric processing to create smooth, natural-looking features while:
29
29
 
30
30
  - Preserving the general shape and area of polygons
31
- - Supporting all shapley geometry types
31
+ - Supporting all shapely geometry types
32
32
  - Handling shapes with interior holes
33
33
  - Efficiently processing large datasets with multiprocessing
34
34
 
@@ -85,6 +85,7 @@ Example notebooks:
85
85
  - [Usage examples](https://github.com/DPIRD-DMA/Smoothify/blob/main/examples/usage_examples.ipynb)
86
86
  - [Smoothify vs. Shapely comparison](https://github.com/DPIRD-DMA/Smoothify/blob/main/examples/smoothify_vs_shapely_comparison.ipynb)
87
87
  - [Real-world water example](https://github.com/DPIRD-DMA/Smoothify/blob/main/examples/real_world_water_example.ipynb)
88
+ - [Merging holes](https://github.com/DPIRD-DMA/Smoothify/blob/main/examples/merge_holes_examples.ipynb)
88
89
 
89
90
 
90
91
  ### Basic Polygon Smoothing
@@ -189,6 +190,7 @@ smoothed = smoothify(
189
190
  | `merge_collection` | bool | True | Whether to merge/dissolve adjacent geometries in collections before smoothing |
190
191
  | `merge_field` | str | None | **GeoDataFrame only**: Column name to use for dissolving geometries. Only valid when `merge_collection=True`. If None, dissolves all geometries together. If specified, dissolves geometries grouped by the column values |
191
192
  | `merge_multipolygons` | bool | True | Whether to merge adjacent polygons within MultiPolygons before smoothing |
193
+ | `merge_holes` | bool | True | Whether to join holes that touch or nearly touch (e.g. diagonally adjacent raster cells) before smoothing, so they smooth into one coherent opening instead of separate overlapping shapes |
192
194
  | `preserve_area` | bool | True | Whether to restore original area after smoothing via buffering (applies to Polygons only) |
193
195
  | `area_tolerance` | float | 0.01 | Percentage of original area allowed as error (e.g., 0.01 = 0.01% error = 99.99% preservation). Only affects Polygons when preserve_area=True |
194
196
 
@@ -196,14 +198,20 @@ smoothed = smoothify(
196
198
 
197
199
  Smoothify uses an advanced multi-step smoothing pipeline:
198
200
 
201
+ <p align="left">
202
+ <img src="https://raw.githubusercontent.com/DPIRD-DMA/Smoothify/main/images/pipeline_steps.png" alt="Smoothify pipeline steps" width="800">
203
+ </p>
204
+
199
205
 
200
- 1. Adds intermediate vertices along line segments (segmentize)
201
- 2. Generates multiple rotated variants (for Polygons) to avoid artifacts
202
- 3. Simplifies each variant to remove noise
203
- 4. Applies Chaikin corner cutting to smooth
204
- 5. Merges all variants via union to eliminate start-point artifacts
205
- 6. Applies final smoothing pass
206
- 7. Optionally restores original area via buffering (for Polygons)
206
+ 1. Joins touching holes (for Polygons, when `merge_holes=True`) so they smooth as one opening
207
+ 2. Adds intermediate vertices along line segments (segmentize)
208
+ 3. Generates multiple rotated variants (for Polygons) to avoid artifacts
209
+ 4. Simplifies each variant to remove noise
210
+ 5. Applies Chaikin corner cutting to smooth
211
+ 6. Merges all variants via union to eliminate start-point artifacts
212
+ 7. Applies final smoothing pass
213
+ 8. Optionally restores original area via buffering (for Polygons)
214
+ 9. Detects and repairs any sharp folds left by features near the smoothing scale (e.g. one-pixel-wide arms), using a small morphological opening/closing bounded at `segment_length / 4`
207
215
 
208
216
  ## Invalid Geometries
209
217
 
@@ -224,9 +232,10 @@ smoothed = smoothify(make_valid(polygon), segment_length=1.0)
224
232
  ## Performance Considerations
225
233
 
226
234
  - **Parallel Processing**: For large GeoDataFrames or collections, use `num_cores` = 0 to enable parallel processing
235
+ - **Duplicate Shapes**: Geometries that are translated copies of the same shape (common in raster-derived data, e.g. single-pixel polygons) are automatically smoothed once and the result reused
227
236
  - **Smoothing Iterations**: Values of 3-5 typically provide good results. Higher values create smoother output but increase processing time and vertex count
228
237
  - **Memory Usage**: Scales with geometry complexity. The algorithm creates multiple variants during smoothing
229
- - **Optimal segment_length**: Should match the original raster cell size (pixel size) or be slightly larger for best results
238
+ - **Optimal segment_length**: Anything from about half the original raster pixel size and up should produce reasonable output — larger values produce more rounded output, smaller values stay more faithful to the original geometry
230
239
 
231
240
  ## Running the Tests
232
241
 
@@ -0,0 +1,93 @@
1
+ """Benchmark smoothify on examples/Water.gpkg (single core).
2
+
3
+ Usage:
4
+ python benchmarks/bench_water.py # time it
5
+ python benchmarks/bench_water.py --profile # cProfile, print top hotspots
6
+ python benchmarks/bench_water.py --save-baseline # save output for comparison
7
+ python benchmarks/bench_water.py --compare # compare output to baseline
8
+ """
9
+
10
+ import argparse
11
+ import cProfile
12
+ import pstats
13
+ import time
14
+ from pathlib import Path
15
+
16
+ import geopandas as gpd
17
+ import numpy as np
18
+ import shapely
19
+
20
+ from smoothify import smoothify
21
+
22
+ ROOT = Path(__file__).resolve().parent.parent
23
+ DATA = ROOT / "examples" / "Water.gpkg"
24
+ BASELINE = ROOT / "benchmarks" / "baseline_output.gpkg"
25
+
26
+
27
+ def run(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
28
+ return smoothify(gdf, num_cores=1)
29
+
30
+
31
+ def quality_report(result: gpd.GeoDataFrame, baseline: gpd.GeoDataFrame) -> None:
32
+ """Compare result to baseline: per-feature symmetric difference / hausdorff."""
33
+ if len(result) != len(baseline):
34
+ print(f"FEATURE COUNT DIFFERS: {len(result)} vs baseline {len(baseline)}")
35
+ return
36
+ sym_ratios = []
37
+ hausdorffs = []
38
+ for g_new, g_old in zip(result.geometry, baseline.geometry, strict=True):
39
+ if g_old.is_empty and g_new.is_empty:
40
+ continue
41
+ denom = g_old.area if g_old.area > 0 else 1.0
42
+ sym_ratios.append(g_new.symmetric_difference(g_old).area / denom)
43
+ hausdorffs.append(g_new.hausdorff_distance(g_old))
44
+ sym = np.array(sym_ratios)
45
+ hd = np.array(hausdorffs)
46
+ print(f"identical features: {(sym == 0).sum()}/{len(sym)}")
47
+ print(f"sym-diff area ratio mean: {sym.mean():.2e} max: {sym.max():.2e}")
48
+ print(f"hausdorff dist (m) mean: {hd.mean():.3f} max: {hd.max():.3f}")
49
+
50
+
51
+ def main() -> None:
52
+ parser = argparse.ArgumentParser()
53
+ parser.add_argument("--profile", action="store_true")
54
+ parser.add_argument("--save-baseline", action="store_true")
55
+ parser.add_argument("--compare", action="store_true")
56
+ parser.add_argument("--repeat", type=int, default=1)
57
+ parser.add_argument("--limit", type=int, default=0, help="only first N features")
58
+ args = parser.parse_args()
59
+
60
+ gdf = gpd.read_file(DATA)
61
+ if args.limit:
62
+ gdf = gdf.head(args.limit).copy()
63
+ nverts = shapely.get_num_coordinates(gdf.geometry.values).sum()
64
+ print(f"{len(gdf)} features, {nverts} vertices")
65
+
66
+ if args.profile:
67
+ profiler = cProfile.Profile()
68
+ profiler.enable()
69
+ result = run(gdf)
70
+ profiler.disable()
71
+ stats = pstats.Stats(profiler)
72
+ stats.sort_stats("cumulative").print_stats(30)
73
+ stats.sort_stats("tottime").print_stats(30)
74
+ else:
75
+ times = []
76
+ result = None
77
+ for _ in range(args.repeat):
78
+ t0 = time.perf_counter()
79
+ result = run(gdf)
80
+ times.append(time.perf_counter() - t0)
81
+ print(f"time: best {min(times):.2f}s " + " ".join(f"{t:.2f}" for t in times))
82
+
83
+ assert result is not None
84
+ if args.save_baseline:
85
+ result.to_file(BASELINE, driver="GPKG")
86
+ print(f"baseline saved to {BASELINE}")
87
+ elif args.compare:
88
+ baseline = gpd.read_file(BASELINE)
89
+ quality_report(result, baseline)
90
+
91
+
92
+ if __name__ == "__main__":
93
+ main()