tiny-osm 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tiny_osm/__init__.py +24 -0
- tiny_osm/_geojson.py +325 -0
- tiny_osm/_logging.py +160 -0
- tiny_osm/exceptions.py +12 -0
- tiny_osm/osm_fetch.py +347 -0
- tiny_osm-0.1.0.dist-info/METADATA +152 -0
- tiny_osm-0.1.0.dist-info/RECORD +10 -0
- tiny_osm-0.1.0.dist-info/WHEEL +4 -0
- tiny_osm-0.1.0.dist-info/licenses/AUTHORS.md +3 -0
- tiny_osm-0.1.0.dist-info/licenses/LICENSE +21 -0
tiny_osm/__init__.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""TinyOSM: Fetch OpenStreetMap data for a bounding box."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
6
|
+
|
|
7
|
+
from tiny_osm import exceptions
|
|
8
|
+
from tiny_osm._logging import configure_logger
|
|
9
|
+
from tiny_osm.exceptions import OverpassError
|
|
10
|
+
from tiny_osm.osm_fetch import OSMFilters, fetch
|
|
11
|
+
|
|
12
|
+
# Resolve the installed distribution's version; fall back to a sentinel when
# the package metadata is absent (e.g. running from an uninstalled checkout).
try:
    __version__ = version("tiny_osm")
except PackageNotFoundError:
    __version__ = "999"

# Explicit public API for ``from tiny_osm import *`` and documentation tools.
__all__ = [
    "OSMFilters",
    "OverpassError",
    "__version__",
    "configure_logger",
    "exceptions",
    "fetch",
]
|
tiny_osm/_geojson.py
ADDED
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
"""Convert Overpass API responses into GeoJSON Features.
|
|
2
|
+
|
|
3
|
+
The Overpass queries built by :mod:`tiny_osm.osm_fetch` use ``out geom;``,
|
|
4
|
+
so every way and every relation member already carries its lat/lon
|
|
5
|
+
coordinates inline - no secondary node-ID lookup is needed.
|
|
6
|
+
|
|
7
|
+
This module turns that raw response stream into a valid GeoJSON
|
|
8
|
+
``FeatureCollection`` dict that can be fed directly to
|
|
9
|
+
``geopandas.GeoDataFrame.from_features`` without any post-processing.
|
|
10
|
+
Responses from multiple tile/layer queries are deduplicated by
|
|
11
|
+
``(type, id)`` in a single pass, so callers can pass the full list of
|
|
12
|
+
per-query responses straight through.
|
|
13
|
+
|
|
14
|
+
Geometry rules
|
|
15
|
+
--------------
|
|
16
|
+
* ``node`` elements become ``Point`` features **only if they carry tags**.
|
|
17
|
+
Untagged nodes that appear in Overpass responses are orphan geometry
|
|
18
|
+
primitives and are silently dropped.
|
|
19
|
+
* ``way`` elements become ``LineString`` features, unless the way is
|
|
20
|
+
closed and its tags mark it as an area (see :func:`_is_area`) - in
|
|
21
|
+
which case it becomes a ``Polygon``.
|
|
22
|
+
* ``relation`` elements become ``Polygon`` or ``MultiPolygon`` features
|
|
23
|
+
**only when ``type=multipolygon``**. Other relation types (routes,
|
|
24
|
+
boundaries, turn restrictions, …) are silently dropped: they don't fit
|
|
25
|
+
the highway/waterway/water-body data model tiny-osm targets.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
from typing import Any
|
|
31
|
+
|
|
32
|
+
from tiny_osm._logging import logger
|
|
33
|
+
|
|
34
|
+
__all__ = ["elements_to_features"]
|
|
35
|
+
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
# Tag heuristics
|
|
38
|
+
# ---------------------------------------------------------------------------
|
|
39
|
+
|
|
40
|
+
# Keys whose presence on a closed way marks it as an area (polygon).
|
|
41
|
+
# Derived from the OSM wiki Polygon Features table, restricted to the tags
|
|
42
|
+
# tiny-osm can plausibly return. Tags not in this set default to "line",
|
|
43
|
+
# so a closed road without ``area=yes`` stays a LineString.
|
|
44
|
+
_AREA_KEYS: frozenset[str] = frozenset(
|
|
45
|
+
{
|
|
46
|
+
"amenity",
|
|
47
|
+
"basin",
|
|
48
|
+
"boundary",
|
|
49
|
+
"building",
|
|
50
|
+
"landuse",
|
|
51
|
+
"leisure",
|
|
52
|
+
"natural",
|
|
53
|
+
"place",
|
|
54
|
+
"water",
|
|
55
|
+
"waterway", # e.g. waterway=riverbank, waterway=dock - closed = polygon
|
|
56
|
+
}
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
# ``natural`` values that stay as linestrings even when closed.
|
|
60
|
+
_NATURAL_LINE_VALUES: frozenset[str] = frozenset({"coastline", "cliff", "ridge", "tree_row"})
|
|
61
|
+
|
|
62
|
+
# ``waterway`` values that stay as linestrings even when closed.
|
|
63
|
+
_WATERWAY_LINE_VALUES: frozenset[str] = frozenset(
|
|
64
|
+
{"river", "stream", "canal", "drain", "ditch", "brook", "tidal_channel"}
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _is_area(tags: dict[str, str]) -> bool:
|
|
69
|
+
"""Return ``True`` if a closed way's tags imply a polygon geometry.
|
|
70
|
+
|
|
71
|
+
Implements the Polygon Features rule tree:
|
|
72
|
+
|
|
73
|
+
1. ``area=yes`` → always polygon
|
|
74
|
+
2. ``area=no`` → always line
|
|
75
|
+
3. ``natural=coastline|cliff|ridge|tree_row`` → line
|
|
76
|
+
4. ``waterway=river|stream|canal|...`` → line (even when closed)
|
|
77
|
+
5. Any area-implying key present → polygon
|
|
78
|
+
6. Otherwise → line
|
|
79
|
+
"""
|
|
80
|
+
area = tags.get("area")
|
|
81
|
+
if area == "yes":
|
|
82
|
+
return True
|
|
83
|
+
if area == "no":
|
|
84
|
+
return False
|
|
85
|
+
if tags.get("natural") in _NATURAL_LINE_VALUES:
|
|
86
|
+
return False
|
|
87
|
+
if tags.get("waterway") in _WATERWAY_LINE_VALUES:
|
|
88
|
+
return False
|
|
89
|
+
return any(k in tags for k in _AREA_KEYS)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# ---------------------------------------------------------------------------
|
|
93
|
+
# Coordinate helpers
|
|
94
|
+
# ---------------------------------------------------------------------------
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _coords_from_geometry(geom: list[dict[str, float]]) -> list[list[float]]:
|
|
98
|
+
"""Convert Overpass ``[{lat, lon}, ...]`` to GeoJSON ``[[lon, lat], ...]``."""
|
|
99
|
+
return [[p["lon"], p["lat"]] for p in geom]
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _is_closed(coords: list[list[float]]) -> bool:
|
|
103
|
+
"""A ring needs at least 4 points (3 unique + closure) and equal endpoints."""
|
|
104
|
+
return len(coords) >= 4 and coords[0] == coords[-1]
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _make_feature(
|
|
108
|
+
osm_type: str,
|
|
109
|
+
osm_id: int,
|
|
110
|
+
geometry: dict[str, Any],
|
|
111
|
+
tags: dict[str, str],
|
|
112
|
+
) -> dict[str, Any]:
|
|
113
|
+
"""Assemble a GeoJSON Feature.
|
|
114
|
+
|
|
115
|
+
The ``id`` is a string of the form ``"way/12345"`` so that identifiers
|
|
116
|
+
remain unique across node/way/relation spaces. Tags go directly into
|
|
117
|
+
``properties``; an ``osm_type`` key is added so downstream code can
|
|
118
|
+
tell node-Points from way-LineStrings even after flattening into a
|
|
119
|
+
GeoDataFrame.
|
|
120
|
+
"""
|
|
121
|
+
return {
|
|
122
|
+
"type": "Feature",
|
|
123
|
+
"id": f"{osm_type}/{osm_id}",
|
|
124
|
+
"geometry": geometry,
|
|
125
|
+
"properties": {"osm_type": osm_type, "osm_id": osm_id, **tags},
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# ---------------------------------------------------------------------------
|
|
130
|
+
# Element converters
|
|
131
|
+
# ---------------------------------------------------------------------------
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _node_to_feature(node: dict[str, Any]) -> dict[str, Any] | None:
    """Convert a tagged node into a Point feature; untagged (orphan) nodes yield ``None``."""
    tags = node.get("tags")
    if not tags:
        return None
    point = {"type": "Point", "coordinates": [node["lon"], node["lat"]]}
    return _make_feature("node", node["id"], point, tags)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _way_to_feature(way: dict[str, Any]) -> dict[str, Any] | None:
    """Convert a way into a LineString, or a Polygon when closed and area-tagged.

    Returns ``None`` when the way carries no usable inline geometry (e.g. it
    lies outside the queried bbox) or has fewer than two points.
    """
    raw_points = way.get("geometry")
    if not raw_points:
        return None

    coords = _coords_from_geometry(raw_points)
    if len(coords) < 2:
        return None

    tags = way.get("tags") or {}
    if _is_closed(coords) and _is_area(tags):
        shape: dict[str, Any] = {"type": "Polygon", "coordinates": [coords]}
    else:
        shape = {"type": "LineString", "coordinates": coords}
    return _make_feature("way", way["id"], shape, tags)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
# ---------------------------------------------------------------------------
|
|
172
|
+
# Multipolygon ring assembly
|
|
173
|
+
# ---------------------------------------------------------------------------
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _assemble_rings(ways: list[list[list[float]]]) -> list[list[list[float]]]:  # noqa: C901
    """Stitch a list of way coordinate sequences into closed rings.

    Ways that arrive already closed pass through unchanged. Open ways are
    joined pairwise by shared endpoints - each iteration looks for a
    remaining way whose first or last point matches the current chain's
    first or last point, and extends (reversing if needed). Fragments
    that cannot be closed are logged and dropped.
    """
    closed: list[list[list[float]]] = []
    remaining: list[list[list[float]]] = []
    # Partition the input: degenerate (<2 point) fragments are dropped
    # outright, already-closed rings skip the stitching loop entirely.
    for way in ways:
        if len(way) < 2:
            continue
        (closed if _is_closed(way) else remaining).append(way)

    while remaining:
        # Seed a chain with an arbitrary open way, then grow it greedily.
        chain = list(remaining.pop(0))
        extended = True
        while extended and not _is_closed(chain):
            extended = False
            for i, way in enumerate(remaining):
                # Four join cases: the candidate way can attach at either end
                # of the chain, in either orientation. The shared endpoint is
                # not duplicated - hence the [1:] / [:-1] slices.
                if way[0] == chain[-1]:
                    chain.extend(way[1:])
                elif way[-1] == chain[-1]:
                    chain.extend(reversed(way[:-1]))
                elif way[-1] == chain[0]:
                    chain = list(way) + chain[1:]
                elif way[0] == chain[0]:
                    chain = list(reversed(way)) + chain[1:]
                else:
                    continue
                # A way was consumed: remove it and rescan what's left.
                remaining.pop(i)
                extended = True
                break
        if _is_closed(chain):
            closed.append(chain)
        else:
            logger.debug(f"multipolygon fragment with {len(chain)} points could not close")
    return closed
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _ring_contains_point(ring: list[list[float]], point: list[float]) -> bool:
|
|
219
|
+
"""Ray-casting point-in-ring test. Input ring is assumed closed."""
|
|
220
|
+
x, y = point
|
|
221
|
+
inside = False
|
|
222
|
+
n = len(ring)
|
|
223
|
+
j = n - 1
|
|
224
|
+
for i in range(n):
|
|
225
|
+
xi, yi = ring[i]
|
|
226
|
+
xj, yj = ring[j]
|
|
227
|
+
if ((yi > y) != (yj > y)) and (x < (xj - xi) * (y - yi) / (yj - yi) + xi):
|
|
228
|
+
inside = not inside
|
|
229
|
+
j = i
|
|
230
|
+
return inside
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _relation_to_feature(relation: dict[str, Any]) -> dict[str, Any] | None:  # noqa: C901
    """Convert a ``type=multipolygon`` relation into a (Multi)Polygon feature.

    Member ways are split by role into outer and inner rings, stitched via
    :func:`_assemble_rings`, and each inner ring is attached to the first
    outer ring that contains it. Returns ``None`` for non-multipolygon
    relations or when no outer ring can be assembled.
    """
    tags = relation.get("tags") or {}
    if tags.get("type") != "multipolygon":
        return None  # only multipolygon relations produce geometries here

    outer_ways: list[list[list[float]]] = []
    inner_ways: list[list[list[float]]] = []
    for member in relation.get("members", []):
        # Non-way members (e.g. node members) carry no ring geometry.
        if member.get("type") != "way":
            continue
        member_geom = member.get("geometry")
        if not member_geom:
            continue
        coords = _coords_from_geometry(member_geom)
        if len(coords) < 2:
            continue
        # Members without an explicit role default to "outer" - matches the
        # osmnx + JOSM convention for legacy relations.
        if member.get("role") == "inner":
            inner_ways.append(coords)
        else:
            outer_ways.append(coords)

    outer_rings = _assemble_rings(outer_ways)
    if not outer_rings:
        return None
    inner_rings = _assemble_rings(inner_ways)

    # Each polygon starts as [outer]; we append its contained inner rings.
    polygons: list[list[list[list[float]]]] = [[ring] for ring in outer_rings]
    for inner in inner_rings:
        for idx, outer in enumerate(outer_rings):
            # A point on the inner ring is either inside its outer shell or
            # inside no shell at all; testing the first vertex is sufficient.
            if _ring_contains_point(outer, inner[0]):
                polygons[idx].append(inner)
                break

    # A single shell stays a plain Polygon; multiple shells become MultiPolygon.
    if len(polygons) == 1:
        return _make_feature(
            "relation",
            relation["id"],
            {"type": "Polygon", "coordinates": polygons[0]},
            tags,
        )
    return _make_feature(
        "relation",
        relation["id"],
        {"type": "MultiPolygon", "coordinates": polygons},
        tags,
    )
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
# ---------------------------------------------------------------------------
|
|
287
|
+
# Public entry point
|
|
288
|
+
# ---------------------------------------------------------------------------
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def _element_to_feature(element: dict[str, Any]) -> dict[str, Any] | None:
    """Route one OSM element to the converter for its ``type``; unknown types yield ``None``."""
    converters = {
        "node": _node_to_feature,
        "way": _way_to_feature,
        "relation": _relation_to_feature,
    }
    converter = converters.get(element.get("type"))
    if converter is None:
        return None
    return converter(element)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def elements_to_features(responses: list[dict[str, Any]]) -> dict[str, Any]:
    """Merge Overpass responses into one GeoJSON FeatureCollection dict.

    Elements are deduplicated by ``(type, id)`` across all responses, so the
    same way appearing in two tile responses is emitted once. Any ``remark``
    field present in a response is surfaced via the logger. Orphan member
    nodes (no tags) and relations other than ``type=multipolygon`` are
    silently dropped. The result is a valid FeatureCollection:

    >>> fc = elements_to_features(responses)
    >>> import geopandas as gpd
    >>> gdf = gpd.GeoDataFrame.from_features(fc["features"], crs="EPSG:4326")
    """
    unique: dict[tuple[str, int], dict[str, Any]] = {}
    for response in responses:
        remark = response.get("remark")
        if remark:
            logger.warning(f"Overpass remarked: {remark!r}")
        for element in response.get("elements", []):
            key = (element["type"], element["id"])
            if key not in unique:
                unique[key] = element

    features = []
    for element in unique.values():
        feature = _element_to_feature(element)
        if feature is not None:
            features.append(feature)
    return {"type": "FeatureCollection", "features": features}
|
tiny_osm/_logging.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""Logging utilities for tiny_osm.
|
|
2
|
+
|
|
3
|
+
Example:
|
|
4
|
+
-------
|
|
5
|
+
>>> from tiny_osm import configure_logger, logger
|
|
6
|
+
>>> configure_logger(verbose=True) # console shows DEBUG+
|
|
7
|
+
>>> configure_logger(file="run.log") # also log to file
|
|
8
|
+
>>> logger.info("Starting pipeline")
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
import sys
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import IO, Literal
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"configure_logger",
|
|
21
|
+
"generate_log_path",
|
|
22
|
+
"get_log_file_path",
|
|
23
|
+
"logger",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
# Package-wide logger. The logger itself passes everything (DEBUG); the
# attached handlers do the actual level filtering.
logger = logging.getLogger("tiny_osm")
logger.setLevel(logging.DEBUG)
logger.propagate = False

# Handlers currently managed by this module (None when not installed).
_file_handler: logging.FileHandler | None = None
_console_handler: logging.StreamHandler[IO[str]] | None = None

# Install the default stderr handler only once: on re-import (or if a user
# attached handlers before this module loaded) the existing handlers win.
_handlers = logger.handlers
if not _handlers:
    _sh: logging.StreamHandler[IO[str]] = logging.StreamHandler(sys.stderr)
    _sh.setFormatter(logging.Formatter("%(levelname)-8s %(message)s"))
    _sh.setLevel(logging.WARNING)
    logger.addHandler(_sh)
    _console_handler = _sh
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def generate_log_path(work_dir: Path, prefix: str = "tiny_osm") -> Path:
    """Build a timestamped log file path inside *work_dir*.

    Parameters
    ----------
    work_dir : Path
        Directory where the log file will be created.
    prefix : str, optional
        Prefix for the log file name. Defaults to ``"tiny_osm"``.

    Returns
    -------
    Path
        Path to the log file (e.g., ``<work_dir>/tiny_osm-20260206-140112.log``).
    """
    # Local-timezone timestamp keeps file names sortable and unambiguous.
    stamp = datetime.now().astimezone().strftime("%Y%m%d-%H%M%S")
    return work_dir / f"{prefix}-{stamp}.log"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def get_log_file_path() -> Path | None:
    """Return the active log file's path, or ``None`` when file logging is off."""
    handler = _file_handler
    if handler is None:
        return None
    return Path(handler.baseFilename)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _validate_level(level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] | int) -> int:
|
|
69
|
+
if isinstance(level, str):
|
|
70
|
+
level_upper = level.upper()
|
|
71
|
+
if level_upper not in ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"):
|
|
72
|
+
msg = f"Invalid log level: {level!r}. Must be DEBUG, INFO, WARNING, ERROR, or CRITICAL."
|
|
73
|
+
raise ValueError(msg)
|
|
74
|
+
return getattr(logging, level_upper)
|
|
75
|
+
|
|
76
|
+
if level not in (
|
|
77
|
+
logging.DEBUG,
|
|
78
|
+
logging.INFO,
|
|
79
|
+
logging.WARNING,
|
|
80
|
+
logging.ERROR,
|
|
81
|
+
logging.CRITICAL,
|
|
82
|
+
):
|
|
83
|
+
msg = f"Invalid log level: {level!r}. Must be a valid logging level constant."
|
|
84
|
+
raise ValueError(msg)
|
|
85
|
+
return level
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def configure_logger(  # noqa: C901
    *,
    verbose: bool | None = None,
    level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] | int | None = None,
    file: str | Path | None = None,
    file_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] | int | None = None,
    file_mode: Literal["a", "w"] = "a",
    file_only: bool = False,
) -> None:
    """Configure logging settings.

    Parameters
    ----------
    verbose : bool, optional
        Shortcut: ``True`` sets console to DEBUG, ``False`` to WARNING.
        If both ``level`` and ``verbose`` are given, ``level`` wins.
    level : str or int, optional
        Console logging level (``"DEBUG"``, ``"INFO"``, ``"WARNING"``, etc.).
    file : str or Path, optional
        Enable file logging at this path. Pass ``None`` to disable file logging.
    file_level : str or int, optional
        File handler level. Defaults to ``DEBUG``.
    file_mode : {'a', 'w'}, optional
        Append or overwrite the log file. Defaults to ``'a'``.
    file_only : bool, optional
        If ``True``, disable console logging. Requires ``file`` to be set.
    """
    global _file_handler  # noqa: PLW0603

    # Console level: an explicit `level` takes precedence over `verbose`.
    if level is not None:
        level_int = _validate_level(level)
        if _console_handler is not None:
            _console_handler.setLevel(level_int)
    elif verbose is not None:
        console_level = logging.DEBUG if verbose else logging.WARNING
        if _console_handler is not None:
            _console_handler.setLevel(console_level)

    if file is not None:
        # Replace any previously configured file handler before installing
        # the new one, closing it so its file descriptor is released.
        if _file_handler is not None:
            logger.removeHandler(_file_handler)
            _file_handler.close()
            _file_handler = None

        file_level_int = _validate_level(file_level) if file_level is not None else logging.DEBUG

        filepath = Path(file)
        filepath.parent.mkdir(parents=True, exist_ok=True)

        if file_mode not in ("a", "w"):
            msg = f"Invalid file_mode: {file_mode!r}. Must be 'a' or 'w'."
            raise ValueError(msg)

        _file_handler = logging.FileHandler(filepath, mode=file_mode)
        _file_handler.setLevel(file_level_int)
        _file_handler.setFormatter(
            logging.Formatter(
                fmt="[%(asctime)s] %(levelname)-8s %(message)s",
                datefmt="%Y/%m/%d %H:%M:%S",
            )
        )
        logger.addHandler(_file_handler)

        # Line-buffer the log file so records appear promptly (tail -f friendly).
        if _file_handler.stream is not None:  # pyright: ignore[reportUnnecessaryComparison]
            _file_handler.stream.reconfigure(line_buffering=True)

        # NOTE(review): the removed console handler is not cleared from
        # _console_handler, so later calls can still setLevel on it but
        # nothing ever re-adds it - confirm this one-way switch is intended.
        if file_only and _console_handler is not None:
            logger.removeHandler(_console_handler)

    # `file=None` tears down any existing file handler, per the docstring.
    elif _file_handler is not None:
        logger.removeHandler(_file_handler)
        _file_handler.close()
        _file_handler = None
|
tiny_osm/exceptions.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Exceptions for tiny_osm."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
__all__ = ["OverpassError"]
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class OverpassError(RuntimeError):
    """Raised when every configured Overpass API mirror fails to respond."""

    def __init__(self) -> None:
        """Initialize with the fixed all-mirrors-failed message."""
        super().__init__("All Overpass mirrors failed.")
|
tiny_osm/osm_fetch.py
ADDED
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
"""Fetch OpenStreetMap data for a bounding box as a GeoJSON FeatureCollection.
|
|
2
|
+
|
|
3
|
+
Large bounding boxes are automatically subdivided into tiles and fetched
|
|
4
|
+
sequentially via the Overpass API. Responses from every tile are merged,
|
|
5
|
+
deduplicated by ``(type, id)``, and converted to valid GeoJSON.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import atexit
|
|
11
|
+
import contextlib
|
|
12
|
+
import math
|
|
13
|
+
import time
|
|
14
|
+
from collections.abc import Sequence
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
import httpx
|
|
18
|
+
|
|
19
|
+
try:
|
|
20
|
+
from orjson import loads as _json_loads # pyright: ignore[reportMissingImports]
|
|
21
|
+
except ImportError:
|
|
22
|
+
from json import loads as _json_loads
|
|
23
|
+
|
|
24
|
+
from tiny_osm._geojson import elements_to_features
|
|
25
|
+
from tiny_osm._logging import logger
|
|
26
|
+
from tiny_osm.exceptions import OverpassError
|
|
27
|
+
|
|
28
|
+
__all__ = ["OSMFilters", "fetch"]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Mirrors are tried in order within each request round.
_OVERPASS_MIRRORS = [
    "https://overpass-api.de/api/interpreter",
    "https://overpass.private.coffee/api/interpreter",  # no rate limits
]
_OVERPASS_SERVER_TIMEOUT = 180  # seconds the server spends executing the query
_OVERPASS_TRANSFER_BUFFER = 30  # seconds to receive the response body
# Client-side timeout must exceed the server-side one so the server, not the
# client, gives up first.
_CLIENT_TIMEOUT = _OVERPASS_SERVER_TIMEOUT + _OVERPASS_TRANSFER_BUFFER
_REQUEST_ROUNDS = 3  # max rounds - each round tries every mirror once
_TRANSPORT_BACKOFF = 1.0  # base seconds for transport-error backoff within a round
_BUSY_BACKOFF = 55.0  # seconds to wait between rounds
_USER_AGENT = "tiny-osm (https://github.com/cheginit/tiny-osm)"
_HTTP_HEADERS = {
    "User-Agent": _USER_AGENT,
    "referer": _USER_AGENT,
    "Accept-Language": "en",
}
_STATUS_POLL_PAUSE = 5  # seconds between /status re-polls when no slot is free
_STATUS_POLL_MAX = 12  # maximum number of /status polls before giving up

# Bboxes larger than this are tiled into sub-queries (see _subdivide_bbox).
_MAX_QUERY_AREA_M2 = 50_000 * 50_000  # 2 500 km²

# Module-level HTTP clients — one per mirror so TCP/TLS connections are
# reused across ``fetch`` calls for the lifetime of the process.
_TIMEOUT = httpx.Timeout(_CLIENT_TIMEOUT)
_CLIENTS: dict[str, httpx.Client] = {
    mirror: httpx.Client(timeout=_TIMEOUT, headers=_HTTP_HEADERS) for mirror in _OVERPASS_MIRRORS
}
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _close_clients() -> None:
    """Release every pooled module-level HTTP client at interpreter shutdown."""
    for client in _CLIENTS.values():
        client.close()


atexit.register(_close_clients)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class OSMFilters:
    """Predefined Overpass QL tag-filter strings for :func:`fetch`.

    Each attribute is a valid ``osm_filter`` value — either a single string
    or a tuple of strings (which :func:`fetch` issues as separate queries
    and merges into one FeatureCollection). Any other Overpass QL tag-filter
    string works too.
    """

    # Roads/paths: any highway except area-mapped ones and non-traversable
    # lifecycle/service values.
    HIGHWAY: str = (
        '["highway"]["area"!~"yes"]'
        '["highway"!~"abandoned|construction|no|planned|platform|proposed|'
        'raceway|razed|rest_area|services"]'
    )
    # Linear water features (rivers, streams, canals, ...).
    WATERWAY: str = '["waterway"]'
    # Standing water bodies; issued as four separate queries. The second
    # entry matches natural=water ways with no "water" sub-tag at all.
    WATER_BODY: tuple[str, ...] = (
        '["natural"="water"]["water"~"basin|pond|reservoir|detention|retention|lake|lagoon"]',
        '["natural"="water"][!"water"]',
        '["landuse"="basin"]',
        '["basin"~"retention|detention|infiltration|stormwater"]',
    )
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _bbox_area_m2(left: float, bottom: float, right: float, top: float) -> float:
|
|
93
|
+
"""Approximate bounding box area via an equirectangular projection."""
|
|
94
|
+
lat_m = (top - bottom) * 111_320.0
|
|
95
|
+
lon_m = (right - left) * 111_320.0 * math.cos(math.radians((bottom + top) / 2))
|
|
96
|
+
return abs(lat_m * lon_m)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _subdivide_bbox(
    left: float, bottom: float, right: float, top: float
) -> list[tuple[float, float, float, float]]:
    """Return the bbox unchanged, or split it into an n×n grid when it exceeds _MAX_QUERY_AREA_M2."""
    area = _bbox_area_m2(left, bottom, right, top)
    if area <= _MAX_QUERY_AREA_M2:
        return [(left, bottom, right, top)]

    # Smallest n whose n² tiles each fall under the area cap.
    n = math.ceil(math.sqrt(area / _MAX_QUERY_AREA_M2))
    dx = (right - left) / n
    dy = (top - bottom) / n
    tiles: list[tuple[float, float, float, float]] = []
    for col in range(n):
        for row in range(n):
            tiles.append(
                (
                    left + col * dx,
                    bottom + row * dy,
                    left + (col + 1) * dx,
                    bottom + (row + 1) * dy,
                )
            )
    return tiles
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _validate_bbox(left: float, bottom: float, right: float, top: float) -> None:
|
|
122
|
+
"""Validate that the bbox is a well-formed WGS84 bounding box."""
|
|
123
|
+
if left >= right:
|
|
124
|
+
msg = f"left ({left}) must be less than right ({right})"
|
|
125
|
+
raise ValueError(msg)
|
|
126
|
+
if bottom >= top:
|
|
127
|
+
msg = f"bottom ({bottom}) must be less than top ({top})"
|
|
128
|
+
raise ValueError(msg)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _build_overpass_query(
    left: float,
    bottom: float,
    right: float,
    top: float,
    osm_filter: str,
) -> str:
    """Compose the Overpass QL query string for one bbox/filter pair.

    Both ways and relations are requested so polygon features (e.g.
    multipolygon water bodies) arrive alongside linear ones, and
    ``out geom;`` embeds coordinates inline so no node recursion is needed.
    """
    bbox = f"{bottom},{left},{top},{right}"
    header = f"[out:json][timeout:{_OVERPASS_SERVER_TIMEOUT}]"
    way_clause = f"way{osm_filter}({bbox});"
    relation_clause = f"relation{osm_filter}({bbox});"
    return f"{header};({way_clause}{relation_clause});out geom;"
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _parse_status(text: str) -> tuple[str, float]:
|
|
147
|
+
"""Parse a /status response body into a (action, seconds) decision.
|
|
148
|
+
|
|
149
|
+
Returns one of:
|
|
150
|
+
|
|
151
|
+
- ``("done", 0)`` - a slot is free, the mirror has no slot management, or
|
|
152
|
+
the format is unrecognized; proceed with the query immediately.
|
|
153
|
+
- ``("poll", 0)`` - all slots are currently occupied; sleep briefly and
|
|
154
|
+
re-poll ``/status``.
|
|
155
|
+
- ``("wait", N)`` - a slot becomes free in ``N`` seconds; sleep then proceed.
|
|
156
|
+
"""
|
|
157
|
+
lines = text.strip().splitlines()
|
|
158
|
+
|
|
159
|
+
# Rate limit: 0 means the mirror has no slot management (e.g. private.coffee).
|
|
160
|
+
rate_limit = 0
|
|
161
|
+
for line in lines:
|
|
162
|
+
if line.startswith("Rate limit:"):
|
|
163
|
+
with contextlib.suppress(IndexError, ValueError):
|
|
164
|
+
rate_limit = int(line.split(":")[1].strip())
|
|
165
|
+
break
|
|
166
|
+
if rate_limit == 0:
|
|
167
|
+
return ("done", 0.0)
|
|
168
|
+
|
|
169
|
+
# Line index 4 holds the slot-availability status when rate_limit > 0.
|
|
170
|
+
try:
|
|
171
|
+
slot_line = lines[4]
|
|
172
|
+
first = slot_line.split()[0]
|
|
173
|
+
except IndexError:
|
|
174
|
+
return ("done", 0.0)
|
|
175
|
+
|
|
176
|
+
# Format A: "N slots available now."
|
|
177
|
+
try:
|
|
178
|
+
available = int(first)
|
|
179
|
+
except ValueError:
|
|
180
|
+
available = None
|
|
181
|
+
if available is not None:
|
|
182
|
+
return ("done", 0.0) if available > 0 else ("poll", 0.0)
|
|
183
|
+
|
|
184
|
+
# Format B: Slot available after an ISO timestamp, in N seconds.
|
|
185
|
+
if first == "Slot":
|
|
186
|
+
try:
|
|
187
|
+
tokens = slot_line.split()
|
|
188
|
+
wait = max(float(tokens[tokens.index("in") + 1]), 1.0)
|
|
189
|
+
except (ValueError, IndexError):
|
|
190
|
+
return ("poll", 0.0)
|
|
191
|
+
else:
|
|
192
|
+
return ("wait", wait)
|
|
193
|
+
|
|
194
|
+
# Format C: "Currently running queries:", all slots occupied.
|
|
195
|
+
# Anything else is an unrecognized status, proceed with the query.
|
|
196
|
+
return ("poll", 0.0) if first == "Currently" else ("done", 0.0)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _overpass_pause(mirror: str) -> None:
    """Block until an Overpass slot is available on *mirror*.

    Fetches ``{mirror_base}/status`` and parses the slot-availability line.
    Servers with ``Rate limit: 0`` (e.g. overpass.private.coffee) are treated
    as always-available and return immediately. If the status endpoint is
    unreachable the function returns immediately so the query is attempted
    regardless.
    """
    # The status endpoint lives next to the interpreter endpoint.
    status_url = mirror.replace("/interpreter", "/status")

    for _ in range(_STATUS_POLL_MAX):
        try:
            # Short dedicated timeout: /status should answer near-instantly.
            resp = _CLIENTS[mirror].get(status_url, timeout=10)
            resp.raise_for_status()
        except httpx.HTTPError:
            return  # unreachable - proceed anyway

        action, seconds = _parse_status(resp.text)
        if action == "done":
            return
        if action == "wait":
            logger.info(f"No Overpass slot on {mirror!r}; waiting {seconds:.0f}s…")
            time.sleep(seconds)
            return
        # action == "poll": all slots occupied, re-check after a short pause.
        time.sleep(_STATUS_POLL_PAUSE)
    # Poll budget exhausted: stop waiting and let the caller attempt the
    # query anyway - the server will answer 429 if it is still saturated.
|
|
227
|
+
|
|
228
|
+
def _is_retryable(exc: Exception) -> bool:
    """Return True for transient errors worth retrying."""
    if not isinstance(exc, httpx.HTTPStatusError):
        # Connection resets, timeouts, DNS hiccups, etc. are transient.
        return isinstance(exc, httpx.TransportError)
    code = exc.response.status_code
    # 429: rate-limited - our slot is occupied by a still-running timed-out
    # query; _overpass_pause on the next attempt will poll /status and wait
    # for a free slot. Any 5xx is server-side trouble that may clear up.
    return code == 429 or code >= 500
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _fetch_one_query(query: str) -> dict[str, Any]:
    """Send one query, rotating across mirrors and retrying transient errors.

    Every "round" gives each mirror in ``_OVERPASS_MIRRORS`` one shot. On a
    transient failure (429/5xx or a transport error) the next mirror is
    tried *immediately* - a primary mirror's 504 often comes back within
    seconds, so failing over to the alternate mirror is much faster than
    sleeping and re-hitting the same host. Only when *all* mirrors fail in
    a round do we back off for ``_BUSY_BACKOFF`` seconds before the next
    round, giving the servers time to recover. Non-transient HTTP errors
    are raised right away; if every round fails, the last transient error
    is re-raised.
    """
    failure: Exception | None = None
    for round_idx in range(_REQUEST_ROUNDS):
        for mirror in _OVERPASS_MIRRORS:
            # Skip the /status check on the first round: it only tracks
            # rate-limit slots, not general server load, so it can say
            # "proceed" while the server is still too busy to respond
            # (504). On retry rounds the rate-limit window has likely
            # reset, so /status reflects real availability.
            if round_idx > 0:
                _overpass_pause(mirror)
            try:
                reply = _CLIENTS[mirror].post(mirror, data={"data": query})
                reply.raise_for_status()
            except httpx.HTTPStatusError as exc:
                failure = exc
                if not _is_retryable(exc):
                    raise
                logger.info(f"HTTP {exc.response.status_code} on {mirror!r}; trying next mirror…")
            except httpx.TransportError as exc:
                failure = exc
                logger.debug(f"Transport error on {mirror!r}: {exc}; trying next mirror…")
                time.sleep(_TRANSPORT_BACKOFF)
            else:
                return _json_loads(reply.content)
        # All mirrors failed this round - sleep before the next round.
        if round_idx < _REQUEST_ROUNDS - 1:
            logger.info(
                f"All mirrors failed round {round_idx + 1}; "
                f"waiting {_BUSY_BACKOFF:.0f}s before next round…"
            )
            time.sleep(_BUSY_BACKOFF)
    assert failure is not None  # noqa: S101 - loop always sets it on failure
    raise failure
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def fetch(
    left: float,
    bottom: float,
    right: float,
    top: float,
    osm_filter: str | Sequence[str],
) -> dict[str, Any]:
    """Fetch OSM data within a bounding box as a GeoJSON FeatureCollection.

    Oversized bounding boxes are subdivided into tiles automatically, and
    each tile is fetched across rotating Overpass mirrors with retries.
    The result is a valid GeoJSON ``FeatureCollection`` dict that can be
    fed directly to ``geopandas`` with no post-processing.

    Parameters
    ----------
    left, bottom, right, top : float
        Bounding box coordinates in WGS84 (EPSG:4326).
    osm_filter : str or sequence of str
        Overpass QL tag-filter string(s) selecting which features to fetch.
        Use a member of :class:`OSMFilters` for the built-in presets
        (``OSMFilters.HIGHWAY``, ``OSMFilters.WATERWAY``,
        ``OSMFilters.WATER_BODY``) or pass any valid Overpass QL tag-filter
        (e.g. ``'["amenity"="restaurant"]'``). When a sequence is given,
        each filter is queried separately and the results are merged into a
        single deduplicated FeatureCollection.

    Returns
    -------
    dict[str, Any]
        A GeoJSON FeatureCollection dict — ``{"type": "FeatureCollection",
        "features": [...]}`` — where every feature has an ``osm_type`` and
        ``osm_id`` in its ``properties`` alongside the OSM tags.

    Raises
    ------
    ValueError
        If the bounding box is invalid.
    OverpassError
        If all Overpass mirrors fail to return a response.

    Examples
    --------
    Fetch highway features using a preset:

    >>> import geopandas as gpd
    >>> fc = fetch(-97.75, 30.25, -97.70, 30.30, osm_filter=OSMFilters.HIGHWAY)
    >>> gdf = gpd.GeoDataFrame.from_features(fc, crs=4326)

    Fetch with a custom Overpass filter:

    >>> fc = fetch(-97.75, 30.25, -97.70, 30.30, osm_filter='["amenity"="restaurant"]')
    """
    _validate_bbox(left, bottom, right, top)

    # A bare string is a single filter; anything else is a sequence of them.
    if isinstance(osm_filter, str):
        filters = [osm_filter]
    else:
        filters = list(osm_filter)

    # One Overpass query per (tile, filter) pair.
    queries: list[str] = []
    for tile in _subdivide_bbox(left, bottom, right, top):
        for flt in filters:
            queries.append(_build_overpass_query(*tile, flt))

    try:
        responses = [_fetch_one_query(q) for q in queries]
    except Exception as exc:  # re-raised as the package-level error type
        raise OverpassError from exc

    return elements_to_features(responses)
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tiny-osm
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Fetch OpenStreetMap road, waterway, and water-body data for a bounding box.
|
|
5
|
+
Project-URL: Changelog, https://github.com/cheginit/tiny-osm/blob/main/CHANGELOG.md
|
|
6
|
+
Project-URL: CI, https://github.com/cheginit/tiny-osm/actions
|
|
7
|
+
Project-URL: Documentation, https://cheginit.github.io/tiny-osm
|
|
8
|
+
Project-URL: Homepage, https://cheginit.github.io/tiny-osm
|
|
9
|
+
Project-URL: Issues, https://github.com/cheginit/tiny-osm/issues
|
|
10
|
+
Author-email: Taher Chegini <cheginit@gmail.com>
|
|
11
|
+
License: MIT
|
|
12
|
+
License-File: AUTHORS.md
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Classifier: Development Status :: 4 - Beta
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: Python
|
|
19
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
24
|
+
Classifier: Topic :: Scientific/Engineering
|
|
25
|
+
Classifier: Typing :: Typed
|
|
26
|
+
Requires-Python: >=3.11
|
|
27
|
+
Requires-Dist: httpx
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: geopandas>=1; extra == 'dev'
|
|
30
|
+
Requires-Dist: git-cliff; extra == 'dev'
|
|
31
|
+
Requires-Dist: ipykernel; extra == 'dev'
|
|
32
|
+
Requires-Dist: ipywidgets; extra == 'dev'
|
|
33
|
+
Requires-Dist: jupytext; extra == 'dev'
|
|
34
|
+
Requires-Dist: matplotlib; extra == 'dev'
|
|
35
|
+
Requires-Dist: nbconvert; extra == 'dev'
|
|
36
|
+
Requires-Dist: orjson; extra == 'dev'
|
|
37
|
+
Requires-Dist: pyproj; extra == 'dev'
|
|
38
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
39
|
+
Provides-Extra: docs
|
|
40
|
+
Requires-Dist: jupytext; extra == 'docs'
|
|
41
|
+
Requires-Dist: mike>=2; extra == 'docs'
|
|
42
|
+
Requires-Dist: mkdocs-jupyter; extra == 'docs'
|
|
43
|
+
Requires-Dist: mkdocs-materialx; extra == 'docs'
|
|
44
|
+
Requires-Dist: mkdocs>=1.6; extra == 'docs'
|
|
45
|
+
Requires-Dist: mkdocstrings[python]>=0.27; extra == 'docs'
|
|
46
|
+
Requires-Dist: ruff; extra == 'docs'
|
|
47
|
+
Provides-Extra: lint
|
|
48
|
+
Requires-Dist: codespell; extra == 'lint'
|
|
49
|
+
Requires-Dist: pre-commit; extra == 'lint'
|
|
50
|
+
Provides-Extra: test
|
|
51
|
+
Requires-Dist: coverage[toml]; extra == 'test'
|
|
52
|
+
Requires-Dist: pytest-cov; extra == 'test'
|
|
53
|
+
Requires-Dist: pytest-sugar; extra == 'test'
|
|
54
|
+
Provides-Extra: typecheck
|
|
55
|
+
Requires-Dist: pyright; extra == 'typecheck'
|
|
56
|
+
Description-Content-Type: text/markdown
|
|
57
|
+
|
|
58
|
+
# TinyOSM: Lightweight OpenStreetMap GeoJSON Fetcher
|
|
59
|
+
|
|
60
|
+
[](https://pypi.org/project/tiny-osm/)
|
|
61
|
+
[](https://anaconda.org/conda-forge/tiny-osm)
|
|
62
|
+
[](https://opensource.org/licenses/MIT)
|
|
63
|
+
[](https://mybinder.org/v2/gh/cheginit/tiny-osm/HEAD?labpath=docs%2Fexamples)
|
|
64
|
+
|
|
65
|
+
[](https://codecov.io/gh/cheginit/tiny-osm)
|
|
66
|
+
[](https://github.com/cheginit/tiny-osm/actions/workflows/test.yml)
|
|
67
|
+
[](https://github.com/cheginit/tiny-osm/actions/workflows/docs.yml)
|
|
68
|
+
[](https://pepy.tech/project/tiny-osm)
|
|
69
|
+
|
|
70
|
+
Fetch OpenStreetMap data for a bounding box as GeoJSON. One function, one dependency,
|
|
71
|
+
minimal footprint.
|
|
72
|
+
|
|
73
|
+
## Why
|
|
74
|
+
|
|
75
|
+
[OSMnx](https://github.com/gboeing/osmnx) is the go-to tool for working with
|
|
76
|
+
OpenStreetMap data in Python. It handles street-network analysis, building footprints,
|
|
77
|
+
amenity lookups, graph-theoretic routing, and more. But it pulls in `geopandas`,
|
|
78
|
+
`shapely`, and `networkx` — a heavy dependency stack that can be slow to install in
|
|
79
|
+
constrained environments like AWS Lambda, minimal Docker images, or CI runners.
|
|
80
|
+
|
|
81
|
+
`tiny-osm` exists for the narrower case where you just need **OSM features as clean
|
|
82
|
+
GeoJSON**. It ships with presets for roads, waterways, and water bodies, and accepts any
|
|
83
|
+
Overpass QL tag-filter for custom queries. It has one runtime dependency (`httpx`),
|
|
84
|
+
installs in seconds, and returns standards-compliant GeoJSON that works with any
|
|
85
|
+
downstream tool such as `geopandas`, `shapely`, or even a JavaScript map library.
|
|
86
|
+
|
|
87
|
+
| | `tiny-osm` | `osmnx` |
|
|
88
|
+
| ------------- | ----------------------- | -------------------------------------------- |
|
|
89
|
+
| Dependencies | 1 (`httpx`) | 7+ (`geopandas`, `shapely`, `networkx`, ...) |
|
|
90
|
+
| Output format | GeoJSON dicts | GeoDataFrames/`networkx` graphs |
|
|
91
|
+
| Scope | Any Overpass tag-filter | Full OSM toolkit |
|
|
92
|
+
| API surface | 1 function | Dozens of modules |
|
|
93
|
+
|
|
94
|
+
## Installation
|
|
95
|
+
|
|
96
|
+
```console
|
|
97
|
+
pip install tiny-osm
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Or with conda/pixi:
|
|
101
|
+
|
|
102
|
+
```console
|
|
103
|
+
pixi add tiny-osm
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
> **Tip:** If [`orjson`](https://github.com/ijl/orjson) is installed, `tiny-osm` uses it
|
|
107
|
+
> automatically for faster JSON parsing of Overpass API responses.
|
|
108
|
+
|
|
109
|
+
## Usage
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
import tiny_osm
|
|
113
|
+
|
|
114
|
+
# Each call returns a GeoJSON FeatureCollection dict
|
|
115
|
+
bbox = (-97.75, 30.25, -97.70, 30.30)
|
|
116
|
+
highways = tiny_osm.fetch(*bbox, osm_filter=tiny_osm.OSMFilters.HIGHWAY)
|
|
117
|
+
waterways = tiny_osm.fetch(*bbox, osm_filter=tiny_osm.OSMFilters.WATERWAY)
|
|
118
|
+
water_bodies = tiny_osm.fetch(*bbox, osm_filter=tiny_osm.OSMFilters.WATER_BODY)
|
|
119
|
+
|
|
120
|
+
# Load into geopandas (optional - tiny-osm doesn't require it)
|
|
121
|
+
import geopandas as gpd
|
|
122
|
+
|
|
123
|
+
gdf = gpd.GeoDataFrame.from_features(highways, crs=4326)
|
|
124
|
+
|
|
125
|
+
# Custom Overpass QL filter
|
|
126
|
+
parks = tiny_osm.fetch(*bbox, osm_filter='["leisure"="park"]')
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Filters
|
|
130
|
+
|
|
131
|
+
| `osm_filter` | What it queries |
|
|
132
|
+
| ----------------------- | --------------------------------------------------------------------------- |
|
|
133
|
+
| `OSMFilters.HIGHWAY` | `highway=*` ways (excluding areas, abandoned, planned, raceway) |
|
|
134
|
+
| `OSMFilters.WATERWAY` | `waterway=*` ways |
|
|
135
|
+
| `OSMFilters.WATER_BODY` | Closed water features: ponds, lakes, reservoirs, detention/retention basins |
|
|
136
|
+
| any string | Raw Overpass QL tag-filter (e.g. `'["amenity"="restaurant"]'`) |
|
|
137
|
+
|
|
138
|
+
### What it handles
|
|
139
|
+
|
|
140
|
+
- Auto-subdivides large bounding boxes into tiles
|
|
141
|
+
- Retries across multiple Overpass API mirrors with automatic failover
|
|
142
|
+
- Assembles multi-polygon relations (ring stitching, hole assignment)
|
|
143
|
+
- Deduplicates elements across tile boundaries
|
|
144
|
+
- Applies the OSM polygon-features rule tree (area=yes/no, coastline exceptions, etc.)
|
|
145
|
+
|
|
146
|
+
## Contributing
|
|
147
|
+
|
|
148
|
+
Contributions are welcome! See [CONTRIBUTING.md](CONTRIBUTING.md) for details.
|
|
149
|
+
|
|
150
|
+
## License
|
|
151
|
+
|
|
152
|
+
MIT License. See [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
tiny_osm/__init__.py,sha256=SaWAX3065qiepj6GT3roSP1_BmPVWFUmbph29sis5cg,558
|
|
2
|
+
tiny_osm/_geojson.py,sha256=yqReHOMMiJ8DkDsnL3UBFqF2oO0uSv5ZABDndQxjrV8,11603
|
|
3
|
+
tiny_osm/_logging.py,sha256=tEEwprtQuaC8ahPQl9JE2kWmSozxUnHAvaE9buUkF3E,5321
|
|
4
|
+
tiny_osm/exceptions.py,sha256=3J5pSE7j_8kfskM3uAbGkHixLbb2SuESnWlZNWTSQAE,297
|
|
5
|
+
tiny_osm/osm_fetch.py,sha256=FeEZPyVc9l55PM1XTpaS2qxr8Usv-hlGW9AdB7m3r5k,13103
|
|
6
|
+
tiny_osm-0.1.0.dist-info/METADATA,sha256=TBZCTOvBeaEg3OeU69MNuOKFK-IWvLDY3oMsUXhpRuo,6849
|
|
7
|
+
tiny_osm-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
8
|
+
tiny_osm-0.1.0.dist-info/licenses/AUTHORS.md,sha256=W8oRmxEdM_L4-KeEqNzoI0x3oPRMMAroXZNEuO7AwV0,70
|
|
9
|
+
tiny_osm-0.1.0.dist-info/licenses/LICENSE,sha256=pqsW5hip0KzY90LsJbL-z4uKR788CZbOCkJ8y031ddo,1070
|
|
10
|
+
tiny_osm-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Taher Chegini
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|