ssb-sgis 0.3.13__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/__init__.py +6 -4
- sgis/geopandas_tools/bounds.py +2 -6
- sgis/geopandas_tools/buffer_dissolve_explode.py +149 -45
- sgis/geopandas_tools/cleaning.py +66 -594
- sgis/geopandas_tools/conversion.py +92 -12
- sgis/geopandas_tools/duplicates.py +53 -23
- sgis/geopandas_tools/general.py +35 -0
- sgis/geopandas_tools/neighbors.py +31 -1
- sgis/geopandas_tools/overlay.py +143 -63
- sgis/geopandas_tools/polygons_as_rings.py +1 -1
- sgis/io/dapla_functions.py +7 -14
- sgis/maps/explore.py +29 -3
- sgis/maps/map.py +16 -4
- sgis/maps/maps.py +95 -49
- sgis/parallel/parallel.py +73 -35
- sgis/raster/torchgeo.py +30 -20
- {ssb_sgis-0.3.13.dist-info → ssb_sgis-1.0.1.dist-info}/METADATA +6 -6
- {ssb_sgis-0.3.13.dist-info → ssb_sgis-1.0.1.dist-info}/RECORD +20 -20
- {ssb_sgis-0.3.13.dist-info → ssb_sgis-1.0.1.dist-info}/LICENSE +0 -0
- {ssb_sgis-0.3.13.dist-info → ssb_sgis-1.0.1.dist-info}/WHEEL +0 -0
sgis/maps/maps.py
CHANGED
@@ -8,11 +8,11 @@ The 'qtm' function shows a simple static map of one or more GeoDataFrames.
 """
 
 import inspect
-import warnings
 from numbers import Number
 from typing import Any
 
 from geopandas import GeoDataFrame, GeoSeries
+from pyproj import CRS
 from shapely import Geometry
 
 from ..geopandas_tools.conversion import to_gdf as to_gdf_func
@@ -24,24 +24,23 @@ from .map import Map
 from .thematicmap import ThematicMap
 
 
+try:
+    from torchgeo.datasets.geo import RasterDataset
+except ImportError:
+
+    class RasterDataset:
+        """Placeholder"""
+
+
 def _get_location_mask(kwargs: dict, gdfs) -> tuple[GeoDataFrame | None, dict]:
     try:
         crs = get_common_crs(gdfs)
     except IndexError:
-
-        crs = [x for x in kwargs.values() if hasattr(x, "crs")][0].crs
-    except IndexError:
-        crs = None
-    except Exception:
-        crs = set()
-        for x in kwargs.values():
-            try:
-                crs.add(x.crs)
-            except Exception:
-                pass
+        for x in kwargs.values():
             try:
-                crs =
-
+                crs = CRS(x.crs) if hasattr(x, "crs") else CRS(x["crs"])
+                break
+            except Exception:
                 crs = None
 
     masks = {
@@ -73,6 +72,7 @@ def explore(
     *gdfs: GeoDataFrame | dict[str, GeoDataFrame],
     column: str | None = None,
     center: Any | None = None,
+    center_4326: Any | None = None,
     labels: tuple[str] | None = None,
     max_zoom: int = 40,
     browser: bool = False,
@@ -120,19 +120,23 @@ def explore(
 
     Examples
     --------
-    >>>
-    >>> roads = read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/
-    >>> points = read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/
+    >>> import sgis as sg
+    >>> roads = sg.read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/roads_oslo_2022.parquet")
+    >>> points = sg.read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/points_oslo.parquet")
 
-
+    Explore the area 500 meters around a given point. Coordinates are in UTM 33 (EPSG:25833).
 
-    >>> explore(roads, points)
+    >>> sg.explore(roads, points, center=(262274.6528, 6650143.176, 500))
+
+    Same as above, but with the coordinates given in WGS84, like the coordinates displayed in the corner of the map.
+
+    >>> sg.explore(roads, points, center_4326=(10.7463, 59.92, 500))
 
     With additional arguments.
 
     >>> roads["meters"] = roads.length
     >>> points["meters"] = points.length
-    >>> explore(roads, points, column="meters", cmap="plasma", max_zoom=60)
+    >>> sg.explore(roads, points, column="meters", cmap="plasma", max_zoom=60, center_4326=(10.7463, 59.92, 500))
     """
 
     gdfs, column, kwargs = Map._separate_args(gdfs, column, kwargs)
@@ -154,18 +158,37 @@ def explore(
         **kwargs,
     )
 
+    try:
+        to_crs = gdfs[0].crs
+    except IndexError:
+        try:
+            to_crs = [x for x in kwargs.values() if hasattr(x, "crs")][0].crs
+        except IndexError:
+            to_crs = None
+
+    if center_4326 is not None:
+        from_crs = 4326
+        center = center_4326
+    elif "crs" in kwargs:
+        from_crs = kwargs.pop("crs")
+    else:
+        from_crs = to_crs
+
     if center is not None:
         size = size or 1000
         if isinstance(center, str) and not is_wkt(center):
-            mask = address_to_gdf(center, crs=
-        elif isinstance(center, GeoDataFrame):
+            mask = address_to_gdf(center, crs=from_crs)
+        elif isinstance(center, (GeoDataFrame, GeoSeries)):
             mask = center
         else:
-
-
-
-
-
+            if isinstance(center, (tuple, list)) and len(center) == 3:
+                *center, size = center
+            mask = to_gdf_func(center, crs=from_crs)
+
+            try:
+                mask = mask.to_crs(to_crs)
+            except ValueError:
+                pass
 
     if get_geom_type(mask) in ["point", "line"]:
         mask = mask.buffer(size)
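Editor's note: the center handling above reprojects a WGS84 center into the CRS of the data. A minimal usage sketch built from the docstring examples in this diff (the coordinates and the 500 meter buffer are taken from those examples):

    import sgis as sg

    roads = sg.read_parquet_url(
        "https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/roads_oslo_2022.parquet"
    )

    # center given in the CRS of the data (UTM 33, EPSG:25833), with a 500 meter buffer
    sg.explore(roads, center=(262274.6528, 6650143.176, 500))

    # the same location given as WGS84 (lon, lat), handled by the new from_crs/to_crs logic
    sg.explore(roads, center_4326=(10.7463, 59.92, 500))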
@@ -190,7 +213,7 @@ def explore(
         **kwargs,
     )
 
-    if m.gdfs is None:
+    if m.gdfs is None and not len(m.raster_datasets):
         return
 
     if not kwargs.pop("explore", True):
@@ -286,7 +309,7 @@ def samplemap(
         smooth_factor=smooth_factor,
         **kwargs,
     )
-    if m.gdfs is None:
+    if m.gdfs is None and not len(m.raster_datasets):
         return
     if mask is not None:
         m._gdfs = [gdf.clip(mask) for gdf in m._gdfs]
@@ -395,7 +418,7 @@ def clipmap(
         smooth_factor=smooth_factor,
         **kwargs,
     )
-    if m.gdfs is None:
+    if m.gdfs is None and not len(m.raster_datasets):
         return
 
     m._gdfs = [gdf.clip(mask) for gdf in m._gdfs]
@@ -421,36 +444,63 @@ def clipmap(
     qtm(m._gdf, column=m.column, cmap=m._cmap, k=m.k)
 
 
-def explore_locals(*gdfs, to_gdf: bool = True, **kwargs):
-    """
+def explore_locals(*gdfs, convert: bool = True, **kwargs):
+    """Displays all local variables with geometries (GeoDataFrame etc.).
 
-
-    you are working in.
+    Local means inside a function or file/notebook.
 
     Args:
-        *gdfs:
-
+        *gdfs: Additional GeoDataFrames.
+        convert: If True (default), non-GeoDataFrames will be converted
+            to GeoDataFrames if possible.
+        **kwargs: keyword arguments passed to sg.explore.
     """
-    frame = inspect.currentframe()
 
+    def as_dict(obj):
+        if hasattr(obj, "__dict__"):
+            return obj.__dict__
+        elif isinstance(obj, dict):
+            return obj
+        raise TypeError
+
+    frame = inspect.currentframe().f_back
+
+    allowed_types = (GeoDataFrame, GeoSeries, Geometry, RasterDataset)
+
+    local_gdfs = {}
     while True:
-        local_gdfs = {}
         for name, value in frame.f_locals.items():
-            if isinstance(value, GeoDataFrame):
+            if isinstance(value, GeoDataFrame) and len(value):
                 local_gdfs[name] = value
                 continue
-            if not to_gdf:
+            if not convert:
                 continue
-
-
-
-
+
+            if isinstance(value, dict) or hasattr(value, "__dict__"):
+                # add dicts or classes with GeoDataFrames to kwargs
+                for key, value in as_dict(value).items():
+                    if isinstance(value, allowed_types):
+                        gdf = clean_geoms(to_gdf_func(value))
+                        if len(gdf):
+                            local_gdfs[key] = gdf
+
+                    elif isinstance(value, dict) or hasattr(value, "__dict__"):
+                        try:
+                            for k, v in value.items():
+                                if isinstance(v, allowed_types):
+                                    gdf = clean_geoms(to_gdf_func(v))
+                                    if len(gdf):
+                                        local_gdfs[k] = gdf
+                        except Exception:
+                            # no need to raise here
+                            pass
+
                 continue
-            #
             try:
                 gdf = clean_geoms(to_gdf_func(value))
                 if len(gdf):
                     local_gdfs[name] = gdf
+                    continue
             except Exception:
                 pass
 
@@ -462,10 +512,6 @@ def explore_locals(*gdfs, to_gdf: bool = True, **kwargs):
         if not frame:
             break
 
-    mask = kwargs.pop("mask", None)
-    if mask is not None:
-        local_gdfs = {name: gdf.clip(mask) for name, gdf in local_gdfs.items()}
-
     explore(*gdfs, **local_gdfs, **kwargs)
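Editor's note: explore_locals walks up the call stack and maps every local variable that is, or can be converted to, a GeoDataFrame. A minimal sketch, assuming sgis exposes to_gdf at the top level (this file imports it from geopandas_tools.conversion):

    import sgis as sg

    def inspect_results():
        points = sg.to_gdf([(262274.6528, 6650143.176)], crs=25833)
        buffered = points.buffer(500)  # a GeoSeries; picked up because convert=True
        # shows 'points' and 'buffered' in one interactive map, labeled by variable name
        sg.explore_locals()

    inspect_results()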
sgis/parallel/parallel.py
CHANGED
@@ -2,6 +2,7 @@ import functools
 import inspect
 import itertools
 import multiprocessing
+import warnings
 from collections.abc import Callable, Collection, Iterable
 from pathlib import Path
 from typing import Any
@@ -102,6 +103,7 @@ class Parallel:
         iterable: Collection,
         args: tuple | None = None,
         kwargs: dict | None = None,
+        chunksize: int = 1,
     ) -> list[Any]:
         """Run functions in parallel with items of an iterable as 0th argument.
 
@@ -185,7 +187,11 @@ class Parallel:
         with multiprocessing.get_context(self.context).Pool(
             processes, maxtasksperchild=self.maxtasksperchild, **self.kwargs
         ) as pool:
-
+            try:
+                return pool.map(func_with_kwargs, iterable, chunksize=chunksize)
+            except Exception as e:
+                pool.terminate()
+                raise e
 
         with joblib.Parallel(
             n_jobs=processes, backend=self.backend, **self.kwargs
@@ -198,6 +204,7 @@ class Parallel:
         iterable: Collection[Iterable[Any]],
         args: tuple | None = None,
         kwargs: dict | None = None,
+        chunksize: int = 1,
     ) -> list[Any]:
         """Run functions in parallel where items of the iterable are unpacked.
 
@@ -282,7 +289,11 @@ class Parallel:
         with multiprocessing.get_context(self.context).Pool(
             processes, maxtasksperchild=self.maxtasksperchild, **self.kwargs
         ) as pool:
-
+            try:
+                return pool.starmap(func_with_kwargs, iterable, chunksize=chunksize)
+            except Exception as e:
+                pool.terminate()
+                raise e
 
         with joblib.Parallel(
             n_jobs=processes, backend=self.backend, **self.kwargs
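Editor's note: a small usage sketch of the new chunksize argument, which is forwarded to pool.map/pool.starmap. The worker function is hypothetical, and it is assumed that backend="multiprocessing" selects the multiprocessing.Pool branch shown above:

    from sgis import Parallel

    def square(x):
        return x * x

    if __name__ == "__main__":
        # larger chunks mean fewer, bigger inter-process handoffs for cheap functions
        results = Parallel(4, backend="multiprocessing").map(
            square, range(10_000), chunksize=100
        )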
@@ -363,22 +374,30 @@ class Parallel:
 
         Args:
             in_data: Dictionary with dataset names as keys and file paths or
-                (Geo)DataFrames as values.
+                (Geo)DataFrames as values. Note that the files will be read
+                in parallel if file paths are used.
             out_data: Either a single folder path or a dictionary with same keys as
-                'in_data' and folder paths as values. If a single folder is
+                'in_data' and folder paths as values. If a single folder is passed,
                 the 'in_data' keys will be used as subfolders.
-
+            municipalities: GeoDataFrame of municipalities (or similar) by which to
+                split the data.
+            with_neighbors: If True, the resulting data will include
+                the neighbor municipalities, as well as the municipality itself.
+                Defaults to False.
             funcdict: Dictionary with the keys of 'in_data' and functions as values.
                 The functions should take a GeoDataFrame as input and return a
                 GeoDataFrame. The function will be executed right after
                 the data is read.
             file_type: Defaults to parquet.
             muni_number_col: String column name with municipality
-                number/identifier. Defaults to KOMMUNENR.
+                number/identifier. Defaults to KOMMUNENR. If the column is not present
+                in the data to be split, the data will be intersected with the
+                municipalities.
             strict: If False (default), the dictionaries 'out_data' and 'funcdict' do
                 not have to have the same length as 'in_data'.
             write_empty: If False (default), municipalities with no data will be skipped.
                 If True, an empty parquet file will be written.
+            clip: If True (default), the data will be clipped.
         """
         shared_kwds = {
             "municipalities": municipalities,
@@ -389,6 +408,7 @@ class Parallel:
             "clip": clip,
             "max_rows_per_chunk": max_rows_per_chunk,
             "processes_in_clip": processes_in_clip,
+            "strict": strict,
         }
 
         if isinstance(out_data, (str, Path)):
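Editor's note: a hedged sketch of how the documented arguments fit together. The file paths and municipality data are hypothetical, and it is assumed the method is called on a Parallel instance, as the class context of this hunk suggests:

    import geopandas as gpd
    from sgis import Parallel

    municipalities = gpd.read_parquet("municipalities.parquet")  # hypothetical input

    Parallel(4).write_municipality_data(
        in_data={"roads": "roads.parquet", "buildings": "buildings.parquet"},
        out_data="split_by_municipality",  # 'roads' and 'buildings' become subfolders
        municipalities=municipalities,
        muni_number_col="KOMMUNENR",
        strict=False,  # new: warn instead of raising on mismatched municipality numbers
    )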
@@ -512,6 +532,7 @@ def write_municipality_data(
     clip: bool = True,
     max_rows_per_chunk: int = 150_000,
     processes_in_clip: int = 1,
+    strict: bool = True,
 ) -> None:
     write_func = (
         _write_neighbor_municipality_data
@@ -530,6 +551,7 @@ def write_municipality_data(
         clip=clip,
         max_rows_per_chunk=max_rows_per_chunk,
         processes_in_clip=processes_in_clip,
+        strict=strict,
     )
 
 
@@ -540,6 +562,7 @@ def _validate_data(data: str | list[str]) -> str:
         return data[0]
     elif not isinstance(data, GeoDataFrame):
         raise TypeError("'data' Must be a file path or a GeoDataFrame. Got", type(data))
+    return data
 
 
 def _get_out_path(out_folder, muni, file_type):
@@ -557,6 +580,7 @@ def _write_municipality_data(
     clip: bool = True,
     max_rows_per_chunk: int = 150_000,
     processes_in_clip: int = 1,
+    strict: bool = True,
 ) -> None:
     data = _validate_data(data)
 
@@ -583,6 +607,7 @@ def _write_municipality_data(
         clip,
         max_rows_per_chunk,
         processes_in_clip=processes_in_clip,
+        strict=strict,
     )
 
     for muni in municipalities[muni_number_col]:
@@ -612,6 +637,7 @@ def _write_neighbor_municipality_data(
     clip: bool = True,
     max_rows_per_chunk: int = 150_000,
     processes_in_clip: int = 1,
+    strict: bool = True,
 ) -> None:
     data = _validate_data(data)
 
@@ -628,6 +654,7 @@ def _write_neighbor_municipality_data(
         clip,
         max_rows_per_chunk,
         processes_in_clip,
+        strict=strict,
     )
 
     if municipalities.index.name != muni_number_col:
@@ -659,6 +686,7 @@ def _fix_missing_muni_numbers(
     clip,
     max_rows_per_chunk,
     processes_in_clip,
+    strict,
 ):
     if muni_number_col in gdf and gdf[muni_number_col].notna().all():
         if municipalities is None:
@@ -666,8 +694,13 @@ def _fix_missing_muni_numbers(
         if diffs := set(gdf[muni_number_col].values).difference(
             set(municipalities[muni_number_col].values)
         ):
-
-
+            message = (
+                f"Different municipality numbers: {diffs}. Set 'strict=False' to ignore"
+            )
+            if strict:
+                raise ValueError(message)
+            else:
+                warnings.warn(message)
         return gdf
 
     if municipalities is None:
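Editor's note: the strict flag added above follows a plain raise-or-warn pattern. A standalone illustration of the same idea (names here are illustrative, not from the package):

    import warnings

    def check_ids(found: set, expected: set, strict: bool = True) -> None:
        # mirror of the package's strict handling: raise when strict, warn otherwise
        if diffs := found - expected:
            message = f"Unknown identifiers: {diffs}. Set 'strict=False' to ignore"
            if strict:
                raise ValueError(message)
            warnings.warn(message)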
@@ -694,12 +727,11 @@
     if not clip:
         notna_anymore = isna.sjoin(municipalities).drop(columns="index_right")
     else:
-        notna_anymore =
+        notna_anymore = parallel_overlay(
             isna,
-            municipalities,
-
-            max_rows_per_chunk,
-            processes_in_clip,
+            municipalities[[muni_number_col, municipalities._geometry_column_name]],
+            processes=processes_in_clip,
+            max_rows_per_chunk=max_rows_per_chunk,
         )
 
     return pd.concat([notna, notna_anymore], ignore_index=True)
@@ -707,40 +739,46 @@
     if not clip:
         return gdf.sjoin(municipalities).drop(columns="index_right")
     else:
-        return
+        return parallel_overlay(
             gdf,
-            municipalities,
-
-            max_rows_per_chunk,
-            processes_in_clip,
+            municipalities[[muni_number_col, municipalities._geometry_column_name]],
+            processes=processes_in_clip,
+            max_rows_per_chunk=max_rows_per_chunk,
         )
 
 
-def
-
-
-    muni_number_col: str,
+def parallel_overlay(
+    df1: GeoDataFrame,
+    df2: GeoDataFrame,
+    # muni_number_col: str,
+    processes: int,
     max_rows_per_chunk: int,
-
+    backend: str = "loky",
+    **kwargs,
 ) -> GeoDataFrame:
-
-    return clean_overlay(df, municipalities)
+    # df2 = df2[[muni_number_col, df2._geometry_column_name]]
 
-
+    if len(df1) < max_rows_per_chunk:
+        return clean_overlay(df1, df2, **kwargs)
 
-
-    chunks = np.array_split(np.arange(len(df)), n_chunks)
+    # df2 = df2.dissolve(by=muni_number_col, as_index=False)
 
-
-
-    df = df.iloc[list(sorted_xs)]
+    n_chunks = len(df1) // max_rows_per_chunk
+    chunks = np.array_split(np.arange(len(df1)), n_chunks)
 
-
+    try:
+        x_mapper = dict(enumerate(df1.centroid))
+        sorted_xs = dict(reversed(sorted(x_mapper.items(), key=lambda item: item[1])))
+        df1 = df1.iloc[list(sorted_xs)]
+    except TypeError:
+        pass
 
+    df1_chunked: list[GeoDataFrame] = [df1.iloc[chunk] for chunk in chunks]
+
+    out = Parallel(processes, backend=backend).map(
         _clean_intersection,
-
-        args=(
+        df1_chunked,
+        args=(df2,),
     )
     return pd.concat(out, ignore_index=True)
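Editor's note: parallel_overlay splits df1 into chunks of at most max_rows_per_chunk rows, attempts to sort rows by centroid so nearby geometries land in the same chunk (falling back to the original order on TypeError), and intersects each chunk with df2 in parallel. A minimal usage sketch (file paths hypothetical; the import path follows this file's location):

    import geopandas as gpd
    from sgis.parallel.parallel import parallel_overlay

    points = gpd.read_parquet("points.parquet")                  # hypothetical
    municipalities = gpd.read_parquet("municipalities.parquet")  # hypothetical

    # runs clean_overlay on four processes; extra kwargs are passed to clean_overlay
    joined = parallel_overlay(
        points, municipalities, processes=4, max_rows_per_chunk=150_000
    )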
sgis/raster/torchgeo.py
CHANGED
@@ -8,6 +8,7 @@ import rasterio.merge
 from rasterio.io import DatasetReader
 from rasterio.vrt import WarpedVRT
 from torchgeo.datasets.geo import RasterDataset
+from torchgeo.datasets.sentinel import Sentinel2 as TorchgeoSentinel2
 
 
 try:
@@ -28,28 +29,16 @@ from ..io.opener import opener
 from .bands import SENTINEL2_FILENAME_REGEX
 
 
-class DaplaRasterDataset(RasterDataset):
-    """
+class GCSRasterDataset(RasterDataset):
+    """Wrapper around torchgeo's RasterDataset that works in and outside of Dapla (stat norway)."""
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         if is_dapla():
             [file.close() for file in self.files]
 
-    def _get_gcs_paths(self, paths: str | Iterable[str], fs=None) -> set[str]:
-        if fs is None:
-            fs = dp.FileClient.get_gcs_file_system()
-
-        # Using set to remove any duplicates if directories are overlapping
-        out_paths: set[str] = set()
-        for path in paths:
-            pathname = os.path.join(path, "**", self.filename_glob)
-            if is_dapla():
-                out_paths |= {x for x in fs.glob(pathname, recursive=True) if "." in x}
-        return out_paths
-
     @property
-    def files(self) -> set[
+    def files(self) -> set[GCSFile] | set[str]:
         """A list of all files in the dataset.
 
         Returns:
@@ -64,7 +53,12 @@ class DaplaRasterDataset(RasterDataset):
 
         if is_dapla():
             fs = dp.FileClient.get_gcs_file_system()
-            files = {
+            files: set[GCSFile] = {
+                fs.open(x)
+                for x in _get_gcs_paths(
+                    paths, filename_glob=self.filename_glob, file_system=fs
+                )
+            }
             return files
 
         # Using set to remove any duplicates if directories are overlapping
@@ -107,7 +101,24 @@ class DaplaRasterDataset(RasterDataset):
         return src
 
 
-class Sentinel2(DaplaRasterDataset):
+def _get_gcs_paths(
+    paths: str | Iterable[str], filename_glob: str, file_system=None
+) -> set[str]:
+    if file_system is None:
+        file_system = dp.FileClient.get_gcs_file_system()
+
+    # Using set to remove any duplicates if directories are overlapping
+    out_paths: set[str] = set()
+    for path in paths:
+        pathname = os.path.join(path, "**", filename_glob)
+        if is_dapla():
+            out_paths |= {
+                x for x in file_system.glob(pathname, recursive=True) if "." in x
+            }
+    return out_paths
+
+
+class Sentinel2(GCSRasterDataset):
     """Works like torchgeo's Sentinel2, with custom regexes."""
 
     date_format: str = "%Y%m%d"
@@ -115,9 +126,6 @@ class Sentinel2(DaplaRasterDataset):
 
     filename_regex = SENTINEL2_FILENAME_REGEX
 
-    _indexes = 1
-    _nodata = 0
-
     all_bands = [
         # "B1",
         "B2",
@@ -138,3 +146,5 @@ class Sentinel2(DaplaRasterDataset):
     separate_files = True
 
     cmap: dict[int, tuple[int, int, int, int]] = {}
+
+    plot = TorchgeoSentinel2.plot
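Editor's note: since Sentinel2 now inherits torchgeo's plot method, it can be used like any torchgeo RasterDataset. A hedged sketch (the data folder is hypothetical, and the sampler/loader wiring follows torchgeo's documented API, not anything in this diff):

    from torch.utils.data import DataLoader
    from torchgeo.datasets import stack_samples
    from torchgeo.samplers import RandomGeoSampler
    from sgis.raster.torchgeo import Sentinel2

    # hypothetical local folder of Sentinel-2 tiles; B2/B3/B4 are in all_bands above
    dataset = Sentinel2("sentinel2_images/", bands=["B2", "B3", "B4"])
    sampler = RandomGeoSampler(dataset, size=256, length=100)
    loader = DataLoader(dataset, sampler=sampler, collate_fn=stack_samples)

    for batch in loader:
        image = batch["image"]  # tensor of shape (batch, bands, 256, 256)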
{ssb_sgis-0.3.13.dist-info → ssb_sgis-1.0.1.dist-info}/METADATA
CHANGED

@@ -1,13 +1,13 @@
 Metadata-Version: 2.1
 Name: ssb-sgis
-Version: 0.3.13
+Version: 1.0.1
 Summary: GIS functions used at Statistics Norway.
 Home-page: https://github.com/statisticsnorway/ssb-sgis
 License: MIT
 Author: Statistics Norway
 Author-email: ort@ssb.no
 Requires-Python: >=3.10,<4
-Classifier: Development Status ::
+Classifier: Development Status :: 5 - Production/Stable
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
@@ -18,16 +18,16 @@ Requires-Dist: branca (>=0.6.0)
 Requires-Dist: dask (>=2024.1.1)
 Requires-Dist: folium (>=0.14.0)
 Requires-Dist: geocoder (>=1.38.1)
-Requires-Dist: geopandas (
-Requires-Dist: igraph (
+Requires-Dist: geopandas (>=0.14.0)
+Requires-Dist: igraph (>=0.11.2)
 Requires-Dist: ipython (>=8.13.2)
 Requires-Dist: jenkspy (>=0.3.2)
 Requires-Dist: mapclassify (>=2.5.0)
 Requires-Dist: matplotlib (>=3.7.0)
 Requires-Dist: networkx (>=3.0)
 Requires-Dist: numpy (>=1.24.2)
-Requires-Dist: pandas (
-Requires-Dist: pip (
+Requires-Dist: pandas (>=2.0.3)
+Requires-Dist: pip (>=23.2.1)
 Requires-Dist: pyarrow (>=11.0.0)
 Requires-Dist: rasterio (>=1.3.8)
 Requires-Dist: requests (>=2.28.2)