ssb-sgis 1.0.15__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sgis/__init__.py +1 -5
- sgis/conf.py +16 -0
- sgis/geopandas_tools/bounds.py +1 -0
- sgis/geopandas_tools/buffer_dissolve_explode.py +9 -9
- sgis/io/dapla_functions.py +233 -64
- sgis/maps/thematicmap.py +9 -2
- sgis/parallel/parallel.py +5 -5
- {ssb_sgis-1.0.15.dist-info → ssb_sgis-1.1.1.dist-info}/METADATA +1 -1
- {ssb_sgis-1.0.15.dist-info → ssb_sgis-1.1.1.dist-info}/RECORD +11 -10
- {ssb_sgis-1.0.15.dist-info → ssb_sgis-1.1.1.dist-info}/LICENSE +0 -0
- {ssb_sgis-1.0.15.dist-info → ssb_sgis-1.1.1.dist-info}/WHEEL +0 -0
sgis/__init__.py
CHANGED

@@ -1,10 +1,6 @@
-config = {
-    "n_jobs": 1,
-}
-
-
 import sgis.raster.indices as indices
 
+from .conf import config
 from .geopandas_tools.bounds import Gridlooper
 from .geopandas_tools.bounds import bounds_to_points
 from .geopandas_tools.bounds import bounds_to_polygon
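The config dict moves out of sgis/__init__.py into the new sgis/conf.py, but is re-exported from the package root. A minimal sketch, assuming the dict is still mutated in place as in 1.0.x:

    import sgis as sg

    # config now lives in sgis/conf.py but is re-exported here,
    # so the 1.0.x pattern keeps working unchanged
    sg.config["n_jobs"] = 4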
sgis/conf.py
ADDED

@@ -0,0 +1,16 @@
+try:
+    from gcsfs import GCSFileSystem
+except ImportError:
+
+    class GCSFileSystem:
+        """Placeholder."""
+
+        def __init__(self, *args, **kwargs) -> None:
+            """Placeholder."""
+            raise ImportError("gcsfs")
+
+
+config = {
+    "n_jobs": 1,
+    "file_system": GCSFileSystem,
+}
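The new "file_system" entry lets callers swap the storage backend without touching the IO functions, which instantiate the configured class on demand (see the config["file_system"]() calls in dapla_functions.py below). A hedged sketch; fsspec's LocalFileSystem is an assumption, not part of the diff, but any class with a gcsfs-like open/glob/exists interface should fit:

    import sgis as sg
    from fsspec.implementations.local import LocalFileSystem  # assumed stand-in

    # point sgis' IO at the local file system instead of Google Cloud Storage;
    # note that config stores the class itself, not an instance
    sg.config["file_system"] = LocalFileSystem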
sgis/geopandas_tools/bounds.py
CHANGED

@@ -669,6 +669,7 @@ def bounds_to_points(
     0    MULTIPOINT (1.00000 0.00000, 1.00000 1.00000, ...
     1                         MULTIPOINT (0.00000 0.00000)
     """
+    gdf = gdf.copy() if copy else gdf
     as_bounds = bounds_to_polygon(gdf, copy=copy)
     if isinstance(gdf, GeoSeries):
         return GeoSeries(extract_unique_points(as_bounds), index=gdf.index)
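The one-line fix makes the copy argument actually protect the input frame. A hedged sketch, assuming copy defaults to True in bounds_to_points:

    import geopandas as gpd
    import sgis as sg
    from shapely.geometry import box

    gdf = gpd.GeoDataFrame({"geometry": [box(0, 0, 1, 1)]}, crs=25833)

    # returns the bounds corner points; with copy=True the input gdf
    # is now left unmodified, matching the documented behavior
    points = sg.bounds_to_points(gdf)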
sgis/geopandas_tools/buffer_dissolve_explode.py
CHANGED

@@ -5,7 +5,7 @@ for the following:
 
 - Geometries are made valid after buffer and dissolve.
 
-- The buffer resolution defaults to
+- The buffer resolution defaults to 30 (geopandas' default is 16).
 
 - If 'by' is not specified, the index will be labeled 0, 1, …, n - 1 after exploded, instead of 0, 0, …, 0 as it will with the geopandas defaults.
 
@@ -49,7 +49,7 @@ def buffdissexp(
     gdf: GeoDataFrame,
     distance: int | float,
     *,
-    resolution: int =
+    resolution: int = 30,
     index_parts: bool = False,
     copy: bool = True,
     grid_size: float | int | None = None,
@@ -68,7 +68,7 @@ def buffdissexp(
         distance: the distance (meters, degrees, depending on the crs) to buffer
             the geometry by
         resolution: The number of segments used to approximate a quarter circle.
-            Here defaults to
+            Here defaults to 30, as opposed to the default 16 in geopandas.
         index_parts: If False (default), the index after dissolve is respected. If
             True, an integer index level is added during explode.
         copy: Whether to copy the GeoDataFrame before buffering. Defaults to True.
@@ -101,7 +101,7 @@ def buffdissexp(
 def buffdiss(
     gdf: GeoDataFrame,
     distance: int | float,
-    resolution: int =
+    resolution: int = 30,
     copy: bool = True,
     n_jobs: int = 1,
     join_style: int | str = "round",
@@ -119,7 +119,7 @@ def buffdiss(
         distance: the distance (meters, degrees, depending on the crs) to buffer
             the geometry by
         resolution: The number of segments used to approximate a quarter circle.
-            Here defaults to
+            Here defaults to 30, as opposed to the default 16 in geopandas.
         join_style: Buffer join style.
         copy: Whether to copy the GeoDataFrame before buffering. Defaults to True.
         n_jobs: Number of threads to use. Defaults to 1.
@@ -511,7 +511,7 @@ def buffdissexp_by_cluster(
     gdf: GeoDataFrame,
     distance: int | float,
     *,
-    resolution: int =
+    resolution: int = 30,
     copy: bool = True,
     n_jobs: int = 1,
     join_style: int | str = "round",
@@ -532,7 +532,7 @@ def buffdissexp_by_cluster(
         distance: the distance (meters, degrees, depending on the crs) to buffer
             the geometry by
         resolution: The number of segments used to approximate a quarter circle.
-            Here defaults to
+            Here defaults to 30, as opposed to the default 16 in geopandas.
         join_style: Buffer join style.
         copy: Whether to copy the GeoDataFrame before buffering. Defaults to True.
         n_jobs: int = 1,
@@ -554,7 +554,7 @@ def buffdissexp_by_cluster(
 def buff(
     gdf: GeoDataFrame | GeoSeries,
     distance: int | float,
-    resolution: int =
+    resolution: int = 30,
     copy: bool = True,
     join_style: int | str = "round",
     **buffer_kwargs,
@@ -566,7 +566,7 @@ def buff(
         distance: the distance (meters, degrees, depending on the crs) to buffer
             the geometry by
         resolution: The number of segments used to approximate a quarter circle.
-            Here defaults to
+            Here defaults to 30, as opposed to the default 16 in geopandas.
         join_style: Buffer join style.
         copy: Whether to copy the GeoDataFrame before buffering. Defaults to True.
         **buffer_kwargs: additional keyword arguments passed to geopandas' buffer.
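For context, resolution is the standard geopandas/shapely segments-per-quarter-circle knob, so the new default simply produces smoother buffers. A quick illustration in plain geopandas:

    import geopandas as gpd
    from shapely.geometry import Point

    circle_16 = gpd.GeoSeries([Point(0, 0)]).buffer(10, resolution=16)  # geopandas default
    circle_30 = gpd.GeoSeries([Point(0, 0)]).buffer(10, resolution=30)  # sgis' default

    # more segments per quarter circle -> more vertices approximating the ring
    assert len(circle_30.iloc[0].exterior.coords) > len(circle_16.iloc[0].exterior.coords)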
sgis/io/dapla_functions.py
CHANGED

@@ -2,35 +2,42 @@
 
 from __future__ import annotations
 
+import functools
+import glob
 import json
 import multiprocessing
 import os
+import shutil
+import uuid
 from collections.abc import Iterable
+from concurrent.futures import ThreadPoolExecutor
 from pathlib import Path
 
-import dapla as dp
 import geopandas as gpd
 import joblib
 import pandas as pd
 import pyarrow
 import pyarrow.parquet as pq
 import shapely
+from gcsfs import GCSFileSystem
 from geopandas import GeoDataFrame
 from geopandas import GeoSeries
 from geopandas.io.arrow import _geopandas_to_arrow
 from pandas import DataFrame
 from pyarrow import ArrowInvalid
 
+from ..geopandas_tools.conversion import to_shapely
 from ..geopandas_tools.general import get_common_crs
 from ..geopandas_tools.sfilter import sfilter
 
 PANDAS_FALLBACK_INFO = " Set pandas_fallback=True to ignore this error."
+from ..conf import config
 
 
 def read_geopandas(
     gcs_path: str | Path | list[str | Path] | tuple[str | Path] | GeoSeries,
     pandas_fallback: bool = False,
-    file_system:
+    file_system: GCSFileSystem | None = None,
     mask: GeoSeries | GeoDataFrame | shapely.Geometry | tuple | None = None,
     threads: int | None = None,
     **kwargs,
@@ -62,7 +69,7 @@ def read_geopandas(
         A GeoDataFrame if it has rows. If zero rows, a pandas DataFrame is returned.
     """
     if file_system is None:
-        file_system =
+        file_system = config["file_system"]()
 
     if not isinstance(gcs_path, (str | Path | os.PathLike)):
         kwargs |= {"file_system": file_system, "pandas_fallback": pandas_fallback}
@@ -129,6 +136,18 @@ def read_geopandas(
     except TypeError as e:
         raise TypeError(f"Unexpected type {type(gcs_path)}.") from e
 
+    if has_partitions(gcs_path, file_system):
+        filters = kwargs.pop("filters", None)
+        return _read_partitioned_parquet(
+            gcs_path,
+            file_system=file_system,
+            mask=mask,
+            pandas_fallback=pandas_fallback,
+            threads=threads,
+            filters=filters,
+            **kwargs,
+        )
+
    if "parquet" in gcs_path or "prqt" in gcs_path:
        with file_system.open(gcs_path, mode="rb") as file:
            try:
@@ -138,8 +157,7 @@ def read_geopandas(
                 raise e.__class__(
                     f"{e.__class__.__name__}: {e} for {gcs_path}."
                 ) from e
-            df =
-
+            df = pd.read_parquet(file, **kwargs)
             if pandas_fallback or not len(df):
                 return df
             else:
@@ -157,7 +175,7 @@ def read_geopandas(
         except ValueError as e:
             if "Missing geo metadata" not in str(e) and "geometry" not in str(e):
                 raise e
-            df =
+            df = pd.read_parquet(file, **kwargs)
 
             if pandas_fallback or not len(df):
                 return df
@@ -168,7 +186,7 @@ def read_geopandas(
             ) from e
         except Exception as e:
             raise e.__class__(
-                f"{e.__class__.__name__}: {e} for {
+                f"{e.__class__.__name__}: {e} for {gcs_path}." + more_txt
             ) from e
 
     if mask is not None:
@@ -177,33 +195,44 @@ def read_geopandas(
 
 
 def _get_bounds_parquet(
-    path: str | Path, file_system:
+    path: str | Path, file_system: GCSFileSystem, pandas_fallback: bool = False
+) -> tuple[list[float], dict] | tuple[None, None]:
+    with file_system.open(path, "rb") as file:
+        return _get_bounds_parquet_from_open_file(file, file_system)
+
+
+def _get_bounds_parquet_from_open_file(
+    file, file_system
 ) -> tuple[list[float], dict] | tuple[None, None]:
-
+    geo_metadata = _get_geo_metadata(file, file_system)
+    if not geo_metadata:
+        return None, None
+    return geo_metadata["bbox"], geo_metadata["crs"]
+
+
+def _get_geo_metadata(file, file_system) -> dict:
+    meta = pq.read_schema(file).metadata
+    geo_metadata = json.loads(meta[b"geo"])
+    try:
+        primary_column = geo_metadata["primary_column"]
+    except KeyError as e:
+        raise KeyError(e, geo_metadata) from e
+    try:
+        return geo_metadata["columns"][primary_column]
+    except KeyError as e:
         try:
-            num_rows = pq.read_metadata(
+            num_rows = pq.read_metadata(file).num_rows
         except ArrowInvalid as e:
-            if not file_system.isfile(
-                return
-            raise ArrowInvalid(e,
+            if not file_system.isfile(file):
+                return {}
+            raise ArrowInvalid(e, file) from e
         if not num_rows:
-            return
-
-        try:
-            meta = json.loads(meta[b"geo"])["columns"]["geometry"]
-        except KeyError as e:
-            if pandas_fallback:
-                return None, None
-            raise KeyError(
-                f"{e.__class__.__name__}: {e} for {path}." + PANDAS_FALLBACK_INFO,
-                # f"{num_rows=}",
-                # meta,
-            ) from e
-        return meta["bbox"], meta["crs"]
+            return {}
+    return {}
 
 
-def _get_columns(path: str | Path, file_system:
-    with file_system.open(path) as f:
+def _get_columns(path: str | Path, file_system: GCSFileSystem) -> pd.Index:
+    with file_system.open(path, "rb") as f:
         schema = pq.read_schema(f)
     index_cols = _get_index_cols(schema)
     return pd.Index(schema.names).difference(index_cols)
@@ -216,7 +245,7 @@ def _get_index_cols(schema: pyarrow.Schema) -> list[str]:
 
 def get_bounds_series(
     paths: list[str | Path] | tuple[str | Path],
-    file_system:
+    file_system: GCSFileSystem | None = None,
     threads: int | None = None,
     pandas_fallback: bool = False,
 ) -> GeoSeries:
@@ -227,7 +256,7 @@ def get_bounds_series(
 
     Args:
         paths: Iterable of file paths in gcs.
-        file_system: Optional instance of
+        file_system: Optional instance of GCSFileSystem.
             If None, an instance is created within the function.
             Note that this is slower in long loops.
         threads: Number of threads to use if reading multiple files. Defaults to
@@ -242,8 +271,7 @@ def get_bounds_series(
     ---------
     >>> import sgis as sg
     >>> import dapla as dp
-    >>>
-    >>> all_paths = file_system.ls("...")
+    >>> all_paths = GCSFileSystem().ls("...")
 
     Get the bounds of all your file paths, indexed by path.
 
@@ -275,7 +303,7 @@ def get_bounds_series(
 
     """
     if file_system is None:
-        file_system =
+        file_system = config["file_system"]()
 
     if threads is None:
         threads = min(len(paths), int(multiprocessing.cpu_count())) or 1
@@ -307,8 +335,8 @@ def write_geopandas(
     gcs_path: str | Path,
     overwrite: bool = True,
     pandas_fallback: bool = False,
-    file_system:
-
+    file_system: GCSFileSystem | None = None,
+    partition_cols=None,
     **kwargs,
 ) -> None:
     """Writes a GeoDataFrame to the speficied format.
@@ -324,13 +352,7 @@ def write_geopandas(
             not be written with geopandas and the number of rows is more than 0. If True,
             the file will be written without geo-metadata if >0 rows.
         file_system: Optional file sustem.
-
-        Writing a bbox column can be computationally expensive, but allows you to specify
-        a bbox in : func:read_parquet for filtered reading.
-        Note: this bbox column is part of the newer GeoParquet 1.1 specification and should be
-        considered as experimental. While writing the column is backwards compatible, using it
-        for filtering may not be supported by all readers.
-
+        partition_cols: Column(s) to partition by. Only for parquet files.
         **kwargs: Additional keyword arguments passed to parquet.write_table
             (for parquet) or geopandas' to_file method (if not parquet).
     """
@@ -340,22 +362,25 @@ def write_geopandas(
     except TypeError as e:
         raise TypeError(f"Unexpected type {type(gcs_path)}.") from e
 
-    if
+    if file_system is None:
+        file_system = config["file_system"]()
+
+    if not overwrite and file_system.exists(gcs_path):
         raise ValueError("File already exists.")
 
     if not isinstance(df, GeoDataFrame):
         raise ValueError("DataFrame must be GeoDataFrame.")
 
-    if
-
-
-    if not len(df):
+    if not len(df) and has_partitions(gcs_path, file_system):
+        return
+    elif not len(df):
         if pandas_fallback:
             df = pd.DataFrame(df)
             df.geometry = df.geometry.astype(str)
             df.geometry = None
         try:
-
+            with file_system.open(gcs_path, "wb") as file:
+                df.to_parquet(gcs_path, **kwargs)
         except Exception as e:
             more_txt = PANDAS_FALLBACK_INFO if not pandas_fallback else ""
             raise e.__class__(
@@ -363,17 +388,22 @@ def write_geopandas(
             ) from e
         return
 
-    file_system = dp.FileClient.get_gcs_file_system()
-
     if ".parquet" in gcs_path or "prqt" in gcs_path:
-
+        if partition_cols is not None:
+            return _write_partitioned_geoparquet(
+                df,
+                gcs_path,
+                partition_cols,
+                file_system,
+                **kwargs,
+            )
+        with file_system.open(gcs_path, mode="wb") as file:
             table = _geopandas_to_arrow(
                 df,
                 index=df.index,
                 schema_version=None,
-                write_covering_bbox=write_covering_bbox,
             )
-            pq.write_table(table,
+            pq.write_table(table, file, compression="snappy", **kwargs)
         return
 
     layer = kwargs.pop("layer", None)
@@ -393,17 +423,156 @@ def write_geopandas(
             df.to_file(file, driver=driver, layer=layer)
 
 
-def
-
+def _remove_file(path, file_system) -> None:
+    try:
+        file_system.rm_file(path)
+    except (AttributeError, TypeError, PermissionError):
+        try:
+            shutil.rmtree(path)
+        except NotADirectoryError:
+            try:
+                os.remove(path)
+            except PermissionError:
+                pass
 
-
-
+def _write_partitioned_geoparquet(df, path, partition_cols, file_system, **kwargs):
+    path = Path(path)
+    unique_id = uuid.uuid4()
 
-
-
-
-
-
+    try:
+        glob_func = functools.partial(file_system.glob, detail=False)
+    except AttributeError:
+        glob_func = functools.partial(glob.glob, recursive=True)
+
+    args: list[tuple[Path, DataFrame]] = []
+    dirs: list[Path] = set()
+    for group, rows in df.groupby(partition_cols):
+        name = (
+            "/".join(
+                f"{col}={value}"
+                for col, value in zip(partition_cols, group, strict=True)
+            )
+            + f"/{unique_id}.parquet"
+        )
+
+        dirs.add((path / name).parent)
+        args.append((path / name, rows))
+
+    if file_system.exists(path) and not has_partitions(path, file_system):
+        _remove_file(path, file_system)
+
+    for dir_ in dirs:
+        try:
+            os.makedirs(dir_, exist_ok=True)
+        except (OSError, FileNotFoundError, FileExistsError) as e:
+            print(e)
+            pass
+
+    def threaded_write(path_rows):
+        new_path, rows = path_rows
+        for sibling_path in glob_func(str(Path(new_path).with_name("**"))):
+            if not paths_are_equal(sibling_path, Path(new_path).parent):
+                _remove_file(sibling_path, file_system)
+        with file_system.open(new_path, mode="wb") as file:
+            table = _geopandas_to_arrow(
+                rows,
+                index=df.index,
+                schema_version=None,
+            )
+            pq.write_table(table, file, compression="snappy", **kwargs)
+
+    with ThreadPoolExecutor() as executor:
+        list(executor.map(threaded_write, args))
+
+
+def _read_partitioned_parquet(
+    path, filters, file_system, mask, pandas_fallback, threads, **kwargs
+):
+    try:
+        glob_func = functools.partial(file_system.glob, detail=False)
+    except AttributeError:
+        glob_func = functools.partial(glob.glob, recursive=True)
+
+    filters = filters or []
+    new_filters = []
+    for filt in filters:
+        if "in" in filt:
+            values = [
+                x.strip("(")
+                .strip(")")
+                .strip("[")
+                .strip("]")
+                .strip("{")
+                .strip("}")
+                .strip(" ")
+                for x in filt[-1].split(",")
+            ]
+            filt = [filt[0] + "=" + x for x in values]
+        else:
+            filt = ["".join(filt)]
+        new_filters.append(filt)
+
+    def intersects(file, mask) -> bool:
+        bbox, _ = _get_bounds_parquet_from_open_file(file, file_system)
+        return shapely.box(*bbox).intersects(to_shapely(mask))
+
+    def read(path) -> GeoDataFrame | None:
+        with file_system.open(path, "rb") as file:
+            if mask is not None and not intersects(file, mask):
+                return
+
+            schema = kwargs.pop("schema", pq.read_schema(file))
+
+            return gpd.read_parquet(file, schema=schema, **kwargs)
+
+    with ThreadPoolExecutor() as executor:
+        results = [
+            x
+            for x in (
+                executor.map(
+                    read,
+                    (
+                        path
+                        for path in glob_func(str(Path(path) / "**/*.parquet"))
+                        if all(
+                            any(subfilt in Path(path).parts for subfilt in filt)
+                            for filt in new_filters
+                        )
+                    ),
+                )
+            )
+            if x is not None
+        ]
+    if results:
+        if mask is not None:
+            return sfilter(pd.concat(results), mask)
+        return pd.concat(results)
+
+    # add columns to empty DataFrame
+    first_path = next(iter(glob_func(str(Path(path) / "**/*.parquet"))))
+    return gpd.GeoDataFrame(
+        columns=list(dict.fromkeys(_get_columns(first_path, file_system)))
+    )
+
+
+def paths_are_equal(path1: Path | str, path2: Path | str) -> bool:
+    return Path(path1).parts == Path(path2).parts
+
+
+def has_partitions(path, file_system) -> bool:
+    try:
+        glob_func = functools.partial(file_system.glob, detail=False)
+    except AttributeError:
+        glob_func = functools.partial(glob.glob, recursive=True)
+
+    return bool(
+        [
+            x
+            for x in glob_func(str(Path(path) / "**/*.parquet"))
+            if not paths_are_equal(x, path)
+        ]
+    )
 
 
 def check_files(
@@ -419,7 +588,7 @@ def check_files(
         within_minutes: Optionally include only files that were updated in the
             last n minutes.
     """
-    file_system =
+    file_system = config["file_system"]()
 
     # (recursive doesn't work, so doing recursive search below)
     info = file_system.ls(folder, detail=True, recursive=True)
@@ -474,7 +643,7 @@ def check_files(
 
 
 def _get_files_in_subfolders(folderinfo: list[dict]) -> list[tuple]:
-    file_system =
+    file_system = config["file_system"]()
 
     fileinfo = []
 
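The net effect is hive-style partitioning support in read_geopandas/write_geopandas: writes fan out to key=value directories, and reads resolve filters against those directory names before opening any file. A hedged round-trip sketch; the bucket paths and column names are made up, and it assumes a GCS environment (or a config["file_system"] pointing elsewhere):

    import sgis as sg

    points = sg.read_geopandas("gs://my-bucket/points.parquet")  # hypothetical path

    # write one parquet file per year/komm_nr combination, as
    # hive-style key=value directories under the target path
    sg.write_geopandas(
        points,
        "gs://my-bucket/points_partitioned.parquet",
        partition_cols=["year", "komm_nr"],
    )

    # read back only the 2024 partitions; ("year", "=", "2024") is joined
    # to "year=2024" and matched against the partition directory names
    points_2024 = sg.read_geopandas(
        "gs://my-bucket/points_partitioned.parquet",
        filters=[("year", "=", "2024")],
    )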
sgis/maps/thematicmap.py
CHANGED

@@ -280,7 +280,10 @@ class ThematicMap(Map):
         return self
 
     def add_background(
-        self,
+        self,
+        gdf: GeoDataFrame,
+        color: str | None = None,
+        **kwargs,
     ) -> "ThematicMap":
         """Add a GeoDataFrame as a background layer.
 
@@ -288,6 +291,7 @@ class ThematicMap(Map):
             gdf: a GeoDataFrame.
             color: Single color. Defaults to gray (shade depends on whether the map
                 facecolor is black or white).
+            **kwargs: Keyword arguments sent to GeoDataFrame.plot.
         """
         if color:
             self.bg_gdf_color = color
@@ -299,6 +303,7 @@ class ThematicMap(Map):
             )
         if self.bounds is None:
             self.bounds = to_bbox(self._gdf.total_bounds)
+        self.bg_gdf_kwargs = kwargs
         return self
 
     def plot(self, **kwargs) -> None:
@@ -515,7 +520,9 @@ class ThematicMap(Map):
     def _actually_add_background(self) -> None:
         self.ax.set_xlim([self.minx - self.diffx * 0.03, self.maxx + self.diffx * 0.03])
         self.ax.set_ylim([self.miny - self.diffy * 0.03, self.maxy + self.diffy * 0.03])
-        self._background_gdfs.plot(
+        self._background_gdfs.plot(
+            ax=self.ax, color=self.bg_gdf_color, **self.bg_gdf_kwargs
+        )
 
     @staticmethod
     def _get_matplotlib_figure_and_axix(
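add_background now stores extra keyword arguments and forwards them to GeoDataFrame.plot when the background is drawn. A hedged sketch with toy data; the ThematicMap(gdf, column=...) constructor shape is an assumption, only add_background and plot appear in this diff:

    import geopandas as gpd
    import sgis as sg
    from shapely.geometry import box

    squares = gpd.GeoDataFrame(
        {"value": [1, 2], "geometry": [box(0, 0, 1, 1), box(1, 0, 2, 1)]}, crs=25833
    )
    background = gpd.GeoDataFrame({"geometry": [box(-1, -1, 3, 2)]}, crs=25833)

    m = sg.ThematicMap(squares, column="value")  # assumed constructor shape
    # alpha and zorder are plain GeoDataFrame.plot kwargs, now passed through
    m.add_background(background, color="lightgray", alpha=0.5, zorder=0)
    m.plot()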
sgis/parallel/parallel.py
CHANGED

@@ -590,7 +590,7 @@ class Parallel:
         with_neighbors: bool = False,
         funcdict: dict[str, Callable] | None = None,
         file_type: str = "parquet",
-        muni_number_col: str = "
+        muni_number_col: str = "komm_nr",
         strict: bool = False,
         write_empty: bool = False,
         id_assign_func: Callable | functools.partial = clean_overlay,
@@ -622,7 +622,7 @@ class Parallel:
             the data is read.
         file_type: Defaults to parquet.
         muni_number_col: String column name with municipality
-            number/identifier. Defaults to
+            number/identifier. Defaults to komm_nr. If the column is not present
             in the data to be split, the data will be intersected with the
             municipalities.
         strict: If False (default), the dictionaries 'out_data' and 'funcdict' does
@@ -761,7 +761,7 @@ def write_municipality_data(
     out_folder: str,
     municipalities: GeoDataFrame | list[str] | None = None,
     with_neighbors: bool = False,
-    muni_number_col: str = "
+    muni_number_col: str = "komm_nr",
     file_type: str = "parquet",
     func: Callable | None = None,
     write_empty: bool = False,
@@ -840,7 +840,7 @@ def _write_municipality_data(
     data: str | GeoDataFrame | DataFrame,
     out_folder: str,
     municipalities: GeoDataFrame | list[str] | None = None,
-    muni_number_col: str = "
+    muni_number_col: str = "komm_nr",
     file_type: str = "parquet",
     func: Callable | None = None,
     write_empty: bool = False,
@@ -896,7 +896,7 @@ def _write_neighbor_municipality_data(
     data: str | GeoDataFrame | DataFrame,
     out_folder: str,
     municipalities: GeoDataFrame,
-    muni_number_col: str = "
+    muni_number_col: str = "komm_nr",
     file_type: str = "parquet",
     func: Callable | None = None,
     write_empty: bool = False,
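All four municipality-splitting entry points now default muni_number_col to "komm_nr". A hedged sketch of the public function; the paths are made up and 'municipalities' stands for any GeoDataFrame whose municipality-number column is named komm_nr:

    import sgis as sg

    sg.write_municipality_data(
        "gs://my-bucket/roads.parquet",          # hypothetical input
        out_folder="gs://my-bucket/roads_by_muni",
        municipalities=municipalities,           # GeoDataFrame with a komm_nr column
        # muni_number_col="komm_nr" is now the default, so it can be omitted
    )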
{ssb_sgis-1.0.15.dist-info → ssb_sgis-1.1.1.dist-info}/RECORD
CHANGED

@@ -1,9 +1,10 @@
-sgis/__init__.py,sha256=
+sgis/__init__.py,sha256=h6B-UD121eAYNpesXIPfEYISrSTN6mfZd7aXPfcMiqs,7382
+sgis/conf.py,sha256=-PraZWfHZerrMVKutPv7u-MezwAG7RlhGgmf5z-iPxA,304
 sgis/debug_config.py,sha256=Tfr19kU46hSkkspsIJcrUWvlhaL4U3-f8xEPkujSCAQ,593
 sgis/exceptions.py,sha256=WNaEBPNNx0rmz-YDzlFX4vIE7ocJQruUTqS2RNAu2zU,660
 sgis/geopandas_tools/__init__.py,sha256=bo8lFMcltOz7TtWAi52_ekR2gd3mjfBfKeMDV5zuqFY,28
-sgis/geopandas_tools/bounds.py,sha256=
-sgis/geopandas_tools/buffer_dissolve_explode.py,sha256=
+sgis/geopandas_tools/bounds.py,sha256=MUtm3w6P_t7RfZ8WJz5iKqG2PyVzFMiAALf6AMLAewk,23798
+sgis/geopandas_tools/buffer_dissolve_explode.py,sha256=t9GJqRMDsHEU74RIlqeMr4QBgbTK0hYlXL4af1RKIks,19955
 sgis/geopandas_tools/centerlines.py,sha256=Q65Sx01SeAlulBEd9oaZkB2maBBNdLcJwAbTILg4SPU,11848
 sgis/geopandas_tools/cleaning.py,sha256=tkHH5wm0BF_1EXMsh3rM9DqWOwtr7JKkqCCi6uymSP4,60214
 sgis/geopandas_tools/conversion.py,sha256=o3QJZLfaqqpJNdWWNKfQn_dS77uJxxRxWZxhf18vPXs,25505
@@ -19,7 +20,7 @@ sgis/geopandas_tools/polygons_as_rings.py,sha256=BX_GZS6F9I4NbEpiOlNBd7zywJjdfdJ
 sgis/geopandas_tools/sfilter.py,sha256=SLcMYprQwnY5DNo0R7TGXk4m6u26H8o4PRn-RPhmeZY,9345
 sgis/helpers.py,sha256=3NqPfVBKlZcZTiMJrsTAlDv5tNKDHrJr_8NimutVzQg,8797
 sgis/io/_is_dapla.py,sha256=wmfkSe98IrLhUg3dtXZusV6OVC8VlY1kbc5EQDf3P-Q,358
-sgis/io/dapla_functions.py,sha256=
+sgis/io/dapla_functions.py,sha256=rTdTrxUQLM0NtnuYKiVFXInagh6wCWWr4lUTbzk_V0Q,23130
 sgis/io/opener.py,sha256=HWO3G1NB6bpXKM94JadCD513vjat1o1TFjWGWzyVasg,898
 sgis/io/read_parquet.py,sha256=FvZYv1rLkUlrSaUY6QW6E1yntmntTeQuZ9ZRgCDO4IM,3776
 sgis/maps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -30,7 +31,7 @@ sgis/maps/legend.py,sha256=lVRVCkhPmJRjGK23obFJZAO3qp6du1LYnobkkN7DPkc,26279
 sgis/maps/map.py,sha256=smaf9i53EoRZWmZjn9UuqlhzUvVs1XKo2ItIpHxyuik,29592
 sgis/maps/maps.py,sha256=gxu0rgcVygjudRtM1dVRmsUMilMUIg3vG-UgvASM91E,23072
 sgis/maps/norge_i_bilder.json,sha256=W_mFfte3DxugWbEudZ5fadZ2JeFYb0hyab2Quf4oJME,481311
-sgis/maps/thematicmap.py,sha256=
+sgis/maps/thematicmap.py,sha256=w6q4_gIr8BubQgsPJkc6WXk-tmplDLGcKyjphhFp7ng,21873
 sgis/maps/tilesources.py,sha256=F4mFHxPwkiPJdVKzNkScTX6xbJAMIUtlTq4mQ83oguw,1746
 sgis/maps/wms.py,sha256=XHlCszR0raPbmUc2wYpQ_XRHnSJ6c1ic3w2dNnfMRm4,6252
 sgis/networkanalysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -47,7 +48,7 @@ sgis/networkanalysis/networkanalysis.py,sha256=-g7slZLFNxUZSUMvVmf7zax-9IOXz1NGC
 sgis/networkanalysis/networkanalysisrules.py,sha256=9sXigaCzvKhXFwpeVNMtOiIK3_Hzp9yDpFklmEEAPak,12956
 sgis/networkanalysis/nodes.py,sha256=atFSpqz-_uJHMrf6MC0zhrrcWIydRMFZrsaHC2xr1GU,3374
 sgis/networkanalysis/traveling_salesman.py,sha256=Jjo6bHY4KJ-eK0LycyTy0sWxZjgITs5MBllZ_G9FhTE,5655
-sgis/parallel/parallel.py,sha256=
+sgis/parallel/parallel.py,sha256=eyIXPp6nhUhLh1rwkfPLayG5hAi3i8PxmwFHxGCo-k4,39677
 sgis/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sgis/raster/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sgis/raster/base.py,sha256=tiZEuMcVK6hOm_aIjWhQ1WGshcjsxT1fFkuBSLFiMC0,7785
@@ -56,7 +57,7 @@ sgis/raster/indices.py,sha256=-J1HYmnT240iozvgagvyis6K0_GHZHRuUrPOgyoeIrY,223
 sgis/raster/regex.py,sha256=kYhVpRYzoXutx1dSYmqMoselWXww7MMEsTPmLZwHjbM,3759
 sgis/raster/sentinel_config.py,sha256=nySDqn2R8M6W8jguoBeSAK_zzbAsqmaI59i32446FwY,1268
 sgis/raster/zonal.py,sha256=D4Gyptw-yOLTCO41peIuYbY-DANsJCG19xXDlf1QAz4,2299
-ssb_sgis-1.
-ssb_sgis-1.
-ssb_sgis-1.
-ssb_sgis-1.
+ssb_sgis-1.1.1.dist-info/LICENSE,sha256=np3IfD5m0ZUofn_kVzDZqliozuiO6wrktw3LRPjyEiI,1073
+ssb_sgis-1.1.1.dist-info/METADATA,sha256=dwdCCyDOy441bczyc2JwdB9eZo_BtmTqL0Dqq05IZeQ,11740
+ssb_sgis-1.1.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ssb_sgis-1.1.1.dist-info/RECORD,,

{ssb_sgis-1.0.15.dist-info → ssb_sgis-1.1.1.dist-info}/LICENSE
File without changes

{ssb_sgis-1.0.15.dist-info → ssb_sgis-1.1.1.dist-info}/WHEEL
File without changes