ssb-sgis 1.0.15__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sgis/__init__.py CHANGED
@@ -1,10 +1,6 @@
-config = {
-    "n_jobs": 1,
-}
-
-
 import sgis.raster.indices as indices
 
+from .conf import config
 from .geopandas_tools.bounds import Gridlooper
 from .geopandas_tools.bounds import bounds_to_points
 from .geopandas_tools.bounds import bounds_to_polygon
sgis/conf.py ADDED
@@ -0,0 +1,16 @@
+try:
+    from gcsfs import GCSFileSystem
+except ImportError:
+
+    class GCSFileSystem:
+        """Placeholder."""
+
+        def __init__(self, *args, **kwargs) -> None:
+            """Placeholder."""
+            raise ImportError("gcsfs")
+
+
+config = {
+    "n_jobs": 1,
+    "file_system": GCSFileSystem,
+}
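
The config dict that previously lived at the top of sgis/__init__.py moves into its own module and gains a "file_system" entry, making the gcsfs dependency optional and swappable. A minimal sketch of overriding it, assuming any fsspec-compatible filesystem class is acceptable (LocalFileSystem here is illustrative, not something sgis mandates):

    import sgis
    from fsspec.implementations.local import LocalFileSystem

    # config["file_system"] stores a class, not an instance; the IO helpers
    # call it with no arguments whenever no file_system argument is given.
    sgis.config["file_system"] = LocalFileSystem
    sgis.config["n_jobs"] = 4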
sgis/geopandas_tools/bounds.py CHANGED
@@ -669,6 +669,7 @@ def bounds_to_points(
     0    MULTIPOINT (1.00000 0.00000, 1.00000 1.00000, ...
     1                         MULTIPOINT (0.00000 0.00000)
     """
+    gdf = gdf.copy() if copy else gdf
     as_bounds = bounds_to_polygon(gdf, copy=copy)
     if isinstance(gdf, GeoSeries):
         return GeoSeries(extract_unique_points(as_bounds), index=gdf.index)
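
The added line makes bounds_to_points copy the input up front when copy=True, so the caller's object is never mutated. A small usage sketch, assuming a one-polygon GeoDataFrame:

    import geopandas as gpd
    from shapely.geometry import box

    import sgis as sg

    gdf = gpd.GeoDataFrame({"geometry": [box(0, 0, 1, 1)]})
    points = sg.bounds_to_points(gdf)  # MULTIPOINT of bbox corners; gdf is untouched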
sgis/geopandas_tools/buffer_dissolve_explode.py CHANGED
@@ -5,7 +5,7 @@ for the following:
 
 - Geometries are made valid after buffer and dissolve.
 
-- The buffer resolution defaults to 50 (geopandas' default is 16).
+- The buffer resolution defaults to 30 (geopandas' default is 16).
 
 - If 'by' is not specified, the index will be labeled 0, 1, …, n - 1 after exploded, instead of 0, 0, …, 0 as it will with the geopandas defaults.
 
@@ -49,7 +49,7 @@ def buffdissexp(
     gdf: GeoDataFrame,
     distance: int | float,
     *,
-    resolution: int = 50,
+    resolution: int = 30,
     index_parts: bool = False,
     copy: bool = True,
     grid_size: float | int | None = None,
@@ -68,7 +68,7 @@ def buffdissexp(
         distance: the distance (meters, degrees, depending on the crs) to buffer
             the geometry by
         resolution: The number of segments used to approximate a quarter circle.
-            Here defaults to 50, as opposed to the default 16 in geopandas.
+            Here defaults to 30, as opposed to the default 16 in geopandas.
         index_parts: If False (default), the index after dissolve is respected. If
             True, an integer index level is added during explode.
         copy: Whether to copy the GeoDataFrame before buffering. Defaults to True.
@@ -101,7 +101,7 @@ def buffdissexp(
 def buffdiss(
     gdf: GeoDataFrame,
     distance: int | float,
-    resolution: int = 50,
+    resolution: int = 30,
     copy: bool = True,
     n_jobs: int = 1,
     join_style: int | str = "round",
@@ -119,7 +119,7 @@ def buffdiss(
         distance: the distance (meters, degrees, depending on the crs) to buffer
             the geometry by
         resolution: The number of segments used to approximate a quarter circle.
-            Here defaults to 50, as opposed to the default 16 in geopandas.
+            Here defaults to 30, as opposed to the default 16 in geopandas.
         join_style: Buffer join style.
         copy: Whether to copy the GeoDataFrame before buffering. Defaults to True.
         n_jobs: Number of threads to use. Defaults to 1.
@@ -511,7 +511,7 @@ def buffdissexp_by_cluster(
     gdf: GeoDataFrame,
     distance: int | float,
     *,
-    resolution: int = 50,
+    resolution: int = 30,
     copy: bool = True,
     n_jobs: int = 1,
     join_style: int | str = "round",
@@ -532,7 +532,7 @@ def buffdissexp_by_cluster(
         distance: the distance (meters, degrees, depending on the crs) to buffer
             the geometry by
         resolution: The number of segments used to approximate a quarter circle.
-            Here defaults to 50, as opposed to the default 16 in geopandas.
+            Here defaults to 30, as opposed to the default 16 in geopandas.
         join_style: Buffer join style.
         copy: Whether to copy the GeoDataFrame before buffering. Defaults to True.
         n_jobs: int = 1,
@@ -554,7 +554,7 @@ def buffdissexp_by_cluster(
 def buff(
     gdf: GeoDataFrame | GeoSeries,
     distance: int | float,
-    resolution: int = 50,
+    resolution: int = 30,
     copy: bool = True,
     join_style: int | str = "round",
     **buffer_kwargs,
@@ -566,7 +566,7 @@ def buff(
         distance: the distance (meters, degrees, depending on the crs) to buffer
             the geometry by
         resolution: The number of segments used to approximate a quarter circle.
-            Here defaults to 50, as opposed to the default 16 in geopandas.
+            Here defaults to 30, as opposed to the default 16 in geopandas.
         join_style: Buffer join style.
         copy: Whether to copy the GeoDataFrame before buffering. Defaults to True.
         **buffer_kwargs: additional keyword arguments passed to geopandas' buffer.
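
Every buffer helper in this module (buffdissexp, buffdiss, buffdissexp_by_cluster, buff) drops its default resolution from 50 to 30 segments per quarter circle, trading a little circle precision for speed. Passing resolution explicitly reproduces the 1.0.x output, for example:

    import geopandas as gpd
    from shapely.geometry import Point

    import sgis as sg

    gdf = gpd.GeoDataFrame({"geometry": [Point(0, 0), Point(1, 1)]})
    coarse = sg.buffdissexp(gdf, 10)               # 1.1.x default: resolution=30
    fine = sg.buffdissexp(gdf, 10, resolution=50)  # the old 1.0.x default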
sgis/io/dapla_functions.py CHANGED
@@ -2,35 +2,42 @@
 
 from __future__ import annotations
 
+import functools
+import glob
 import json
 import multiprocessing
 import os
+import shutil
+import uuid
 from collections.abc import Iterable
+from concurrent.futures import ThreadPoolExecutor
 from pathlib import Path
 
-import dapla as dp
 import geopandas as gpd
 import joblib
 import pandas as pd
 import pyarrow
 import pyarrow.parquet as pq
 import shapely
+from gcsfs import GCSFileSystem
 from geopandas import GeoDataFrame
 from geopandas import GeoSeries
 from geopandas.io.arrow import _geopandas_to_arrow
 from pandas import DataFrame
 from pyarrow import ArrowInvalid
 
+from ..geopandas_tools.conversion import to_shapely
 from ..geopandas_tools.general import get_common_crs
 from ..geopandas_tools.sfilter import sfilter
 
 PANDAS_FALLBACK_INFO = " Set pandas_fallback=True to ignore this error."
+from ..conf import config
 
 
 def read_geopandas(
     gcs_path: str | Path | list[str | Path] | tuple[str | Path] | GeoSeries,
     pandas_fallback: bool = False,
-    file_system: dp.gcs.GCSFileSystem | None = None,
+    file_system: GCSFileSystem | None = None,
     mask: GeoSeries | GeoDataFrame | shapely.Geometry | tuple | None = None,
     threads: int | None = None,
     **kwargs,
@@ -62,7 +69,7 @@ def read_geopandas(
         A GeoDataFrame if it has rows. If zero rows, a pandas DataFrame is returned.
     """
     if file_system is None:
-        file_system = dp.FileClient.get_gcs_file_system()
+        file_system = config["file_system"]()
 
     if not isinstance(gcs_path, (str | Path | os.PathLike)):
         kwargs |= {"file_system": file_system, "pandas_fallback": pandas_fallback}
@@ -129,6 +136,18 @@ def read_geopandas(
     except TypeError as e:
         raise TypeError(f"Unexpected type {type(gcs_path)}.") from e
 
+    if has_partitions(gcs_path, file_system):
+        filters = kwargs.pop("filters", None)
+        return _read_partitioned_parquet(
+            gcs_path,
+            file_system=file_system,
+            mask=mask,
+            pandas_fallback=pandas_fallback,
+            threads=threads,
+            filters=filters,
+            **kwargs,
+        )
+
     if "parquet" in gcs_path or "prqt" in gcs_path:
         with file_system.open(gcs_path, mode="rb") as file:
             try:
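
read_geopandas now checks for a partitioned dataset before the single-file branch: if has_partitions finds part files below gcs_path, the whole directory is read by _read_partitioned_parquet (defined further down), and a "filters" keyword is matched against the hive-style directory names. A hedged sketch, with a hypothetical bucket path and partition column:

    import sgis as sg

    # Partition pruning works on path parts such as "komm_nr=0301", so only
    # matching subdirectories are opened at all.
    gdf = sg.read_geopandas(
        "my-bucket/roads_by_muni.parquet",  # hypothetical partitioned dataset
        filters=[("komm_nr", "in", "(0301, 4601)")],
    )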
@@ -138,8 +157,7 @@ def read_geopandas(
                 raise e.__class__(
                     f"{e.__class__.__name__}: {e} for {gcs_path}."
                 ) from e
-            df = dp.read_pandas(gcs_path, **kwargs)
-
+            df = pd.read_parquet(file, **kwargs)
             if pandas_fallback or not len(df):
                 return df
             else:
@@ -157,7 +175,7 @@ def read_geopandas(
         except ValueError as e:
             if "Missing geo metadata" not in str(e) and "geometry" not in str(e):
                 raise e
-            df = dp.read_pandas(gcs_path, **kwargs)
+            df = pd.read_parquet(file, **kwargs)
 
             if pandas_fallback or not len(df):
                 return df
@@ -168,7 +186,7 @@ def read_geopandas(
                 ) from e
         except Exception as e:
             raise e.__class__(
-                f"{e.__class__.__name__}: {e} for {df}." + more_txt
+                f"{e.__class__.__name__}: {e} for {gcs_path}." + more_txt
             ) from e
 
     if mask is not None:
@@ -177,33 +195,44 @@ def read_geopandas(
 
 
 def _get_bounds_parquet(
-    path: str | Path, file_system: dp.gcs.GCSFileSystem, pandas_fallback: bool = False
+    path: str | Path, file_system: GCSFileSystem, pandas_fallback: bool = False
+) -> tuple[list[float], dict] | tuple[None, None]:
+    with file_system.open(path, "rb") as file:
+        return _get_bounds_parquet_from_open_file(file, file_system)
+
+
+def _get_bounds_parquet_from_open_file(
+    file, file_system
 ) -> tuple[list[float], dict] | tuple[None, None]:
-    with file_system.open(path) as f:
+    geo_metadata = _get_geo_metadata(file, file_system)
+    if not geo_metadata:
+        return None, None
+    return geo_metadata["bbox"], geo_metadata["crs"]
+
+
+def _get_geo_metadata(file, file_system) -> dict:
+    meta = pq.read_schema(file).metadata
+    geo_metadata = json.loads(meta[b"geo"])
+    try:
+        primary_column = geo_metadata["primary_column"]
+    except KeyError as e:
+        raise KeyError(e, geo_metadata) from e
+    try:
+        return geo_metadata["columns"][primary_column]
+    except KeyError as e:
         try:
-            num_rows = pq.read_metadata(f).num_rows
+            num_rows = pq.read_metadata(file).num_rows
         except ArrowInvalid as e:
-            if not file_system.isfile(f):
-                return None, None
-            raise ArrowInvalid(e, path) from e
+            if not file_system.isfile(file):
+                return {}
+            raise ArrowInvalid(e, file) from e
         if not num_rows:
-            return None, None
-        meta = pq.read_schema(f).metadata
-        try:
-            meta = json.loads(meta[b"geo"])["columns"]["geometry"]
-        except KeyError as e:
-            if pandas_fallback:
-                return None, None
-            raise KeyError(
-                f"{e.__class__.__name__}: {e} for {path}." + PANDAS_FALLBACK_INFO,
-                # f"{num_rows=}",
-                # meta,
-            ) from e
-        return meta["bbox"], meta["crs"]
+            return {}
+        return {}
 
 
-def _get_columns(path: str | Path, file_system: dp.gcs.GCSFileSystem) -> pd.Index:
-    with file_system.open(path) as f:
+def _get_columns(path: str | Path, file_system: GCSFileSystem) -> pd.Index:
+    with file_system.open(path, "rb") as f:
         schema = pq.read_schema(f)
         index_cols = _get_index_cols(schema)
         return pd.Index(schema.names).difference(index_cols)
@@ -216,7 +245,7 @@ def _get_index_cols(schema: pyarrow.Schema) -> list[str]:
 
 def get_bounds_series(
     paths: list[str | Path] | tuple[str | Path],
-    file_system: dp.gcs.GCSFileSystem | None = None,
+    file_system: GCSFileSystem | None = None,
     threads: int | None = None,
     pandas_fallback: bool = False,
 ) -> GeoSeries:
@@ -227,7 +256,7 @@ def get_bounds_series(
 
     Args:
         paths: Iterable of file paths in gcs.
-        file_system: Optional instance of dp.gcs.GCSFileSystem.
+        file_system: Optional instance of GCSFileSystem.
            If None, an instance is created within the function.
            Note that this is slower in long loops.
        threads: Number of threads to use if reading multiple files. Defaults to
@@ -242,8 +271,7 @@ def get_bounds_series(
     ---------
     >>> import sgis as sg
     >>> import dapla as dp
-    >>> file_system = dp.FileClient.get_gcs_file_system()
-    >>> all_paths = file_system.ls("...")
+    >>> all_paths = GCSFileSystem().ls("...")
 
     Get the bounds of all your file paths, indexed by path.
 
@@ -275,7 +303,7 @@ def get_bounds_series(
 
     """
     if file_system is None:
-        file_system = dp.FileClient.get_gcs_file_system()
+        file_system = config["file_system"]()
 
     if threads is None:
         threads = min(len(paths), int(multiprocessing.cpu_count())) or 1
@@ -307,8 +335,8 @@ def write_geopandas(
     gcs_path: str | Path,
     overwrite: bool = True,
     pandas_fallback: bool = False,
-    file_system: dp.gcs.GCSFileSystem | None = None,
-    write_covering_bbox: bool = False,
+    file_system: GCSFileSystem | None = None,
+    partition_cols=None,
     **kwargs,
 ) -> None:
     """Writes a GeoDataFrame to the speficied format.
@@ -324,13 +352,7 @@ def write_geopandas(
            not be written with geopandas and the number of rows is more than 0. If True,
            the file will be written without geo-metadata if >0 rows.
        file_system: Optional file sustem.
-       write_covering_bbox: Writes the bounding box column for each row entry with column name "bbox".
-           Writing a bbox column can be computationally expensive, but allows you to specify
-           a bbox in : func:read_parquet for filtered reading.
-           Note: this bbox column is part of the newer GeoParquet 1.1 specification and should be
-           considered as experimental. While writing the column is backwards compatible, using it
-           for filtering may not be supported by all readers.
-
+       partition_cols: Column(s) to partition by. Only for parquet files.
        **kwargs: Additional keyword arguments passed to parquet.write_table
            (for parquet) or geopandas' to_file method (if not parquet).
     """
@@ -340,22 +362,25 @@ def write_geopandas(
     except TypeError as e:
         raise TypeError(f"Unexpected type {type(gcs_path)}.") from e
 
-    if not overwrite and exists(gcs_path):
+    if file_system is None:
+        file_system = config["file_system"]()
+
+    if not overwrite and file_system.exists(gcs_path):
         raise ValueError("File already exists.")
 
     if not isinstance(df, GeoDataFrame):
         raise ValueError("DataFrame must be GeoDataFrame.")
 
-    if file_system is None:
-        file_system = dp.FileClient.get_gcs_file_system()
-
-    if not len(df):
+    if not len(df) and has_partitions(gcs_path, file_system):
+        return
+    elif not len(df):
         if pandas_fallback:
             df = pd.DataFrame(df)
             df.geometry = df.geometry.astype(str)
         df.geometry = None
         try:
-            dp.write_pandas(df, gcs_path, **kwargs)
+            with file_system.open(gcs_path, "wb") as file:
+                df.to_parquet(gcs_path, **kwargs)
         except Exception as e:
             more_txt = PANDAS_FALLBACK_INFO if not pandas_fallback else ""
             raise e.__class__(
@@ -363,17 +388,22 @@ def write_geopandas(
         ) from e
         return
 
-    file_system = dp.FileClient.get_gcs_file_system()
-
     if ".parquet" in gcs_path or "prqt" in gcs_path:
-        with file_system.open(gcs_path, mode="wb") as buffer:
+        if partition_cols is not None:
+            return _write_partitioned_geoparquet(
+                df,
+                gcs_path,
+                partition_cols,
+                file_system,
+                **kwargs,
+            )
+        with file_system.open(gcs_path, mode="wb") as file:
            table = _geopandas_to_arrow(
                df,
                index=df.index,
                schema_version=None,
-               write_covering_bbox=write_covering_bbox,
            )
-           pq.write_table(table, buffer, compression="snappy", **kwargs)
+           pq.write_table(table, file, compression="snappy", **kwargs)
        return
 
     layer = kwargs.pop("layer", None)
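
write_geopandas replaces the experimental write_covering_bbox flag with partition_cols: when set, each group of rows is written to <column>=<value>/<uuid4>.parquet below gcs_path instead of to a single file. A sketch with hypothetical data and path:

    import geopandas as gpd
    from shapely.geometry import Point

    import sgis as sg

    gdf = gpd.GeoDataFrame(
        {"komm_nr": ["0301", "4601"], "geometry": [Point(0, 0), Point(1, 1)]}
    )
    # Writes komm_nr=0301/<uuid>.parquet and komm_nr=4601/<uuid>.parquet.
    sg.write_geopandas(
        gdf, "my-bucket/roads_by_muni.parquet", partition_cols=["komm_nr"]
    )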
@@ -393,17 +423,156 @@ def write_geopandas(
             df.to_file(file, driver=driver, layer=layer)
 
 
-def exists(path: str | Path) -> bool:
-    """Returns True if the path exists, and False if it doesn't.
+def _remove_file(path, file_system) -> None:
+    try:
+        file_system.rm_file(path)
+    except (AttributeError, TypeError, PermissionError):
+        try:
+            shutil.rmtree(path)
+        except NotADirectoryError:
+            try:
+                os.remove(path)
+            except PermissionError:
+                pass
+
 
-    Args:
-        path (str): The path to the file or directory.
+def _write_partitioned_geoparquet(df, path, partition_cols, file_system, **kwargs):
+    path = Path(path)
+    unique_id = uuid.uuid4()
 
-    Returns:
-        True if the path exists, False if not.
-    """
-    file_system = dp.FileClient.get_gcs_file_system()
-    return file_system.exists(path)
+    try:
+        glob_func = functools.partial(file_system.glob, detail=False)
+    except AttributeError:
+        glob_func = functools.partial(glob.glob, recursive=True)
+
+    args: list[tuple[Path, DataFrame]] = []
+    dirs: list[Path] = set()
+    for group, rows in df.groupby(partition_cols):
+        name = (
+            "/".join(
+                f"{col}={value}"
+                for col, value in zip(partition_cols, group, strict=True)
+            )
+            + f"/{unique_id}.parquet"
+        )
+
+        dirs.add((path / name).parent)
+        args.append((path / name, rows))
+
+    if file_system.exists(path) and not has_partitions(path, file_system):
+        _remove_file(path, file_system)
+
+    for dir_ in dirs:
+        try:
+            os.makedirs(dir_, exist_ok=True)
+        except (OSError, FileNotFoundError, FileExistsError) as e:
+            print(e)
+            pass
+
+    def threaded_write(path_rows):
+        new_path, rows = path_rows
+        for sibling_path in glob_func(str(Path(new_path).with_name("**"))):
+            if not paths_are_equal(sibling_path, Path(new_path).parent):
+                _remove_file(sibling_path, file_system)
+        with file_system.open(new_path, mode="wb") as file:
+            table = _geopandas_to_arrow(
+                rows,
+                index=df.index,
+                schema_version=None,
+            )
+            pq.write_table(table, file, compression="snappy", **kwargs)
+
+    with ThreadPoolExecutor() as executor:
+        list(executor.map(threaded_write, args))
+
+
+def _read_partitioned_parquet(
+    path, filters, file_system, mask, pandas_fallback, threads, **kwargs
+):
+    try:
+        glob_func = functools.partial(file_system.glob, detail=False)
+    except AttributeError:
+        glob_func = functools.partial(glob.glob, recursive=True)
+
+    filters = filters or []
+    new_filters = []
+    for filt in filters:
+        if "in" in filt:
+            values = [
+                x.strip("(")
+                .strip(")")
+                .strip("[")
+                .strip("]")
+                .strip("{")
+                .strip("}")
+                .strip(" ")
+                for x in filt[-1].split(",")
+            ]
+            filt = [filt[0] + "=" + x for x in values]
+        else:
+            filt = ["".join(filt)]
+        new_filters.append(filt)
+
+    def intersects(file, mask) -> bool:
+        bbox, _ = _get_bounds_parquet_from_open_file(file, file_system)
+        return shapely.box(*bbox).intersects(to_shapely(mask))
+
+    def read(path) -> GeoDataFrame | None:
+        with file_system.open(path, "rb") as file:
+            if mask is not None and not intersects(file, mask):
+                return
+
+            schema = kwargs.pop("schema", pq.read_schema(file))
+
+            return gpd.read_parquet(file, schema=schema, **kwargs)
+
+    with ThreadPoolExecutor() as executor:
+        results = [
+            x
+            for x in (
+                executor.map(
+                    read,
+                    (
+                        path
+                        for path in glob_func(str(Path(path) / "**/*.parquet"))
+                        if all(
+                            any(subfilt in Path(path).parts for subfilt in filt)
+                            for filt in new_filters
+                        )
+                    ),
+                )
+            )
+            if x is not None
+        ]
+    if results:
+        if mask is not None:
+            return sfilter(pd.concat(results), mask)
+        return pd.concat(results)
+
+    # add columns to empty DataFrame
+    first_path = next(iter(glob_func(str(Path(path) / "**/*.parquet"))))
+    return gpd.GeoDataFrame(
+        columns=list(dict.fromkeys(_get_columns(first_path, file_system)))
+    )
+
+
+def paths_are_equal(path1: Path | str, path2: Path | str) -> bool:
+    return Path(path1).parts == Path(path2).parts
+
+
+def has_partitions(path, file_system) -> bool:
+    try:
+        glob_func = functools.partial(file_system.glob, detail=False)
+    except AttributeError:
+        glob_func = functools.partial(glob.glob, recursive=True)
+
+    return bool(
+        [
+            x
+            for x in glob_func(str(Path(path) / "**/*.parquet"))
+            if not paths_are_equal(x, path)
+        ]
+    )
 
 
 def check_files(
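
has_partitions is the predicate the read and write paths above key off: any *.parquet file strictly below the given path (other than the path itself) marks the path as a partitioned dataset. A small sketch, assuming gcsfs is installed and using a hypothetical path:

    from gcsfs import GCSFileSystem

    from sgis.io.dapla_functions import has_partitions

    file_system = GCSFileSystem()
    # True once the directory holds e.g. komm_nr=0301/<uuid>.parquet.
    has_partitions("my-bucket/roads_by_muni.parquet", file_system)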
@@ -419,7 +588,7 @@ def check_files(
         within_minutes: Optionally include only files that were updated in the
             last n minutes.
     """
-    file_system = dp.FileClient.get_gcs_file_system()
+    file_system = config["file_system"]()
 
     # (recursive doesn't work, so doing recursive search below)
     info = file_system.ls(folder, detail=True, recursive=True)
@@ -474,7 +643,7 @@ def check_files(
 
 
 def _get_files_in_subfolders(folderinfo: list[dict]) -> list[tuple]:
-    file_system = dp.FileClient.get_gcs_file_system()
+    file_system = config["file_system"]()
 
     fileinfo = []
 
sgis/maps/thematicmap.py CHANGED
@@ -280,7 +280,10 @@ class ThematicMap(Map):
         return self
 
     def add_background(
-        self, gdf: GeoDataFrame, color: str | None = None
+        self,
+        gdf: GeoDataFrame,
+        color: str | None = None,
+        **kwargs,
     ) -> "ThematicMap":
         """Add a GeoDataFrame as a background layer.
 
@@ -288,6 +291,7 @@ class ThematicMap(Map):
             gdf: a GeoDataFrame.
             color: Single color. Defaults to gray (shade depends on whether the map
                 facecolor is black or white).
+            **kwargs: Keyword arguments sent to GeoDataFrame.plot.
         """
         if color:
             self.bg_gdf_color = color
@@ -299,6 +303,7 @@ class ThematicMap(Map):
         )
         if self.bounds is None:
             self.bounds = to_bbox(self._gdf.total_bounds)
+        self.bg_gdf_kwargs = kwargs
         return self
 
     def plot(self, **kwargs) -> None:
@@ -515,7 +520,9 @@ class ThematicMap(Map):
     def _actually_add_background(self) -> None:
         self.ax.set_xlim([self.minx - self.diffx * 0.03, self.maxx + self.diffx * 0.03])
         self.ax.set_ylim([self.miny - self.diffy * 0.03, self.maxy + self.diffy * 0.03])
-        self._background_gdfs.plot(ax=self.ax, color=self.bg_gdf_color)
+        self._background_gdfs.plot(
+            ax=self.ax, color=self.bg_gdf_color, **self.bg_gdf_kwargs
+        )
 
     @staticmethod
     def _get_matplotlib_figure_and_axix(
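
add_background now stores any extra keyword arguments and forwards them to GeoDataFrame.plot when the map is drawn, so matplotlib options like alpha and zorder apply to the background layer too. A sketch, assuming points and municipalities are GeoDataFrames and "area" is a numeric column:

    import sgis as sg

    m = sg.ThematicMap(points, column="area")
    m.add_background(municipalities, color="#dddddd", alpha=0.5, zorder=0)
    m.plot()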
sgis/parallel/parallel.py CHANGED
@@ -590,7 +590,7 @@ class Parallel:
         with_neighbors: bool = False,
         funcdict: dict[str, Callable] | None = None,
         file_type: str = "parquet",
-        muni_number_col: str = "KOMMUNENR",
+        muni_number_col: str = "komm_nr",
         strict: bool = False,
         write_empty: bool = False,
         id_assign_func: Callable | functools.partial = clean_overlay,
@@ -622,7 +622,7 @@ class Parallel:
                 the data is read.
             file_type: Defaults to parquet.
             muni_number_col: String column name with municipality
-                number/identifier. Defaults to KOMMUNENR. If the column is not present
+                number/identifier. Defaults to komm_nr. If the column is not present
                 in the data to be split, the data will be intersected with the
                 municipalities.
             strict: If False (default), the dictionaries 'out_data' and 'funcdict' does
@@ -761,7 +761,7 @@ def write_municipality_data(
     out_folder: str,
     municipalities: GeoDataFrame | list[str] | None = None,
     with_neighbors: bool = False,
-    muni_number_col: str = "KOMMUNENR",
+    muni_number_col: str = "komm_nr",
     file_type: str = "parquet",
     func: Callable | None = None,
     write_empty: bool = False,
@@ -840,7 +840,7 @@ def _write_municipality_data(
     data: str | GeoDataFrame | DataFrame,
     out_folder: str,
     municipalities: GeoDataFrame | list[str] | None = None,
-    muni_number_col: str = "KOMMUNENR",
+    muni_number_col: str = "komm_nr",
     file_type: str = "parquet",
     func: Callable | None = None,
     write_empty: bool = False,
@@ -896,7 +896,7 @@ def _write_neighbor_municipality_data(
     data: str | GeoDataFrame | DataFrame,
     out_folder: str,
     municipalities: GeoDataFrame,
-    muni_number_col: str = "KOMMUNENR",
+    muni_number_col: str = "komm_nr",
     file_type: str = "parquet",
     func: Callable | None = None,
     write_empty: bool = False,
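
The municipality-number column now defaults to "komm_nr" instead of "KOMMUNENR" throughout, so callers whose data still carries the old column name must pass it explicitly. For example, with hypothetical paths and a municipalities GeoDataFrame:

    import sgis as sg

    sg.write_municipality_data(
        "my-bucket/roads.parquet",      # hypothetical input
        "my-bucket/roads_by_muni",      # hypothetical output folder
        municipalities=municipalities,
        muni_number_col="KOMMUNENR",    # keep the pre-1.1 column name
    )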
ssb_sgis-1.0.15.dist-info/METADATA → ssb_sgis-1.1.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ssb-sgis
-Version: 1.0.15
+Version: 1.1.1
 Summary: GIS functions used at Statistics Norway.
 Home-page: https://github.com/statisticsnorway/ssb-sgis
 License: MIT
ssb_sgis-1.0.15.dist-info/RECORD → ssb_sgis-1.1.1.dist-info/RECORD CHANGED
@@ -1,9 +1,10 @@
-sgis/__init__.py,sha256=Bh-W4cB6-1uc-xRzUxqxECwwoennpdlikZI3gwXtZ7E,7389
+sgis/__init__.py,sha256=h6B-UD121eAYNpesXIPfEYISrSTN6mfZd7aXPfcMiqs,7382
+sgis/conf.py,sha256=-PraZWfHZerrMVKutPv7u-MezwAG7RlhGgmf5z-iPxA,304
 sgis/debug_config.py,sha256=Tfr19kU46hSkkspsIJcrUWvlhaL4U3-f8xEPkujSCAQ,593
 sgis/exceptions.py,sha256=WNaEBPNNx0rmz-YDzlFX4vIE7ocJQruUTqS2RNAu2zU,660
 sgis/geopandas_tools/__init__.py,sha256=bo8lFMcltOz7TtWAi52_ekR2gd3mjfBfKeMDV5zuqFY,28
-sgis/geopandas_tools/bounds.py,sha256=BpQFoON-sFXt4zmkUi07Epc2Cj-8OXZAPBfKqT99T4I,23760
-sgis/geopandas_tools/buffer_dissolve_explode.py,sha256=5Dy-HfLm-n4IjlLm98Wtufl4IuCw0Zglyy5VlEH0mak,19955
+sgis/geopandas_tools/bounds.py,sha256=MUtm3w6P_t7RfZ8WJz5iKqG2PyVzFMiAALf6AMLAewk,23798
+sgis/geopandas_tools/buffer_dissolve_explode.py,sha256=t9GJqRMDsHEU74RIlqeMr4QBgbTK0hYlXL4af1RKIks,19955
 sgis/geopandas_tools/centerlines.py,sha256=Q65Sx01SeAlulBEd9oaZkB2maBBNdLcJwAbTILg4SPU,11848
 sgis/geopandas_tools/cleaning.py,sha256=tkHH5wm0BF_1EXMsh3rM9DqWOwtr7JKkqCCi6uymSP4,60214
 sgis/geopandas_tools/conversion.py,sha256=o3QJZLfaqqpJNdWWNKfQn_dS77uJxxRxWZxhf18vPXs,25505
@@ -19,7 +20,7 @@ sgis/geopandas_tools/polygons_as_rings.py,sha256=BX_GZS6F9I4NbEpiOlNBd7zywJjdfdJ
 sgis/geopandas_tools/sfilter.py,sha256=SLcMYprQwnY5DNo0R7TGXk4m6u26H8o4PRn-RPhmeZY,9345
 sgis/helpers.py,sha256=3NqPfVBKlZcZTiMJrsTAlDv5tNKDHrJr_8NimutVzQg,8797
 sgis/io/_is_dapla.py,sha256=wmfkSe98IrLhUg3dtXZusV6OVC8VlY1kbc5EQDf3P-Q,358
-sgis/io/dapla_functions.py,sha256=e4EYjZK2Sxt5OwxI6a8y-ye7EpjLJFbG80qAZWcfsiE,18216
+sgis/io/dapla_functions.py,sha256=rTdTrxUQLM0NtnuYKiVFXInagh6wCWWr4lUTbzk_V0Q,23130
 sgis/io/opener.py,sha256=HWO3G1NB6bpXKM94JadCD513vjat1o1TFjWGWzyVasg,898
 sgis/io/read_parquet.py,sha256=FvZYv1rLkUlrSaUY6QW6E1yntmntTeQuZ9ZRgCDO4IM,3776
 sgis/maps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -30,7 +31,7 @@ sgis/maps/legend.py,sha256=lVRVCkhPmJRjGK23obFJZAO3qp6du1LYnobkkN7DPkc,26279
 sgis/maps/map.py,sha256=smaf9i53EoRZWmZjn9UuqlhzUvVs1XKo2ItIpHxyuik,29592
 sgis/maps/maps.py,sha256=gxu0rgcVygjudRtM1dVRmsUMilMUIg3vG-UgvASM91E,23072
 sgis/maps/norge_i_bilder.json,sha256=W_mFfte3DxugWbEudZ5fadZ2JeFYb0hyab2Quf4oJME,481311
-sgis/maps/thematicmap.py,sha256=yAE1xEfubJcDmBlOJf-Q3SVae1ZHIEMP-YB95Wy8cRw,21691
+sgis/maps/thematicmap.py,sha256=w6q4_gIr8BubQgsPJkc6WXk-tmplDLGcKyjphhFp7ng,21873
 sgis/maps/tilesources.py,sha256=F4mFHxPwkiPJdVKzNkScTX6xbJAMIUtlTq4mQ83oguw,1746
 sgis/maps/wms.py,sha256=XHlCszR0raPbmUc2wYpQ_XRHnSJ6c1ic3w2dNnfMRm4,6252
 sgis/networkanalysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -47,7 +48,7 @@ sgis/networkanalysis/networkanalysis.py,sha256=-g7slZLFNxUZSUMvVmf7zax-9IOXz1NGC
 sgis/networkanalysis/networkanalysisrules.py,sha256=9sXigaCzvKhXFwpeVNMtOiIK3_Hzp9yDpFklmEEAPak,12956
 sgis/networkanalysis/nodes.py,sha256=atFSpqz-_uJHMrf6MC0zhrrcWIydRMFZrsaHC2xr1GU,3374
 sgis/networkanalysis/traveling_salesman.py,sha256=Jjo6bHY4KJ-eK0LycyTy0sWxZjgITs5MBllZ_G9FhTE,5655
-sgis/parallel/parallel.py,sha256=SlC_mOwvSSyWTKUcxLMGkuWHUkEC6dXTlN0Jn5cAtxA,39687
+sgis/parallel/parallel.py,sha256=eyIXPp6nhUhLh1rwkfPLayG5hAi3i8PxmwFHxGCo-k4,39677
 sgis/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sgis/raster/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sgis/raster/base.py,sha256=tiZEuMcVK6hOm_aIjWhQ1WGshcjsxT1fFkuBSLFiMC0,7785
@@ -56,7 +57,7 @@ sgis/raster/indices.py,sha256=-J1HYmnT240iozvgagvyis6K0_GHZHRuUrPOgyoeIrY,223
 sgis/raster/regex.py,sha256=kYhVpRYzoXutx1dSYmqMoselWXww7MMEsTPmLZwHjbM,3759
 sgis/raster/sentinel_config.py,sha256=nySDqn2R8M6W8jguoBeSAK_zzbAsqmaI59i32446FwY,1268
 sgis/raster/zonal.py,sha256=D4Gyptw-yOLTCO41peIuYbY-DANsJCG19xXDlf1QAz4,2299
-ssb_sgis-1.0.15.dist-info/LICENSE,sha256=np3IfD5m0ZUofn_kVzDZqliozuiO6wrktw3LRPjyEiI,1073
-ssb_sgis-1.0.15.dist-info/METADATA,sha256=bRaj-9WssZ9IsI2IPEtI_uyLGhpEWd52xfMst-vI3g4,11741
-ssb_sgis-1.0.15.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-ssb_sgis-1.0.15.dist-info/RECORD,,
+ssb_sgis-1.1.1.dist-info/LICENSE,sha256=np3IfD5m0ZUofn_kVzDZqliozuiO6wrktw3LRPjyEiI,1073
+ssb_sgis-1.1.1.dist-info/METADATA,sha256=dwdCCyDOy441bczyc2JwdB9eZo_BtmTqL0Dqq05IZeQ,11740
+ssb_sgis-1.1.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ssb_sgis-1.1.1.dist-info/RECORD,,