ssb-sgis 1.1.1__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,29 +9,39 @@ import multiprocessing
9
9
  import os
10
10
  import shutil
11
11
  import uuid
12
+ from collections.abc import Callable
12
13
  from collections.abc import Iterable
13
14
  from concurrent.futures import ThreadPoolExecutor
15
+ from io import BytesIO
14
16
  from pathlib import Path
15
17
 
16
18
  import geopandas as gpd
17
19
  import joblib
18
20
  import pandas as pd
19
21
  import pyarrow
22
+ import pyarrow.dataset
23
+ import pyarrow.dataset as ds
20
24
  import pyarrow.parquet as pq
21
25
  import shapely
22
- from gcsfs import GCSFileSystem
23
26
  from geopandas import GeoDataFrame
24
27
  from geopandas import GeoSeries
25
28
  from geopandas.io.arrow import _geopandas_to_arrow
26
29
  from pandas import DataFrame
27
30
  from pyarrow import ArrowInvalid
28
31
 
32
+ from ..conf import config
29
33
  from ..geopandas_tools.conversion import to_shapely
30
34
  from ..geopandas_tools.general import get_common_crs
31
35
  from ..geopandas_tools.sfilter import sfilter
36
+ from ..helpers import _get_file_system
37
+
38
+ try:
39
+ from gcsfs import GCSFileSystem
40
+ except ImportError:
41
+ pass
32
42
 
33
43
  PANDAS_FALLBACK_INFO = " Set pandas_fallback=True to ignore this error."
34
- from ..conf import config
44
+ NULL_VALUE = "__HIVE_DEFAULT_PARTITION__"
35
45
 
36
46
 
37
47
  def read_geopandas(
@@ -40,6 +50,7 @@ def read_geopandas(
40
50
  file_system: GCSFileSystem | None = None,
41
51
  mask: GeoSeries | GeoDataFrame | shapely.Geometry | tuple | None = None,
42
52
  threads: int | None = None,
53
+ filters: pyarrow.dataset.Expression | None = None,
43
54
  **kwargs,
44
55
  ) -> GeoDataFrame | DataFrame:
45
56
  """Reads geoparquet or other geodata from one or more files on GCS.
@@ -62,18 +73,18 @@ def read_geopandas(
62
73
  with a bbox that intersects the mask are read, then filtered by location.
63
74
  threads: Number of threads to use if reading multiple files. Defaults to
64
75
  the number of files to read or the number of available threads (if lower).
76
+ filters: To filter out data. Either a pyarrow.dataset.Expression, or a list in the
77
+ structure [[(column, op, val), …],…] where op is [==, =, >, >=, <, <=, !=, in, not in].
78
+ More details here: https://pandas.pydata.org/docs/reference/api/pandas.read_parquet.html
65
79
  **kwargs: Additional keyword arguments passed to geopandas' read_parquet
66
80
  or read_file, depending on the file type.
67
81
 
68
82
  Returns:
69
83
  A GeoDataFrame if it has rows. If zero rows, a pandas DataFrame is returned.
70
84
  """
71
- if file_system is None:
72
- file_system = config["file_system"]()
85
+ file_system = _get_file_system(file_system, kwargs)
73
86
 
74
87
  if not isinstance(gcs_path, (str | Path | os.PathLike)):
75
- kwargs |= {"file_system": file_system, "pandas_fallback": pandas_fallback}
76
-
77
88
  cols = {}
78
89
  if mask is not None:
79
90
  if not isinstance(gcs_path, GeoSeries):
@@ -112,7 +123,16 @@ def read_geopandas(
112
123
  # recursive read with threads
113
124
  with joblib.Parallel(n_jobs=threads, backend="threading") as parallel:
114
125
  dfs: list[GeoDataFrame] = parallel(
115
- joblib.delayed(read_geopandas)(x, **kwargs) for x in paths
126
+ joblib.delayed(read_geopandas)(
127
+ x,
128
+ filters=filters,
129
+ file_system=file_system,
130
+ pandas_fallback=pandas_fallback,
131
+ mask=mask,
132
+ threads=threads,
133
+ **kwargs,
134
+ )
135
+ for x in paths
116
136
  )
117
137
 
118
138
  if dfs:
@@ -130,34 +150,35 @@ def read_geopandas(
130
150
  return sfilter(df, mask)
131
151
  return df
132
152
 
133
- if not isinstance(gcs_path, str):
134
- try:
135
- gcs_path = str(gcs_path)
136
- except TypeError as e:
137
- raise TypeError(f"Unexpected type {type(gcs_path)}.") from e
138
-
139
- if has_partitions(gcs_path, file_system):
140
- filters = kwargs.pop("filters", None)
141
- return _read_partitioned_parquet(
142
- gcs_path,
143
- file_system=file_system,
144
- mask=mask,
145
- pandas_fallback=pandas_fallback,
146
- threads=threads,
147
- filters=filters,
148
- **kwargs,
153
+ child_paths = has_partitions(gcs_path, file_system)
154
+ if child_paths:
155
+ return gpd.GeoDataFrame(
156
+ _read_partitioned_parquet(
157
+ gcs_path,
158
+ read_func=_read_geopandas,
159
+ file_system=file_system,
160
+ mask=mask,
161
+ pandas_fallback=pandas_fallback,
162
+ filters=filters,
163
+ child_paths=child_paths,
164
+ **kwargs,
165
+ )
149
166
  )
150
167
 
151
168
  if "parquet" in gcs_path or "prqt" in gcs_path:
152
169
  with file_system.open(gcs_path, mode="rb") as file:
153
170
  try:
154
- df = gpd.read_parquet(file, **kwargs)
171
+ df = gpd.read_parquet(
172
+ file, filters=filters, filesystem=file_system, **kwargs
173
+ )
155
174
  except ValueError as e:
156
175
  if "Missing geo metadata" not in str(e) and "geometry" not in str(e):
157
176
  raise e.__class__(
158
177
  f"{e.__class__.__name__}: {e} for {gcs_path}."
159
178
  ) from e
160
- df = pd.read_parquet(file, **kwargs)
179
+ df = pd.read_parquet(
180
+ file, filters=filters, filesystem=file_system, **kwargs
181
+ )
161
182
  if pandas_fallback or not len(df):
162
183
  return df
163
184
  else:
@@ -171,11 +192,16 @@ def read_geopandas(
171
192
  else:
172
193
  with file_system.open(gcs_path, mode="rb") as file:
173
194
  try:
174
- df = gpd.read_file(file, **kwargs)
195
+ df = gpd.read_file(
196
+ file, filters=filters, filesystem=file_system, **kwargs
197
+ )
175
198
  except ValueError as e:
176
199
  if "Missing geo metadata" not in str(e) and "geometry" not in str(e):
177
200
  raise e
178
- df = pd.read_parquet(file, **kwargs)
201
+ file_type: str = Path(gcs_path).suffix.strip(".")
202
+ df = getattr(pd, f"read_{file_type}")(
203
+ file, filters=filters, filesystem=file_system, **kwargs
204
+ )
179
205
 
180
206
  if pandas_fallback or not len(df):
181
207
  return df
@@ -302,8 +328,7 @@ def get_bounds_series(
302
328
  ... )
303
329
 
304
330
  """
305
- if file_system is None:
306
- file_system = config["file_system"]()
331
+ file_system = _get_file_system(file_system, {})
307
332
 
308
333
  if threads is None:
309
334
  threads = min(len(paths), int(multiprocessing.cpu_count())) or 1
@@ -337,6 +362,7 @@ def write_geopandas(
337
362
  pandas_fallback: bool = False,
338
363
  file_system: GCSFileSystem | None = None,
339
364
  partition_cols=None,
365
+ existing_data_behavior: str = "error",
340
366
  **kwargs,
341
367
  ) -> None:
342
368
  """Writes a GeoDataFrame to the speficied format.
@@ -353,6 +379,8 @@ def write_geopandas(
353
379
  the file will be written without geo-metadata if >0 rows.
354
380
  file_system: Optional file sustem.
355
381
  partition_cols: Column(s) to partition by. Only for parquet files.
382
+ existing_data_behavior : 'error' | 'overwrite_or_ignore' | 'delete_matching'.
383
+ Defaults to 'error'. More info: https://arrow.apache.org/docs/python/generated/pyarrow.dataset.write_dataset.html
356
384
  **kwargs: Additional keyword arguments passed to parquet.write_table
357
385
  (for parquet) or geopandas' to_file method (if not parquet).
358
386
  """
@@ -362,16 +390,16 @@ def write_geopandas(
362
390
  except TypeError as e:
363
391
  raise TypeError(f"Unexpected type {type(gcs_path)}.") from e
364
392
 
365
- if file_system is None:
366
- file_system = config["file_system"]()
393
+ file_system = _get_file_system(file_system, kwargs)
367
394
 
368
395
  if not overwrite and file_system.exists(gcs_path):
369
396
  raise ValueError("File already exists.")
370
397
 
371
398
  if not isinstance(df, GeoDataFrame):
372
- raise ValueError("DataFrame must be GeoDataFrame.")
399
+ raise ValueError(f"DataFrame must be GeoDataFrame. Got {type(df)}.")
373
400
 
374
401
  if not len(df) and has_partitions(gcs_path, file_system):
402
+ # no need to write empty df
375
403
  return
376
404
  elif not len(df):
377
405
  if pandas_fallback:
@@ -380,7 +408,7 @@ def write_geopandas(
380
408
  df.geometry = None
381
409
  try:
382
410
  with file_system.open(gcs_path, "wb") as file:
383
- df.to_parquet(gcs_path, **kwargs)
411
+ df.to_parquet(file, **kwargs)
384
412
  except Exception as e:
385
413
  more_txt = PANDAS_FALLBACK_INFO if not pandas_fallback else ""
386
414
  raise e.__class__(
@@ -395,15 +423,12 @@ def write_geopandas(
395
423
  gcs_path,
396
424
  partition_cols,
397
425
  file_system,
426
+ existing_data_behavior=existing_data_behavior,
427
+ write_func=_to_geopandas,
398
428
  **kwargs,
399
429
  )
400
430
  with file_system.open(gcs_path, mode="wb") as file:
401
- table = _geopandas_to_arrow(
402
- df,
403
- index=df.index,
404
- schema_version=None,
405
- )
406
- pq.write_table(table, file, compression="snappy", **kwargs)
431
+ df.to_parquet(file, **kwargs)
407
432
  return
408
433
 
409
434
  layer = kwargs.pop("layer", None)
@@ -419,14 +444,40 @@ def write_geopandas(
419
444
  else:
420
445
  driver = None
421
446
 
422
- with file_system.open(gcs_path, "wb") as file:
423
- df.to_file(file, driver=driver, layer=layer)
447
+ with BytesIO() as buffer:
448
+ df.to_file(buffer, driver=driver)
449
+ buffer.seek(0) # Rewind the buffer to the beginning
450
+
451
+ # Upload buffer content to the desired storage
452
+ with file_system.open(gcs_path, "wb") as file:
453
+ file.write(buffer.read())
454
+
455
+
456
+ def _to_geopandas(df, path, **kwargs) -> None:
457
+ table = _geopandas_to_arrow(
458
+ df,
459
+ index=df.index,
460
+ schema_version=None,
461
+ )
462
+
463
+ if "schema" in kwargs:
464
+ schema = kwargs.pop("schema")
465
+
466
+ # make sure to get the actual metadata
467
+ schema = pyarrow.schema(
468
+ [(schema.field(col).name, schema.field(col).type) for col in schema.names],
469
+ metadata=table.schema.metadata,
470
+ )
471
+ table = table.select(schema.names).cast(schema)
472
+
473
+ pq.write_table(table, path, compression="snappy", **kwargs)
424
474
 
425
475
 
426
476
  def _remove_file(path, file_system) -> None:
427
477
  try:
428
- file_system.rm_file(path)
429
- except (AttributeError, TypeError, PermissionError):
478
+ file_system.rm_file(str(path))
479
+ except (AttributeError, TypeError, PermissionError) as e:
480
+ print(path, type(e), e)
430
481
  try:
431
482
  shutil.rmtree(path)
432
483
  except NotADirectoryError:
@@ -436,10 +487,27 @@ def _remove_file(path, file_system) -> None:
436
487
  pass
437
488
 
438
489
 
439
- def _write_partitioned_geoparquet(df, path, partition_cols, file_system, **kwargs):
490
+ def _write_partitioned_geoparquet(
491
+ df,
492
+ path,
493
+ partition_cols,
494
+ file_system=None,
495
+ write_func: Callable = _to_geopandas,
496
+ existing_data_behavior: str = "error",
497
+ **kwargs,
498
+ ):
499
+ if isinstance(partition_cols, str):
500
+ partition_cols = [partition_cols]
501
+
502
+ file_system = _get_file_system(file_system, kwargs)
503
+
440
504
  path = Path(path)
441
505
  unique_id = uuid.uuid4()
442
506
 
507
+ for col in partition_cols:
508
+ if df[col].isna().all() and not kwargs.get("schema"):
509
+ raise ValueError("Must specify 'schema' when all rows are NA.")
510
+
443
511
  try:
444
512
  glob_func = functools.partial(file_system.glob, detail=False)
445
513
  except AttributeError:
@@ -447,10 +515,10 @@ def _write_partitioned_geoparquet(df, path, partition_cols, file_system, **kwarg
447
515
 
448
516
  args: list[tuple[Path, DataFrame]] = []
449
517
  dirs: list[Path] = set()
450
- for group, rows in df.groupby(partition_cols):
518
+ for group, rows in df.groupby(partition_cols, dropna=False):
451
519
  name = (
452
520
  "/".join(
453
- f"{col}={value}"
521
+ f"{col}={value if not pd.isna(value) else NULL_VALUE}"
454
522
  for col, value in zip(partition_cols, group, strict=True)
455
523
  )
456
524
  + f"/{unique_id}.parquet"
@@ -459,59 +527,164 @@ def _write_partitioned_geoparquet(df, path, partition_cols, file_system, **kwarg
459
527
  dirs.add((path / name).parent)
460
528
  args.append((path / name, rows))
461
529
 
462
- if file_system.exists(path) and not has_partitions(path, file_system):
530
+ if file_system.exists(path) and file_system.isfile(path):
463
531
  _remove_file(path, file_system)
464
532
 
465
- for dir_ in dirs:
466
- try:
467
- os.makedirs(dir_, exist_ok=True)
468
- except (OSError, FileNotFoundError, FileExistsError) as e:
469
- print(e)
470
- pass
533
+ if kwargs.get("schema"):
534
+ schema = kwargs.pop("schema")
535
+ elif isinstance(df, GeoDataFrame):
536
+ geom_name = df.geometry.name
537
+ pandas_columns = [col for col in df if col != geom_name]
538
+ schema = pyarrow.Schema.from_pandas(df[pandas_columns], preserve_index=True)
539
+ index_columns = _get_index_cols(schema)
540
+ schema = pyarrow.schema(
541
+ [
542
+ (
543
+ (schema.field(col).name, schema.field(col).type)
544
+ if col != geom_name
545
+ else (geom_name, pyarrow.binary())
546
+ )
547
+ for col in [*df.columns, *index_columns]
548
+ # for col in df.columns
549
+ ]
550
+ )
551
+ else:
552
+ schema = pyarrow.Schema.from_pandas(df, preserve_index=True)
553
+
554
+ def get_siblings(path: str, paths: list[str]) -> list[str]:
555
+ parts = path.parts
556
+ return {x for x in paths if all(part in parts for part in x.parts)}
471
557
 
472
558
  def threaded_write(path_rows):
473
559
  new_path, rows = path_rows
560
+ # for sibling_path in get_siblings(new_path, child_paths):
474
561
  for sibling_path in glob_func(str(Path(new_path).with_name("**"))):
475
562
  if not paths_are_equal(sibling_path, Path(new_path).parent):
476
- _remove_file(sibling_path, file_system)
477
- with file_system.open(new_path, mode="wb") as file:
478
- table = _geopandas_to_arrow(
479
- rows,
480
- index=df.index,
481
- schema_version=None,
482
- )
483
- pq.write_table(table, file, compression="snappy", **kwargs)
563
+ if existing_data_behavior == "delete_matching":
564
+ _remove_file(sibling_path, file_system)
565
+ elif existing_data_behavior == "error":
566
+ raise pyarrow.ArrowInvalid(
567
+ f"Could not write to {path} as the directory is not empty and existing_data_behavior is to error"
568
+ )
569
+ try:
570
+ with file_system.open(new_path, mode="wb") as file:
571
+ write_func(rows, file, schema=schema, **kwargs)
572
+ except FileNotFoundError:
573
+ file_system.makedirs(str(Path(new_path).parent), exist_ok=True)
574
+ with file_system.open(new_path, mode="wb") as file:
575
+ write_func(rows, file, schema=schema, **kwargs)
484
576
 
485
577
  with ThreadPoolExecutor() as executor:
486
578
  list(executor.map(threaded_write, args))
487
579
 
488
580
 
581
+ def _filters_to_expression(filters) -> list[ds.Expression]:
582
+ if filters is None:
583
+ return None
584
+ elif isinstance(filters, pyarrow.dataset.Expression):
585
+ return filters
586
+
587
+ for filt in filters:
588
+ if "in" in filt and isinstance(filt[-1], str):
589
+ raise ValueError(
590
+ "Using strings with 'in' is ambigous. Use a list of strings."
591
+ )
592
+ try:
593
+ return pq.core.filters_to_expression(filters)
594
+ except ValueError as e:
595
+ raise ValueError(f"{e}: {filters}") from e
596
+
597
+
598
+ def expression_match_path(expression: ds.Expression, path: str) -> bool:
599
+ """Check if a file path match a pyarrow Expression.
600
+
601
+ Examples:
602
+ --------
603
+ >>> import pyarrow.compute as pc
604
+ >>> path = 'data/file.parquet/x=1/y=10/name0.parquet'
605
+ >>> expression = (pc.Field("x") == 1) & (pc.Field("y") == 10)
606
+ >>> expression_match_path(path, expression)
607
+ True
608
+ >>> expression = (pc.Field("x") == 1) & (pc.Field("y") == 5)
609
+ >>> expression_match_path(path, expression)
610
+ False
611
+ >>> expression = (pc.Field("x") == 1) & (pc.Field("z") == 10)
612
+ >>> expression_match_path(path, expression)
613
+ False
614
+ """
615
+ if NULL_VALUE in path:
616
+ return True
617
+ # build a one lengthed pyarrow.Table of the partitioning in the file path
618
+ values = []
619
+ names = []
620
+ for part in Path(path).parts:
621
+ if part.count("=") != 1:
622
+ continue
623
+ name, value = part.split("=")
624
+ values.append([value])
625
+ names.append(name)
626
+ table = pyarrow.Table.from_arrays(values, names=names)
627
+ try:
628
+ table = table.filter(expression)
629
+ except pyarrow.ArrowInvalid as e:
630
+ if "No match for FieldRef" not in str(e):
631
+ raise e
632
+ # cannot determine if the expression match without reading the file
633
+ return True
634
+ return bool(len(table))
635
+
636
+
637
+ def _read_geopandas(file, pandas_fallback: bool, **kwargs):
638
+ try:
639
+ return gpd.read_parquet(file, **kwargs)
640
+ except Exception as e:
641
+ if not pandas_fallback:
642
+ raise e
643
+ df = pd.read_parquet(file, **kwargs)
644
+ if len(df):
645
+ raise e
646
+ return df
647
+
648
+
649
+ def _read_pandas(gcs_path: str, **kwargs):
650
+ file_system = _get_file_system(None, kwargs)
651
+
652
+ child_paths = has_partitions(gcs_path, file_system)
653
+ if child_paths:
654
+ return gpd.GeoDataFrame(
655
+ _read_partitioned_parquet(
656
+ gcs_path,
657
+ read_func=pd.read_parquet,
658
+ file_system=file_system,
659
+ mask=None,
660
+ child_paths=child_paths,
661
+ **kwargs,
662
+ )
663
+ )
664
+
665
+ with file_system.open(gcs_path, "rb") as file:
666
+ return pd.read_parquet(file, **kwargs)
667
+
668
+
489
669
  def _read_partitioned_parquet(
490
- path, filters, file_system, mask, pandas_fallback, threads, **kwargs
670
+ path: str,
671
+ read_func: Callable,
672
+ filters=None,
673
+ file_system=None,
674
+ mask=None,
675
+ child_paths: list[str] | None = None,
676
+ **kwargs,
491
677
  ):
492
- try:
493
- glob_func = functools.partial(file_system.glob, detail=False)
494
- except AttributeError:
495
- glob_func = functools.partial(glob.glob, recursive=True)
678
+ file_system = _get_file_system(file_system, kwargs)
496
679
 
497
- filters = filters or []
498
- new_filters = []
499
- for filt in filters:
500
- if "in" in filt:
501
- values = [
502
- x.strip("(")
503
- .strip(")")
504
- .strip("[")
505
- .strip("]")
506
- .strip("{")
507
- .strip("}")
508
- .strip(" ")
509
- for x in filt[-1].split(",")
510
- ]
511
- filt = [filt[0] + "=" + x for x in values]
512
- else:
513
- filt = ["".join(filt)]
514
- new_filters.append(filt)
680
+ if child_paths is None:
681
+ try:
682
+ glob_func = functools.partial(file_system.glob)
683
+ except AttributeError:
684
+ glob_func = functools.partial(glob.glob, recursive=True)
685
+ child_paths = list(glob_func(str(Path(path) / "**/*.parquet")))
686
+
687
+ filters = _filters_to_expression(filters)
515
688
 
516
689
  def intersects(file, mask) -> bool:
517
690
  bbox, _ = _get_bounds_parquet_from_open_file(file, file_system)
@@ -522,9 +695,13 @@ def _read_partitioned_parquet(
522
695
  if mask is not None and not intersects(file, mask):
523
696
  return
524
697
 
525
- schema = kwargs.pop("schema", pq.read_schema(file))
698
+ schema = kwargs.get("schema", pq.read_schema(file))
699
+ # copy kwargs because mutable
700
+ new_kwargs = {
701
+ key: value for key, value in kwargs.items() if key != "schema"
702
+ }
526
703
 
527
- return gpd.read_parquet(file, schema=schema, **kwargs)
704
+ return read_func(file, schema=schema, filters=filters, **new_kwargs)
528
705
 
529
706
  with ThreadPoolExecutor() as executor:
530
707
  results = [
@@ -534,11 +711,8 @@ def _read_partitioned_parquet(
534
711
  read,
535
712
  (
536
713
  path
537
- for path in glob_func(str(Path(path) / "**/*.parquet"))
538
- if all(
539
- any(subfilt in Path(path).parts for subfilt in filt)
540
- for filt in new_filters
541
- )
714
+ for path in child_paths
715
+ if filters is None or expression_match_path(filters, path)
542
716
  ),
543
717
  )
544
718
  )
@@ -550,8 +724,8 @@ def _read_partitioned_parquet(
550
724
  return pd.concat(results)
551
725
 
552
726
  # add columns to empty DataFrame
553
- first_path = next(iter(glob_func(str(Path(path) / "**/*.parquet"))))
554
- return gpd.GeoDataFrame(
727
+ first_path = next(iter(child_paths + [path]))
728
+ return pd.DataFrame(
555
729
  columns=list(dict.fromkeys(_get_columns(first_path, file_system)))
556
730
  )
557
731
 
@@ -560,19 +734,17 @@ def paths_are_equal(path1: Path | str, path2: Path | str) -> bool:
560
734
  return Path(path1).parts == Path(path2).parts
561
735
 
562
736
 
563
- def has_partitions(path, file_system) -> bool:
737
+ def has_partitions(path, file_system) -> list[str]:
564
738
  try:
565
739
  glob_func = functools.partial(file_system.glob, detail=False)
566
740
  except AttributeError:
567
741
  glob_func = functools.partial(glob.glob, recursive=True)
568
742
 
569
- return bool(
570
- [
571
- x
572
- for x in glob_func(str(Path(path) / "**/*.parquet"))
573
- if not paths_are_equal(x, path)
574
- ]
575
- )
743
+ return [
744
+ x
745
+ for x in glob_func(str(Path(path) / "**/*.parquet"))
746
+ if not paths_are_equal(x, path)
747
+ ]
576
748
 
577
749
 
578
750
  def check_files(
sgis/maps/explore.py CHANGED
@@ -4,7 +4,6 @@ This module holds the Explore class, which is the basis for the explore, samplem
4
4
  clipmap functions from the 'maps' module.
5
5
  """
6
6
 
7
- import os
8
7
  import random
9
8
  import re
10
9
  import warnings
@@ -44,6 +43,7 @@ from ..geopandas_tools.general import clean_geoms
44
43
  from ..geopandas_tools.general import make_all_singlepart
45
44
  from ..geopandas_tools.geometry_types import get_geom_type
46
45
  from ..geopandas_tools.geometry_types import to_single_geom_type
46
+ from ..helpers import _get_file_system
47
47
  from .wms import WmsLoader
48
48
 
49
49
  try:
@@ -114,6 +114,20 @@ _MAP_KWARGS = [
114
114
  ]
115
115
 
116
116
 
117
+ class HtmlViewer:
118
+ """To be passed to IPython.display.display to show as map in Jupyter."""
119
+
120
+ def __init__(self, path: str, file_system=None) -> None:
121
+ """Takes a file path."""
122
+ self.file_system = _get_file_system(file_system, {})
123
+ self.path = path
124
+
125
+ def _repr_html_(self) -> str:
126
+ """Method to be used by IPython.display.display."""
127
+ with self.file_system.open(self.path, "r") as file:
128
+ return file.read()
129
+
130
+
117
131
  class MeasureControlFix(plugins.MeasureControl):
118
132
  """Monkey-patch to fix a bug in the lenght measurement control.
119
133
 
@@ -281,6 +295,7 @@ class Explore(Map):
281
295
  max_nodata_percentage: int = 100,
282
296
  display: bool = True,
283
297
  wms: WmsLoader | None = None,
298
+ file_system=None,
284
299
  **kwargs,
285
300
  ) -> None:
286
301
  """Initialiser.
@@ -311,6 +326,8 @@ class Explore(Map):
311
326
  image arrays.
312
327
  display: Whether to display the map interactively.
313
328
  wms: A WmsLoader instance for loading image tiles as layers. E.g. NorgeIBilderWms.
329
+ file_system: Any file system instance with an 'open' method. Used to write html file
330
+ to 'out_path'.
314
331
  **kwargs: Additional keyword arguments. Can also be geometry-like objects
315
332
  where the key is the label.
316
333
  """
@@ -329,6 +346,7 @@ class Explore(Map):
329
346
  self.display = display
330
347
  self.wms = [wms] if isinstance(wms, WmsLoader) else wms
331
348
  self.legend = None
349
+ self.file_system = _get_file_system(file_system, kwargs)
332
350
 
333
351
  self.browser = browser
334
352
  if not self.browser and "show_in_browser" in kwargs:
@@ -614,8 +632,9 @@ class Explore(Map):
614
632
 
615
633
  def save(self, path: str) -> None:
616
634
  """Save the map to local disk as an html document."""
617
- with open(path, "w") as f:
635
+ with self.file_system.open(path, "w") as f:
618
636
  f.write(self.map._repr_html_())
637
+ print(f"display(sg.HtmlViewer('{self.out_path}'))")
619
638
 
620
639
  def _explore(self, **kwargs) -> None:
621
640
  self.kwargs = self.kwargs | kwargs
@@ -629,10 +648,9 @@ class Explore(Map):
629
648
  self._create_continous_map()
630
649
 
631
650
  if self.out_path:
632
- with open(
633
- os.getcwd() + "/" + self.out_path.strip(".html") + ".html", "w"
634
- ) as f:
651
+ with self.file_system.open(self.out_path, "w") as f:
635
652
  f.write(self.map._repr_html_())
653
+ print(f"display(sg.HtmlViewer('{self.out_path}'))")
636
654
  elif self.browser:
637
655
  run_html_server(self.map._repr_html_())
638
656
  elif not self.display:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ssb-sgis
3
- Version: 1.1.1
3
+ Version: 1.1.2
4
4
  Summary: GIS functions used at Statistics Norway.
5
5
  Home-page: https://github.com/statisticsnorway/ssb-sgis
6
6
  License: MIT