ssb-sgis 1.1.12__py3-none-any.whl → 1.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -425,12 +425,6 @@ def eliminate_by_longest(
425
425
  explore_locals(center=_DEBUG_CONFIG["center"])
426
426
 
427
427
  if not _recurse and len(isolated):
428
- if 0:
429
- isolated.geometry = isolated.buffer(
430
- -PRECISION,
431
- resolution=1,
432
- join_style=2,
433
- )
434
428
  out, isolated = _recursively_eliminate_new_neighbors(
435
429
  out,
436
430
  isolated,
@@ -109,7 +109,7 @@ def read_geopandas(
109
109
  if single_eq_filter:
110
110
  try:
111
111
  expression = "".join(next(iter(filters))).replace("==", "=")
112
- glob_func = _get_glob(file_system)
112
+ glob_func = _get_glob_func(file_system)
113
113
  paths = glob_func(str(Path(gcs_path) / expression))
114
114
  if paths:
115
115
  return _read_geopandas_from_iterable(
@@ -543,7 +543,7 @@ def _write_partitioned_geoparquet(
543
543
  if df[col].isna().all() and not kwargs.get("schema"):
544
544
  raise ValueError("Must specify 'schema' when all rows are NA.")
545
545
 
546
- glob_func = _get_glob(file_system)
546
+ glob_func = _get_glob_func(file_system)
547
547
 
548
548
  if file_system.exists(path) and file_system.isfile(path):
549
549
  _remove_file(path, file_system)
@@ -596,7 +596,7 @@ def _write_partitioned_geoparquet(
596
596
  executor.map(threaded_write, dfs, paths)
597
597
 
598
598
 
599
- def _get_glob(file_system) -> functools.partial:
599
+ def _get_glob_func(file_system) -> functools.partial:
600
600
  try:
601
601
  return functools.partial(file_system.glob)
602
602
  except AttributeError:
@@ -724,9 +724,9 @@ def _read_partitioned_parquet(
724
724
  **kwargs,
725
725
  ):
726
726
  file_system = _get_file_system(file_system, kwargs)
727
+ glob_func = _get_glob_func(file_system)
727
728
 
728
729
  if child_paths is None:
729
- glob_func = _get_glob(file_system)
730
730
  child_paths = list(glob_func(str(Path(path) / "**/*.parquet")))
731
731
 
732
732
  filters = _filters_to_expression(filters)
@@ -735,18 +735,29 @@ def _read_partitioned_parquet(
735
735
  bbox, _ = _get_bounds_parquet_from_open_file(file, file_system)
736
736
  return shapely.box(*bbox).intersects(to_shapely(mask))
737
737
 
738
- def read(path: str) -> pyarrow.Table | None:
739
- with file_system.open(path, "rb") as file:
740
- if mask is not None and not intersects(file, mask):
741
- return
738
+ def read(child_path: str) -> pyarrow.Table | None:
739
+ try:
740
+ with file_system.open(child_path, "rb") as file:
741
+ if mask is not None and not intersects(file, mask):
742
+ return
742
743
 
743
- # 'get' instead of 'pop' because dict is mutable
744
- schema = kwargs.get("schema", pq.read_schema(file))
745
- new_kwargs = {
746
- key: value for key, value in kwargs.items() if key != "schema"
747
- }
744
+ # 'get' instead of 'pop' because dict is mutable
745
+ schema = kwargs.get("schema", pq.read_schema(file))
746
+ new_kwargs = {
747
+ key: value for key, value in kwargs.items() if key != "schema"
748
+ }
748
749
 
749
- return read_func(file, schema=schema, filters=filters, **new_kwargs)
750
+ return read_func(file, schema=schema, filters=filters, **new_kwargs)
751
+ except ArrowInvalid as e:
752
+ if not len(
753
+ {
754
+ x
755
+ for x in glob_func(str(Path(child_path) / "**"))
756
+ if not paths_are_equal(child_path, x)
757
+ }
758
+ ):
759
+ raise e
760
+ # allow not being able to read hard-to-delete empty directories
750
761
 
751
762
  with ThreadPoolExecutor() as executor:
752
763
  results = [
@@ -790,7 +801,7 @@ def paths_are_equal(path1: Path | str, path2: Path | str) -> bool:
790
801
 
791
802
 
792
803
  def get_child_paths(path, file_system) -> list[str]:
793
- glob_func = _get_glob(file_system)
804
+ glob_func = _get_glob_func(file_system)
794
805
  return [
795
806
  x
796
807
  for x in glob_func(str(Path(path) / "**/*.parquet"))
sgis/maps/explore.py CHANGED
@@ -828,7 +828,7 @@ class Explore(Map):
828
828
  if not len(gdf):
829
829
  continue
830
830
 
831
- gdf = self._to_single_geom_type(make_all_singlepart(gdf))
831
+ gdf = self._to_single_geom_type(make_all_singlepart(gdf, ignore_index=True))
832
832
 
833
833
  if not len(gdf):
834
834
  continue
sgis/maps/map.py CHANGED
@@ -729,6 +729,8 @@ class Map:
729
729
  """Place the column values into groups."""
730
730
  bins = bins.copy()
731
731
 
732
+ assert gdf.index.is_unique
733
+
732
734
  # if equal length, convert to integer and check for equality
733
735
  if len(bins) == len(self._unique_values):
734
736
  if gdf[self._column].isna().all():
@@ -2,6 +2,7 @@ import datetime
2
2
  import functools
3
3
  import glob
4
4
  import itertools
5
+ import json
5
6
  import os
6
7
  import random
7
8
  import re
@@ -468,8 +469,8 @@ class _ImageBase:
468
469
  regexes = (regexes,)
469
470
  return tuple(re.compile(regexes, flags=re.VERBOSE) for regexes in regexes)
470
471
 
471
- @staticmethod
472
472
  def _metadata_to_nested_dict(
473
+ self,
473
474
  metadata: str | Path | os.PathLike | dict | pd.DataFrame | None,
474
475
  ) -> dict[str, dict[str, Any]]:
475
476
  """Construct metadata dict from dictlike, DataFrame or file path.
@@ -932,14 +933,20 @@ class Band(_ImageBandBase):
932
933
  }
933
934
 
934
935
  if self.metadata:
936
+ parent = _fix_path(str(Path(self.path).parent))
935
937
  if self.path is not None:
936
938
  self.metadata = {
937
- key: value
938
- for key, value in self.metadata.items()
939
- if key == self.path
939
+ key: value for key, value in self.metadata.items() if key == parent
940
940
  }
941
- this_metadata = self.metadata[self.path]
942
- for key, value in this_metadata.items():
941
+ for key, value in self.metadata.get(parent, {}).items():
942
+ if key == "bands" and self.band_id in value:
943
+ band_metadata = value[self.band_id]
944
+ for band_key, band_value in band_metadata.items():
945
+ if band_key in dir(self):
946
+ setattr(self, f"_{band_key}", band_value)
947
+ else:
948
+ setattr(self, band_key, band_value)
949
+ continue
943
950
  if key in dir(self):
944
951
  setattr(self, f"_{key}", value)
945
952
  else:
@@ -1592,6 +1599,22 @@ class Image(_ImageBandBase):
1592
1599
  else:
1593
1600
  self._all_file_paths = None
1594
1601
 
1602
+ if not self.metadata and "metadata.json" in {
1603
+ Path(x).name for x in self._all_file_paths
1604
+ }:
1605
+ with _open_func(
1606
+ next(
1607
+ iter(
1608
+ {
1609
+ x
1610
+ for x in self._all_file_paths
1611
+ if str(x).endswith("metadata.json")
1612
+ }
1613
+ )
1614
+ )
1615
+ ) as file:
1616
+ self.metadata = json.load(file)
1617
+
1595
1618
  if df is None:
1596
1619
  if not self._all_file_paths:
1597
1620
  self._all_file_paths = {self.path}
@@ -1616,12 +1639,10 @@ class Image(_ImageBandBase):
1616
1639
  key: value for key, value in self.metadata.items() if self.path in key
1617
1640
  }
1618
1641
 
1619
- if self.metadata:
1620
- try:
1621
- metadata = self.metadata[self.path]
1622
- except KeyError as e:
1623
- metadata = {}
1624
- for key, value in metadata.items():
1642
+ if self.metadata.get(self.path, {}):
1643
+ for key, value in self.metadata[self.path].items():
1644
+ if key in {"bands"}:
1645
+ continue
1625
1646
  if key in dir(self):
1626
1647
  setattr(self, f"_{key}", value)
1627
1648
  else:
@@ -1705,6 +1726,56 @@ class Image(_ImageBandBase):
1705
1726
 
1706
1727
  return self
1707
1728
 
1729
+ def get_image_metadata_dict(self) -> dict:
1730
+ """Creates a nested dict of metadata.
1731
+
1732
+ The dict structure will be:
1733
+
1734
+ {
1735
+ image_path: {
1736
+ image_attribute: value,
1737
+ ...,
1738
+ "bands": {
1739
+ band_id: {
1740
+ band_attribute: band_value,
1741
+ },
1742
+ ...,
1743
+ }
1744
+ }
1745
+ }
1746
+ """
1747
+ path = self.path
1748
+ metadata = {
1749
+ path: {
1750
+ "bounds": self.bounds,
1751
+ "crs": str(pyproj.CRS(self.crs).to_string()),
1752
+ }
1753
+ }
1754
+ for key in self.metadata_attributes:
1755
+ metadata[path][key] = getattr(self, key)
1756
+
1757
+ metadata[path]["bands"] = {}
1758
+ for band in self:
1759
+ metadata[path]["bands"][band.band_id] = {}
1760
+ for key in band.metadata_attributes:
1761
+ if key in self.metadata_attributes:
1762
+ continue
1763
+ metadata[path]["bands"][band.band_id][key] = getattr(band, key)
1764
+ return metadata
1765
+
1766
+ def write_image_metadata(self) -> None:
1767
+ """Write file 'metadata.json' under image path.
1768
+
1769
+ The file will be used to give the image attributes
1770
+ and avoid the much slower metadata fetching with rasterio.
1771
+
1772
+ See method 'get_image_metadata_dict' for info on the structure of
1773
+ the json file.
1774
+ """
1775
+ metadata = self.get_image_metadata_dict()
1776
+ with _open_func(str(Path(self.path) / "metadata.json"), "w") as file:
1777
+ json.dump(metadata, file)
1778
+
1708
1779
  def _construct_image_from_bands(
1709
1780
  self, data: Sequence[Band], res: int | None
1710
1781
  ) -> None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ssb-sgis
3
- Version: 1.1.12
3
+ Version: 1.1.14
4
4
  Summary: GIS functions used at Statistics Norway.
5
5
  Home-page: https://github.com/statisticsnorway/ssb-sgis
6
6
  License: MIT
@@ -15,21 +15,21 @@ sgis/geopandas_tools/geometry_types.py,sha256=ijQDbQaZPqPGjBl707H4yooNXpk21RXyat
15
15
  sgis/geopandas_tools/neighbors.py,sha256=vduQlHeoZjHyD5pxDbjfonQ3-LAHGfPETxV7-L6Sg4M,16634
16
16
  sgis/geopandas_tools/overlay.py,sha256=pMQK86t0ixKErHQsP0HC8RXHUXNxDCNdH6FK1MEJamM,25779
17
17
  sgis/geopandas_tools/point_operations.py,sha256=JM4hvfIVxZaZdGNlGzcCurrKzkgC_b9hzbFYN42f9WY,6972
18
- sgis/geopandas_tools/polygon_operations.py,sha256=FJ-dXCxLHRsmp0oXsmBOFRprFFwmhrxqOPZkW2WWWQM,50088
18
+ sgis/geopandas_tools/polygon_operations.py,sha256=LN5w1EC7ywJLmKlkk_MyIiKbaYJNp9wWPWPb0xlORLs,49923
19
19
  sgis/geopandas_tools/polygons_as_rings.py,sha256=BX_GZS6F9I4NbEpiOlNBd7zywJjdfdJVi_MkeONBuiM,14941
20
20
  sgis/geopandas_tools/sfilter.py,sha256=SLcMYprQwnY5DNo0R7TGXk4m6u26H8o4PRn-RPhmeZY,9345
21
21
  sgis/helpers.py,sha256=_h7ke9hJrRNhHW-ZX3gA95fOrX2s1ADKBMxc94p2F4Q,9627
22
22
  sgis/io/__init__.py,sha256=uyBr20YDqB2bQttrd5q1JuGOvX32A-MSvS7Wmw5f5qg,177
23
23
  sgis/io/_is_dapla.py,sha256=wmfkSe98IrLhUg3dtXZusV6OVC8VlY1kbc5EQDf3P-Q,358
24
- sgis/io/dapla_functions.py,sha256=Dp2oimCDY9L2_FBoAgDttqG9nd9aU06v5tXA1iB_aDc,30289
24
+ sgis/io/dapla_functions.py,sha256=tRP8TrFnlxJ5gx3BZSWsvBAAzUveUysGPUxSHRewIck,30729
25
25
  sgis/io/opener.py,sha256=HWO3G1NB6bpXKM94JadCD513vjat1o1TFjWGWzyVasg,898
26
26
  sgis/io/read_parquet.py,sha256=FvZYv1rLkUlrSaUY6QW6E1yntmntTeQuZ9ZRgCDO4IM,3776
27
27
  sgis/maps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
28
  sgis/maps/examine.py,sha256=Pb0dH8JazU5E2svfQrzHO1Bi-sjy5SeyY6zoeMO34jE,9369
29
- sgis/maps/explore.py,sha256=ReML94ek49vkToXRK9vA_c5Zzg8O7n0GoeqyeuwaBpo,47879
29
+ sgis/maps/explore.py,sha256=0QDPaj5YU2Jps5iujuHu-vk_R9mFh_aOhbN1eU-fxcI,47898
30
30
  sgis/maps/httpserver.py,sha256=eCDoB9x74kSLiGEj2X3O91t3oscY_ia17UNuaaJ6tCc,2472
31
31
  sgis/maps/legend.py,sha256=lVRVCkhPmJRjGK23obFJZAO3qp6du1LYnobkkN7DPkc,26279
32
- sgis/maps/map.py,sha256=vvdWSYZN9U0pbv6-l6l5xG3HUWj09fE4qloOe9fF8L8,30403
32
+ sgis/maps/map.py,sha256=lwQUJvK3I9dPNSxPeRty9ICodz_GXQBN1OHhghI7IsE,30439
33
33
  sgis/maps/maps.py,sha256=gxu0rgcVygjudRtM1dVRmsUMilMUIg3vG-UgvASM91E,23072
34
34
  sgis/maps/norge_i_bilder.json,sha256=W_mFfte3DxugWbEudZ5fadZ2JeFYb0hyab2Quf4oJME,481311
35
35
  sgis/maps/thematicmap.py,sha256=w6q4_gIr8BubQgsPJkc6WXk-tmplDLGcKyjphhFp7ng,21873
@@ -54,12 +54,12 @@ sgis/parallel/parallel.py,sha256=CzHetSAr9wvSrEDFTqDq2xAsNuG1ig22-vcEOIoUVv4,396
54
54
  sgis/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
55
  sgis/raster/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
56
  sgis/raster/base.py,sha256=tiZEuMcVK6hOm_aIjWhQ1WGshcjsxT1fFkuBSLFiMC0,7785
57
- sgis/raster/image_collection.py,sha256=hpIVKBizlhn5YyxXoSpXHNVo0eVy5PfTnJCwpw2V1rQ,123152
57
+ sgis/raster/image_collection.py,sha256=qmxYGgdXN1ngSfpuOzyFKlxFG6YhqHmbd697tHaEQec,125639
58
58
  sgis/raster/indices.py,sha256=-J1HYmnT240iozvgagvyis6K0_GHZHRuUrPOgyoeIrY,223
59
59
  sgis/raster/regex.py,sha256=kYhVpRYzoXutx1dSYmqMoselWXww7MMEsTPmLZwHjbM,3759
60
60
  sgis/raster/sentinel_config.py,sha256=nySDqn2R8M6W8jguoBeSAK_zzbAsqmaI59i32446FwY,1268
61
61
  sgis/raster/zonal.py,sha256=D4Gyptw-yOLTCO41peIuYbY-DANsJCG19xXDlf1QAz4,2299
62
- ssb_sgis-1.1.12.dist-info/LICENSE,sha256=np3IfD5m0ZUofn_kVzDZqliozuiO6wrktw3LRPjyEiI,1073
63
- ssb_sgis-1.1.12.dist-info/METADATA,sha256=UMYFkb-iBAu40ktwp9hVhdXoQ4MHvO4osBaCwCG5xHU,11741
64
- ssb_sgis-1.1.12.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
65
- ssb_sgis-1.1.12.dist-info/RECORD,,
62
+ ssb_sgis-1.1.14.dist-info/LICENSE,sha256=np3IfD5m0ZUofn_kVzDZqliozuiO6wrktw3LRPjyEiI,1073
63
+ ssb_sgis-1.1.14.dist-info/METADATA,sha256=bMv_HRcZc-YOhOKnCVp-91KuVPmAEwIiQI1-UWZuQpM,11741
64
+ ssb_sgis-1.1.14.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
65
+ ssb_sgis-1.1.14.dist-info/RECORD,,