ssb-sgis 1.2.3__py3-none-any.whl → 1.2.4__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
sgis/__init__.py CHANGED
@@ -78,6 +78,7 @@ from .geopandas_tools.polygon_operations import get_polygon_clusters
78
78
  from .geopandas_tools.polygon_operations import split_polygons_by_lines
79
79
  from .geopandas_tools.polygons_as_rings import PolygonsAsRings
80
80
  from .geopandas_tools.runners import GridSizeOverlayRunner
81
+ from .geopandas_tools.runners import GridSizeUnionRunner
81
82
  from .geopandas_tools.runners import OverlayRunner
82
83
  from .geopandas_tools.runners import RTreeQueryRunner
83
84
  from .geopandas_tools.runners import UnionRunner
@@ -603,7 +603,6 @@ def split_by_neighbors(df, split_by, tolerance, grid_size=None) -> GeoDataFrame:
603
603
  buff(df, tolerance),
604
604
  how="identity",
605
605
  grid_size=grid_size,
606
- geom_type="polygon",
607
606
  )
608
607
  .pipe(get_line_segments)
609
608
  .reset_index(drop=True)
@@ -120,6 +120,53 @@ class UnionRunner(AbstractRunner):
120
120
  return agged
121
121
 
122
122
 
123
@dataclass
class GridSizeUnionRunner(UnionRunner):
    """Run shapely.union_all with pandas.groupby, retrying with fallback grid sizes.

    The union is first attempted with the 'grid_size' passed to 'run'. If that
    raises a GEOSException, the values in 'grid_sizes' are tried one at a time,
    in order, until one of them succeeds.

    Args:
        n_jobs: Number of workers.
        backend: Backend for the workers.
        grid_sizes: Fallback grid sizes to try, in order, when the initial
            attempt raises a GEOSException. Must be set; None is rejected
            in '__post_init__'.
    """

    n_jobs: int
    backend: str | None = None
    grid_sizes: list[float | int] | None = None

    def __post_init__(self) -> None:
        """Check that grid_sizes is passed.

        Raises:
            ValueError: If 'grid_sizes' is None.
        """
        if self.grid_sizes is None:
            raise ValueError(
                f"must set 'grid_sizes' in the {self.__class__.__name__} initialiser."
            )

    def run(
        self,
        df: GeoDataFrame | GeoSeries | pd.DataFrame | pd.Series,
        by: str | list[str] | None = None,
        grid_size: int | float | None = None,
        **kwargs,
    ) -> GeoSeries | GeoDataFrame:
        """Run the parent union, falling back to 'grid_sizes' on GEOSException.

        Args:
            df: Data passed on to the parent runner's 'run' method.
            by: Optional column(s) to group by, passed on to the parent runner.
            grid_size: Grid size for the first attempt.
            **kwargs: Additional keyword arguments passed on to the parent runner.

        Returns:
            The result of the first attempt that does not raise a GEOSException.

        Raises:
            GEOSException: The error from the last attempt, if every candidate
                grid size fails. This includes the case where 'grid_sizes' is
                empty and the initial attempt fails.
        """
        # The caller's grid_size always goes first, so the candidate list is never
        # empty and 'last_error' is guaranteed to be bound if the loop finishes.
        for candidate in [grid_size, *self.grid_sizes]:
            try:
                return super().run(df, by=by, grid_size=candidate, **kwargs)
            except GEOSException as err:
                # Keep a reference: 'err' is unbound when the except block exits.
                last_error = err
        # Every candidate failed. Surface the final error instead of silently
        # returning None.
        raise last_error
168
+
169
+
123
170
  def _strtree_query(
124
171
  arr1: np.ndarray,
125
172
  arr2: np.ndarray,
@@ -40,7 +40,10 @@ from ..helpers import _get_file_system
40
40
  try:
41
41
  from gcsfs import GCSFileSystem
42
42
  except ImportError:
43
- pass
43
+
44
+ class GCSFileSystem:
45
+ """Placeholder."""
46
+
44
47
 
45
48
  PANDAS_FALLBACK_INFO = " Set pandas_fallback=True to ignore this error."
46
49
  NULL_VALUE = "__HIVE_DEFAULT_PARTITION__"
@@ -96,6 +99,7 @@ def read_geopandas(
96
99
  file_system=file_system,
97
100
  use_threads=use_threads,
98
101
  pandas_fallback=pandas_fallback,
102
+ filters=filters,
99
103
  **kwargs,
100
104
  )
101
105
 
@@ -108,7 +112,9 @@ def read_geopandas(
108
112
  # because glob is slow without GCSFileSystem from the root partition
109
113
  if single_eq_filter:
110
114
  try:
111
- expression = "".join(next(iter(filters))).replace("==", "=")
115
+ expression: list[str] = "".join(
116
+ [str(x) for x in next(iter(filters))]
117
+ ).replace("==", "=")
112
118
  glob_func = _get_glob_func(file_system)
113
119
  suffix: str = Path(gcs_path).suffix
114
120
  paths = glob_func(str(Path(gcs_path) / expression / f"*{suffix}"))
@@ -119,6 +125,7 @@ def read_geopandas(
119
125
  file_system=file_system,
120
126
  use_threads=use_threads,
121
127
  pandas_fallback=pandas_fallback,
128
+ filters=filters,
122
129
  **kwargs,
123
130
  )
124
131
  except FileNotFoundError:
@@ -182,7 +189,11 @@ def _read_geopandas_from_iterable(
182
189
  paths = list(bounds_series.index)
183
190
 
184
191
  results: list[pyarrow.Table] = _read_pyarrow_with_treads(
185
- paths, file_system=file_system, mask=mask, use_threads=use_threads, **kwargs
192
+ paths,
193
+ file_system=file_system,
194
+ mask=mask,
195
+ use_threads=use_threads,
196
+ **kwargs,
186
197
  )
187
198
  if results:
188
199
  try:
@@ -198,10 +209,15 @@ def _read_geopandas_from_iterable(
198
209
 
199
210
 
200
211
  def _read_pyarrow_with_treads(
201
- paths: list[str | Path | os.PathLike], file_system, use_threads, mask, **kwargs
212
+ paths: list[str | Path | os.PathLike],
213
+ file_system,
214
+ use_threads,
215
+ mask,
216
+ filters,
217
+ **kwargs,
202
218
  ) -> list[pyarrow.Table]:
203
219
  read_partial = functools.partial(
204
- _read_pyarrow, mask=mask, file_system=file_system, **kwargs
220
+ _read_pyarrow, filters=filters, mask=mask, file_system=file_system, **kwargs
205
221
  )
206
222
  if not use_threads:
207
223
  return [x for x in map(read_partial, paths) if x is not None]
@@ -645,7 +661,7 @@ def expression_match_path(expression: ds.Expression, path: str) -> bool:
645
661
  """Check if a file path match a pyarrow Expression.
646
662
 
647
663
  Examples:
648
- --------
664
+ ---------
649
665
  >>> import pyarrow.compute as pc
650
666
  >>> path = 'data/file.parquet/x=1/y=10/name0.parquet'
651
667
  >>> expression = (pc.Field("x") == 1) & (pc.Field("y") == 10)
@@ -758,6 +774,7 @@ def _read_partitioned_parquet(
758
774
  ),
759
775
  file_system=file_system,
760
776
  mask=mask,
777
+ filters=filters,
761
778
  use_threads=use_threads,
762
779
  **kwargs,
763
780
  )
sgis/maps/thematicmap.py CHANGED
@@ -296,13 +296,11 @@ class ThematicMap(Map):
296
296
  if self._gdf[self._column].isna().any():
297
297
  isnas = []
298
298
  for label, gdf in self._gdfs.items():
299
-
300
299
  isnas.append(gdf[gdf[self._column].isna()])
301
300
  self._gdfs[label] = gdf[gdf[self._column].notna()]
302
- color = self.facecolor if nan_hatch else self.nan_color
303
301
  self._more_data[nan_label] = {
304
302
  "gdf": pd.concat(isnas, ignore_index=True),
305
- "color": color,
303
+ "color": self.nan_color,
306
304
  "hatch": nan_hatch,
307
305
  } | new_kwargs
308
306
  self._gdf = pd.concat(self.gdfs.values(), ignore_index=True)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ssb-sgis
3
- Version: 1.2.3
3
+ Version: 1.2.4
4
4
  Summary: GIS functions used at Statistics Norway.
5
5
  Home-page: https://github.com/statisticsnorway/ssb-sgis
6
6
  License: MIT
@@ -1,4 +1,4 @@
1
- sgis/__init__.py,sha256=40JJSoA8sXzp7VNoUCeFuEbZPGuEbnMjpypmNxPsMA0,7690
1
+ sgis/__init__.py,sha256=FcE8XYmVlyO-6s15AC8dLqIjp2KZpar2ND3siIxX830,7747
2
2
  sgis/conf.py,sha256=pLqmvIKoKmXoW8chja3iQpbDUp9Z39vzl97MGH8ZHW0,2614
3
3
  sgis/debug_config.py,sha256=Tfr19kU46hSkkspsIJcrUWvlhaL4U3-f8xEPkujSCAQ,593
4
4
  sgis/exceptions.py,sha256=WNaEBPNNx0rmz-YDzlFX4vIE7ocJQruUTqS2RNAu2zU,660
@@ -6,7 +6,7 @@ sgis/geopandas_tools/__init__.py,sha256=bo8lFMcltOz7TtWAi52_ekR2gd3mjfBfKeMDV5zu
6
6
  sgis/geopandas_tools/bounds.py,sha256=YJyF0gp78hFAjLLZmDquRKCBAtbt7QouG3snTcJeNQs,23822
7
7
  sgis/geopandas_tools/buffer_dissolve_explode.py,sha256=ReIgoeh6CUVcLmsUZ_pyoWYg6iBZzYiGmFq6CMOKRvE,19535
8
8
  sgis/geopandas_tools/centerlines.py,sha256=Q65Sx01SeAlulBEd9oaZkB2maBBNdLcJwAbTILg4SPU,11848
9
- sgis/geopandas_tools/cleaning.py,sha256=PkAVVZ84ahek8_uVlTRtCO8nCWO6DdGltWLTmPOsvxM,24426
9
+ sgis/geopandas_tools/cleaning.py,sha256=fST0xFztmyn-QUOAfvjZmu7aO_zPiolWK7gd7TR6ffI,24393
10
10
  sgis/geopandas_tools/conversion.py,sha256=w3W0Utaw7SESRR659percNLwOY9_yfg6DL5hcuM1CUA,25017
11
11
  sgis/geopandas_tools/duplicates.py,sha256=TDDM4u1n7SIkyJrOfl1Lno92AmUPqtXBHsj1IUKC0hI,14992
12
12
  sgis/geopandas_tools/general.py,sha256=YRpNEdwTHyFdQOdAfbCmYXS7PxoDjXxoagwpteXkYdI,43937
@@ -17,13 +17,13 @@ sgis/geopandas_tools/overlay.py,sha256=5i9u8GgFuU0fCqzELsbIaoUPhw-E7eZHl_yKB0wEc
17
17
  sgis/geopandas_tools/point_operations.py,sha256=JM4hvfIVxZaZdGNlGzcCurrKzkgC_b9hzbFYN42f9WY,6972
18
18
  sgis/geopandas_tools/polygon_operations.py,sha256=v-B9IgbFfm4dVHKPyzvmnNiqVCdtl9ddpCsQpZZ-9sU,49284
19
19
  sgis/geopandas_tools/polygons_as_rings.py,sha256=BX_GZS6F9I4NbEpiOlNBd7zywJjdfdJVi_MkeONBuiM,14941
20
- sgis/geopandas_tools/runners.py,sha256=IMClw80YAUGyPyowBXNidZu-z_rErpzj0w-GUQRI-Y0,11320
20
+ sgis/geopandas_tools/runners.py,sha256=J4lH0RXYDYTLVeQFgNv8gEY0E97QGIQ4zPW5vfoxgDU,12979
21
21
  sgis/geopandas_tools/sfilter.py,sha256=BPz6-_9B7QdyYmVatZXavdHj7FIW_ztIyJHQOkKJt7A,10284
22
22
  sgis/geopandas_tools/utils.py,sha256=X0pRvB1tWgV_0BCrRS1HU9LtLGnZCpvVPxyqM9JGb0Y,1415
23
23
  sgis/helpers.py,sha256=4N6vFWQ3TYVzRHNcWY_fNa_GkFuaZB3vtCkkFND-qs0,9628
24
24
  sgis/io/__init__.py,sha256=uyBr20YDqB2bQttrd5q1JuGOvX32A-MSvS7Wmw5f5qg,177
25
25
  sgis/io/_is_dapla.py,sha256=wmfkSe98IrLhUg3dtXZusV6OVC8VlY1kbc5EQDf3P-Q,358
26
- sgis/io/dapla_functions.py,sha256=cwCmwgOoWOaYMJ35hgAJtCJ7m8bQBM4RsL7y3afjnLc,30539
26
+ sgis/io/dapla_functions.py,sha256=AbP400wE8wd3KhfoX_BoEC6ba9fR0gCg8qWSqdkJ5dU,30815
27
27
  sgis/io/opener.py,sha256=HWO3G1NB6bpXKM94JadCD513vjat1o1TFjWGWzyVasg,898
28
28
  sgis/io/read_parquet.py,sha256=FvZYv1rLkUlrSaUY6QW6E1yntmntTeQuZ9ZRgCDO4IM,3776
29
29
  sgis/maps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -34,7 +34,7 @@ sgis/maps/legend.py,sha256=qq2RkebuaNAdFztlXrDOWbN0voeK5w5VycmRKyx0NdM,26512
34
34
  sgis/maps/map.py,sha256=q0gqOg_DD1ea7B_8__nuFN1dYA7o3uIxS6KuJwmVNhQ,30269
35
35
  sgis/maps/maps.py,sha256=Ti4nfm6bz_8ZWVHoEHF61moI3LQH9Ee6Jd8NHzInWkw,23135
36
36
  sgis/maps/norge_i_bilder.json,sha256=VKmb7rg4jvgc8_Ve1fFnHyZ_Dkv4T5GTA0UCpqpFAi4,492751
37
- sgis/maps/thematicmap.py,sha256=xTX_Y6ZXLcjNCgfUOr87pAMD4cjKxhFudZZXn_ewpII,25171
37
+ sgis/maps/thematicmap.py,sha256=Z3o_Bca0oty5Cn35pZfX5Qy52sXDVIMVSFD6IlZrovo,25111
38
38
  sgis/maps/tilesources.py,sha256=F4mFHxPwkiPJdVKzNkScTX6xbJAMIUtlTq4mQ83oguw,1746
39
39
  sgis/maps/wms.py,sha256=UjsKAvrZkcYRgjVGGeg6XA4Lkx6WU9w6WTyeSkwN78w,6931
40
40
  sgis/networkanalysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -61,7 +61,7 @@ sgis/raster/indices.py,sha256=efJmgfPg_VuSzXFosXV661IendF8CwPFWtMhyP4TMUg,222
61
61
  sgis/raster/regex.py,sha256=kYhVpRYzoXutx1dSYmqMoselWXww7MMEsTPmLZwHjbM,3759
62
62
  sgis/raster/sentinel_config.py,sha256=nySDqn2R8M6W8jguoBeSAK_zzbAsqmaI59i32446FwY,1268
63
63
  sgis/raster/zonal.py,sha256=D4Gyptw-yOLTCO41peIuYbY-DANsJCG19xXDlf1QAz4,2299
64
- ssb_sgis-1.2.3.dist-info/LICENSE,sha256=np3IfD5m0ZUofn_kVzDZqliozuiO6wrktw3LRPjyEiI,1073
65
- ssb_sgis-1.2.3.dist-info/METADATA,sha256=14UzO4efbS_k7OMKyV7rCQjZknag0TTuwQ6sgaQvaww,11740
66
- ssb_sgis-1.2.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
67
- ssb_sgis-1.2.3.dist-info/RECORD,,
64
+ ssb_sgis-1.2.4.dist-info/LICENSE,sha256=np3IfD5m0ZUofn_kVzDZqliozuiO6wrktw3LRPjyEiI,1073
65
+ ssb_sgis-1.2.4.dist-info/METADATA,sha256=0vA7hJ78jGcP1uHtgeeNYN-Ne_9xZQ_fX3SRamcESxw,11740
66
+ ssb_sgis-1.2.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
67
+ ssb_sgis-1.2.4.dist-info/RECORD,,