ssb-sgis 1.0.9__tar.gz → 1.0.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/PKG-INFO +1 -1
  2. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/pyproject.toml +1 -1
  3. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/geopandas_tools/bounds.py +2 -0
  4. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/geopandas_tools/cleaning.py +3 -3
  5. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/io/_is_dapla.py +2 -5
  6. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/maps/explore.py +191 -172
  7. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/maps/maps.py +16 -43
  8. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/raster/image_collection.py +130 -67
  9. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/LICENSE +0 -0
  10. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/README.md +0 -0
  11. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/__init__.py +0 -0
  12. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/debug_config.py +0 -0
  13. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/exceptions.py +0 -0
  14. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/geopandas_tools/__init__.py +0 -0
  15. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/geopandas_tools/buffer_dissolve_explode.py +0 -0
  16. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/geopandas_tools/centerlines.py +0 -0
  17. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/geopandas_tools/conversion.py +0 -0
  18. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/geopandas_tools/duplicates.py +0 -0
  19. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/geopandas_tools/general.py +0 -0
  20. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/geopandas_tools/geocoding.py +0 -0
  21. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/geopandas_tools/geometry_types.py +0 -0
  22. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/geopandas_tools/neighbors.py +0 -0
  23. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/geopandas_tools/overlay.py +0 -0
  24. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/geopandas_tools/point_operations.py +0 -0
  25. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/geopandas_tools/polygon_operations.py +0 -0
  26. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/geopandas_tools/polygons_as_rings.py +0 -0
  27. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/geopandas_tools/sfilter.py +0 -0
  28. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/helpers.py +0 -0
  29. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/io/dapla_functions.py +0 -0
  30. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/io/opener.py +0 -0
  31. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/io/read_parquet.py +0 -0
  32. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/maps/__init__.py +0 -0
  33. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/maps/examine.py +0 -0
  34. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/maps/httpserver.py +0 -0
  35. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/maps/legend.py +0 -0
  36. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/maps/map.py +0 -0
  37. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/maps/thematicmap.py +0 -0
  38. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/maps/tilesources.py +0 -0
  39. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/networkanalysis/__init__.py +0 -0
  40. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/networkanalysis/_get_route.py +0 -0
  41. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/networkanalysis/_od_cost_matrix.py +0 -0
  42. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/networkanalysis/_points.py +0 -0
  43. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/networkanalysis/_service_area.py +0 -0
  44. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/networkanalysis/closing_network_holes.py +0 -0
  45. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/networkanalysis/cutting_lines.py +0 -0
  46. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/networkanalysis/directednetwork.py +0 -0
  47. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/networkanalysis/finding_isolated_networks.py +0 -0
  48. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/networkanalysis/network.py +0 -0
  49. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/networkanalysis/networkanalysis.py +0 -0
  50. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/networkanalysis/networkanalysisrules.py +0 -0
  51. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/networkanalysis/nodes.py +0 -0
  52. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/networkanalysis/traveling_salesman.py +0 -0
  53. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/parallel/parallel.py +0 -0
  54. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/py.typed +0 -0
  55. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/raster/__init__.py +0 -0
  56. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/raster/base.py +0 -0
  57. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/raster/indices.py +0 -0
  58. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/raster/regex.py +0 -0
  59. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/raster/sentinel_config.py +0 -0
  60. {ssb_sgis-1.0.9 → ssb_sgis-1.0.11}/src/sgis/raster/zonal.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ssb-sgis
3
- Version: 1.0.9
3
+ Version: 1.0.11
4
4
  Summary: GIS functions used at Statistics Norway.
5
5
  Home-page: https://github.com/statisticsnorway/ssb-sgis
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "ssb-sgis"
3
- version = "1.0.9"
3
+ version = "1.0.11"
4
4
  description = "GIS functions used at Statistics Norway."
5
5
  authors = ["Morten Letnes <morten.letnes@ssb.no>"]
6
6
  license = "MIT"
@@ -697,4 +697,6 @@ def get_total_bounds(
697
697
  raise e2 from e
698
698
  else:
699
699
  continue
700
+ if not xs or not ys:
701
+ raise ValueError(f"No bounds found for {geometries}")
700
702
  return min(xs), min(ys), max(xs), max(ys)
@@ -579,9 +579,9 @@ def _snap_to_anchors(
579
579
  # browser=True,
580
580
  )
581
581
 
582
- print(
583
- "line_is_simple", line_is_simple, range_index, i, index, j
584
- ) # , j2, j3, x)
582
+ # print(
583
+ # "line_is_simple", line_is_simple, range_index, i, index, j
584
+ # ) # , j2, j3, x)
585
585
 
586
586
  if not line_is_simple:
587
587
  # for j4 in range(len(ring)):
@@ -7,8 +7,5 @@ import os
7
7
 
8
8
 
9
9
  def is_dapla() -> bool:
10
- """From https://github.com/statisticsnorway/ssb-altinn-python/blob/main/src/altinn/utils.py."""
11
- try:
12
- return os.environ["GCS_TOKEN_PROVIDER_KEY"] == "google"
13
- except KeyError:
14
- return False
10
+ """Check whether the name of any OS environment variable contains the text 'dapla' (case-insensitive)."""
11
+ return any("dapla" in key.lower() for key in os.environ)
@@ -18,6 +18,7 @@ from typing import ClassVar
18
18
  import branca as bc
19
19
  import folium
20
20
  import geopandas as gpd
21
+ import joblib
21
22
  import matplotlib
22
23
  import matplotlib.pyplot as plt
23
24
  import numpy as np
@@ -189,6 +190,10 @@ def _single_band_to_arr(band, mask, name, raster_data_dict):
189
190
  arr = band.clip(mask).values
190
191
  else:
191
192
  arr = band.load(indexes=1, bounds=mask).values
193
+
194
+ if _is_too_much_nodata([arr], band.nodata):
195
+ return False
196
+
192
197
  bounds: tuple = (
193
198
  _any_to_bbox_crs4326(mask, band.crs)
194
199
  if mask is not None
@@ -205,7 +210,28 @@ def _single_band_to_arr(band, mask, name, raster_data_dict):
205
210
  raster_data_dict["cmap"] = band.cmap or "Grays"
206
211
  raster_data_dict["arr"] = arr
207
212
  raster_data_dict["bounds"] = bounds
208
- raster_data_dict["label"] = name
213
+ raster_data_dict["label"] = band.name or name
214
+ raster_data_dict["date"] = band.date
215
+ return True
216
+
217
+
218
+ def _is_too_much_nodata(
219
+ arrays: list[np.ndarray],
220
+ nodata: int | None = None,
221
+ max_nodata_percentage: int = 100,
222
+ ) -> bool:
223
+ return (
224
+ any(arr.shape[0] == 0 for arr in arrays)
225
+ or any(
226
+ (
227
+ isinstance(arr, np.ma.core.MaskedArray)
228
+ and np.mean((arr.mask) | (arr.data == nodata) | (np.isnan(arr.data)))
229
+ > (max_nodata_percentage / 100)
230
+ )
231
+ for arr in arrays
232
+ )
233
+ or any(np.mean(arr == nodata) > (max_nodata_percentage / 100) for arr in arrays)
234
+ )
209
235
 
210
236
 
211
237
  def _any_to_bbox_crs4326(obj, crs):
@@ -240,6 +266,7 @@ class Explore(Map):
240
266
  text: str | None = None,
241
267
  decimals: int = 6,
242
268
  max_images: int = 10,
269
+ max_nodata_percentage: int = 100,
243
270
  **kwargs,
244
271
  ) -> None:
245
272
  """Initialiser.
@@ -266,6 +293,8 @@ class Explore(Map):
266
293
  map. Defaults to 15.
267
294
  text: Optional text for a text box in the map.
268
295
  decimals: Number of decimals in the coordinates.
296
+ max_nodata_percentage: Maximum percentage of nodata values (e.g. clouds) to allow in
297
+ image arrays.
269
298
  **kwargs: Additional keyword arguments. Can also be geometry-like objects
270
299
  where the key is the label.
271
300
  """
@@ -280,6 +309,7 @@ class Explore(Map):
280
309
  self.text = text
281
310
  self.decimals = decimals
282
311
  self.max_images = max_images
312
+ self.max_nodata_percentage = max_nodata_percentage
283
313
  self.legend = None
284
314
 
285
315
  self.browser = browser
@@ -471,7 +501,10 @@ class Explore(Map):
471
501
 
472
502
  random_point = sample.sample_points(size=1)
473
503
 
474
- self.center = (random_point.geometry.iloc[0].x, random_point.geometry.iloc[0].y)
504
+ self.center = (
505
+ float(random_point.geometry.iloc[0].x),
506
+ float(random_point.geometry.iloc[0].y),
507
+ )
475
508
  print(f"center={self.center}, size={size}")
476
509
 
477
510
  mask = random_point.buffer(size)
@@ -509,18 +542,26 @@ class Explore(Map):
509
542
 
510
543
  def _load_rasters_as_images(self):
511
544
  self.raster_data = []
512
- n_added_images = 0
513
545
  self._show_rasters = True
514
- for name, value in self.rasters.items():
515
- data, n_added_images = self._image_collection_to_background_map(
516
- value,
517
- self.mask,
518
- name,
519
- max_images=self.max_images,
520
- n_added_images=n_added_images,
546
+
547
+ with joblib.Parallel(len(self.rasters) or 1, backend="threading") as parallel:
548
+ results = parallel(
549
+ joblib.delayed(_image_collection_to_background_map)(
550
+ raster,
551
+ name,
552
+ self.mask,
553
+ self.max_images,
554
+ self.max_nodata_percentage,
555
+ )
556
+ for name, raster in self.rasters.items()
521
557
  )
558
+
559
+ for data in results:
522
560
  self.raster_data += data
523
561
 
562
+ if len(self.raster_data) > 6:
563
+ self._show_rasters = False
564
+
524
565
  def _rasters_to_background_maps(self):
525
566
  for raster_data_dict in self.raster_data:
526
567
  try:
@@ -1061,160 +1102,6 @@ class Explore(Map):
1061
1102
  **kwargs,
1062
1103
  )
1063
1104
 
1064
- def _image_collection_to_background_map(
1065
- self,
1066
- image_collection: ImageCollection | Image | Band,
1067
- mask: Any | None,
1068
- name: str,
1069
- max_images: int,
1070
- n_added_images: int,
1071
- rbg_bands: list[str] = (("B04", "B02", "B03"), ("B4", "B2", "B3")),
1072
- ) -> tuple[list[dict], int]:
1073
- out = []
1074
-
1075
- if all(isinstance(x, str) for x in rbg_bands):
1076
- rbg_bands = (rbg_bands,)
1077
-
1078
- if isinstance(image_collection, ImageCollection):
1079
- images = image_collection.images
1080
- name = None
1081
- elif isinstance(image_collection, Image):
1082
- img = image_collection
1083
- if not _intersects_if_not_none_or_empty(
1084
- mask, img.bounds
1085
- ): # is not None and not to_shapely(mask).intersects(
1086
- # to_shapely(img.bounds)
1087
- # ):
1088
- return out, n_added_images
1089
-
1090
- if len(img) == 1:
1091
- band = next(iter(img))
1092
- raster_data_dict = {}
1093
- out.append(raster_data_dict)
1094
- name = _determine_label(band, name, out, n_added_images)
1095
- _single_band_to_arr(band, mask, name, raster_data_dict)
1096
- n_added_images += 1
1097
- return out, n_added_images
1098
- elif len(img) < 3:
1099
- raster_data_dict = {}
1100
- out.append(raster_data_dict)
1101
- for band in img:
1102
- name = _determine_label(band, None, out, n_added_images)
1103
- _single_band_to_arr(band, mask, name, raster_data_dict)
1104
- n_added_images += 1
1105
- return out, n_added_images
1106
- else:
1107
- images = [image_collection]
1108
-
1109
- elif isinstance(image_collection, Band):
1110
- band = image_collection
1111
-
1112
- if not _intersects_if_not_none_or_empty(
1113
- mask, band.bounds
1114
- ): # mask is not None and not to_shapely(mask).intersects(
1115
- # to_shapely(band.bounds)
1116
- # ):
1117
- return out, n_added_images
1118
-
1119
- raster_data_dict = {}
1120
- out.append(raster_data_dict)
1121
- _single_band_to_arr(band, mask, name, raster_data_dict)
1122
- return out, n_added_images
1123
-
1124
- else:
1125
- raise TypeError(type(image_collection))
1126
-
1127
- if max(len(out), len(images)) + n_added_images > max_images:
1128
- warnings.warn(
1129
- f"Showing only a sample of {max_images}. Set 'max_images.", stacklevel=1
1130
- )
1131
- self._show_rasters = False
1132
- random.shuffle(images)
1133
-
1134
- images = images[: (max_images - n_added_images)]
1135
- images = (
1136
- list(sorted([img for img in images if img.date is not None]))
1137
- + sorted(
1138
- [
1139
- img
1140
- for img in images
1141
- if img.date is None and img.path is not None
1142
- ],
1143
- key=lambda x: x.path,
1144
- )
1145
- + [img for img in images if img.date is None and img.path is None]
1146
- )
1147
-
1148
- for image in images:
1149
-
1150
- if not _intersects_if_not_none_or_empty(
1151
- mask, image.bounds
1152
- ): # mask is not None and not to_shapely(mask).intersects(
1153
- # to_shapely(image.bounds)
1154
- # ):
1155
- continue
1156
-
1157
- raster_data_dict = {}
1158
- out.append(raster_data_dict)
1159
-
1160
- if len(image) < 3:
1161
- for band in image:
1162
- name = _determine_label(band, None, out, n_added_images)
1163
- _single_band_to_arr(band, mask, name, raster_data_dict)
1164
- n_added_images += 1
1165
- continue
1166
-
1167
- def load(band_id: str) -> Band:
1168
- band = image[band_id]
1169
- if band.has_array and mask is not None:
1170
- band = band.clip(mask, copy=True)
1171
- elif not band.has_array:
1172
- band = band.load(indexes=1, bounds=mask)
1173
- return band
1174
-
1175
- for red, blue, green in rbg_bands:
1176
- try:
1177
- red_band = load(red)
1178
- except KeyError:
1179
- continue
1180
- try:
1181
- blue_band = load(blue)
1182
- except KeyError:
1183
- continue
1184
- try:
1185
- green_band = load(green)
1186
- except KeyError:
1187
- continue
1188
- break
1189
-
1190
- crs = red_band.crs
1191
- bounds = to_bbox(to_gdf(red_band.bounds, crs).to_crs(4326))
1192
-
1193
- red_band = red_band.values
1194
- blue_band = blue_band.values
1195
- green_band = green_band.values
1196
-
1197
- if (
1198
- red_band.shape[0] == 0
1199
- or blue_band.shape[0] == 0
1200
- or green_band.shape[0] == 0
1201
- ):
1202
- continue
1203
-
1204
- # to 3d array in shape (x, y, 3)
1205
- rbg_image = np.stack([red_band, blue_band, green_band], axis=2)
1206
-
1207
- raster_data_dict["arr"] = rbg_image
1208
- raster_data_dict["bounds"] = bounds
1209
- raster_data_dict["cmap"] = None
1210
- raster_data_dict["label"] = _determine_label(
1211
- image, name, out, n_added_images
1212
- )
1213
-
1214
- n_added_images += 1
1215
-
1216
- return out, n_added_images
1217
-
1218
1105
 
1219
1106
  def _tooltip_popup(
1220
1107
  type_: str, fields: Any, gdf: GeoDataFrame, **kwargs
@@ -1252,29 +1139,24 @@ def _intersects_if_not_none_or_empty(obj: Any, other: Any) -> bool:
1252
1139
  return obj.intersects(to_shapely(other))
1253
1140
 
1254
1141
 
1255
- def _determine_label(
1256
- obj: Image | Band | ImageCollection, obj_name: str | None, out: list[dict], i: int
1257
- ) -> str:
1142
+ def _determine_label(obj: Image | Band | ImageCollection, obj_name: str | None) -> str:
1258
1143
  # Prefer the object's name
1259
1144
  if obj_name:
1260
1145
  # Avoid the generic label e.g. Image(1)
1261
1146
  does_not_have_generic_name = (
1262
- re.sub("(\d+)", "", obj_name) != f"{obj.__class__.__name__}()"
1147
+ re.sub(r"(\d+)", "", obj_name) != f"{obj.__class__.__name__}()"
1263
1148
  )
1264
1149
  if does_not_have_generic_name:
1265
1150
  return obj_name
1266
1151
  try:
1267
- # Images/Bands/Collections constructed from arrays have no path stems
1268
1152
  if obj.name:
1269
1153
  name = obj.name
1270
1154
  else:
1155
+ # Images/Bands/Collections constructed from arrays have no path stems
1271
1156
  name = str(obj)[:23]
1272
1157
  except (AttributeError, ValueError):
1273
1158
  name = str(obj)[:23]
1274
1159
 
1275
- if name in [x["label"] for x in out if "label" in x]:
1276
- name += f"_{i}"
1277
-
1278
1160
  return name
1279
1161
 
1280
1162
 
@@ -1448,3 +1330,140 @@ def get_textbox(text: str) -> str:
1448
1330
  </style>
1449
1331
  {{% endmacro %}}
1450
1332
  """
1333
+
1334
+
1335
+ def _add_one_image(
1336
+ image: Image, mask, rbg_bands, name: str, max_nodata_percentage: int
1337
+ ) -> dict:
1338
+
1339
+ raster_data_dict = {}
1340
+
1341
+ if len(image) < 3:
1342
+ for band in image:
1343
+ name = _determine_label(band, band.name or name)
1344
+ _single_band_to_arr(band, mask, name, raster_data_dict)
1345
+ return raster_data_dict
1346
+
1347
+ def load(band_id: str) -> Band:
1348
+ band = image[band_id]
1349
+ if band.has_array and mask is not None:
1350
+ band = band.clip(mask, copy=True)
1351
+ elif not band.has_array:
1352
+ band = band.load(indexes=1, bounds=mask)
1353
+ return band
1354
+
1355
+ for red, blue, green in rbg_bands:
1356
+ try:
1357
+ red_band = load(red)
1358
+ except KeyError:
1359
+ continue
1360
+ try:
1361
+ blue_band = load(blue)
1362
+ except KeyError:
1363
+ continue
1364
+ try:
1365
+ green_band = load(green)
1366
+ except KeyError:
1367
+ continue
1368
+ break
1369
+
1370
+ crs = red_band.crs
1371
+ bounds = to_bbox(to_gdf(red_band.bounds, crs).to_crs(4326))
1372
+
1373
+ red_band = red_band.values
1374
+ blue_band = blue_band.values
1375
+ green_band = green_band.values
1376
+
1377
+ if _is_too_much_nodata(
1378
+ [red_band, blue_band, green_band], image.nodata, max_nodata_percentage
1379
+ ):
1380
+ return
1381
+
1382
+ # to 3d array in shape (x, y, 3)
1383
+ rbg_image = np.stack([red_band, blue_band, green_band], axis=2)
1384
+
1385
+ raster_data_dict["arr"] = rbg_image
1386
+ raster_data_dict["bounds"] = bounds
1387
+ raster_data_dict["cmap"] = None
1388
+ raster_data_dict["label"] = _determine_label(image, image.name or name)
1389
+ raster_data_dict["date"] = image.date
1390
+
1391
+ return raster_data_dict
1392
+
1393
+
1394
+ def _image_collection_to_background_map(
1395
+ image_collection: ImageCollection | Image | Band,
1396
+ name: str,
1397
+ mask: Any | None,
1398
+ max_images: int,
1399
+ max_nodata_percentage: int,
1400
+ rbg_bands: list[str] = (("B04", "B02", "B03"), ("B4", "B2", "B3")),
1401
+ ) -> tuple[list[dict], int]:
1402
+ out = []
1403
+ n_added_images = 0
1404
+
1405
+ if all(isinstance(x, str) for x in rbg_bands):
1406
+ rbg_bands = (rbg_bands,)
1407
+
1408
+ if isinstance(image_collection, ImageCollection):
1409
+ if mask is not None:
1410
+ image_collection = image_collection.filter(bbox=mask)
1411
+ images: list[Image] = image_collection.images
1412
+ name = None
1413
+ elif isinstance(image_collection, Image):
1414
+ images: list[Image] = [image_collection]
1415
+ name = image_collection.name
1416
+
1417
+ elif isinstance(image_collection, Band):
1418
+ band = image_collection
1419
+
1420
+ if not _intersects_if_not_none_or_empty(mask, band.bounds):
1421
+ return out
1422
+
1423
+ raster_data_dict = {}
1424
+ out.append(raster_data_dict)
1425
+ _single_band_to_arr(band, mask, name, raster_data_dict)
1426
+ return out
1427
+
1428
+ else:
1429
+ raise TypeError(type(image_collection))
1430
+
1431
+ if max(len(out), len(images)) + n_added_images > max_images:
1432
+ warnings.warn(
1433
+ f"Showing only a sample of {max_images}. Set 'max_images.", stacklevel=1
1434
+ )
1435
+ random.shuffle(images)
1436
+
1437
+ while n_added_images < max_images:
1438
+ n_max = min(max_images - n_added_images, len(images))
1439
+ if not n_max:
1440
+ break
1441
+ n_images_was = len(images)
1442
+ these_images = images[:n_max]
1443
+ images = images[n_max:]
1444
+ assert n_images_was == sum([len(these_images), len(images)])
1445
+ with joblib.Parallel(n_max, backend="threading") as parallel:
1446
+ results = parallel(
1447
+ joblib.delayed(_add_one_image)(
1448
+ img, mask, rbg_bands, name, max_nodata_percentage
1449
+ )
1450
+ for img in these_images
1451
+ )
1452
+
1453
+ for x in results:
1454
+ if not x:
1455
+ continue
1456
+ i = 1
1457
+ while x["label"] in {y["label"] for y in out}:
1458
+ x["label"] = x["label"].rstrip(f"_{i}", "") + f"_{i + 1}"
1459
+ i += 1
1460
+
1461
+ n_added_images += 1
1462
+ out.append(x)
1463
+
1464
+ if all(x["date"] for x in out):
1465
+ out = sorted(out, key=lambda x: x["date"])
1466
+ else:
1467
+ out = sorted(out, key=lambda x: x["label"])
1468
+
1469
+ return out
@@ -86,7 +86,7 @@ def explore(
86
86
  smooth_factor: int | float = 1.5,
87
87
  size: int | None = None,
88
88
  max_images: int = 10,
89
- images_to_gdf: bool = False,
89
+ max_nodata_percentage: int = 100,
90
90
  **kwargs,
91
91
  ) -> Explore:
92
92
  """Interactive map of GeoDataFrames with layers that can be toggled on/off.
@@ -116,8 +116,8 @@ def explore(
116
116
  1000.
117
117
  max_images: Maximum number of images (Image, ImageCollection, Band) to show per
118
118
  map. Defaults to 10.
119
- images_to_gdf: If True (not default), images (Image, ImageCollection, Band)
120
- will be converted to GeoDataFrame and added to the map.
119
+ max_nodata_percentage: Maximum percentage of nodata values (e.g. clouds) to allow in
120
+ image arrays.
121
121
  **kwargs: Keyword arguments to pass to geopandas.GeoDataFrame.explore, for
122
122
  instance 'cmap' to change the colors, 'scheme' to change how the data
123
123
  is grouped. This defaults to 'fisherjenkssampled' for numeric data.
@@ -165,6 +165,8 @@ def explore(
165
165
  mask=mask,
166
166
  browser=browser,
167
167
  max_zoom=max_zoom,
168
+ max_images=max_images,
169
+ max_nodata_percentage=max_nodata_percentage,
168
170
  **kwargs,
169
171
  )
170
172
 
@@ -211,46 +213,6 @@ def explore(
211
213
  else:
212
214
  mask = mask4326.to_crs(to_crs)
213
215
 
214
- # else:
215
- # mask_flipped = mask
216
-
217
- # # coords = mask.get_coordinates()
218
- # if (
219
- # (mask_flipped.distance(bounds) > size).all()
220
- # # and coords["x"].max() < 180
221
- # # and coords["y"].max() < 180
222
- # # and coords["x"].min() > -180
223
- # # and coords["y"].min() > -180
224
- # ):
225
- # try:
226
- # bounds4326 = to_gdf(bounds, to_crs).to_crs(4326).geometry.iloc[0]
227
- # except ValueError:
228
- # bounds4326 = to_gdf(bounds, to_crs).set_crs(4326).geometry.iloc[0]
229
-
230
- # mask4326 = mask.set_crs(4326, allow_override=True)
231
-
232
- # if (mask4326.distance(bounds4326) > size).all():
233
- # # try flipping coordinates
234
- # x, y = list(mask4326.geometry.iloc[0].coords)[0]
235
- # mask4326 = to_gdf([y, x], 4326)
236
-
237
- # mask = mask4326
238
-
239
- # # if mask4326.intersects(bounds4326).any():
240
- # # mask = mask4326
241
- # # else:
242
- # # try:
243
- # # mask = mask.to_crs(to_crs)
244
- # # except ValueError:
245
- # # pass
246
- # else:
247
- # mask = mask_flipped
248
-
249
- # try:
250
- # mask = mask.to_crs(to_crs)
251
- # except ValueError:
252
- # pass
253
-
254
216
  if get_geom_type(mask) in ["point", "line"]:
255
217
  mask = mask.buffer(size)
256
218
 
@@ -260,6 +222,8 @@ def explore(
260
222
  mask=mask,
261
223
  browser=browser,
262
224
  max_zoom=max_zoom,
225
+ max_images=max_images,
226
+ max_nodata_percentage=max_nodata_percentage,
263
227
  **kwargs,
264
228
  )
265
229
 
@@ -270,6 +234,7 @@ def explore(
270
234
  max_zoom=max_zoom,
271
235
  smooth_factor=smooth_factor,
272
236
  max_images=max_images,
237
+ max_nodata_percentage=max_nodata_percentage,
273
238
  **kwargs,
274
239
  )
275
240
 
@@ -294,6 +259,7 @@ def samplemap(
294
259
  explore: bool = True,
295
260
  browser: bool = False,
296
261
  max_images: int = 10,
262
+ max_nodata_percentage: int = 100,
297
263
  **kwargs,
298
264
  ) -> Explore:
299
265
  """Shows an interactive map of a random area of GeoDataFrames.
@@ -327,6 +293,8 @@ def samplemap(
327
293
  If True the maps will be opened in a browser folder.
328
294
  max_images: Maximum number of images (Image, ImageCollection, Band) to show per
329
295
  map. Defaults to 10.
296
+ max_nodata_percentage: Maximum percentage of nodata values (e.g. clouds) to allow in
297
+ image arrays.
330
298
  **kwargs: Keyword arguments to pass to geopandas.GeoDataFrame.explore, for
331
299
  instance 'cmap' to change the colors, 'scheme' to change how the data
332
300
  is grouped. This defaults to 'fisherjenkssampled' for numeric data.
@@ -409,6 +377,7 @@ def samplemap(
409
377
  explore=explore,
410
378
  smooth_factor=smooth_factor,
411
379
  max_images=max_images,
380
+ max_nodata_percentage=max_nodata_percentage,
412
381
  **kwargs,
413
382
  )
414
383
 
@@ -422,6 +391,7 @@ def clipmap(
422
391
  smooth_factor: int | float = 1.5,
423
392
  browser: bool = False,
424
393
  max_images: int = 10,
394
+ max_nodata_percentage: int = 100,
425
395
  **kwargs,
426
396
  ) -> Explore | Map:
427
397
  """Shows an interactive map of a of GeoDataFrames clipped to the mask extent.
@@ -450,6 +420,8 @@ def clipmap(
450
420
  If True the maps will be opened in a browser folder.
451
421
  max_images: Maximum number of images (Image, ImageCollection, Band) to show per
452
422
  map. Defaults to 10.
423
+ max_nodata_percentage: Maximum percentage of nodata values (e.g. clouds) to allow in
424
+ image arrays.
453
425
  **kwargs: Keyword arguments to pass to geopandas.GeoDataFrame.explore, for
454
426
  instance 'cmap' to change the colors, 'scheme' to change how the data
455
427
  is grouped. This defaults to 'fisherjenkssampled' for numeric data.
@@ -484,6 +456,7 @@ def clipmap(
484
456
  max_zoom=max_zoom,
485
457
  smooth_factor=smooth_factor,
486
458
  max_images=max_images,
459
+ max_nodata_percentage=max_nodata_percentage,
487
460
  **kwargs,
488
461
  )
489
462
  m.mask = mask
@@ -177,6 +177,90 @@ def _get_child_paths_threaded(data: Sequence[str]) -> set[str]:
177
177
  return set(itertools.chain.from_iterable(all_paths))
178
178
 
179
179
 
180
+ @dataclass
181
+ class PixelwiseResults:
182
+ """Container of pixelwise results to be converted to numpy/geopandas."""
183
+
184
+ row_indices: np.ndarray
185
+ col_indices: np.ndarray
186
+ results: list[Any]
187
+ res: int | tuple[int, int]
188
+ bounds: tuple[float, float, float, float]
189
+ shape: tuple[int, int]
190
+ crs: Any
191
+ nodata: int | float | None
192
+
193
+ def to_tuple(self) -> tuple[int, int, Any]:
194
+ """Return 3-length tuple of row indices, column indices and pixelwise results."""
195
+ return self.row_indices, self.col_indices, self.results
196
+
197
+ def to_dict(self) -> dict[tuple[int, int], Any]:
198
+ """Return dictionary with row and column indices as keys and pixelwise results as values."""
199
+ return {
200
+ (int(row), int(col)): value
201
+ for row, col, value in zip(
202
+ self.row_indices, self.col_indices, self.results, strict=True
203
+ )
204
+ }
205
+
206
+ def to_geopandas(self, column: str = "value") -> GeoDataFrame:
207
+ """Return GeoDataFrame with pixel geometries and values from the pixelwise operation."""
208
+ minx, miny = self.bounds[:2]
209
+ resx, resy = _res_as_tuple(self.res)
210
+
211
+ minxs = np.full(self.row_indices.shape, minx) + (minx * self.row_indices * resx)
212
+ minys = np.full(self.col_indices.shape, miny) + (miny * self.col_indices * resy)
213
+ maxxs = minxs + resx
214
+ maxys = minys + resy
215
+
216
+ return GeoDataFrame(
217
+ {
218
+ column: self.results,
219
+ "geometry": [
220
+ box(minx, miny, maxx, maxy)
221
+ for minx, miny, maxx, maxy in zip(
222
+ minxs, minys, maxxs, maxys, strict=True
223
+ )
224
+ ],
225
+ },
226
+ index=[self.row_indices, self.col_indices],
227
+ crs=self.crs,
228
+ )
229
+
230
+ def to_numpy(self) -> np.ndarray | tuple[np.ndarray, ...]:
231
+ """Reshape pixelwise results to 2d numpy arrays in the shape of the full arrays of the image bands."""
232
+ try:
233
+ n_out_arrays = len(next(iter(self.results)))
234
+ except TypeError:
235
+ n_out_arrays = 1
236
+
237
+ out_arrays = [
238
+ np.full(self.shape, self.nodata).astype(np.float64)
239
+ for _ in range(n_out_arrays)
240
+ ]
241
+
242
+ for row, col, these_results in zip(
243
+ self.row_indices, self.col_indices, self.results, strict=True
244
+ ):
245
+ if these_results is None:
246
+ continue
247
+ for i, arr in enumerate(out_arrays):
248
+ try:
249
+ arr[row, col] = these_results[i]
250
+ except TypeError:
251
+ arr[row, col] = these_results
252
+
253
+ for i, array in enumerate(out_arrays):
254
+ all_are_integers = np.all(np.mod(array, 1) == 0)
255
+ if all_are_integers:
256
+ out_arrays[i] = array.astype(int)
257
+
258
+ if len(out_arrays) == 1:
259
+ return out_arrays[0]
260
+
261
+ return tuple(out_arrays)
262
+
263
+
180
264
  class ImageCollectionGroupBy:
181
265
  """Iterator and merger class returned from groupby.
182
266
 
@@ -573,6 +657,12 @@ class _ImageBandBase(_ImageBase):
573
657
  return self._month
574
658
  return str(self.date).replace("-", "").replace("/", "")[4:6]
575
659
 
660
+ @property
661
+ def day(self) -> str:
662
+ if hasattr(self, "_day") and self._day:
663
+ return self._day
664
+ return str(self.date).replace("-", "").replace("/", "")[6:8]
665
+
576
666
  @property
577
667
  def name(self) -> str | None:
578
668
  if hasattr(self, "_name") and self._name is not None:
@@ -617,19 +707,19 @@ class _ImageBandBase(_ImageBase):
617
707
  return nonmissing_metadata_attributes
618
708
 
619
709
  # read all xml content once
620
- file_contents: list[str] = []
710
+ file_contents: dict[str, str] = {}
621
711
  for path in self._all_file_paths:
622
712
  if ".xml" not in path:
623
713
  continue
624
714
  with _open_func(path, "rb") as file:
625
- file_contents.append(file.read().decode("utf-8"))
715
+ file_contents[path] = file.read().decode("utf-8")
626
716
 
627
717
  def is_last_xml(i: int) -> bool:
628
718
  return i == len(file_contents) - 1
629
719
 
630
720
  for attr, value in missing_metadata_attributes.items():
631
721
  results = None
632
- for i, file_content in enumerate(file_contents):
722
+ for i, file_content in enumerate(file_contents.values()):
633
723
  if isinstance(value, str) and value in dir(self):
634
724
  # method or a hardcoded value
635
725
  value: Callable | Any = getattr(self, value)
@@ -639,7 +729,7 @@ class _ImageBandBase(_ImageBase):
639
729
  results = value(file_content)
640
730
  except _RegexError as e:
641
731
  if is_last_xml(i):
642
- raise e.__class__(self.path, e) from e
732
+ raise e.__class__(self.path, list(file_contents), e) from e
643
733
  continue
644
734
  if results is not None:
645
735
  break
@@ -804,9 +894,7 @@ class Band(_ImageBandBase):
804
894
  )
805
895
  else:
806
896
  self._path = _fix_path(str(data))
807
- if callable(res) and res() is None:
808
- res = None
809
- self._res = res
897
+ self._res = res if not (callable(res) and res() is None) else None
810
898
 
811
899
  if cmap is not None:
812
900
  self.cmap = cmap
@@ -1476,7 +1564,7 @@ class Image(_ImageBandBase):
1476
1564
  f"'data' must be string, Path-like or a sequence of Band. Got {data}"
1477
1565
  )
1478
1566
 
1479
- self._res = res
1567
+ self._res = res if not (callable(res) and res() is None) else None
1480
1568
  self._path = _fix_path(data)
1481
1569
 
1482
1570
  if all_file_paths is None and self.path:
@@ -1490,7 +1578,7 @@ class Image(_ImageBandBase):
1490
1578
 
1491
1579
  if df is None:
1492
1580
  if not self._all_file_paths:
1493
- self._all_file_paths = [self.path]
1581
+ self._all_file_paths = {self.path}
1494
1582
  df = self._create_metadata_df(self._all_file_paths)
1495
1583
 
1496
1584
  df["image_path"] = df["image_path"].astype(str)
@@ -1515,7 +1603,7 @@ class Image(_ImageBandBase):
1515
1603
  if self.metadata:
1516
1604
  try:
1517
1605
  metadata = self.metadata[self.path]
1518
- except KeyError:
1606
+ except KeyError as e:
1519
1607
  metadata = {}
1520
1608
  for key, value in metadata.items():
1521
1609
  if key in dir(self):
@@ -1955,7 +2043,7 @@ class ImageCollection(_ImageBase):
1955
2043
  self.nodata = nodata
1956
2044
  self.level = level
1957
2045
  self.processes = processes
1958
- self._res = res
2046
+ self._res = res if not (callable(res) and res() is None) else None
1959
2047
  self._crs = None
1960
2048
 
1961
2049
  self._df = None
@@ -1980,9 +2068,9 @@ class ImageCollection(_ImageBase):
1980
2068
  ) from e
1981
2069
  raise e
1982
2070
  if self.level:
1983
- self._all_file_paths = [
2071
+ self._all_file_paths = {
1984
2072
  path for path in self._all_file_paths if self.level in path
1985
- ]
2073
+ }
1986
2074
  self._df = self._create_metadata_df(self._all_file_paths)
1987
2075
  return
1988
2076
 
@@ -1994,9 +2082,9 @@ class ImageCollection(_ImageBase):
1994
2082
  self._all_file_paths = _get_all_file_paths(self.path)
1995
2083
 
1996
2084
  if self.level:
1997
- self._all_file_paths = [
2085
+ self._all_file_paths = {
1998
2086
  path for path in self._all_file_paths if self.level in path
1999
- ]
2087
+ }
2000
2088
 
2001
2089
  self._df = self._create_metadata_df(self._all_file_paths)
2002
2090
 
@@ -2079,6 +2167,7 @@ class ImageCollection(_ImageBase):
2079
2167
  kwargs: dict | None = None,
2080
2168
  index_aligned_kwargs: dict | None = None,
2081
2169
  masked: bool = True,
2170
+ processes: int | None = None,
2082
2171
  ) -> np.ndarray | tuple[np.ndarray] | None:
2083
2172
  """Run a function for each pixel.
2084
2173
 
@@ -2108,13 +2197,23 @@ class ImageCollection(_ImageBase):
2108
2197
  else:
2109
2198
  mask_array = None
2110
2199
 
2111
- return pixelwise(
2200
+ nonmissing_row_indices, nonmissing_col_indices, results = pixelwise(
2112
2201
  func=func,
2113
2202
  values=values,
2114
2203
  mask_array=mask_array,
2115
2204
  index_aligned_kwargs=index_aligned_kwargs,
2116
2205
  kwargs=kwargs,
2117
- processes=self.processes,
2206
+ processes=processes or self.processes,
2207
+ )
2208
+
2209
+ return PixelwiseResults(
2210
+ nonmissing_row_indices,
2211
+ nonmissing_col_indices,
2212
+ results,
2213
+ shape=values.shape[1:],
2214
+ res=self.res,
2215
+ bounds=self.bounds,
2216
+ crs=self.crs,
2118
2217
  nodata=self.nodata or np.nan,
2119
2218
  )
2120
2219
 
@@ -2552,15 +2651,9 @@ class ImageCollection(_ImageBase):
2552
2651
 
2553
2652
  other = to_shapely(other)
2554
2653
 
2555
- if self.processes == 1:
2556
- intersects_list: pd.Series = GeoSeries(
2557
- [img.union_all() for img in self]
2558
- ).intersects(other)
2559
- else:
2560
- with joblib.Parallel(n_jobs=self.processes, backend="loky") as parallel:
2561
- intersects_list: list[bool] = parallel(
2562
- joblib.delayed(_intesects)(image, other) for image in self
2563
- )
2654
+ intersects_list: pd.Series = GeoSeries(
2655
+ [img.union_all() for img in self]
2656
+ ).intersects(other)
2564
2657
 
2565
2658
  self.images = [
2566
2659
  image
@@ -2990,21 +3083,23 @@ class Sentinel2Config:
2990
3083
  xml_file,
2991
3084
  )
2992
3085
  if match_ is None:
2993
- raise _RegexError()
3086
+ return None
2994
3087
 
2995
3088
  if "NOT_REFINED" in match_.group(0):
2996
3089
  return False
2997
3090
  elif "REFINED" in match_.group(0):
2998
3091
  return True
2999
3092
  else:
3000
- raise _RegexError()
3093
+ raise _RegexError(xml_file)
3001
3094
 
3002
3095
  def _get_boa_quantification_value(self, xml_file: str) -> int:
3003
3096
  return int(
3004
3097
  _extract_regex_match_from_string(
3005
3098
  xml_file,
3006
3099
  (
3007
- r'<BOA_QUANTIFICATION_VALUE unit="none">-?(\d+)</BOA_QUANTIFICATION_VALUE>',
3100
+ r'<BOA_QUANTIFICATION_VALUE unit="none">(\d+)</BOA_QUANTIFICATION_VALUE>',
3101
+ # r'<BOA_QUANTIFICATION_VALUE unit="none">-?(\d+)</BOA_QUANTIFICATION_VALUE>',
3102
+ r'<QUANTIFICATION_VALUE unit="none">?(\d+)</QUANTIFICATION_VALUE>',
3008
3103
  ),
3009
3104
  )
3010
3105
  )
@@ -3424,10 +3519,6 @@ def _band_apply(band: Band, func: Callable, **kwargs) -> Band:
3424
3519
  return band.apply(func, **kwargs)
3425
3520
 
3426
3521
 
3427
- def _clip_band(band: Band, mask, **kwargs) -> Band:
3428
- return band.clip(mask, **kwargs)
3429
-
3430
-
3431
3522
  def _merge_by_band(collection: ImageCollection, **kwargs) -> Image:
3432
3523
  return collection.merge_by_band(**kwargs)
3433
3524
 
@@ -3553,26 +3644,25 @@ def pixelwise(
3553
3644
  index_aligned_kwargs: dict | None = None,
3554
3645
  kwargs: dict | None = None,
3555
3646
  processes: int = 1,
3556
- nodata=np.nan,
3557
- ) -> Any:
3647
+ ) -> tuple[np.ndarray, np.ndarray, list[Any]]:
3558
3648
  """Run a function for each pixel of a 3d array."""
3559
3649
  index_aligned_kwargs = index_aligned_kwargs or {}
3560
3650
  kwargs = kwargs or {}
3561
3651
 
3562
3652
  if mask_array is not None:
3653
+ # skip pixels where all values are masked
3563
3654
  not_all_missing = np.all(mask_array, axis=0) == False
3564
-
3565
3655
  else:
3566
3656
  mask_array = np.full(values.shape, False)
3567
3657
  not_all_missing = np.full(values.shape[1:], True)
3568
3658
 
3569
- nonmissing_row_indices, nonmissing_col_indices = not_all_missing.nonzero()
3570
-
3571
3659
  def select_pixel_values(row: int, col: int) -> np.ndarray:
3572
3660
  return values[~mask_array[:, row, col], row, col]
3573
3661
 
3662
+ # loop through long 1d arrays of aligned row and col indices
3663
+ nonmissing_row_indices, nonmissing_col_indices = not_all_missing.nonzero()
3574
3664
  with joblib.Parallel(n_jobs=processes, backend="loky") as parallel:
3575
- results: list[tuple[np.float64, np.float64]] = parallel(
3665
+ results: list[Any] = parallel(
3576
3666
  joblib.delayed(func)(
3577
3667
  select_pixel_values(row, col),
3578
3668
  **kwargs,
@@ -3586,31 +3676,4 @@ def pixelwise(
3586
3676
  )
3587
3677
  )
3588
3678
 
3589
- if all(x is None for x in results):
3590
- return
3591
-
3592
- try:
3593
- n_out_arrays = len(next(iter(results)))
3594
- except TypeError:
3595
- n_out_arrays = 1
3596
-
3597
- out_arrays = tuple(np.full(values.shape[1:], nodata) for _ in range(n_out_arrays))
3598
-
3599
- counter = 0
3600
- for row, col in zip(nonmissing_row_indices, nonmissing_col_indices, strict=True):
3601
- these_results = results[counter]
3602
- if these_results is None:
3603
- counter += 1
3604
- continue
3605
- for i, arr in enumerate(out_arrays):
3606
- try:
3607
- arr[row, col] = these_results[i]
3608
- except TypeError:
3609
- arr[row, col] = these_results
3610
- counter += 1
3611
- assert counter == len(results), (counter, len(results))
3612
-
3613
- if len(out_arrays) == 1:
3614
- return out_arrays[0]
3615
-
3616
- return out_arrays
3679
+ return nonmissing_row_indices, nonmissing_col_indices, results
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes