ssb-sgis 1.2.1__py3-none-any.whl → 1.2.3__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
sgis/maps/map.py CHANGED
@@ -107,7 +107,7 @@ class Map:
107
107
  k: int = 5,
108
108
  bins: tuple[float] | None = None,
109
109
  nan_label: str = "Missing",
110
- nan_color="#c2c2c2",
110
+ nan_color: str | None = None,
111
111
  scheme: str = DEFAULT_SCHEME,
112
112
  cmap: str | None = None,
113
113
  categorical: bool | None = None,
@@ -137,44 +137,41 @@ class Map:
137
137
  self._k = k
138
138
  self.nan_label = nan_label
139
139
  self.nan_color = nan_color
140
+ self._nan_color_was_none = nan_color is None
140
141
  self._cmap = cmap
141
142
  self.scheme = scheme
143
+ self._categories_colors_dict = {}
144
+ self._more_data = {}
142
145
 
143
146
  # need to get the object names of the gdfs before copying. Only getting,
144
147
  # not setting, labels. So the original gdfs don't get the label column.
145
- self.labels: list[str] = [
146
- _determine_best_name(gdf, column, i) for i, gdf in enumerate(gdfs)
147
- ]
148
+ self._gdfs: dict[str, GeoDataFrame] = {
149
+ _determine_best_name(gdf, column, i): gdf for i, gdf in enumerate(gdfs)
150
+ }
148
151
 
149
152
  show = kwargs.pop("show", True)
150
153
  if isinstance(show, (int, bool)):
151
- show_temp = [bool(show) for _ in range(len(gdfs))]
154
+ self.show = {label: bool(show) for label in self._gdfs}
152
155
  elif not hasattr(show, "__iter__"):
153
156
  raise ValueError(
154
157
  "'show' must be boolean or an iterable of boleans same "
155
158
  f"length as gdfs ({len(gdfs)}). Got len {len(show)}"
156
159
  )
157
160
  else:
158
- show_temp = show
161
+ self.show = {label: bool(show) for label in self._gdfs}
159
162
 
160
- show_args = show_temp[: len(gdfs)]
163
+ show_args = list(self.show.values())[: len(gdfs)]
161
164
  # gdfs that are in kwargs
162
- show_kwargs = show_temp[len(gdfs) :]
163
- self._gdfs = []
164
- new_labels = []
165
- self.show = []
166
- for label, gdf, show in zip(self.labels, gdfs, show_args, strict=False):
165
+ show_kwargs = list(self.show.values())[len(gdfs) :]
166
+ for (label, gdf), show in zip(self._gdfs.items(), show_args, strict=False):
167
167
  if not len(gdf):
168
168
  continue
169
169
 
170
- gdf = clean_geoms(gdf).reset_index(drop=True)
170
+ gdf = clean_geoms(to_gdf(gdf)).reset_index(drop=True)
171
171
  if not len(gdf):
172
172
  continue
173
-
174
- self._gdfs.append(to_gdf(gdf))
175
- new_labels.append(label)
176
- self.show.append(show)
177
- self.labels = new_labels
173
+ self._gdfs[label] = gdf
174
+ self.show[label] = show
178
175
 
179
176
  # pop all geometry-like items from kwargs into self._gdfs
180
177
  i = 0
@@ -184,14 +181,13 @@ class Map:
184
181
  value = to_gdf(value)
185
182
  if not len(value):
186
183
  continue
187
- self._gdfs.append(to_gdf(value))
188
- self.labels.append(key)
184
+ self._gdfs[key] = to_gdf(value)
189
185
  try:
190
186
  show = show_kwargs[i]
191
187
  i += 1
192
188
  except IndexError:
193
189
  pass
194
- self.show.append(show)
190
+ self.show[key] = show
195
191
  except Exception:
196
192
  self.kwargs[key] = value
197
193
 
@@ -204,43 +200,92 @@ class Map:
204
200
  if categorical is not None:
205
201
  self._is_categorical = categorical
206
202
 
207
- if not self._gdfs or not any(len(gdf) for gdf in self._gdfs):
208
- self._gdfs = []
203
+ if not self._gdfs or not any(len(gdf) for gdf in self._gdfs.values()):
204
+ self._gdfs = {}
205
+ self._gdf = self._get_gdf_template()
209
206
  if categorical is None:
210
207
  self._is_categorical = True
211
208
  self._unique_values = []
212
- self._nan_idx = []
209
+ self._column = "label"
213
210
  return
214
211
 
215
- if not self.labels:
216
- self._set_labels()
217
-
218
212
  self._gdfs = self._to_common_crs_and_one_geom_col(self._gdfs)
219
213
  if categorical is None:
220
214
  self._is_categorical = self._check_if_categorical()
221
215
 
216
+ for label, gdf in self._gdfs.items():
217
+ gdf["label"] = label
218
+ self._gdfs[label] = gdf
219
+
222
220
  if self._column:
223
221
  self._fillna_if_col_is_missing()
224
222
  else:
225
- gdfs = []
226
- for gdf, label in zip(self._gdfs, self.labels, strict=True):
227
- gdf["label"] = label
228
- gdfs.append(gdf)
229
223
  self._column = "label"
230
- self._gdfs = gdfs
231
224
 
232
225
  try:
233
- self._gdf = pd.concat(self._gdfs, ignore_index=True)
226
+ self._gdf = pd.concat(self._gdfs.values(), ignore_index=True)
234
227
  except ValueError:
235
- crs = get_common_crs(self._gdfs)
236
- for gdf in self._gdfs:
228
+ crs = get_common_crs(self._gdfs.values())
229
+ for gdf in self._gdfs.values():
237
230
  gdf.crs = crs
238
231
  self._gdf = pd.concat(self._gdfs, ignore_index=True)
239
232
 
240
- self._nan_idx = self._gdf[self._column].isna()
241
233
  self._to_categorical()
242
234
  self._get_unique_values()
243
235
 
236
+ def _get_gdf_template(self):
237
+ return GeoDataFrame(
238
+ {
239
+ "geometry": [],
240
+ "label": [],
241
+ "color": [],
242
+ }
243
+ | ({self._column: []} if self._column is not None else {})
244
+ )
245
+
246
+ def _prepare_continous_map(self) -> None:
247
+ """Create bins if not already done and adjust k if needed."""
248
+ if self.scheme is None:
249
+ return
250
+
251
+ if self.bins is None:
252
+ self.bins = self._create_bins(self._gdf, self._column)
253
+ if len(self.bins) <= self._k and len(self.bins) != len(self._unique_values):
254
+ self._k = len(self.bins)
255
+ elif not all(self._gdf[self._column].isna()):
256
+ self.bins = self._add_minmax_to_bins(self.bins)
257
+ if len(self._unique_values) <= len(self.bins):
258
+ self._k = len(self.bins) # - 1
259
+ else:
260
+ self._unique_values = self.nan_label
261
+ self._k = 1
262
+
263
+ def _prepare_categorical_plot(self):
264
+ """Map values to colors."""
265
+ self._make_categories_colors_dict()
266
+ if self._gdf is not None and len(self._gdf):
267
+ self._fix_nans()
268
+
269
+ def _map(value, label):
270
+ try:
271
+ return self._categories_colors_dict[value]
272
+ except KeyError as e:
273
+ if label in self._categories_colors_dict:
274
+ return self._categories_colors_dict[label]
275
+ if not pd.isna(value):
276
+ raise e
277
+ return self.nan_color
278
+
279
+ for label, gdf in self._gdfs.items():
280
+ gdf["color"] = [_map(value, label) for value in gdf[self._column]]
281
+ self._gdfs[label] = gdf
282
+ self._gdf["color"] = [
283
+ _map(value, label)
284
+ for value, label in zip(
285
+ self._gdf[self._column], self._gdf["label"], strict=False
286
+ )
287
+ ]
288
+
244
289
  def _to_categorical(self):
245
290
  if not (self._is_categorical and self.column is not None):
246
291
  return
@@ -288,10 +333,10 @@ class Map:
288
333
  Because floats don't always equal each other. This will make very
289
334
  similar values count as the same value in the color classification.
290
335
  """
291
- array = self._gdf.loc[list(~self._nan_idx), self._column]
336
+ array = self._gdf[self._column].dropna()
292
337
  self._min = np.min(array)
293
338
  self._max = np.max(array)
294
- self._get_multiplier(array)
339
+ self._get_multiplier(array.astype(np.float64))
295
340
 
296
341
  unique = array.reset_index(drop=True).drop_duplicates()
297
342
  as_int = self._array_to_large_int(unique)
@@ -318,7 +363,7 @@ class Map:
318
363
 
319
364
  Adding this as an attribute to use later in _classify_from_bins.
320
365
  """
321
- if np.max(array) == 0:
366
+ if not len(array) or np.max(array) == 0:
322
367
  self._multiplier: int = 1
323
368
  return
324
369
 
@@ -341,53 +386,23 @@ class Map:
341
386
  # make sure they are lists
342
387
  bins = [bin_ for bin_ in bins]
343
388
 
344
- if min(bins) > 0 and min(
345
- self._gdf.loc[list(~self._nan_idx), self._column]
346
- ) < min(bins):
347
- num = min(self._gdf.loc[list(~self._nan_idx), self._column])
348
- # if isinstance(num, float):
349
- # num -= (
350
- # float(f"1e-{abs(self.legend.rounding)}")
351
- # if self.legend and self.legend.rounding
352
- # else 0
353
- # )
389
+ if min(bins) > 0 and (self._gdf[self._column].dropna().min()) < min(bins):
390
+ num = self._gdf[self._column].dropna().min()
354
391
  bins = [num] + bins
355
392
 
356
- if min(bins) < 0 and min(
357
- self._gdf.loc[list(~self._nan_idx), self._column]
358
- ) < min(bins):
359
- num = min(self._gdf.loc[list(~self._nan_idx), self._column])
360
- # if isinstance(num, float):
361
- # num -= (
362
- # float(f"1e-{abs(self.legend.rounding)}")
363
- # if self.legend and self.legend.rounding
364
- # else 0
365
- # )
393
+ if min(bins) < 0 and (self._gdf[self._column].dropna().min()) < min(bins):
394
+ num = self._gdf[self._column].dropna().min()
395
+
366
396
  bins = [num] + bins
367
397
 
368
- if max(bins) > 0 and max(
369
- self._gdf.loc[self._gdf[self._column].notna(), self._column]
370
- ) > max(bins):
371
- num = max(self._gdf.loc[self._gdf[self._column].notna(), self._column])
372
- # if isinstance(num, float):
373
- # num += (
374
- # float(f"1e-{abs(self.legend.rounding)}")
375
- # if self.legend and self.legend.rounding
376
- # else 0
377
- # )
398
+ if max(bins) > 0 and (self._gdf[self._column].dropna().max()) > max(bins):
399
+ num = self._gdf[self._column].dropna().max()
378
400
  bins = bins + [num]
379
401
 
380
402
  if max(bins) < 0 and max(
381
403
  self._gdf.loc[self._gdf[self._column].notna(), self._column]
382
404
  ) < max(bins):
383
405
  num = max(self._gdf.loc[self._gdf[self._column].notna(), self._column])
384
- # if isinstance(num, float):
385
- # num += (
386
- # float(f"1e-{abs(self.legend.rounding)}")
387
- # if self.legend and self.legend.rounding
388
- # else 0
389
- # )
390
-
391
406
  bins = bins + [num]
392
407
 
393
408
  def adjust_bin(num: int | float, i: int) -> int | float:
@@ -494,40 +509,14 @@ class Map:
494
509
 
495
510
  return gdfs, column, kwargs
496
511
 
497
- def _prepare_continous_map(self) -> None:
498
- """Create bins if not already done and adjust k if needed."""
499
- if self.scheme is None:
500
- return
501
-
502
- if self.bins is None:
503
- self.bins = self._create_bins(self._gdf, self._column)
504
- if len(self.bins) <= self._k and len(self.bins) != len(self._unique_values):
505
- self._k = len(self.bins)
506
- elif not all(self._gdf[self._column].isna()):
507
- self.bins = self._add_minmax_to_bins(self.bins)
508
- if len(self._unique_values) <= len(self.bins):
509
- self._k = len(self.bins) # - 1
510
- else:
511
- self._unique_values = self.nan_label
512
- self._k = 1
513
-
514
- def _set_labels(self) -> None:
515
- """Setting the labels after copying the gdfs."""
516
- gdfs = []
517
- for i, gdf in enumerate(self._gdfs):
518
- gdf["label"] = self.labels[i]
519
- gdfs.append(gdf)
520
- self._gdfs = gdfs
521
-
522
512
  def _to_common_crs_and_one_geom_col(
523
- self, gdfs: list[GeoDataFrame]
524
- ) -> list[GeoDataFrame]:
513
+ self, gdfs: dict[str, GeoDataFrame]
514
+ ) -> dict[str, GeoDataFrame]:
525
515
  """Need common crs and max one geometry column."""
526
- crs_list = list({gdf.crs for gdf in gdfs if gdf.crs is not None})
516
+ crs_list = list({gdf.crs for gdf in gdfs.values() if gdf.crs is not None})
527
517
  if crs_list:
528
518
  self.crs = crs_list[0]
529
- new_gdfs = []
530
- for gdf in gdfs:
519
+ for label, gdf in gdfs.items():
531
520
  gdf = gdf.reset_index(drop=True)
532
521
  gdf = drop_inactive_geometry_columns(gdf).pipe(_rename_geometry_if)
533
522
  if crs_list:
@@ -535,17 +524,18 @@ class Map:
535
524
  gdf = gdf.to_crs(self.crs)
536
525
  except ValueError:
537
526
  gdf = gdf.set_crs(self.crs)
538
- new_gdfs.append(gdf)
539
- return new_gdfs
527
+ gdfs[label] = gdf
528
+ return gdfs
540
529
 
541
530
  def _fillna_if_col_is_missing(self) -> None:
542
531
  n = 0
543
- for gdf in self._gdfs:
532
+ for label, gdf in self._gdfs.items():
544
533
  if self._column in gdf.columns:
545
534
  gdf[self._column] = gdf[self._column].fillna(pd.NA)
546
535
  n += 1
547
536
  else:
548
537
  gdf[self._column] = pd.NA
538
+ self._gdfs[label] = gdf
549
539
 
550
540
  maybe_area = 1 if "area" in self._column else 0
551
541
  maybe_length = (
@@ -576,7 +566,7 @@ class Map:
576
566
 
577
567
  all_nan = 0
578
568
  col_not_present = 0
579
- for gdf in self._gdfs:
569
+ for gdf in self._gdfs.values():
580
570
  if self._column not in gdf:
581
571
  if maybe_area_km2 and unit_is_meters(gdf):
582
572
  gdf["area_km2"] = gdf.area / 1_000_000
@@ -620,34 +610,33 @@ class Map:
620
610
  self._categories_colors_dict = {
621
611
  category: _CATEGORICAL_CMAP[i]
622
612
  for i, category in enumerate(self._unique_values)
623
- }
613
+ } | self._categories_colors_dict
624
614
  elif self._cmap:
625
615
  cmap = matplotlib.colormaps.get_cmap(self._cmap)
626
616
 
627
617
  self._categories_colors_dict = {
628
618
  category: colors.to_hex(cmap(int(i)))
629
619
  for i, category in enumerate(self._unique_values)
630
- }
620
+ } | self._categories_colors_dict
631
621
  else:
632
622
  cmap = matplotlib.colormaps.get_cmap("tab20")
633
623
 
634
624
  self._categories_colors_dict = {
635
625
  category: colors.to_hex(cmap(int(i)))
636
626
  for i, category in enumerate(self._unique_values)
637
- }
627
+ } | self._categories_colors_dict
638
628
 
639
629
  def _fix_nans(self) -> None:
640
- if any(self._nan_idx):
641
- self._gdf[self._column] = self._gdf[self._column].fillna(self.nan_label)
642
- self._categories_colors_dict[self.nan_label] = self.nan_color
643
-
644
- new_gdfs = []
645
- for gdf in self._gdfs:
646
- gdf["color"] = gdf[self._column].map(self._categories_colors_dict)
647
- new_gdfs.append(gdf)
648
- self._gdfs = new_gdfs
630
+ def hasnans(df: GeoDataFrame, label: str) -> bool:
631
+ if label in self._categories_colors_dict:
632
+ return False
633
+ return df[self._column].isna().any()
649
634
 
650
- self._gdf["color"] = self._gdf[self._column].map(self._categories_colors_dict)
635
+ if any(hasnans(df, label) for label, df in self._gdfs.items()):
636
+ self._gdf[self._column] = self._gdf[self._column].fillna(self.nan_label)
637
+ self._categories_colors_dict[self.nan_label] = (
638
+ self._categories_colors_dict.get(self.nan_label, self.nan_color)
639
+ )
651
640
 
652
641
  def _create_bins(self, gdf: GeoDataFrame, column: str) -> np.ndarray:
653
642
  """Make bin list of length k + 1, or length of unique values.
@@ -657,7 +646,7 @@ class Map:
657
646
  If 'scheme' is not specified, the jenks_breaks function is used, which is
658
647
  much faster than the one from Mapclassifier.
659
648
  """
660
- if not len(gdf.loc[list(~self._nan_idx), column]):
649
+ if not gdf[column].notna().any():
661
650
  return np.array([0])
662
651
 
663
652
  n_classes = (
@@ -673,12 +662,10 @@ class Map:
673
662
  n_classes = len(self._unique_values)
674
663
 
675
664
  if self.scheme == "jenks":
676
- bins = jenks_breaks(
677
- gdf.loc[list(~self._nan_idx), column], n_classes=n_classes
678
- )
665
+ bins = jenks_breaks(gdf[column].dropna(), n_classes=n_classes)
679
666
  else:
680
667
  binning = classify(
681
- np.asarray(gdf.loc[list(~self._nan_idx), column]),
668
+ np.asarray(gdf[column].dropna()),
682
669
  scheme=self.scheme,
683
670
  # k=self._k,
684
671
  k=n_classes,
@@ -720,10 +707,9 @@ class Map:
720
707
  cmap = matplotlib.colormaps.get_cmap(self._cmap)
721
708
  colors_ = [
722
709
  colors.to_hex(cmap(int(i)))
723
- # for i in np.linspace(self.cmap_start, self.cmap_stop, num=self._k)
724
710
  for i in np.linspace(self.cmap_start, self.cmap_stop, num=n)
725
711
  ]
726
- if any(self._nan_idx):
712
+ if self._gdf[self._column].isna().any():
727
713
  colors_ = colors_ + [self.nan_color]
728
714
  return np.array(colors_)
729
715
 
@@ -774,7 +760,7 @@ class Map:
774
760
 
775
761
  So from e.g. [0,2,4] to [0,1,2].
776
762
 
777
- Otherwise, will get index error when classifying colors.
763
+ Otherwise, we will get index error when classifying colors.
778
764
  """
779
765
  rank_dict = {val: rank for rank, val in enumerate(np.unique(classified))}
780
766
 
sgis/maps/maps.py CHANGED
@@ -479,7 +479,7 @@ def clipmap(
479
479
  if m.gdfs is None and not len(m.rasters):
480
480
  return m
481
481
 
482
- m._gdfs = [gdf.clip(mask) for gdf in m._gdfs]
482
+ m._gdfs = {label: gdf.clip(mask) for label, gdf in m._gdfs.items()}
483
483
  m._gdf = m._gdf.clip(mask)
484
484
  m._nan_idx = m._gdf[m._column].isna()
485
485
  m._get_unique_values()
@@ -494,7 +494,7 @@ def clipmap(
494
494
  if m.gdfs is None:
495
495
  return m
496
496
 
497
- m._gdfs = [gdf.clip(mask) for gdf in m._gdfs]
497
+ m._gdfs = {label: gdf.clip(mask) for label, gdf in m._gdfs.items()}
498
498
  m._gdf = m._gdf.clip(mask)
499
499
  m._nan_idx = m._gdf[m._column].isna()
500
500
  m._get_unique_values()