yirgacheffe 1.9.2__py3-none-any.whl → 1.9.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of yirgacheffe might be problematic. Click here for more details.
- yirgacheffe/__init__.py +5 -0
- yirgacheffe/_core.py +6 -6
- yirgacheffe/_operators.py +94 -27
- yirgacheffe/operators.py +1 -1
- yirgacheffe/rounding.py +2 -1
- yirgacheffe-1.9.4.dist-info/METADATA +149 -0
- {yirgacheffe-1.9.2.dist-info → yirgacheffe-1.9.4.dist-info}/RECORD +11 -11
- yirgacheffe-1.9.2.dist-info/METADATA +0 -600
- {yirgacheffe-1.9.2.dist-info → yirgacheffe-1.9.4.dist-info}/WHEEL +0 -0
- {yirgacheffe-1.9.2.dist-info → yirgacheffe-1.9.4.dist-info}/entry_points.txt +0 -0
- {yirgacheffe-1.9.2.dist-info → yirgacheffe-1.9.4.dist-info}/licenses/LICENSE +0 -0
- {yirgacheffe-1.9.2.dist-info → yirgacheffe-1.9.4.dist-info}/top_level.txt +0 -0
yirgacheffe/__init__.py
CHANGED
|
@@ -12,9 +12,14 @@ except ModuleNotFoundError:
|
|
|
12
12
|
pyproject_data = tomllib.load(f)
|
|
13
13
|
__version__ = pyproject_data["project"]["version"]
|
|
14
14
|
|
|
15
|
+
from .layers import YirgacheffeLayer
|
|
15
16
|
from ._core import read_raster, read_rasters, read_shape, read_shape_like, constant, read_narrow_raster
|
|
16
17
|
from .constants import WGS_84_PROJECTION
|
|
17
18
|
from .window import Area, MapProjection, Window
|
|
18
19
|
from ._backends.enumeration import dtype as DataType
|
|
19
20
|
|
|
21
|
+
from ._operators import where, minimum, maximum, clip, log, log2, log10, exp, exp2, nan_to_num, isin, \
|
|
22
|
+
floor, ceil # pylint: disable=W0611
|
|
23
|
+
from ._operators import abs, round # pylint: disable=W0611,W0622
|
|
24
|
+
|
|
20
25
|
gdal.UseExceptions()
|
yirgacheffe/_core.py
CHANGED
|
@@ -16,7 +16,7 @@ def read_raster(
|
|
|
16
16
|
filename: Path | str,
|
|
17
17
|
band: int = 1,
|
|
18
18
|
ignore_nodata: bool = False,
|
|
19
|
-
) ->
|
|
19
|
+
) -> YirgacheffeLayer:
|
|
20
20
|
"""Open a raster file (e.g., GeoTIFF).
|
|
21
21
|
|
|
22
22
|
Args:
|
|
@@ -38,7 +38,7 @@ def read_narrow_raster(
|
|
|
38
38
|
filename: Path | str,
|
|
39
39
|
band: int = 1,
|
|
40
40
|
ignore_nodata: bool = False,
|
|
41
|
-
) ->
|
|
41
|
+
) -> YirgacheffeLayer:
|
|
42
42
|
"""Open a 1 pixel wide raster file as a global raster.
|
|
43
43
|
|
|
44
44
|
This exists for the special use case where an area per pixel raster would have the same value per horizontal row
|
|
@@ -58,7 +58,7 @@ def read_narrow_raster(
|
|
|
58
58
|
def read_rasters(
|
|
59
59
|
filenames : Sequence[Path | str],
|
|
60
60
|
tiled: bool=False
|
|
61
|
-
) ->
|
|
61
|
+
) -> YirgacheffeLayer:
|
|
62
62
|
"""Open a set of raster files (e.g., GeoTIFFs) as a single layer.
|
|
63
63
|
|
|
64
64
|
Args:
|
|
@@ -86,7 +86,7 @@ def read_shape(
|
|
|
86
86
|
where_filter: str | None = None,
|
|
87
87
|
datatype: DataType | None = None,
|
|
88
88
|
burn_value: int | float | str = 1,
|
|
89
|
-
) ->
|
|
89
|
+
) -> YirgacheffeLayer:
|
|
90
90
|
"""Open a polygon file (e.g., GeoJSON, GPKG, or ESRI Shape File).
|
|
91
91
|
|
|
92
92
|
Args:
|
|
@@ -124,7 +124,7 @@ def read_shape_like(
|
|
|
124
124
|
where_filter: str | None = None,
|
|
125
125
|
datatype: DataType | None = None,
|
|
126
126
|
burn_value: int | float | str = 1,
|
|
127
|
-
) ->
|
|
127
|
+
) -> YirgacheffeLayer:
|
|
128
128
|
"""Open a polygon file (e.g., GeoJSON, GPKG, or ESRI Shape File).
|
|
129
129
|
|
|
130
130
|
Args:
|
|
@@ -146,7 +146,7 @@ def read_shape_like(
|
|
|
146
146
|
burn_value,
|
|
147
147
|
)
|
|
148
148
|
|
|
149
|
-
def constant(value: int | float) ->
|
|
149
|
+
def constant(value: int | float) -> YirgacheffeLayer:
|
|
150
150
|
"""Generate a layer that has the same value in all pixels regardless of scale, projection, and area.
|
|
151
151
|
|
|
152
152
|
Generally this should not be necessary unless you must have the constant as the first term in an
|
yirgacheffe/_operators.py
CHANGED
|
@@ -10,12 +10,12 @@ import sys
|
|
|
10
10
|
import tempfile
|
|
11
11
|
import time
|
|
12
12
|
import types
|
|
13
|
+
from collections.abc import Callable
|
|
13
14
|
from contextlib import ExitStack
|
|
14
15
|
from enum import Enum
|
|
15
16
|
from multiprocessing import Semaphore, Process
|
|
16
17
|
from multiprocessing.managers import SharedMemoryManager
|
|
17
18
|
from pathlib import Path
|
|
18
|
-
from typing import Callable
|
|
19
19
|
|
|
20
20
|
import deprecation
|
|
21
21
|
import numpy as np
|
|
@@ -376,6 +376,7 @@ class LayerMathMixin:
|
|
|
376
376
|
)
|
|
377
377
|
if max_pixels:
|
|
378
378
|
downsample = max(window.xsize, window.ysize) // max_pixels
|
|
379
|
+
downsample = max(downsample, 1)
|
|
379
380
|
data = raw_data[::downsample,::downsample]
|
|
380
381
|
else:
|
|
381
382
|
data = raw_data
|
|
@@ -392,31 +393,34 @@ class LayerMathMixin:
|
|
|
392
393
|
class LayerOperation(LayerMathMixin):
|
|
393
394
|
|
|
394
395
|
@staticmethod
|
|
396
|
+
@deprecation.deprecated(
|
|
397
|
+
deprecated_in="1.10",
|
|
398
|
+
removed_in="2.0",
|
|
399
|
+
current_version=__version__,
|
|
400
|
+
details="Use from top level module instead."
|
|
401
|
+
)
|
|
395
402
|
def where(cond, a, b):
|
|
396
|
-
return
|
|
397
|
-
cond,
|
|
398
|
-
op.WHERE,
|
|
399
|
-
rhs=a,
|
|
400
|
-
other=b
|
|
401
|
-
)
|
|
403
|
+
return where(cond, a, b)
|
|
402
404
|
|
|
403
405
|
@staticmethod
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
406
|
+
@deprecation.deprecated(
|
|
407
|
+
deprecated_in="1.10",
|
|
408
|
+
removed_in="2.0",
|
|
409
|
+
current_version=__version__,
|
|
410
|
+
details="Use from top level module instead."
|
|
411
|
+
)
|
|
412
|
+
def minimum(a, b):
|
|
413
|
+
return minimum(a, b)
|
|
411
414
|
|
|
412
415
|
@staticmethod
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
416
|
+
@deprecation.deprecated(
|
|
417
|
+
deprecated_in="1.10",
|
|
418
|
+
removed_in="2.0",
|
|
419
|
+
current_version=__version__,
|
|
420
|
+
details="Use from top level module instead."
|
|
421
|
+
)
|
|
422
|
+
def maximum(a, b):
|
|
423
|
+
return maximum(a, b)
|
|
420
424
|
|
|
421
425
|
def __init__(
|
|
422
426
|
self,
|
|
@@ -998,7 +1002,8 @@ class LayerOperation(LayerMathMixin):
|
|
|
998
1002
|
self,
|
|
999
1003
|
filename: Path | str,
|
|
1000
1004
|
and_sum: bool = False,
|
|
1001
|
-
parallelism: int | bool | None = None
|
|
1005
|
+
parallelism: int | bool | None = None,
|
|
1006
|
+
callback: Callable[[float], None] | None = None,
|
|
1002
1007
|
) -> float | None:
|
|
1003
1008
|
"""Saves a calculation to a raster file, optionally also returning the sum of pixels.
|
|
1004
1009
|
|
|
@@ -1008,6 +1013,8 @@ class LayerOperation(LayerMathMixin):
|
|
|
1008
1013
|
that value.
|
|
1009
1014
|
parallelism: If passed, attempt to use multiple CPU cores up to the number provided, or if set to True,
|
|
1010
1015
|
yirgacheffe will pick a sensible value.
|
|
1016
|
+
callback: If passed, this callback will be called periodically with a progress update for the saving,
|
|
1017
|
+
with a value between 0.0 and 1.0.
|
|
1011
1018
|
|
|
1012
1019
|
Returns:
|
|
1013
1020
|
Either returns None, or the sum of the pixels in the resulting raster if `and_sum` was specified.
|
|
@@ -1025,12 +1032,12 @@ class LayerOperation(LayerMathMixin):
|
|
|
1025
1032
|
from yirgacheffe.layers.rasters import RasterLayer # type: ignore # pylint: disable=C0415
|
|
1026
1033
|
with RasterLayer.empty_raster_layer_like(self, filename=tempory_file.name) as layer:
|
|
1027
1034
|
if parallelism is None:
|
|
1028
|
-
result = self.save(layer, and_sum=and_sum)
|
|
1035
|
+
result = self.save(layer, and_sum=and_sum, callback=callback)
|
|
1029
1036
|
else:
|
|
1030
1037
|
if isinstance(parallelism, bool):
|
|
1031
1038
|
# Parallel save treats None as "work it out"
|
|
1032
1039
|
parallelism = None
|
|
1033
|
-
result = self.parallel_save(layer, and_sum=and_sum, parallelism=parallelism)
|
|
1040
|
+
result = self.parallel_save(layer, and_sum=and_sum, callback=callback, parallelism=parallelism)
|
|
1034
1041
|
|
|
1035
1042
|
os.makedirs(target_dir, exist_ok=True)
|
|
1036
1043
|
os.rename(src=tempory_file.name, dst=filename)
|
|
@@ -1120,10 +1127,70 @@ class ShaderStyleOperation(LayerOperation):
|
|
|
1120
1127
|
|
|
1121
1128
|
return result
|
|
1122
1129
|
|
|
1130
|
+
def where(cond, a, b):
|
|
1131
|
+
"""Return elements chosen from `a` or `b` depending on `cond`.
|
|
1132
|
+
|
|
1133
|
+
Behaves like numpy.where(condition, x, y), returning a layer operation
|
|
1134
|
+
where elements from `a` are selected where `cond` is True, and elements
|
|
1135
|
+
from `b` are selected where `cond` is False.
|
|
1136
|
+
|
|
1137
|
+
Args:
|
|
1138
|
+
cond: Layer or constant used as condition. Where True, yield `a`, otherwise yield `b`.
|
|
1139
|
+
a: Layer or constant with values from which to choose where `cond` is True.
|
|
1140
|
+
b: Layer or constant with values from which to choose where `cond` is False.
|
|
1141
|
+
|
|
1142
|
+
Returns:
|
|
1143
|
+
New layer representing the conditional selection.
|
|
1144
|
+
"""
|
|
1145
|
+
return LayerOperation(
|
|
1146
|
+
cond,
|
|
1147
|
+
op.WHERE,
|
|
1148
|
+
rhs=a,
|
|
1149
|
+
other=b
|
|
1150
|
+
)
|
|
1151
|
+
|
|
1152
|
+
def maximum(a, b):
|
|
1153
|
+
"""Element-wise maximum of layer elements.
|
|
1154
|
+
|
|
1155
|
+
Behaves like numpy.maximum(x1, x2), comparing two layers element-by-element
|
|
1156
|
+
and returning a new layer with the maximum values.
|
|
1157
|
+
|
|
1158
|
+
Args:
|
|
1159
|
+
a: First layer or constant to compare.
|
|
1160
|
+
b: Second layer or constant to compare.
|
|
1161
|
+
|
|
1162
|
+
Returns:
|
|
1163
|
+
New layer representing the element-wise maximum of the inputs.
|
|
1164
|
+
"""
|
|
1165
|
+
return LayerOperation(
|
|
1166
|
+
a,
|
|
1167
|
+
op.MAXIMUM,
|
|
1168
|
+
b,
|
|
1169
|
+
window_op=WindowOperation.UNION,
|
|
1170
|
+
)
|
|
1171
|
+
|
|
1172
|
+
def minimum(a, b):
|
|
1173
|
+
"""Element-wise minimum of layer elements.
|
|
1174
|
+
|
|
1175
|
+
Behaves like numpy.minimum(x1, x2), comparing two layers element-by-element
|
|
1176
|
+
and returning a new layer with the minimum values.
|
|
1177
|
+
|
|
1178
|
+
Args:
|
|
1179
|
+
a: First layer or constant to compare.
|
|
1180
|
+
b: Second layer or constant to compare.
|
|
1181
|
+
|
|
1182
|
+
Returns:
|
|
1183
|
+
New layer representing the element-wise minimum of the inputs.
|
|
1184
|
+
"""
|
|
1185
|
+
return LayerOperation(
|
|
1186
|
+
a,
|
|
1187
|
+
op.MINIMUM,
|
|
1188
|
+
rhs=b,
|
|
1189
|
+
window_op=WindowOperation.UNION,
|
|
1190
|
+
)
|
|
1191
|
+
|
|
1192
|
+
|
|
1123
1193
|
# We provide these module level accessors as it's often nicer to write `log(x/y)` rather than `(x/y).log()`
|
|
1124
|
-
where = LayerOperation.where
|
|
1125
|
-
minumum = LayerOperation.minimum
|
|
1126
|
-
maximum = LayerOperation.maximum
|
|
1127
1194
|
clip = LayerOperation.clip
|
|
1128
1195
|
log = LayerOperation.log
|
|
1129
1196
|
log2 = LayerOperation.log2
|
yirgacheffe/operators.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# Eventually all this should be moved to the top level in 2.0, but for backwards compatibility in 1.x needs
|
|
2
2
|
# to remain here
|
|
3
3
|
|
|
4
|
-
from ._operators import where,
|
|
4
|
+
from ._operators import where, minimum, maximum, clip, log, log2, log10, exp, exp2, nan_to_num, isin, \
|
|
5
5
|
floor, ceil # pylint: disable=W0611
|
|
6
6
|
from ._operators import abs, round # pylint: disable=W0611,W0622
|
|
7
7
|
from ._backends.enumeration import dtype as DataType # pylint: disable=W0611
|
yirgacheffe/rounding.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
|
+
from typing import Sequence
|
|
2
3
|
|
|
3
4
|
import math
|
|
4
5
|
import sys
|
|
@@ -40,7 +41,7 @@ def round_down_pixels(value: float, pixelscale: float) -> int:
|
|
|
40
41
|
else:
|
|
41
42
|
return math.floor(value)
|
|
42
43
|
|
|
43
|
-
def are_pixel_scales_equal_enough(pixel_scales:
|
|
44
|
+
def are_pixel_scales_equal_enough(pixel_scales: Sequence[PixelScale | None]) -> bool:
|
|
44
45
|
# some layers (e.g., constant layers) have no scale, and always work, so filter
|
|
45
46
|
# them out first
|
|
46
47
|
cleaned_pixel_scales: list[PixelScale] = [x for x in pixel_scales if x is not None]
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: yirgacheffe
|
|
3
|
+
Version: 1.9.4
|
|
4
|
+
Summary: Abstraction of gdal datasets for doing basic math operations
|
|
5
|
+
Author-email: Michael Dales <mwd24@cam.ac.uk>
|
|
6
|
+
License-Expression: ISC
|
|
7
|
+
Project-URL: Homepage, https://yirgacheffe.org/
|
|
8
|
+
Project-URL: Repository, https://github.com/quantifyearth/yirgacheffe.git
|
|
9
|
+
Project-URL: Issues, https://github.com/quantifyearth/yirgacheffe/issues
|
|
10
|
+
Project-URL: Changelog, https://yirgacheffe.org/latest/changelog/
|
|
11
|
+
Keywords: gdal,gis,geospatial,declarative
|
|
12
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: GIS
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: numpy<3.0,>=1.24
|
|
23
|
+
Requires-Dist: gdal[numpy]<4.0,>=3.8
|
|
24
|
+
Requires-Dist: scikit-image<1.0,>=0.20
|
|
25
|
+
Requires-Dist: torch
|
|
26
|
+
Requires-Dist: dill
|
|
27
|
+
Requires-Dist: deprecation
|
|
28
|
+
Requires-Dist: tomli
|
|
29
|
+
Requires-Dist: h3
|
|
30
|
+
Requires-Dist: pyproj
|
|
31
|
+
Provides-Extra: mlx
|
|
32
|
+
Requires-Dist: mlx; extra == "mlx"
|
|
33
|
+
Provides-Extra: matplotlib
|
|
34
|
+
Requires-Dist: matplotlib; extra == "matplotlib"
|
|
35
|
+
Provides-Extra: dev
|
|
36
|
+
Requires-Dist: mypy; extra == "dev"
|
|
37
|
+
Requires-Dist: pylint; extra == "dev"
|
|
38
|
+
Requires-Dist: pytest; extra == "dev"
|
|
39
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
40
|
+
Requires-Dist: build; extra == "dev"
|
|
41
|
+
Requires-Dist: twine; extra == "dev"
|
|
42
|
+
Requires-Dist: mkdocs-material; extra == "dev"
|
|
43
|
+
Requires-Dist: mkdocstrings-python; extra == "dev"
|
|
44
|
+
Requires-Dist: mike; extra == "dev"
|
|
45
|
+
Requires-Dist: mkdocs-gen-files; extra == "dev"
|
|
46
|
+
Dynamic: license-file
|
|
47
|
+
|
|
48
|
+
# Yirgacheffe: a declarative geospatial library for Python to make data-science with maps easier
|
|
49
|
+
|
|
50
|
+
[](https://github.com/quantifyearth/yirgacheffe/actions)
|
|
51
|
+
[](https://yirgacheffe.org)
|
|
52
|
+
[](https://pypi.org/project/yirgacheffe/)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
## Overview
|
|
56
|
+
|
|
57
|
+
Yirgacheffe is a declarative geospatial library, allowing you to operate on both raster and polygon geospatial datasets without having to do all the tedious bookkeeping around layer alignment, or deal with hardware concerns around memory or parallelism, such as how much data you can load into memory safely.
|
|
58
|
+
|
|
59
|
+
Example common use-cases:
|
|
60
|
+
|
|
61
|
+
* Do the datasets overlap? Yirgacheffe will let you define either the intersection or the union of a set of different datasets, scaling up or down the area as required.
|
|
62
|
+
* Rasterisation of vector layers: if you have a vector dataset then you can add that to your computation and Yirgacheffe will rasterize it on demand, so you never need to store more data in memory than necessary.
|
|
63
|
+
* Do the raster layers get big and take up large amounts of memory? Yirgacheffe will let you do simple numerical operations with layers directly and then worry about the memory management behind the scenes for you.
|
|
64
|
+
* Parallelisation of operations over many CPU cores.
|
|
65
|
+
* Built in support for optionally using GPUs via [MLX](https://ml-explore.github.io/mlx/build/html/index.html) support.
|
|
66
|
+
|
|
67
|
+
## Installation
|
|
68
|
+
|
|
69
|
+
Yirgacheffe is available via pypi, so can be installed with pip for example:
|
|
70
|
+
|
|
71
|
+
```SystemShell
|
|
72
|
+
$ pip install yirgacheffe
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Documentation
|
|
76
|
+
|
|
77
|
+
The documentation can be found on [yirgacheffe.org](https://yirgacheffe.org/)
|
|
78
|
+
|
|
79
|
+
## Simple examples:
|
|
80
|
+
|
|
81
|
+
Here is how to do cloud removal from [Sentinel-2 data](https://browser.dataspace.copernicus.eu/?zoom=14&lat=6.15468&lng=38.20581&themeId=DEFAULT-THEME&visualizationUrl=U2FsdGVkX1944lrmeTJcaSsnoxNMp4oucN1AjklGUANHd2cRZWyXnepHvzpaOWzMhH8SrWQo%2BqrOvOnu6f9FeCMrS%2FDZmvjzID%2FoE1tbOCEHK8ohPXjFqYojeR9%2B82ri&datasetId=S2_L2A_CDAS&fromTime=2025-09-09T00%3A00%3A00.000Z&toTime=2025-09-09T23%3A59%3A59.999Z&layerId=1_TRUE_COLOR&demSource3D=%22MAPZEN%22&cloudCoverage=30&dateMode=SINGLE), using the [Scene Classification Layer](https://custom-scripts.sentinel-hub.com/custom-scripts/sentinel-2/scene-classification/) data:
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
import yirgacheffe as yg
|
|
85
|
+
|
|
86
|
+
with (
|
|
87
|
+
yg.read_raster("T37NCG_20250909T073609_B06_20m.jp2") as vre2,
|
|
88
|
+
yg.read_raster("T37NCG_20250909T073609_SCL_20m.jp2") as scl,
|
|
89
|
+
):
|
|
90
|
+
is_cloud = (scl == 8) | (scl == 9) | (scl == 10) # various cloud types
|
|
91
|
+
is_shadow = (scl == 3)
|
|
92
|
+
is_bad = is_cloud | is_shadow
|
|
93
|
+
|
|
94
|
+
masked_vre2 = yg.where(is_bad, float("nan"), vre2)
|
|
95
|
+
masked_vre2.to_geotiff("vre2_cleaned.tif")
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
or a species' [Area of Habitat](https://www.sciencedirect.com/science/article/pii/S0169534719301892) calculation:
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
import yirgacheffe as yg
|
|
102
|
+
|
|
103
|
+
with (
|
|
104
|
+
yg.read_raster("habitats.tif") as habitat_map,
|
|
105
|
+
yg.read_raster('elevation.tif') as elevation_map,
|
|
106
|
+
yg.read_shape('species123.geojson') as range_polygon,
|
|
107
|
+
):
|
|
108
|
+
refined_habitat = habitat_map.isin([...species habitat codes...])
|
|
109
|
+
refined_elevation = (elevation_map >= species_min) & (elevation_map <= species_max)
|
|
110
|
+
aoh = refined_habitat * refined_elevation * range_polygon * area_per_pixel_map
|
|
111
|
+
print(f'Area of habitat: {aoh.sum()}')
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Citation
|
|
115
|
+
|
|
116
|
+
If you use Yirgacheffe in your research, please cite our paper:
|
|
117
|
+
|
|
118
|
+
> Michael Winston Dales, Alison Eyres, Patrick Ferris, Francesca A. Ridley, Simon Tarr, and Anil Madhavapeddy. 2025. Yirgacheffe: A Declarative Approach to Geospatial Data. In *Proceedings of the 2nd ACM SIGPLAN International Workshop on Programming for the Planet* (PROPL '25). Association for Computing Machinery, New York, NY, USA, 47–54. https://doi.org/10.1145/3759536.3763806
|
|
119
|
+
|
|
120
|
+
<details>
|
|
121
|
+
<summary>BibTeX</summary>
|
|
122
|
+
|
|
123
|
+
```bibtex
|
|
124
|
+
@inproceedings{10.1145/3759536.3763806,
|
|
125
|
+
author = {Dales, Michael Winston and Eyres, Alison and Ferris, Patrick and Ridley, Francesca A. and Tarr, Simon and Madhavapeddy, Anil},
|
|
126
|
+
title = {Yirgacheffe: A Declarative Approach to Geospatial Data},
|
|
127
|
+
year = {2025},
|
|
128
|
+
isbn = {9798400721618},
|
|
129
|
+
publisher = {Association for Computing Machinery},
|
|
130
|
+
address = {New York, NY, USA},
|
|
131
|
+
url = {https://doi.org/10.1145/3759536.3763806},
|
|
132
|
+
doi = {10.1145/3759536.3763806},
|
|
133
|
+
abstract = {We present Yirgacheffe, a declarative geospatial library that allows spatial algorithms to be implemented concisely, supports parallel execution, and avoids common errors by automatically handling data (large geospatial rasters) and resources (cores, memory, GPUs). Our primary user domain comprises ecologists, where a typical problem involves cleaning messy occurrence data, overlaying it over tiled rasters, combining layers, and deriving actionable insights from the results. We describe the successes of this approach towards driving key pipelines related to global biodiversity and describe the capability gaps that remain, hoping to motivate more research into geospatial domain-specific languages.},
|
|
134
|
+
booktitle = {Proceedings of the 2nd ACM SIGPLAN International Workshop on Programming for the Planet},
|
|
135
|
+
pages = {47–54},
|
|
136
|
+
numpages = {8},
|
|
137
|
+
keywords = {Biodiversity, Declarative, Geospatial, Python},
|
|
138
|
+
location = {Singapore, Singapore},
|
|
139
|
+
series = {PROPL '25}
|
|
140
|
+
}
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
</details>
|
|
144
|
+
|
|
145
|
+
## Thanks
|
|
146
|
+
|
|
147
|
+
Thanks to discussion and feedback from my colleagues, particularly Alison Eyres, Patrick Ferris, Amelia Holcomb, and Anil Madhavapeddy.
|
|
148
|
+
|
|
149
|
+
Inspired by the work of Daniele Baisero in his AoH library.
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
yirgacheffe/__init__.py,sha256=
|
|
2
|
-
yirgacheffe/_core.py,sha256
|
|
3
|
-
yirgacheffe/_operators.py,sha256=
|
|
1
|
+
yirgacheffe/__init__.py,sha256=JWQOJP_RPlBmN_N3vPGH-9FByjRQKCWUSnsh4fPNtW8,915
|
|
2
|
+
yirgacheffe/_core.py,sha256=-X6wTjAKagcjWcH14IKp75Nf-kANO_Nsd8wKm5XEQzE,5750
|
|
3
|
+
yirgacheffe/_operators.py,sha256=VGQ9AOOJP6cxsr_G2kxdaPaXqKi7K1csocxsudlRwVc,43440
|
|
4
4
|
yirgacheffe/constants.py,sha256=bKUjOGNj19zwggV79lJgK7tiv51DH2-rgNOKswl2gvQ,293
|
|
5
|
-
yirgacheffe/operators.py,sha256=
|
|
5
|
+
yirgacheffe/operators.py,sha256=Y1KkNt79N1elR4ZplQaQngx29wdf2QFF_5la4PI3EhI,412
|
|
6
6
|
yirgacheffe/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
-
yirgacheffe/rounding.py,sha256=
|
|
7
|
+
yirgacheffe/rounding.py,sha256=Jzd9qlLnLpigT95GbQTByvYOo639Nfq4LBEVyvhYdoc,2289
|
|
8
8
|
yirgacheffe/window.py,sha256=QuyBLOwKFI0XkEQ4Bd2hdELPbJSfHL7mt5KSi7CIHcE,9505
|
|
9
9
|
yirgacheffe/_backends/__init__.py,sha256=jN-2iRrHStnPI6cNL7XhwhsROtI0EaGfIrbF5c-ECV0,334
|
|
10
10
|
yirgacheffe/_backends/enumeration.py,sha256=9bcCXz9Ssrh8Oh1iazodkx6Gm2kQBi9HQ9z9zehS4AE,1806
|
|
@@ -19,9 +19,9 @@ yirgacheffe/layers/h3layer.py,sha256=Rq1bFo7CApIh5NdBcV7hSj3hm-DszY79nhYsTRAvJ_g
|
|
|
19
19
|
yirgacheffe/layers/rasters.py,sha256=zBE9uXm6LvAQF2_XdQzcOgJQOQWGmuPflY5JNDrUf3k,13527
|
|
20
20
|
yirgacheffe/layers/rescaled.py,sha256=gEFbXeYxX1nVn7eQYmbGww90_yc5ENmgQrD_WxXxpQE,3352
|
|
21
21
|
yirgacheffe/layers/vectors.py,sha256=A27kuTr0C9BZhHG0-cplNEa7aSNcse37Pm9xTjEzv-c,19990
|
|
22
|
-
yirgacheffe-1.9.
|
|
23
|
-
yirgacheffe-1.9.
|
|
24
|
-
yirgacheffe-1.9.
|
|
25
|
-
yirgacheffe-1.9.
|
|
26
|
-
yirgacheffe-1.9.
|
|
27
|
-
yirgacheffe-1.9.
|
|
22
|
+
yirgacheffe-1.9.4.dist-info/licenses/LICENSE,sha256=dNSHwUCJr6axStTKDEdnJtfmDdFqlE3h1NPCveqPfnY,757
|
|
23
|
+
yirgacheffe-1.9.4.dist-info/METADATA,sha256=cUAABIwPG9_RTO0ylH25O_7usbtQneZwTUCsfk_SLIs,7407
|
|
24
|
+
yirgacheffe-1.9.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
25
|
+
yirgacheffe-1.9.4.dist-info/entry_points.txt,sha256=j4KgHXbVGbGyfTySc1ypBdERpfihO4WNjppvCdE9HjE,52
|
|
26
|
+
yirgacheffe-1.9.4.dist-info/top_level.txt,sha256=9DBFlKO2Ld3hG6TuE3qOTd3Tt8ugTiXil4AN4Wr9_y0,12
|
|
27
|
+
yirgacheffe-1.9.4.dist-info/RECORD,,
|
|
@@ -1,600 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: yirgacheffe
|
|
3
|
-
Version: 1.9.2
|
|
4
|
-
Summary: Abstraction of gdal datasets for doing basic math operations
|
|
5
|
-
Author-email: Michael Dales <mwd24@cam.ac.uk>
|
|
6
|
-
License-Expression: ISC
|
|
7
|
-
Project-URL: Homepage, https://yirgacheffe.org/
|
|
8
|
-
Project-URL: Repository, https://github.com/quantifyearth/yirgacheffe.git
|
|
9
|
-
Project-URL: Issues, https://github.com/quantifyearth/yirgacheffe/issues
|
|
10
|
-
Project-URL: Changelog, https://yirgacheffe.org/latest/changelog/
|
|
11
|
-
Keywords: gdal,gis,geospatial,declarative
|
|
12
|
-
Classifier: Development Status :: 5 - Production/Stable
|
|
13
|
-
Classifier: Intended Audience :: Science/Research
|
|
14
|
-
Classifier: Programming Language :: Python :: 3
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
-
Classifier: Topic :: Scientific/Engineering :: GIS
|
|
19
|
-
Requires-Python: >=3.10
|
|
20
|
-
Description-Content-Type: text/markdown
|
|
21
|
-
License-File: LICENSE
|
|
22
|
-
Requires-Dist: numpy<3.0,>=1.24
|
|
23
|
-
Requires-Dist: gdal[numpy]<4.0,>=3.8
|
|
24
|
-
Requires-Dist: scikit-image<1.0,>=0.20
|
|
25
|
-
Requires-Dist: torch
|
|
26
|
-
Requires-Dist: dill
|
|
27
|
-
Requires-Dist: deprecation
|
|
28
|
-
Requires-Dist: tomli
|
|
29
|
-
Requires-Dist: h3
|
|
30
|
-
Requires-Dist: pyproj
|
|
31
|
-
Provides-Extra: mlx
|
|
32
|
-
Requires-Dist: mlx; extra == "mlx"
|
|
33
|
-
Provides-Extra: matplotlib
|
|
34
|
-
Requires-Dist: matplotlib; extra == "matplotlib"
|
|
35
|
-
Provides-Extra: dev
|
|
36
|
-
Requires-Dist: mypy; extra == "dev"
|
|
37
|
-
Requires-Dist: pylint; extra == "dev"
|
|
38
|
-
Requires-Dist: pytest; extra == "dev"
|
|
39
|
-
Requires-Dist: pytest-cov; extra == "dev"
|
|
40
|
-
Requires-Dist: build; extra == "dev"
|
|
41
|
-
Requires-Dist: twine; extra == "dev"
|
|
42
|
-
Requires-Dist: mkdocs-material; extra == "dev"
|
|
43
|
-
Requires-Dist: mkdocstrings-python; extra == "dev"
|
|
44
|
-
Requires-Dist: mike; extra == "dev"
|
|
45
|
-
Requires-Dist: mkdocs-gen-files; extra == "dev"
|
|
46
|
-
Dynamic: license-file
|
|
47
|
-
|
|
48
|
-
# Yirgacheffe: a declarative geospatial library for Python to make data-science with maps easier
|
|
49
|
-
|
|
50
|
-
[](https://github.com/quantifyearth/yirgacheffe/actions)
|
|
51
|
-
[](https://yirgacheffe.org)
|
|
52
|
-
[](https://pypi.org/project/yirgacheffe/)
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
## Overview
|
|
56
|
-
|
|
57
|
-
Yirgacheffe is an attempt to wrap raster and polygon geospatial datasets such that you can do computational work on them as a whole or at the pixel level, but without having to do a lot of the grunt work of working out where you need to be in rasters, or managing how much you can load into memory safely.
|
|
58
|
-
|
|
59
|
-
Example common use-cases:
|
|
60
|
-
|
|
61
|
-
* Do the datasets overlap? Yirgacheffe will let you define either the intersection or the union of a set of different datasets, scaling up or down the area as required.
|
|
62
|
-
* Rasterisation of vector layers: if you have a vector dataset then you can add that to your computation and Yirgacheffe will rasterize it on demand, so you never need to store more data in memory than necessary.
|
|
63
|
-
* Do the raster layers get big and take up large amounts of memory? Yirgacheffe will let you do simple numerical operations with layers directly and then worry about the memory management behind the scenes for you.
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
## Installation
|
|
67
|
-
|
|
68
|
-
Yirgacheffe is available via pypi, so can be installed with pip for example:
|
|
69
|
-
|
|
70
|
-
```SystemShell
|
|
71
|
-
$ pip install yirgacheffe
|
|
72
|
-
```
|
|
73
|
-
|
|
74
|
-
## Basic usage
|
|
75
|
-
|
|
76
|
-
The main unit of data in Yirgacheffe is a "layer", which wraps either a raster dataset or polygon data, and then you can do work on layers without having to worry (unless you choose to) about how they align - Yirgacheffe will work out all the details around overlapping.
|
|
77
|
-
|
|
78
|
-
The motivation for Yirgacheffe layers is to make working with gdal data slightly easier - it just hides some common operations, such as incremental loading to save memory, or letting you align layers to generate either the intersection result or the union result.
|
|
79
|
-
|
|
80
|
-
For example, if we wanted to do a simple [Area of Habitat](https://github.com/quantifyearth/aoh-calculator/) calculation, whereby we find the pixels where a species resides by combining its range polygon, its habitat preferences, and its elevation preferences, the code would be like this:
|
|
81
|
-
|
|
82
|
-
```python
|
|
83
|
-
import yirgacheffe as yg
|
|
84
|
-
|
|
85
|
-
habitat_map = yg.read_raster("habitats.tif")
|
|
86
|
-
elevation_map = yg.read_raster('elevation.tif')
|
|
87
|
-
range_polygon = yg.read_shape('species123.geojson')
|
|
88
|
-
area_per_pixel_map = yg.read_raster('area_per_pixel.tif')
|
|
89
|
-
|
|
90
|
-
refined_habitat = habitat_map.isin([...species habitat codes...])
|
|
91
|
-
refined_elevation = (elevation_map >= species_min) & (elevation_map <= species_max)
|
|
92
|
-
|
|
93
|
-
aoh = refined_habitat * refined_elevation * range_polygon * area_per_pixel_map
|
|
94
|
-
|
|
95
|
-
print(f'area for species 123: {aoh.sum()}')
|
|
96
|
-
```
|
|
97
|
-
|
|
98
|
-
Similarly, you could save the result to a new raster layer:
|
|
99
|
-
|
|
100
|
-
```python
|
|
101
|
-
...
|
|
102
|
-
aoh.to_geotiff("result.tif")
|
|
103
|
-
```
|
|
104
|
-
|
|
105
|
-
Yirgacheffe will automatically infer if you want to do an intersection of maps or a union of the maps based on the operators you use (see below for a full table). You can explicitly override that if you want.
|
|
106
|
-
|
|
107
|
-
### Lazy loading and evaluation
|
|
108
|
-
|
|
109
|
-
Yirgacheffe uses a technique from computer science called "lazy evaluation", which means that only when you resolve a calculation will Yirgacheffe do any work. So in the first code example given, the work is only calculated when you call the `sum()` method. All the other intermediary results such as `refined_habitat` and `refined_elevation` are not calculated either until that final `sum()` is called. You could easily call sum on those intermediaries if you wanted and get their results, and that would cause them to be evaluated then.
|
|
110
|
-
|
|
111
|
-
Similarly, when you load a layer, be it a raster layer or a vector layer from polygon data, the full data for the file isn't loaded until it's needed for a calculation, and even then only the part of the data necessary will be loaded or rasterized. Furthermore, Yirgacheffe will load the data in chunks, letting you work with rasters bigger than those that would otherwise fit within your computer's memory.
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
### Automatic expanding and contracting layers
|
|
115
|
-
|
|
116
|
-
When you load raster layers that aren't of equal geographic area (that is, they have a different origin, size, or both) then Yirgacheffe will do all the math internally to ensure that it aligns the pixels geospatially when doing calculations.
|
|
117
|
-
|
|
118
|
-
If size adjustments are needed, then Yirgacheffe will infer from the calculations you're doing if it needs to either crop or enlarge layers. For instance, if you're summing two rasters it'll expand them to be the union of their two areas before adding them, filling in the missing parts with zeros. If you're multiplying or doing a logical AND of pixels then it'll find the intersection between the two rasters (as areas missing in one would cause the other layer to result in zero anyway).
|
|
119
|
-
|
|
120
|
-
Whilst it is hoped that the default behaviour makes sense in most cases, we can't anticipate all usages, and so if you want to be explicit about the result of any maps you can specify it yourself.
|
|
121
|
-
|
|
122
|
-
For example, to tell Yirgacheffe to make a union of a set of layers you'd write:
|
|
123
|
-
|
|
124
|
-
```python
|
|
125
|
-
layers = [habitat_map, elevation_map, range_polygon]
|
|
126
|
-
union_area = YirgacheffeLayer.find_union(layers)
|
|
127
|
-
for layer in layers:
|
|
128
|
-
layer.set_window_for_union(union_area)
|
|
129
|
-
```
|
|
130
|
-
|
|
131
|
-
There is a similar set of methods for using the intersection.
|
|
132
|
-
|
|
133
|
-
If you have set either the intersection window or union window on a layer and you wish to undo that restriction, then you can simply call `reset_window()` on the layer.
|
|
134
|
-
|
|
135
|
-
### Direct access to data
|
|
136
|
-
|
|
137
|
-
If doing per-layer operations isn't applicable for your application, you can read the pixel values for all layers (including VectorLayers) by calling `read_array` similarly to how you would for GDAL. The data you access will be relative to the specified window - that is, if you've called either `set_window_for_intersection` or `set_window_for_union` then `read_array` will be relative to that and Yirgacheffe will clip or expand the data with zero values as necessary.
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
## Layer types
|
|
141
|
-
|
|
142
|
-
Note that as part of the move to the next major release, 2.0, we are adding simpler ways to create layers. Not all of those have been implemented yet, which is why this section has some inconsistencies. However, given many of the common cases are already covered, we present the new 2.0 style methods (`read_raster` and similar) here so you can write cleaner code today rather than making people wait for the final 2.0 release.
|
|
143
|
-
|
|
144
|
-
### RasterLayer
|
|
145
|
-
|
|
146
|
-
This is your basic GDAL raster layer, which you load from a geotiff.
|
|
147
|
-
|
|
148
|
-
```python
|
|
149
|
-
from yirgacheffe.layers import RasterLayer
|
|
150
|
-
|
|
151
|
-
with RasterLayer.layer_from_file('test1.tif') as layer:
|
|
152
|
-
total = layer.sum()
|
|
153
|
-
```
|
|
154
|
-
|
|
155
|
-
The new 2.0 way of doing this is:
|
|
156
|
-
|
|
157
|
-
```python
|
|
158
|
-
import yirgacheffe as yg
|
|
159
|
-
|
|
160
|
-
with yg.read_raster('test.tif') as layer:
|
|
161
|
-
total = layer.sum()
|
|
162
|
-
```
|
|
163
|
-
|
|
164
|
-
You can also create empty layers ready for you to store results, either by taking the dimensions from an existing layer or by specifying the geographic area directly. In both these cases you can either provide a filename to which the data will be written, or if you do not provide a filename then the layer will only exist in memory - this will be more efficient if the layer is being used for intermediary results.
|
|
165
|
-
|
|
166
|
-
```python
|
|
167
|
-
with RasterLayer.empty_raster_layer_like(layer1, "results.tiff") as result:
|
|
168
|
-
...
|
|
169
|
-
```
|
|
170
|
-
|
|
171
|
-
Or you can specify the geographic area directly:
|
|
172
|
-
|
|
173
|
-
```python
|
|
174
|
-
with RasterLayer.empty_raster_layer(
|
|
175
|
-
Area(left=-10.0, top=10.0, right=-5.0, bottom=5.0),
|
|
176
|
-
PixelScale(0.005,-0.005),
|
|
177
|
-
gdal.GDT_Float64,
|
|
178
|
-
"results.tiff"
|
|
179
|
-
) as result:
|
|
180
|
-
...
|
|
181
|
-
```
|
|
182
|
-
|
|
183
|
-
You can also create a new layer that is a scaled version of an existing layer:
|
|
184
|
-
|
|
185
|
-
```python
|
|
186
|
-
with RasterLayer.layer_from_file('test1.tif') as source:
|
|
187
|
-
scaled = RasterLayer.scaled_raster_from_raster(source, PixelScale(0.0001, -0.0001), 'scaled.tif')
|
|
188
|
-
```
|
|
189
|
-
|
|
190
|
-
If the data is from a GeoTIFF that has a nodata value specified, then pixel values with that specified nodata value in them will be converted to NaN. You can override that by providing `ignore_nodata=True` as an optional argument to `layer_from_file` (or with the new 2.0 API, `read_raster`). You can find out if a layer has a nodata value by accessing the `nodata` property - it is None if there is no such value.
|
|
191
|
-
|
|
192
|
-
### VectorLayer
|
|
193
|
-
|
|
194
|
-
This layer will load vector data and rasterize it on demand as part of a calculation - because it only rasterizes the data when needed, it is memory efficient.
|
|
195
|
-
|
|
196
|
-
Because it will be rasterized you need to specify the pixel scale and map projection to be used when rasterising the data, and the common way to do that is by using one of your other layers.
|
|
197
|
-
|
|
198
|
-
```python
|
|
199
|
-
from yirgacheffe import WGS_84_PROJECTION
|
|
200
|
-
from yirgacheffe.window import PixelScale
|
|
201
|
-
from yirgacheffe.layers import VectorLayer
|
|
202
|
-
|
|
203
|
-
with VectorLayer.layer_from_file('range.gpkg', PixelScale(0.001, -0.001), WGS_84_PROJECTION) as layer:
|
|
204
|
-
...
|
|
205
|
-
```
|
|
206
|
-
|
|
207
|
-
The new 2.0 way of doing this is, if you plan to use the vector layer in calculation with other raster layers that will have projection information:
|
|
208
|
-
|
|
209
|
-
```python
|
|
210
|
-
import yirgacheffe as yg
|
|
211
|
-
|
|
212
|
-
with yg.read_shape('range.gpkg') as layer:
|
|
213
|
-
...
|
|
214
|
-
```
|
|
215
|
-
|
|
216
|
-
Or if you plan to use the layer on its own and want to specify a rasterisation projection you can do:
|
|
217
|
-
|
|
218
|
-
```python
|
|
219
|
-
import yirgacheffe as yg
|
|
220
|
-
|
|
221
|
-
with yg.read_shape('range.gpkg', (yg.WGS_84_PROJECTION, (0.001, -0.001))) as layer:
|
|
222
|
-
...
|
|
223
|
-
```
|
|
224
|
-
|
|
225
|
-
### GroupLayer
|
|
226
|
-
|
|
227
|
-
You can combine several layers into one virtual layer to save you worrying about how to merge them if you don't want to manually add the layers together. Useful when you have tile sets for example. Any area not covered by a layer in the group will return zeros.
|
|
228
|
-
|
|
229
|
-
```python
|
|
230
|
-
tile1 = RasterLayer.layer_from_file('tile_N10_E10.tif')
|
|
231
|
-
tile2 = RasterLayer.layer_from_file('tile_N20_E10.tif')
|
|
232
|
-
all_tiles = GroupLayer([tile1, tile2])
|
|
233
|
-
```
|
|
234
|
-
|
|
235
|
-
If you provide tiles that overlap then they will be rendered in reverse order, so in the above example if tile1 and tile2 overlap, then in that region you'd get the data from tile1.
|
|
236
|
-
|
|
237
|
-
To save you specifying each layer, there is a convenience method to let you just load a set of TIFs by filename:
|
|
238
|
-
|
|
239
|
-
```python
|
|
240
|
-
with GroupLayer.layer_from_files(['tile_N10_E10.tif', 'tile_N20_E10.tif']) as all_tiles:
|
|
241
|
-
...
|
|
242
|
-
```
|
|
243
|
-
|
|
244
|
-
Or you can just specify a directory and it'll find the tifs in there (you can also add your own custom file filter too):
|
|
245
|
-
|
|
246
|
-
```python
|
|
247
|
-
with GroupLayer.layer_from_directory('.') as all_tiles:
|
|
248
|
-
...
|
|
249
|
-
```
|
|
250
|
-
|
|
251
|
-
The new 2.0 way of doing this is:
|
|
252
|
-
|
|
253
|
-
```python
|
|
254
|
-
import yirgacheffe as yg
|
|
255
|
-
|
|
256
|
-
with yg.read_rasters(['tile_N10_E10.tif', 'tile_N20_E10.tif']) as all_tiles:
|
|
257
|
-
...
|
|
258
|
-
```
|
|
259
|
-
|
|
260
|
-
If any of the layers have a `nodata` value specified, then any pixel with that value will be masked out to allow data from other layers to be visible.
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
### TiledGroupLayer
|
|
264
|
-
|
|
265
|
-
This is a specialisation of GroupLayer, which you can use if your layers are all the same size and form a grid, as is often the case with map tiles. In this case the rendering code can be optimised and this class is significantly faster than GroupLayer.
|
|
266
|
-
|
|
267
|
-
```python
|
|
268
|
-
tile1 = RasterLayer.layer_from_file('tile_N10_E10.tif')
|
|
269
|
-
tile2 = RasterLayer.layer_from_file('tile_N20_E10.tif')
|
|
270
|
-
all_tiles = TiledGroupLayer([tile1, tile2])
|
|
271
|
-
```
|
|
272
|
-
|
|
273
|
-
The new 2.0 way of doing this is:
|
|
274
|
-
|
|
275
|
-
```python
|
|
276
|
-
import yirgacheffe as yg
|
|
277
|
-
|
|
278
|
-
with yg.read_rasters(['tile_N10_E10.tif', 'tile_N20_E10.tif'], tiled=True) as all_tiles:
|
|
279
|
-
...
|
|
280
|
-
```
|
|
281
|
-
|
|
282
|
-
Notes:
|
|
283
|
-
|
|
284
|
-
* You can have missing tiles, and these will be filled in with zeros.
|
|
285
|
-
* You can have tiles that overlap, so long as they still conform to the rule that all tiles are the same size and on a grid.
|
|
286
|
-
|
|
287
|
-
### Constants
|
|
288
|
-
|
|
289
|
-
At times it is useful to have a fixed constant in an expression. Typically, similar to numpy, if an expression involving layers has a constant in, Yirgacheffe will apply that to all pixels in the equation without need for further elaboration:
|
|
290
|
-
|
|
291
|
-
```python
|
|
292
|
-
with yg.read_raster("some_data.tif") as layer:
|
|
293
|
-
doubled_layer = layer * 2.0
|
|
294
|
-
...
|
|
295
|
-
```
|
|
296
|
-
|
|
297
|
-
This can be useful in tasks where you have an optional layer in your code. For example, here the code optionally loads an area-per-pixel layer, which if not present can just be substituted with a 1.0:
|
|
298
|
-
|
|
299
|
-
```python
|
|
300
|
-
try:
|
|
301
|
-
area_layer = yg.read_raster('myarea.tiff')
|
|
302
|
-
except FileNotFoundError:
|
|
303
|
-
area_layer = 1.0
|
|
304
|
-
```
|
|
305
|
-
|
|
306
|
-
However, as with numpy, Python can not make the correct inference if the constant value is the first term in the equation. In that case you need to explicitly wrap the value with `constant` to help Python understand what is happening:
|
|
307
|
-
|
|
308
|
-
```python
|
|
309
|
-
with yg.read_raster("some_data.tif") as layer:
|
|
310
|
-
result = yg.constant(1.0) / layer
|
|
311
|
-
```
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
### H3CellLayer
|
|
315
|
-
|
|
316
|
-
If you have H3 installed, you can generate a mask layer based on an H3 cell identifier, where pixels inside the cell will have a value of 1, and those outside will have a value of 0.
|
|
317
|
-
|
|
318
|
-
Because it will be rasterized you need to specify the pixel scale and map projection to be used when rasterising the data, and the common way to do that is by using one of your other layers.
|
|
319
|
-
|
|
320
|
-
```python
|
|
321
|
-
hex_cell_layer = H3CellLayer('88972eac11fffff', layer1.pixel_scale, layer1.projection)
|
|
322
|
-
```
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
### UniformAreaLayer
|
|
326
|
-
|
|
327
|
-
In certain calculations you find you have a layer where all the rows of data are the same - notably geotiffs that contain the area of a given pixel do this due to how conventional map projections work. It's hugely inefficient to load the full map into memory, so whilst you could just load them as `Layer` types, we recommend you do:
|
|
328
|
-
|
|
329
|
-
```python
|
|
330
|
-
with UniformAreaLayer('area.tiff') as layer:
|
|
331
|
-
....
|
|
332
|
-
```
|
|
333
|
-
|
|
334
|
-
Note that loading this data can still be very slow, due to how image compression works. So if you plan to use area.tiff more than once, we recommend you save an optimised version - this will do the slow uncompression once and then save a minimal file to speed up future processing:
|
|
335
|
-
|
|
336
|
-
```python
|
|
337
|
-
if not os.path.exists('yirgacheffe_area.tiff'):
|
|
338
|
-
UniformAreaLayer.generate_narrow_area_projection('area.tiff', 'yirgacheffe_area.tiff')
|
|
339
|
-
area_layer = UniformAreaLayer('yirgacheffe_area.tiff')
|
|
340
|
-
```
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
## Supported operations on layers
|
|
344
|
-
|
|
345
|
-
Once you have two layers, you can perform numerical analysis on them similar to how numpy works:
|
|
346
|
-
|
|
347
|
-
### Add, subtract, multiply, divide
|
|
348
|
-
|
|
349
|
-
Pixel-wise addition, subtraction, multiplication or division (both true and floor division), and remainder. Either between arrays, or with constants:
|
|
350
|
-
|
|
351
|
-
```python
|
|
352
|
-
with RasterLayer.layer_from_file('test1.tif') as layer1:
|
|
353
|
-
with RasterLayer.layer_from_file('test2.tif') as layer2:
|
|
354
|
-
with RasterLayer.empty_raster_layer_like(layer1, 'result.tif') as result:
|
|
355
|
-
calc = layer1 + layer2
|
|
356
|
-
calc.save(result)
|
|
357
|
-
```
|
|
358
|
-
|
|
359
|
-
or
|
|
360
|
-
|
|
361
|
-
```python
|
|
362
|
-
with RasterLayer.layer_from_file('test1.tif') as layer1:
|
|
363
|
-
with RasterLayer.empty_raster_layer_like(layer1, 'result.tif') as result:
|
|
364
|
-
calc = layer1 * 42.0
|
|
365
|
-
calc.save(result)
|
|
366
|
-
```
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
The new 2.0 way of doing these is:
|
|
370
|
-
|
|
371
|
-
```python
|
|
372
|
-
with yg.read_raster('test1.tif') as layer1:
|
|
373
|
-
with yg.read_raster('test2.tif') as layer2:
|
|
374
|
-
result = layer1 + layer2
|
|
375
|
-
result.to_geotiff("result.tif")
|
|
376
|
-
```
|
|
377
|
-
|
|
378
|
-
or
|
|
379
|
-
|
|
380
|
-
```python
|
|
381
|
-
with yg.read_raster('test1.tif') as layer1:
|
|
382
|
-
result = layer1 * 42.0
|
|
383
|
-
result.to_geotiff("result.tif")
|
|
384
|
-
```
|
|
385
|
-
|
|
386
|
-
### Boolean testing
|
|
387
|
-
|
|
388
|
-
Testing for equality, less than, less than or equal, greater than, and greater than or equal are supported on layers, along with logical or and logical and, as per this example, where `elevation_upper` and `elevation_lower` are scalar values:
|
|
389
|
-
|
|
390
|
-
```
|
|
391
|
-
filtered_elevation = (min_elevation_map <= elevation_upper) & (max_elevation_map >= elevation_lower)
|
|
392
|
-
```
|
|
393
|
-
|
|
394
|
-
### Power
|
|
395
|
-
|
|
396
|
-
Pixel-wise raising to a constant power:
|
|
397
|
-
|
|
398
|
-
```python
|
|
399
|
-
with RasterLayer.layer_from_file('test1.tif') as layer1:
|
|
400
|
-
with RasterLayer.empty_raster_layer_like(layer1, 'result.tif') as result:
|
|
401
|
-
calc = layer1 ** 0.65
|
|
402
|
-
calc.save(result)
|
|
403
|
-
```
|
|
404
|
-
|
|
405
|
-
### Log, Exp, Clip, etc.
|
|
406
|
-
|
|
407
|
-
The following math operators common to numpy and other libraries are currently supported:
|
|
408
|
-
|
|
409
|
-
* abs
|
|
410
|
-
* ceil
|
|
411
|
-
* clip
|
|
412
|
-
* exp
|
|
413
|
-
* exp2
|
|
414
|
-
* floor
|
|
415
|
-
* isin
|
|
416
|
-
* log
|
|
417
|
-
* log2
|
|
418
|
-
* log10
|
|
419
|
-
* maximum
|
|
420
|
-
* minimum
|
|
421
|
-
* nan_to_num
|
|
422
|
-
* round
|
|
423
|
-
|
|
424
|
-
Typically these can be invoked either on a layer as a method:
|
|
425
|
-
|
|
426
|
-
```python
|
|
427
|
-
calc = layer1.log10()
|
|
428
|
-
```
|
|
429
|
-
|
|
430
|
-
Or via the operators module, as it's sometimes nicer to do it this way when chaining together operations in a single expression:
|
|
431
|
-
|
|
432
|
-
```python
|
|
433
|
-
import yirgacheffe.operators as yo
|
|
434
|
-
|
|
435
|
-
calc = yo.log10(layer1 / layer2)
|
|
436
|
-
```
|
|
437
|
-
|
|
438
|
-
### 2D matrix convolution
|
|
439
|
-
|
|
440
|
-
To facilitate image processing algorithms you can supply a weight matrix to generate a processed image. Currently this support only works for square weight matrices of an odd size.
|
|
441
|
-
|
|
442
|
-
For example, to apply a blur function to a raster:
|
|
443
|
-
|
|
444
|
-
```python
|
|
445
|
-
blur_filter = np.array([
|
|
446
|
-
[0.0, 0.1, 0.0],
|
|
447
|
-
[0.1, 0.6, 0.1],
|
|
448
|
-
[0.0, 0.1, 0.0],
|
|
449
|
-
])
|
|
450
|
-
with RasterLayer.layer_from_file('original.tif') as layer1:
|
|
451
|
-
with RasterLayer.empty_raster_layer_like(layer1, 'blurred.tif') as result:
|
|
452
|
-
calc = layer1.conv2d(blur_filter)
|
|
453
|
-
calc.save(result)
|
|
454
|
-
```
|
|
455
|
-
|
|
456
|
-
### Type conversion
|
|
457
|
-
|
|
458
|
-
Similar to numpy and other Python numerical libraries, Yirgacheffe will automatically deal with simple type conversion where possible, however sometimes explicit conversion is either necessary or desired. Similar to numpy, there is an `astype` operator that lets you set the conversion:
|
|
459
|
-
|
|
460
|
-
```python
|
|
461
|
-
from yirgacheffe import DataType
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
with RasterLayer.layer_from_file('float_data.tif') as float_layer:
|
|
465
|
-
int_layer = float_layer.astype(DataType.Int32)
|
|
466
|
-
```
|
|
467
|
-
|
|
468
|
-
### Apply
|
|
469
|
-
|
|
470
|
-
You can specify a function that takes either data from one layer or from two layers, and returns the processed data. There are two versions of this: one that lets you specify a numpy function that'll be applied to the layer data as an array, and one that is more shader-like and lets you do pixel-wise processing.
|
|
471
|
-
|
|
472
|
-
Firstly the numpy version looks like this:
|
|
473
|
-
|
|
474
|
-
```python
|
|
475
|
-
def is_over_ten(input_array):
|
|
476
|
-
return numpy.where(input_array > 10.0, 1.0, 0.0)
|
|
477
|
-
|
|
478
|
-
layer1 = RasterLayer.layer_from_file('test1.tif')
|
|
479
|
-
result = RasterLayer.empty_raster_layer_like(layer1, 'result.tif')
|
|
480
|
-
|
|
481
|
-
calc = layer1.numpy_apply(is_over_ten)
|
|
482
|
-
|
|
483
|
-
calc.save(result)
|
|
484
|
-
```
|
|
485
|
-
|
|
486
|
-
or
|
|
487
|
-
|
|
488
|
-
```python
|
|
489
|
-
def simple_add(first_array, second_array):
|
|
490
|
-
return first_array + second_array
|
|
491
|
-
|
|
492
|
-
layer1 = RasterLayer.layer_from_file('test1.tif')
|
|
493
|
-
layer2 = RasterLayer.layer_from_file('test2.tif')
|
|
494
|
-
result = RasterLayer.empty_raster_layer_like(layer1, 'result.tif')
|
|
495
|
-
|
|
496
|
-
calc = layer1.numpy_apply(simple_add, layer2)
|
|
497
|
-
|
|
498
|
-
calc.save(result)
|
|
499
|
-
```
|
|
500
|
-
|
|
501
|
-
If you want to do something specific at the pixel level, then you can also do that, again in either a unary or binary form.
|
|
502
|
-
|
|
503
|
-
```python
|
|
504
|
-
def is_over_ten(input_pixel):
|
|
505
|
-
return 1.0 if input_pixel > 10 else 0.0
|
|
506
|
-
|
|
507
|
-
layer1 = RasterLayer.layer_from_file('test1.tif')
|
|
508
|
-
result = RasterLayer.empty_raster_layer_like(layer1, 'result.tif')
|
|
509
|
-
|
|
510
|
-
calc = layer1.shader_apply(is_over_ten)
|
|
511
|
-
|
|
512
|
-
calc.save(result)
|
|
513
|
-
```
|
|
514
|
-
|
|
515
|
-
Note that in general `numpy_apply` is considerably faster than `shader_apply`.
|
|
516
|
-
|
|
517
|
-
## Getting an answer out
|
|
518
|
-
|
|
519
|
-
There are two ways to store the result of a computation. In all the above examples we use the `save` call, to which you pass a gdal dataset band, into which the results will be written. You can optionally pass a callback to save which will be called for each chunk of data processed and give you the amount of progress made so far as a number between 0.0 and 1.0:
|
|
520
|
-
|
|
521
|
-
```python
|
|
522
|
-
def print_progress(p):
|
|
523
|
-
print(f"We have made {p * 100} percent progress")
|
|
524
|
-
|
|
525
|
-
...
|
|
526
|
-
|
|
527
|
-
calc.save(result, callback=print_progress)
|
|
528
|
-
```
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
The alternative is to call `sum` which will give you a total:
|
|
532
|
-
|
|
533
|
-
```python
|
|
534
|
-
with (
|
|
535
|
-
RasterLayer.layer_from_file(...) as area_layer,
|
|
536
|
-
VectorLayer(...) as mask_layer
|
|
537
|
-
):
|
|
538
|
-
intersection = RasterLayer.find_intersection([area_layer, mask_layer])
|
|
539
|
-
area_layer.set_window_for_intersection(intersection)
|
|
540
|
-
mask_layer.set_window_for_intersection(intersection)
|
|
541
|
-
|
|
542
|
-
calc = area_layer * mask_layer
|
|
543
|
-
|
|
544
|
-
total_area = calc.sum()
|
|
545
|
-
```
|
|
546
|
-
|
|
547
|
-
Similar to sum, you can also call `min` and `max` on a layer or calculation.
|
|
548
|
-
|
|
549
|
-
## Experimental
|
|
550
|
-
|
|
551
|
-
The following features are considered experimental - they have test cases to show them working in limited circumstances, but they've not yet been tested on a wide range of use cases. We hope that you will try them out and let us know how they work out.
|
|
552
|
-
|
|
553
|
-
### RescaledRasterLayer
|
|
554
|
-
|
|
555
|
-
The RescaledRasterLayer will take a GeoTIFF and do on demand rescaling in memory to get the layer to match other layers you're working on.
|
|
556
|
-
|
|
557
|
-
```python
|
|
558
|
-
with RasterLayer.layer_from_file("high_density_file.tif") as high_density:
|
|
559
|
-
with RescaledRasterLayer.layer_from_file("low_density_file.tif", high_density.pixel_scale) as matched_density:
|
|
560
|
-
|
|
561
|
-
# Normally this next line would fail with two RasterLayers as they have a different pixel density
|
|
562
|
-
intersection = RasterLayer.find_intersection([high_density, matched_density])
|
|
563
|
-
high_density.set_window_for_intersection(intersection)
|
|
564
|
-
matched_density.set_window_for_intersection(intersection)
|
|
565
|
-
|
|
566
|
-
calc = high_density * matched_density
|
|
567
|
-
total = calc.sum()
|
|
568
|
-
|
|
569
|
-
```
|
|
570
|
-
|
|
571
|
-
### Parallel saving
|
|
572
|
-
|
|
573
|
-
There is a parallel version of save that can use multiple CPU cores at once to speed up work by running the save concurrently over many threads. It has been added as an experimental feature for testing in our wider codebase.
|
|
574
|
-
|
|
575
|
-
```python
|
|
576
|
-
calc.parallel_save(result)
|
|
577
|
-
```
|
|
578
|
-
|
|
579
|
-
By default it will use as many CPU cores as are available, but if you want to limit that you can pass an extra argument to constrain that:
|
|
580
|
-
|
|
581
|
-
```python
|
|
582
|
-
calc.parallel_save(result, parallelism=4)
|
|
583
|
-
```
|
|
584
|
-
|
|
585
|
-
Because of the number of tricks that Python plays under the hood this feature needs a bunch of testing to let us remove the experimental flag, but in order to get that testing we need to put it out there! Hopefully in the next release we can remove the experimental warning.
|
|
586
|
-
|
|
587
|
-
## GPU support
|
|
588
|
-
|
|
589
|
-
Yirgacheffe has multiple backends, with more planned. Currently you can set the `YIRGACHEFFE_BACKEND` environmental variable to select which one to use. The default is `NUMPY`:
|
|
590
|
-
|
|
591
|
-
* NUMPY: CPU based calculation using [numpy](https://numpy.org/)
|
|
592
|
-
* MLX: Apple/Intel GPU support with CPU fallback based on [MLX](https://ml-explore.github.io/mlx/build/html/index.html)
|
|
593
|
-
|
|
594
|
-
Note that GPU isn't always faster than CPU - it very much depends on the workload, so testing your particular use-case is important.
|
|
595
|
-
|
|
596
|
-
## Thanks
|
|
597
|
-
|
|
598
|
-
Thanks to discussion and feedback from my colleagues, particularly Alison Eyres, Patrick Ferris, Amelia Holcomb, and Anil Madhavapeddy.
|
|
599
|
-
|
|
600
|
-
Inspired by the work of Daniele Baisero in his AoH library.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|