PyPI - yirgacheffe - Versions diffs - 1.2.0__py3-none-any.whl - Mend

yirgacheffe 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

yirgacheffe/__init__.py +17 -0
yirgacheffe/backends/__init__.py +13 -0
yirgacheffe/backends/enumeration.py +33 -0
yirgacheffe/backends/mlx.py +156 -0
yirgacheffe/backends/numpy.py +110 -0
yirgacheffe/constants.py +1 -0
yirgacheffe/h3layer.py +2 -0
yirgacheffe/layers/__init__.py +44 -0
yirgacheffe/layers/area.py +91 -0
yirgacheffe/layers/base.py +265 -0
yirgacheffe/layers/constant.py +41 -0
yirgacheffe/layers/group.py +357 -0
yirgacheffe/layers/h3layer.py +203 -0
yirgacheffe/layers/rasters.py +333 -0
yirgacheffe/layers/rescaled.py +94 -0
yirgacheffe/layers/vectors.py +380 -0
yirgacheffe/operators.py +738 -0
yirgacheffe/rounding.py +57 -0
yirgacheffe/window.py +141 -0
yirgacheffe-1.2.0.dist-info/METADATA +473 -0
yirgacheffe-1.2.0.dist-info/RECORD +25 -0
yirgacheffe-1.2.0.dist-info/WHEEL +5 -0
yirgacheffe-1.2.0.dist-info/entry_points.txt +2 -0
yirgacheffe-1.2.0.dist-info/licenses/LICENSE +7 -0
yirgacheffe-1.2.0.dist-info/top_level.txt +1 -0

yirgacheffe/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+from osgeo import gdal
+# Look up the installed version of this package by its module name; fall back
+# to "unknown" when the lookup raises ModuleNotFoundError.
+try:
+    from importlib import metadata
+    __version__ = metadata.version(__name__)
+except ModuleNotFoundError:
+    __version__ = "unknown"
+# Module-level side effect: switch the GDAL bindings to exception-raising mode
+# as soon as this package is imported.
+gdal.UseExceptions()
+# I don't really want this here, but it's just too useful having it exposed
+# WKT description of the WGS 84 geographic coordinate system (EPSG:4326).
+WGS_84_PROJECTION = 'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,'\
+    'AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],'\
+    'UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],'\
+    'AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]'
+# For legacy reasons [facepalm]
+# (the misspelt WSG_ name is kept as an alias of the correctly spelt constant)
+WSG_84_PROJECTION = WGS_84_PROJECTION

yirgacheffe/backends/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+import os
+BACKEND = os.environ.get("YIRGACHEFFE_BACKEND", "NUMPY").upper()
+match BACKEND:
+    case "MLX":
+        from . import mlx
+        backend = mlx
+    case "NUMPY":
+        from . import numpy
+        backend = numpy
+    case _:
+        raise NotImplementedError("Only NUMPY and MLX backends supported")

yirgacheffe/backends/enumeration.py ADDED Viewed

@@ -0,0 +1,33 @@
+from enum import Enum
+class operators(Enum):
+    """Identifiers for the operations a backend can provide.
+
+    The members are used as keys of each backend module's `operator_map`.
+    """
+    # Arithmetic
+    ADD = 1
+    SUB = 2
+    MUL = 3
+    TRUEDIV = 4
+    POW = 5
+    # Comparison
+    EQ = 6
+    NE = 7
+    LT = 8
+    LE = 9
+    GT = 10
+    GE = 11
+    # Logical / bitwise
+    AND = 12
+    OR = 13
+    # Logarithms and exponentials
+    LOG = 14
+    LOG2 = 15
+    LOG10 = 16
+    EXP = 17
+    EXP2 = 18
+    # Selection, reduction and clean-up
+    CLIP = 19
+    WHERE = 20
+    MIN = 21
+    MAX = 22
+    SUM = 23
+    MINIMUM = 24
+    MAXIMUM = 25
+    NAN_TO_NUM = 26
+    ISIN = 27
+    # Integer-style division
+    REMAINDER = 28
+    FLOORDIV = 29
+    # 2D convolution
+    CONV2D = 30

yirgacheffe/backends/mlx.py ADDED Viewed

@@ -0,0 +1,156 @@
+import numpy as np
+import mlx.core as mx # pylint: disable=E0001,E0611,E0401
+import mlx.nn
+from .enumeration import operators as op
+# Array and float types exposed by the MLX backend
+array_t = mx.array
+float_t = mx.float32
+# promote wraps data as an mx.array; the demote_* helpers convert back to NumPy types
+promote = mx.array
+demote_array = np.asarray
+demote_scalar = np.float64
+# Evaluation is delegated to mx.eval
+eval_op = mx.eval
+# Element-wise binary operators.
+# NOTE(review): this module also defines mul_op and an operator_map; the map uses
+# the mx.array dunder methods for ADD/LT/LE/GT/GE/REMAINDER whereas the aliases
+# here use the mx.* functions - confirm the difference is intentional.
+add_op = mx.add
+sub_op = mx.array.__sub__
+truediv_op = mx.array.__truediv__
+pow_op = mx.array.__pow__
+eq_op = mx.array.__eq__
+ne_op =mx.array.__ne__
+lt_op = mx.less
+le_op = mx.less_equal
+gt_op = mx.greater
+ge_op = mx.greater_equal
+and_op = mx.array.__and__
+or_op = mx.array.__or__
+# Element-wise maths functions
+log = mx.log
+log2 = mx.log2
+log10 = mx.log10
+exp = mx.exp
+clip = mx.clip
+where = mx.where
+# Reductions and pairwise min/max
+min_op = mx.min
+max_op = mx.max
+maximum = mx.maximum
+minimum = mx.minimum
+# Array construction and utility helpers (isscalar comes from NumPy, not MLX)
+zeros = mx.zeros
+pad = mx.pad
+isscalar = np.isscalar
+full = mx.full
+allclose = mx.allclose
+remainder_op = mx.remainder
+floordiv_op = mx.array.__floordiv__
+def sum_op(a):
+    # There are weird issues around how MLX overflows int8, so just promote the data ahead of summing
+    match a.dtype:
+        case mx.int8:
+            res = mx.sum(a.astype(mx.int32))
+        case mx.uint8:
+            res = mx.sum(a.astype(mx.uint32))
+        case _:
+            res = mx.sum(a)
+    return demote_scalar(res)
+def _is_float(x):
+    if isinstance(x, float):
+        return True
+    try:
+        np_floats = [np.dtype('float16'), np.dtype('float32'), np.dtype('float64')]
+        if x.dtype in np_floats:
+            return True
+        match x.dtype:
+            case mx.float32 | mx.float64:
+                return True
+            case _:
+                return False
+    except AttributeError:
+        return False
+def mul_op(a, b):
+    # numpy will promote an operation between float and int to float, whereas it looks like mlx does the inverse
+    # and so for consistency with the numpy path, we do some fiddling here if necessary
+    if _is_float(b):
+        match a.dtype:
+            case mx.int8 | mx.int32 | mx.uint8 | mx.uint32:
+                a = a.astype(mx.float32)
+            case mx.int64 | mx.uint64:
+                a = a.astype(mx.float64)
+            case _:
+                pass
+    return mx.multiply(a, b)
+def exp2(a):
+    mx.eval(a)
+    return promote(np.exp2(a))
+def nan_to_num(a, nan, posinf, neginf, copy): # pylint: disable=W0613
+    return mx.nan_to_num(a, float(nan), posinf, neginf)
+def isin(a, test_elements):
+    # There is no `isin` on MLX currently, so we need to fallback to CPU behaviour here
+    # https://ml-explore.github.io/mlx/build/html/dev/custom_metal_kernels.html#using-shape-strides
+    mx.eval(a)
+    return promote(np.isin(a, test_elements))
+def conv2d_op(data, weights):
+    """Run a single-channel, bias-free 2D convolution of `data` with `weights`
+    using mlx.nn.Conv2d.
+
+    NOTE(review): the result goes through np.reshape, so this presumably returns
+    a NumPy array rather than an mx.array - confirm that callers expect that.
+    """
+    # From numpy.py: torch wants to process dimensions of channels of width of height
+    # but mlx wants to process dimensions of width of height of channels, so we end up
+    # having to reshape the data, as we only ever use one channel.
+    # Which is why both the data and weights get nested into two arrays here,
+    # and then we have to unpack it from that nesting.
+    weights = mx.array(weights)
+    original_data_shape = data.shape
+    original_weights_shape = weights.shape
+    # Nest the weights twice, then reshape them to [1, *weights.shape, 1]
+    unshifted_preped_weights = np.array([[weights]])
+    conv_weights_shape = [1] + list(original_weights_shape) + [1]
+    preped_weights = mx.array(np.reshape(unshifted_preped_weights, conv_weights_shape))
+    # The kernel size is the shape of the weights; the layer's own weight is then
+    # replaced by the caller's weights
+    conv = mlx.nn.Conv2d(1, 1, weights.shape, bias=False)
+    conv.weight = preped_weights
+    # Same treatment for the data: nest twice, then reshape to [1, *data.shape, 1]
+    conv_data_shape = [1] + list(original_data_shape) + [1]
+    unshifted_data_shape = np.array([[data]])
+    preped_data = mx.array(np.reshape(unshifted_data_shape, conv_data_shape))
+    # Take the first (only) entry of the leading axis of the convolution output
+    shifted_res = conv(preped_data)[0]
+    # Drop the trailing axis and put a leading one back, which is then unpacked below
+    res = np.reshape(shifted_res, [1] + list(shifted_res.shape)[:-1])
+    return res[0]
+# Lookup from operator enum member to the MLX implementation of that operation.
+# Most entries are mx functions or mx.array dunder methods; MUL, EXP2, NAN_TO_NUM,
+# ISIN and CONV2D use the wrapper functions defined in this module.
+operator_map = {
+    op.ADD: mx.array.__add__,
+    op.SUB: mx.array.__sub__,
+    op.MUL: mul_op,
+    op.TRUEDIV: mx.array.__truediv__,
+    op.POW: mx.array.__pow__,
+    op.EQ: mx.array.__eq__,
+    op.NE: mx.array.__ne__,
+    op.LT: mx.array.__lt__,
+    op.LE: mx.array.__le__,
+    op.GT: mx.array.__gt__,
+    op.GE: mx.array.__ge__,
+    op.AND: mx.array.__and__,
+    op.OR: mx.array.__or__,
+    op.LOG: mx.log,
+    op.LOG2: mx.log2,
+    op.LOG10: mx.log10,
+    op.EXP: mx.exp,
+    op.EXP2: exp2,
+    op.CLIP: mx.clip,
+    op.WHERE: mx.where,
+    op.MIN: mx.min,
+    op.MAX:mx.max,
+    op.MINIMUM: mx.minimum,
+    op.MAXIMUM: mx.maximum,
+    op.NAN_TO_NUM: nan_to_num,
+    op.ISIN: isin,
+    op.REMAINDER: mx.remainder,
+    op.FLOORDIV: mx.array.__floordiv__,
+    op.CONV2D: conv2d_op,
+}

yirgacheffe/backends/numpy.py ADDED Viewed

@@ -0,0 +1,110 @@
+import numpy as np
+import torch
+from .enumeration import operators as op
+# Array and float types exposed by the NumPy backend
+array_t = np.ndarray
+float_t = np.float64
+# In this backend promotion, demotion and evaluation all return their argument unchanged
+promote = lambda a: a
+demote_array = lambda a: a
+demote_scalar = lambda a: a
+eval_op = lambda a: a
+# Element-wise binary operators, taken from the ndarray dunder methods
+add_op = np.ndarray.__add__
+sub_op = np.ndarray.__sub__
+mul_op = np.ndarray.__mul__
+truediv_op = np.ndarray.__truediv__
+pow_op = np.ndarray.__pow__
+eq_op = np.ndarray.__eq__
+ne_op = np.ndarray.__ne__
+lt_op = np.ndarray.__lt__
+le_op = np.ndarray.__le__
+gt_op = np.ndarray.__gt__
+ge_op = np.ndarray.__ge__
+and_op = np.ndarray.__and__
+or_op = np.ndarray.__or__
+# Element-wise functions
+nan_to_num = np.nan_to_num
+isin = np.isin
+log = np.log
+log2 = np.log2
+log10 = np.log10
+exp = np.exp
+exp2 = np.exp2
+clip = np.clip
+where = np.where
+# Reductions and pairwise min/max
+min_op = np.min
+max_op = np.max
+maximum = np.maximum
+minimum = np.minimum
+# Array construction and utility helpers
+zeros = np.zeros
+pad = np.pad
+# Sums are always computed on a float64 copy of the input
+sum_op = lambda a: np.sum(a.astype(np.float64))
+isscalar = np.isscalar
+full = np.full
+allclose = np.allclose
+remainder_op = np.ndarray.__mod__
+floordiv_op = np.ndarray.__floordiv__
+def conv2d_op(data, weights):
+    # torch wants to process dimensions of channels of width of height
+    # Which is why both the data and weights get nested into two arrays here,
+    # and then we have to unpack it from that nesting.
+    preped_weights = np.array([[weights]])
+    conv = torch.nn.Conv2d(1, 1, weights.shape, bias=False)
+    conv.weight = torch.nn.Parameter(torch.from_numpy(preped_weights))
+    preped_data = torch.from_numpy(np.array([[data]]))
+    res = conv(preped_data)
+    return res.detach().numpy()[0][0]
+# Lookup from operator enum member to the NumPy implementation of that operation.
+# Binary operators use the ndarray dunder methods, the rest are np functions, and
+# CONV2D uses the torch-based conv2d_op defined in this module.
+operator_map = {
+	op.ADD: np.ndarray.__add__,
+	op.SUB: np.ndarray.__sub__,
+	op.MUL: np.ndarray.__mul__,
+	op.TRUEDIV: np.ndarray.__truediv__,
+	op.POW: np.ndarray.__pow__,
+	op.EQ: np.ndarray.__eq__,
+	op.NE: np.ndarray.__ne__,
+	op.LT: np.ndarray.__lt__,
+	op.LE: np.ndarray.__le__,
+	op.GT: np.ndarray.__gt__,
+	op.GE: np.ndarray.__ge__,
+	op.AND: np.ndarray.__and__,
+	op.OR: np.ndarray.__or__,
+	op.LOG: np.log,
+	op.LOG2: np.log2,
+	op.LOG10: np.log10,
+	op.EXP: np.exp,
+	op.EXP2: np.exp2,
+	op.CLIP: np.clip,
+	op.WHERE: np.where,
+	op.MIN: np.min,
+	op.MAX: np.max,
+	op.MINIMUM: np.minimum,
+	op.MAXIMUM: np.maximum,
+	op.NAN_TO_NUM: np.nan_to_num,
+	op.ISIN: np.isin,
+	op.REMAINDER: np.ndarray.__mod__,
+	op.FLOORDIV: np.ndarray.__floordiv__,
+	op.CONV2D: conv2d_op,
+}
+# %-style templates giving a textual form for some of the operators; only the
+# members listed here have one.
+# NOTE(review): the CLIP template has no %s placeholder at all, and MINIMUM and
+# MAXIMUM have a single placeholder although np.minimum/np.maximum take two
+# arrays - confirm against the code that formats these strings.
+operator_str_map = {
+	op.POW: "np.ndarray.__pow__(%s, %s)",
+	op.LOG: "np.log(%s)",
+	op.LOG2: "np.log2(%s)",
+	op.LOG10: "np.log10(%s)",
+	op.EXP: "np.exp(%s)",
+	op.EXP2: "np.exp2(%s)",
+	op.CLIP: "np.clip",
+	op.WHERE: "np.where(%s, %s, %s)",
+	op.MIN: "np.min(%s)",
+	op.MAX: "np.max(%s)",
+	op.MINIMUM: "np.minimum(%s)",
+	op.MAXIMUM: "np.maximum(%s)",
+	op.NAN_TO_NUM: "np.nan_to_num(%s)",
+	op.ISIN: "np.isin(%s, %s)",
+}

yirgacheffe/constants.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ YSTEP = 512

yirgacheffe/h3layer.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ # for legacy compatibility
2	+ from .layers.h3layer import H3CellLayer # pylint: disable=W0611

yirgacheffe/layers/__init__.py ADDED Viewed

@@ -0,0 +1,44 @@
+from osgeo import ogr
+from ..window import PixelScale
+from .base import YirgacheffeLayer
+from .rasters import RasterLayer, InvalidRasterBand
+from .rescaled import RescaledRasterLayer
+from .vectors import RasteredVectorLayer, VectorLayer
+from .area import UniformAreaLayer
+from .constant import ConstantLayer
+from .group import GroupLayer, TiledGroupLayer
+try:
+    from .h3layer import H3CellLayer
+except ModuleNotFoundError:
+    pass
+class Layer(RasterLayer):
+    """A placeholder for now: adds nothing to RasterLayer, which the author intends to use in place of Layer at some point."""
+class VectorRangeLayer(RasteredVectorLayer):
+    """Deprecated older name for VectorLayer"""
+    def __init__(self, range_vectors: str, where_filter: str, scale: PixelScale, projection: str):
+        vectors = ogr.Open(range_vectors)
+        if vectors is None:
+            raise FileNotFoundError(range_vectors)
+        layer = vectors.GetLayer()
+        if where_filter is not None:
+            layer.SetAttributeFilter(where_filter)
+        super().__init__(layer, scale, projection)
+class DynamicVectorRangeLayer(VectorLayer):
+    """Deprecated older name DynamicVectorLayer"""
+    def __init__(self, range_vectors: str, where_filter: str, scale: PixelScale, projection: str):
+        vectors = ogr.Open(range_vectors)
+        if vectors is None:
+            raise FileNotFoundError(range_vectors)
+        layer = vectors.GetLayer()
+        if where_filter is not None:
+            layer.SetAttributeFilter(where_filter)
+        super().__init__(layer, scale, projection)

yirgacheffe/layers/area.py ADDED Viewed

@@ -0,0 +1,91 @@
+from math import ceil, floor
+from typing import Any, Optional
+import numpy
+from osgeo import gdal
+from ..window import Area, Window
+from .rasters import RasterLayer
+class UniformAreaLayer(RasterLayer):
+    """If you have a pixel area map where all the row entries are identical, then you
+    can speed up the AoH calculations by simplifying that to a 1 pixel wide map and then
+    synthesizing the rest of the data at calc time, as decompressing the large compressed
+    TIFF files is quite slow. This class is used to load such a dataset.
+    If you have a file that is large that you'd like to shrink you can call the static method
+    generate_narrow_area_projection which will shrink the file and correct the geo info.
+    """
+    @staticmethod
+    def generate_narrow_area_projection(source_filename: str, target_filename: str) -> None:
+        source = gdal.Open(source_filename, gdal.GA_ReadOnly)
+        if source is None:
+            raise FileNotFoundError(source_filename)
+        if not UniformAreaLayer.is_uniform_area_projection(source):
+            raise ValueError("Data in area pixel map is not uniform across rows")
+        source_band = source.GetRasterBand(1)
+        target = gdal.GetDriverByName('GTiff').Create(
+            target_filename,
+            1,
+            source.RasterYSize,
+            1,
+            source_band.DataType,
+            ['COMPRESS=LZW']
+        )
+        target.SetProjection(source.GetProjection())
+        target.SetGeoTransform(source.GetGeoTransform())
+        # Although the output is 1 pixel wide, the input can be very wide, so we do this in stages
+        # otherwise gdal eats all the memory
+        step = 1000
+        target_band = target.GetRasterBand(1)
+        for yoffset in range(0, source.RasterYSize, step):
+            this_step = step
+            if (yoffset + this_step) > source.RasterYSize:
+                this_step = source.RasterYSize - yoffset
+            data = source_band.ReadAsArray(0, yoffset, 1, this_step)
+            target_band.WriteArray(data, 0, yoffset)
+    @staticmethod
+    def is_uniform_area_projection(dataset) -> bool:
+        "Check that the dataset conforms to the assumption that all rows contain the same value. Likely to be slow."
+        band = dataset.GetRasterBand(1)
+        for yoffset in range(dataset.RasterYSize):
+            row = band.ReadAsArray(0, yoffset, dataset.RasterXSize, 1)
+            if not numpy.all(numpy.isclose(row, row[0])):
+                return False
+        return True
+    def __init__(self, dataset, name: Optional[str] = None, band: int = 1):
+        if dataset.RasterXSize > 1:
+            raise ValueError("Expected a shrunk dataset")
+        self.databand = dataset.GetRasterBand(1).ReadAsArray(0, 0, 1, dataset.RasterYSize)
+        super().__init__(dataset, name, band)
+        transform = dataset.GetGeoTransform()
+        pixel_scale = self.pixel_scale
+        assert pixel_scale # from raster we should always have one
+        self._underlying_area = Area(
+            floor(-180 / pixel_scale.xstep) * pixel_scale.xstep,
+            self.area.top,
+            ceil(180 / pixel_scale.xstep) * pixel_scale.xstep,
+            self.area.bottom
+        )
+        self._active_area = self._underlying_area
+        self._window = Window(
+            xoff=0,
+            yoff=0,
+            xsize=int((self.area.right - self.area.left) / transform[1]),
+            ysize=dataset.RasterYSize,
+        )
+        self._raster_xsize = self.window.xsize
+    def read_array_with_window(self, xoffset: int, yoffset: int, xsize: int, ysize: int, window: Window) -> Any:
+        if ysize <= 0:
+            raise ValueError("Request dimensions must be positive and non-zero")
+        offset = window.yoff + yoffset
+        return self.databand[offset:offset + ysize]