Skip to content

Binarize

binarize(raster, thresholds, bands=None, nodata=None)

Binarize data based on a given threshold.

Replaces values less than or equal to the threshold with 0. Replaces values greater than the threshold with 1.

Takes one nodata value which will be re-written after transformation.

If no band/column selection is specified, all bands/columns will be used. If a parameter contains only 1 entry, it will be applied for all bands. The threshold can be set for each band individually.

Parameters:

Name Type Description Default
raster DatasetReader

Data object to be transformed.

required
bands Optional[Sequence[int]]

Selection of bands to be transformed.

None
thresholds Sequence[Number]

Threshold values for transformation.

required
nodata Optional[Number]

Nodata value to be considered.

None

Returns:

Name Type Description
out_array ndarray

The transformed data.

out_meta dict

Updated metadata.

out_settings dict

Log of input settings and calculated statistics if available.

Raises:

Type Description
InvalidRasterBandException

The input contains invalid band numbers.

NonMatchingParameterLengthsException

The input does not match the number of selected bands.

Source code in eis_toolkit/transformations/binarize.py
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
@beartype
def binarize(  # type: ignore[no-any-unimported]
    raster: rasterio.io.DatasetReader,
    thresholds: Sequence[Number],
    bands: Optional[Sequence[int]] = None,
    nodata: Optional[Number] = None,
) -> Tuple[np.ndarray, dict, dict]:
    """
    Binarize data based on a given threshold.

    Replaces values less than or equal to the threshold with 0.
    Replaces values greater than the threshold with 1.

    Takes one nodata value which will be re-written after transformation.
    If no nodata value is given, the nodata value of the raster is used.

    If no band/column selection is specified, all bands/columns will be used.
    If a parameter contains only 1 entry, it will be applied for all bands.
    The threshold can be set for each band individually.

    Args:
        raster: Data object to be transformed.
        bands: Selection of bands to be transformed.
        thresholds: Threshold values for transformation.
        nodata: Nodata value to be considered.

    Returns:
        out_array: The transformed data, one layer per selected band (in selection order).
        out_meta: Updated metadata (count, nodata and dtype reflect the output array).
        out_settings: Log of input settings and calculated statistics if available.

    Raises:
        InvalidRasterBandException: The input contains invalid band numbers or the band selection is empty.
        NonMatchingParameterLengthsException: The input does not match the number of selected bands.
    """
    bands = list(range(1, raster.count + 1)) if bands is None else bands
    nodata = cast_scalar_to_int(raster.nodata if nodata is None else nodata)

    # An empty selection leaves nothing to transform or stack, so it is rejected
    # up front with the documented exception instead of failing later on.
    if len(bands) == 0 or check_raster_bands(raster, bands) is False:
        raise InvalidRasterBandException("Invalid band selection.")

    if check_parameter_length(bands, thresholds) is False:
        raise NonMatchingParameterLengthsException("Invalid threshold length.")

    expanded_args = expand_and_zip(bands, thresholds)
    thresholds = [element[1] for element in expanded_args]

    # The output dtype choice depends only on nodata, which is the same for every
    # band, so it is resolved once instead of on each iteration.
    use_nodata_dtype = check_dtype_for_int(nodata)
    nodata_dtype = np.min_scalar_type(nodata) if use_nodata_dtype else None

    transformed_bands = []
    out_settings = {}

    for position, (band, threshold) in enumerate(zip(bands, thresholds), start=1):
        band_array = raster.read(band)
        initial_dtype = band_array.dtype

        # Remember where nodata was before binarizing so it can be written back afterwards.
        band_mask = np.isin(band_array, nodata)
        band_array = _binarize(band_array, threshold=threshold)
        band_array = np.where(band_mask, nodata, band_array)

        if use_nodata_dtype:
            band_array = band_array.astype(nodata_dtype)
        else:
            band_array = band_array.astype(initial_dtype)

        transformed_bands.append(band_array)

        out_settings[f"transformation {position}"] = {
            "band_origin": band,
            "threshold": threshold,
            "nodata": nodata,
        }

    # Stack once along a new leading band axis; growing the array inside the loop
    # would re-copy all previously processed bands on every iteration.
    out_array = np.stack(transformed_bands, axis=0)

    out_meta = raster.meta.copy()
    out_meta.update({"count": len(bands), "nodata": nodata, "dtype": out_array.dtype.name})

    return out_array, out_meta, out_settings