Skip to content

Linear

min_max_scaling(raster, bands=None, new_range=[(0, 1)], nodata=None)

Normalize data based on a specified new range.

Uses the provided new minimum and maximum to transform data into the new interval. Takes one nodata value that will be ignored in calculations.

If no band/column selection specified, all bands/columns will be used. The new_range can be set for each band individually. If a parameter contains only 1 entry, it will be applied for all bands.

Parameters:

Name Type Description Default
raster DatasetReader

Data object to be transformed.

required
bands Optional[Sequence[int]]

Selection of bands to be transformed.

None
new_range Sequence[Tuple[Number, Number]]

The new interval data will be transformed into. First value corresponds to min, second to max.

[(0, 1)]
nodata Optional[Number]

Nodata value to be considered.

None

Returns:

Name Type Description
out_array ndarray

The transformed data.

out_meta dict

Updated metadata.

out_settings dict

Log of input settings and calculated statistics if available.

Raises:

Type Description
InvalidRasterBandException

The input contains invalid band numbers.

NonMatchingParameterLengthsException

The input does not match the number of selected bands.

InvalidParameterValueException

The input does not match the requirements (values, order of values).

Source code in eis_toolkit/transformations/linear.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
@beartype
def min_max_scaling(  # type: ignore[no-any-unimported]
    raster: rasterio.io.DatasetReader,
    bands: Optional[Sequence[int]] = None,
    new_range: Sequence[Tuple[Number, Number]] = [(0, 1)],
    nodata: Optional[Number] = None,
) -> Tuple[np.ndarray, dict, dict]:
    """
    Normalize data based on a specified new range.

    Uses the provided new minimum and maximum to transform data into the new interval.
    Takes one nodata value that will be ignored in calculations.

    If no band selection is specified, all bands of the raster will be used.
    The new_range can be set for each band individually.
    If new_range contains only 1 entry, it will be applied for all bands.

    Args:
        raster: Data object to be transformed.
        bands: Selection of bands to be transformed. If None, bands 1..raster.count are used.
        new_range: The new interval data will be transformed into. First value corresponds to min, second to max.
        nodata: Nodata value to be considered. If None, the nodata value of the raster is used.

    Returns:
        out_array: The transformed data, one layer per selected band stacked along the first axis.
        out_meta: Copy of the raster metadata with updated count, nodata and dtype.
        out_settings: Log of input settings per transformed band, keyed "transformation <n>".

    Raises:
        InvalidRasterBandException: The band selection is empty or contains invalid band numbers.
        NonMatchingParameterLengthsException: The input does not match the number of selected bands.
        InvalidParameterValueException: The input does not match the requirements (values, order of values).
    """
    bands = list(range(1, raster.count + 1)) if bands is None else bands
    nodata = raster.nodata if nodata is None else nodata

    # An empty selection would leave nothing to stack into the output array, so it is
    # rejected with the band-selection error instead of failing later on an unbound name.
    if len(bands) == 0 or check_raster_bands(raster, bands) is False:
        raise InvalidRasterBandException("Invalid band selection")

    if check_parameter_length(bands, new_range) is False:
        raise NonMatchingParameterLengthsException("Invalid new_range length")

    for item in new_range:
        if not check_minmax_position(item):
            raise InvalidParameterValueException(f"Invalid min-max values provided: {item}")

    # Pair every selected band with its target interval; only the interval part is needed afterwards.
    # A new local name is used so the caller-supplied (default) sequence is never touched.
    expanded_args = expand_and_zip(bands, new_range)
    band_ranges = [element[1] for element in expanded_args]

    out_settings = {}
    out_decimals = set_max_precision()
    transformed_bands = []

    for i, band in enumerate(bands):
        band_range = band_ranges[i]

        band_array = raster.read(band)
        band_array = cast_array_to_float(band_array, cast_int=True)
        # Mask nodata and infinite cells as NaN before scaling.
        band_array = replace_values(band_array, values_to_replace=[nodata, np.inf], replace_value=np.nan)

        band_array = _min_max_scaling(band_array.astype(np.float64), new_range=band_range)

        band_array = truncate_decimal_places(band_array, decimal_places=out_decimals)
        # Restore the nodata marker in place of NaN for the output.
        band_array = nan_to_nodata(band_array, nodata_value=nodata)
        band_array = cast_array_to_float(band_array, scalar=nodata, cast_float=True)

        # Add a leading band axis so the per-band results can be joined along it.
        transformed_bands.append(np.expand_dims(band_array, axis=0))

        out_settings[f"transformation {i + 1}"] = {
            "band_origin": band,
            "scaled_min": band_range[0],
            "scaled_max": band_range[1],
            "nodata": nodata,
            "decimal_places": out_decimals,
        }

    # Join once at the end; stacking inside the loop would re-copy all previous bands each iteration.
    out_array = np.concatenate(transformed_bands, axis=0)

    out_meta = raster.meta.copy()
    out_meta.update({"count": len(bands), "nodata": nodata, "dtype": out_array.dtype.name})

    return out_array, out_meta, out_settings

z_score_normalization(raster, bands=None, nodata=None)

Normalize data based on mean and standard deviation.

Results will have a mean = 0 and standard deviation = 1. Takes one nodata value that will be ignored in calculations.

If no band/column selection specified, all bands/columns will be used. If a parameter contains only 1 entry, it will be applied for all bands.

Parameters:

Name Type Description Default
raster DatasetReader

Data object to be transformed.

required
bands Optional[Sequence[int]]

Selection of bands to be transformed.

None
nodata Optional[Number]

Nodata value to be considered.

None

Returns:

Name Type Description
out_array ndarray

The transformed data.

out_meta dict

Updated metadata.

out_settings dict

Log of input settings and calculated statistics if available.

Raises:

Type Description
InvalidRasterBandException

The input contains invalid band numbers.

NonMatchingParameterLengthsException

The input does not match the number of selected bands.

Source code in eis_toolkit/transformations/linear.py
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
@beartype
def z_score_normalization(  # type: ignore[no-any-unimported]
    raster: rasterio.io.DatasetReader,
    bands: Optional[Sequence[int]] = None,
    nodata: Optional[Number] = None,
) -> Tuple[np.ndarray, dict, dict]:
    """
    Normalize data based on mean and standard deviation.

    Results will have a mean = 0 and standard deviation = 1.
    Takes one nodata value that will be ignored in calculations.

    If no band selection is specified, all bands of the raster will be used.

    Args:
        raster: Data object to be transformed.
        bands: Selection of bands to be transformed. If None, bands 1..raster.count are used.
        nodata: Nodata value to be considered. If None, the nodata value of the raster is used.

    Returns:
        out_array: The transformed data, one layer per selected band stacked along the first axis.
        out_meta: Copy of the raster metadata with updated count, nodata and dtype.
        out_settings: Log of input settings and the original mean and standard deviation per
            transformed band, keyed "transformation <n>".

    Raises:
        InvalidRasterBandException: The band selection is empty or contains invalid band numbers.
    """
    bands = list(range(1, raster.count + 1)) if bands is None else bands
    nodata = raster.nodata if nodata is None else nodata

    # An empty selection would leave nothing to stack into the output array, so it is
    # rejected with the band-selection error instead of failing later on an unbound name.
    if len(bands) == 0 or check_raster_bands(raster, bands) is False:
        raise InvalidRasterBandException("Invalid band selection.")

    out_settings = {}
    out_decimals = set_max_precision()
    transformed_bands = []

    for i, band in enumerate(bands):
        band_array = raster.read(band)
        band_array = cast_array_to_float(band_array, cast_int=True)
        # Mask nodata and infinite cells as NaN before computing the statistics.
        band_array = replace_values(band_array, values_to_replace=[nodata, np.inf], replace_value=np.nan)

        band_array, mean_array, sd_array = _z_score_normalization(band_array.astype(np.float64))

        band_array = truncate_decimal_places(band_array, decimal_places=out_decimals)
        # Restore the nodata marker in place of NaN for the output.
        band_array = nan_to_nodata(band_array, nodata_value=nodata)
        band_array = cast_array_to_float(band_array, scalar=nodata, cast_float=True)

        # Add a leading band axis so the per-band results can be joined along it.
        transformed_bands.append(np.expand_dims(band_array, axis=0))

        out_settings[f"transformation {i + 1}"] = {
            "band_origin": band,
            "original_mean": truncate_decimal_places(mean_array, decimal_places=out_decimals),
            "original_sd": truncate_decimal_places(sd_array, decimal_places=out_decimals),
            "nodata": nodata,
            "decimal_places": out_decimals,
        }

    # Join once at the end; stacking inside the loop would re-copy all previous bands each iteration.
    out_array = np.concatenate(transformed_bands, axis=0)

    out_meta = raster.meta.copy()
    out_meta.update({"count": len(bands), "nodata": nodata, "dtype": out_array.dtype.name})

    return out_array, out_meta, out_settings