Skip to content

File I/O utilities

get_output_paths_from_common_name(outputs, directory, common_name, extension, first_num=1)

Get output paths for cases where outputs should be just numbered.

Combines directory, given common file name, number and extension. Outputs are used to get the number used as suffix. Include dot in the extension, for example '.tif'.

This tool is designed mainly for convenience in CLI functions.

Parameters:

Name Type Description Default
input_paths

Outputs. Used just to iterate and get numbers for suffixes.

required
directory Path

Path of the output directory.

required
common_name str

Common name used as the basis of each output file name. A number is appended to this.

required
extension str

The extension used for the output path, for example ".tif".

required
first_num int

The first number used as a suffix.

1

Returns:

Type Description
Sequence[Path]

List of output paths.

Source code in eis_toolkit/utilities/file_io.py
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
def get_output_paths_from_common_name(
    outputs: Sequence[Any], directory: Path, common_name: str, extension: str, first_num: int = 1
) -> Sequence[Path]:
    """
    Get output paths for cases where outputs should be just numbered.

    Combines directory, given common file name, number and extension. Outputs are used
    to get the number used as suffix.
    Include dot in the extension, for example '.tif'.

    This tool is designed mainly for convenience in CLI functions.

    Args:
        input_paths: Outputs. Used just to iterate and get numbers for suffixes.
        directory: Path of the output directory.
        common_name: Common name used as the basis of each output file name. A number is appended to this.
        extension: The extension used for the output path, for example ".tif".
        first_num: The first number used as a suffix.

    Returns:
        List of output paths.
    """
    output_paths = []
    for i in range(first_num, len(outputs) + first_num):
        output_path = directory.joinpath(common_name + f"_{i}" + extension)
        output_paths.append(output_path)

        output_paths.append

    return output_paths

get_output_paths_from_inputs(input_paths, directory, suffix, extension)

Get output paths using input paths to extract file name bases.

Combines directory, file name extracted from input path, suffix and extension. Include dot in the extension, for example '.tif'.

This tool is designed mainly for convenience in CLI functions.

Parameters:

Name Type Description Default
input_paths Sequence[Path]

Input paths.

required
directory Path

Path of the output directory.

required
suffix str

Common suffix added to the end of each output file name, for example "nodata_unified".

required
extension str

The extension used for the output path, for example ".tif".

required

Returns:

Type Description
Sequence[Path]

List of output paths.

Source code in eis_toolkit/utilities/file_io.py
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
def get_output_paths_from_inputs(
    input_paths: Sequence[Path], directory: Path, suffix: str, extension: str
) -> Sequence[Path]:
    """
    Get output paths using input paths to extract file name bases.

    Combines directory, file name extracted from input path, suffix and extension.
    Include dot in the extension, for example '.tif'.

    This tool is designed mainly for convenience in CLI functions.

    Args:
        input_paths: Input paths.
        directory: Path of the output directory.
        suffix: Common suffix added to the end of each output file name, for example "nodata_unified".
        extension: The extension used for the output path, for example ".tif".

    Returns:
        List of output paths.
    """
    output_paths = []
    for input_path in input_paths:
        input_file_name_with_extension = os.path.split(input_path)[1]
        input_file_name = os.path.splitext(input_file_name_with_extension)[0]
        output_file_name = f"{input_file_name}_{suffix}"
        output_path = directory.joinpath(output_file_name + extension)
        output_paths.append(output_path)

    return output_paths

get_output_paths_from_names(file_names, directory, suffix, extension)

Get output paths directly from given file names.

Combines directory, file name, suffix and extension. Include dot in the extension, for example '.tif'.

This tool is designed mainly for convenience in CLI functions.

Parameters:

Name Type Description Default
input_paths

Raw file names.

required
directory Path

Path of the output directory.

required
suffix str

Common suffix added to the end of each output file name, for example "nodata_unified".

required
extension str

The extension used for the output path, for example ".tif".

required

Returns:

Type Description
Sequence[Path]

List of output paths.

Source code in eis_toolkit/utilities/file_io.py
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
def get_output_paths_from_names(
    file_names: Sequence[str], directory: Path, suffix: str, extension: str
) -> Sequence[Path]:
    """
    Get output paths directly from given file names.

    Combines directory, file name, suffix and extension.
    Include dot in the extension, for example '.tif'.

    This tool is designed mainly for convenience in CLI functions.

    Args:
        input_paths: Raw file names.
        directory: Path of the output directory.
        suffix: Common suffix added to the end of each output file name, for example "nodata_unified".
        extension: The extension used for the output path, for example ".tif".

    Returns:
        List of output paths.
    """
    output_paths = []
    for name in file_names:
        output_file_name = f"{name}_{suffix}"
        output_path = directory.joinpath(output_file_name + extension)
        output_paths.append(output_path)

    return output_paths

read_and_stack_rasters(raster_files, nodata_handling='convert_to_nan')

Read multiple raster files and stack all their bands into a single 3D array.

Checks that all rasters have the same grid properties. If there are any differences, exception is raised.

Parameters:

Name Type Description Default
raster_files Sequence[Path]

List of paths to raster files.

required
nodata_handling Literal[convert_to_nan, unify, raise_exception, none]

How to handle raster nodata. convert_to_nan to changes all nodata to np.nan, unify changes all rasters to use -9999 as their nodata value, raise_exception raises an exception if all rasters do not have the same nodata value, and none does not do anything for nodata.

'convert_to_nan'

Returns:

Type Description
ndarray

3D array with shape (total bands, height, width).

Sequence[Profile]

List of raster profiles.

Raises:

Type Description
NonMatchingRasterMetadataException

If input rasters do not have same grid properties or nodata_handling is set to raise exception and mismatching nodata is encountered.

Source code in eis_toolkit/utilities/file_io.py
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
def read_and_stack_rasters(
    raster_files: Sequence[Path],
    nodata_handling: Literal["convert_to_nan", "unify", "raise_exception", "none"] = "convert_to_nan",
) -> Tuple[np.ndarray, Sequence[rasterio.profiles.Profile]]:
    """
    Read multiple raster files and stack all their bands into a single 3D array.

    Checks that all rasters have the same grid properties. If there are any differences, exception is raised.

    Args:
        raster_files: List of paths to raster files.
        nodata_handling: How to handle raster nodata. convert_to_nan to changes all nodata to np.nan, unify
            changes all rasters to use -9999 as their nodata value, raise_exception raises an exception if
            all rasters do not have the same nodata value, and none does not do anything for nodata.

    Returns:
        3D array with shape (total bands, height, width).
        List of raster profiles.

    Raises:
        NonMatchingRasterMetadataException: If input rasters do not have same grid properties or nodata_handling
            is set to raise exception and mismatching nodata is encountered.
    """
    bands = []
    nodata_values = []
    profiles = []

    for raster_file in raster_files:
        with rasterio.open(raster_file) as raster:
            # Read all bands from each raster
            profile = raster.profile
            for i in range(1, raster.count + 1):
                band_data = raster.read(i)

                if nodata_handling == "convert_to_nan":
                    band_data[band_data == profile["nodata"]] = np.nan
                elif nodata_handling == "unify":
                    band_data[band_data == profile["nodata"]] = -9999
                    profile["nodata"] = -9999
                elif nodata_handling == "raise_exception":
                    nodata_values.append(profile["nodata"])
                    if len(set(nodata_values)) > 1:
                        raise exceptions.NonMatchingRasterMetadataException("Input rasters have varying nodata values.")
                elif nodata_handling == "none":
                    pass
                bands.append(band_data)
            profiles.append(profile)

    if not check_raster_grids(profiles, same_extent=True):
        raise exceptions.NonMatchingRasterMetadataException("Input rasters should have the same properties.")

    # Stack all bands into a single 3D array
    stacked_arrays = np.stack(bands, axis=0)
    return stacked_arrays, profiles

read_file(file_path)

Read an input file trying different readers.

First tries to read to a rasterio DatasetReader, then to a GeoDataFrame, then to a DataFrame. If none of the readers succeed, raises an exception.

Parameters:

Name Type Description Default
file_path Path

Input file path.

required

Returns:

Type Description
Union[DatasetReader, GeoDataFrame, DataFrame]

The input file data in the opened format.

Raises:

Type Description
FileReadError

None of the readers succeeded to read the input file.

Source code in eis_toolkit/utilities/file_io.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
@beartype
def read_file(file_path: Path) -> Union[rasterio.io.DatasetReader, gpd.GeoDataFrame, pd.DataFrame]:
    """Read an input file trying different readers.

    First tries to read to a rasterio DatasetReader, then to a GeoDataFrame, then to a DataFrame.
    If none of the readers succeed, raises an exception.

    Args:
        file_path: Input file path.

    Returns:
        The input file data in the opened format.

    Raises:
        FileReadError:None of the readers succeeded to read the input file.
    """

    # Try to read as a raster first
    try:
        data = read_raster(file_path)
    except exceptions.FileReadError:

        # Try to read as a GeoDataFrame
        try:
            data = gpd.read_file(file_path)
        except (ValueError, OSError):

            # Try to read as a DataFrame
            try:
                data = pd.read_csv(file_path)
            except pd.errors.ParserError:
                # If none of the readers succeeded, raise an exception
                raise exceptions.FileReadError(f"Failed to file {file_path} as raster, geodataframe or dataframe")

    return data

read_raster(file_path)

Read a raster file to a rasterio DatasetReader.

Parameters:

Name Type Description Default
file_path Path

Input file path.

required

Returns:

Type Description
DatasetReader

File data as a Rasterio DatasetReader.

Raises:

Type Description
FileReadError

Rasterio failed to open the input file.

Source code in eis_toolkit/utilities/file_io.py
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
@beartype
def read_raster(file_path: Path) -> rasterio.io.DatasetReader:
    """Read a raster file to a rasterio DatasetReader.

    Args:
        file_path: Input file path.

    Returns:
        File data as a Rasterio DatasetReader.

    Raises:
        FileReadError: Rasterio failed to open the input file.
    """
    try:
        data = rasterio.open(file_path)
    except rasterio.errors.RasterioIOError:
        raise exceptions.FileReadError(f"Failed to read raster data from {file_path}.")
    return data

read_tabular(file_path)

Read tabular data to a DataFrame.

Parameters:

Name Type Description Default
file_path Path

Input file path.

required

Returns:

Type Description
DataFrame

File data as a DataFrame.

Raises:

Type Description
FileReadError

Pandas failed to open the input file.

Source code in eis_toolkit/utilities/file_io.py
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
@beartype
def read_tabular(file_path: Path) -> pd.DataFrame:
    """Read tabular data to a DataFrame.

    Args:
        file_path: Input file path.

    Returns:
        File data as a DataFrame.

    Raises:
        FileReadError: Pandas failed to open the input file.
    """
    try:
        data = pd.read_csv(file_path)
    except pd.errors.ParserError:
        raise exceptions.FileReadError(f"Failed to read tabular data from {file_path}.")
    return data

read_vector(file_path)

Read a vector file to a GeoDataFrame.

Parameters:

Name Type Description Default
file_path Path

Input file path.

required

Returns:

Type Description
GeoDataFrame

File data as a GeoDataFrame.

Raises:

Type Description
FileReadError

Geopandas failed to read the input file.

Source code in eis_toolkit/utilities/file_io.py
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
@beartype
def read_vector(file_path: Path) -> gpd.GeoDataFrame:
    """Read a vector file to a GeoDataFrame.

    Args:
        file_path: Input file path.

    Returns:
        File data as a GeoDataFrame.

    Raises:
        FileReadError: Geopandas failed to read the input file.
    """
    try:
        data = gpd.read_file(file_path)
    except (ValueError, OSError):
        raise exceptions.FileReadError(f"Failed to read vector data from {file_path}.")
    return data