Skip to content

Plot parallel coordinates

plot_parallel_coordinates(df, color_column_name, plot_title=None, palette_name=None, curved_lines=True)

Plot a parallel coordinates plot.

Automatically removes all rows containing null/nan values. Tries to convert columns to numeric to be able to plot them. If more than 8 columns are present (after numeric filtering), keeps only the first 8 to plot.

Parameters:

Name Type Description Default
df DataFrame

The DataFrame to plot.

required
color_column_name str

The name of the column in df to use for color encoding.

required
plot_title Optional[str]

The title for the plot. Default is None.

None
palette_name Optional[str]

The name of the color palette to use. Default is None.

None
curved_lines bool

If True, the plot will have curved instead of straight lines. Default is True.

True

Returns:

Type Description
Figure

A matplotlib figure containing the parallel coordinates plot.

Raises:

Type Description
EmptyDataFrameException

Raised when the DataFrame is empty.

InvalidColumnException

Raised when the color column is not found in the DataFrame.

InconsistentDataTypesException

Raised when the color column has multiple data types.

Source code in eis_toolkit/exploratory_analyses/parallel_coordinates.py
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
def _to_numeric_or_keep(column: pd.Series) -> pd.Series:
    """Return the column converted to a numeric dtype, or unchanged if conversion fails."""
    # Explicitly catching the conversion error replaces the deprecated
    # `errors="ignore"` option of `pd.to_numeric`.
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


@beartype
def plot_parallel_coordinates(
    df: pd.DataFrame,
    color_column_name: str,
    plot_title: Optional[str] = None,
    palette_name: Optional[str] = None,
    curved_lines: bool = True,
) -> matplotlib.figure.Figure:
    """Plot a parallel coordinates plot.

    Automatically removes all rows containing null/nan values. Tries to convert columns to numeric
    to be able to plot them. If more than 8 columns are present (after numeric filtering), keeps only
    the first 8 to plot.

    NOTE(review): this function itself only drops columns that are entirely null; it does not drop
    rows. Confirm that the private plotting helper takes care of any remaining null values.

    Args:
        df: The DataFrame to plot.
        color_column_name: The name of the column in df to use for color encoding.
        plot_title: The title for the plot. Default is None.
        palette_name: The name of the color palette to use. Default is None.
        curved_lines: If True, the plot will have curved instead of straight lines. Default is True.

    Returns:
        A matplotlib figure containing the parallel coordinates plot.

    Raises:
        EmptyDataFrameException: Raised when the DataFrame is empty.
        InvalidColumnException: Raised when the color column is not found in the DataFrame.
        InconsistentDataTypesException: Raised when the color column has multiple data types.
    """

    if df.empty:
        raise EmptyDataFrameException("The input DataFrame is empty.")

    if color_column_name not in df.columns:
        raise InvalidColumnException(f"The provided color column {color_column_name} is not found in the DataFrame.")

    # Infer the best available dtypes first, then convert whatever can be parsed as numbers.
    # Columns that cannot be converted are left as they are and filtered out further below.
    df = df.convert_dtypes()
    df = df.apply(_to_numeric_or_keep)

    # The color values are taken before the numeric filtering so that a non-numeric
    # (e.g. categorical) color column can still be used for coloring.
    color_data = df[color_column_name].to_numpy()
    if len({type(elem) for elem in color_data}) != 1:
        raise InconsistentDataTypesException(
            "The color column should have a consistent datatype. Multiple data types detected in the color column."
        )

    # Keep only the numeric columns for plotting
    df = df.select_dtypes(include=np.number)

    # Drop the column used for coloring and the columns that contain only null values
    all_null_columns = [column for column in df.columns.values if df[column].isnull().all()]
    columns_to_drop = [color_column_name, *all_null_columns]
    df = df.loc[:, ~df.columns.isin(columns_to_drop)]

    # Keep only the first 8 columns (slicing is a no-op when fewer are present)
    df = df.iloc[:, :8]

    return _plot_parallel_coordinates(
        data=df.to_numpy(),
        data_labels=df.columns.values,
        color_data=color_data,
        color_column_name=color_column_name,
        plot_title=plot_title,
        palette_name=palette_name,
        curved_lines=curved_lines,
    )