# Source code for phasik.classes.TemporalData

"""
Base class for time series data
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.signal

# Names exported by ``from <this module> import *``.
__all__ = ["TemporalData"]

# Lookup from the kind of optimum requested ('minima' / 'maxima') to the
# element-wise NumPy comparison that identifies it.
comparators = dict(minima=np.less, maxima=np.greater)


class TemporalData:
    """Class representing any sort of temporal data for a specified list of variables

    Since teneto's TemporalNetwork class requires all times to start at zero, we again
    have the concept of 'true' times as well as times offset to start at zero.

    Attributes
    ----------
    temporal_data : ndarray
        2D array with one row per time point and one column per variable
    variables : list of str
        Variable names, in the same order as the columns of `temporal_data`
    times : ndarray
        1D array of time points shifted to start at zero
    true_times : ndarray
        1D array of the original (unshifted) time points
    """

    def __init__(self, temporal_data, variables, times, true_times):
        """
        Parameters
        ----------
        temporal_data : ndarray
            2D array whose columns are the variables we're interested in and whose rows are the
            temporal data for these variables
        variables : list of str
            List (not a numpy array) of variable names; `series` relies on `list.index`
        times : ndarray
            1D array based on true_times but with values shifted to start at zero
        true_times : ndarray
            1D array of time points
        """

        self.temporal_data = temporal_data
        self.variables = variables
        self.times = times
        self.true_times = true_times

    @classmethod
    def from_dict(cls, dict_, times, true_times, i_tend=None):
        """
        Create a TemporalData from a dict with variables as keys and time series as values

        Parameters
        ----------
        dict_ : dict
            Dictionary containing the temporal data. Keys are variables names and values
            are the associated time series.
        times : ndarray
            1D array based on true_times but with values shifted to start at zero
        true_times : ndarray
            1D array of time points
        i_tend : int, optional
            Number of leading time points to keep: the series, `times` and `true_times`
            are all sliced as ``[0:i_tend]``. Default: None (not cut).

        Returns
        -------
        TemporalData
        """

        variables = list(dict_.keys())
        # Stacking the dict values gives shape (n_variables, n_times).
        temporal_data = np.array(list(dict_.values()))
        if i_tend is not None:
            temporal_data = temporal_data[:, :i_tend]
            times = times[:i_tend]
            true_times = true_times[:i_tend]
        # Transpose so that rows are time points and columns are variables.
        return cls(temporal_data.T, variables, times, true_times)

    @classmethod
    def from_df(cls, df, times, true_times, i_tend=None):
        """
        Create a TemporalData from a DataFrame with variables as columns and timepoints as rows

        Parameters
        ----------
        df : pandas DataFrame
            DataFrame containing the temporal data. Indices correspond to time points and
            columns to variables.
        times : ndarray
            1D array based on true_times but with values shifted to start at zero
        true_times : ndarray
            1D array of time points
        i_tend : int, optional
            Number of leading time points to keep: the rows of `df`, `times` and
            `true_times` are all sliced as ``[0:i_tend]``. Default: None (not cut).

        Returns
        -------
        TemporalData
        """

        variables = df.columns.tolist()
        temporal_data = df.to_numpy()
        if i_tend is not None:
            temporal_data = temporal_data[:i_tend, :]
            times = times[:i_tend]
            true_times = true_times[:i_tend]
        return cls(temporal_data, variables, times, true_times)

    def downsample(self, skip):
        """
        Return a downsampled TemporalData object, keeping one time point out of every `skip`

        The returned object is built from ``[::skip]`` slices, so for NumPy inputs its
        arrays are views on this object's arrays rather than independent copies, and the
        `variables` list is shared.

        Parameters
        ----------
        skip : int
            Step between kept time points: 1 out of `skip` is kept.

        Returns
        -------
        TemporalData
        """
        return TemporalData(
            self.temporal_data[::skip, :],
            self.variables,
            self.times[::skip],
            self.true_times[::skip],
        )

    def series(self, variable):
        """
        Get temporal data for a particular variable

        Parameters
        ----------
        variable : str
            The name of a variable for which there is temporal data.

        Returns
        -------
        np.ndarray
            Time series (the column of `temporal_data` for this variable)

        Raises
        ------
        ValueError
            If `variable` is not in `self.variables` (raised by ``list.index``).
        """
        column = self.variables.index(variable)
        return self.temporal_data[:, column]

    def to_dict(self):
        """Convert temporal data to dict format, with variables as keys and time series as values"""
        return {variable: self.series(variable) for variable in self.variables}

    def to_df(self):
        """Convert temporal data to pandas.DataFrame format, with variables as rows and time points as columns"""
        # NOTE: the columns of the result are a default integer range, not the time
        # values, so they do not track `self.times` (e.g. after downsampling).
        # A time-labelled alternative that may be adopted in future:
        #     pd.DataFrame(self.temporal_data.T, index=self.variables, columns=self.times)
        return pd.DataFrame.from_dict(self.to_dict()).transpose()

    def save_to_csv(self, filepath, index=False):
        """
        Save temporal data to .csv file, with variables as columns and time points as rows.

        The frame returned by `to_df` (variables as rows) is transposed before writing,
        so the header line of the file lists the variable names.

        Parameters
        ----------
        filepath : str
            Path at which to save the file.
        index : bool, optional
            Whether to save the index. Default: False.

        Returns
        -------
        None
        """
        df = self.to_df().transpose()
        df.to_csv(filepath, index=index)

    def normalise(self):
        """Return new TemporalData where each time series is normalised by its maximum

        Each column of `temporal_data` is divided by that column's maximum. `variables`,
        `times` and `true_times` are passed through unchanged (not copied).
        """

        column_maxima = np.max(self.temporal_data, axis=0)
        temporal_data_normalised = self.temporal_data / column_maxima

        return TemporalData(
            temporal_data_normalised,
            self.variables,
            self.times,
            self.true_times,
        )

    def relative_optima(self, variable, optima_type):
        """Get relative optima for a particular variable

        Parameters
        ----------
        variable : str
            Name of the variable whose optima we want
        optima_type : {'minima', 'maxima'}
            Type of optima to look for

        Returns
        -------
        optima : ndarray
            Value of the variable at its optima
        optima_times : tuple of ndarray
            Result of ``scipy.signal.argrelextrema``: the *indices* of the optima within
            the series. Use it to index `times` / `true_times` to obtain time values.

        Raises
        ------
        KeyError
            If `optima_type` is not one of the keys of `comparators`.
        """

        series = self.series(variable)
        optima_times = scipy.signal.argrelextrema(series, comparators[optima_type])
        optima = series[optima_times]
        return optima, optima_times

    def plot_relative_optima(
        self,
        variable,
        optima_type,
        ax=None,
        use_true_times=True,
        plot_var=True,
    ):
        """Plot relative optima for a particular variable

        Parameters
        ----------
        variable : str
            The name of the variable whose optima we want to plot
        optima_type : {'minima', 'maxima'}
            Whether to look for minima or maxima
        ax : matplotlib.Axes, optional
            Axes on which to plot. Default: the current axes (``plt.gca()``).
        use_true_times : bool, optional
            Whether to use the 'actual' times or offset the times so that they start at zero
        plot_var : bool, optional
            Whether to plot the time series associated to the variable.

        Returns
        -------
        None
        """

        if ax is None:
            ax = plt.gca()

        times = self.true_times if use_true_times else self.times
        if plot_var:
            ax.plot(times, self.series(variable), "o-")
        optima, optima_indices = self.relative_optima(variable, optima_type)
        # `optima_indices` holds positions in the series, so map them onto the time axis.
        ax.plot(times[optima_indices], optima, "ro")

    def plot_series(
        self,
        variables=None,
        ax=None,
        norm=False,
        add_labels=True,
        labels_xvals=None,
        use_true_times=True,
    ):
        """Plot particular variables' values over time

        Parameters
        ----------
        variables : list of str, optional
            Names of the variable whose values we want to plot. Default: all variables.
        ax : matplotlib.Axes, optional
            Axes on which to plot. Default: the current axes (``plt.gca()``).
        norm : bool, optional
            Whether or not to normalise the time series by dividing through by the max (default False)
        add_labels : bool, optional
            Whether to label the variables when plotting (default True). Each label is
            drawn as text on its curve, in the curve's colour.
        labels_xvals : list of float, optional
            The positions along the x-axis at which to place the variables' labels (if using). If set to
            None (default) or empty, labels will be placed at regular intervals along x-axis.
        use_true_times : bool, optional
            Whether to use the 'actual' times or offset the times so that they start at zero (default True)

        Returns
        -------
        None
        """

        if ax is None:
            ax = plt.gca()
        if variables is None:
            variables = self.variables
        times = self.true_times if use_true_times else self.times

        lines = []
        for variable in variables:
            y = normed(self.series(variable)) if norm else self.series(variable)
            lines.extend(ax.plot(times, y, label=variable))

        if not add_labels:
            return

        # Explicit None/empty test: the truth value of a multi-element ndarray is
        # ambiguous and would raise ValueError.
        if labels_xvals is None or len(labels_xvals) == 0:
            # Add evenly-spaced labels
            labels_interval = len(times) // (len(variables) + 1)
            labels_xvals = [
                times[labels_interval * (i + 1)] for i in range(len(variables))
            ]

        times_array = np.asarray(times)
        for line, x, variable in zip(lines, labels_xvals, variables):
            # Anchor the label on the curve at the sample closest to the requested x.
            nearest = int(np.argmin(np.abs(times_array - x)))
            ax.text(
                x,
                line.get_ydata()[nearest],
                variable,
                color=line.get_color(),
                ha="center",
                va="center",
                bbox={"facecolor": ax.get_facecolor(), "edgecolor": "none", "pad": 1},
            )


def normed(x):
    """Return `x` divided by its maximum value (as given by ``np.max``)."""
    peak = np.max(x)
    return x / peak