# Source code for phasik.classes.TemporalData

"""
Base class for time series data
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.signal

# Names exported by `from <module> import *`.
__all__ = ["TemporalData"]

# Maps an optima type name to the NumPy comparison handed to
# scipy.signal.argrelextrema when looking for relative optima.
comparators = {"minima": np.less, "maxima": np.greater}


[docs]class TemporalData: """Class representing any sort of temporal data for a specified list of variables Since teneto's TemporalNetwork class requires all times to start at zero, we again have the concept of 'true' times as well as times offset to start at zero. """ def __init__(self, temporal_data, variables, times, true_times): """ Parameters ---------- temporal_data : ndarray 2D array whose columns are the variables we're interested in and whose rows are the temporal data for these variables variables : list of str List (not a numpy array) of variable names times : ndarray 1D array based on true_times but with values shifted to start at zero true_times ndarray 1D array of time points """ self.temporal_data = temporal_data self.variables = variables self.times = times self.true_times = true_times
[docs] @classmethod def from_dict(class_, dict_, times, true_times, i_tend=None): """ Create a TemporalData from a dict with variables as keys and time series as values Parameters ---------- dict_ : dict Dictionary containing the temporal data. Keys are variables names and values are the associated time series. times : ndarray 1D array based on true_times but with values shifted to start at zero true_times ndarray 1D array of time points i_tend : int, optional Index after which to cut the time series. Default: None (not cut). Returns ------- TemporalData """ variables = list(dict_.keys()) temporal_data = np.array(list(dict_.values())) if i_tend is not None: temporal_data = temporal_data[:, 0:i_tend] times = times[0:i_tend] true_times = true_times[0:i_tend] return class_(temporal_data.T, variables, times, true_times)
[docs] @classmethod def from_df(class_, df, times, true_times, i_tend=None): """ Create a TemporalData from a DataFrame with variables as columns and timepoints as rows Parameters ---------- df : pandas DataFrame DataFrame containing the temporal data. Indices correspond to time points and columns to variables. times : ndarray 1D array based on true_times but with values shifted to start at zero true_times ndarray 1D array of time points i_tend : int, optional Index after which to cut the time series. Default: None (not cut). Returns ------- TemporalData """ variables = df.columns.tolist() temporal_data = df.to_numpy() if i_tend is not None: temporal_data = temporal_data[0:i_tend, :] times = times[0:i_tend] true_times = true_times[0:i_tend] return class_(temporal_data, variables, times, true_times)
[docs] def downsample(self, skip): """ Return a downsampled copy of the TemporalData object, by skipping ever 'skip' elements Parameters ---------- skip : int, optional How many times to skip: 1 out of `skip` is kept. Returns ------- TemporalData """ return TemporalData( self.temporal_data[::skip, :], self.variables, self.times[::skip], self.true_times[::skip], )
[docs] def series(self, variable): """ Get temporal data for a particular variable Parameters ---------- variable : str The name of a variable for which there is temporal data. Returns ------- np.ndarray Time series """ return self.temporal_data[:, self.variables.index(variable)]
[docs] def to_dict(self): """Convert temporal data to dict format, with variables as keys and time series as values""" return {variable: self.series(variable) for variable in self.variables}
[docs] def to_df(self): """Convert temporal data to pandas.DataFrame format, with variables as rows and time points as columns""" # was return pd.DataFrame.from_dict(self.to_dict()).transpose()
# but time index was range() by default, not working for downsampling # now, indices set as times (maybe need to change to time indices in future) # return pd.DataFrame(self.temporal_data.T, index=self.variables, columns=self.times)
[docs] def save_to_csv(self, filepath, index=False): """ Save temporal data to .csv file, with variables as rows and time points as columns. Parameters ---------- filepath : str Path at which to save the file. index : bool, optional Whether to save the index. Default: False. Returns ------- None """ df = self.to_df().transpose() df.to_csv(filepath, index=index)
[docs] def normalise(self): """Return new TemporalData where each time series is normalised by its maximum""" temporal_data_normalised = self.temporal_data / np.max( self.temporal_data, axis=0 ) return TemporalData( temporal_data_normalised, self.variables, self.times, self.true_times, )
[docs] def relative_optima(self, variable, optima_type): """Get relative optima for a particular variable Parameters ---------- variable : str Name of the variable whose optima we want optima_type : {'minima', 'maxima'} Type of optima to look for Returns ------- optima : list_like Value of the variable at its optima optima_times : list_like Times at which these optima occur """ series = self.series(variable) optima_times = scipy.signal.argrelextrema(series, comparators[optima_type]) optima = series[optima_times] return optima, optima_times
[docs] def plot_relative_optima( self, variable, optima_type, ax=None, use_true_times=True, plot_var=True, ): """Plot relative optima for a prticular variable Parameters ---------- variable : str The name of the variable whose optima we want to plot optima_type : {'minima', 'maxima'} Wether to look for minima or maxima ax : matplotlib.Axes, optional Axes on which to plot use_true_times : bool, optional Whether to use the 'actual' times or offset the times so that they start at zero plot_var : bool, optional Wether to plot the times series associated to the variable. Returns ------- None """ if ax is None: ax = plt.gca() times = self.true_times if use_true_times else self.times if plot_var: ax.plot(times, self.series(variable), "o-") mass_minima, mass_minima_times = self.relative_optima(variable, optima_type) ax.plot(times[mass_minima_times], mass_minima, "ro")
[docs] def plot_series( self, variables=None, ax=None, norm=False, add_labels=True, labels_xvals=None, use_true_times=True, ): """Plot particular variables' values over time Parameters ---------- variables : list of str Names of the variable whose values we want to plot ax : matplotlib.Axes, optional Axes on which to plot norm : bool, optional Whether or not to normalise the time series by dividing through by the max (default False) add_labels : bool, optional Whether to label the variables when plotting (default True) labels_xvals : list of float, optional The positions along the x-axis at which to place the variables' labels (if using). If set to None (default), labels will be placed at regular intervals along x-axis. use_true_times : bool, optional Whether to use the 'actual' times or offset the times so that they start at zero (default True) Returns ------- None """ if ax is None: ax = plt.gca() if variables is None: variables = self.variables times = self.true_times if use_true_times else self.times for variable in variables: y = normed(self.series(variable)) if norm else self.series(variable) ax.plot(times, y, label=variable) if add_labels: if not labels_xvals: # Add evenly-spaced labels labels_interval = len(times) // (len(variables) + 1) labels_xvals = [ times[labels_interval * (i + 1)] for i in range(len(variables)) ]
def normed(x):
    """Return `x` divided by its maximum value."""
    peak = np.max(x)
    return x / peak