"""
Base class for time series data
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.signal
# Public API: the only name exported by a star-import of this module.
__all__ = ["TemporalData"]
# Maps an optima type to the element-wise comparison that is handed to
# scipy.signal.argrelextrema when looking for that kind of extremum.
comparators = dict(minima=np.less, maxima=np.greater)
class TemporalData:
    """Class representing any sort of temporal data for a specified list of variables.

    Since teneto's TemporalNetwork class requires all times to start at zero, we again
    have the concept of 'true' times as well as times offset to start at zero.
    """

    def __init__(self, temporal_data, variables, times, true_times):
        """
        Parameters
        ----------
        temporal_data : ndarray
            2D array whose columns are the variables we're interested in and whose rows
            are the temporal data for these variables
        variables : list of str
            List (not a numpy array) of variable names
        times : ndarray
            1D array based on true_times but with values shifted to start at zero
        true_times : ndarray
            1D array of time points
        """
        self.temporal_data = temporal_data
        self.variables = variables
        self.times = times
        self.true_times = true_times

    @classmethod
    def from_dict(class_, dict_, times, true_times, i_tend=None):
        """
        Create a TemporalData from a dict with variables as keys and time series as values

        Parameters
        ----------
        dict_ : dict
            Dictionary containing the temporal data. Keys are variables names and values
            are the associated time series.
        times : ndarray
            1D array based on true_times but with values shifted to start at zero
        true_times : ndarray
            1D array of time points
        i_tend : int, optional
            Index at which to cut the time series (samples ``0`` to ``i_tend - 1`` are
            kept). Default: None (not cut).

        Returns
        -------
        TemporalData
        """
        variables = list(dict_.keys())
        # One row per variable here; transposed below to the (time, variable) layout.
        temporal_data = np.array(list(dict_.values()))
        if i_tend is not None:
            temporal_data = temporal_data[:, 0:i_tend]
            times = times[0:i_tend]
            true_times = true_times[0:i_tend]
        return class_(temporal_data.T, variables, times, true_times)

    @classmethod
    def from_df(class_, df, times, true_times, i_tend=None):
        """
        Create a TemporalData from a DataFrame with variables as columns and timepoints as rows

        Parameters
        ----------
        df : pandas DataFrame
            DataFrame containing the temporal data. Indices correspond to time points and
            columns to variables.
        times : ndarray
            1D array based on true_times but with values shifted to start at zero
        true_times : ndarray
            1D array of time points
        i_tend : int, optional
            Index at which to cut the time series (samples ``0`` to ``i_tend - 1`` are
            kept). Default: None (not cut).

        Returns
        -------
        TemporalData
        """
        variables = df.columns.tolist()
        temporal_data = df.to_numpy()
        if i_tend is not None:
            temporal_data = temporal_data[0:i_tend, :]
            times = times[0:i_tend]
            true_times = true_times[0:i_tend]
        return class_(temporal_data, variables, times, true_times)

    def downsample(self, skip):
        """
        Return a downsampled copy of the TemporalData object, keeping one sample out of
        every `skip`

        Parameters
        ----------
        skip : int
            Sampling stride: 1 out of `skip` time points is kept.

        Returns
        -------
        TemporalData
        """
        return TemporalData(
            self.temporal_data[::skip, :],
            self.variables,
            self.times[::skip],
            self.true_times[::skip],
        )

    def series(self, variable):
        """
        Get temporal data for a particular variable

        Parameters
        ----------
        variable : str
            The name of a variable for which there is temporal data.

        Returns
        -------
        np.ndarray
            Time series

        Raises
        ------
        ValueError
            If `variable` is not in ``self.variables``.
        """
        return self.temporal_data[:, self.variables.index(variable)]

    def to_dict(self):
        """Convert temporal data to dict format, with variables as keys and time series as values"""
        return {variable: self.series(variable) for variable in self.variables}

    def to_df(self):
        """Convert temporal data to pandas.DataFrame format, with variables as rows and time points as columns"""
        # NOTE: the columns are positional indices (0, 1, 2, ...), not the time values,
        # so after downsampling they no longer match the original time indices.
        return pd.DataFrame.from_dict(self.to_dict()).transpose()

    def save_to_csv(self, filepath, index=False):
        """
        Save temporal data to .csv file, with time points as rows and variables as columns.

        Parameters
        ----------
        filepath : str
            Path at which to save the file.
        index : bool, optional
            Whether to save the index. Default: False.

        Returns
        -------
        None
        """
        # to_df() has variables as rows; transpose back so each variable is a CSV column.
        df = self.to_df().transpose()
        df.to_csv(filepath, index=index)

    def normalise(self):
        """Return new TemporalData where each time series is normalised by its maximum"""
        # axis=0 takes the maximum of each column, i.e. of each variable's series.
        temporal_data_normalised = self.temporal_data / np.max(
            self.temporal_data, axis=0
        )
        return TemporalData(
            temporal_data_normalised,
            self.variables,
            self.times,
            self.true_times,
        )

    def relative_optima(self, variable, optima_type):
        """Get relative optima for a particular variable

        Parameters
        ----------
        variable : str
            Name of the variable whose optima we want
        optima_type : {'minima', 'maxima'}
            Type of optima to look for

        Returns
        -------
        optima : ndarray
            Value of the variable at its optima
        optima_times : tuple of ndarray
            Index arrays, exactly as returned by ``scipy.signal.argrelextrema``, giving
            the positions of the optima within the series. These are indices, not time
            values: index ``times`` or ``true_times`` with them to get the times.
        """
        series = self.series(variable)
        optima_times = scipy.signal.argrelextrema(series, comparators[optima_type])
        optima = series[optima_times]
        return optima, optima_times

    def plot_relative_optima(
        self,
        variable,
        optima_type,
        ax=None,
        use_true_times=True,
        plot_var=True,
    ):
        """Plot relative optima for a particular variable

        Parameters
        ----------
        variable : str
            The name of the variable whose optima we want to plot
        optima_type : {'minima', 'maxima'}
            Whether to look for minima or maxima
        ax : matplotlib.Axes, optional
            Axes on which to plot. Defaults to the current axes.
        use_true_times : bool, optional
            Whether to use the 'actual' times or offset the times so that they start at zero
        plot_var : bool, optional
            Whether to plot the time series associated to the variable.

        Returns
        -------
        None
        """
        if ax is None:
            ax = plt.gca()
        times = self.true_times if use_true_times else self.times
        if plot_var:
            ax.plot(times, self.series(variable), "o-")
        optima, optima_indices = self.relative_optima(variable, optima_type)
        # relative_optima returns positions into the series; map them onto the time axis.
        ax.plot(times[optima_indices], optima, "ro")

    def plot_series(
        self,
        variables=None,
        ax=None,
        norm=False,
        add_labels=True,
        labels_xvals=None,
        use_true_times=True,
    ):
        """Plot particular variables' values over time

        Parameters
        ----------
        variables : list of str, optional
            Names of the variables whose values we want to plot. Defaults to all variables.
        ax : matplotlib.Axes, optional
            Axes on which to plot. Defaults to the current axes.
        norm : bool, optional
            Whether or not to normalise the time series by dividing through by the max (default False)
        add_labels : bool, optional
            Whether to label the variables when plotting (default True)
        labels_xvals : list of float, optional
            The positions along the x-axis at which to place the variables' labels (if using),
            one per plotted variable. If set to None (default) or empty, labels will be placed
            at regular intervals along x-axis.
        use_true_times : bool, optional
            Whether to use the 'actual' times or offset the times so that they start at zero (default True)

        Returns
        -------
        None
        """
        if ax is None:
            ax = plt.gca()
        if variables is None:
            variables = self.variables
        times = self.true_times if use_true_times else self.times

        plotted = []
        for variable in variables:
            y = normed(self.series(variable)) if norm else self.series(variable)
            lines = ax.plot(times, y, label=variable)
            plotted.append((variable, y, lines[0]))

        if not add_labels or not plotted or len(times) == 0:
            return

        # Explicit None/empty test: `not labels_xvals` is ambiguous for numpy arrays.
        if labels_xvals is None or len(labels_xvals) == 0:
            # Evenly-spaced labels: put the i-th label on the (i + 1)-th of
            # len(variables) + 1 equal index intervals, directly on a sample.
            labels_interval = len(times) // (len(plotted) + 1)
            anchors = [
                (times[labels_interval * (i + 1)], y[labels_interval * (i + 1)])
                for i, (_, y, _) in enumerate(plotted)
            ]
        else:
            # Interpolate each curve at the requested x-position.
            # np.interp assumes `times` is numeric and increasing.
            anchors = [
                (xval, np.interp(xval, times, y))
                for xval, (_, y, _) in zip(labels_xvals, plotted)
            ]

        for (variable, _, line), (xval, yval) in zip(plotted, anchors):
            ax.text(
                xval,
                yval,
                variable,
                color=line.get_color(),
                ha="center",
                va="center",
                bbox={"facecolor": "white", "edgecolor": "none", "alpha": 0.7},
            )
def normed(x):
    """Return `x` scaled by its maximum value, so that the largest entry becomes 1."""
    peak = np.max(x)
    return x / peak