Source code for fitgrid.epochs

# -*- coding: utf-8 -*-
"""Data structure for validating and efficiently slicing
fixed-length segments of typically multichannel time-series data.

"""

import numpy as np
import pandas as pd

from .errors import FitGridError
from . import tools


class Epochs:
    """Validated container for a long-form table of fixed-length epochs.

    On construction the epochs table is checked for consistency (required
    index columns, no duplicate column names, identical and unique epoch
    ids at every time point) and a private, sorted copy is stored.

    Parameters
    ----------
    epochs_table : pandas DataFrame
        long form dataframe containing epochs with equal indices; both
        `time` and `epoch_id` must be levels of its index
    time : str
        time column name
    epoch_id : str
        epoch identifier column name
    channels : list of str
        list of channel names to serve as dependent variables

    Attributes
    ----------
    table : pandas DataFrame
        copy of `epochs_table` indexed by `epoch_id` and sorted on it
    epoch_index : pandas Index
        epoch ids shared by every time point
    time_index : pandas Index
        time points, in group order

    Raises
    ------
    FitGridError
        if any of the consistency checks fails
    """

    def __init__(self, epochs_table, time, epoch_id, channels):

        # channels must be a list of strings
        if not isinstance(channels, list) or not all(
            isinstance(item, str) for item in channels
        ):
            raise FitGridError('channels should be a list of strings.')

        # the table type must be verified before any of its attributes
        # (.columns, .index) are read by the checks below
        if not isinstance(epochs_table, pd.DataFrame):
            raise FitGridError('epochs_table must be a Pandas DataFrame.')

        # all channels must be present as epochs table columns
        missing_channels = set(channels) - set(epochs_table.columns)
        if missing_channels:
            raise FitGridError(
                'channels should all be present in the epochs table, '
                f'the following are missing: {missing_channels}'
            )

        # these index columns are required for consistency checks
        for item in (epoch_id, time):
            if item not in epochs_table.index.names:
                raise FitGridError(
                    f'{item} must be a column in the epochs table index.'
                )

        # check no duplicate column names in index and regular columns
        names = list(epochs_table.index.names) + list(epochs_table.columns)
        deduped_names = tools.deduplicate_list(names)
        if deduped_names != names:
            raise FitGridError('Duplicate column names not allowed.')

        # make our own copy so we are immune to modification to original table
        table = (
            epochs_table.copy().reset_index().set_index(epoch_id).sort_index()
        )
        assert table.index.names == [epoch_id]

        snapshots = table.groupby(time)

        # check that snapshots across epochs have equal index by transitivity
        prev_group = None
        for idx, cur_group in snapshots:
            if prev_group is not None:
                if not prev_group.index.equals(cur_group.index):
                    raise FitGridError(
                        f'Snapshot {idx} differs from '
                        f'previous snapshot in {epoch_id} index:\n'
                        f'Current snapshot\'s indices:\n'
                        f'{cur_group.index}\n'
                        f'Previous snapshot\'s indices:\n'
                        f'{prev_group.index}'
                    )
            prev_group = cur_group

        # no groups at all means there is nothing to validate or model
        if prev_group is None:
            raise FitGridError('epochs_table must not be empty.')

        # all snapshots share one index, so checking the last one suffices
        if not prev_group.index.is_unique:
            dupes = tools.get_index_duplicates_table(table, epoch_id)
            raise FitGridError(
                f'Duplicate values in {epoch_id} index not allowed:\n{dupes}'
            )

        # checks passed, set instance variables
        self.time = time
        self.epoch_id = epoch_id
        self.channels = channels
        self.table = table
        self._snapshots = snapshots
        self.epoch_index = tools.get_first_group(snapshots).index.copy()
        self.time_index = pd.Index(
            [timestamp for timestamp, _ in snapshots], name=time
        )

    def distances(self):
        """Return scaled Euclidean distances of epochs from the "mean" epoch.

        Returns
        -------
        distances : pandas Series
            one distance per epoch, indexed by `epoch_index`

        Notes
        -----
        Distances are scaled by dividing by the max. When every epoch
        coincides with the mean (max distance of zero) all distances are
        returned as zero instead of dividing by zero.
        """

        # sort on (epoch_id, time) so rows are epoch-major with the same
        # time order inside every epoch; the reshape below relies on that
        # layout and the stored table is only sorted by epoch_id
        table = (
            self.table.reset_index()
            .set_index([self.epoch_id, self.time])[self.channels]
            .sort_index()
        )

        n_channels = len(table.columns)
        n_epochs = len(table.index.unique(level=self.epoch_id))
        n_samples = len(table.index.unique(level=self.time))

        assert table.values.size == n_channels * n_epochs * n_samples

        values = table.values.reshape(n_epochs, n_samples, n_channels)
        mean = values.mean(axis=0)
        diff = values - mean

        def l2_norm(data, axis=1):
            return np.sqrt((data * data).sum(axis=axis))

        # first n_samples is axis 1, then n_channels, leaving epochs
        distances_arr = l2_norm(l2_norm(diff))

        max_distance = distances_arr.max()
        if max_distance == 0:
            # every epoch equals the mean; avoid 0 / 0 -> NaN
            distances_arr_scaled = np.zeros_like(distances_arr)
        else:
            distances_arr_scaled = distances_arr / max_distance

        return pd.Series(distances_arr_scaled, index=self.epoch_index)

    def plot_averages(self, channels=None, negative_up=True):
        """Plot grand mean averages for each channel, negative up by default.

        Parameters
        ----------
        channels : list of str, optional, defaults to all channels
            list of channel names to plot the averages
        negative_up : bool, optional, default True
            by convention, ERPs are plotted negative voltage up

        Returns
        -------
        fig : matplotlib.figure.Figure
            figure containing plots
        axes : numpy.ndarray of matplotlib.axes.Axes
            axes objects
        """

        if channels is None:
            channels = self.channels

        from . import plots

        # average only the requested channels: the table may also hold
        # non-channel columns for which a mean is undefined
        data = self._snapshots[list(channels)].mean()
        fig, axes = plots.stripchart(data, negative_up=negative_up)

        return fig, axes