Source code for fitgrid.models

from os import environ
from math import ceil
from functools import partial
from multiprocessing import Pool
from contextlib import redirect_stdout
from io import StringIO

import numpy as np
import pandas as pd
from statsmodels.formula.api import ols
from tqdm import tqdm

from .errors import FitGridError
from . import tools
from .fitgrid import FitGrid, LMFitGrid, LMERFitGrid


def validate_LHS(epochs, LHS):
    """Check that ``LHS`` is a list of column names found in the epochs table.

    Parameters
    ----------
    epochs : Epochs
        epochs object; only ``epochs.table.columns`` is consulted here
    LHS : list of str
        names intended to serve as dependent variables

    Raises
    ------
    FitGridError
        if ``LHS`` is not a list of strings, or if any of its items is not
        a column of ``epochs.table``

    """

    # must be a list of strings
    is_list_of_str = isinstance(LHS, list) and all(
        isinstance(item, str) for item in LHS
    )
    if not is_list_of_str:
        raise FitGridError('LHS must be a list of strings.')

    # all LHS items must be present in the epochs table
    missing = set(LHS) - set(epochs.table.columns)
    if missing:
        raise FitGridError(
            'Items in LHS should all be present in the epochs table, '
            f'the following are missing: {missing}'
        )


def validate_RHS(RHS):
    """Check that a right hand side was supplied and that it is a string.

    Parameters
    ----------
    RHS : str
        right hand side of a model formula; its content is not inspected

    Raises
    ------
    FitGridError
        if ``RHS`` is None or is not a string

    """

    # a missing RHS gets its own message, separate from a wrongly typed one
    if RHS is None:
        raise FitGridError('Specify the RHS argument.')
    if not isinstance(RHS, str):
        raise FitGridError('RHS has to be a string.')


def process_key_and_group(key_and_group, function, channels):
    """Apply ``function`` to one group once per channel.

    Parameters
    ----------
    key_and_group : tuple
        a ``(key, group)`` pair; ``key`` becomes the name of the result
    function : callable
        called as ``function(group, channel)`` for every channel
    channels : iterable of str
        channel names, used as the index labels of the result

    Returns
    -------
    pandas.Series
        values returned by ``function``, indexed by channel, named ``key``

    """
    key, group = key_and_group
    # NOTE(review): lm() forwards an integer eval_env (a call stack depth,
    # see patsy docs) to code invoked from inside this comprehension, so
    # changing the call nesting here may shift the frame patsy resolves --
    # confirm before restructuring.
    fits = {channel: function(group, channel) for channel in channels}
    return pd.Series(fits, name=key)


def run_model(
    epochs, function, channels=None, parallel=False, n_cores=4, quiet=False
):
    """Run an arbitrary model on the epochs.

    Parameters
    ----------
    epochs : Epochs
        the epochs object on which the model is to be run
    function : Python function
        function that runs a model, see Notes below for details
    channels : list of str, optional, defaults to all channels
        list of channels to serve as dependent variables; when None,
        ``epochs.channels`` is used
    parallel : bool, defaults to False
        set to True in order to run in parallel
    n_cores : int, defaults to 4
        number of processes to run in parallel
    quiet : bool, defaults to False
        set to True to disable progress bar display

    Returns
    -------
    grid : FitGrid
        a FitGrid object containing the results

    Notes
    -----
    The function should take two parameters, ``data`` and ``channel``, run
    some model on the data, and return an object containing the results.
    ``data`` will be a snapshot across epochs at a single timepoint,
    containing all channels of interest. ``channel`` is the name of the
    target variable that the function runs the model against (uses it as
    the dependent variable).

    Examples
    --------
    Here's an example of a function that can be passed to ``run_model``::

        def regression(data, channel):
            formula = channel + ' ~ continuous + categorical'
            return ols(formula, data).fit()

    """

    # the private runner returns a plain dataframe of fitted objects;
    # wrapping it in a FitGrid is the only extra step done here
    _grid = _run_model(
        epochs,
        function,
        channels=channels,
        parallel=parallel,
        n_cores=n_cores,
        quiet=quiet,
    )
    return FitGrid(_grid, epochs.epoch_index, epochs.time)


def _run_model(
    epochs, function, channels=None, parallel=False, n_cores=4, quiet=False
):
    """Fit ``function`` on every snapshot and channel, return a dataframe.

    ``channels`` falls back to ``epochs.channels`` when None and is checked
    with ``validate_LHS``. Each item of ``epochs._snapshots`` is handed to
    ``process_key_and_group``, serially or through a process pool of
    ``n_cores`` workers. The resulting series are concatenated as columns
    and transposed, and the index is named ``epochs.time``.

    Returns a pandas dataframe, not a FitGrid; callers wrap it themselves.
    """

    lhs = epochs.channels if channels is None else channels
    validate_LHS(epochs, lhs)

    # progress bar over the snapshots, silenced when quiet is set
    snapshots = tqdm(epochs._snapshots, disable=quiet)
    fit_snapshot = partial(
        process_key_and_group, function=function, channels=lhs
    )

    if not parallel:
        # left lazy on purpose: the map is consumed by pd.concat below
        columns = map(fit_snapshot, snapshots)
    else:
        # one chunk of snapshots per worker
        per_worker = ceil(len(snapshots) / n_cores)
        # NOTE(review): single_threaded presumably keeps numpy from
        # spawning its own threads inside each worker -- confirm in tools
        with tools.single_threaded(np), Pool(n_cores) as pool:
            columns = pool.map(fit_snapshot, snapshots, chunksize=per_worker)

    grid = pd.concat(columns, axis=1).T
    grid.index.name = epochs.time

    return grid  # dataframe, not FitGrid


def lm_single(data, channel, RHS, eval_env):
    """Fit one ordinary least squares model with ``channel`` as response.

    Parameters
    ----------
    data : object
        data passed unchanged to ``ols``
    channel : str
        name of the dependent variable, left hand side of the formula
    RHS : str
        right hand side of the formula
    eval_env : int or patsy.EvalEnvironment
        passed unchanged to ``ols`` as ``eval_env``

    Returns
    -------
    object
        whatever ``ols(...).fit()`` returns

    """
    formula = channel + ' ~ ' + RHS
    # NOTE(review): ols is called directly from this function; an integer
    # eval_env is a call stack depth (see patsy docs), so an extra wrapper
    # frame here would presumably change what it points at -- confirm.
    return ols(formula, data, eval_env=eval_env).fit()


def lm(
    epochs,
    LHS=None,
    RHS=None,
    parallel=False,
    n_cores=4,
    quiet=False,
    eval_env=4,
):
    """Run ordinary least squares linear regression on the epochs.

    Parameters
    ----------
    epochs : Epochs
        epochs object on which regression is to be run
    LHS : list of str, optional, defaults to all channels
        list of channels for the left hand side of the regression formula
    RHS : str
        right hand side of the regression formula
    parallel : bool, defaults to False
        change to True to run in parallel
    n_cores : int, defaults to 4
        number of processes to use for computation
    quiet : bool, defaults to False
        set to True to disable fitting progress bar
    eval_env : int or patsy.EvalEnvironment, defaults to 4
        environment to use for evaluating patsy formulas, see patsy docs

    Returns
    -------
    grid : LMFitGrid
        LMFitGrid object containing the results of the regression

    Raises
    ------
    FitGridError
        if ``LHS`` or ``RHS`` fail validation

    """

    if LHS is None:
        LHS = epochs.channels

    # fail early on bad arguments, before any fitting starts
    validate_LHS(epochs, LHS)
    validate_RHS(RHS)

    # bind the formula pieces so the fitter takes just (data, channel)
    function = partial(lm_single, RHS=RHS, eval_env=eval_env)

    _grid = _run_model(
        epochs,
        function=function,
        channels=LHS,
        parallel=parallel,
        n_cores=n_cores,
        quiet=quiet,
    )

    return LMFitGrid(_grid, epochs.epoch_index, epochs.time)


def lmer_single(
    data, channel, RHS, family, conf_int, factors, permute, ordered, REML
):
    """Fit one pymer4 ``Lmer`` model with ``channel`` as response.

    Parameters
    ----------
    data : object
        data passed unchanged to ``Lmer``
    channel : str
        name of the dependent variable, left hand side of the formula
    RHS : str
        right hand side of the formula
    family : str
        passed unchanged to ``Lmer``
    conf_int, factors, permute, ordered, REML
        passed unchanged to ``Lmer.fit``

    Returns
    -------
    Lmer
        the fitted model with a ``has_warning`` flag added and its
        ``data``, ``design_matrix`` and ``model_obj`` attributes deleted

    """
    # imported here rather than at module level, so pymer4 is only needed
    # when this function is actually called
    from pymer4 import Lmer

    model = Lmer(channel + ' ~ ' + RHS, data=data, family=family)

    # lmer prints warnings; keep them out of stdout while fitting
    with redirect_stdout(StringIO()):
        model.fit(
            summarize=False,
            conf_int=conf_int,
            factors=factors,
            permute=permute,
            ordered=ordered,
            REML=REML,
        )

    # in pymer4 <= 0.6 lmer warnings were not attached to the model object;
    # as of pymer4 0.7+ they are available as model.warnings
    model.has_warning = len(model.warnings) > 0

    # NOTE(review): presumably dropped to keep the returned object small
    # and transferable between processes -- confirm
    del model.data
    del model.design_matrix
    del model.model_obj

    return model


def lmer(
    epochs,
    LHS=None,
    RHS=None,
    family='gaussian',
    conf_int='Wald',
    factors=None,
    permute=None,
    ordered=False,
    REML=True,
    parallel=False,
    n_cores=4,
    quiet=False,
):
    """Fit lme4 linear mixed model by interfacing with R.

    Parameters
    ----------
    epochs : Epochs
        epochs object on which lmer is to be run
    LHS : list of str, optional, defaults to all channels
        list of channels for the left hand side of the lmer formula
    RHS : str
        right hand side of the lmer formula
    family : str, defaults to 'gaussian'
        distribution link function to use
    conf_int : str, defaults to 'Wald'
        confidence interval setting, passed unchanged to pymer4's
        ``Lmer.fit``; see the pymer4 documentation for accepted values
    factors : dict, optional
        Keys should be column names in data to treat as factors. Values
        should either be a list containing unique variable levels if
        dummy-coding or polynomial coding is desired. Otherwise values
        should themselves be dictionaries with unique variable levels as
        keys and desired contrast values (as specified in R!) as values.
    permute : int, defaults to None
        if non-zero, computes parameter significance tests by permuting
        test statistics rather than parametrically. Permutation is done by
        shuffling observations within clusters to respect random effects
        structure of data.
    ordered : bool, defaults to False
        whether factors should be treated as ordered polynomial contrasts;
        this will parameterize a model with K-1 orthogonal polynomial
        regressors beginning with a linear contrast based on the factor
        order provided
    REML : bool, defaults to True
        change to False to use ML estimation
    parallel : bool, defaults to False
        change to True to run in parallel
    n_cores : int, defaults to 4
        number of processes to use for computation
    quiet : bool, defaults to False
        set to True to disable fitting progress bar

    Returns
    -------
    grid : LMERFitGrid
        LMERFitGrid object containing the results of lmer fitting

    Raises
    ------
    FitGridError
        if ``LHS`` or ``RHS`` fail validation

    """

    if LHS is None:
        LHS = epochs.channels

    # fail early on bad arguments, before any fitting starts
    validate_LHS(epochs, LHS)
    validate_RHS(RHS)

    # bind every model option so the fitter takes just (data, channel)
    function = partial(
        lmer_single,
        RHS=RHS,
        family=family,
        conf_int=conf_int,
        factors=factors,
        permute=permute,
        ordered=ordered,
        REML=REML,
    )
    _grid = _run_model(
        epochs,
        function,
        channels=LHS,
        parallel=parallel,
        n_cores=n_cores,
        quiet=quiet,
    )

    return LMERFitGrid(_grid, epochs.epoch_index, epochs.time)