Commit 4c7b7522 authored by Andrew Quinn's avatar Andrew Quinn
Browse files

psdglm model fitting refactor and small general tweaks

parent 6eb336ef
Loading
Loading
Loading
Loading
+146 −53
Original line number Diff line number Diff line
@@ -17,8 +17,6 @@ def apply_delay_embedding(x, nperseg, nstep, window=None, detrend_func=None, pad
    Create a windowed versions of a dataset with options for specifying
    padding, detrendingand windowing operations.

    Strongly inspired by scipy.signal.spectral._fft_helper.

    Parameters
    ----------
    x : ndarray
@@ -39,6 +37,11 @@ def apply_delay_embedding(x, nperseg, nstep, window=None, detrend_func=None, pad
    ndarray
        Data array with delay embedding applied

    Notes
    -----
    Strongly inspired by scipy.signal.spectral._fft_helper - needed to separate
    out FFT computation.

    """
    # pad to make the vector length an integer number of windows
    # y.shape == nperseg + (nseg-1)*nstep
@@ -209,7 +212,7 @@ def compute_stft(x,
    freqs = config['freqvals'][fidx]

    # Final two axes are now [..., time x freq]
    result = _proc_unroll_output(result, config['axis'], output_roll=config['output_roll'])
    result = _proc_unroll_output(result, config['axis'], output_roll=output_roll)

    if return_config:
        return freqs, time, result, config
@@ -470,13 +473,16 @@ def set_options(input_len,

    """
    # parse window; if array like, then set nperseg = win.shape
    win, nperseg = signal.spectral._set_segments(window, nperseg, input_length=input_len)
    win, nperseg = signal.spectral._triage_segments(window, nperseg, input_length=input_len)

    nfft = _set_nfft(nfft, nperseg)

    noverlap = _set_noverlap(noverlap, nperseg)

    nstep = nperseg - noverlap

    nwindows = np.fix(input_len/nstep - 1).astype(int)

    scale = _set_scaling(scaling, fs, win)

    detrend_func = _set_detrend(detrend, axis=-1)
@@ -496,6 +502,7 @@ def set_options(input_len,
            'nperseg': nperseg,
            'noverlap': noverlap,
            'nstep': nstep,
            'nwindows': nwindows,
            'nfft': nfft,
            'scale': scale,
            'boundary': boundary,
@@ -507,6 +514,7 @@ def set_options(input_len,
            'fmax': fmax,
            'detrend_func': detrend_func,
            'freqvals': freqvals}
    print(opts)

    return opts

@@ -619,7 +627,7 @@ def _is_sklearn_estimator(fit_method, strict=False):
    return test1 and test2 and test3


def compute_ols_varcopes(design_matrix, data, contrasts, betas):
def _compute_ols_varcopes(design_matrix, data, contrasts, betas):
    """Compute variance of cope estimates."""
    # Compute varcopes
    varcopes = np.zeros((contrasts.shape[0], data.shape[1]))
@@ -642,9 +650,9 @@ def compute_ols_varcopes(design_matrix, data, contrasts, betas):
    return varcopes


def process_regressor(Y, config, mode='confound'):
def _process_regressor(Y, config, mode='confound', prepend_dim=True):
    """Y is [nregs x nsamples]. Confound is scaled 0->1 and covariate is z-transformed."""
    if Y.ndim == 1:
    if Y.ndim == 1 and prepend_dim:
        Y = Y[np.newaxis, :]

    windowed = apply_delay_embedding(Y, config['nperseg'], config['noverlap'],
@@ -657,10 +665,121 @@ def process_regressor(Y, config, mode='confound'):
        y = y / y.max(axis=-1)[:, np.newaxis]
    elif mode == 'covariate':
        y = stats.zscore(y, axis=-1)
    elif mode is None:
        pass

    return y


def _specify_design(covariates, confounds, config, fit_constant=True):

    X = []
    Xlabels = []
    if fit_constant:
        X.append(np.ones((config['nwindows'],)))
        Xlabels.append('Constant')
    # Add covariates
    for idx, var in enumerate(covariates.keys()):
        X.append(_process_regressor(covariates[var], config, mode='covariate')[0, :])
        Xlabels.append(var)
    # Add confounds
    for idx, var in enumerate(confounds.keys()):
        X.append(_process_regressor(covariates[var], config, mode='confound')[0, :])
        Xlabels.append(var)

    design_matrix = np.vstack(X).T
    contrasts = np.eye(design_matrix.shape[1])

    return design_matrix, contrasts, Xlabels


def _run_prefit_checks(data, design_matrix, contrasts):

    # Make sure we're set for model fitting
    assert(data.shape[0] == design_matrix.shape[0])
    assert(design_matrix.shape[1] == contrasts.shape[0])


def _glm_fit_simple(pxx, covariates, confounds, config, fit_method='pinv', fit_constant=True):
    """Fit a GLM using a standard OLS fitting method."""
    # Prepare GLM components
    design_matrix, contrasts, Xlabels = _specify_design(covariates, confounds,
                                                        config, fit_constant=fit_constant)

    # Check we're probably good to go
    _run_prefit_checks(pxx, design_matrix, contrasts)

    # Compute parameters
    if fit_method == 'pinv':
        betas = np.linalg.pinv(design_matrix).dot(pxx)
    elif fit_method == 'lstsq':
        betas, resids, rank, s = np.linalg.lstsq(design_matrix, pxx)
    else:
        raise ValueError("'fit_method' input {0} not recognised".format(fit_method))

    # Compute COPES and VARCOPES
    copes = contrasts.dot(betas)
    varcopes = _compute_ols_varcopes(design_matrix, pxx, contrasts, betas)

    return copes, varcopes, None


def _glm_fit_sklearn_estimator(pxx, covariates, confounds, config, fit_method, fit_constant=True):
    """Fit a GLM using a sklearn-like estimator object."""
    # Prepare GLM components
    design_matrix, contrasts, Xlabels = _specify_design(covariates, confounds,
                                                        config, fit_constant=fit_constant)

    # Check we're probably good to go
    _run_prefit_checks(pxx, design_matrix, contrasts)

    # Compute parameters
    fit_method.fit(design_matrix, pxx)
    if hasattr(fit_method, 'coef_'):
        betas = fit_method.coef_.T
    else:
        # Sometimes this is stored in a sub model...
        betas = fit_method.estimator_.coef_.T
    extras = (fit_method)

    # Compute COPES and VARCOPES
    copes = contrasts.dot(betas)
    varcopes = _compute_ols_varcopes(design_matrix, pxx, contrasts, betas)

    return copes, varcopes, (fit_method)


def _glm_fit_glmtools(pxx, covariates, confounds, config, fit_constant=True):
    """Fit a GLM using the glmtools package."""
    import glmtools as glm  # keep this as a soft dependency

    # Allocate GLM data object
    data = glm.data.TrialGLMData(data=pxx)

    # Add windowed confounds and covariates - no preproc yet
    for key, value in covariates.items():
        data.info[key] = _process_regressor(value, config, mode=None, prepend_dim=False)
    for key, value in confounds.items():
        data.info[key] = _process_regressor(value, config, mode=None, prepend_dim=False)

    DC = glm.design.DesignConfig()
    if fit_constant:
        DC.add_regressor(name='Mean', rtype='Constant')
    for key in covariates.keys():
        print(key)
        DC.add_regressor(name=key, rtype='Parametric', datainfo=key, preproc='z')
    for key in confounds.keys():
        print(key)
        DC.add_regressor(name=key, rtype='Parametric', datainfo=key, preproc='unitmax')
    DC.add_simple_contrasts()

    des = DC.design_from_datainfo(data.info)

    model = glm.fit.OLSModel(des, data)

    return model.copes, model.varcopes, (model, des, data)


def psd_glm(X, covariates=None, confounds=None, fit_method='pinv',
            fit_constant=True,
            # General STFT kwargs - passed to set_options
@@ -673,10 +792,10 @@ def psd_glm(X, covariates=None, confounds=None, fit_method='pinv',
    ----------
    x : array_like
        Time series of measurement values
    covariates : list or None
        List of covariate time series to be added as z-standardised regessors.
    confounds : list or None
        List of confound time series to be added as positive-valued unitmax regessors.
    covariates : dict or None
        Dictionary of covariate time series to be added as z-standardised regessors.
    confounds : dict or None
        Dictionary of confound time series to be added as positive-valued unitmax regessors.
    fit_method : {'pinv', 'lstsq', 'glmtools', sklearn estimator instance}
        Specifies how the GLM parameters will be estimated.

@@ -732,8 +851,11 @@ def psd_glm(X, covariates=None, confounds=None, fit_method='pinv',
    -------
    f : ndarray
        Array of sample frequencies.
    Pxx : ndarray
        Power spectral density or power spectrum of x.
    copes : ndarray
        Power spectral density effect of each regressor
    varcopes : ndarray
        Variance of power spectral density effect of each regressor
    extras : None or tuple

    """
    # Option housekeeping
@@ -755,60 +877,31 @@ def psd_glm(X, covariates=None, confounds=None, fit_method='pinv',
    # Compute STFT
    f, t, p = compute_stft(X, config=config, output_roll='glm')

    # Specify design
    X = []
    if fit_constant:
        X.append(np.ones((p.shape[0],)))
    # Add covariates
    for idx, var in enumerate(covariates.keys()):
        X.append(process_regressor(covariates[var], config, mode='covariate')[0, :])
    # Add confounds
    for idx, var in enumerate(confounds.keys()):
        X.append(process_regressor(covariates[var], config, mode='confound')[0, :])

    design_matrix = np.vstack(X).T
    contrasts = np.eye(design_matrix.shape[1])

    # Prepare data
    orig_shape = p.shape
    p = _flatten(p)

    # Make sure we're set for model fitting
    assert(p.shape[0] == design_matrix.shape[0])
    assert(design_matrix.shape[1] == contrasts.shape[0])

    # Compute model
    extras = None
    copes = None  # Can compute separately if fit doesn't handle this
    if fit_method == 'pinv':
        betas = np.linalg.pinv(design_matrix).dot(p)
        # Design matrix pseudo-inverse method
        copes, varcopes, extras = _glm_fit_simple(p, covariates, confounds, config,
                                                  fit_method='pinv', fit_constant=fit_constant)
    elif fit_method == 'lstsq':
        betas, resids, rank, s = np.linalg.lstsq(design_matrix, p)
        # numpy.linalg.lstsq method
        copes, varcopes, extras = _glm_fit_simple(p, covariates, confounds, config,
                                                  fit_method='lstsq', fit_constant=fit_constant)
    elif fit_method == 'glmtools':
        import glmtools as glm
        des = glm.design.GLMDesign.initialise_from_matrices(design_matrix, contrasts)
        data = glm.data.TrialGLMData(data=p)
        model = glm.fit.OLSModel(des, data)
        betas = model.betas
        copes = model.copes
        varcopes = model.varcopes
        extras = (model, des, data)
        # glmtools 
        copes, varcopes, extras = _glm_fit_glmtools(p, covariates, confounds, config, fit_constant=fit_constant)
    elif _is_sklearn_estimator(fit_method):
        fit_method.fit(design_matrix, p)
        if hasattr(fit_method, 'coef_'):
            betas = fit_method.coef_.T
        else:
            # Somtimes this is stored in a sub model...
            betas = fit_method.estimator_.coef_.T
        extras = (fit_method)
        # sklearn
        copes, varcopes, extras = _glm_fit_sklearn_estimator(p, covariates, confounds, config,
                                                             fit_method=fit_method, fit_constant=fit_constant)
    else:
        raise ValueError('fit_method not recognised')

    if copes is None:
        # Compute contrasts if fitting method hasn't already
        copes = contrasts.dot(betas)
        varcopes = compute_ols_varcopes(design_matrix, p, contrasts, betas)

    # Preserve original input shape
    copes = _unflatten(copes, orig_shape)
    varcopes = _unflatten(varcopes, orig_shape)