%% Introduction to signal analysis in Matlab: GLM I
% CBU workshop, Ed Sohoglu 2018

%% Regression
clearvars
close all
rng(1) % fix the random seed so the simulated data are reproducible

nReg = 1; % number of regressors
nSubj = 10; % number of subjects

% Design matrix: one column for the age covariate plus a constant
% (intercept) column.
age = randi([20 60],nSubj,1);
X = [age ones(nSubj,1)]; % X = design matrix

% Simulate data from the GLM: Y = B1*X1 + B2*X2 + ... + Constant + Error.
% Here beta 1 is the influence of age and beta 2 is the constant.
Y = 3*X(:,1) + 500 + 20*randn(nSubj,1);

% Visualise the data alongside the design matrix.
figure;
subplot(1,2,1); scatter(age,Y);
subplot(1,2,2); imagesc(X); colormap(gray); colorbar;

% Estimate the betas by least squares via the pseudoinverse.
betas = pinv(X)*Y;

% Residual (error) sum of squares, then overlay the model fit on the
% scatter plot.
prediction = X*betas;
res = Y-prediction;
SS_error = sum(res.^2);
subplot(1,2,1); hold on; plot(age,prediction); hold off

% Effect sum of squares: prediction from the regressor(s) of interest
% only (covariates and constant excluded).
prediction = X(:,1:nReg)*betas(1:nReg);
SS_effect = sum((prediction-mean(prediction)).^2);

% Mean squares = sums of squares divided by their degrees of freedom.
df_effect = nReg;
df_error = size(Y,1)-df_effect-1;
MS_effect = SS_effect/df_effect;
MS_error = SS_error/df_error;

% F statistic and its p-value (fcdf requires the Statistics Toolbox).
F = MS_effect/MS_error;
p = fcdf(F,df_effect,df_error,'upper');

% R2: proportion of variance explained (take the square root to get the
% correlation coefficient).
SS_total = sum((Y-mean(Y)).^2);
R2 = SS_effect/SS_total;

%% One-way ANOVA
clearvars
close all
rng(1) % fix the random seed so the simulated data are reproducible

nReg = 3; % number of regressors (groups)
nSubj = 10; % number of subjects per group

% Design matrix: one indicator column per group, plus a constant.
X = kron(eye(nReg),ones(nSubj,1)); % X = design matrix
X = [X ones(size(X,1),1)]; % add constant

% Simulate data from the GLM: Y = B1*X1 + B2*X2 + B3*X3 + Error.
% Betas 1 to 3 are the three group means; beta 4 is the constant.
Y = 500*X(:,1) + 700*X(:,2) + 600*X(:,3) + 20*randn(nReg*nSubj,1);

% Visualise the data alongside the design matrix.
figure;
subplot(1,2,1); scatter(1:nSubj*nReg,Y);
subplot(1,2,2); imagesc(X); colormap(gray); colorbar;

% Estimate the betas by least squares via the pseudoinverse.
betas = pinv(X)*Y;

% Residual (error) sum of squares, then overlay the model fit.
prediction = X*betas;
res = Y-prediction;
SS_error = sum(res.^2);
subplot(1,2,1); hold on; plot(1:nSubj*nReg,prediction); hold off

% Effect sum of squares: prediction from the group regressors only
% (constant excluded).
prediction = X(:,1:nReg)*betas(1:nReg);
SS_effect = sum((prediction-mean(prediction)).^2);

% Mean squares = sums of squares divided by their degrees of freedom.
df_effect = nReg-1;
df_error = size(Y,1)-df_effect-1;
MS_effect = SS_effect/df_effect;
MS_error = SS_error/df_error;

% F statistic and its p-value (fcdf requires the Statistics Toolbox).
F = MS_effect/MS_error;
p = fcdf(F,df_effect,df_error,'upper');

% R2: proportion of variance explained (take the square root to get the
% correlation coefficient).
SS_total = sum((Y-mean(Y)).^2);
R2 = SS_effect/SS_total;

%% One-way repeated measures ANOVA
clearvars
close all
rng(1) % fix the random seed so the simulated data are reproducible

nReg = 3; % number of regressors (conditions)
nSubj = 10; % number of subjects per condition

% Design matrix: one indicator column per condition, followed by one
% constant column per subject (modelling between-subject offsets).
X = kron(eye(nReg),ones(nSubj,1)); % condition regressors
X = [X kron(ones(nReg,1),eye(nSubj))]; % subject-specific constants

% Simulate data from the GLM: betas 1 to 3 are the condition means and
% betas 4 to 13 are the subject-specific constants.
subject_means = 200*randn(nSubj,1)+500;
subject_means = kron(ones(nReg,1),subject_means);
Y = 300*X(:,1) + 200*X(:,2) + 400*X(:,3) + subject_means + 20*randn(nReg*nSubj,1);

% Visualise the data (one colour per condition) and the design matrix.
Y2plot = reshape(Y,nSubj,nReg);
figure; subplot(1,2,1); hold on
for i=1:nReg
    scatter(1:nSubj,Y2plot(:,i));
end
hold off
subplot(1,2,2); imagesc(X); colormap(gray); colorbar;

% Estimate the betas by least squares via the pseudoinverse.
betas = pinv(X)*Y;

% Residual (error) sum of squares, then overlay the model fit (colour
% order reset so fits match their data points).
prediction = X*betas;
res = Y-prediction;
SS_error = sum(res.^2);
prediction2plot = reshape(prediction,nSubj,nReg);
subplot(1,2,1); hold on; ax = gca; ax.ColorOrderIndex = 1; plot(1:nSubj,prediction2plot); hold off

% Effect sum of squares: prediction from the condition regressors only.
prediction = X(:,1:nReg)*betas(1:nReg);
SS_effect = sum((prediction-mean(prediction)).^2);

% Mean squares. The error df also loses one per subject constant.
df_effect = nReg-1;
df_error = size(Y,1)-df_effect-nSubj;
MS_effect = SS_effect/df_effect;
MS_error = SS_error/df_error;

% F statistic and its p-value (fcdf requires the Statistics Toolbox).
F = MS_effect/MS_error;
p = fcdf(F,df_effect,df_error,'upper');

% R2: proportion of variance explained (take the square root to get the
% correlation coefficient).
SS_total = sum((Y-mean(Y)).^2);
R2 = SS_effect/SS_total;

%% ANCOVA

% This exercise combines the Regression and One-way ANOVA examples to
% build an ANCOVA model: RT data for three groups of subjects (N = 10 per
% group) plus an age covariate. ANCOVA tests the group effect after
% controlling for the influence of age.

clearvars % added for consistency with the other sections
close all
rng(1) % for reproducibility - code below will generate 'random' numbers in a predictable way

nReg = 3; % number of (group) regressors
nSubj = 10; % number of subjects per group

% make design matrix: group indicators, then the age covariate, then a constant
X = kron(eye(nReg),ones(nSubj,1)); % X = design matrix
age = linspace(20,70,nReg*nSubj)';
X = [X age]; % add age covariate
X = [X ones(size(X,1),1)]; % add constant

% make data using GLM equation Y = B1*X1 + B2*X2 + B3*X3 etc. + Error
% here betas 1 to 3 represent means for the three groups, beta 4 is the age
% covariate and beta 5 is the constant
Y = 500*X(:,1) + 700*X(:,2) + 600*X(:,3) + 3*X(:,4) + 20*randn(nReg*nSubj,1);

% plot data and design matrix
figure;
subplot(1,2,1); scatter(1:nSubj*nReg,Y);
subplot(1,2,2); imagesc(X); colormap(gray); colorbar;

% compute betas
betas = pinv(X)*Y;

% error sum of squares
prediction = X*betas;
res = Y-prediction;
SS_error = sum(res.^2);
subplot(1,2,1); hold on; plot(1:nSubj*nReg,prediction); hold off % plot data and prediction

% effect sum of squares
prediction = X(:,1:nReg)*betas(1:nReg); % ignore constant terms (and covariate)
SS_effect = sum((prediction-mean(prediction)).^2);

% compute mean squares
df_effect = nReg-1;
% BUGFIX: ANCOVA error df is N - groups - covariates = 30 - 3 - 1 = 26
% (model rank is 4: three group columns spanning the constant, plus age).
% The original subtracted nSubj - copied from the repeated-measures
% section, where the design has one constant per subject - giving 18.
df_error = size(Y,1)-nReg-1;
MS_effect = SS_effect./df_effect;
MS_error = SS_error/df_error;

% compute F statistic, p-value and R2
F = MS_effect./MS_error;
p = fcdf(F,df_effect,df_error,'upper'); % requires statistics toolbox

% R2 added for consistency with the other sections
SS_total = sum((Y-mean(Y)).^2);
R2 = SS_effect/SS_total; % R2 is the proportion of variance explained (take square root to compute correlation coefficient)

%% Two-way (2x2) ANOVA

% A more complex two-way (2x2) ANOVA: each of the three effects in the
% design (main effect A, main effect B, and the AxB interaction) can be
% assessed. Each factor column codes the difference between that
% factor's levels with +/-1 weights, and the interaction column is the
% element-wise product of the A and B columns.

clearvars
close all
rng(1) % fix the random seed so the simulated data are reproducible

nReg = 4; % number of regressors (conditions)
nSubj = 10; % number of subjects per group
chooseFactor = 1; % effect to assess (1 = A, 2 = B, 3 = interaction)

% Design matrix: contrast codes for A and B, their product for the
% interaction, and a constant.
A = kron([1 1 -1 -1]',ones(nSubj,1)); % Factor A
B = kron([1 -1 1 -1]',ones(nSubj,1)); % Factor B
AB = A.*B; % AxB interaction
X = [A B AB ones(nReg*nSubj,1)];

% Simulate data from the GLM: betas 1 to 3 weight the A, B and AxB
% contrast columns; beta 4 is the constant.
Y = 500*X(:,1) + 700*X(:,2) + 600*X(:,3) + 20*randn(nReg*nSubj,1);

% Visualise the data (one colour per condition) and the design matrix.
Y2plot = reshape(Y,nSubj,nReg);
figure; subplot(1,2,1); hold on
for i=1:nReg
    scatter(1:nSubj,Y2plot(:,i));
end
hold off
subplot(1,2,2); imagesc(X); colormap(gray); colorbar;

% Estimate the betas by least squares via the pseudoinverse.
betas = pinv(X)*Y;

% Residual (error) sum of squares, then overlay the model fit (colour
% order reset so fits match their data points).
prediction = X*betas;
res = Y-prediction;
SS_error = sum(res.^2);
prediction2plot = reshape(prediction,nSubj,nReg);
subplot(1,2,1); hold on; ax = gca; ax.ColorOrderIndex = 1; plot(1:nSubj,prediction2plot); hold off

% Effect sum of squares: prediction from the chosen effect column only.
prediction = X(:,chooseFactor)*betas(chooseFactor);
SS_effect = sum((prediction-mean(prediction)).^2);

% Mean squares: the chosen effect has a single df; the error df comes
% from the full model (three effect columns plus the constant).
df_effect = 1;
df_error = size(Y,1)-(nReg-1)-1;
MS_effect = SS_effect/df_effect;
MS_error = SS_error/df_error;

% F statistic and its p-value (fcdf requires the Statistics Toolbox).
F = MS_effect/MS_error;
p = fcdf(F,df_effect,df_error,'upper');

% R2: proportion of variance explained (take the square root to get the
% correlation coefficient).
SS_total = sum((Y-mean(Y)).^2);
R2 = SS_effect/SS_total;