function demos_realdata_motion_direction_89_djm()
% This example runs a decoding analysis on the example data that can
% be downloaded from the TDT webpage. It performs searchlight decoding
% between up and down motion using a 2-fold cross-validation
% scheme which splits the data in even and odd sessions.
%
% Modified by djm. 1/3/17, 5/9/22
% Based on demos 8 & 9 of The Decoding Toolbox, with single-subject
% statistics added, and demos in
% decoding_toolbox_V3.97b2/statistics/prevalence_inference/
%
% The extracted data are assumed to be in the folder 'TDTdemo8data' one
% level up from this file. If not, the script will ask you for the path.
%
% The results will be written in a result directory in the directory where
% the data are.
%
% This version assumes you use SPM, because the example data has been
% processed with SPM.
%
% You can change cfg.analysis = 'searchlight' to either 'ROI' or
% 'wholebrain' and see how it works. The output is then used for an
% additional permutation analysis.
%
% Inputs/outputs: none. The function works through side effects only:
%   - closes all figures, enables 'dbstop if error' and changes the
%     default axes font size for the MATLAB session
%   - adds directories to the MATLAB path
%   - writes result files below the data directory
%   - pauses twice at a 'keyboard' prompt (type 'dbcont' to continue)
%
% Happy decoding!
% Kai 2016/07/24

close all
dbstop if error % in case something goes wrong
set(0, 'DefaultAxesFontSize', 7);

%% Check that SPM and TDT are available on the path
fpath=fileparts(which(mfilename));
addpath(fpath) % add this directory to path
addpath(fileparts(fileparts(fpath))) % for TDT

% Look for 'spm' (the actual file name is spm.m) so that the check does not
% depend on case-insensitive lookup.
if isempty(which('spm'))
    try
        % Site-specific fallback location of SPM12, relative to this file.
        % No hard-coded '\' here: fullfile inserts the platform separator.
        addpath(fullfile(fpath,'..','..','..','..','spm12_fil_r7771'));
        spm('fmri')
    catch
        error('Please add SPM to the path and restart'),
    end
end

if isempty(which('decoding_defaults')), error('Please add TDT to the path and restart'), end
decoding_defaults; % add all important directories to the path

%% Locate data directory
% In your script, this part would look like
%    beta_loc = '/path_to_datadirectory'

% Here it is only longer because it's a demo
databasedir = fullfile(fpath,'..','TDTdemo8data');
check_subdirs = {'sub01_firstlevel_reducedResolution/sub01_GLM_3x3x3mm'; 'sub01_firstlevel/sub01_GLM'};
for c_ind = 1:length(check_subdirs)
    d = fullfile(databasedir, check_subdirs{c_ind});
    if exist(d, 'dir')
        beta_loc = d; break
    end
end
if ~exist('beta_loc','var')
    beta_loc = uigetdir('', 'Select the sub01_GLM* directory from the demo data (inside sub01_firstlevel*)');
    % uigetdir returns 0 when the dialog is cancelled; stop here with a
    % clear message instead of failing later with a numeric "path".
    if isequal(beta_loc, 0)
        error('No data directory selected. Please restart the script and select the sub01_GLM* directory of the demo data')
    end
end

% % Check that data is really in that directory
% try
%     c = design_from_spm(beta_loc);
% catch
%     error('Seems %s is not the correct directory with the demo data, because it does not contain any SPM or *_SPM.mat. Please check and restart the script', beta_loc)
% end
% expected_first_regressor = 'color';
% if ~strcmp(c{1}, expected_first_regressor)
%     error('The directory %s contains SPM data, but it does not seem to contain the demo data, because the first regressor is "%s" and not "%s" as expected. Please check and restart the script', beta_loc, c{1}, expected_first_regressor)
% end
dispv(1, 'Located demodata in %s, starting analysis', beta_loc);

%% First, define the analysis you want to perform

cfg = decoding_defaults; % add all important directories to the path, and set defaults
cfg.analysis = 'searchlight'; % 'searchlight', 'wholebrain', 'ROI' (if ROI, set one or multiple ROI images as mask files below instead of the mask)

% Specify where the results should be saved
cfg.results.dir = fullfile(beta_loc, 'results', 'motion_up_vs_down', cfg.analysis); % e.g. /TDT/data_loc/results/motion_up_vs_down/searchlight
cfg.results.overwrite = 1;
cfg.results.write=1; % 0=Write nothing, 1= .mat files and .nii files, 2=.mat files only (the meaning of this changed across versions!)

%% Second, get the file names, labels and run number of each brain image
% For example, you might have 6 runs, two categories, and one example of each.
% That should give you 12 images, one per run and category.
% Each image has an associated filename, run number and label (= category).
% With that info, you can for example do a leave-one-run-out cross-validation.

% There are two ways to get the information we need, depending on what you
% have done previously. The first way is easier.

% === Automatic Creation ===
% If you generated all parameter estimates (beta images) in SPM and were
% using only one model for all runs (i.e. have only one SPM.mat file), use
% the following block.

% Specify the directory to your SPM.mat and all related beta images:
% beta_loc = '/path_to_exampledata';
% display which regressors are in that directory
display_regressor_names(beta_loc); % djm: this design is unusual in not including realignment parameters
% Specify the label names that you gave your regressors of interest in the
% SPM analysis (e.g. 'button left' and 'button right').
% Case sensitive!
labelnames = {'up','down'};

%% Set brain mask or ROIs
% Also set the path to the brain mask(s) (e.g.  created by SPM: mask.img).
% Alternatively, you can specify (multiple) ROI masks as a cell or string
% matrix) if you want to perform a ROI analysis, e.g.
%   cfg.files.mask = fullfile('ROIdir', {'ROI1.img', 'ROI2.img'})
% Example data ROI files (here functionally defined V1 & MT, w indicates normalized image)
if strcmp(cfg.analysis, 'ROI')
    %% Set ROIs
    if exist(fullfile(beta_loc, '..', 'sub01_ROI_3x3x3mm'), 'dir')
        cfg.files.mask = fullfile(beta_loc, '..', 'sub01_ROI_3x3x3mm', {'v1.img', 'v4_both.img','mt_both.img'}); % reduce data
    elseif exist(fullfile(beta_loc, '..', 'sub01_ROI'), 'dir')
        cfg.files.mask = fullfile(beta_loc, '..', 'sub01_ROI', {'v1.img', 'v4_both.img', 'mt_both.img'}); % reduce data
    else
        % uigetfile returns the file name(s) WITHOUT the folder as first
        % output, so the folder (second output) has to be prepended.
        % MultiSelect allows picking several ROIs, as the prompt suggests.
        [roi_files, roi_dir] = uigetfile('', 'Could not automatically find ROI folder, please select which ROIs to use', 'MultiSelect', 'on');
        if isequal(roi_files, 0) % dialog cancelled
            error('No ROI file selected. Please restart the script and select at least one ROI image')
        end
        cfg.files.mask = fullfile(roi_dir, roi_files); % char for one file, cellstr for several
    end
else
    cfg.files.mask = fullfile(beta_loc, 'mask.img');
end

%% Get information from SPM
% The following function extracts all beta names and corresponding run
% numbers from the SPM.mat (and adds 'bin 1' to 'bin m', if a FIR design
% was used)
regressor_names = design_from_spm(beta_loc);

% Now with the names of the labels, we can extract the filenames and the
% run numbers of each label. The labels will be -1 and 1.
% Important: You must make sure to get the label names correct and that
% they have been uniquely assigned, so please check them in regressor_names
cfg = decoding_describe_data(cfg,labelnames,[-1 1],regressor_names,beta_loc);
%
% Other examples:
% For a cross classification, it could look like this:
% cfg = decoding_describe_data(cfg,{labelname1classA labelname1classB labelname2classA labelname2classB},[1 -1 1 -1],regressor_names,beta_loc,[1 1 2 2]);
%
% Or for SVR with a linear relationship it could look like this:
% cfg = decoding_describe_data(cfg,{labelname1 labelname2 labelname3 labelname4},[-1.5 -0.5 0.5 1.5],regressor_names,beta_loc);

% === Manual Creation ===
% Alternatively, you can also manually prepare the files field.
% For this, you have to load all images and labels you want to use
% separately, e.g. with spm_select. This is not part of this example, but
% if you do it later, you should end up with the following fields:
%   cfg.files.name: a 1xn cell array of file names
%   cfg.files.chunk: a 1xn vector of 'chunks' of names (e.g. run numbers)
%   cfg.files.label: a 1xn vector of labels (for decoding, you can choose
%       any two numbers as class labels)

%% Third, create your design for the decoding analysis

% In a design, there are several matrices, one for training, one for test,
% and one for the labels that are used (there is also a 'set' vector which we
% don't need right now). In each matrix, a column represents one decoding
% step (e.g. cross-validation run) while a row represents one sample (i.e.
% brain image). The decoding analysis will later iterate over the columns
% of this design matrix. For example, you might start off with training on
% the first 5 runs and leaving out the 6th run. Then the columns of the
% design matrix will look as follows (we also add the chunk (run) numbers
% and file names to make it clearer):
% cfg.design.train cfg.design.test cfg.design.label cfg.files.chunk  cfg.files.name
%        1                0              -1               1         ..\beta_0001.img
%        1                0               1               1         ..\beta_0002.img
%        1                0              -1               2         ..\beta_0009.img
%        1                0               1               2         ..\beta_0010.img
%        1                0              -1               3         ..\beta_0017.img
%        1                0               1               3         ..\beta_0018.img
%        1                0              -1               4         ..\beta_0025.img
%        1                0               1               4         ..\beta_0026.img
%        1                0              -1               5         ..\beta_0033.img
%        1                0               1               5         ..\beta_0034.img
%        0                1              -1               6         ..\beta_0041.img
%        0                1               1               6         ..\beta_0042.img

% Again, a design can be created automatically (with a design function) or
% manually. If you use a design more often, then it makes sense to create
% your own design function.
%
% If you are a bit confused what the three matrices (train, test & label)
% mean, have a look at them in cfg.design after you executed the next step.
% This should make it easier to understand.

% === Automatic Creation ===
% In this example we change the chunk variable to get a split half design,
% using all uneven runs as training set and all even runs as validation set
% in the first fold, and vice versa in the second fold
cfg.files.chunk = 2 - mod(cfg.files.chunk, 2); % this will change all uneven chunk values to 1, and all even values to 2
cfg.design = make_design_cv(cfg); % This creates the leave-one-chunk-out cross-validation design

% === Automatic Creation - alternative ===
% Alternatively, you can create the design during runtime of the decoding
% function, by specifying the following parameter:
% cfg.design.function.name = 'make_design_cv';
% For the current example, this is not helpful, because you can already
% create the design now. However, you might run into cases in which you
% can't create the design at this stage (e.g. if your design depends on the
% outcome of some previous runs), and then this function could become handy.

% === Manual Creation ===
% After having explained the structure of the design file above, it should
% be easy to create the structure yourself. You can then check it by visual
% inspection. Dependencies between training and test set will be checked
% automatically in the main function.

%% if you want to see your design matrix (you probably do!), use
cfg.fighandles.plot_design = 2; % to choose a figure number
plot_design(cfg); % to plot as a figure
display_design(cfg); % to display as text in console
% display(display_design(cfg)); % to force display if in quiet mode

%% Fourth, set additional parameters manually

% This is an optional step. For example, you want to set the searchlight
% radius and you have non-isotropic voxels (e.g. 3x3x3.75mm), but want the
% searchlight to be spherical in real space.

% Searchlight-specific parameters
cfg.searchlight.unit = 'mm'; % without this line, the default is to use voxels
cfg.searchlight.radius = 12; % this will yield a searchlight radius of 12mm.
cfg.searchlight.spherical = 0; % do not care if the searchlight is spherical (default)

% The verbose level allows you to determine how much output you want to see
% on the console while the program is running (0: no output, 1: normal
% output, 2: high output).
cfg.verbose = 1;

% parameters for libsvm (linear SV classification, cost = 1, no screen output)
% cfg.decoding.train.classification.model_parameters = '-s 0 -t 0 -c 1 -b 0 -q';

% Get different outputs
cfg.results.output = {'accuracy_minus_chance'}; % here you can add other measures,
% e.g. 'predicted_labels' if you want to get the predicted label for each
% input data point. See decoding_transform_results for more.
if strcmp(cfg.analysis, 'searchlight')
    cfg.plot_selected_voxels = 100; % Show the searchlight at every 100 steps to not waste much time with drawing
elseif strcmpi(cfg.analysis, 'roi')
    % Case-insensitive on purpose: this script spells the analysis 'ROI'
    % elsewhere, so a case-sensitive comparison with 'roi' would never match.
    cfg.plot_selected_voxels = 1; % Show every ROI
else
    cfg.plot_selected_voxels = 0; % Show nothing
end

% Enable scaling min0max1 (otherwise libsvm can get VERY slow)
% if you dont need model parameters, and if you use libsvm, use:
cfg.scale.method = 'min0max1';
cfg.scale.estimation = 'all'; % scaling across all data is equivalent to no scaling (i.e. will yield the same results), it only changes the data range which allows libsvm to compute faster

%% Fifth, run the decoding analysis

% Fingers crossed it will not generate any error messages ;)
results = decoding(cfg);

% if already run, can get 'results' from:
resultsfile=fullfile(cfg.results.dir,[cfg.results.filestart, '_', cfg.results.output{1}, '.mat']);
load(resultsfile); % NOTE(review): presumably this file holds the variable 'results' - confirm

%% To display in TDT:
% Only swap the trailing '.mat' extension. An unescaped '.mat' pattern would
% also rewrite any "<char>mat" elsewhere in the path (e.g. in 'format').
display_volume(regexprep(resultsfile,'\.mat$','.img'), 'plain', '', 12,  cfg)

%% Now view the output in e.g. MRIcroN
% Overlay on the structural
% Adjust the colour scale
% Inspect the histogram

keyboard % pause here so the results can be inspected; type 'dbcont' to continue

%% Now run a permutation test
% We will use the same cfg structure, although we could make a new one or
% load one from file, e.g.:
% cfg_file = fullfile(cfg.results.dir,'res_cfg.mat');
% load(cfg_file, 'cfg');

org_cfg = cfg; % store the unpermuted cfg to copy parameters below

rng(1); % we might want to seed the random number generator in case we ever want to recreate this exact set of permutations.

%% Create cfg with permuted sets
% see also "help make_design_permutation" on how to do this otherwise
if isfield(cfg,'design')
    cfg=rmfield(cfg,'design');
end
cfg.design.function = org_cfg.design.function;

if isfield(cfg.results,'resultsname')
    cfg.results = rmfield(cfg.results, 'resultsname'); % the name will be generated later
end
cfg.results.dir = fullfile(cfg.results.dir, 'perm'); % change directory
cfg.results.filestart = 'perm';
cfg.results.overwrite = 1;

cfg.plot_selected_voxels = 0;

make_design_permutation(cfg); % can run this with single input to just display
% the maximum possible number of unique permutations for this design

n_perms = 10;
% 10 chosen for demo only, normally you should pick a large number here,
% e.g. 1000+. The function might return fewer if fewer permutations are
% possible.

% Note: If you aim to perform PREVALENCE ANALYSES on the group level (see
%   Allefeld, Goergen, Haynes, 2016; Hirose, 2020), few permutations might
%   suffice. However, if more unique permutations exist, we strongly
%   recommend to also calculate more in these cases. If you wonder about
%   power, we currently suggest to perform simulations (see papers above).

combine = 1;
cfg.results.setwise=1;
% If combine==1, cfg.results.setwise should also be true. Then designs are
% combined into one large design where each iteration is treated as a different
% set. This speeds things up. The disadvantage is that for large data sets it
% might run out of memory faster, or be difficult to parallelize on a cluster.
% For regular output (e.g. accuracy) and 1000 searchlight permutations, it should be ok.
% See demo9_permutation_analysis for other ways to speed up if not 'combining'.

cfg.design = sort_design(make_design_permutation(cfg,n_perms,combine));
% sorting the designs can speed up decoding - if training data happen to be
% identical in two of the many cross-validation iterations, then re-training
% is not necessary.

[results_perm, final_cfg_perm] = decoding(cfg);
% even with only 10 permutations, likely to take 20-30 minutes...
resultsfile=fullfile(cfg.results.dir,[cfg.results.filestart, '_', cfg.results.output{1}, '.mat']);
save(resultsfile,'results_perm');

% if already run, can get 'results_perm' from:
qq=load(resultsfile);
results_perm=qq.results_perm;
% and 'final_cfg' from the file below, if it exists. Nothing in this script
% writes a file with this name, so keep the in-memory final_cfg_perm returned
% by decoding() when the file is missing instead of aborting.
cfg_perm_file = fullfile(cfg.results.dir,[cfg.results.filestart, '_cfg_perm.mat']);
if exist(cfg_perm_file, 'file')
    qq=load(cfg_perm_file);
    final_cfg_perm=qq.cfg;
end

%% Exercises we could do while we wait:

%%% 0) Look at the design matrix for modelling the data
%%% 1) Run 'roi' analysis
%%% 2) Run 'wholebrain' analysis
%%% 3) Decode colour (red vs green) instead of direction (up vs down)
%%% 4) Decode button press (left vs right) instead of direction
%%% 5) Could we 'Cross-classify' direction across colour? (E.g. does a classifier trained
%%%    to distinguish up/down on green dots successfully classify up/down
%%%    when tested on red dots? 'Yes' would imply that there is a pattern
%%%    for 'up' that generalises over colour. 'No' would imply that the
%%%    patterns are encoding conjunctions of direction and colour.)
%%% 6) Could we 'Cross-classify' direction across cues? Do the patterns for 'up' and
%%%    'down' depend on when subjects are asked to attend to colour or motion?
%%% 7) Add feature transformation (PCA)
%%% 8) Add feature scaling
%%% 9) Add feature selection
%%% 10) Use a different classifier
%%% 11) Add parameter selection
%%% 12) Add different outputs
%%% 13) Run using the GUI

%% save voxelwise permutation image
% TDT (V3.97b2) does not save voxelwise images for permutations. Here we
% will get it to do this, but not for every permutation, just the
% average across them:
tempcfg=final_cfg_perm;
tempcfg.design.function=rmfield(cfg.design.function,'permutation');
tempcfg.results.setwise=0;
decoding_write_results(tempcfg,results_perm);

% compare with the unpermuted decoding

%% single-subject statistics
final_cfg_perm.stats.test = 'permutation'; % could use 'binomial' if decoding steps were independent
final_cfg_perm.stats.tail = 'right';
final_cfg_perm.stats.output = 'accuracy_minus_chance';
final_cfg_perm.stats.results.write = 1;

decoding_statistics(final_cfg_perm,results,results_perm);

% examine:
% 'stats_accuracy_minus_chance_permutation_p_right.img'
% and
% 'stats_accuracy_minus_chance_permutation_z_right.img'
% (would obviously be more useful with >10 permutations!)

keyboard % pause again for inspection; type 'dbcont' to continue

return % remove this line to also run the example below (quite slow and technical)

%% group 'prevalence inference'
% for information see:
%   Allefeld, C., Goergen, K., & Haynes, J.-D. (2016).
%       Valid population inference for information-based imaging: From the
%       second-level t-test to prevalence inference. NeuroImage.
%       http://doi.org/10.1016/j.neuroimage.2016.07.040
%
% A longer, more didactic, previous version of the manuscript exists here:
%   Allefeld, C., Goergen, K., & Haynes, J.-D. (2015). http://arxiv.org/abs/1512.00810
%
% djm: Note that the validity of this approach depends on trusting the
% within-subject p-values. Also it might not be very sensitive?

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Input data, just for this demo:
% The analysis needs permutation data for multiple subjects as input.
%
% In this demo, the images from the single example subject are duplicated.
% In a real analysis the data should of course be different for every subject!
%
% The data needs to be provided in the variable inputimages, which is a
% cell of dimension subjects x permutations. The original unpermuted input
% for each subject should be provided as first entry for each subject, i.e.
% as subjects x 1. The cell array can either contain file names with full
% path as
%    1. .img/.nii filenames (cellstr)
%    2. .mat filenames (cellstr) from TDT
%    3. directly 3d data as struct (see demo_prevalenceInference_provide_own_data.m)

n_sbjs = 10;
decoding_measure = 'accuracy_minus_chance';

% directories and file masks for unpermuted and permuted images
orig_inputdir(1:n_sbjs,1) = {org_cfg.results.dir};
% regular expression (for more see help spm_select); anchored and with the
% dot escaped so that only the exact file name matches
orig_filemask(1:n_sbjs,1) = {['^res_' decoding_measure '\.mat$']};

perm_inputdir(1:n_sbjs,1) = {fullfile(org_cfg.results.dir, 'perm')};
perm_filemask(1:n_sbjs,1) = {['^perm.*_' decoding_measure '_set.*\.mat$']}; %

% Column 1 holds the unpermuted image; the permuted images are appended
% below, so the array grows to the number of files actually found and no
% empty cells are left over if fewer than n_perms files exist.
inputimages = cell(n_sbjs,1);
for sbj = 1:n_sbjs
    % get the original unpermuted result image as first image (required by the package)
    orig_image = cellstr(spm_select('FPList',orig_inputdir{sbj},orig_filemask{sbj}));
    if length(orig_image) ~= 1
        % report directory and mask: orig_image is a cell array and cannot
        % be passed to a %s conversion
        error('There should be exactly 1 unpermuted input file for %s %s, but we found %i, please check', orig_inputdir{sbj}, orig_filemask{sbj}, length(orig_image))
    elseif isempty(orig_image{1})
        % cellstr of an empty selection yields {''}, i.e. length 1
        error('No file found for %s %s, please check', orig_inputdir{sbj}, orig_filemask{sbj})
    end
    inputimages(sbj, 1) = orig_image;

    % put permuted images afterwards
    permuted_images = cellstr(spm_select('FPList',perm_inputdir{sbj},perm_filemask{sbj}));
    if length(permuted_images) == 1 && isempty(permuted_images{1})
        error('  No permuted images found for sbj %i with %s %s', sbj, perm_inputdir{sbj},perm_filemask{sbj});
    else
        fprintf('  Found %i permuted images for sbj %i\n', length(permuted_images), sbj);
    end

    inputimages(sbj, 2:length(permuted_images)+1) = permuted_images;
end

warning(['In this demo, we use the same images for all "sbjs". ' ...
    'In a real analysis the data should of course be different for every subj! ' ...
    'We also use a unrealistic low number of second level permutations. ', ...
    'You should clearly increase that for a real analysis. See prevalenceCore.m and the paper.'])

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%% Define where to save the results
resultdir = fullfile(orig_inputdir{1}, 'prevalenceDemo');
if ~exist(resultdir, 'dir') % avoid the "directory already exists" warning on re-runs
    mkdir(resultdir);
end
resultfilenames = fullfile(resultdir, 'prevalence');
disp(['Writing result to ' resultfilenames '*.*']);

%%% Do the analysis. Calculation can be stopped any time by closing the figure
% that pops up. The result at this moment in time will be saved as image and/or returned.

P2 = 20000; % number of 2nd level permutations, should be ~1e6 for a real analysis
prevalenceTDT(inputimages, P2, resultfilenames);

% The function returns images with the results. See prevalenceCore.m for more info.
%   .puGN         uncorrected p-values for global null hypothesis         (Eq. 24)
%   .pcGN         corrected p-values for global null hypothesis           (Eq. 26)
%   .puMN         uncorrected p-values for majority null hypothesis       (Eq. 19)
%   .pcMN         corrected p-values for majority null hypothesis         (Eq. 21)
%   .gamma0c      corrected prevalence lower bounds                       (Eq. 23)
%   .aTypical     median values of test statistic where pcMN <= alpha     (Fig. 4b)

return
