%	 Copyright (C) 2011  Plankensteiner
%
%    This program is free software: you can redistribute it and/or modify
%    it under the terms of the GNU General Public License as published by
%    the Free Software Foundation, either version 3 of the License, or
%    (at your option) any later version.
%
%    This program is distributed in the hope that it will be useful,
%    but WITHOUT ANY WARRANTY; without even the implied warranty of
%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%    GNU General Public License for more details.
%
%    You should have received a copy of the GNU General Public License
%    along with this program.  If not, see <http://www.gnu.org/licenses/>.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% DEMONSTRATION OF DETERMINING THE OPTIMAL NUMBER OF COMPONENTS %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%% NORMAL DISTRIBUTIONS %%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% data obligatory fields:
%%% .y: 1 x N data vector
%%% .N: number of data
%%% .censor: 1 x N vector with 0/1 (uncensored/censored)

%% Load data:

load datareg_MoE.mat
% The file provides two variables: datareg and data_future.
%
% datareg consists of:
%   - N:                  number of data points
%   - y:                  data points
%   - censor:             1 x N vector of 0/1 flags, 1 = data point i is censored,
%                         0 = not censored
%   - X:                  matrix of cofactor values for the regression (leading 1-row)
%   - EmpEst_weights:     empirically estimated weights (assuming that y follows
%                         a mixture of distributions, in this case K=2)
%   - Cofactor_weights:   cofactor values for the regression model of the
%                         mixture weights
%   - EmpEst_weightfun:   function for the empirically estimated weights
%                         (uses EmpEst_weights)
%   - Cofactor_weightfun: regression function for the mixing weights
%                         (uses Cofactor_weights)
%
% data_future consists of:
%   - N:            number of new data points
%   - y:            actually measured data points (for comparison with predictions)
%   - X:            cofactor values for the regression (leading 1-row)
%   - censor:       1 x N vector of 0/1 flags, 1 = censored, 0 = not censored
%                   (for comparison with predictions)
%   - MoE_Xweights: cofactor values for the weights


%% Prepare data:
% Work on a copy of datareg with every field that is not needed at this
% point removed in one go (the regression design X is replaced below).
data = rmfield(datareg, {'EmpEst_weights', 'Cofactor_weights', ...
                         'EmpEst_weightfun', 'Cofactor_weightfun', 'X'});

% Intercept-only 'regression' model: a single row of ones acts as a dummy
% design matrix, so the regression has exactly one parameter.
data.X = ones(1, length(data.y));

% Kernel density plot of the observations. The data are a mixture of Normal
% distributions whose number of components is unknown.
ksdensity(data.y);

%% QUESTION: How many components?
% No initial values are supplied for the slice sampler, so its defaults apply.

Kmin = 1;       % smallest number of components to try
Kmax = 3;       % largest number of components to try

M = 1000;       % number of simulated samples kept after burn-in
burnin = 500;   % length of the burn-in phase

% One entry per candidate K; entry K-Kmin+1 holds the value for K components.
MarginalVal = zeros(1, Kmax-Kmin+1);

for K = Kmin:Kmax
    % Model definition: Normal mixture with K components; d is the number
    % of rows of the design matrix.
    mix = struct('dist', 'Normal', 'K', K, 'd', size(data.X,1));

    % Prior for this model.
    prior = priordefine_MOE(data, mix);

    % MCMC: obtain starting values / sampler settings, then override the
    % run length, burn-in and the number of stored allocations.
    [data, mix, mcmc] = mcmcstart_MOE(data, mix);
    mcmc.M      = M;
    mcmc.burnin = burnin;
    mcmc.storeS = M;
    mcmcout = mcmc_MOE(data, mix, prior, mcmc);

    % The starting classification belongs to the current K only, so it is
    % removed before the next model is set up.
    if isfield(data, 'S')
        data = rmfield(data, 'S');
    end

    % Marginal likelihood of the current model.
    [marlik, mcmcout] = mcmcbf_MOE(data, mcmcout);

    % Keep the .bs value of the marginal likelihood result for comparison.
    MarginalVal(K-Kmin+1) = marlik.bs;

    % Persist the MCMC output under a name that encodes K.
    mcmcout.name = ['store_data_K' num2str(K)];
    mcmcstore(mcmcout);
    disp(['Results ' mcmcout.name ' stored.']);
end
  
%% Compare the candidate models with Bayes factors
% BF(i,j) compares model i (row) with model j (column): a value BF > 1
% indicates that model i is more supported by the data under consideration
% than model j (for the strength of evidence see
% http://en.wikipedia.org/wiki/Bayes_factor).
% NOTE(review): the ratio below is only a Bayes factor if marlik.bs is a
% marginal likelihood on the natural scale. If mcmcbf_MOE returns LOG
% marginal likelihoods, exp(MarginalVal(i) - MarginalVal) would be the
% correct expression -- TODO confirm against mcmcbf_MOE.
nModels = Kmax-Kmin+1;
BF = zeros(nModels, nModels);

for iModel = 1:nModels   % separate index so that K is not clobbered
    BF(iModel,:) = MarginalVal(iModel)./MarginalVal;
end

%% Select the model used for all further investigation
% After the loop over K the workspace variable mcmcout still belongs to the
% LAST model that was run (K = Kmax), not to the preferred one. The stored
% results of the chosen model therefore have to be loaded explicitly.
%
% Default choice: the K with the largest marginal likelihood. If BF shows
% that a larger K is not significantly better than a smaller one (for this
% demo data set K>2 is not significantly better than K=2), overwrite Kbest
% by hand before the results are loaded.
[~, bestIdx] = max(MarginalVal);
Kbest = bestIdx + Kmin - 1;

% Presumably mcmcstore saved the struct as variable 'mcmcout' in a MAT-file
% named after mcmcout.name -- verify against mcmcstore. Loading into a
% struct makes a mismatch fail loudly instead of silently reusing the
% K = Kmax results.
stored = load(['store_data_K' num2str(Kbest)]);
mcmcout = stored.mcmcout;
disp(['Using stored results for K = ' num2str(Kbest) '.']);

%% Plot allocation posterior probabilities and data classification 
[clust]=mcmcclust_MoE(data, mcmcout);
mcmcclustplot_MoE(data, clust);

%% estimate point parameters from the posterior draws
[est,mcmcout]=mcmcestimate_MOE(mcmcout);

%% plot the fitted mixture
data.model=est.pm; % takes the posterior mode estimate
dataplot_MoE(data) % based on the selected model and posterior mode estimate, data are fitted and plotted