%	 Copyright (C) 2011  Plankensteiner
%
%    This program is free software: you can redistribute it and/or modify
%    it under the terms of the GNU General Public License as published by
%    the Free Software Foundation, either version 3 of the License, or
%    (at your option) any later version.
%
%    This program is distributed in the hope that it will be useful,
%    but WITHOUT ANY WARRANTY; without even the implied warranty of
%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%    GNU General Public License for more details.
%
%    You should have received a copy of the GNU General Public License
%    along with this program.  If not, see <http://www.gnu.org/licenses/>.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% DEMONSTRATION OF DETERMINING THE OPTIMAL NUMBER OF COMPONENTS %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%% NORMAL DISTRIBUTIONS %%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

 
%% Simulate the data set
clear all;

% Row vector of 130 observations: 50 draws from normrnd(3,0.1) stacked on top of
% 80 draws from normrnd(6,0.5), then transposed.
data.y = transpose(vertcat(normrnd(3,0.1,50,1), normrnd(6,0.5,80,1)));

data.N = length(data.y);   % number of observations
data.X = ones(1, data.N);  % dummy regressor row: the regression consists of one parameter only

%% plot histogram of the data together with the empirical (kernel) density
figure()
[n, xout] = hist(data.y, 100);
% Rescale the bin counts to a probability density (counts / (total * bin width)) so
% that the bars share the vertical scale of the ksdensity curve drawn on top.
% Scaling by max(n) instead would cap the bars at 1, whereas the density curve is
% not bounded by 1, so the two could not be compared visually.
binWidth = xout(2) - xout(1);   % hist with a scalar bin count returns equally spaced centres
n = n./(sum(n)*binWidth);
bar(xout,n)
hold on
ksdensity(data.y) % data is a mixture of Normal distributions - the number of components is supposed to be two, but is treated as unknown
hold off % do not let later plotting commands accumulate on this axes

% remove the plotting temporaries from the workspace
clear xout n binWidth;

%% QUESTION: How many components?
% No pre-defined initial values for the slice sampler are given => default values are used

% The data seem to be a mixture of at most 3 components, so K = Kmin..Kmax is tried
Kmin=1;  % smallest number of components considered
Kmax=3;  % largest number of components considered

M=1000;      % number of simulated samples after burn-in
burnin=500;  % length of the burn-in

% One marginal likelihood value per candidate K (position K-Kmin+1)
MarginalVal=zeros(1,numel(Kmin:Kmax));

for K=Kmin:Kmax

    % --- model definition: built from scratch for every K ---
    mix=struct('dist','Normal', 'K',K, 'd',size(data.X,1));

    % --- prior definition ---
    prior=priordefine_MOE(data,mix);

    % --- MCMC: obtain starting values / default settings, then override the run lengths ---
    [data,mix,mcmc]=mcmcstart_MOE(data,mix);
    mcmc.M=M;
    mcmc.burnin=burnin;
    mcmc.storeS=M;
    mcmcout=mcmc_MOE(data,mix,prior,mcmc);

    % The starting classification for the current K has to be deleted from data
    % before moving on.
    if isfield(data,'S')
        data=rmfield(data,'S');
    end

    % --- marginal likelihood of the current model ---
    [marlik,mcmcout]=mcmcbf_MOE(data,mcmcout);

    % keep the marlik.bs value for the later model comparison
    MarginalVal(K-Kmin+1)=marlik.bs;

    % --- store the results under a name that encodes K ---
    mcmcout.name=['store_data_K' num2str(K)];
    mcmcstore(mcmcout);
    disp(['Results ' mcmcout.name ' stored.']);

end

%% Reload the stored results of the model with the largest marginal likelihood
% (that model is taken as the best fit). max returns the position within
% MarginalVal; adding Kmin-1 turns it back into the number of components that
% was used in the file name.
[~, K] = max(MarginalVal);
load(['store_data_K', int2str(K+Kmin-1)]);
 
%% to decide whether one model is really better than another: also use the Bayes factor
% BF(i,j) is the Bayes factor of model i against model j: a value BF(i,j) > 1 indicates
% that model i is more supported by the data under consideration than model j
% (for the strength of evidence see http://en.wikipedia.org/wiki/Bayes_factor).
%
% NOTE(review): this assumes that marlik.bs (collected in MarginalVal) is the LOG of the
% marginal likelihood - confirm against mcmcbf_MOE. On the log scale the Bayes factor is
% exp(log p(y|M_i) - log p(y|M_j)); dividing the log values by each other does not give
% a Bayes factor and, for negative logs, reverses the "BF > 1" interpretation.
% logBF is kept as well because BF itself can overflow to Inf / underflow to 0 when the
% log marginal likelihoods differ by several hundred.
logBF= zeros(Kmax-Kmin+1, Kmax-Kmin+1);

for K=Kmin:Kmax
    % row of model K: log Bayes factor of model K against every candidate model
    logBF(K-Kmin+1,:)=MarginalVal(K-Kmin+1)-MarginalVal;
end

BF= exp(logBF);

%% Allocation posterior probabilities and classification of the data
clust = mcmcclust_MoE(data, mcmcout);
mcmcclustplot_MoE(data, clust);

%% Point estimates of the parameters from the posterior draws
[est, mcmcout] = mcmcestimate_MOE(mcmcout);
% The next two lines deliberately have no semicolon so the values are printed.
est.pm.par     % posterior mode estimates for mean and sigma
est.pm.weight  % posterior mode estimate for the mixture weight


%% Plot the fitted mixture
data.model = est.pm; % use the posterior mode estimate as the fitted model
dataplot_MoE(data)   % data are plotted together with the fit based on the best model and its posterior mode estimate