%	 Copyright (C) 2011  Bluder, Plankensteiner
%
%    This program is free software: you can redistribute it and/or modify
%    it under the terms of the GNU General Public License as published by
%    the Free Software Foundation, either version 3 of the License, or
%    (at your option) any later version.
%
%    This program is distributed in the hope that it will be useful,
%    but WITHOUT ANY WARRANTY; without even the implied warranty of
%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%    GNU General Public License for more details.
%
%    You should have received a copy of the GNU General Public License
%    along with this program.  If not, see <http://www.gnu.org/licenses/>.

function out = CrossValidation_MOE(data, func, mix, prior, varargin)
%CROSSVALIDATION_MOE Leave-one-dataset-out cross validation.
%
%   out = CrossValidation_MOE(data, func, mix, prior) removes one dataset
%   at a time from DATA, fits the model to the remaining observations by
%   calling FUNC, and evaluates the held-out dataset with mcmcpreddens_MOE.
%
%%%%%%%%%% Input %%%%%%%%%%
%   data  = struct with the observations. Fields read by this function:
%             y        observations (N is taken as size(y,2))
%             X        covariates, indexed column-wise with the same
%                      indices as y (required)
%             sizes    used directly as the index of the last observation
%                      of each dataset; obtained from DetermineSizes(data)
%                      when the field is missing
%             censor   censoring indicator per observation; an all-zero
%                      vector is created (with a warning) when missing
%             Xweights covariates passed to mix.weightfun (required when
%                      mix.MOE is true)
%             S        optional allocation vector, split like y if present
%   func  = simulation function (e.g. 'mcmc_MOE'), given as a function name
%           or a function handle. It is called as
%           func(datareg, mixreg, prior, mcmc, Sperm).
%   mix   = mixture model for the MCMC simulation. Fields read here:
%             K             required
%             MOE           default false
%             weightfun     required when MOE is true; name or handle of
%                           the weight function, called as
%                           weightfun(datareg.Xweights, mix.K)
%             MOE_pred      default false. If true, mix.MOE_Xweights and
%                           mix.MOE_weightfun are used for the held-out
%                           dataset instead of data.Xweights/mix.weightfun.
%             StoreResult   default false. If true, the MCMC output of
%                           every fold is stored with mcmcstore.
%   prior = prior specification handed to FUNC; prior.weight is overwritten
%           in every fold when mix.MOE is true.
%   varargin = accepted for backward compatibility but not read by this
%           implementation. (The header historically described varargin{1}
%           as a prior specification for priordefine_MOE and varargin{2}
%           as a specification for Sperm.)
%
%%%%%%%%%% Output %%%%%%%%%%
%   out.posterior_pred = cell array; element i is the second output of
%                        mcmcpreddens_MOE for fold i, extended by the field
%                        .data holding the held-out dataset
%   out.summary        = cell array; element i is mcmcout.est.ml of fold i
%   out.error          = vector with the sum of squared residuals
%                        (posterior_pred{i}.res) of each fold
%   out.error_std      = out.error divided by the number of held-out
%                        observations of the fold
%
%   If a required input is missing, a warning is issued and an empty
%   struct is returned.

% Assigned up front so the early returns below hand back a defined (empty)
% result instead of failing with "output argument not assigned".
out = struct();

%check Inputs:

%modify by Plankensteiner begin
if ~isfield(data, 'sizes')
    data = DetermineSizes(data);
end

if ~isfield(mix, 'K')
    warning('mix.K is missing. Define mix.K.');
    return
end

% Both flags get their defaults first; previously MOE_pred stayed
% undefined when mix.MOE existed but was false.
MOE = false;
MOE_pred = false;
if isfield(mix, 'MOE')
    MOE = mix.MOE;
end

if MOE
    if ~isfield(mix, 'weightfun')
        warning('mix.weightfun is missing. Define a weight function for MOE model.');
        return
    end

    if ~isfield(data, 'Xweights')
        warning('data.Xweights is missing. Define weights for MOE model.');
        return
    end

    if isfield(mix, 'MOE_pred')
        MOE_pred = mix.MOE_pred;
        if MOE_pred && (~isfield(mix, 'MOE_Xweights') || ~isfield(mix, 'MOE_weightfun'))
            warning('mix.MOE_Xweights and/or mix.MOE_weightfun is missing. Define it first before calling the function.');
            return
        end
    end
end

StoreResult = false;
if isfield(mix, 'StoreResult')
    StoreResult = mix.StoreResult;
end

if ~isfield(data, 'X')
    warning('data.X is missing. Define dataX for regression or MOE model.');
    return
end

if ~isfield(data, 'censor')
    warning('censor vector is created. No data is censored');
    data.censor = zeros(1, length(data.y));
end

% With fewer than 21 datasets the predictions are also plotted. The flag
% is kept in its own variable so that nothing returned by the helpers
% called in the loop can overwrite it.
nSets = length(data.sizes);
showPlots = nSets < 21;
if showPlots
    hFig = figure();
    nRows = floor(sqrt(nSets));
    nCols = ceil(nSets / nRows);   % enough columns for all subplots
end

% NOTE(review): the original code built a local allocation vector S from
% equally wide bins of data.y when data.S was missing, but never stored it
% in data.S or used it. The dead computation was removed; data.S is only
% forwarded when the caller supplies it.
%modify by Plankensteiner end

breaks = data.sizes;
for i = 1:nSets
    % Index range [lo, hi] of the dataset that is left out in this fold.
    if i == 1
        lo = 1;
    else
        lo = breaks(i-1) + 1;
    end
    hi = breaks(i);

    % NOTE(review): for the first dataset all covariate columns are kept,
    % for every later dataset only its first column, and Xweights always
    % keeps a single column. This mirrors the original behaviour; confirm
    % what mcmcpreddens_MOE expects before unifying.
    if i == 1
        covCols = lo:hi;
    else
        covCols = lo;
    end

    % training data: everything except the held-out dataset
    datareg = data;
    datareg.y = data.y([1:(lo-1), (hi+1):end]);
    datareg.X = data.X(:, [1:(lo-1), (hi+1):end]);
    if isfield(data, 'Xweights')
        datareg.Xweights = data.Xweights(:, [1:(lo-1), (hi+1):end]);
    end
    datareg.censor = data.censor([1:(lo-1), (hi+1):end]);
    datareg.N = size(datareg.y, 2);
    if isfield(data, 'S')
        datareg.S = data.S([1:(lo-1), (hi+1):end]);
    end

    % held-out dataset used for the prediction
    datanew = data;
    datanew.y = data.y(lo:hi);
    datanew.X = data.X(:, covCols);
    if isfield(data, 'Xweights')
        datanew.Xweights = data.Xweights(:, lo);
    end
    datanew.censor = data.censor(lo:hi);
    datanew.N = size(datanew.y, 2);
    if isfield(data, 'S')
        datanew.S = data.S(lo:hi);
    end

    if MOE_pred
        % separate weights / weight function for the prediction step
        datanew.Xweights = mix.MOE_Xweights(:, covCols);
        datanew.weightfun = mix.MOE_weightfun;
    end

    if MOE
        % feval accepts a function name as well as a function handle and
        % avoids evaluating a concatenated string.
        prior.weight = feval(mix.weightfun, datareg.Xweights, mix.K);
        % NOTE(review): the original additionally assigned mix.weightfun
        % to an otherwise unused variable "dataregnew" (possibly meant to
        % be datareg or datanew). The dead assignment was removed.
    end

    % these fields are dropped from the held-out dataset before prediction
    dropFields = {'MOE_weightfun', 'MOE_Xweights', 'sizes'};
    for f = 1:numel(dropFields)
        if isfield(datanew, dropFields{f})
            datanew = rmfield(datanew, dropFields{f});
        end
    end

    % define start parameters
    mixreg = mix;
    [datareg, mixreg, mcmc] = mcmcstart_MOE(datareg, mixreg);

    % simulating the posterior distribution with M = 3000 and burnin = 1000
    mcmc.M = 3000;
    mcmc.burnin = 1000;

    Sperm.N = 1;
    Sperm.burnin = 300;

    mcmcout = feval(func, datareg, mixreg, prior, mcmc, Sperm);
    % only the updated mcmcout of the next three calls is used
    [est, mcmcout] = mcmcestimate_MOE(mcmcout);
    [ic, mcmcout] = mcmcic_MOE(datareg, mcmcout);
    [marlik, mcmcout] = mcmcbf_MOE(datareg, mcmcout);

    % The first output is deliberately kept away from showPlots.
    [predFig, out.posterior_pred{i}] = mcmcpreddens_MOE(datanew, mcmcout, 1, 0);
    out.posterior_pred{i}.data = datanew;
    % save the summary statistics
    out.summary{i} = mcmcout.est.ml;
    % calculate the sum of squared errors
    out.error(i) = sum(out.posterior_pred{i}.res.^2);
    out.error_std(i) = out.error(i) / datanew.N;

    % sprintf keeps the blank after the colon, which strcat used to strip
    info = sprintf('Cross validation info: %d/%d evaluated', i, nSets);
    disp(info)

    if StoreResult
        mcmcout.datareg = datareg;
        mcmcout.datanew = datanew;
        mcmcout.name = ['CrossVal_', num2str(i)];
        mcmcout.comment = info;
        mcmcstore(mcmcout);
    end
end

if showPlots
    % make the figure created before the loop current again in case a
    % helper opened other figures in the meantime
    figure(hFig);
    for i = 1:nSets
        pred = out.posterior_pred{i};
        subplot(nRows, nCols, i)
        ksdensity(pred.data.y, 'cens', pred.data.censor);
        hold all;
        plot(pred.dens.y, pred.dens.pdf, '--r', 'Linewidth', 1);
        xlabel('y');
        xlim([min(pred.data.y)-2, max(pred.data.y)+2]);
        title(['Leaving out Nr. ' num2str(i)]);
    end
end

end