%	 Copyright (C) 2011  Plankensteiner
%
%    This program is free software: you can redistribute it and/or modify
%    it under the terms of the GNU General Public License as published by
%    the Free Software Foundation, either version 3 of the License, or
%    (at your option) any later version.
%
%    This program is distributed in the hope that it will be useful,
%    but WITHOUT ANY WARRANTY; without even the implied warranty of
%    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
%    GNU General Public License for more details.
%
%    You should have received a copy of the GNU General Public License
%    along with this program.  If not, see <http://www.gnu.org/licenses/>.

function PIT = PIT_mixtures(data, mcmcout, alpha)
% PIT_MIXTURES  Probability Integral Transform (PIT) diagnostics for normal
% (mixture) models, based on MCMC draws and the plug-in method.
%
% For every MCMC draw the model cdf is evaluated at every observation,
% PIT = F(y), which should be ~ Unif[0,1] if the model fits. The PIT values
% are tested for uniformity and a histogram and a cdf comparison are plotted
% in a new figure.
%
% NOTE (original author): only valid for 'mcmcout.ranperm = false'.
%
% Non-censored observations:
%   One component,  y ~ N(\mu, \sigma) with \mu = \beta*data.X:
%       PIT = normcdf(y, \mu, \sigma)
%   K components,   y ~ \sum_k w_k * N(\mu_k, \sigma_k) with \mu_k = \beta_k*data.X:
%       PIT = \sum_k w_k * normcdf(y, \mu_k, \sigma_k)
%
% Censored observations (data.censor == 1):
%   The model cdf value q at the recorded y is computed in the same way and
%   the PIT value is drawn uniformly from the interval [q, 1].
%
% Input:
%   data: structure containing fields
%         .y ... (1xN) vector of observed data
%         .X ... (dxN) matrix of cofactors (used if mcmcout.model has a field 'd')
%         .censor ... (1xN) indicator vector, 1 = censored, 0 = observed
%                     (optional; an all-zero vector is created if missing)
%         .weightfun ... name of the function for the mixture weight
%                        (only for 'FixedW' / MoE models); called as
%                        weightfun(data.Xweights, K) and indexed as an (NxK) matrix
%         .Xweights ... (pxN) matrix of cofactors for 'weightfun' (only for MoE models)
%         .name ... (optional) data set name used in the histogram title
%   mcmcout: structure with the MCMC output; fields read here:
%         .M ... number of draws
%         .model.K ... number of components (optional, default 1)
%         .model.d ... if present, means are computed as par.beta(:,:,k)*data.X
%         .model.indicmod.dist ... weight model (optional, default 'Multinomial');
%                                  'FixedW' selects weights from data.weightfun
%         .par.beta / .par.mu ... regression coefficients / component means per draw
%         .par.sigma ... the square root is taken before it is used as the
%                        standard deviation in normcdf (i.e. treated as variance)
%         .weight ... (MxK) mixture weights per draw (K > 1 and not 'FixedW')
%         .ranperm ... (optional, default false) if true the permuted draws
%                      (.parperm, .Mperm, .weightperm) are used instead
%   alpha: significance level of the KS tests (optional, default 0.05)
%
% Output is a structure with fields:
%       BayesKS: structure with the output of the KS test for each MCMC draw (1...M)
%                .tests ... (Mx3) matrix with the results of the KS test for
%                           each draw ([h_m, p_m, ksstat_m])
%                .comment ... null hypothesis
%                .perc ... fraction of draws for which the null hypothesis is accepted
%             Y: (M*Nx1) vector with the PIT values of all draws and observations
%                (per draw the non-censored observations come first, then the
%                censored ones)
%   maxDistance: max distance between the empirical cdf of the PIT values and
%                the cdf of a Unif[0,1]
%        ksstat: approximate critical value of the KS test statistic
%                (uniformity is rejected if maxDistance > ksstat)
%           KSp: p-value of a one-sample KS test of all PIT values against
%                Unif[0,1] (uniformity is rejected if KSp < alpha)
%          corr: correlation between the support points and the values of the
%                empirical cdf of the PIT values

% default significance level (backward compatible: callers passing alpha are unaffected)
if nargin < 3 || isempty(alpha)
    alpha = 0.05;
end

if ~isfield(mcmcout.model, 'K')
    K = 1;
else
    K = mcmcout.model.K;
end

if ~isfield(mcmcout.model,'indicmod')
    mcmcout.model.indicmod.dist = 'Multinomial';
end
isFixedW = strcmp(mcmcout.model.indicmod.dist,'FixedW');

N = length(data.y);

if ~isfield(data, 'censor')
    disp('A zero censored data vector was constructed - because there are no censored data!');
    data.censor = zeros(1,N);
end

% split the observations once; PIT columns are ordered [observed, censored]
isObs     = (data.censor == 0);
isCens    = (data.censor == 1);
yobserved = data.y(isObs);
ycensor   = data.y(isCens);
nObs      = length(yobserved);
nCens     = length(ycensor);
hasCens   = nCens > 0;

if ~isfield(mcmcout,'ranperm')
    mcmcout.ranperm = false;
end

if mcmcout.ranperm
    if ~isfield(mcmcout,'parperm')
        mcmcout = mcmcpermute_MOE(mcmcout);
    end
    % consider only permuted draws
    mcmcout.par    = mcmcout.parperm;
    mcmcout.M      = mcmcout.Mperm;
    mcmcout.weight = mcmcout.weightperm;
end

PIT.BayesKS.tests = nan(mcmcout.M,3);

% component means per draw and observation: mu(i,n,k)
if isfield(mcmcout.model,'d')
    for k = 1:K
        mu(:,:,k) = mcmcout.par.beta(:,:,k)*data.X;
    end
else
    for k = 1:K
        mu(:,:,k) = repmat(mcmcout.par.mu(:,k),1,N);
    end
end

% PIT concerning one dataset => weights per data and not per mcmcout.weights
if isFixedW
    % order the components by their mean (over the observations) in the first
    % draw; the dimension is given explicitly so that N == 1 is handled too
    [~, ind] = sort(squeeze(mean(mu(1,:,:), 2)));
    if K > 1
        % The weights depend neither on the draw nor on the component, so
        % they are evaluated once. feval avoids evaluating string-built code
        % and also accepts a function handle.
        fixedWeight = feval(data.weightfun, data.Xweights, K);
        fixedWeight = fixedWeight(:,ind);
    end
end

sigma = sqrt(mcmcout.par.sigma);

% M x N array: entry ij = PIT value of the jth observation under the ith draw
Y = zeros(mcmcout.M, N);
% reference sample for the per-draw two-sample KS test
uniformGrid = linspace(0,1,N);

for i = 1:mcmcout.M
    % lower interval limits (model cdf values) of the censored observations
    quantillim = zeros(nCens,1);

    if K == 1
        Y(i,1:nObs) = normcdf(yobserved, mu(i,isObs,1), sigma(i));
        if hasCens
            quantillim = normcdf(ycensor, mu(i,isCens,1), sigma(i))';
        end
    else
        for j = 1:K
            if isFixedW
                % observation specific weights
                wObs  = fixedWeight(isObs,j)';
                wCens = fixedWeight(isCens,j);
            else
                % one weight per draw and component
                wObs  = mcmcout.weight(i,j);
                wCens = mcmcout.weight(i,j);
            end
            Y(i,1:nObs) = Y(i,1:nObs) + wObs.*normcdf(yobserved, mu(i,isObs,j), sigma(i,j));
            if hasCens
                % accumulate the mixture cdf for the censored data
                quantillim = quantillim + wCens.*normcdf(ycensor, mu(i,isCens,j), sigma(i,j))';
            end
        end
    end

    if hasCens
        % draw uniform random samples out of the interval [quantillim, 1]
        Y(i,(nObs+1):N) = unifrnd(quantillim, ones(nCens,1));
    end

    % Bayes KS test (for each draw)
    [h,p,ksstat] = kstest2(Y(i,:), uniformGrid, alpha);
    PIT.BayesKS.tests(i,:) = [h,p,ksstat];
end

PIT.Y = reshape(Y, mcmcout.M*N, 1);

figure();
subplot(1,2,1)
[n, xout] = hist(PIT.Y, 15);
% normalise so that a perfectly uniform histogram has bar height 1
nnor = n./mean(n);
bar(xout,nnor,'hist');
ylim([0 2]);
if isfield(data, 'name')
    title(['PIT-histogram of ' data.name ' data'] );
else
    title('PIT-histogram')
end

subplot(1,2,2)
plot(linspace(0,1,10000), linspace(0,1,10000), 'r');
hold all;
[f,x] = ecdf(PIT.Y);
[xb,yb] = stairs(x,f);
plot(xb,yb)
grid on;
legend('cdf of a unif[0,1]', 'cdf of PIT values');
title('Comparison of CDFs')
PIT.maxDistance = max(abs(f-x));
% approximation of KS-value
PIT.ksstat = sqrt(log(2/alpha)/(2*length(PIT.Y)));

% percentage of passed tests for Bayes KS test
PIT.BayesKS.comment = 'perc = percentage of accepted H0: Data are uniformly distributed';
PIT.BayesKS.perc = 1-sum(PIT.BayesKS.tests(:,1))/mcmcout.M;

% One-sample KS test of all PIT values against Unif[0,1]. The hypothesised
% cdf is passed as a two-column matrix [x, F(x)].
[~, PIT.KSp] = kstest(PIT.Y, [PIT.Y, unifcdf(PIT.Y, 0, 1)], alpha);
PIT.corr = corr(x,f);

