%KPCA Kernel Principal Component Analysis
%
%   [W,L] = KPCA(K,ALF)
%            OR
%   [W,L] = KPCA(W,ALF)
%
% INPUT
%   K    NxN kernel or symmetric similarity matrix (dataset)
%   W    Trained KPCA projection in a kernel-induced space
%   ALF  Parameter determining the dimensionality and the mapping (optional, default: Inf)
%        (0,1) - fraction of the total preserved variance; e.g. ALF = 0.9
%        M     - number of dimensions in the subspace; e.g. ALF = 5
%        Inf   - no dimensionality reduction; all dimensions are kept
%                (the trailing components are usually very noisy)
%
% OUTPUT
%   W    Kernel PCA mapping
%   L    Sorted list of eigenvalues
%
% DEFAULTS
%   ALF = Inf
%
% DESCRIPTION
% Performs principal component analysis in a kernel-induced space by finding
% M significant directions. M is provided directly or determined by ALF such
% that the fraction ALF of the total variance is preserved. L is a sorted list
% of eigenvalues describing the variances in the kernel subspace.
% The projection X is found as X = K*W.
%
% A trained mapping can be reduced further by:
%   W = KPCA(W,ALF)
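%
% EXAMPLE
% A minimal training sketch (assuming PRTools' GENDATB and DISTM are
% available; the Gaussian kernel is computed directly here, not via any
% specific kernel helper):
%   a = gendatb([50 50]);      % banana-shaped example data
%   K = exp(-distm(a,a)/2);    % Gaussian kernel; symmetric and psd
%   w = kpca(K,0.9);           % preserve 90% of the total variance
%   x = K*w;                   % projection of the training data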
%
% SEE ALSO
% MAPPINGS, DATASETS, PCAM, PSEM
%
% REFERENCE
% B. Scholkopf, A. Smola, and K.-R. Muller. Kernel Principal Component Analysis.
% In: Advances in Kernel Methods - Support Vector Learning, pages 327-352.
% MIT Press, Cambridge, MA, 1999.

% Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com
% Faculty EWI, Delft University of Technology and
% School of Computer Science, University of Manchester, UK

function [W,L] = kpca(K,alf)

if nargin < 2 || isempty(alf)
  alf = inf;
end
if nargin < 1 || isempty(K)
  % UNTRAINED MAPPING: return an untrained prmapping definition
  W = prmapping(mfilename,alf);
  W = setname(W,'Kernel PCA');
  return
end

if isdataset(K) || isa(K,'double')
  if ismapping(alf)
    % APPLY MAPPING: project new data using the trained mapping
    [m,n] = size(K);
    pars = getdata(alf);   % stored parameters
    Q    = pars{1};        % eigenvectors
    Kme  = pars{2};        % vector of averaged kernel values

    % Center the kernel with respect to the training data
    H = -repmat(1/n,n,n);
    H(1:n+1:end) = H(1:n+1:end) + 1;   % H = eye(n) - ones(n,n)/n
    K = (K - Kme(ones(m,1),:)) * H;
    W = K*Q;
    if isdataset(W)
      W.name = ['Projected ' updname(W.name)];
    end
    return
  end
end

% ISINF(ALF) implies ISNUMERIC(ALF), so a single numeric check suffices
if ~isnumeric(alf)
  error('Wrong ALF.')
end
if alf <= 0
  error('ALF should be positive.')
end

% REDUCE AN ALREADY TRAINED MAPPING
if ismapping(K)
  pars = getdata(K);
  Q = pars{1};
  L = pars{3};
  m = size(Q,1);

  [~,P] = sort(-abs(L));   % order by decreasing eigenvalue magnitude
  L = L(P);
  Q = Q(:,P);
  J = seleigs(L,alf);      % J is the index of selected eigenvalues
  Q = Q(:,J);              % eigenvectors
  L = L(J);                % eigenvalues
  W = prmapping(mfilename,'trained',{Q,pars{2},L},[],m,length(J));
  W = setname(W,'KPCA');
  return
end

% TRAIN MAPPING
K = +K;                    % strip a possible dataset structure
[n,m] = size(K);

% Tolerance value used in comparisons
if mean(diag(K)) < 1
  tol = 1e-12;
else
  tol = 1e-10;
end

if ~issym(K,tol)
  prwarning(1,'Kernel should be symmetric. It is made so by averaging.')
  K = 0.5*(K+K');
end

eigmin = min(preig(K));
if eigmin < 0
  error(['K is not psd. Minimum eig(K) = ' num2str(eigmin) ...
         '. Please regularize the kernel appropriately or use IKPCA.']);
end

Kme = mean(K,2)';          % row vector of average kernel values

% Project the data such that the mean lies at the origin
H = -repmat(1/n,n,n);
H(1:n+1:end) = H(1:n+1:end) + 1;   % H = eye(n) - ones(n,n)/n
K = H*K*H;                 % K is now the centered kernel
K = 0.5*(K+K');            % make sure that K is symmetric after centering

[Q,L] = preig(K);
Q = real(Q);
l = diag(real(L));
[~,Z] = sort(-l);          % eigenvalues sorted by decreasing value
Q = Q(:,Z);
l = l(Z);

J = seleigs(l,alf);        % J is the index of selected eigenvalues
L = l(J);                  % eigenvalues
Q = Q(:,J);                % eigenvectors

% Normalize Q such that the eigenvectors of the covariance matrix
% in the kernel-induced space are orthonormal
Q = Q*diag(1./sqrt(diag(Q'*K*Q)));

% Determine the mapping
W = prmapping(mfilename,'trained',{Q,Kme,L},[],m,length(J));
W = setname(W,'KPCA');
return
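% A hedged sketch of projecting new data with a trained mapping (assumes the
% variables a and w from the EXAMPLE above and the same kernel function):
%   b  = gendatb([20 20]);     % new data
%   Kt = exp(-distm(b,a)/2);   % kernel between new and training objects
%   xt = Kt*w;                 % projection of the new data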