%IKPCA Indefinite Kernel Principal Component Analysis
%
%   [W,SIG,L] = IKPCA(K,ALF)
%   OR
%   [W,SIG,L] = IKPCA(W,ALF)
%
% INPUT
%   K    NxN kernel or symmetric similarity matrix (dataset)
%   W    Trained IKPCA projection into a kernel-induced pseudo-Euclidean subspace
%   ALF  Parameter determining the dimension and the mapping (optional, default: Inf)
%        (0,1)   - fraction of the total magnitude of the preserved variance
%        Inf     - no dimension reduction; all dimensions are kept (the result
%                  may be very noisy)
%        'p'     - projection into a Euclidean space based on the positive
%                  eigenvalues only
%        'PARp'  - projection into a Euclidean space based on the PAR fraction
%                  of positive eigenvalues; e.g. ALF = '0.9p'
%        'n'     - projection into a Euclidean space based on the negative
%                  eigenvalues only
%        'PARn'  - projection into a (negative) Euclidean space based on the PAR
%                  fraction of negative eigenvalues; e.g. ALF = '0.7n'
%        'P1pP2n'- projection into a Euclidean space based on the P1 positive
%                  eigenvalues and P2 negative eigenvalues;
%                  e.g. ALF = '0.7p0.1n', ALF = '7p2n'
%        1 .. N  - total number of significant dimensions
%        [P1 P2] - P1 dimensions or a fraction of preserved variance in the
%                  positive subspace and P2 dimensions or a fraction of
%                  preserved variance in the negative subspace;
%                  e.g. ALF = [5 10], ALF = [0.9 0.1]
%
% OUTPUT
%   W    Indefinite Kernel PCA mapping
%   SIG  Signature of the kernel pseudo-Euclidean subspace
%   L    Sorted list of eigenvalues
%
% DEFAULTS
%   ALF = Inf
%
% DESCRIPTION
% Performs principal component analysis in a kernel space (either a Hilbert or
% a Krein space) by finding M significant principal components. M is determined
% by ALF, e.g. such that at least the fraction ALF of the total variance is
% preserved. SIG describes the signature of the resulting subspace. L is a
% sorted list of eigenvalues describing the variances in the (pseudo-)Euclidean
% subspace. The projection X is found as X = K*W. The signature is also stored
% in X.user.
%
% A trained mapping can be reduced further by:
%   [W,SIG,L] = IKPCA(W,ALF)
%
% SEE ALSO
% MAPPINGS, DATASETS, PCA, PSEM
%
% REFERENCES
% 1. E. Pekalska and R.P.W. Duin. Indefinite Kernel Principal Component
%    Analysis. Technical Report, 2006.
% 2. B. Schölkopf, A. Smola, and K.-R. Müller. Kernel Principal Component
%    Analysis. In: Advances in Kernel Methods - SV Learning, pages 327-352.
%    MIT Press, Cambridge, MA, 1999.
%
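% EXAMPLE
% A minimal usage sketch. It assumes the PRTools routines GENDATB and PROXM
% are available; the data set and kernel choices are illustrative only:
%
%   A = gendatb([50 50]);          % two-class banana data
%   K = A*proxm(A,'p',3);          % NxN polynomial kernel, degree 3
%   [W,sig,L] = ikpca(K,0.95);     % keep 95% of the total variance magnitude
%   X = K*W;                       % projected data; X.user holds the signature
%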

% Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com
% Faculty EWI, Delft University of Technology and
% School of Computer Science, University of Manchester, UK

function [W,sig,L] = ikpca(K,alf,prec)

% PREC is the precision parameter used for the automatic
% selection (heuristic) of the number of dominant eigenvalues.
% This happens when SELEIGS is called with the parameter 'CUT'.

if nargin < 3 | isempty(prec),
  prec = 1e-4;
end
if nargin < 2 | isempty(alf),
  alf = inf;
end
if nargin < 1 | isempty(K),
  % DEFINE AN UNTRAINED MAPPING: store the parameters for later training
  W = mapping(mfilename,alf,prec);
  W = setname(W,'IKPCA');
  return
end

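% In the PRTools convention (illustrative sketch), the untrained mapping can
% be trained later by multiplying it with a kernel dataset:
%   V = ikpca([],0.9);   % untrained IKPCA mapping
%   W = K*V;             % training on K; equivalent to W = ikpca(K,0.9)
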
if (isdataset(K) | isa(K,'double')),
  if ismapping(alf),
    % APPLY THE MAPPING: project new data using the trained mapping
    [m,n] = size(K);
    pars = getdata(alf);

    % Parameters
    QL  = pars{1};   % Normalized eigenvectors
    Kme = pars{2};   % Row vector of the average original kernel values

    % Centering the kernel
    H = -repmat(1/n,n,n);
    H(1:n+1:end) = H(1:n+1:end) + 1;   % H = eye(n) - ones(n,n)/n
    K = (K - repmat(Kme,m,1)) * H;

    W = K*QL;
    if isdataset(W),
      W.name = ['Projected ' updname(W.name)];
      W.user = pars{4};   % Signature of the PE subspace
    end
    return;
  end
end

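% Applying a trained mapping to a test set T (illustrative; assumes the same
% PROXM kernel as in the training example above, computed against the N
% training objects of A):
%   Kt = T*proxm(A,'p',3);   % M x N test kernel
%   Xt = Kt*W;               % projected test data
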
% REDUCE AN ALREADY TRAINED MAPPING
if ismapping(K),
  pars = getdata(K);

  QL = pars{1};
  L  = pars{3};
  m  = size(QL,1);

  [ll,P] = sort(-abs(L));
  L  = L(P);
  QL = QL(:,P);
  [J,sig] = seleigs(L,alf,pars{5});   % J is the index of selected eigenvalues
  QL = QL(:,J);                       % Eigenvectors
  L  = L(J);                          % Eigenvalues

  W = mapping(mfilename,'trained',{QL,pars{2},L,sig,pars{5}},[],m,length(J));
  W = setname(W,'IKPCA');
  return
end

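% Reducing a trained mapping (illustrative):
%   [W2,sig2] = ikpca(W,'p');     % keep the positive-eigenvalue subspace only
%   [W3,sig3] = ikpca(W,[5 2]);   % keep 5 positive and 2 negative directions
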
% TRAIN THE MAPPING
K = +K;
[n,m] = size(K);

% Tolerance value used in comparisons
if mean(diag(+K)) < 1,
  tol = 1e-12;
else
  tol = 1e-10;
end

if ~issym(K,tol),
  prwarning(1,'Kernel should be symmetric. It is made so by averaging.')
  K = 0.5 * (K+K');
end

eigmin = min(preig(K));
if eigmin < 0 & abs(eigmin) < 1e-12,
  disp(['Minimum eig(K) = ' num2str(eigmin) '. K might be indefinite due to numerical inaccuracies.']);
end

Kme = mean(K,2)';

% Center the kernel such that the mean of the mapped data lies at the origin
H = -repmat(1/n,n,n);
H(1:n+1:end) = H(1:n+1:end) + 1;   % H = eye(n) - ones(n,n)/n
K = H * K * H;                     % K is now a centered kernel
K = 0.5 * (K+K');                  % Make sure that K is symmetric after centering

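% In effect, the double centering computes
%   Kc(i,j) = K(i,j) - mean(K(i,:)) - mean(K(:,j)) + mean(K(:)),
% i.e. the Gram matrix of the feature representations shifted to zero mean.
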
[Q,L] = preig(K);
Q = real(Q);
l = diag(real(L));
[lm,Z] = sort(-abs(l));
Q = Q(:,Z);
l = l(Z);

% Eigenvalues are now sorted by decreasing absolute value

[J,sig] = seleigs(l,alf,prec);   % J is the index of selected eigenvalues
L = l(J);                        % Eigenvalues
Q = Q(:,J);                      % Eigenvectors

% Normalize Q such that the eigenvectors of the corresponding
% covariance matrix are J-orthonormal
QL = Q * diag(1./sqrt(abs(L)));

% Determine the mapping
W = mapping(mfilename,'trained',{QL,Kme,L,sig,prec},[],m,sum(sig));
W = setname(W,'IKPCA');
return
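% Note on the scaling above (a consequence of the J-orthonormalization): the
% k-th column of the projected training data X = K*W equals
% sign(L(k))*sqrt(abs(L(k))) times the k-th eigenvector, so the columns of X
% are orthogonal with squared norms abs(L(k)); the selected eigenvalues thus
% act as the variances along the principal directions of the
% (pseudo-)Euclidean subspace.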