%IKPCA Indefinite Kernel Principal Component Analysis
%
%   [W,SIG,L] = IKPCA(K,ALF)
%             OR
%   [W,SIG,L] = IKPCA(W,ALF)
%
% INPUT
%   K   NxN kernel or symmetric similarity matrix (dataset)
%   W   Trained IKPCA projection into a kernel-induced pseudo-Euclidean subspace
%   ALF Parameter determining the dimension and the mapping (optional, default: Inf)
%       (0,1) - fraction of the total magnitude of the preserved variance
%       Inf   - no dimension reduction; all dimensions are kept (the result may be very noisy)
%       'p'   - projection into a Euclidean space based on the positive eigenvalues only
%       'PARp'- projection into a Euclidean space based on the PAR fraction of
%               positive eigenvalues; e.g. ALF = '0.9p'
%       'n'   - projection into a Euclidean space based on the negative eigenvalues only
%       'PARn'- projection into a (negative) Euclidean space based on the PAR fraction
%               of negative eigenvalues; e.g. ALF = '0.7n'
%     'P1pP2n'- projection into a pseudo-Euclidean space based on the P1 positive
%               eigenvalues and P2 negative eigenvalues; e.g. ALF = '0.7p0.1n', ALF = '7p2n'
%     1 .. N  - total number of significant dimensions
%     [P1 P2] - P1 dimensions or a fraction of preserved variance in the positive
%               subspace and P2 dimensions or a fraction of preserved variance in
%               the negative subspace; e.g. ALF = [5 10], ALF = [0.9 0.1]
%
% OUTPUT
%   W   Indefinite Kernel PCA mapping
%   SIG Signature of the kernel pseudo-Euclidean subspace
%   L   Sorted list of eigenvalues
%
% DEFAULTS
%   ALF = Inf
%
% DESCRIPTION
% Performs principal component analysis in a kernel space (either Hilbert or Krein
% space) by finding M significant principal components. M is determined by ALF,
% e.g. such that at least the fraction ALF of the total variance is preserved.
% The output SIG describes the signature of the subspace. L is a sorted list
% of eigenvalues describing the variances in the (pseudo-)Euclidean subspace.
% The projection X is found as X = K*W. The signature is also stored in X.user.
%
% A trained mapping can be reduced by:
%   [W,SIG,L] = IKPCA(W,ALF)
%
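% EXAMPLE
% A minimal usage sketch (illustrative only; it assumes that PRTools and DisTools
% are in the path and that SIM is an NxN symmetric similarity dataset):
%   W = ikpca(SIM,0.95);   % train, preserving 95% of the total variance magnitude
%   X = SIM*W;             % project the training data; X.user stores the signature
%   V = ikpca(W,[5 2]);    % reduce the trained mapping to 5 pos. and 2 neg. dimensions
%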
% SEE ALSO
% MAPPINGS, DATASETS, PCAM, PSEM
%
% REFERENCES
% 1. E. Pekalska and R.P.W. Duin. Indefinite Kernel Principal Component Analysis.
%    Technical Report, 2006.
% 2. B. Schölkopf, A. Smola, and K.-R. Müller. Kernel Principal Component Analysis.
%    In: Advances in Kernel Methods - Support Vector Learning, pages 327-352. MIT Press, Cambridge, MA, 1999.
%

% Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com
% Faculty EWI, Delft University of Technology and
% School of Computer Science, University of Manchester, UK


function [W,sig,L] = ikpca(K,alf,prec)

% PREC is the precision parameter used for the automatic
% selection (heuristic) of the number of dominant eigenvalues.
% This happens when SELEIGS is called with the parameter 'CUT'.

if nargin < 3 | isempty(prec),
  prec = 1e-4;
end
if nargin < 2 | isempty(alf),
  alf = inf;
end
if nargin < 1 | isempty(K),
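  % No kernel given: return an untrained mapping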
  W = prmapping(mfilename,alf,prec);
  W = setname(W,'IKPCA');
  return
end


if (isdataset(K) | isa(K,'double')),
  if ismapping(alf),
    % APPLY THE MAPPING: project new data using the trained mapping
    [m,n] = size(K);
    pars  = getdata(alf);

    % Parameters
    QL  = pars{1};    % Normalized eigenvectors
    Kme = pars{2};    % Row vector of the average original kernel values

    % Centering the kernel
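    % The test similarities are centered with the training statistics:
    % K_c = (K - ones(m,1)*Kme) * (eye(n) - ones(n,n)/n), consistent with the
    % double centering H*K*H applied to the training kernel below.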
    H = -repmat(1/n,n,n);
    H(1:n+1:end) = H(1:n+1:end) + 1;     % H = eye(n) - ones(n,n)/n
    K = (K - repmat(Kme,m,1)) * H;
    W = K*QL;
    if isdataset(W),
      W.name = ['Projected ' updname(W.name)];
      W.user = pars{4};  % Signature of the PE subspace
    end
    return;
  end
end


% REDUCE ALREADY TRAINED MAPPING
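% The stored eigenvectors and eigenvalues are re-sorted by magnitude and a new
% subset is selected with SELEIGS; the kernel does not need to be decomposed again.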
if ismapping(K),
  pars  = getdata(K);

  QL = pars{1};
  L  = pars{3};
  m  = size(QL,1);

  [ll,P]  = sort(-abs(L));
  L       = L(P);
  QL      = QL(:,P);
  [J,sig] = seleigs(L,alf,pars{5});  % J is the index of selected eigenvalues
  QL      = QL(:,J);                 % Eigenvectors
  L       = L(J);                    % Eigenvalues

  W = prmapping(mfilename,'trained',{QL,pars{2},L,sig,pars{5}},[],m,length(J));
  W = setname(W,'IKPCA');
  return
end


% TRAIN THE MAPPING
K  = +K;
[n,m] = size(K);

% Tolerance value used in comparisons
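% (relative to the scale of the kernel: a tighter tolerance is used when the
% average diagonal value is small)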
if mean(diag(+K)) < 1,
  tol = 1e-12;
else
  tol = 1e-10;
end

if ~issym(K,tol),
  prwarning(1,'Kernel should be symmetric. It is made so by averaging.')
  K  = 0.5 * (K+K');
end

eigmin = min(preig(K));
if eigmin < 0 & abs(eigmin) < 1e-12,
  disp(['Minimum eig(K) = ' num2str(eigmin) '. K might be indefinite due to numerical inaccuracies.']);
end

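% Row means of the training kernel; they are stored in the mapping and re-used
% when new data are projected in the APPLY branch above.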
Kme = mean(K,2)';

% Project the data such that the mean lies at the origin
H = -repmat(1/n,n,n);
H(1:n+1:end) = H(1:n+1:end) + 1;  % H = eye(n) - ones(n,n)/n
K  = H * K * H;            % K is now a centered kernel
K  = 0.5 * (K+K');         % Make sure that K is symmetric after centering

[Q, L] = preig(K);
Q      = real(Q);
l      = diag(real(L));
[lm,Z] = sort(-abs(l));
Q      = Q(:,Z);
l      = l(Z);

% Eigenvalues are now sorted by decreasing absolute value

[J,sig] = seleigs(l,alf,prec);  % J is the index of selected eigenvalues
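% SIG is the signature of the resulting pseudo-Euclidean subspace; its sum is
% used below as the output dimensionality of the trained mapping.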
L = l(J);                       % Eigenvalues
Q = Q(:,J);                     % Eigenvectors

% Normalize Q such that the eigenvectors of the corresponding
% covariance matrix are J-orthonormal
QL = Q * diag(1./sqrt(abs(L)));
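% (with this scaling the columns of the projected training data X = K*QL are
% mutually orthogonal with squared norms equal to abs(L), up to numerical precision)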

% Determine the mapping
W = prmapping(mfilename,'trained',{QL,Kme,L,sig,prec},[],m,sum(sig));
W = setname(W,'IKPCA');
return