source: distools/kpca.m @ 31

Last change on this file since 31 was 10, checked in by bduin, 14 years ago
File size: 4.1 KB
Line 
1%KPCA Kernel Principal Component Analysis
2%
3%   [W,L] = KPCA(K,ALF)
4%         OR
5%   [W,L] = KPCA(W,ALF)
6%
7% INPUT
8%   K   NxN kernel or symmetric similarity matrix (dataset)
9%   W   Trained KPCA projection a kernel-induced space
10%   ALF Parameter determining the dimensionality and the mapping (optional, default: Inf)
11%       (0,1) - fraction of the total preserved variance; e.g. ALF = 0.9
12%       M     - Number of dimensions in the subspace; e.g. ALF = 5
13%       Inf   - No dimensionality reduction, keeping all dimensions (it's VERY noisy)
14%
15% OUTPUT
16%   W   Kernel PCA mapping
17%   L   Sorted list of eigenvalues
18%
19% DEFAULTS
20%   ALF = Inf
21%
22% DESCRIPTION
23% Performs principal component analysis in a kernel-induced space by finding
24% M significant directions. M is provided directly or determined by ALF such
25% that the fraction ALF of the total variance is preserved. L is a sorted list
26% of eigenvalues describing the variances in the kernel subspace.
27% The projection X is found as X = K*W.
28%
29% A trained mapping can be reduced further by:
30%   W = KPCA(W,ALF)
31%
32% SEE ALSO
33% MAPPINGS, DATASETS, PCA, PSEM
34%
35% REFERENCE
36% B. Scholkopf, A. Smola, and K.-R. Muller. Kernel Principal Component Analysis.
37% in Advances in Kernel Methods - SV Learning, pages 327-352. MIT Press, Cambridge, MA, 1999.
38
39% Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com
40% Faculty EWI, Delft University of Technology and
41% School of Computer Science, University of Manchester, UK
42
43
44function [W,L] = kpca (K,alf)
45
46if nargin < 2 | isempty(alf),
47  alf = inf;
48end
49if nargin < 1 | isempty(K),
50  W = mapping(mfilename,alf);
51  W = setname(W,'Kernel PCA');
52  return
53end
54
55
56if (isdataset(K) | isa(K,'double')),
57  if ismapping(alf),
58    % APPLY MAPPING: project new data using the trained mapping.
59    [m,n] = size(K);
60    pars  = getdata(alf);
61
62    % Parameters
63    Q   = pars{1};  % Eigenvectors
64    Kme = pars{2};  % Vector of averaged kernel values
65
66    % Centering the kernel
67    H = -repmat(1/n,n,n);
68    H(1:n+1:end) = H(1:n+1:end) + 1;     % H = eye(n) - ones(n,n)/n
69    K = (K - Kme(ones(m,1),:)) * H;
70    W = K*Q;
71    if isdataset(W),
72      W.name = ['Projected '  updname(W.name)];
73    end
74    return;
75  end
76end
77
78
79if ~isnumeric(alf) & ~isinf(alf)
80  error('Wrong ALF.')
81end
82if alf <= 0
83  error('ALF should be positive.')
84end
85
86
87% REDUCE ALREADY TRAINED MAPPING
88if ismapping(K),
89  pars  = getdata(K);
90
91  Q  = pars{1};
92  L  = pars{3};
93  m  = size(Q,1);
94
95  [ll,P] = sort(-abs(L));
96  L = L(P);
97  Q = Q(:,P);
98  J = seleigs(L,alf);       % J is the index of selected eigenvalues
99  Q = Q(:,J);               % Eigenvectors
100  L = L(J);                 % Eigenvalues
101
102  W = mapping(mfilename,'trained',{Q,pars{2},L},[],m,length(J));
103  W = setname(W,'KPCA');
104  return
105end
106
107
108
109% TRAIN MAPPING
110K  = +K;
111[n,m] = size(K);
112
113% Tolerance value used in comparisons
114if mean(diag(+K)) < 1,
115  tol = 1e-12;
116else
117  tol = 1e-10;
118end
119
120if ~issym(K,tol),
121  prwarning(1,'Kernel should be symmetric. It is made so by averaging.')
122  K  = 0.5 * (K+K');
123end
124
125eigmin = min(preig(K));
126if eigmin < 0,
127  error(['K is not psd. Minimum eig(K) = ' ...
128  num2str(eigmin) '. Please regularize the kernel appropriately or use IKPCA.']);
129end
130
131Kme = mean(K,2)';
132
133% Project the data such that the mean lies at the origin
134H = -repmat(1/n,n,n);
135H(1:n+1:end) = H(1:n+1:end) + 1;  % H = eye(n) - ones(n,n)/n
136K  = H * K * H;            % K is now the centered kernel
137K  = 0.5 * (K+K');         % Make sure that K is symmetric after centering
138
139[Q,L]  = preig(K);
140Q      = real(Q);
141l      = diag(real(L));
142[lm,Z] = sort(-l);
143Q      = Q(:,Z);
144l      = l(Z);             % Eigenvalues are sorted by decreasing value
145
146
147J = seleigs(l,alf);        % J is the index of selected eigenvalues
148L = l(J);                  % Eigenvalues
149Q = Q(:,J);                % Eigenvectors
150
151% Normalize Q such that the eigenvectors of the covariance
152% matrix are orthonormal
153Q = Q* diag(1./sqrt(diag(Q'*K*Q)));
154
155% Determine the mapping
156W = mapping(mfilename,'trained',{Q,Kme,L},[],m,length(J));
157W = setname(W,'KPCA');
158return
Note: See TracBrowser for help on using the repository browser.