source: distools/kpca.m @ 139

Last change on this file since 139 was 120, checked in by bduin, 8 years ago
File size: 4.2 KB
Line 
1%KPCA Kernel Principal Component Analysis
2%
3%   [W,L] = KPCA(K,ALF)
4%         OR
5%   [W,L] = KPCA(W,ALF)
6%
7% INPUT
8%   K   NxN kernel or symmetric similarity matrix (dataset)
9%   W   Trained KPCA projection a kernel-induced space
10%   ALF Parameter determining the dimensionality and the mapping (optional, default: Inf)
11%       (0,1) - fraction of the total preserved variance; e.g. ALF = 0.9
12%       M     - Number of dimensions in the subspace; e.g. ALF = 5
13%       Inf   - No dimensionality reduction, keeping all dimensions (it's VERY noisy)
14%
15% OUTPUT
16%   W   Kernel PCA mapping
17%   L   Sorted list of eigenvalues
18%
19% DEFAULTS
20%   ALF = Inf
21%
22% DESCRIPTION
23% Performs principal component analysis in a kernel-induced space by finding
24% M significant directions. M is provided directly or determined by ALF such
25% that the fraction ALF of the total variance is preserved. L is a sorted list
26% of eigenvalues describing the variances in the kernel subspace.
27% The projection X is found as X = K*W.
28%
29% A trained mapping can be reduced further by:
30%   W = KPCA(W,ALF)
31%
32% SEE ALSO
33% MAPPINGS, DATASETS, PCAM, PSEM
34%
35% REFERENCE
36% B. Scholkopf, A. Smola, and K.-R. Muller. Kernel Principal Component Analysis.
37% in Advances in Kernel Methods - SV Learning, pages 327-352. MIT Press, Cambridge, MA, 1999.
38
39% Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com
40% Faculty EWI, Delft University of Technology and
41% School of Computer Science, University of Manchester, UK
42
43
44function [W,L] = kpca (K,alf,reg)
45
46if nargin < 2 | isempty(alf),
47  alf = inf;
48end
49if nargin < 1 | isempty(K),
50  W = prmapping(mfilename,{alf,reg});
51  W = setname(W,'Kernel PCA');
52  return
53end
54
55if nargin > 2 && ~isempty(reg)
56  K = K+reg*eye(size(K,1));
57end
58
59if (isdataset(K) | isa(K,'double')),
60  if ismapping(alf),
61    % APPLY MAPPING: project new data using the trained mapping.
62    [m,n] = size(K);
63    pars  = getdata(alf);
64
65    % Parameters
66    Q   = pars{1};  % Eigenvectors
67    Kme = pars{2};  % Vector of averaged kernel values
68
69    % Centering the kernel
70    H = -repmat(1/n,n,n);
71    H(1:n+1:end) = H(1:n+1:end) + 1;     % H = eye(n) - ones(n,n)/n
72    K = (K - Kme(ones(m,1),:)) * H;
73    W = K*Q;
74    if isdataset(W),
75      W.name = ['Projected '  updname(W.name)];
76    end
77    return;
78  end
79end
80
81
82if ~isnumeric(alf) & ~isinf(alf)
83  error('Wrong ALF.')
84end
85if alf <= 0
86  error('ALF should be positive.')
87end
88
89
90% REDUCE ALREADY TRAINED MAPPING
91if ismapping(K),
92  pars  = getdata(K);
93
94  Q  = pars{1};
95  L  = pars{3};
96  m  = size(Q,1);
97
98  [ll,P] = sort(-abs(L));
99  L = L(P);
100  Q = Q(:,P);
101  J = seleigs(L,alf);       % J is the index of selected eigenvalues
102  Q = Q(:,J);               % Eigenvectors
103  L = L(J);                 % Eigenvalues
104
105  W = prmapping(mfilename,'trained',{Q,pars{2},L},[],m,length(J));
106  W = setname(W,'KPCA');
107  return
108end
109
110
111
112% TRAIN MAPPING
113K  = +K;
114[n,m] = size(K);
115
116% Tolerance value used in comparisons
117if mean(diag(+K)) < 1,
118  tol = 1e-12;
119else
120  tol = 1e-10;
121end
122
123if ~issym(K,tol),
124  prwarning(1,'Kernel should be symmetric. It is made so by averaging.')
125  K  = 0.5 * (K+K');
126end
127
128eigmin = min(preig(K));
129if eigmin < 0,
130  error(['K is not psd. Minimum eig(K) = ' ...
131  num2str(eigmin) '. Please regularize the kernel appropriately or use IKPCA.']);
132end
133
134Kme = mean(K,2)';
135
136% Project the data such that the mean lies at the origin
137H = -repmat(1/n,n,n);
138H(1:n+1:end) = H(1:n+1:end) + 1;  % H = eye(n) - ones(n,n)/n
139K  = H * K * H;            % K is now the centered kernel
140K  = 0.5 * (K+K');         % Make sure that K is symmetric after centering
141
142[Q,L]  = preig(K);
143Q      = real(Q);
144l      = diag(real(L));
145[lm,Z] = sort(-l);
146Q      = Q(:,Z);
147l      = l(Z);             % Eigenvalues are sorted by decreasing value
148
149
150J = seleigs(l,alf);        % J is the index of selected eigenvalues
151L = l(J);                  % Eigenvalues
152Q = Q(:,J);                % Eigenvectors
153
154% Normalize Q such that the eigenvectors of the covariance
155% matrix are orthonormal
156Q = Q* diag(1./sqrt(diag(Q'*K*Q)));
157
158% Determine the mapping
159W = prmapping(mfilename,'trained',{Q,Kme,L},[],m,length(J));
160W = setname(W,'KPCA');
161return
Note: See TracBrowser for help on using the repository browser.