1 | %KPCA Kernel Principal Component Analysis
|
---|
2 | %
|
---|
3 | % [W,L] = KPCA(K,ALF)
|
---|
4 | % OR
|
---|
5 | % [W,L] = KPCA(W,ALF)
|
---|
6 | %
|
---|
7 | % INPUT
|
---|
8 | % K NxN kernel or symmetric similarity matrix (dataset)
|
---|
9 | % W Trained KPCA projection a kernel-induced space
|
---|
10 | % ALF Parameter determining the dimensionality and the mapping (optional, default: Inf)
|
---|
11 | % (0,1) - fraction of the total preserved variance; e.g. ALF = 0.9
|
---|
12 | % M - Number of dimensions in the subspace; e.g. ALF = 5
|
---|
13 | % Inf - No dimensionality reduction, keeping all dimensions (it's VERY noisy)
|
---|
14 | %
|
---|
15 | % OUTPUT
|
---|
16 | % W Kernel PCA mapping
|
---|
17 | % L Sorted list of eigenvalues
|
---|
18 | %
|
---|
19 | % DEFAULTS
|
---|
20 | % ALF = Inf
|
---|
21 | %
|
---|
22 | % DESCRIPTION
|
---|
23 | % Performs principal component analysis in a kernel-induced space by finding
|
---|
24 | % M significant directions. M is provided directly or determined by ALF such
|
---|
25 | % that the fraction ALF of the total variance is preserved. L is a sorted list
|
---|
26 | % of eigenvalues describing the variances in the kernel subspace.
|
---|
27 | % The projection X is found as X = K*W.
|
---|
28 | %
|
---|
29 | % A trained mapping can be reduced further by:
|
---|
30 | % W = KPCA(W,ALF)
|
---|
31 | %
|
---|
32 | % SEE ALSO
|
---|
33 | % MAPPINGS, DATASETS, PCAM, PSEM
|
---|
34 | %
|
---|
35 | % REFERENCE
|
---|
36 | % B. Scholkopf, A. Smola, and K.-R. Muller. Kernel Principal Component Analysis.
|
---|
37 | % in Advances in Kernel Methods - SV Learning, pages 327-352. MIT Press, Cambridge, MA, 1999.
|
---|
38 |
|
---|
39 | % Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com
|
---|
40 | % Faculty EWI, Delft University of Technology and
|
---|
41 | % School of Computer Science, University of Manchester, UK
|
---|
42 |
|
---|
43 |
|
---|
44 | function [W,L] = kpca (K,alf)
|
---|
45 |
|
---|
46 | if nargin < 2 | isempty(alf),
|
---|
47 | alf = inf;
|
---|
48 | end
|
---|
49 | if nargin < 1 | isempty(K),
|
---|
50 | W = prmapping(mfilename,alf);
|
---|
51 | W = setname(W,'Kernel PCA');
|
---|
52 | return
|
---|
53 | end
|
---|
54 |
|
---|
55 |
|
---|
56 | if (isdataset(K) | isa(K,'double')),
|
---|
57 | if ismapping(alf),
|
---|
58 | % APPLY MAPPING: project new data using the trained mapping.
|
---|
59 | [m,n] = size(K);
|
---|
60 | pars = getdata(alf);
|
---|
61 |
|
---|
62 | % Parameters
|
---|
63 | Q = pars{1}; % Eigenvectors
|
---|
64 | Kme = pars{2}; % Vector of averaged kernel values
|
---|
65 |
|
---|
66 | % Centering the kernel
|
---|
67 | H = -repmat(1/n,n,n);
|
---|
68 | H(1:n+1:end) = H(1:n+1:end) + 1; % H = eye(n) - ones(n,n)/n
|
---|
69 | K = (K - Kme(ones(m,1),:)) * H;
|
---|
70 | W = K*Q;
|
---|
71 | if isdataset(W),
|
---|
72 | W.name = ['Projected ' updname(W.name)];
|
---|
73 | end
|
---|
74 | return;
|
---|
75 | end
|
---|
76 | end
|
---|
77 |
|
---|
78 |
|
---|
79 | if ~isnumeric(alf) & ~isinf(alf)
|
---|
80 | error('Wrong ALF.')
|
---|
81 | end
|
---|
82 | if alf <= 0
|
---|
83 | error('ALF should be positive.')
|
---|
84 | end
|
---|
85 |
|
---|
86 |
|
---|
87 | % REDUCE ALREADY TRAINED MAPPING
|
---|
88 | if ismapping(K),
|
---|
89 | pars = getdata(K);
|
---|
90 |
|
---|
91 | Q = pars{1};
|
---|
92 | L = pars{3};
|
---|
93 | m = size(Q,1);
|
---|
94 |
|
---|
95 | [ll,P] = sort(-abs(L));
|
---|
96 | L = L(P);
|
---|
97 | Q = Q(:,P);
|
---|
98 | J = seleigs(L,alf); % J is the index of selected eigenvalues
|
---|
99 | Q = Q(:,J); % Eigenvectors
|
---|
100 | L = L(J); % Eigenvalues
|
---|
101 |
|
---|
102 | W = prmapping(mfilename,'trained',{Q,pars{2},L},[],m,length(J));
|
---|
103 | W = setname(W,'KPCA');
|
---|
104 | return
|
---|
105 | end
|
---|
106 |
|
---|
107 |
|
---|
108 |
|
---|
109 | % TRAIN MAPPING
|
---|
110 | K = +K;
|
---|
111 | [n,m] = size(K);
|
---|
112 |
|
---|
113 | % Tolerance value used in comparisons
|
---|
114 | if mean(diag(+K)) < 1,
|
---|
115 | tol = 1e-12;
|
---|
116 | else
|
---|
117 | tol = 1e-10;
|
---|
118 | end
|
---|
119 |
|
---|
120 | if ~issym(K,tol),
|
---|
121 | prwarning(1,'Kernel should be symmetric. It is made so by averaging.')
|
---|
122 | K = 0.5 * (K+K');
|
---|
123 | end
|
---|
124 |
|
---|
125 | eigmin = min(preig(K));
|
---|
126 | if eigmin < 0,
|
---|
127 | error(['K is not psd. Minimum eig(K) = ' ...
|
---|
128 | num2str(eigmin) '. Please regularize the kernel appropriately or use IKPCA.']);
|
---|
129 | end
|
---|
130 |
|
---|
131 | Kme = mean(K,2)';
|
---|
132 |
|
---|
133 | % Project the data such that the mean lies at the origin
|
---|
134 | H = -repmat(1/n,n,n);
|
---|
135 | H(1:n+1:end) = H(1:n+1:end) + 1; % H = eye(n) - ones(n,n)/n
|
---|
136 | K = H * K * H; % K is now the centered kernel
|
---|
137 | K = 0.5 * (K+K'); % Make sure that K is symmetric after centering
|
---|
138 |
|
---|
139 | [Q,L] = preig(K);
|
---|
140 | Q = real(Q);
|
---|
141 | l = diag(real(L));
|
---|
142 | [lm,Z] = sort(-l);
|
---|
143 | Q = Q(:,Z);
|
---|
144 | l = l(Z); % Eigenvalues are sorted by decreasing value
|
---|
145 |
|
---|
146 |
|
---|
147 | J = seleigs(l,alf); % J is the index of selected eigenvalues
|
---|
148 | L = l(J); % Eigenvalues
|
---|
149 | Q = Q(:,J); % Eigenvectors
|
---|
150 |
|
---|
151 | % Normalize Q such that the eigenvectors of the covariance
|
---|
152 | % matrix are orthonormal
|
---|
153 | Q = Q* diag(1./sqrt(diag(Q'*K*Q)));
|
---|
154 |
|
---|
155 | % Determine the mapping
|
---|
156 | W = prmapping(mfilename,'trained',{Q,Kme,L},[],m,length(J));
|
---|
157 | W = setname(W,'KPCA');
|
---|
158 | return
|
---|