source: distools/ksvc.m

%KSVC Kernel Support Vector Classifier on a kernel matrix
%
%   [W,J,REG] = KSVC(K,C,KC,R)
%
% INPUT
%   K   NxN Kernel dataset
%   C   Regularization parameter (optional; default: 1)
%   KC  Kernel centering, 1/0 (optional; default: 1)
%   R   Parameter: -1,0,1,2
%       -1 or 'flip',   Changes the kernel by flipping negative eigenvalues to
%          positive
%        0 or '',       Uses the kernel as it is
%        1 or 'reg',    Checks positive definiteness and regularizes the kernel by
%          adding the minimal constant in the form of 10^i to the diagonal
%        2 or 'lamreg', Checks positive definiteness and regularizes the kernel by
%          adding the minimal constant to the diagonal (equal to the magnitude of
%          the smallest negative eigenvalue)
%          (optional; default: 0, do not change the kernel)
%
% OUTPUT
%   W   Mapping: Support Vector Classifier
%   J   Object identifiers of support objects
%   REG Regularization parameter added to the diagonal, if used (R=1,2); a vector
%       of eigenvalues of K (R=-1), or -1 if not checked (R=0)
%
% DEFAULT
%   C  = 1
%   KC = 1
%   R  = 0 (use the kernel as it is)
%
% DESCRIPTION
% Determines a support vector machine (SVM) for the kernel K. A quadratic
% programming formulation is used to solve the problem. K can be positive
% definite or indefinite. J is a list of the indices of the support objects
% from K. The smaller C (e.g. C < 1), the larger the class overlap that is
% allowed. The optimal C will differ between regularized and indefinite kernels.
%
% If R = 2, then K is regularized by adding the smallest constant possible to the
% diagonal to make it positive definite. If R = 1, then K is regularized by adding
% the smallest constant in the form of 10^i to the diagonal. If R = -1, then the
% eigendecomposition of K is found as K = Q*L*Q' and the negative eigenvalues are
% flipped, so the SVM is built on K = Q*abs(L)*Q'.
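%
% For illustration only (a sketch, not necessarily the code path used by this
% routine), the R = -1 flip corresponds to:
%
%   Ks    = (+K + (+K)')/2;    % symmetrized kernel data
%   [Q,L] = eig(Ks);           % eigendecomposition Ks = Q*L*Q'
%   Kflip = Q*abs(L)*Q';       % negative eigenvalues flipped to positive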
%
% IMPORTANT
% The classifier cannot always be constructed for an indefinite kernel. If the norm
% of the determined weight vector V in the Krein (pseudo-Euclidean) space induced
% by K is negative, i.e. if V'*K*V < 0, then the proper SVM cannot be built. This
% happens when the hyperplane in the Krein space lies 'more' in the negative
% subspace than in the positive one. This means that the kernel is strongly
% indefinite. Currently, a warning is given and a pseudo-Fisher classifier is
% trained instead. The pseudo-Fisher classifier is also trained whenever the
% quadratic optimization fails to find a solution.
%
% REMARK
% Note that if D is a symmetric distance/dissimilarity matrix, then K = -D is an
% (indefinite) kernel. If D.^2 is a square Euclidean distance matrix, then K = -D.^2
% is a proper (conditionally negative definite) kernel. So, a linear SVM on the
% vectorial data X, based on the kernel K = X*X', is equivalent to the kernel-based
% SVM on K = -D(X,X).^2.
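%
% EXAMPLE
% A sketch only; it assumes the PRTools/DisTools routines GENDATB, DISTM and
% TESTC are available and that DISTM returns square Euclidean distances:
%
%   A = gendatb([30 30]);       % two-class banana data
%   K = -distm(A,A);            % kernel K = -D.^2, a labelled NxN dataset
%   [W,J] = ksvc(K,1);          % train the kernel SVC with C = 1
%   e = K*W*testc;              % apparent error on the training kernel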
%
% SEE ALSO
% MAPPINGS, DATASETS, KSVO, SVC
%
% LITERATURE
% 1. B. Scholkopf, A. Smola, Learning with kernels, MIT Press, 2001,
%    http://www.learning-with-kernels.org/.
% 2. B. Haasdonk, Feature Space Interpretation of SVMs with Indefinite Kernels.
%    IEEE Trans. on PAMI, 27(4):482-492, 2005.
% 3. E. Pekalska, P. Paclik, R.P.W. Duin, A Generalized Kernel Approach to
%    Dissimilarity-based Classification, JMLR, vol.2, no.2, 175-211, 2002.

% Copyright: Elzbieta Pekalska, Robert P.W. Duin, ela.pekalska@googlemail.com
% Based on SVC.M by D.M.J. Tax, D. de Ridder and R.P.W. Duin
% Faculty EWI, Delft University of Technology and
% School of Computer Science, University of Manchester


function [W,J,reg,err,K] = ksvc(K,C,kc,r,no)
prtrace(mfilename);

if nargin < 5,
  % Multi-class problems are solved by one-vs-all by calling KSVC.
  % no is the class number in such a situation; 0 is the standard 2-class case
  no = 0;
end
if nargin < 4,
  r = 0;
end
if nargin < 3,
  kc = 1;
end
if nargin < 2 | isempty(C),
  C = 1;
  prwarning(3,'Regularization parameter C set to 1.\n');
end
if nargin < 1 | isempty(K),
  W = prmapping(mfilename,{C,kc,r});
  W = setname(W,'Kernel Support Vector Classifier (KSVC)');
  return;
end

if all(kc ~= [0 1]),
  error('Wrong KC parameter.');
end

switch lower(r)
  case {'flip', -1},  r = -1;
  case {'', 0},       r = 0;
  case {'reg', 1},    r = 1;
  case {'lamreg', 2}, r = 2;
  otherwise
    error('Wrong parameter R.');
end


% TRAIN THE CLASSIFIER
if ~isa(C,'prmapping')
  islabtype(K,'crisp');
  isvaldset(K,1,2);      % Expect at least 1 object per class and 2 classes
  [m,k,c] = getsize(K);
  if m ~= k,
    error('The kernel is not a square matrix.');
  end
  nlab  = getnlab(K);

  % The SVM is basically a two-class classifier.
  % Multi-class problems are trained one-versus-all.

  if c == 2   % Two-class classifier
    % Compute the parameters for the optimization:
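    % Map the numeric class labels NLAB (1/2) to SVM targets y = +1/-1: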
    y = 3 - 2*nlab;

    prec = 1e-12;
    if ~issym(K,prec),
      prwarning(1, 'The kernel is not symmetric. The values are averaged out.')
    end

    if kc,   % Center the kernel
      me = mean(+K,2)';                % Store the mean values (needed later to center test kernels)

      B = -repmat(1/m,m,m);
      B(1:m+1:end) = B(1:m+1:end)+1;   % B = eye(m) - ones(m,m)/m
      K = B*K*B;                       % K is now centered
    else
      me = [];
    end
    K = (K+K')/2;                      % K is averaged as the QP solver is sensitive to small asymmetry


    % Check feasibility of the kernel:
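    % DMEANS (defined below) returns the square pseudo-Euclidean distance
    % between the two class means, computed directly from the kernel; a
    % negative value signals that the kernel is strongly indefinite.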
    if r == 0 & dmeans(K,y) < 0,
      if no > 0,
        ss = [num2str(no) '-vs-all '];
      else
        ss = '';
      end
      prwarning(1,['The kernel is badly indefinite. The ' ss 'SVM cannot be defined. Pseudo-Fisher is computed instead.']);
      err = 1;
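      % Pseudo-Fisher fallback: a linear least-squares solution of
      % [K ones(m,1)]*v = y via the pseudo-inverse; all m objects are
      % returned as 'support' objects.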
      v   = prpinv([K ones(m,1)])*y;
      J   = [1:m]';
      T   = [];
      reg = [];
    else
      % Perform the optimization
      [v,J,T,reg,err] = ksvo(+K,y,C,r);
    end

    % Store the results
    W = prmapping(mfilename,'trained',{me,J,T,v,reg},getlablist(K),k,2);
    % W = cnormc(W,K);
    W = setname(W,'Kernel Support Vector Classifier (KSVC)');
    W = setcost(W,K);
    % J = K.ident(J);

  else   % MULTI-CLASS CLASSIFIER: c > 2

    % MCLASSC cannot be used here as we have a kernel K
    W = [];
    J = zeros(m,1);
    lablist = getlablist(K);

    for i=1:c
      lab = 2 - (nlab == i);   % lab = 1 for class i, 2 for all other classes
      KK  = setlabels(K,lab);
      KK  = remclass(KK,0);
      KK  = setfeatlab(KK,lab);
      if ~isempty(K.prior)
        KK = setprior(KK,[K.prior(i),1-K.prior(i)]');
      end
      [V,j,reg(i),err(i)] = ksvc(KK,C,kc,r,i);
      W = [W,setlabels(V(:,1),lablist(i,:))];
      J(j) = 1;
    end
    J = find(J);
  end

else

% EXECUTE THE CLASSIFIER
  % C is a SVM classifier now
  n = size(K,1);
  w = +C;

  % Get the parameters from the classifier
  me = w{1};
  J  = w{2};

  % The first parameter w{1} stores the mean of the kernel.
  % When it is non-empty, kernel centering should also be applied
  % to the test kernel.

  if ~isempty(me),
    m = length(me);
    % Center the kernel
    B = -repmat(1/m,m,m);
    B(1:m+1:end) = B(1:m+1:end) + 1;    % B = eye(m) - ones(m,m)/m
    K = (K - me(ones(n,1),:)) * B;
  end

  if ~isempty(w{3}),   % This is the transformation that flips the negative eigenvalues
    K = K*w{3};
  end

  % The classifier is stored in w{4}.
  % The data is mapped by the kernel; what remains is a linear classifier w*x+b:
  d = [K(:,J) ones(n,1)] * w{4};
  d = sigm([d -d]);
  W = setdat(K,d,C);
end
return


function dm = dmeans(K,y)
% Computes the square pseudo-Euclidean distance between the
% means of the two classes. This can be done by using the kernel only.
% A negative value means that the contribution of the negative
% subspace in the pseudo-Euclidean sense is too large.
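% With yy(Z) = 1/length(Z) and yy(T) = -1/length(T), yy'*K*yy expands to
%   mean(mean(K(Z,Z))) + mean(mean(K(T,T))) - 2*mean(mean(K(Z,T))),
% i.e. the square distance between the two class means in the
% (pseudo-Euclidean) feature space underlying K.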
yy = y;
Z  = find(y == 1);
T  = find(y == -1);
yy(Z) = 1/length(Z);
yy(T) = -1/length(T);
dm    = yy'*(+K)*yy;
return;