source: distools/ksvc_nu.m

%KSVC_NU Kernel Support Vector Classifier: NU algorithm
%
%   [W,J,REG,ERR] = KSVC_NU(K,NU,KC,R)
%
% INPUT
%   K   NxN Kernel dataset
%   NU  Regularization parameter (0 < NU < 1);
%       expected fraction of support vectors (optional; default: 0.25)
%   KC  Kernel centering, 1/0 (optional; default: 1)
%   R   Parameter: -1, 0, 1 or 2 (optional; default: 0, use the kernel as it is)
%       -1 or 'flip',   Changes the kernel by flipping negative eigenvalues to
%                       positive
%        0 or '',       Uses the kernel as it is
%        1 or 'reg',    Checks positive definiteness and regularizes the kernel by
%                       adding the minimal constant of the form 10^i to the diagonal
%        2 or 'lamreg', Checks positive definiteness and regularizes the kernel by
%                       adding the minimal constant to the diagonal (equal to the
%                       magnitude of the smallest negative eigenvalue)
%
% OUTPUT
%   W   Mapping: Support Vector Classifier
%   J   Object identifiers of support objects
%   REG Regularization parameter added to the diagonal, if used (R=1,2); a vector
%       of eigenvalues of K (R=-1), or -1 if not checked (R=0)
%   ERR Flag; nonzero if the SVM could not be built and a pseudo-Fisher
%       classifier was trained instead
%
% DEFAULT
%   NU  = 0.25
%   KC  = 1
%   R   = 0 (use the kernel as it is)
%
% DESCRIPTION
% Determines a support vector machine (SVM) for the kernel K. A quadratic
% programming formulation is used to solve the problem. K can be positive definite
% or indefinite. J is a list of the indices of the support objects from K. NU lies
% in the interval (0,1); the larger NU, the larger the class overlap allowed.
% Default NU = 0.25.
%
% For positive semidefinite kernels, NU is bounded from above by
% NU_MAX = 1 - ABS(Lp-Lm)/(Lp+Lm), where Lp (Lm) is the number of positive
% (negative) samples; e.g. for 30 positive and 70 negative objects,
% NU_MAX = 1 - 40/100 = 0.6. If a larger NU is supplied to the routine, it is
% reduced to NU_MAX. If NU is smaller than some NU_MIN, which depends on the
% overlap between the classes, the algorithm typically takes a long time to
% converge (if at all), so it is advisable to set NU larger than the expected
% overlap.
% The output is rescaled such that it corresponds to what KSVC would return with
% the equivalent parameter C: the smaller C (e.g. C < 1), the larger the class
% overlap imposed. The optimal C differs between a regularized kernel and an
% indefinite one.
%
% If R = 2, then K is regularized by adding the smallest possible constant to the
% diagonal to make it positive definite. If R = 1, then K is regularized by adding
% the smallest constant of the form 10^i to the diagonal. If R = -1, then the
% eigendecomposition K = Q*L*Q' is computed and the negative eigenvalues are
% flipped, so the SVM is built on K = Q*abs(L)*Q'.
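%
% A minimal sketch of what the 'flip' option (R = -1) amounts to; the actual
% handling is assumed to take place inside KSVO_NU:
%
%   [Q,L] = eig((K+K')/2);       % eigendecomposition of the symmetrized kernel
%   K     = Q*abs(L)*Q';         % flip the negative eigenvalues to positive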
%
% IMPORTANT
% The classifier cannot always be constructed for an indefinite kernel. If the norm
% of the determined weight vector V in the Krein (pseudo-Euclidean) space induced
% by K is negative, i.e. if V'*K*V < 0, then a proper SVM cannot be built. This
% happens when the hyperplane in the Krein space lies 'more' in the negative
% subspace than in the positive one, which means that the kernel is strongly
% indefinite. Currently, a warning is given and a pseudo-Fisher classifier is
% trained instead. The pseudo-Fisher classifier is also trained whenever the
% quadratic optimization fails to find a solution.
%
% REMARKS
% Note that if D is a symmetric distance/dissimilarity matrix, then K = -D is an
% (indefinite) kernel. If D.^2 is a squared Euclidean distance matrix, then
% K = -D.^2 is a proper (conditionally negative definite) kernel. So, a linear SVM
% on some data X, based on the kernel K = X*X', is the same classifier as the
% kernel-based SVM on K = -D(X,X).^2.
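%
% EXAMPLE
% A minimal training sketch on such a kernel; it assumes a standard PRTools setup
% in which GENDATB, DISTM (squared Euclidean distances), PRDATASET and LABELD are
% available, so names and details may differ:
%
%   A   = gendatb([50 50]);                      % two-class banana data
%   K   = prdataset(-distm(+A,+A),getlabels(A)); % kernel K = -D.^2 (see above)
%   W   = ksvc_nu(K,0.25);                       % train the NU-SVM on the kernel
%   lab = labeld(K*W);                           % labels of the training objects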
%
% SEE ALSO
% KSVO_NU, KSVO, KSVC, MAPPINGS
%
% LITERATURE
% 1. B. Scholkopf, A. Smola, Learning with Kernels, MIT Press, 2001,
%    http://www.learning-with-kernels.org/.
% 2. B. Haasdonk, Feature Space Interpretation of SVMs with Indefinite Kernels.
%    IEEE Trans. on PAMI, 27(4):482-492, 2005.
% 3. E. Pekalska, P. Paclik, R.P.W. Duin, A Generalized Kernel Approach to
%    Dissimilarity-based Classification, JMLR, vol.2, no.2, 175-211, 2002.


% Copyright: Elzbieta Pekalska, Robert P.W. Duin, ela.pekalska@googlemail.com
% Based on SVC.M by D.M.J. Tax, D. de Ridder and R.P.W. Duin and SVC_NU.M by S. Verzakov
% Faculty EWI, Delft University of Technology and
% School of Computer Science, University of Manchester


function [W,J,reg,err,K] = ksvc_nu(K,nu,kc,r,no)
prtrace(mfilename);

if nargin < 5,
  % Multi-class problems are solved one-vs-all by calling KSVC_NU recursively.
  % NO is the class number in such a situation; 0 is the standard 2-class case.
  no = 0;
end
if nargin < 4,
  r = 0;
end
if nargin < 3,
  kc = 1;
end
if nargin < 2 | isempty(nu),
  nu = 0.25;
  prwarning(3,'Regularization parameter NU set to 0.25\n');
end
if nargin < 1 | isempty(K),
  W = prmapping(mfilename,{nu,kc,r});
  W = setname(W,'Kernel Support Vector Classifier (KSVC-NU)');
  return;
end


if all(kc ~= [0 1]),
  error('Wrong KC parameter.');
end

switch lower(r)
  case {'flip', -1},  r = -1;
  case {'', 0},       r = 0;
  case {'reg', 1},    r = 1;
  case {'lamreg', 2}, r = 2;
  otherwise
    error('Wrong parameter R.');
end


% TRAIN THE CLASSIFIER
if ~isa(nu,'prmapping')
  islabtype(K,'crisp');
  isvaldset(K,1,2);      % Expect at least 1 object per class and 2 classes
  [m,k,c] = getsize(K);
  if m ~= k,
    error('The kernel is not a square matrix.');
  end
  nlab = getnlab(K);

  % The SVC is basically a two-class classifier.
  % Multi-class problems are trained one-versus-all.

  if c == 2   % Two-class classifier
    % Compute the parameters for the optimization:
    y = 3 - 2*nlab;    % y = +1/-1

    prec = 1e-12;
    if ~issym(K,prec),
      prwarning(1, 'The kernel is not symmetric. It is symmetrized by averaging.')
      K = (K+K')/2;
    end

    if kc,   % Center the data
      me = mean(+K,2)';                % Store the mean value

      B = -repmat(1/m,m,m);
      B(1:m+1:end) = B(1:m+1:end)+1;   % B = eye(m) - ones(m,m)/m
      K = B*K*B;                       % K is now centered
    else
      me = [];
    end
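    % Centering computes K <- (eye(m)-ones(m)/m) * K * (eye(m)-ones(m)/m); the
    % stored row mean ME is needed later to center a test kernel in the same way
    % (see the execution branch below).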
    K = (K+K')/2;                      % K is symmetrized again, as the QP solver is sensitive to small asymmetries


    % Check feasibility of the kernel:
    if r == 0 & dmeans(K,y) < 0,
      if no > 0,
        ss = [num2str(no) '-vs-all '];
      else
        ss = '';
      end
      prwarning(1,['The kernel is badly indefinite. The ' ss 'SVM cannot be defined. Pseudo-Fisher is computed instead.']);
      err = 1;
      v   = prpinv([K ones(m,1)])*y;
      J   = [1:m]';
      T   = [];
      reg = [];
    else
      % Perform the optimization
      [v,J,T,reg,err] = ksvo_nu(+K,y,nu,r);
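      % (v holds the weight vector and bias, J the support object indices, T the
      % eigenvalue-flipping transformation used for R = -1, reg the added
      % regularization, and err a flag signalling a failed optimization.)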
    end

    % Store the results
    W = prmapping(mfilename,'trained',{me,J,T,v},getlablist(K),k,2);
    %W = cnormc(W,a);
    W = setname(W,'Kernel Support Vector Classifier (KSVC-NU)');
    W = setcost(W,K);
    % J = K.ident(J);

  else   % MULTI-CLASS CLASSIFIER: C > 2

    % MCLASSC cannot be used here as we have a kernel K
    W = [];
    J = zeros(m,1);
    lablist = getlablist(K);

    for i=1:c
      lab = 2 - (nlab == i);   % lab = 1/2
      KK  = setlabels(K,lab);
      KK  = remclass(KK,0);
      KK  = setfeatlab(KK,lab);
      if ~isempty(K.prior)
        KK = setprior(KK,[K.prior(i),1-K.prior(i)]');
      end
      [V,j,reg(i),err(i)] = ksvc_nu(KK,nu,kc,r,i);
      W = [W,setlabels(V(:,1),lablist(i,:))];
      J(j) = 1;
    end
    J = find(J);
  end

else

% EXECUTE THE CLASSIFIER
  % NU is now a trained KSVC-NU mapping
  n = size(K,1);
  w = +nu;

  % Get the parameters from the classifier
  me = w{1};
  J  = w{2};

  % The first parameter w{1} stores the mean of the kernel.
  % When it is non-empty, data centering should also be applied
  % to the test kernel.

  if ~isempty(me),
    % Center the kernel
    m = length(me);
    B = -repmat(1/m,m,m);
    B(1:m+1:end) = B(1:m+1:end) + 1;    % B = eye(m) - ones(m,m)/m
    K = (K - me(ones(n,1),:)) * B;
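    % This computes Kc = (Ktest - ones(n,1)*me) * (eye(m) - ones(m)/m), the test
    % counterpart of the B*K*B centering applied during training.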
  end

  if ~isempty(w{3}),   % this is the transformation that reverses the negative eigenvalues
    K = K*w{3};
  end

  % The classifier is stored in w{4}
  % Data is mapped by the kernel, now we just have a linear classifier w*x+b:
  d = [+K(:,J) ones(n,1)] * w{4};
  d = sigm([d -d]);
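  % SIGM maps the signed distances to (0,1) confidences; the two columns sum to one.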
  W = setdat(K,d,nu);
end

return


function dm = dmeans(K,y)
% Computes the squared pseudo-Euclidean distance between the
% means of the two classes. This can be done by using the kernel only.
% A negative value means that the contribution of the negative
% subspace in the pseudo-Euclidean sense is too large.

yy = y;
Z  = find(y == 1);
T  = find(y == -1);
yy(Z) = 1/length(Z);
yy(T) = -1/length(T);
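% With these weights, yy'*K*yy equals the average of K within the positive class
% plus the average within the negative class minus twice the average between the
% classes, i.e. the squared distance between the two class means in the
% (pseudo-Euclidean) space induced by K.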
dm    = yy'*(+K)*yy;
return;