%KSVO_NU Kernel Support Vector Optimizer: NU algorithm
%
% [V,J,T,REG] = KSVO_NU(K,Y,NU,R)
%
% INPUT
%   K   Kernel matrix
%   Y   Label list consisting of -1/+1
%   NU  Regularization parameter (0 < NU < 1):
%       expected fraction of support vectors (optional; default: 0.25)
%   R   Parameter: -1,0,1,2
%       -1 or 'flip',   Changes the kernel by flipping negative eigenvalues
%                       to positive
%        0 or '',       Uses the kernel as it is
%        1 or 'reg',    Checks positive definiteness and regularizes the kernel
%                       by adding the minimal constant in the form of 10^i
%                       to the diagonal
%        2 or 'lamreg', Checks positive definiteness and regularizes the kernel
%                       by adding the minimal constant to the diagonal (equal
%                       to the magnitude of the smallest negative eigenvalue)
%       (optional; default: 0, do not change the kernel)
%
% OUTPUT
%   V   Vector of weights for the support vectors
%   J   Index vector pointing to the support vectors
%   T   Transformation matrix for the test kernel; to be used in testing
%   REG Regularization parameter added to the diagonal, if used (R=1,2);
%       a vector of eigenvalues of K (R=-1), or -1 if not checked (R=0)
%
% DESCRIPTION
% A low-level routine that optimizes the set of support vectors for a
% two-class classification problem based on the kernel or similarity
% matrix K. KSVO_NU is called directly from KSVC_NU. The labels Y should
% indicate the two classes by +1 and -1. Optimization is done by quadratic
% programming. If available, the QLD function is used for a positive
% definite kernel; otherwise, an appropriate Matlab routine is used.
%
% NU is bounded from above by NU_MAX = 1 - ABS(Lp-Lm)/(Lp+Lm), where Lp (Lm)
% is the number of positive (negative) samples. If NU > NU_MAX is supplied,
% it is changed to NU_MAX.
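% For example, with Lp = 70 positive and Lm = 30 negative samples,
% NU_MAX = 1 - ABS(70-30)/100 = 0.6, so a supplied NU = 0.8 is clipped to 0.6.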
%
% If NU is smaller than some NU_MIN, which depends on the overlap between
% the classes, the algorithm will typically take a long time to converge,
% if it converges at all. It is therefore advisable to set NU larger than
% the expected class overlap. The weights V are rescaled such that they
% correspond to the weights that KSVO would return with the regularization
% parameter C.
%
% SEE ALSO
% KSVC_NU, KSVC, KSVO, SVO
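%
% EXAMPLE
% A minimal usage sketch; the kernel K, the label construction and the test
% kernel KT below are illustrative assumptions, not prescribed by this routine:
%   K = A*A';                        % linear kernel on a data matrix A
%   y = [ones(70,1); -ones(30,1)];   % +1/-1 labels
%   [v,J,T,reg] = ksvo_nu(K,y,0.25,1);
%   f = KT(:,J)*v(1:end-1) + v(end); % decision values for a test kernel KT
%                                    % (for R = -1, use KT*T instead of KT)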

% Copyright: Elzbieta Pekalska, Robert P.W. Duin, ela.pekalska@googlemail.com
% Based on SVC.M by D.M.J. Tax, D. de Ridder and R.P.W. Duin and SVC_NU.M by S. Verzakov
% Faculty EWI, Delft University of Technology and
% School of Computer Science, University of Manchester


function [v,J,T,reg,err] = ksvo_nu(K,y,nu,r)

prtrace(mfilename);

if (nargin < 4),
  r = 0;      % Default: the kernel is used as provided
end
if (nargin < 3)
  prwarning(3,'Third parameter (nu) not specified, assuming 0.25.');
  nu = 0.25;
end

if all(r ~= [-1,0,1,2]),
  error('Wrong parameter R.');
end

nu_max = 1 - abs(nnz(y == 1) - nnz(y == -1))/length(y);
if nu > nu_max
  prwarning(3,['nu==' num2str(nu) ' is not feasible; set to ' num2str(nu_max)]);
  nu = nu_max;
end

err  = 0;
v    = [];    % Weights of the SVM
J    = [];    % Indices of the support vectors
reg  = -1;    % Regularization added to the diagonal or a list of eigenvalues,
              % if used; -1, if not checked
vmin = 1e-9;  % Accuracy threshold determining when an object becomes a support object
iacc = -14;   % Accuracy of 10^i determining whether the kernel is pd or not
T    = [];    % Matrix to transform the test kernel if r = -1


% Set up the variables for the optimization.
n  = size(K,1);
Ky = (y*y').*K;          % kernel matrix multiplied by the labels
f  = zeros(1,n);         % no linear term in the nu-SVM dual
A  = [ones(1,n); y'];    % equality constraints: sum(x) = nu*n and y'*x = 0
b  = [nu*n; 0];
lb = zeros(n,1);         % box constraints: 0 <= x <= 1
ub = ones(n,1);
p  = rand(n,1);          % random starting point for the QP solver
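
% In this scaling, the problem being solved is the standard nu-SVM dual
% (a sketch, following the usual nu-SVM formulation with the multipliers
% rescaled to the box [0,1]):
%   min_x   0.5 * x'*Ky*x
%   s.t.    sum(x) = nu*n,  y'*x = 0,  0 <= x_i <= 1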

if r ~= 0,
  % Find whether the kernel is positive definite: increase i until adding
  % 10.0^i to the diagonal makes Ky pass the positive-definiteness check.
  i = -20;
  while (pd_check (Ky + (10.0^i) * eye(n)) == 0)
    i = i + 1;
  end

  if r == 1,
    if i > iacc,  % if i is smaller, the regularization is within numerical accuracy
      prwarning(1,'K is not positive definite. The diagonal is regularized by 10.0^(%d)',i);
    end

    % Make the kernel positive definite by adding a constant to the diagonal.
    reg = 10.0^i;
    Ky  = Ky + (10.0^i) * eye(n);

  elseif r == 2,
    L = preig(Ky);
    L = diag(real(L));
    I = find(L < 0);
    if ~isempty(I),
      % Make the kernel positive definite by adding a constant to the diagonal.
      reg = -1.001*min(L(I));
      Ky  = Ky + reg * eye(n);
      pow = round(log10(reg));
      if pow >= 0
        prwarning(1,['K is not positive definite. The diagonal is regularized by %' num2str(pow) '.2f'],reg);
      elseif pow > iacc,
        prwarning(1,'K is not positive definite. The diagonal is regularized by %0.3g',reg);
      end
    else
      reg = 0;
    end

  else   % r = -1,
    % Flip the negative eigenvalues of the kernel K to positive ones.
    if i > iacc,
      [Q,L] = preig(K);
      Q = real(Q);
      L = diag(real(L));
      reg = L;              % return the eigenvalues of K, as documented above
      if all(L >= 0),
        T = [];
      else
        K  = Q*diag(abs(L))*Q';
        Ky = (y*y').*K;
        T  = Q*diag(sign(L))*Q';   % transformation matrix for the test kernel
      end
    else
      % If i <= iacc, the required regularization is within numerical
      % accuracy, so simply apply it.
      Ky = Ky + (10.0^i) * eye(n);
    end
  end
end
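
% A note on the 'flip' option (r = -1), sketching why T works: with the
% eigendecomposition K = Q*diag(L)*Q', the flipped kernel equals
% Q*diag(abs(L))*Q'. Since diagonal matrices commute and Q is orthonormal,
% Q*diag(abs(L))*Q' = K*(Q*diag(sign(L))*Q') = K*T, so a test kernel KT
% computed against the original representation is corrected as KT*T.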


% Minimization procedure
% QP minimizes:   0.5 x'*Ky*x + f'x
% subject to:     A*x = b,  lb <= x <= ub

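% A note on the solver choice below: QLD is looked up as a MEX-file
% (EXIST == 3) and is used once the kernel has been checked or regularized
% (r ~= 0); in all other cases Matlab's QUADPROG does the optimization.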
done = 0;
if r ~= 0 & (exist('qld') == 3)
  prwarning(5,'QLD routine is used.')
  v = qld (Ky, f, -A, b, lb, ub, p, 2);
  done = 1;
end

if r == 0 | ~done,
  if (exist('quadprog') == 2)
    prwarning(5,'Matlab QUADPROG is used.')
    opt = optimset;
    opt.Diagnostics = 'off';
    opt.LargeScale  = 'off';
    opt.Display     = 'off';
    opt.MaxIter     = 500;
    v = quadprog(Ky, f, [], [], A, b, lb, ub, [], opt);
  else
    error('KSVO_NU requires Matlab 6.x')
  end
end

% Compute the squared pseudo-norm of the weights in the corresponding
% Hilbert/Krein space. If it is negative (which may happen for a highly
% indefinite kernel) or numerically zero, the SVM is not properly defined.

vnorm = v'*Ky*v;
if vnorm < 0 | abs(vnorm) < 10^iacc,
  prwarning(1,'SVM: ||w||^2_PE < 0. The SVM is not properly defined. Pseudo-Fisher is computed instead.');
  err = 1;
  v = prpinv([K ones(n,1)])*y;   % least-squares solution of [K 1]*[w;b] = y
  J = [1:n]';
  return;
end

% Find all the support vectors.
J = find(v > vmin);

% First find the support vectors on the boundary (0 < alpha < 1).
I  = J(v(J) < 1-vmin);
Ip = find(y(I) == 1);
Im = find(y(I) == -1);
if (isempty(v) | isempty(Ip) | isempty(Im))
  prwarning(1,'Quadratic optimization failed. Pseudo-Fisher is computed instead.');
  v = prpinv([K ones(n,1)])*y;
  J = [1:n]';
  err = 1;
  return;
end

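% For the boundary support vectors, w'*x + b0 = +/-rho, so rho is estimated
% as half the gap between the mean outputs of the two classes' boundary
% support vectors. Dividing the weights and the bias by rho rescales the
% margins to +/-1, i.e., the standard scaling of KSVO with the equivalent
% tradeoff parameter C = 1/rho.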
v    = y.*v;
wxI  = K(I,J)*v(J);     % outputs w'*x for the boundary support vectors
wxIp = mean(wxI(Ip),1);
wxIm = mean(wxI(Im),1);

rho = 0.5*(wxIp-wxIm);
b   = -(0.5/rho)*(wxIp+wxIm);
%b  = mean(y(I) - wxI/rho);

v = [v(J)/rho; b];      % rescaled weights of the support vectors plus the bias
C = 1/rho;              % the equivalent KSVO regularization parameter

return;