%KSVO_NU Kernel Support Vector Optimizer: NU algorithm
%
%   [V,J,T,REG,ERR] = KSVO_NU(K,Y,NU,R)
%
% INPUT
%   K    Kernel matrix
%   Y    Label list consisting of -1/+1
%   NU   Regularization parameter (0 < NU < 1): expected fraction of
%        support vectors (optional; default: 0.25)
%   R    Parameter: -1,0,1,2
%        -1 or 'flip',   Changes the kernel by flipping the negative
%                        eigenvalues to positive
%         0 or '',       Uses the kernel as it is
%         1 or 'reg',    Checks positive definiteness and regularizes the
%                        kernel by adding the minimal constant of the form
%                        10^i to the diagonal
%         2 or 'lamreg', Checks positive definiteness and regularizes the
%                        kernel by adding the minimal constant to the
%                        diagonal (equal to the magnitude of the smallest
%                        negative eigenvalue)
%        (optional; default: 0, do not change the kernel)
%
% OUTPUT
%   V    Vector of weights for the support vectors
%   J    Index vector pointing to the support vectors
%   T    Transformation matrix for the test kernel; to be used in testing
%   REG  Regularization constant added to the diagonal, if used (R = 1,2);
%        a vector of eigenvalues of K (R = -1), or -1 if not checked (R = 0)
%   ERR  1 if the optimization failed and a Pseudo-Fisher solution was
%        computed instead; 0 otherwise
%
% DESCRIPTION
% A low-level routine that optimizes the set of support vectors for a
% two-class classification problem based on the kernel or similarity
% matrix K. KSVO_NU is called directly from KSVC_NU. The labels Y should
% indicate the two classes by +1 and -1. Optimization is done by quadratic
% programming. If available, the QLD function is used for a positive
% definite kernel, otherwise an appropriate Matlab routine.
%
% NU is bounded from above by NU_MAX = 1 - ABS(Lp-Lm)/(Lp+Lm), where Lp (Lm)
% is the number of positive (negative) samples. If NU > NU_MAX is supplied,
% it is clipped to NU_MAX.
%
% If NU is smaller than some NU_MIN, which depends on the overlap between
% the classes, the algorithm will typically take a long time to converge,
% if it converges at all. It is therefore advisable to set NU larger than
% the expected overlap. The weights V are rescaled such that they match the
% weights KSVO would return for the corresponding parameter C.
%
% SEE ALSO
% KSVC_NU, KSVC, KSVO, SVO
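%
% EXAMPLE
% A minimal usage sketch (illustrative only; X, Y and Z are assumed data,
% not part of this routine, and KSVO_NU is normally reached via KSVC_NU):
%
%   K  = X*X';                          % e.g. a linear kernel on X (N x d)
%   [v,J,T,reg] = ksvo_nu(K,Y,0.25,1);  % Y in {-1,+1}, 'reg' regularization
%   Kt = Z*X(J,:)';                     % test kernel against the SVs
%   d  = Kt*v(1:end-1) + v(end);        % signed decision values for Z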

% Copyright: Elzbieta Pekalska, Robert P.W. Duin, ela.pekalska@googlemail.com
% Based on SVC.M by D.M.J. Tax, D. de Ridder and R.P.W. Duin and SVC_NU.M by S. Verzakov
% Faculty EWI, Delft University of Technology and
% School of Computer Science, University of Manchester


function [v,J,T,reg,err] = ksvo_nu(K,y,nu,r)

prtrace(mfilename);

if (nargin < 4)
  r = 0;        % Default: use the kernel as provided
end
if (nargin < 3)
  prwarning(3,'Third parameter (nu) not specified, assuming 0.25.');
  nu = 0.25;
end

if all(r ~= [-1,0,1,2])
  error('Wrong parameter R.');
end

nu_max = 1 - abs(nnz(y == 1) - nnz(y == -1))/length(y);
if nu > nu_max
  prwarning(3,['nu = ' num2str(nu) ' is not feasible; set to ' num2str(nu_max)]);
  nu = nu_max;
end

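% Numeric illustration of the bound above (example numbers, not from the
% code): with 60 labels equal to +1 and 40 equal to -1,
%   nu_max = 1 - |60-40|/100 = 0.8,
% so a requested nu of 0.9 would be clipped to 0.8 by the check above.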
err  = 0;
v    = [];    % Weights of the SVM
J    = [];    % Indices of the support vectors
reg  = -1;    % Regularization constant or list of eigenvalues, if used; -1 if not checked
vmin = 1e-9;  % Accuracy threshold for deciding when an object becomes a support object
iacc = -14;   % Accuracy of 10^i for deciding whether the kernel is pd or not
T    = [];    % Matrix to transform the test kernel if r = -1


% Set up the variables for the optimization.
n  = size(K,1);
Ky = (y*y').*K;
f  = zeros(1,n);
A  = [ones(1,n); y'];
b  = [nu*n; 0];
lb = zeros(n,1);
ub = ones(n,1);
p  = rand(n,1);

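% For reference, the variables above encode a scaled form of the nu-SVM dual
% (a sketch of the intent, not part of the original code): with x = n*alpha,
%
%   min_x  0.5*x'*Ky*x   s.t.  sum(x) = nu*n,  y'*x = 0,  0 <= x_i <= 1,
%
% which corresponds to the standard box constraint 0 <= alpha_i <= 1/n.
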
if r ~= 0
  % Determine whether the kernel is positive definite.
  i = -20;
  while (pd_check (Ky + (10.0^i) * eye(n)) == 0)
    i = i + 1;
  end

  if r == 1
    if i > iacc   % If i <= iacc, the regularization is within numerical accuracy.
      prwarning(1,'K is not positive definite. The diagonal is regularized by 10.0^(%d)',i);
    end

    % Make the kernel positive definite by adding a constant to the diagonal.
    reg = 10.0^i;
    Ky  = Ky + (10.0^i) * eye(n);

  elseif r == 2
    L = preig(Ky);
    L = diag(real(L));
    I = find(L < 0);
    if ~isempty(I)
      % Make the kernel positive definite by adding a constant to the diagonal,
      % slightly larger than the magnitude of the smallest negative eigenvalue.
      reg = -1.001*min(L(I));
      Ky  = Ky + reg * eye(n);
      pow = round(log10(reg));
      if pow >= 0
        prwarning(1,['K is not positive definite. The diagonal is regularized by %' num2str(pow) '.2f'],reg);
      elseif pow > iacc
        prwarning(1,'K is not positive definite. The diagonal is regularized by %0.3g',reg);
      end
    else
      reg = 0;
    end

  else  % r = -1
    % Flip the negative eigenvalues of the kernel K to positive.
    if i > iacc
      [Q,L] = preig(K);
      Q = real(Q);
      L = diag(real(L));
      reg = L;          % Return the eigenvalues of K, as documented in the header
      if all(L >= 0)
        T = [];
      else
        K  = Q*diag(abs(L))*Q';
        Ky = (y*y').*K;
        T  = Q*diag(sign(L))*Q';   % Transformation matrix for the test kernel
      end
    else
      % i <= iacc: the needed regularization is within numerical accuracy, so apply it.
      Ky = Ky + (10.0^i) * eye(n);
    end
  end
end

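% Note on T (an inference from the flipping step above, not a statement from
% the original code): since the flipped kernel equals K*T on the training
% objects, a test kernel KT computed against the training set should be used
% as KT*T whenever T is nonempty; the caller (KSVC_NU) is expected to do this.
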
% Minimization procedure
% The QP solver minimizes:   0.5 x'*Ky*x + f'*x
% subject to:                A*x = b,  lb <= x <= ub
%

done = 0;
if r ~= 0 & (exist('qld') == 3)
  prwarning(5,'QLD routine is used.')
  % The trailing argument 2 tells QLD that the first two constraints in
  % (-A,b) are equalities (an assumption based on common QLD usage).
  v = qld (Ky, f, -A, b, lb, ub, p, 2);
  done = 1;
end

if r == 0 | ~done
  if (exist('quadprog') == 2)
    prwarning(5,'Matlab QUADPROG is used.')
    opt             = optimset;
    opt.Diagnostics = 'off';
    opt.LargeScale  = 'off';
    opt.Display     = 'off';
    opt.MaxIter     = 500;
    v = quadprog(Ky, f, [], [], A, b, lb, ub, [], opt);
  else
    error('KSVC requires Matlab 6.x')
  end
end


% Compute the squared pseudo-norm of the weights in the corresponding
% Hilbert/Krein space. If it is negative (which may happen for a highly
% indefinite kernel), then the SVM is not properly defined.

vnorm = v'*Ky*v;
if vnorm < 0 | abs(vnorm) < 10^iacc
  prwarning(1,'SVM: ||w||^2_PE < 0. The SVM is not properly defined. Pseudo-Fisher is computed instead.');
  err = 1;
  v = prpinv([K ones(n,1)])*y;
  J = [1:n]';
  return;
end
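% The fallback above fits a linear model by least squares: v solves
% [K ones(n,1)]*v ~ y in the pseudo-inverse sense, so f(x) = K(x,:)*v(1:n) + v(n+1),
% and all n objects are returned as support objects (J = 1:n).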


% Find all the support vectors.
J = find(v > vmin);

% Among them, find the unbounded support vectors lying on the margin boundary.
I  = J(v(J) < 1-vmin);
Ip = find(y(I) == 1);
Im = find(y(I) == -1);
if (isempty(v) | isempty(Ip) | isempty(Im))
  prwarning(1,'Quadratic optimization failed. Pseudo-Fisher is computed instead.');
  v = prpinv([K ones(n,1)])*y;
  J = [1:n]';
  err = 1;
  return;
end


% Rescale the weights as if computed by KSVO with the trade-off parameter C = 1/rho.
v    = y.*v;
wxI  = K(I,J)*v(J);
wxIp = mean(wxI(Ip),1);
wxIm = mean(wxI(Im),1);

rho = 0.5*(wxIp-wxIm);
b   = -(0.5/rho)*(wxIp+wxIm);
%b  = mean(y(I) - wxI/rho);

v = [v(J)/rho; b];
C = 1/rho;    % Implicit KSVO trade-off parameter; kept for reference, not returned

return;
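% Why the rescaling works (a sketch of the reasoning, not part of the original
% code): for unbounded SVs the nu-SVM solution satisfies y_i*(w'*x_i + b0) = rho,
% hence wxIp - wxIm = 2*rho and wxIp + wxIm = -2*b0, which yields the rho and b
% above; dividing by rho normalizes the margin to 1, as in KSVO with C = 1/rho.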