source: prextra/fsellpc.m @ 108

Last change on this file since 108 was 5, checked in by bduin, 14 years ago
File size: 8.4 KB
Line 
1%FSELLPC LP (linear programming) classifier
2%
3%   [W1,W2,W3] = fsellpc(A,BIAS,TYPE,PARAM)
4%
5% INPUT
6%   A     Dataset
7%   BIAS  YES or NO (optional; default: 1 (YES))
8%   TYPE  Type of a classifier
9%         'SIMPLE'   - the most simple formulation; all features used; PARAM = [];
10%         'STANDARD' - minimization of the training misclassification errors;
11%                      all features used; PARAM = [];
12%         'C-SPARSE' - feature selection (sparse solution); a formulation similar
13%                      to the LP_1 SVM; PARAM is a tradeoff parameter.
14%                      (optional; DEFAULT: 1).
15%         'MU-SPARSE' - feature selection (sparse solution); a formulation similar
16%                       to the LP_1 SVM, based on the paper of Graepel, Herbrich, Smola etc
17%                       'Classification on proximity data with LP-machines'.
18%                       PARAM is a tradeoff parameter, usually PARAM = 0.05 or 0.1.
19%                       It is an upper bound on the misclassfied training objects.
20%                       So, for well separable problems, PARAM = 0.01 or PARAM = 0.02.
21%                       (optional; DEFAULT: the LOO 1-NN error * 1.3).
22%  PARAM Parameter connected to the TYPE, as above
23%
24% OUTPUT
25%   W1  LP Classifier in the complete space
26%   W2  LP Classifier in a reduced space
27%   W3  Feature selection mapping; the indices of chosen features are in +W3.
28%
29% DEFAULTS
30%   BIAS  = 1
31%   TYPE  = 'STANDARD'
32%   PARAM = []
33%
34% DESCRIPTION
35% Classification problem on a N x M data A with LP-machines. Assume a two-class
36% problem. Let DLPC select J features. Then:
37% W1 is an M x 2 classifier in the original space, W2 is an J x 2 classifier
38% in the feature space defined by the J chosen features and W3 is an M x R feature
39% selection such that W1 = W3 * W2. Note that the indices of the selected features
40% can be retrieved by +W3.
41%
42% A linear classifier is built on A:
43%
44%     f(A(x,F)) = A(x,F) * w + w0,
45%
46% where F are the features and w are the weights. If BIAS is 1, then w0 is
47% also sought, otherwise it equals 0. This means that the hyperplane is
48% forced to go through the origin.
49%
50% For C-class problems, C classifiers are trained, one against all others.
51% In such a case, only W1 is returned and W3 in now NOT a feature selection,
52% but directly the indices of the selected features.
53%
54% DEFAULT:
55%   BIAS  = 1
56%   TYPE  = 'STANDARD'
57%   PARAM = 1
58%
59
60% Elzbieta Pekalska, Robert P.W. Duin, e.pekalska@tudelft.nl
61% Faculty of Electrical Engineering, Mathematics and Computer Science,
62% Delft University of Technology, The Netherlands.
63
64
65
66
67function [W1,W2,W3] = fsellpc(a,is_w0,type,par,usematlab,prec)
68
69if nargin < 6, prec = 1e-7; end
70if nargin < 5, usematlab = 0; end
71if nargin < 3 | isempty(type), type = 'standard'; end
72if nargin < 4 | isempty(par),
73  switch upper(type)
74    case {'MU-SPARSE'}
75      par = max(0.01,1.3*testk(a,1));     % upperbound error: 1.3 * loo 1-nn error
76    case {'C-SPARSE'}
77      par = 1;
78    case {'SIMPLE','STANDARD'},
79      par = [];
80    otherwise
81      disp(type)
82      error('Wrong type.')
83  end
84end
85if nargin < 2 | isempty(is_w0), is_w0 = 1; end
86if nargin < 1 | isempty(a)
87  W1 = mapping(mfilename,{is_w0,type,par,usematlab});
88  W1 = setname(W1,'FSELLPC');
89  W2 = [];
90  W3 = [];
91  return
92end
93
94
95
96if ~isdataset(a),
97  error('The first parameter should be a dataset.')
98end
99if ~isnumeric(is_w0) | (is_w0 ~= 0 & is_w0 ~= 1),
100  error('The second parameter should be 0 or 1.');
101end
102
103
104lab      = getnlab(a);
105lablist  = getlablist(a);
106[m,k,C]  = getsize(a);
107
108
109z = (is_w0 > 0);  % is the bias used or not?
110
111% This is the status of the optimization procedure.
112% For GLPK, this is the exit code; see GLPKMEX for details.
113% For Matlab LINPROG, if negative then no solution is found.
114
115status = 1;
116
117
118% If more than 2 classes, train the classifier one-against-all.
119if C > 2,
120
121% W1 = mclassc(a,mapping(mfilename,{is_w0,type,par,usematlab}));
122
123  W1 = [];
124  W2 = [];
125  W3 = [];
126  N  = [];
127  for i=1:C
128    mlab = 2 - (lab == i);
129    aa   = dataset(+a,mlab);
130    [v1,v2,v3]= fsellpc(aa,is_w0,type,par,usematlab);
131    j = +v3;
132    if isempty(v1),
133      W1 = [];
134      W2 = [];
135      W3 = [];
136      prwarning(1,'No solution found.');
137      return;
138    end
139    W1 = [W1,setlabels(v1(:,1),lablist(i,:))];
140    W2 = [W2;setlabels(v2(:,1),lablist(i,:))];
141    W3(j) = ones(length(j),1);
142    N = [N j];
143  end
144  [N1,N2,N3] = unique(N);
145  W3 = featsel(k,N1);
146  W2 = featsel(length(N1),N3)*W2;
147  return
148
149else
150
151  Y1 = 3 - 2 * lab;   % labels     +/-1
152  Y  = ones(k,1);
153
154  alpha(1:k+1,1) = 0;
155
156  aa = +a;
157  switch upper(type)
158    case {'SIMPLE'}
159      f = zeros(k+z,1);
160      b = -ones(m,1);
161      if is_w0,
162        A = -[(Y1*Y').* aa  Y1];
163      else
164        A = -[(Y1*Y').* aa];
165      end
166      [al,fval,status] = linprog(f,A,b);
167      alpha(1:k+z) = al;
168
169
170
171    case {'STANDARD'}
172      L  = ones(k,1)/k;
173
174      f  = [zeros(k+z,1); L];
175      lb = [-Inf .*ones(k+z,1); zeros(k,1)];
176      ub = Inf .* ones(2*k+z,1);
177      b  = -ones(m,1);
178      if is_w0,
179        A  = -[(Y1*Y').* aa  Y1  eye(m,k)];
180      else
181        A  = -[(Y1*Y').* aa  eye(m,k)];
182      end
183      [al,fval,ststus] = linprog(f,A,b,[],[],lb,ub);
184      alpha(1:k+z) = al(1:k+z);
185
186
187
188    case {'C-SPARSE'}
189      L  = ones(k,1);
190      ub = Inf .* ones(3*k+z,1);
191      lb = [zeros(2*k,1); -Inf.*ones(z,1); zeros(k,1)];
192      b  = -ones(m,1);
193      ay = (Y1*Y').* aa;
194      if is_w0,
195        f  = [ones(2*k,1); 0; par*L];
196        A  = -[ay  -ay  Y1 eye(m,k)];
197      else
198        f  = [ones(2*k,1); par*L];
199        A  = -[ay  -ay  eye(m,k)];
200      end
201      if (exist('glpkmex')>0) & (usematlab==0)
202        smin     = 1;  % solve minimum
203        ctype    = char(ones(m,1)*abs('U'));      % sign of inequalities
204        vartype  = char(ones(3*k+z,1)*abs('C'));  % continous variables
205%       lpsolver = 1;                             % Revised Simlex Method
206        lpsolver = 2;                             % Interior Point Method
207        params.msglev = 0; % no outputs
208        [sss,hostname] = unix('hostname');
209        hostname = hostname(1:end-1);
210        if strcmp(hostname,'saturnus') | strcmp(hostname,'polaris') | strcmp(hostname,'neptunus')
211          [al,fval,status] = glpkmex_redhat(smin,f,A,b,ctype,lb,ub,vartype,params,lpsolver);
212        else
213          [al,fval,status] = glpkmex(smin,f,A,b,ctype,lb,ub,vartype,params,lpsolver);
214        end
215      else
216        [al,fval,status] = linprog (f,A,b,[],[],lb,ub);
217      end
218      alpha(1:k) = al(1:k) - al(k+1:2*k);
219      if is_w0,
220        alpha(k+1) = al(2*k+1);
221      end
222
223
224    case {'MU-SPARSE'}
225      L   = ones(k,1)/k;
226      f   = [zeros(2*k+z,1); L; -par];
227      ub  = Inf .* ones(3*k+1+z,1);
228      lb  = [zeros(2*k,1); -Inf.*ones(z,1); zeros(k+1,1)];
229      Aeq = [ones(2*k,1); zeros(k+1+z,1)]';
230      beq = 1;
231      b   = zeros(m,1);
232      ay  = (Y1*Y').* aa;
233
234      if is_w0,
235        A  = -[ay  -ay  Y1  eye(m,k) -ones(m,1)];
236      else
237        A  = -[ay -ay eye(m,k) -ones(m,1)];
238      end
239
240      if (exist('glpkmex')>0) & (usematlab==0)
241        smin     = 1;  % solve minimum
242        ctype    = char([ones(m,1)*abs('U'); 'S']);  % sign of inequalities
243        vartype  = char(ones(3*k+1+z,1)*abs('C'));   % continous variables
244%       lpsolver = 1;                                % Revised Simlex Method
245        lpsolver = 2;                                % Interior Point Method
246        params.msglev = 0; % no outputs
247        [sss,hostname] = unix('hostname');
248        hostname = hostname(1:end-1);
249        if strcmp(hostname,'saturnus') | strcmp(hostname,'polaris') | strcmp(hostname,'neptunus')
250          [al,fval,status] = glpkmex_redhat(smin,f,[A; Aeq],[b; beq],ctype,lb,ub,vartype,params,lpsolver);
251        else
252          [al,fval,status] = glpkmex(smin,f,[A; Aeq],[b; beq],ctype,lb,ub,vartype,params,lpsolver);
253        end
254      else
255        [al,fval,status] = linprog(f,A,b,Aeq,beq,lb,ub);
256      end
257      alpha(1:k) = al(1:k) - al(k+1:2*k);
258      if is_w0,
259        alpha(k+1) = al(2*k+1);
260      end
261
262    otherwise
263      disp(type)
264      error ('Wrong type.');
265    end
266  end
267
268  % Choose features
269  ss = sum(abs(alpha(1:k)));
270  J  = find(abs(alpha(1:k)) > ss*prec);
271
272  if isempty(J) | (status <= 0) | (status > 181 | status == 150),
273    prwarning(1,'Fisher classifier is trained.');
274    W1 = fisherc(a);
275    W2 = W1;
276    W3 = featsel(k,[1:k]);
277  else
278    W3 = featsel(k,J);
279    w = [Y; 1] .* alpha(1:k+1);
280    W2 = affine(w(J),w(k+1),a(:,J),lablist,k,2);
281    W2 = cnormc(W2,a(:,J));
282    W1 = W3*W2;
283    W1 = setname(W1,'FSELLPC');
284    W2 = setname(W2,'FSELLPC');
285  end
286return;
Note: See TracBrowser for help on using the repository browser.