function [w,returnA,r2] = auclpm(x, C, rtype, par, unitnorm, usematlab)
%AUCLPM Find linear mapping with optimized AUC
%
%     W = AUCLPM(X, C, RTYPE, PAR)
%
% Optimize the AUC on dataset X with regularization parameter C. This is
% done by finding the weights W for which the ordering of the objects,
% mapped onto the line defined by W, is optimal. That means that objects
% from class +1 are always mapped above objects from the -1 class. This
% results in a constraint for each (+1,-1) pair of objects, so the number
% of constraints becomes very large. The AUC constraints can be
% subsampled in different ways:
%
%     RTYPE     PAR
%     'full',   -   use all constraints
%     'subs',   N   subsample just N constraints
%     'subk',   k   subsample just k*#trainobj. constraints
%     'knn'     k   use only the k nearest neighbors
%     'xval'    N   subsample just N constraints and use the rest to
%                   optimize C (this version can be very slow)
%     'xvalk'   k   subsample k*#trainobj and use remaining constraints
%                   to optimize C
%     'kmeans'  k   use k-means clustering with k=PAR
%     'randk'       subsample objects to get PAR*(Npos+Nneg) constraints
%
%     W = AUCLPM(X, C, RTYPE, PAR, UNITNORM)
%
% Finally, per default the difference vectors are normalized to unit
% length. If you don't like that, set UNITNORM to 0.
%
% Default: RTYPE='subk'
%          PAR = 1.0;
prtrace(mfilename);

% Fill in defaults for the missing inputs.
if (nargin < 6), usematlab = 0; end
if (nargin < 5), unitnorm = 1; end
if (nargin < 4), par = 1.00; end
if (nargin < 3), rtype = 'subk'; end
if (nargin < 2)
  prwarning(3,'Lambda set to ten');
  C = 10;
end

% Construct the descriptive name of this mapping.
if unitnorm
  mapname = sprintf('AUC-LP %s (%s) 1norm',rtype,num2str(par));
else
  mapname = sprintf('AUC-LP %s (%s)',rtype,num2str(par));
end

% No data given: return an untrained mapping.
if (nargin < 1) | (isempty(x))
  w = mapping(mfilename,{C,rtype,par,unitnorm,usematlab});
  w = setname(w,mapname);
  return
end

if ~ismapping(C)   % train the mapping

  % Check and unpack the dataset.
  islabtype(x,'crisp');
  isvaldset(x,1,2);   % at least 1 object per class, 2 classes
  [n,k,c] = getsize(x);
  % Sanity-check the parameter:
  if (par <= 0)
    error('Parameter ''par'' should be larger than zero');
  end

  % Only the two-class case is supported.
  if (c ~= 2)
    error('Only a two-class classifier is implemented');
  end

  lablist = getlablist(x);
  dim = size(x,2);

  % Create the target values (+1 and -1). Make an exception for a
  % one-class or MIL dataset: prefer a class labeled 'target', then
  % 'positive'; otherwise simply take the first class as target.
  tcl = strmatch('target',lablist);
  if isempty(tcl)          % no target class defined
    tcl = strmatch('positive',lablist);
    if isempty(tcl)        % no positive class defined either
      tcl = 1;
    end
  end
  y = 2*(getnlab(x)==tcl) - 1;
  tlab = lablist(tcl,:);

  % Work on the plain data matrix; this makes the mapping much faster.
  X = +x; clear x;

  % Create the constraint matrix A for the AUC optimization, using a
  % fixed seed; the random state is saved and restored around it.
  oldstate = rand('state');
  seed = 0;
  [A,Nxi,Aval] = createA(X,y,rtype,par,seed);
  rand('state',oldstate);

  if unitnorm
    % Normalize each row of A to unit length. Zero-length rows are
    % skipped (when labels are flipped, terrible things can happen).
    len = sqrt(sum(A.*A,2));
    nz = find(len~=0);
    A(nz,:) = A(nz,:)./repmat(len(nz,:),1,size(A,2));
    if ~isempty(Aval)
      % Give the validation constraints the same treatment.
      len = sqrt(sum(Aval.*Aval,2));
      nz = find(len~=0);
      Aval(nz,:) = Aval(nz,:)./repmat(len(nz,:),1,size(Aval,2));
    end
  end

  % Keep the plain constraint matrix to evaluate the training AUC later.
  A0 = A;
  % The weights are split in a positive and a negative part, so the
  % negated columns must be present in the constraints:
  A = [A -A];
  % Take also care of the slack variables xi:
  A = [A -speye(Nxi)];

  %--- right-hand side b
  b = -ones(Nxi,1);   % no zeros, otherwise we get w=0
                      % (the constraint is changed here to <= -1)
  %--- lower bound constraints
  lb = zeros(2*k+Nxi,1);

  % Should we run over a range of Cs?
  if ~isempty(Aval)
    M = 25;
    xtr = zeros(M,1);
    xval = zeros(M,1);
    C = logspace(-3,3,M);
    % Run over all the Cs:
    for i=1:length(C)
      %--- objective f for this C: L1 weight norm plus slack penalty
      f = [ones(2*k,1); repmat(C(i),Nxi,1)];
      %--- solve the linear program
      if (exist('glpk')>0) & ~usematlab
        [z,fmin,status]=glpk(f,A,b,lb,[],repmat('U',Nxi,1),...
          repmat('C',size(f,1),1),1);
      elseif (exist('glpkmex')>0) & ~usematlab
        [z,fmin,status]=glpkmex(1,f,A,b,repmat('U',Nxi,1),...
          lb,[],repmat('C',size(f,1),1));
      else
        opts = optimset('Display','off','LargeScale','on','Diagnostics','off');
        z = linprog(f,A,b,[],[],lb,[],[],opts);
      end
      % Fraction of satisfied validation constraints (= AUC):
      constr = Aval*(z(1:k)-z(k+1:2*k));
      I = find(constr<0);
      if ~isempty(I)
        xval(i) = length(I)/size(constr,1);
      end
      % Idem for the training constraints:
      constr = A0*(z(1:k)-z(k+1:2*k));
      I = find(constr<0);
      if ~isempty(I)
        xtr(i) = length(I)/size(constr,1);
      end
    end
    % Optionally hand the AUC curves back to the caller.
    if nargout>1
      returnA = xval;
      if nargout>2
        r2 = xtr;
      end
    end
    % Pick the C with the best validation AUC.
    [bestauc,besti] = max(xval);
    C = C(besti);
    message(4,'Optimum C = %f\n',C);
  end

  %--- final objective f with the (possibly optimized) C
  f = [ones(2*k,1); repmat(C,Nxi,1)];
  %--- solve the final linear program
  if (exist('glpkmex')>0) & ~usematlab
    prwarning(7,'Use glpkmex');
    param.msglev=0;
    [z,fmin,status,xtra]=glpkmex(1,f,A,b,repmat('U',Nxi,1),...
      lb,[],repmat('C',size(f,1),1),param);
    alpha = xtra.lambda;
  else
    [z,fmin,exitflag,outp,alpha] = linprog(f,A,b,[],[],lb);
  end

  %--- extract the parameters from the solution vector
  u = z(1:k); u = u(:);
  v = z(k+1:2*k); v = v(:);
  zeta = z(2*k+1:2*k+Nxi); zeta = zeta(:);

  % Find out how sparse the result is:
  rel = (abs(u-v)>0);
  nr = sum(rel);

  % And store the results:
  W.u = u;   % the ultimate weights
  W.v = v;
  W.alpha = alpha;
  W.zeta = zeta;
  W.nr = nr;
  W.rel = rel;
  W.C = C;
  w = mapping(mfilename,'trained',W,tlab,dim,1);
  w = setname(w,sprintf('%s %s',mapname,rtype));

else
  % Evaluate the classifier on new data:
  W = getdata(C);
  n = size(x,1);

  % The mapping is linear:
  out = x*(W.u-W.v);

  % Put it nicely in a prtools dataset. (I am not really sure what I
  % should output, I decided to give a 1D output:)
  w = setdat(x,out,C);

end

return
---|