%PROTSELFD Forward Prototype Selection for Dissimilarity Matrices
%
% [W,E,KOPT] = PROTSELFD(D,K,PAR)
% W = D*PROTSELFD([],K,PAR)
%
% INPUT
%   D     Dataset, square dissimilarity matrix
%   K     Integer, desired number of prototypes
%   PAR   'LOO' - leave-one-out option. This should be used if
%         the objects are related to themselves. If D is not square,
%         it is assumed that the first sets of objects in columns and
%         rows match.
%         'ALL' - use all objects (default).
%
% OUTPUT
%   W     Selection mapping ('feature selection')
%   E     Error estimate as a function of the number of selected prototypes
%         (only reliable for prototype set sizes >= class size)
%   KOPT  Estimate of the best number of prototypes for avoiding peaking
%
% DESCRIPTION
% This procedure for optimizing the representation set of a
% dissimilarity matrix is based on a greedy, forward selection of
% prototypes, using the leave-one-out error estimate of the 1NN rule
% as a criterion. As this is computed on the given distances in
% D, the procedure only needs sorting and counting and is
% thereby fast. In case K=1 just a single prototype has to be returned,
% but the 1NN error cannot be computed as all objects are assigned
% to the same class. In that case the centre object of the largest class
% is returned.
%
% Note that the search continues until K prototypes are found.
% This might be more than desired due to peaking (curse of
% dimensionality, overtraining). Therefore an estimate of the
% optimal number of prototypes is returned in KOPT.
%
% The prototype selection may be applied by C = B*W(:,1:KSEL),
% in which B is a dissimilarity matrix based on the same
% representation set as D (e.g. D itself) and C is the resulting
% dissimilarity matrix in which the KSEL (e.g. KOPT) best prototypes
% are selected.
%
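% EXAMPLE
% A minimal usage sketch (assuming D is a labeled, square PRTools
% dissimilarity dataset; the chosen numbers are illustrative):
%
%   [W,E,KOPT] = protselfd(D,20,'loo'); % greedily select up to 20 prototypes
%   C = D*W(:,1:KOPT);                  % keep only the KOPT best prototypes
%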
% REFERENCE
% E. Pekalska, R.P.W. Duin, and P. Paclik, Prototype selection for
% dissimilarity-based classification, Pattern Recognition,
% vol. 39, no. 2, 2006, 189-208.
%
% SEE ALSO
% KNNDC, DISEX_PROTSELFD

% Copyright: R.P.W. Duin, r.p.w.duin@prtools.org
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands

%

function [R,e,D,J,nlab,clab] = protselfd(D,ksel,par,J,e,nlab,clab)

if nargin < 2, ksel = []; end
if nargin < 3 | isempty(par), par = 'all'; end

if nargin < 4   % user call

  if nargin < 1 | isempty(D)  % allow for D*protselfd([],pars)
    R = mapping(mfilename,'untrained',{ksel,par});
    R = setname(R,'Forward Prototype Sel');
    return
  end

  [m,k,c] = getsize(D);
  if isempty(ksel), ksel = k; end
  if strcmp(par,'loo') | strcmp(par,'LOO')
    if k > m
      error('More rows than columns expected for dissimilarity matrix')
    end
    discheck(D(1:k,:));
    D(1:k,:) = D(1:k,:) + 1e100*eye(k); % get rid of diagonal for LOO
  end

  % Initialise by the centre of the largest class
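  % (the centre is the object of that class whose distance to its
  % one-but-most-remote class member is smallest, see below)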
  cc = classsizes(D);
  [cmax,n] = max(cc);               % n is the largest class
  lablist = getlablist(D);
  nlab = getnlab(D);
  clab = renumlab(getfeatlab(D),lablist);
  R = find(nlab == n);
  C = find(clab == n);
  dd = +D(R,C);
  [dmin,rmin] = sort(dd,1);         % find one but most remote object
  [dmin,cmin] = min(dmin(end-1,:)); % find prototype for which this is minimum
  R = C(cmin);

  e = zeros(1,ksel);
  [nlab,clab] = renumlab(getlabels(D),getfeatlab(D));
  [dd,J] = min(+D(:,R),[],2);
  e(1) = sum(clab(R(J)) ~= nlab);

  if ksel > 1
    % this will be a deep recursive call !!!
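    % (each recursion level adds a single prototype until ksel is reached)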
    prwaitbar(ksel,'Forward prototype selection')
    [R,e,D,J,nlab,clab] = protselfd(D,ksel,R,J,e,nlab,clab);
    prwaitbar(0);
  end
  e = e(1:length(+R))/m;
  R = featsel(k,R);

  % Find the optimal number of prototypes for avoiding peaking

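  % Take the centre of the plateau of minimum error; the result is
  % returned in the third output argument (stored here in D).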
  Jopt = find(e==min(e));
  D = floor((Jopt(end)+Jopt(1))/2);

  % done!

else  % internal call, parameters may have another meaning!

  R = par;                 % prototypes found so far
  [m,k,c] = getsize(D);
  d = +D;
  S = [1:k];               % all candidates
  S(R) = [];               % exclude the ones we already have
  emin = inf;
  dmin = inf;
  r = length(R);
  prwaitbar(ksel,r);
  for j=S                  % run over all remaining candidates
    % the following tricky statement finds the nearest neighbor indices n
    % for all objects to their nearest prototype (n=1) or the candidate
    % prototype (n=2). In ds the minimum distances are stored and used for
    % solving ties later.
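    % d(m*(R(J')'-1)+[1:m]') uses linear indexing to pick d(i,R(J(i)))
    % for every object i, i.e. its distance to its current nearest prototype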
    [ds,n] = min([d(m*(R(J')'-1)+[1:m]'),d(:,j)],[],2);
    % the labels of the nearest prototypes and the candidates
    cclab = [clab(R(J)') repmat(clab(j),m,1)];
    % compute the nearest neighbor error using the computed n
    ee = sum(cclab(m*(n-1)+[1:m]') ~= nlab);
    de = sum(ds);
    % if better, use it
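    % (lower 1NN error wins; ties are broken by the smaller sum of
    % nearest neighbor distances)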
    if ee < emin | ((ee == emin) & (de < dmin))
      emin = ee;
      jmin = j;
      JJ = [J repmat(r+1,m,1)];
      Jmin = JJ(m*(n-1)+[1:m]');
      Rmin = [R jmin];
      dmin = de;
    end
  end

  if emin <= e(r) | 1   % we even continue if emin increases due to peaking
    e(r+1) = emin;
    R = Rmin;
    if (r+1) < ksel
      [R,e,D,J,nlab,clab] = protselfd(D,ksel,R,Jmin,e,nlab,clab);
    end
  end

end