%CLEVALD Classifier evaluation (learning curve) for dissimilarity data
%
%   E = CLEVALD(D,CLASSF,TRAINSIZES,REPSIZE,NREPS,T)
%
% INPUT
%   D           Square dissimilarity dataset
%   CLASSF      Classifiers to be evaluated (cell array)
%   TRAINSIZES  Vector of class sizes, used to generate subsets of D
%               (default [2,3,5,7,10,15,20,30,50,70,100])
%   REPSIZE     Representation set size per class (>=1), or fraction of
%               the training set (<1) (default: the entire training set)
%   NREPS       Number of repetitions (default 1)
%   T           Test dataset (default [], use the samples of D outside
%               the training set)
%
% OUTPUT
%   E           Error structure (see PLOTE) containing training and test
%               errors
%
% DESCRIPTION
% Generates at random, for all class sizes defined in TRAINSIZES, training
% sets out of the dissimilarity dataset D. The representation set is either
% equal to the training set (REPSIZE = []), a fraction of it (REPSIZE < 1),
% or a random subset of it of a given size per class (REPSIZE > 1). This
% set is used for training the untrained classifiers CLASSF. The resulting
% trained classifiers are tested on the training objects and on the
% left-over test objects, or, if supplied, on the test set T. This
% procedure is repeated NREPS times.
%
% The returned structure E contains several fields for annotating the plot
% produced by PLOTE. They may be changed by the user. Removing the field
% 'apperror' (RMFIELD(E,'apperror')) suppresses the drawing of the error
% curves for the training set.
%
% Within each repetition, training sets are generated without replacement,
% such that the larger training sets include the smaller ones and all
% classifiers use the same training sets.
%
% This function uses the RAND random generator and thereby yields
% reproducible results if its seed is reset (see RAND).
% If CLASSF uses RANDN, its seed should be reset as well.
%
% SEE ALSO
% MAPPINGS, DATASETS, CLEVAL, TESTC, PLOTE
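%
% EXAMPLE
% An illustrative run (variable names are hypothetical); it assumes D is
% a square, labeled dissimilarity dataset, e.g. as computed by DISTM:
%
%   rand('state',1);             % reset the seed for reproducibility
%   D = distm(gendatb([30 30])); % square dissimilarity dataset
%   E = clevald(D,{knnc([],1),fisherc},[2,3,5,10,20],[],10);
%   plote(rmfield(E,'apperror')) % plot the test error curves only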

% R.P.W. Duin, r.p.w.duin@prtools.org
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands

function e = clevald(a,classf,learnsizes,repsize,nreps,t)

prtrace(mfilename);

if (nargin < 6)
  t = [];
end
if (nargin < 5) || isempty(nreps)
  nreps = 1;
end
if (nargin < 4)
  repsize = [];
end
if (nargin < 3) || isempty(learnsizes)
  learnsizes = [2,3,5,7,10,15,20,30,50,70,100];
end

% Allow a single untrained classifier to be passed directly.
if (~iscell(classf)), classf = {classf}; end

% Assert that all is right.
isdataset(a); issquare(a); ismapping(classf{1});
if (~isempty(t)), isdataset(t); end

% Remove requested class sizes that are larger than or equal to the size
% of the smallest class.

mc = classsizes(a); [m,k,c] = getsize(a);
toolarge = find(learnsizes >= min(mc));
if (~isempty(toolarge))
  prwarning(2,['training set class sizes ' num2str(learnsizes(toolarge)) ...
    ' are larger than the minimal class size in A; they are removed']);
  learnsizes(toolarge) = [];
end
learnsizes = learnsizes(:)';

% Fill the error structure.

nw = length(classf(:));
datname = getname(a);

e.n = nreps;
e.error = zeros(nw,length(learnsizes));
e.std = zeros(nw,length(learnsizes));
e.apperror = zeros(nw,length(learnsizes));
e.appstd = zeros(nw,length(learnsizes));
e.xvalues = learnsizes(:)';
e.xlabel = 'Training set size per class';
e.names = [];
if (nreps > 1)
  e.ylabel = ['Averaged error (' num2str(nreps) ' experiments)'];
elseif (nreps == 1)
  e.ylabel = 'Error';
else
  error('Number of repetitions NREPS should be >= 1.');
end
if (~isempty(datname))
  if isempty(repsize)
    e.title = [datname ', Rep. Set = Train Set'];
  elseif repsize < 1
    e.title = [datname ', Rep. size = ' num2str(repsize) ' x Train size'];
  else
    e.title = [datname ', Rep. size = ' num2str(repsize) ' per class'];
  end
end
if (learnsizes(end)/learnsizes(1) > 20)
  e.plot = 'semilogx'; % If the size range is large, use a log scale for X.
end

% Report progress.

s1 = sprintf('clevald: %i classifiers: ',nw);
prwaitbar(nw,s1);

% Store the seed, to reset the random generator later for each
% classifier, so that all classifiers see the same training sets.

seed = rand('state');

% Loop over all classifiers (with index WI).

for wi = 1:nw

  if (~isuntrained(classf{wi}))
    error('Classifiers should be untrained.')
  end
  name = getname(classf{wi});
  e.names = char(e.names,name);
  prwaitbar(nw,wi,[s1 name]);

  % E1 will contain the test error estimates, E0 the apparent
  % (training set) errors.

  e1 = zeros(nreps,length(learnsizes));
  e0 = zeros(nreps,length(learnsizes));

  % Take care that all classifiers use the same training sets.

  rand('state',seed); seed2 = seed;

  % For NREPS repetitions...

  s2 = sprintf('clevald: %i repetitions: ',nreps);
  prwaitbar(nreps,s2);

  for i = 1:nreps

    prwaitbar(nreps,i,[s2 int2str(i)]);

    % Store the randomly permuted indices of samples of class CI to use
    % in this training set in JR(CI,:).

    JR = zeros(c,max(learnsizes));

    for ci = 1:c

      JC = findnlab(a,ci);

      % Necessary for reproducible training sets: set the seed and store
      % it after generation, so that next time we will use the previous one.
      rand('state',seed2);

      JD = JC(randperm(mc(ci)));
      JR(ci,:) = JD(1:max(learnsizes))';
      seed2 = rand('state');
    end

    li = 0; % Index of the current training set size.

    nlearns = length(learnsizes);
    s3 = sprintf('clevald: %i sizes: ',nlearns);
    prwaitbar(nlearns,s3);

    for j = 1:nlearns

      nj = learnsizes(j);

      prwaitbar(nlearns,j,[s3 int2str(j) ' (' int2str(nj) ')']);
      li = li + 1;

      % J will contain the indices for this training set,
      % R those for the corresponding representation set.

      J = [];
      R = [];
      for ci = 1:c
        J = [J;JR(ci,1:nj)'];
        if isempty(repsize)
          R = [R JR(ci,1:nj)];
        elseif repsize < 1
          R = [R JR(ci,1:ceil(repsize*nj))];
        else
          R = [R JR(ci,1:min(nj,repsize))];
        end
      end

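      % Train on the sub-dissimilarity matrix A(J,R): its rows are the
      % training objects, its columns their dissimilarities to the
      % representation objects. As the larger training sets include the
      % smaller ones, taking the first entries of JR keeps the training
      % and representation sets nested across sizes.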
      w = a(J,R)*classf{wi};  % Train the right classifier.
      e0(i,li) = a(J,R)*w*testc;
      if (isempty(t))
        Jt = ones(m,1);
        Jt(J) = zeros(size(J));
        Jt = find(Jt);  % Don't use the training set for testing.
        e1(i,li) = a(Jt,R)*w*testc;
      else
        e1(i,li) = t(:,R)*w*testc;
      end

    end
    prwaitbar(0);

  end
  prwaitbar(0);

  % Calculate the average error and its standard error over the
  % repetitions for this classifier (or set the latter to zero if
  % there has been just 1 repetition).

  e.error(wi,:) = mean(e1,1);
  e.apperror(wi,:) = mean(e0,1);
  if (nreps == 1)
    e.std(wi,:) = zeros(1,size(e.std,2));
    e.appstd(wi,:) = zeros(1,size(e.appstd,2));
  else
    e.std(wi,:) = std(e1,0,1)/sqrt(nreps);
    e.appstd(wi,:) = std(e0,0,1)/sqrt(nreps);
  end
end
prwaitbar(0);

% The first row of E.NAMES is an empty string left by CHAR; remove it.
e.names(1,:) = [];

return