%CLEVALD Classifier evaluation (learning curve) for dissimilarity data
%
%   E = CLEVALD(D,CLASSF,TRAINSIZES,REPSIZE,NREPS,T)
%
% INPUT
%   D           Square dissimilarity dataset
%   CLASSF      Classifiers to be evaluated (cell array)
%   TRAINSIZES  Vector of class sizes, used to generate subsets of D
%               (default [2,3,5,7,10,15,20,30,50,70,100])
%   REPSIZE     Representation set size per class (>=1), or fraction of
%               the training set (<1) (default: the entire training set)
%   NREPS       Number of repetitions (default 1)
%   T           Test dataset (default [], use the samples of D outside
%               the training set)
%
% OUTPUT
%   E           Error structure (see PLOTE) containing training and test
%               errors
%
% DESCRIPTION
% Generates at random, for all class sizes defined in TRAINSIZES, training
% sets out of the dissimilarity dataset D. The representation set is either
% equal to the training set (REPSIZE = []), a fraction of it (REPSIZE < 1),
% or a random subset of it of a given size per class (REPSIZE > 1). This
% set is used for training the untrained classifiers CLASSF. The resulting
% trained classifiers are tested on the training objects and on the
% left-over test objects, or, if supplied, on the test set T. This
% procedure is repeated NREPS times.
%
% The returned structure E contains several fields for annotating the plot
% produced by PLOTE. They may be changed by the user. Removing the field
% 'apperror' (RMFIELD(E,'apperror')) suppresses the drawing of the error
% curves for the training set.
%
% Within each repetition, training sets are generated without replacement,
% such that the larger training sets include the smaller ones and all
% classifiers use the same training sets.
%
% This function uses the RAND random generator and thereby yields
% reproducible results if its seed is reset (see RAND).
% If CLASSF uses RANDN, its seed should be reset as well.
%
% SEE ALSO
% MAPPINGS, DATASETS, CLEVAL, TESTC, PLOTE
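%
% EXAMPLE
% An illustrative run (variable names are hypothetical); it assumes D is
% a square, labeled dissimilarity dataset, e.g. as computed by DISTM:
%
%   rand('state',1);             % reset the seed for reproducibility
%   D = distm(gendatb([30 30])); % square dissimilarity dataset
%   E = clevald(D,{knnc([],1),fisherc},[2,3,5,10,20],[],10);
%   plote(rmfield(E,'apperror')) % plot the test error curves only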

% R.P.W. Duin, r.p.w.duin@prtools.org
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands

function e = clevald(a,classf,learnsizes,repsize,nreps,t)

prtrace(mfilename);

if (nargin < 6)
  t = [];
end
if (nargin < 5) || isempty(nreps)
  nreps = 1;
end
if (nargin < 4)
  repsize = [];
end
if (nargin < 3) || isempty(learnsizes)
  learnsizes = [2,3,5,7,10,15,20,30,50,70,100];
end

% Allow a single untrained classifier to be passed directly.
if (~iscell(classf)), classf = {classf}; end

% Assert that all is right.
isdataset(a); issquare(a); ismapping(classf{1});
if (~isempty(t)), isdataset(t); end

% Remove requested class sizes that are larger than or equal to the size
% of the smallest class.

mc = classsizes(a); [m,k,c] = getsize(a);
toolarge = find(learnsizes >= min(mc));
if (~isempty(toolarge))
  prwarning(2,['training set class sizes ' num2str(learnsizes(toolarge)) ...
    ' are larger than the minimal class size in A; they are removed']);
  learnsizes(toolarge) = [];
end
learnsizes = learnsizes(:)';

% Fill the error structure.

nw = length(classf(:));
datname = getname(a);

e.n = nreps;
e.error = zeros(nw,length(learnsizes));
e.std = zeros(nw,length(learnsizes));
e.apperror = zeros(nw,length(learnsizes));
e.appstd = zeros(nw,length(learnsizes));
e.xvalues = learnsizes(:)';
e.xlabel = 'Training set size per class';
e.names = [];
if (nreps > 1)
  e.ylabel = ['Averaged error (' num2str(nreps) ' experiments)'];
elseif (nreps == 1)
  e.ylabel = 'Error';
else
  error('Number of repetitions NREPS should be >= 1.');
end
if (~isempty(datname))
  if isempty(repsize)
    e.title = [datname ', Rep. Set = Train Set'];
  elseif repsize < 1
    e.title = [datname ', Rep. size = ' num2str(repsize) ' x Train size'];
  else
    e.title = [datname ', Rep. size = ' num2str(repsize) ' per class'];
  end
end
if (learnsizes(end)/learnsizes(1) > 20)
  e.plot = 'semilogx'; % If the size range is large, use a log scale for X.
end

% Report progress.

s1 = sprintf('clevald: %i classifiers: ',nw);
prwaitbar(nw,s1);

% Store the seed, to reset the random generator later for each
% classifier, so that all classifiers see the same training sets.

seed = rand('state');

% Loop over all classifiers (with index WI).

for wi = 1:nw

  if (~isuntrained(classf{wi}))
    error('Classifiers should be untrained.')
  end
  name = getname(classf{wi});
  e.names = char(e.names,name);
  prwaitbar(nw,wi,[s1 name]);

  % E1 will contain the test error estimates, E0 the apparent
  % (training set) errors.

  e1 = zeros(nreps,length(learnsizes));
  e0 = zeros(nreps,length(learnsizes));

  % Take care that all classifiers use the same training sets.

  rand('state',seed); seed2 = seed;

  % For NREPS repetitions...

  s2 = sprintf('clevald: %i repetitions: ',nreps);
  prwaitbar(nreps,s2);

  for i = 1:nreps

    prwaitbar(nreps,i,[s2 int2str(i)]);

    % Store the randomly permuted indices of samples of class CI to use
    % in this training set in JR(CI,:).

    JR = zeros(c,max(learnsizes));

    for ci = 1:c

      JC = findnlab(a,ci);

      % Necessary for reproducible training sets: set the seed and store
      % it after generation, so that next time we will use the previous one.
      rand('state',seed2);

      JD = JC(randperm(mc(ci)));
      JR(ci,:) = JD(1:max(learnsizes))';
      seed2 = rand('state');
    end

    li = 0; % Index of the current training set size.

    nlearns = length(learnsizes);
    s3 = sprintf('clevald: %i sizes: ',nlearns);
    prwaitbar(nlearns,s3);

    for j = 1:nlearns

      nj = learnsizes(j);

      prwaitbar(nlearns,j,[s3 int2str(j) ' (' int2str(nj) ')']);
      li = li + 1;

      % J will contain the indices for this training set,
      % R those for the corresponding representation set.

      J = [];
      R = [];
      for ci = 1:c
        J = [J;JR(ci,1:nj)'];
        if isempty(repsize)
          R = [R JR(ci,1:nj)];
        elseif repsize < 1
          R = [R JR(ci,1:ceil(repsize*nj))];
        else
          R = [R JR(ci,1:min(nj,repsize))];
        end
      end

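      % Train on the sub-dissimilarity matrix A(J,R): its rows are the
      % training objects, its columns their dissimilarities to the
      % representation objects. As the larger training sets include the
      % smaller ones, taking the first entries of JR keeps the training
      % and representation sets nested across sizes.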
      w = a(J,R)*classf{wi};  % Train the right classifier.
      e0(i,li) = a(J,R)*w*testc;
      if (isempty(t))
        Jt = ones(m,1);
        Jt(J) = zeros(size(J));
        Jt = find(Jt);  % Don't use the training set for testing.
        e1(i,li) = a(Jt,R)*w*testc;
      else
        e1(i,li) = t(:,R)*w*testc;
      end

    end
    prwaitbar(0);

  end
  prwaitbar(0);

  % Calculate the average error and its standard error over the
  % repetitions for this classifier (or set the latter to zero if
  % there has been just 1 repetition).

  e.error(wi,:) = mean(e1,1);
  e.apperror(wi,:) = mean(e0,1);
  if (nreps == 1)
    e.std(wi,:) = zeros(1,size(e.std,2));
    e.appstd(wi,:) = zeros(1,size(e.appstd,2));
  else
    e.std(wi,:) = std(e1,0,1)/sqrt(nreps);
    e.appstd(wi,:) = std(e0,0,1)/sqrt(nreps);
  end
end
prwaitbar(0);

% The first row of E.NAMES is an empty string left by CHAR; remove it.
e.names(1,:) = [];

return