Context Navigation

← Previous Changeset
Next Changeset →

Changeset 20

Timestamp:

07/16/11 14:48:08 (14 years ago)

Author:

bduin

Message:

updates for handling soft labels

Location:

Files:

: 5 edited

clevald.m (modified) (7 diffs)
issquare.m (modified) (2 diffs)
nne.m (modified) (1 diff)
pe_em.m (modified) (1 diff)
plotspectrum.m (modified) (1 diff)

Legend:

: Unmodified
: Added
: Removed

distools/clevald.m

-                      r18
+                      r20
 %CLEVALD Classifier evaluation (learning curve) for dissimilarity data
+%
 %   E = CLEVALD(D,CLASSF,TRAINSIZES,REPSIZE,NREPS,T)
+%   E = CLEVALD(D,CLASSF,TRAINSIZES,REPSIZE,NREPS,T,TESTFUN)
+%
 % INPUT
 %   D          Square dissimilarity dataset
 %   CLASSF     Classifiers to be evaluated (cell array)
+%   TRAINSIZE  Vector of class sizes, used to generate subsets of D
+%              (default [2,3,5,7,10,15,20,30,50,70,100])
+%   TRAINSIZES Vector of training set sizes, used to generate subsets of D
+%              (default [2,3,5,7,10,15,20,30,50,70,100]). TRAINSIZES is
+%              per class unless D has no priors set or has soft labels.
 %   REPSIZE    Representation set size per class (>=1), or fraction (<1)
 %              (default total, training set)
 %   NREPS      Number of repetitions (default 1)
 %   T          Test dataset (default [], use remaining samples in D)
+%   TESTFUN    Mapping, evaluation function (default classification error)
+%
 % OUTPUT
 …
 % Generates at random, for all class sizes defined in TRAINSIZES, training
 % sets out of the dissimilarity dataset D. The representation set is either
 % equal to the training set (REPSIZE = []), or a fraction of it (REPSIZE  <1)
+% equal to the training set (REPSIZE = []), or a fraction of it (REPSIZE <1)
 % or a random subset of it of a given size (REPSIZE>=1). This set is used
 % for training the untrained classifiers CLASSF. The resulting trained
 % classifiers are tested on the training objects and on the left-over test
 % objects, or, if supplied, the testset T. This procedure is then repeated
+% NREPS times.
+% NREPS times. The default test routine is classification error estimation
+% by TESTC([],'crisp').
+%
 % The returned structure E contains several fields for annotating the plot
 …
 % P.O. Box 5031, 2600 GA Delft, The Netherlands
 function e = cleval(a,classf,learnsizes,repsize,nreps,t)
+function e = clevald(a,classf,learnsizes,repsize,nreps,t,testfun)
         prtrace(mfilename);
+  if (nargin < 7) | isempty(testfun)
+    testfun = testc([],'crisp');
+  end;
   if (nargin < 6)
     t = [];
 …
   % smallest class.
+  mc = classsizes(a); [m,k,c] = getsize(a);
+  toolarge = find(learnsizes >= min(mc));
+  if (~isempty(toolarge))
+    prwarning(2,['training set class sizes ' num2str(learnsizes(toolarge)) ...
+                 ' larger than the minimal class size in A; remove them']);
+    learnsizes(toolarge) = [];
+  end
+        [m,k,c] = getsize(a);
+        if ~isempty(a,'prior') & islabtype(a,'crisp')
+                classs = true;
+                mc = classsizes(a);
+                toolarge = find(learnsizes >= min(mc));
+                if (~isempty(toolarge))
+                        prwarning(2,['training set class sizes ' num2str(learnsizes(toolarge)) ...
+                                                                         ' larger than the minimal class size; removed them']);
+                        learnsizes(toolarge) = [];
+                end
+        else
+                if islabtype(a,'crisp') & isempty(a,'prior')
+                        prwarning(1,['No priors found in dataset, class frequencies are used.' ...
+                        newline '            Training set sizes hold for entire dataset']);
+                end
+                classs = false;
+                toolarge = find(learnsizes >= m);
+                if (~isempty(toolarge))
+                        prwarning(2,['training set sizes ' num2str(learnsizes(toolarge)) ...
+                                                                         ' larger than number of objects; removed them']);
+                        learnsizes(toolarge) = [];
+                end
+        end
   learnsizes = learnsizes(:)';
 …
   e.appstd     = zeros(nw,length(learnsizes));
   e.xvalues = learnsizes(:)';
+  e.xlabel = 'Training set size per class';
+        if classs
+                e.xlabel   = 'Training set size per class';
+        else
+                e.xlabel   = 'Training set size';
+        end
   e.names   = [];
   if (nreps > 1)
 …
       % this training set in JR(CI,:).
+                        JR = zeros(c,max(learnsizes));
+                        if classs
+                                JR = zeros(c,max(learnsizes));
+                        for ci = 1:c
+                                JC = findnlab(a,ci);
+        % Necessary for reproducible training sets: set the seed and store
+        % it after generation, so that next time we will use the previous one.
+                                rand('state',seed2);
+                                JD = JC(randperm(mc(ci)));
+                                JR(ci,:) = JD(1:max(learnsizes))';
+                                seed2 = rand('state');
+                                for ci = 1:c
+                                        JC = findnlab(a,ci);
+                                        % Necessary for reproducible training sets: set the seed and store
+                                        % it after generation, so that next time we will use the previous one.
+                                        rand('state',seed2);
+                                        JD = JC(randperm(mc(ci)));
+                                        JR(ci,:) = JD(1:max(learnsizes))';
+                                        seed2 = rand('state');
+                                end
+                        elseif islabtype(a,'crisp')
+                                rand('state',seed2); % get seed for reproducable training sets
+                                % generate indices for the entire dataset taking care that in
+                                % the first 2c objects we have 2 objects for every class
+                                [a1,a2,I1,I2] = gendat(a,2*ones(1,c));
+                                JD = randperm(m-2*c);
+                                JR = [I1;I2(JD)];
+                                seed2 = rand('state'); % save seed for reproducable training sets
+                        else  % soft labels
+                                rand('state',seed2); % get seed for reproducable training sets
+                                JR = randperm(m);
+                                seed2 = rand('state'); % save seed for reproducable training sets
                         end
 …
         J = [];
         R = [];
+        for ci = 1:c
+          J = [J;JR(ci,1:nj)'];
+          if isempty(repsize)
+            R = [R JR(ci,1:nj)];
+          elseif repsize < 1
+            R = [R JR(ci,1:ceil(repsize*nj))];
+          else
+            R = [R JR(ci,1:min(nj,repsize))];
+          end
+        end;
+                                w = a(J,R)*classf{wi};                                  % Use right classifier.
+                                e0(i,li) = a(J,R)*w*testc;
+                                if classs
+                                        for ci = 1:c
+                                                J = [J;JR(ci,1:nj)'];
+                                                if isempty(repsize)
+                                                        R = [R JR(ci,1:nj)];
+                                                elseif repsize < 1
+                                                        R = [R JR(ci,1:ceil(repsize*nj))];
+                                                else
+                                                        R = [R JR(ci,1:min(nj,repsize))];
+                                                end
+                                        end;
+                                else
+                                        J = JR(1:nj);
+                                        if isempty(repsize)
+                                                R = JR;
+                                        elseif repsize < 1
+                                                R = JR(1:ceil(repsize*nj));
+                                        else
+                                                R = JR(1:min(nj,repsize));
+                                        end
+                                end;
+                                trainset = a(J,R);
+                                trainset = setprior(trainset,getprior(trainset,0));
+                                w = trainset*classf{wi};                                        % Use right classifier.
+                                e0(i,li) = trainset*w*testfun;
                                 if (isempty(t))
                                 Jt = ones(m,1);
                                         Jt(J) = zeros(size(J));
                                         Jt = find(Jt);                                                          % Don't use training set for testing.
+                                        e1(i,li) = a(Jt,R)*w*testc;
+                                        testset = a(Jt,R);
+                                        testset = setprior(testset,getprior(testset,0));
+                                        e1(i,li) = testset*w*testfun;
                                 else
+                                        e1(i,li) = t(:,R)*w*testc;
+                                        testset = t(:,R);
+                                        testset = setprior(testset,getprior(testset,0));
+                                        e1(i,li) = testset*w*testfun;
                                 end

distools/issquare.m

-                      r10
+                      r20
 % DESCRIPTION
 % True if D is a square dissimilarity matrix dataset. This includes
 % the check whether feature labels equal object labels.
 % If called without an output argument ISSQUARE generates an error
 % if D is not square.
+% the check (in case of crisp dataset D) whether feature labels equal
+% object labels. If called without an output argument ISSQUARE generates an
+% error if D is not square.
 % Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com
 …
 if m == k
+  n  = nlabcmp(getfeatlab(d),getlabels(d));
+  OK = (n == 0);
+        if islabtype(d,'crisp')
+                n  = nlabcmp(getfeatlab(d),getlabels(d));
+                OK = (n == 0);
+        else
+                OK = 1;
+        end
 else
   OK = 0;

distools/nne.m

-                      r10
+                      r20
 [d,M] = min(D');
 e     = mean(nlab(M) ~= nlab);
+NNlab = lablist(nlab(M),:);
+if islabtype(D,'crisp')
+        NNlab = lablist(nlab(M),:);
+else
+        labs = gettargets(D);
+        NNlab = labs(M,:);
+end
 return;

distools/pe_em.m

-                      r10
+                      r20
 % by D*W. The signature of the obtained PE space (numbers of positive and negative
 % directions) can be found by PE_SIG(W). The spectrum of the obtained space
+% can be found by PE_SPEC(W).
+%
+% A trained mapping can be reduced further by:   W = PE_EM(W,ALF)
+% The signature of the obtained PE space can be found by PE_SIG(W).
+% The spectrum of the obtained space
+% can be found by PE_SPEC(W).
+%
 % SEE ALSO

distools/plotspectrum.m

-                      r10
+                      r20
     L = getdata(L,4);
     tit = 'Embedding Spectrum';
   elseif strcmp(getmapping_file(L),'affine')
+  elseif strcmp(getmapping_file(L),'affine') | strcmp(getmapping_file(L),'pe_em')
     try
       L = getdata(L,'eigenvalues');
+      L = getdata(L,'eval');
       tit = 'Eigenvalues';
     catch

Note: See TracChangeset for help on using the changeset viewer.

Download in other formats: