%NNERR Exact expected NN error from a dissimilarity matrix (1)
%
% E = NNERR(D,M)
%
% INPUT
%   D   NxN dissimilarity dataset
%   M   Vector with desired numbers of objects to be selected
%
% OUTPUT
%   E   Expected NN error
%
% E = NNERR(D)
%
% In this case a set of training set sizes is used to produce
% a full learning curve. E can be plotted by PLOTE; see also the
% EXAMPLE below.
%
% DESCRIPTION
% An exact computation is made of the expected NN error for a random
% selection of M objects for training. D should be a dataset containing
% a labeled square dissimilarity matrix.
%
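% EXAMPLE
% A minimal usage sketch. GENDATB and DISTM (standard PRTools routines)
% are used here only to construct a labeled square dissimilarity dataset;
% any such dataset will do.
%   A = GENDATB([50 50]);  % labeled two-class dataset
%   D = DISTM(A);          % labeled square dissimilarity dataset
%   E = NNERR(D);          % full 1-NN learning curve
%   PLOTE(E);              % plot the learning curve
%   E10 = NNERR(D,10);     % expected 1-NN error for 10 training objects
%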

% Copyright: R.P.W. Duin, r.duin@ieee.org
% and Elzbieta Pekalska, ela.pekalska@googlemail.com
% Faculty EWI, Delft University of Technology and
% School of Computer Science, University of Manchester

function e = nnerr(d,n)

if nargin < 2, n = []; end
m = size(d,2);                        % total number of objects
if nargin == 2 && any(n >= m)
  error('Training set sizes should be smaller than sample size')
end

if isempty(n)
  % no sizes given: compute the full learning curve, but gain some speed
  % by evaluating only a thinned grid of sizes and interpolating linearly
  L = [1:20 22:2:40 45:5:60 70:10:100 120:20:300 350:50:1000 1100:100:10000];
  L = [L(L < m-1) m-1];
  f = zeros(1,m-1);
  prwaitbar(max(L),'Compute Learning Curve')
  for i=1:length(L)
    prwaitbar(max(L),L(i));
    f(L(i)) = feval(mfilename,d,L(i));   % recursive call for a single size
    if (i > 1) && (L(i)-L(i-1) > 1)
      for j=L(i-1):L(i)                  % fill in the skipped sizes
        f(j) = f(L(i-1)) + (f(L(i))-f(L(i-1)))*(j-L(i-1))/(L(i)-L(i-1));
      end
    end
  end
  prwaitbar(0)

  % return a learning curve structure as expected by PLOTE
  e.error   = f;
  e.xvalues = 1:length(e.error);
  e.title   = 'Learning curve 1-NN rule';
  e.xlabel  = 'Size training set';
  e.ylabel  = 'Expected classification error';
  e.plot    = 'semilogx';

elseif length(n) > 1

  % a vector of sizes: compute the expected error for each size separately
  for i=1:length(n)
    e(i) = feval(mfilename,d,n(i));
  end

else

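  % The weights q(k) below implement the commented-out product formula in
  % log-gamma form. A worked reading of that formula:
  %
  %   q(k) = [ (m-k+1)!/(m-k+1-n)! - (m-k)!/(m-k-n)! ] / [ m!/(m-n)! ]
  %        = P(all n selected objects have neighbor rank >= k)
  %          - P(all n selected objects have neighbor rank >= k+1)
  %        = P(the nearest selected object is the k-th neighbor overall).
  %
  % The exp(gamln(...)) form avoids overflow of the factorial products.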
  q = zeros(1,m);
  for k = 1:m-n
    %p(k) = (prod(m-k+1-n+1:m-k+1) - prod(m-k-n+1:m-k)) / prod(m-n+1:m);
    q(k) = (exp(gamln(m-k+2)-gamln(m-k+1-n+1)-gamln(m+1)+gamln(m-n+1)) ...
            - exp(gamln(m-k+1)-gamln(m-k-n+1)-gamln(m+1)+gamln(m-n+1)));
  end
  k = m-n+1;  % last possible rank; the second term vanishes here
  %p(k) = (prod(m-k+1-n+1:m-k+1) - prod(m-k-n+1:m-k)) / prod(m-n+1:m);
  q(k) = exp(gamln(m-k+2)-gamln(m-k+1-n+1)-gamln(m+1)+gamln(m-n+1));

  isdataset(d);                       % D must be a labeled dataset
  nlab = getnlab(d);                  % numeric class labels
  d = d + diag(repmat(inf,1,m));      % exclude self-matches
  [DD,L] = sort(+d,2);                % rank neighbors per object
  L = nlab(L);                        % labels of the ranked neighbors
  R = mean(L ~= repmat(nlab,1,m));    % R(k): error rate of k-th neighbor
  e = q*R';                           % expected error: sum_k q(k)*R(k)

end

return

function x = gamln(y)
% log-gamma with zero and negative arguments guarded before calling GAMMALN
if y == 0, x = 1;
elseif y < 0, x = 1;
else, x = gammaln(y); end
return