%NNERROR Exact expected NN error from a dissimilarity matrix (2)
%
%   E = NNERROR(D,M)
%
% INPUT
%   D    NxN dissimilarity dataset
%   M    Number of objects to be selected per class (optional)
%
% OUTPUT
%   E    Expected NN error
%
% DEFAULT
%   M = minimum class size minus 1
%
% DESCRIPTION
% An exact computation is made of the expected NN error for a random
% selection of M for training. D should be a dataset containing a labeled
% square dissimilarity matrix.

% Copyright: R.P.W. Duin, r.p.w.duin@prtools.org
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands

function e = nnerror(D,n)
% Exact expected 1-NN error of D for a random selection of n training
% objects per class. D is a labeled square dissimilarity dataset; n may be
% a scalar or a vector of sizes (one result per size). See file header.

	isdataset(D);          % D must be a PRTools dataset ...
	discheck(D);           % ... holding a square dissimilarity matrix
	nlab = getnlab(D);     % numeric class label per object
	[m,mm,c] = getsize(D); % m objects, c classes (mm unused here)

	% Number of objects per class
	nc = classsizes(D);

	% Default training size: the largest value valid for every class
	if nargin < 2
		n = min(nc)-1;
	end

	if length(n) > 1
		% Vector of training sizes: handle each size by a scalar recursion
		e = zeros(1,length(n));
		for j=1:length(n)
			e(j) = nnerror(D,n(j));
		end
	else
		if n >= min(nc)
			error('Requested size of the training set is too large.')
		end
		% Call for the given sample size
		% NOTE(review): this relies on the dataset sort overload; presumably
		% column i ranks all objects by their distance to object i, so that
		% row 1 is each object itself (zero self-distance) — confirm.
		[D,I] = sort(D);
		I = reshape(nlab(I),m,m); % Order objects according to their distances
		ee = zeros(1,m);          % per-object expected NN-error contribution

		% Loop over all classes
		for j = 1:c
			% Find probabilities Q that objects of other classes are not selected.
			% Q(r,i): probability that no wrong-class object among the r nearest
			% neighbors of object i is in the training set.
			Q = ones(m,m);
			for i = 1:c
				if i~=j
					[p,q] = nnprob(nc(i),n);
					q = [1,q];                % prepend 1: zero class-i objects seen so far
					C = cumsum(I==i)+1;       % 1 + count of class-i objects among the r nearest
					Q = Q.*reshape(q(C),m,m); % prob. none of those was selected
				end
			end

			% Find probabilities P for objects of this class to be the first
			% selected one, i.e. the nearest neighbor among class j.
			[p,q] = nnprob(nc(j),n);
			p = [0,p];                % prepend 0: no class-j object seen yet
			C = cumsum(I==j)+1;
			P = reshape(p(C),m,m);

			% Now estimate the prob EC it is really the NN: a class-j object at
			% rank r is the NN iff it is selected first within its class AND no
			% nearer wrong-class object is selected.
			J = find(I==j);
			EC = zeros(m,m);
			EC(J) = P(J).*Q(J);

			% Determine its error contribution; row 1 is the object itself,
			% which can never be its own training neighbor.
			L = find(nlab==j);
			ee(L) = 1-sum(EC(2:m,L))./(1-EC(1,L)); % Correct for the training size
		end

		% Average for the final result
		e = abs(mean(ee));
	end


%NNPROB Probability of selection as the nearest neighbor
%
%   [P,Q] = NNPROB(M,K)
%
% If K objects are selected out of M, then P(i) is the probability
% that the i-th object is the nearest neighbor and Q(i) is the probability
% that this object is not selected.
function [p,q] = nnprob(m,k)
% Selection probabilities for k random draws out of m ranked objects:
% p(i) is the probability that object i is the first (nearest) selected
% one, q(i) the probability that none of objects 1..i is selected.
% Entries beyond rank m-k+1 stay zero (some of the first i must be drawn).
	p = zeros(1,m);
	q = zeros(1,m);
	qrun = 1;                % running P(none of the objects seen so far selected)
	for r = 1:(m-k+1)
		rem = m-r+1;         % objects still eligible at rank r
		p(r) = qrun*k/rem;   % first r-1 skipped, object r drawn
		q(r) = qrun*(rem-k)/rem; % object r skipped as well
		qrun = q(r);
	end