%NNERROR Exact expected NN error from a dissimilarity matrix (2)
%
%   E = NNERROR(D,M)
%
% INPUT
%   D   NxN dissimilarity dataset
%   M   Number of objects per class to be selected for training;
%       scalar or vector of sizes (optional)
%
% OUTPUT
%   E   Expected NN error (one value per element of M)
%
% DEFAULT
%   M = minimum class size minus 1
%
% DESCRIPTION
% An exact computation is made of the expected nearest neighbor error for a
% random selection of M objects per class for training. D should be a dataset
% containing a labeled square dissimilarity matrix.
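%
% EXAMPLE
% A minimal usage sketch (assumes PRTools' GENDATB and DisTools' DISTM are
% available; any labeled square dissimilarity dataset can be substituted):
%
%   A = gendatb([20 20]);    % two-class banana set, 20 objects per class
%   D = distm(A);            % labeled square dissimilarity matrix
%   e = nnerror(D,5)         % expected NN error for 5 training objects per class
%   e = nnerror(D,2:10)      % one error estimate per training set size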

% Copyright: R.P.W. Duin, r.p.w.duin@prtools.org
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands


function e = nnerror(D,n)
isdataset(D);
discheck(D);
nlab = getnlab(D);
[m,mm,c] = getsize(D);

% Number of objects per class
nc = classsizes(D);

% Compute for all training set sizes
if nargin < 2
  n = min(nc)-1;
end

if length(n) > 1
  e = zeros(1,length(n));
  for j=1:length(n)
    e(j) = nnerror(D,n(j));
  end
else
  if n >= min(nc)
    error('Requested size of the training set is too large.')
  end
  % Computation for the given sample size
  [D,I] = sort(D);
  I = reshape(nlab(I),m,m); % Order objects according to their distances
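  % Column i of I now holds the class labels of all objects ranked by their
  % distance to object i; assuming the diagonal of D is zero, its first entry
  % is the label of object i itself.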
  ee = zeros(1,m);

  % Loop over all classes
  for j = 1:c
    % Find probabilities Q that objects of other classes are not selected
    Q = ones(m,m);
    for i = 1:c
      if i~=j
        [p,q] = nnprob(nc(i),n);
        q = [1,q];
        C = cumsum(I==i)+1;
        Q = Q.*reshape(q(C),m,m);
      end
    end

    % Find probabilities P for objects of this class to be the first
    [p,q] = nnprob(nc(j),n);
    p = [0,p];
    C = cumsum(I==j)+1;
    P = reshape(p(C),m,m);

    % Now estimate the probability EC that it is really the NN
    J = find(I==j);
    EC = zeros(m,m);
    EC(J) = P(J).*Q(J);

    % Determine its error contribution
    L = find(nlab==j);
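    % The object itself is always ranked first (zero self-distance), so the
    % division by (1-EC(1,L)) below appears to condition on that object not
    % being drawn into the training set, a leave-one-out style correction.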
    ee(L) = 1-sum(EC(2:m,L))./(1-EC(1,L)); % Correct for the training size
  end

  % Average for the final result
  e = abs(mean(ee));
end


%NNPROB Probability of selection as the nearest neighbor
%
%   [P,Q] = NNPROB(M,K)
%
% If K objects are selected out of M, then P(i) is the probability
% that the i-th object is the nearest neighbor and Q(i) is the probability
% that this object is not selected.
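%
% As a quick check, the recursion below reproduces the closed forms
% Q(i) = nchoosek(M-i,K)/nchoosek(M,K) (zero when M-i < K) and
% P(i) = nchoosek(M-i,K-1)/nchoosek(M,K), with the P(i) summing to one,
% e.g. for M = 5 and K = 2:
%
%   [p,q] = nnprob(5,2);
%   sum(p)      % = 1
%   p(1:4)      % = [0.4 0.3 0.2 0.1]
%   q(1:4)      % = [0.6 0.3 0.1 0]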

function [p,q] = nnprob(m,k)
p = zeros(1,m);
q = zeros(1,m);
q(1) = (m-k)/m;
p(1) = k/m;
for i=2:(m-k+1)
  q(i) = q(i-1)*(m-k-i+1)/(m-i+1);
  p(i) = q(i-1)*k/(m-i+1);
end