%TESTKD Test k-NN classifier for dissimilarity data
%
%   [E,C] = TESTKD(D,K,PAR)
%
% INPUT
%   D    Dissimilarity dataset. Object labels are assumed to be the
%        true labels. Feature labels are assumed to be the labels of
%        the objects they are related to.
%   K    Desired number of neighbors to take into account, default K = 1.
%        K may not exceed the number of columns of D.
%   PAR  'LOO' - leave-one-out option. This should be used if
%        the objects are related to themselves. If D is not square,
%        it is assumed that the first sets of objects in columns and
%        rows match.
%        'ALL' - use all objects (default).
%        The option is matched case-insensitively.
%
% OUTPUT
%   E    Estimated error: the number of objects whose assigned label
%        differs from the true label, divided by the number of objects.
%   C    Confidences, size M x N, if D has size M x L and the labels
%        are given for N classes. They are stored in D by SETDATA, with
%        the class label list of D as feature labels. For every K they
%        are the Bayes estimators (K_j + 1)/(K + N) of the neighborhood
%        class probabilities, K_j being the number of the K nearest
%        neighbors that belong to class j, normalised to sum to one per
%        object. (The distance-based confidences once used for K < 3
%        are disabled in the code.)
%
% DESCRIPTION
% TESTKD is based on just counting errors and does not weight with
% testobject priors.
%
% SEE ALSO
% DATASETS, KNNDC

% Copyright: R.P.W. Duin, r.duin@ieee.org
% Faculty EWI, Delft University of Technology


function [e,F] = testkd(d,knn,par)

	% Defaults. '||' guarantees that ISEMPTY is never evaluated on an
	% argument that was not supplied.
	if nargin < 3 || isempty(par), par = 'all'; end
	if nargin < 2 || isempty(knn), knn = 1; end

	isdataset(d);

	lablist  = getlablist(d);
	featlist = getfeat(d);
	[m,k]    = getsize(d);

	% The neighbor selection below takes rows 1:knn of a K-row matrix,
	% i.e. floor(knn) rows. Report a readable error if they do not exist.
	if floor(knn) > k
		error(['K = ' num2str(knn) ' exceeds the number of columns of D (' ...
			num2str(k) ').'])
	end

	% Number the labels of the representation objects (columns of D) and
	% correct for different classlist - lablist orders, such that CLAB
	% holds indices into LABLIST.
	[clab,classlist] = renumlab(featlist);
	J = matchlablist(classlist,lablist);
	classlist = lablist;
	clab = J(clab);

	% One confidence column per class in LABLIST. This has to equal the
	% number of labels handed to SETDATA below, also if some class has no
	% object in the representation set (MAX(CLAB) would be too small then).
	c = size(lablist,1);

	if strcmpi(par,'LOO')
		% get rid of leave-one-out problems: add twice the largest
		% dissimilarity to the diagonal of the leading square part of D,
		% so that an object is never found as its own nearest neighbor.
		km = min(k,m);
		dmax = max(max(+d))*2;
		d(1:km,1:km) = d(1:km,1:km) + dmax*eye(km);
	elseif ~strcmpi(par,'ALL')
		error(['Unknown option ''' par ''''])
	end

	% Sort, for every object (column of D'), the representation objects
	% on increasing dissimilarity and replace their indices by class
	% indices. The sorted dissimilarities themselves are not needed.
	[dsorted,L] = sort(+d',1);
	L = clab(L);

	% Count for every object the class frequencies among its nearest
	% neighbors.
	F = zeros(m,c);
	for j = 1:c
		F(:,j) = sum(L(1:knn,:)==j,1)';
	end

	% Estimate posterior probabilities: Bayes estimators on the
	% frequencies, normalised to sum to one for every object.
	F = (F+1)/(knn+c);
	F = F ./ repmat(sum(F,2),1,c);
	F = setdata(d,F,classlist);

	% Count the errors directly; F*testc goes wrong in case of LOO
	% testing (empty classes).
	labf = F*labeld;
	e = nlabcmp(labf,getlabels(F))/m;