%TESTKD Test k-NN classifier for dissimilarity data
%
% [E,C] = TESTKD(D,K,PAR)
%
% INPUT
%   D    Dissimilarity dataset. Object labels are assumed to be the
%        true labels. Feature labels are assumed to be the labels of
%        the objects they are related to.
%   K    Desired number of neighbors to take into account, default K = 1.
%   PAR  'LOO' - leave-one-out option. This should be used if
%        the objects are related to themselves. If D is not square,
%        it is assumed that the first sets of objects in columns and
%        rows match.
%        'ALL' - use all objects (default).
%
% OUTPUT
%   E    Estimated error
%   C    Matrix with confidences, size M x N, if D has size M x L and
%        the labels are given for N classes. The confidences are the
%        Bayes estimators of the neighborhood class probabilities.
%
% DESCRIPTION
% TESTKD is based on just counting errors and does not weight with
% testobject priors.
%
% SEE ALSO
% DATASETS, KNNDC

% Copyright: R.P.W. Duin, r.duin@ieee.org
% Faculty EWI, Delft University of Technology
function [e,F] = testkd(d,knn,par)
% Estimate the error E of a KNN-nearest-neighbor rule applied directly to
% the dissimilarity dataset D, and return per-object class confidences F.
% PAR = 'LOO' enables leave-one-out testing for datasets in which objects
% are related to themselves; PAR = 'ALL' (default) uses all objects.

	% Defaults: use all objects, single nearest neighbor.
	if nargin < 3 || isempty(par), par = 'all'; end
	if nargin < 2 || isempty(knn), knn = 1; end

	isdataset(d);                 % raises an error if D is not a dataset

	lablist  = getlablist(d);     % class label list of the row objects
	featlist = getfeat(d);        % labels of the representation objects (columns)
	[m,k]    = getsize(d);
	% NOTE(review): the original retrieved getnlab(d) and getprior(d) but
	% never used them; TESTKD counts plain errors without prior weighting.

	% Renumber the representation-set labels and align their class
	% numbering with the order of the dataset label list.
	[clab,classlist] = renumlab(featlist);
	J = matchlablist(classlist,lablist);
	classlist = lablist;
	clab = J(clab);
	c = max(clab);                % number of classes in the representation set

	if strcmpi(par,'loo')
		% Leave-one-out: make self-dissimilarities effectively infinite so
		% that no object can be its own nearest neighbor. If D is not
		% square, the first min(k,m) rows and columns are assumed to
		% refer to the same objects.
		km = min(k,m);
		dmax = max(max(+d))*2;
		d(1:km,1:km) = d(1:km,1:km) + dmax*eye(km);
	elseif ~strcmpi(par,'all')
		error(['Unknown option ''' par ''''])
	end

	% Sort the dissimilarities per object (columns of d') and replace the
	% resulting neighbor indices by their class numbers. The sorted
	% distances themselves are not needed for the frequency-based rule.
	[DD,L] = sort(+d',1);
	L = clab(L);

	% Class frequencies among the KNN nearest neighbors of every object.
	F = zeros(m,c);               % preallocate instead of growing per column
	for j = 1:c
		F(:,j) = sum(L(1:knn,:)==j,1)';
	end

	% Bayes estimate of the neighborhood class probabilities, then
	% normalize the rows to sum to one.
	F = (F+1)/(knn+c);
	F = F ./ (sum(F,2)*ones(1,c));
	F = setdata(d,F,classlist);

	% Error: fraction of objects whose most-confident class differs from
	% the true label (no weighting by class priors).
	labf = F*labeld;
	e = nlabcmp(labf,getlabels(F))/m;
---|