source: distools/testkd.m @ 10

Last change on this file since 10 was 10, checked in by bduin, 14 years ago
File size: 2.9 KB
Line 
1%TESTKD Test k-NN classifier for dissimilarity data
2%
3%   [E,C] = TESTKD(D,K,PAR)
4%
5% INPUT
6%   D    Dissimilarity dataset. Object labels are assumed to be the
7%        true labels. Feature labels are assumed to be the labels of
8%        the objects they are related to.
9%   K    Desired number of neighbors to take into account, default K = 1.
10%   PAR  'LOO' - leave-one-out option. This should be used if
11%          the objects are related to themselves. If D is not square,
12%          it is assumed that the first sets of objects in columns and
13%          rows match.
14%        'ALL' - use all objects (default).
15%
16% OUTPUT
17%   E    Estimated error
18%   C    Matrix with confidences, size M x N, if D has size M x L and
19%        the labels are given for N classes. Note that for every K these
20%        confidences are the Bayes estimators of the neighborhood class
21%        probabilities; the estimate based on nearest neighbor distances
22%        (formerly used for K < 3) is disabled in the code.
23%
24% DESCRIPTION
25% TESTKD is based on just counting errors and does not weight with
26% testobject priors.
27%
28% SEE ALSO
29% DATASETS, KNNDC
30
31% Copyright: R.P.W. Duin, r.duin@ieee.org
32% Faculty EWI, Delft University of Technology
33
34
function [e,F] = testkd(d,knn,par)
% Implementation notes (the user-level help is the header of this file):
%   d    dissimilarity dataset; every row is an object to be classified,
%        every column a representation object labeled by its feature label
%   knn  number of neighbors that vote (default 1)
%   par  'loo' or 'all' (default), compared case-insensitively
%   e    number of label mismatches reported by NLABCMP divided by the
%        number of objects in d
%   F    dataset holding the class confidences, one column per entry of
%        the label list of d

if nargin < 3 || isempty(par), par = 'all'; end
if nargin < 2 || isempty(knn), knn = 1; end

% NOTE(review): called without output argument, presumably to raise an
% error when d is not a dataset -- confirm against the PRTools version used.
isdataset(d);

lablist  = getlablist(d);
featlist = getfeat(d);
[m,k]    = getsize(d);

% The k-NN vote below reads the first knn rows of a k x m index matrix;
% report an oversized knn explicitly instead of failing on the indexing.
if knn > k
    error('Number of neighbors (%d) exceeds the number of representation objects (%d).',knn,k)
end

% Express the labels of the representation objects as indices into the
% label list of d, so that vote counts and object labels share one order.
[clab,classlist] = renumlab(featlist);
J = matchlablist(classlist,lablist);
classlist = lablist;
clab = J(clab);
% One column per class of the label list, also for classes that have no
% representation object (max(clab) would drop trailing ones and make the
% width of F disagree with classlist).
c = size(lablist,1);

if strcmpi(par,'loo')
    % Leave-one-out: push every object away from itself by adding a large
    % value to the diagonal of the leading square part of d.
    km = min(k,m);
    dmax = max(max(+d))*2;
    d(1:km,1:km) = d(1:km,1:km) + dmax*eye(km);
elseif ~strcmpi(par,'all')
    error(['Unknown option ''' par ''''])
end

% Column i of L lists the representation objects ordered by increasing
% dissimilarity to object i; only this ordering is needed further on.
[dsorted,L] = sort(+d',1);
% Replace object indices by class indices; the reshape keeps L a k x m
% matrix regardless of the orientation rules of vector indexing.
L = reshape(clab(L),size(L));

% Count, for every object, how many of its knn nearest neighbors belong
% to each class.
F = zeros(m,c);
for j = 1:c
    F(:,j) = sum(L(1:knn,:)==j,1)';
end

% Bayes estimators of the neighborhood class probabilities. They are used
% for every knn; the distance-based estimate once used for knn < 3 is
% disabled.
F = (F+1)/(knn+c);
% Renormalize the rows: they do not sum to one when neighbors carry a
% label that could not be matched to the label list (class index 0).
F = F ./ (sum(F,2)*ones(1,c));
F = setdata(d,F,classlist);

% Counting errors directly; F*testc is avoided as it goes wrong in case of
% LOO testing (empty classes).
labf = F*labeld;
e = nlabcmp(labf,getlabels(F))/m;
Note: See TracBrowser for help on using the repository browser.