source: distools/knndc.m @ 51

Last change on this file since 51 was 10, checked in by bduin, 14 years ago
File size: 4.1 KB
RevLine 
[10]1%KNNDC K-Nearest Neighbor Classifier for dissimilarity matrices
2%
3%       [W,K,E] = KNNDC(D,K,PAR,EDIT,PAR1,PAR2,PAR3)
4%       [W,K,E] = D*KNNDC([],K,PAR,EDIT,PAR1,PAR2,PAR3)
5%
6% INPUT
7%   D     NxN dissimilarity matrix or dataset
8%       K         Number of nearest neighbors; if [], then K is optimized       
9%       PAR = 'LOO' - (default) compute leave-one-out optimization for K
10%               it is assumed that the first objects in the training set
11%               constitute the representation set.
12%                 'ALL' - include all dissimilarities for optimization of K
13%               (representation set should not be included in training set)
14%   EDIT = 'ORG' editing and condensing is done by EDICON_ORG using
15%                       K = PAR1 and N = PAR2. K is set to 1. This only affects the
16%                       representation set. During testing the reduced representation set
17%                       is used.
18%        = 'DANDK', editing and condensing is done by EDICON using NSETS = PAR1,
19%                   NITERS = PAR2 and NTRIES = PAR3. K is set to 1. This only affects
20%                       the representation set. During testing the reduced representation
21%                       set is used.
22% OUTPUT
23%   W     Classifier
24%   K     Number of nearest neighbors
25%       E         Error on D
26%
27% DESCRIPTION
28% Compute the K-Nearest Neighbor classifier for the dissimilarity set D. K is
29% optimized if the routine is called with K = []. The error on D is returned in E.
30% A test dissimilarity set DTE defined by the same representation set can now be mapped
31% by C = DTE*W:
32%    C        - estimated class confidences
33%    C*LABELD - assigned class labels
34%    C*TESTC  - classification error
35%
36% NOTE
37% NN errors for dissimilarity data can be directly estimated by TESTKD.
38%
39% SEE ALSO
40% DATASETS, MAPPINGS, TESTKD, CROSSVALD, EDICON
41%
42
43% Copyright: R.P.W. Duin, r.p.w.duin@prtools.org
44% and Ela Pekalska, ela.pekalska@googlemail.com
45% Faculty EWI, Delft University of Technology
46% and University of Manchester, UK
47
48
function [W,knn,e] = knndc(d,knn,par,edit,par1,par2,par3)
% Implementation notes (the user-facing help is the comment block above).
%
% Three call modes are handled:
%   1. d empty or absent        -> return an untrained mapping that stores all
%                                  parameters for a later d*W call.
%   2. knn is not a mapping     -> training: optionally edit/condense the
%                                  representation set, optionally optimize knn,
%                                  and return a trained mapping.
%   3. knn is a trained mapping -> testing: W holds the output of TESTKD for
%                                  d restricted to the stored representation
%                                  objects, knn the stored K and e the error.

    % Fill in defaults for omitted trailing arguments.
    if nargin < 7, par3 = []; end
    if nargin < 6, par2 = []; end
    if nargin < 5, par1 = []; end
    if nargin < 4, edit = ''; end
    if nargin < 3 || isempty(par), par = 'loo'; end
    if nargin < 2, knn = []; end

    % The help text spells the options in upper case ('LOO', 'ALL'), so accept
    % any case, just like EDIT (which is compared through UPPER below).
    if ischar(par), par = lower(par); end

    % Empty call, to handle d*knndc or d*knndc([],par,...).
    % All parameters are stored in one cell array so that they are handed back
    % to this function when the untrained mapping is applied to a dataset.
    if nargin < 1 || isempty(d)
        W = mapping(mfilename,'untrained',{knn,par,edit,par1,par2,par3});
        W = setname(W,'KNND');
        return
    end

    nlab     = getnlab(d);
    lablist  = getlablist(d);
    featlist = getfeat(d);
    [m,k,c]  = getsize(d);
    p        = getprior(d);

    % Numeric class index of every representation object (column of d).
    [clab,classlist] = renumlab(featlist);
    [cl,nc] = renumlab(classlist,lablist);

    if size(nc,1) > c
        error('Object labels do not match representation set')
    end

    % Correct for different classlist - lablist orders.
    J = matchlablist(classlist,lablist);
    classlist = lablist;
    clab = J(clab);

    if ~ismapping(knn)  % training (find knn)

        if strcmp(par,'loo')
            % Get rid of leave-one-out problems: make the self-dissimilarities
            % of the leading square block so large that an object is never
            % selected as its own nearest neighbor.
            km = min(k,m);
            dmax = max(max(+d))*2;
            d(1:km,1:km) = d(1:km,1:km) + dmax*eye(km);
        elseif ~strcmp(par,'all')
            error(['Unknown option ''' par ''''])
        end

        % Optional editing and condensing of the representation set.
        % JJ holds the indices of the representation objects that are kept.
        switch upper(edit)
            case 'ORG'
                if isempty(par1) && isempty(par2)
                    JJ = edicon_org(d);
                elseif isempty(par2)
                    JJ = edicon_org(d,par1);
                else
                    % Pass PAR1 (K of EDICON_ORG), not the 'loo'/'all' option.
                    JJ = edicon_org(d,par1,par2);
                end
                knn = 1;
            case 'DANDK'
                JJ = edicon(d,par1,par2,par3);
                knn = 1;
            otherwise
                JJ = 1:k;
        end

        if isempty(knn) % optimize knn
            % L(i,n): class of the n-th nearest representation object of i.
            [dsorted,L] = sort(+d,2);
            % reshape keeps the m-by-k layout even when m == 1, where indexing
            % a column vector with a row vector would return a column.
            L = reshape(clab(L),size(L));
            Ymax = zeros(m,k);   % largest class count among the first n neighbors
            Yc   = zeros(m,k);   % class reaching that count (ties: lowest class)
            for j = 1:c
                % Y(i,n): number of class-j objects among the first n neighbors.
                Y = cumsum(double(L == j),2);
                better = Y > Ymax;
                Ymax(better) = Y(better);
                Yc(better) = j;
            end
            % Number of correctly classified objects for every candidate K.
            z = sum(Yc == nlab*ones(1,k),1);
            [e,knn] = max(z);
            e = 1 - e/m;
        elseif nargout == 3
            % NOTE(review): after editing/condensing this still estimates the
            % error on the full representation set, not on d(:,JJ) - confirm
            % that this is intended.
            e = testkd(d,knn,par);
        end

        W = mapping(mfilename,'trained',{knn,JJ},lablist,k,c);
        W = setname(W,'KNNDC');

    else        % testing for given mapping or knn

        w = knn; % mapping stored in knn
        wdata = getdata(w);
        knn = wdata{1};      % number of neighbors found during training
        J = wdata{2};        % indices of the retained representation objects
        classlist = getlab(w);
        c = size(w,2);
        [nn,nf,fl] = renumlab(classlist,lablist);
        if max(nf) > c
            error('Representation set labels do not match with classifier')
        end
        [e,q] = testkd(d(:,J),knn);
        W = q;

    end
Note: See TracBrowser for help on using the repository browser.