[1] | 1 | %KNNDC K-Nearest Neighbor Classifier for dissimilarity matrices |
---|
| 2 | % |
---|
| 3 | % [W,K,E] = KNNDC(D,K,PAR,EDIT,PAR1,PAR2,PAR3) |
---|
| 4 | % [W,K,E] = D*KNNDC([],K,PAR,EDIT,PAR1,PAR2,PAR3) |
---|
| 5 | % |
---|
| 6 | % INPUT
|
---|
| 7 | % D NxN dissimilarity matrix or dataset |
---|
| 8 | % K Number of nearest neighbors; if [], then K is optimized
|
---|
| 9 | % PAR = 'LOO' - (default) compute leave-one-out optimization for K |
---|
| 10 | % it is assumed that the first objects in the training set |
---|
| 11 | % constitute the representation set. |
---|
| 12 | % 'ALL' - include all dissimilarities for optimization of K |
---|
| 13 | % (representation set should not be included in training set) |
---|
| 14 | % EDIT = 'ORG' editing and condensing is done by EDICON_ORG using |
---|
| 15 | % K = PAR1 and N = PAR2. K is set to 1. This only affects the |
---|
| 16 | % representation set. During testing the reduced representation set |
---|
| 17 | % is used. |
---|
| 18 | % = 'DANDK', editing and condensing is done by EDICON using NSETS = PAR1, |
---|
| 19 | % NITERS = PAR2 and NTRIES = PAR3. K is set to 1. This only affects |
---|
| 20 | % the representation set. During testing the reduced representation |
---|
| 21 | % set is used.
|
---|
| 22 | % OUTPUT
|
---|
| 23 | % W Classifier |
---|
| 24 | % K Number of nearest neighbors |
---|
| 25 | % E Error on D |
---|
| 26 | %
|
---|
| 27 | % DESCRIPTION
|
---|
| 28 | % Compute K-Nearest Neighbor classifier for the dissimilarity set D by optimizing K |
---|
| 29 | % (if the routine is called with K = []). The error on D is returned in E. |
---|
| 30 | % A test dissimilarity set DTE defined by the same representation set can now be mapped |
---|
| 31 | % by C = DTE*W: |
---|
| 32 | % C - estimated class confidences |
---|
| 33 | % C*LABELD - assigned class labels |
---|
| 34 | % C*TESTC - classification error |
---|
| 35 | % |
---|
| 36 | % NOTE |
---|
| 37 | % NN errors for dissimilarity data can be directly estimated by TESTKD. |
---|
| 38 | % |
---|
| 39 | % SEE ALSO |
---|
| 40 | % DATASETS, MAPPINGS, TESTKD, CROSSVALD, EDICON |
---|
| 41 | % |
---|
| 42 | |
---|
| 43 | % Copyright: R.P.W. Duin, r.p.w.duin@prtools.org |
---|
| 44 | % and Ela Pekalska, ela.pekalska@googlemail.com |
---|
| 45 | % Faculty EWI, Delft University of Technology |
---|
| 46 | % and University of Manchester, UK |
---|
| 47 | |
---|
| 48 | |
---|
function [W,knn,e] = knndc(d,knn,par,edit,par1,par2,par3)
% Entry point with three modes, selected by the arguments:
%   - D missing or empty : return an untrained mapping carrying the parameters.
%   - KNN is not a mapping: train on the dissimilarity dataset D
%                           (optionally optimising K and/or editing the
%                           representation set) and return a trained mapping.
%   - KNN is a mapping    : apply that trained mapping to D; the second
%                           output of TESTKD is returned in W.
%
% Outputs:
%   W   - untrained mapping, trained mapping, or (when applying a mapping)
%         the second output of TESTKD
%   knn - number of neighbours used (optimised when passed as [])
%   e   - error returned by the optimisation or by TESTKD; when K is given
%         during training it is only computed if requested as third output
%
% Errors:
%   'Object labels do not match representation set'
%   'Unknown option ''<par>'''  when PAR is neither 'loo' nor 'all'
%   'Representation set labels do not match with classifier'

% Defaults for omitted trailing arguments.
if nargin < 7, par3 = []; end
if nargin < 6, par2 = []; end
if nargin < 5, par1 = []; end
if nargin < 4, edit = ''; end
if nargin < 3 || isempty(par), par = 'loo'; end
if nargin < 2, knn = []; end

% Empty call, to handle d*knndc or d*knndc([],knn,par,edit,...).
% All parameters are stored as a cell array so that they are handed back
% to this function when the untrained mapping is trained; previously only
% KNN was kept and PAR ended up in the labels argument of MAPPING.
if nargin < 1 || isempty(d)
    W = mapping(mfilename,'untrained',{knn,par,edit,par1,par2,par3});
    W = setname(W,'KNND');
    return
end

% Collect labelling information of the objects (rows) and of the
% representation set (columns, stored as feature labels).
nlab     = getnlab(d);
lablist  = getlablist(d);
featlist = getfeat(d);
[m,k,c]  = getsize(d);
p        = getprior(d); %#ok<NASGU> % not used below; call kept from the original

[clab,classlist] = renumlab(featlist);
[cl,nc] = renumlab(classlist,lablist); %#ok<ASGLU>

% NOTE(review): size(nc,1) is the number of rows of the renumbered lablist,
% which looks like it always equals c; possibly max(nc) was intended, as in
% the testing branch below. Left unchanged - confirm against RENUMLAB.
if size(nc,1) > c
    error('Object labels do not match representation set')
end

% Correct for different classlist - lablist orders: express the column
% labels as indices into lablist.
J = matchlablist(classlist,lablist);
classlist = lablist; %#ok<NASGU>
clab = J(clab);

if ~ismapping(knn)
    % ------------------------------------------------------------ training

    % PAR is matched case-insensitively: the help text documents
    % 'LOO'/'ALL' while the original code only accepted lower case.
    if strcmpi(par,'loo')
        % Get rid of leave-one-out problems: the first objects are assumed
        % to be the representation set, so the leading square block gets a
        % large value added to its diagonal to stop an object from being
        % its own nearest neighbour.
        km   = min(k,m);
        dmax = max(max(+d))*2;
        d(1:km,1:km) = d(1:km,1:km) + dmax*eye(km);
    elseif ~strcmpi(par,'all')
        error(['Unknown option ''' par ''''])
    end
    par = lower(par);   % canonical spelling for the TESTKD call below

    % Optional editing / condensing of the representation set. JJ holds the
    % indices of the retained columns; K is forced to 1 when editing.
    switch upper(edit)
        case 'ORG'
            if isempty(par1) && isempty(par2)
                JJ = edicon_org(d);
            elseif isempty(par2)
                JJ = edicon_org(d,par1);
            else
                % Fixed: the original passed PAR (the 'loo'/'all' string)
                % here instead of PAR1.
                JJ = edicon_org(d,par1,par2);
            end
            knn = 1;
        case 'DANDK'
            JJ  = edicon(d,par1,par2,par3);
            knn = 1;
        otherwise
            JJ = 1:k;
    end

    if isempty(knn)
        % No K given: pick the K with the fewest errors on d.
        [knn,e] = knndc_optimise_k(d,clab,nlab,m,k,c);
    elseif nargout == 3
        % K is fixed; only spend time on the error if it is requested.
        e = testkd(d,knn,par);
    end

    W = mapping(mfilename,'trained',{knn,JJ},lablist,k,c);
    W = setname(W,'KNNDC');

else
    % ------------------------------------- testing with a trained mapping

    w      = knn;           % the mapping is passed in the KNN argument
    wdata  = getdata(w);
    knn    = wdata{1};      % number of neighbours stored at training time
    J      = wdata{2};      % retained representation-set columns
    wlabs  = getlab(w);
    c      = size(w,2);

    % Only the second output (renumbered lablist) is used for the check.
    [nn,nf,fl] = renumlab(wlabs,lablist); %#ok<ASGLU>
    if max(nf) > c
        error('Representation set labels do not match with classifier')
    end

    [e,q] = testkd(d(:,J),knn);
    W = q;

end


function [knn,e] = knndc_optimise_k(d,clab,nlab,m,k,c)
% Choose the number of neighbours K in 1..k that classifies most of the m
% objects of d correctly, and return it with the corresponding error.
%   d    - dissimilarity dataset (m objects by k representation objects)
%   clab - class index of each of the k representation objects
%   nlab - class index of each of the m objects
%   c    - number of classes

% Order the representation objects of every row by increasing dissimilarity
% and replace them by their class indices. The reshape keeps the m-by-k
% layout when m == 1, where plain indexing would follow clab's orientation.
[dsorted,order] = sort(+d,2); %#ok<ASGLU>
L = reshape(clab(order),m,k);

% For every class, count its members among the first n neighbours
% (n = 1..k) and remember per (object, n) the class with the highest count.
% The strict '>' means ties are resolved in favour of the lower class index.
best   = zeros(m,k);
winner = zeros(m,k);
for j = 1:c
    counts = cumsum(double(L == j),2);
    better = counts > best;
    best(better)   = counts(better);
    winner(better) = j;
end

% Number of correctly labelled objects for every candidate K.
ncorrect = sum(winner == nlab*ones(1,k),1);
[nbest,knn] = max(ncorrect);
e = 1 - nbest/m;