%KNNDC K-Nearest Neighbor Classifier for dissimilarity matrices
%
%   [W,K,E] = KNNDC(D,K,PAR,EDIT,PAR1,PAR2,PAR3)
%   [W,K,E] = D*KNNDC([],K,PAR,EDIT,PAR1,PAR2,PAR3)
%
% INPUT
%   D     NxN dissimilarity matrix or dataset
%   K     Number of nearest neighbors; if [], then K is optimized
%   PAR   = 'LOO' - (default) compute leave-one-out optimization for K
%                   it is assumed that the first objects in the training set
%                   constitute the representation set.
%           'ALL' - include all dissimilarities for optimization of K
%                   (representation set should not be included in training set)
%   EDIT  = 'ORG'   editing and condensing is done by EDICON_ORG using
%                   K = PAR1 and N = PAR2. K is set to 1. This only affects the
%                   representation set. During testing the reduced representation set
%                   is used.
%         = 'DANDK', editing and condensing is done by EDICON using NSETS = PAR1,
%                   NITERS = PAR2 and NTRIES = PAR3. K is set to 1. This only affects
%                   the representation set. During testing the reduced representation
%                   set is used.
%
% OUTPUT
%   W     Classifier
%   K     Number of nearest neighbors
%   E     Error on D
%
% DESCRIPTION
% Compute K-Nearest Neighbor classifier for the dissimilarity set D by optimizing K
% (if the routine is called with K = []), the error on D is returned in E.
% A test dissimilarity set DTE defined by the same representation set can now be mapped
% by C = DTE*W:
%   C          - estimated class confidences
%   C*LABELD   - assigned class labels
%   C*TESTC    - classification error
%
% NOTE
% NN errors for dissimilarity data can be directly estimated by TESTKD.
%
% SEE ALSO
% DATASETS, MAPPINGS, TESTKD, CROSSVALD, EDICON
%
42 | |
---|
% Copyright: R.P.W. Duin, r.p.w.duin@prtools.org
% and Ela Pekalska, ela.pekalska@googlemail.com
% Faculty EWI, Delft University of Technology
% and University of Manchester, UK
47 | |
---|
48 | |
---|
function [W,knn,e] = knndc(d,knn,par,edit,par1,par2,par3)
% Three modes, selected by the arguments:
%   * d missing or empty      -> return an untrained mapping carrying all options
%   * knn is not a mapping    -> train: (optionally) edit the representation set
%                                and (optionally) optimize knn on d
%   * knn is a trained mapping-> test: classify d with the stored knn and the
%                                stored representation-set column selection

    % Fill in defaults for omitted trailing arguments.
    if nargin < 7, par3 = []; end
    if nargin < 6, par2 = []; end
    if nargin < 5, par1 = []; end
    if nargin < 4, edit = ''; end
    if nargin < 3 || isempty(par), par = 'loo'; end
    if nargin < 2, knn = []; end

    % The help text spells the options 'LOO'/'ALL'; accept any letter case,
    % in line with EDIT which is matched case-insensitively further below.
    if ischar(par), par = lower(par); end

    % Empty call, to handle d*knndc or d*knndc([],k,par,edit,...).
    % All user parameters are packed into a single cell array so that none of
    % them is lost (or misread as the LABELS argument of prmapping) when the
    % untrained mapping is later applied to a dataset.
    if nargin < 1 || isempty(d)
        W = prmapping(mfilename,'untrained',{knn,par,edit,par1,par2,par3});
        W = setname(W,'KNND');
        return
    end

    nlab     = getnlab(d);      % numeric object labels
    lablist  = getlablist(d);   % class label list of the objects
    featlist = getfeat(d);      % feature labels, i.e. labels of the representation set
    [m,k,c]  = getsize(d);      % m objects, k representation objects, c classes
    p        = getprior(d);     %#ok<NASGU> retained from the original call sequence

    % Number the representation-set labels and relate them to the object label list.
    [clab,classlist] = renumlab(featlist);
    [cl,nc] = renumlab(classlist,lablist); %#ok<ASGLU>

    % NOTE(review): the testing branch below uses max(nf) > c for the analogous
    % check; size(nc,1) looks like it can never exceed c. Left unchanged because
    % the intended condition cannot be confirmed from this file alone.
    if size(nc,1) > c
        error('Object labels do not match representation set')
    end

    % Correct for different classlist - lablist orders, so that clab indexes
    % into lablist (and is thereby comparable with nlab).
    J = matchlablist(classlist,lablist);
    classlist = lablist; %#ok<NASGU>
    clab = J(clab);

    if ~ismapping(knn) % training (find knn)

        if strcmp(par,'loo')
            % Get rid of leave-one-out problems: add a large value to the diagonal
            % of the leading square block so an object is never its own neighbor.
            km = min(k,m);
            dmax = max(max(+d))*2;
            d(1:km,1:km) = d(1:km,1:km) + dmax*eye(km);
        elseif ~strcmp(par,'all')
            error(['Unknown option ''' par ''''])
        end

        % Optional editing/condensing of the representation set. JJ holds the
        % indices of the retained representation objects (columns of d).
        switch upper(edit)
            case 'ORG'
                if isempty(par1) && isempty(par2)
                    JJ = edicon_org(d);
                elseif isempty(par2)
                    JJ = edicon_org(d,par1);
                else
                    % K = PAR1, N = PAR2 (the original passed PAR here by mistake).
                    JJ = edicon_org(d,par1,par2);
                end
                knn = 1;
            case 'DANDK'
                JJ = edicon(d,par1,par2,par3);
                knn = 1;
            otherwise
                JJ = 1:k;
        end

        if isempty(knn) % optimize knn
            % Sort every row by increasing dissimilarity and translate the
            % neighbor indices into class indices.
            [Y,L] = sort(+d,2); %#ok<ASGLU>
            % reshape guards the single-object case (m == 1), where indexing the
            % column vector clab with a row vector would return a column.
            L = reshape(clab(L),m,k);

            Ymax = zeros(m,k);  % best vote count so far, for every neighborhood size
            Yc   = zeros(m,k);  % class holding that best vote count
            for j = 1:c
                % Column n = number of class-j objects among the n nearest neighbors.
                Y = cumsum(double(L == j),2);
                J = Y > Ymax;   % strict '>' keeps the lowest class index on ties
                Ymax(J) = Y(J);
                Yc(J) = j;
            end

            % z(n) = number of correctly classified objects using n neighbors.
            z = sum(Yc == nlab*ones(1,k),1);
            [e,knn] = max(z);   % first maximum, i.e. the smallest optimal knn
            e = 1 - e/m;        % convert the count of correct objects to an error rate
        elseif nargout == 3
            e = testkd(d,knn,par);
        end

        W = prmapping(mfilename,'trained',{knn,JJ},lablist,k,c);
        W = setname(W,'KNNDC');

    else % testing for given mapping or knn

        w = knn;                % mapping stored in knn
        wdata = getdata(w);
        knn = wdata{1};         % number of neighbors found/used in training
        J = wdata{2};           % retained representation-set columns
        classlist = getlab(w);
        c = size(w,2);
        [nn,nf,fl] = renumlab(classlist,lablist); %#ok<ASGLU>
        if max(nf) > c
            error('Representation set labels do not match with classifier')
        end
        [e,q] = testkd(d(:,J),knn);
        W = q;

    end

end