source: prextra/ecoc.m @ 29

Last change on this file since 29 was 6, checked in by bduin, 14 years ago
File size: 3.5 KB
Line 
1%ECOC Error-correcting output code
2%
3%      W = ECOC(A,BASECL,CM,RULE)
4%
5% INPUT
6%    A       Dataset
7%    BASECL  Untrained base classifier (default: FISHERC)
8%    CM      Coding matrix (default: 1-vs-rest coding)
9%    RULE    Combination rule, 'none' or 'hamming' (default: 'hamming')
10%
11% OUTPUT
12%    W       ECOC  classifier
13%
14% DESCRIPTION
15% Computation of the classifier using Error-correcting output codes to
16% create a multi-class classifier from a base classifier/dichotomizer.
17% The classes in dataset A are relabeled according to the coding matrix
18% CM (containing +1, -1 or 0). The default coding matrix is a 1-vs-rest
19% coding. For a three-class problem this becomes:
20%     CM = [ 1 -1 -1;
21%           -1  1 -1;
22%           -1 -1  1];
23%
24% For the evaluation of objects, the outputs of the dichotomizers are
25% combined using the combination rule RULE. Currently the following
26% rules are implemented:
27%     'none'     take the maximum output (this only works for 1-vs-all
28%                coding matrices)
29%     'hamming'  standard Hamming distance between the discretized
30%                classifier outcomes and the coding matrix. When a zero
31%                entry in the coding matrix appears, this entry is not
32%                used in the distance computation
33%
34% NOTE: the order of the classes as they are used in the coding matrix
35% is determined by the lablist of A.
36%
37% SEE ALSO
38%    MCLASSC, MAXC
39
40% Copyright: D.M.J. Tax, D.M.J.Tax@prtools.org
41% Faculty EWI, Delft University of Technology
42% P.O. Box 5031, 2600 GA Delft, The Netherlands
43 
function W = ecoc(a,basecl,CM,rule)

% The function runs in one of three modes, selected by its arguments:
%   - A empty or missing: return an untrained ECOC mapping that stores
%     BASECL, CM and RULE.
%   - BASECL is not a trained mapping: train one copy of BASECL per column
%     of the coding matrix on dataset A and return the trained mapping.
%   - BASECL is a trained mapping: its data is read as the struct stored
%     by the training branch (fields CM, w, rule) and A is evaluated.

if nargin < 4 || isempty(rule), rule = 'hamming'; end
if nargin < 3 || isempty(CM), CM = []; end
if nargin < 2 || isempty(basecl), basecl = fisherc; end
if nargin < 1 || isempty(a)
	% No data given: store the parameters in the same order as the
	% argument list that follows A.
	W = mapping(mfilename,{basecl,CM,rule});
	W = setname(W,'ECOC');
	return
end

if ~ismapping(basecl) || ~istrained(basecl)   %training

	[n,k,c] = getsize(a);
	nlab = getnlab(a);

	% define the coding matrix if not given:
	if isempty(CM)
		% 1-vs-rest: +1 on the diagonal, -1 elsewhere
		CM = repmat(-1,c,c) + 2*eye(c);
	else
		% check if the coding matrix is OK
		if size(CM,1)~=c
			error('Coding matrix does not have correct number of rows.');
		end
		% 0 is a legal entry (class not used by that dichotomizer);
		% anything else than -1, 0, +1 is rejected.
		if any(~ismember(CM(:),[-1 0 1]))
			error('Entries in the coding matrix should be -1, 0 or +1.');
		end
	end
	l = size(CM,2);
	w = cell(1,l);

	% Objects with numeric label 0 keep label 0 for every dichotomizer.
	labeled = (nlab > 0);

	% Train one dichotomizer per column of the coding matrix:
	for i=1:l
		% relabel every object by the code of its class in column i
		lab = zeros(size(nlab));
		lab(labeled) = CM(nlab(labeled),i);
		% NOTE: objects whose code is 0 are not removed; they are passed
		% to the base classifier with label 0.
		b = setlabels(a,lab);
		% train the base dichotomizer:
		w{i} = b*basecl;
	end

	% and save all useful data:
	dat.CM = CM;
	dat.w = w;
	dat.rule = rule;
	W = mapping(mfilename,'trained',dat,getlablist(a),k,c);
	W = setname(W,'ECOC');

else                               %testing

	% Extract the data stored by the training branch:
	dat = getdata(basecl);
	m = size(a,1);
	[c,l] = size(dat.CM);

	% Apply all the base classifiers and keep the output for label +1:
	z = zeros(m,l);
	for i=1:l
		tmp = a*dat.w{i};
		id = findfeatlab(tmp,1);
		z(:,i) = +tmp(:,id);
	end
	% Map the outputs to [-1,+1]; this assumes the base classifier
	% outputs lie in [0,1] (TODO confirm for non-standard base classifiers).
	z = 2*z-1;

	% and apply the combination rule:
	out = zeros(m,c);
	switch dat.rule
	case 'none'
		if size(z,2)~=c
			error('You have to combine when you don''t do one-vs-rest.');
		end
		out = z;
	case 'hamming'
		zz = sign(z);   % discrete outputs of all objects
		% distance of every object to every code word; entries for which
		% the coding matrix has a 0 are left out of the distance:
		d = zeros(m,c);
		for j=1:c
			used = (dat.CM(j,:)~=0);
			d(:,j) = sum(abs(zz(:,used) - repmat(dat.CM(j,used),m,1)),2);
		end
		% the closest code word wins (the first one in case of ties):
		[dummy,Icl] = min(d,[],2);
		out(sub2ind([m c],(1:m)',Icl)) = 1;
	otherwise
		error('This combination rule is not implemented yet.');
	end

	% Store the outputs
	W = setdat(a,out,basecl);
end
138
139
Note: See TracBrowser for help on using the repository browser.