%EMC EM Classifier using semi-supervised data
%
%   W = EMC(A,B,CLASSF,LABTYPE,FID)
%   W = A*EMC([],B,CLASSF,LABTYPE,FID)
%
% INPUT
%   A        Labeled dataset used for training
%   B        Additional unlabeled dataset
%   CLASSF   Untrained classifier (default QDC)
%   LABTYPE  Label type to be used (crisp (default) or soft)
%   FID      File ID to write progress to (default [], see PRPROGRESS).
%            It is stored in the untrained mapping; the training loop
%            in this file does not write to it.
%
% OUTPUT
%   W        Trained classifier
%
% DESCRIPTION
% Using the EM algorithm the classifier CLASSF is used iteratively
% on the joint dataset [A;B]. In each step the labels of A are reset
% to their initial values. Initial labels in B are neglected for
% training. Labels of LABTYPE 'soft' are not supported by all classifiers.
%
% The iteration stops when the change in the labels assigned to B drops
% to 1e-6 or below (crisp: fraction of changed labels, soft: mean squared
% difference), or when the elapsed time exceeds PRTIME, in which case a
% warning is issued.
%
% SEE ALSO
% DATASETS, MAPPINGS, EMCLUST, PRPROGRESS

% Copyright: R.P.W. Duin, r.p.w.duin@prtools.org
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands

function w = emc(a,b,classf,labtype,fid)

	% Defaults for omitted or empty arguments. FID must be a formal
	% parameter: it is stored in the untrained mapping below, and that
	% mapping hands all stored parameters back to this function.
	if nargin < 5, fid = []; end
	if nargin < 4 || isempty(labtype), labtype = 'crisp'; end
	if nargin < 3 || isempty(classf), classf = qdc; end
	if nargin < 2, b = []; end
	if nargin < 1 || isempty(a)
		% No training set given: return the untrained mapping.
		w = mapping(mfilename,'untrained',{b,classf,labtype,fid});
		w = setname(w,'EM CLassifier');
		return
	end

	islabtype(a,'crisp','soft');
	% NOTE(review): the original comment read "at least 2 object per class,
	% 2 classes", but the arguments are (1,2) -- presumably 1 object per
	% class and 2 classes; confirm against ISVALDSET.
	isvaldset(a,1,2);

	% Without additional data this reduces to plain supervised training.
	if isempty(b)
		w = a*classf;
		return
	end
	if size(a,2) ~= size(b,2)
		error('Datasets should have same number of features')
	end

	% Copy of B carrying its original labels (as numeric label indices);
	% only used for the per-iteration display inside the loop.
	borg    = setlabels(b,getnlab(b));
	nclass  = getsize(a,3);     % number of classes in A
	epsilon = 1e-6;             % convergence threshold on the label change
	change  = 1;                % forces at least one iteration

	% Relabel A by its numeric label indices, keeping the priors. The
	% original label list is restored on the classifier at the end.
	nlab    = getnlab(a);
	lablist = getlablist(a);
	p = getprior(a);
	a = setlabels(a,nlab);
	a = setprior(a,p);
	a = setlabtype(a,labtype);

	% Initial (all-zero) labels of B against which the first change is
	% measured.
	switch labtype
		case 'crisp'
			lab = zeros(size(b,1),1);
		case 'soft'
			lab = zeros(size(b,1),nclass);
	end

	b = prdataset(+b);          % drop the labels of B, keep the data only
	w = a*classf;               % initial classifier trained on A alone

	starttime = clock;
	runtime = 0;
	iter = 0;
	while change > epsilon
		% NOTE(review): prints the test result of the current classifier on
		% the originally labeled B in every iteration. This looks like
		% debugging output and presumably needs B to be labeled -- confirm
		% whether it should stay.
		disp(borg*w*testd)
		if runtime > prtime
			prwarning(2,['EM algorithme stopped by PRTIME after ' num2str(iter) ' iterations']);
			break
		end

		% Label B with the current classifier and measure the change with
		% respect to the previous iteration.
		d = b*w;
		switch labtype
			case 'crisp'
				labb = d*labeld;
				change = mean(lab ~= labb);
			case 'soft'
				labb = d*classc;
				change = mean(mean((+(labb-lab)).^2));
			otherwise
				error('Wrong LABTYPE given')
		end
		lab = labb;

		% Retrain on A (original labels) joined with the newly labeled B.
		b = setlabtype(b,labtype,lab);
		joint = [setlabtype(a,labtype); b];
		w = joint*classf;

		runtime = etime(clock,starttime);
		iter = iter+1;
	end

	% Map the numeric labels of the classifier back to the original label
	% list of A.
	J = getlabels(w);
	w = setlabels(w,lablist(J,:));