%EMPARZENC EM-algorithm for semi-supervised learning by parzenc
%
% [W,V] = EMPARZENC(A,B,N,FID)
% W = A*EMPARZENC([],B,N,FID)
%
% INPUT
%   A    Labeled dataset used for training
%   B    Additional unlabeled dataset
%   N    Number of smoothing parameter steps (default 1)
%   FID  File ID to write progress to (default [], see PRPROGRESS)
%
% OUTPUT
%   W    Trained classifier, based on A and B
%   V    Trained classifier based on A only
%
% DESCRIPTION
% Using the EM algorithm the PARZENC classifier is used iteratively
% on the joint dataset [A;B]. In each EM step the labels of A are reset
% to their initial values. Initial labels in B are neglected. They
% are iteratively updated as soft labels obtained by classifying B
% by the actual W. The EM algorithm is run for a fixed smoothing
% parameter of PARZENC. This is repeated for smaller smoothing
% parameters in N steps, using harmonic interpolation between HL and HU,
% in which HL is the smoothing parameter estimate obtained from PARZENML
% applied to A and HU the estimate obtained from PARZENML applied to B.
% For N = 1, the average of HL and HU is used.
%
% SEE ALSO
% DATASETS, MAPPINGS, EMCLUST, EMC, PARZENC, PARZENML, PRPROGRESS

% Copyright: R.P.W. Duin, r.p.w.duin@prtools.org
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands
function [w,v] = emc(a,b,n,fid)
% Semi-supervised Parzen classifier trained by EM on [A;B].
% W is trained on labeled A plus soft-labeled B; V on A alone.
	if nargin < 4, fid = []; end
	if nargin < 3, n = 1; end
	if nargin < 2, b = []; end
	if nargin < 1 | isempty(a)
		% Untrained mapping. BUG FIX: the original stored the undefined
		% variables CLASSF and LABTYPE (left over from EMC, which has a
		% different parameter list); store this routine's own parameters.
		w = mapping(mfilename,'untrained',{b,n,fid});
		w = setname(w,'EMParzen Classifier');   % typo fixed: 'CLassifier'
		return
	end

	if size(a,2) ~= size(b,2)
		error('Datasets should have same number of features')
	end

	c = getsize(a,3);            % number of classes in the labeled set
	epsilon = 1e-6;              % convergence threshold on soft-label change
	nlab = getnlab(a);
	lablist = getlablist(a);     % original labels, restored at the end
	a = setlabels(a,nlab);       % work with numeric class indices
	a = setlabtype(a,'soft');
	%ws = scalem([+a; +b],'variance');
	ws = unitm;
	a = a*ws;
	b = b*ws;
	lab = zeros(size(b,1),c);    % soft labels of B from the previous iteration
	hl = parzenml(a);            % smoothing estimate from the labeled data
	b = dataset(+b);             % discard any labels supplied with B
	hu = parzenml(b);            % smoothing estimate from the unlabeled data
	% Order the two estimates with a small margin. BUG FIX: the original
	% overwrote HL before computing HU (hl = max(hl,hu); hu = min(hl,hu);),
	% losing the smaller estimate whenever HU > HL. Use temporaries.
	hmax = max(hl,hu) * 1.05;
	hmin = min(hl,hu) * 0.95;
	hl = hmax;
	hu = hmin;
	if n == 1
		h = (hl+hu)/2;           % single step: average of the two estimates
	else
		% N steps, harmonically (log-linearly) interpolated from HL down to HU.
		dh = (log(hl) - log(hu))/(n-1);
		h = exp([log(hl):-dh:log(hu)]);
	end
	% Combined training set; starts as the labeled data only so the first
	% trained classifier equals the supervised one. (The original reused the
	% class-count variable C for this, which was confusing.)
	t = a;

	first = 1;
	for j=1:length(h)
		hh = h(j);
		prprogress(fid,['\nem_classifier optimization, h = ' num2str(hh) '\n'])
		change = 1;
		while change > epsilon
			w = parzenc(t,hh);
			if first, v = w; first = 0; end   % V: classifier trained on A only
			d = b*w;
			labb = d*classc;                  % soft labels: class posteriors for B
			change = mean(mean((+(labb-lab)).^2));
			lab = labb;
			b = setlabtype(b,'soft',lab);
			t = [a; b];                       % retrain on labeled + soft-labeled data
			prprogress(fid,' change = %d\n', change)
		end
	end

	% Map the numeric class indices back to the original labels and
	% prepend the (identity) scaling so W and V apply to raw data.
	J = getlabels(w);
	w = ws*setlabels(w,lablist(J,:));
	v = ws*setlabels(v,lablist(J,:));
---|