%EMPARZENC EM-algorithm for semi-supervised learning by parzenc
%
% [W,V] = EMPARZENC(A,B,N,FID)
% W = A*EMPARZENC([],B,N,FID)
%
% INPUT
%   A    Labeled dataset used for training
%   B    Additional unlabeled dataset
%   N    Number of smoothing parameter steps (default 1)
%   FID  File ID to write progress to (default [], see PRPROGRESS)
%
% OUTPUT
%   W    Trained classifier, based on A and B
%   V    Trained classifier based on A only
%
% DESCRIPTION
% Using the EM algorithm the PARZENC classifier is used iteratively
% on the joint dataset [A;B]. In each EM step the labels of A are reset
% to their initial values. Initial labels in B are neglected. They
% are iteratively updated as soft labels obtained by classifying B
% by the actual W. The EM algorithm is run for a fixed smoothing
% parameter of PARZENC. This is repeated for smaller smoothing
% parameters in N steps, using harmonic interpolation between HL and HU,
% in which HL is the smoothing parameter estimate obtained from PARZENML
% applied to A and HU the estimate obtained from PARZENML applied to B.
% For N = 1, the average of HL and HU is used.
%
% SEE ALSO
% DATASETS, MAPPINGS, EMCLUST, EMC, PARZENC, PARZENML, PRPROGRESS

% Copyright: R.P.W. Duin, r.p.w.duin@prtools.org
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands
35 | function [w,v] = emc(a,b,n,fid)
|
---|
36 | if nargin < 4, fid = []; end
|
---|
37 | if nargin < 3, n = 1; end
|
---|
38 | if nargin < 2, b = []; end
|
---|
39 | if nargin < 1 | isempty(a)
|
---|
40 | w = mapping(mfilename,'untrained',{b,classf,labtype,fid});
|
---|
41 | w = setname(w,'EMParzen CLassifier');
|
---|
42 | return
|
---|
43 | end
|
---|
44 |
|
---|
45 | if size(a,2) ~= size(b,2)
|
---|
46 | error('Datasets should have same number of features')
|
---|
47 | end
|
---|
48 |
|
---|
49 | c = getsize(a,3);
|
---|
50 | epsilon = 1e-6;
|
---|
51 | nlab = getnlab(a);
|
---|
52 | lablist = getlablist(a);
|
---|
53 | a = setlabels(a,nlab);
|
---|
54 | a = setlabtype(a,'soft');
|
---|
55 | %ws = scalem([+a; +b],'variance');
|
---|
56 | ws = unitm;
|
---|
57 | a = a*ws;
|
---|
58 | b = b*ws;
|
---|
59 | lab = zeros(size(b,1),c);
|
---|
60 | hl = parzenml(a);
|
---|
61 | b = dataset(+b);
|
---|
62 | hu = parzenml(b);
|
---|
63 | hl = max(hl,hu) * 1.05;
|
---|
64 | hu = min(hl,hu) * 0.95;
|
---|
65 | if n == 1
|
---|
66 | h = (hl+hu)/2;
|
---|
67 | else
|
---|
68 | dh = (log(hl) - log(hu))/(n-1);
|
---|
69 | h = exp([log(hl):-dh:log(hu)]);
|
---|
70 | end
|
---|
71 | c = a;
|
---|
72 |
|
---|
73 | first = 1;
|
---|
74 | for j=1:length(h)
|
---|
75 | hh = h(j);
|
---|
76 | prprogress(fid,['\nem_classifier optimization, h = ' num2str(hh) '\n'])
|
---|
77 | change = 1;
|
---|
78 | while change > epsilon
|
---|
79 | w = parzenc(c,hh);
|
---|
80 | if first, v = w; first = 0; end
|
---|
81 | d = b*w;
|
---|
82 | labb = d*classc;
|
---|
83 | change = mean(mean((+(labb-lab)).^2));
|
---|
84 | lab = labb;
|
---|
85 | b = setlabtype(b,'soft',lab);
|
---|
86 | c = [a; b];
|
---|
87 | prprogress(fid,' change = %d\n', change)
|
---|
88 | end
|
---|
89 | end
|
---|
90 |
|
---|
91 | J = getlabels(w);
|
---|
92 | w = ws*setlabels(w,lablist(J,:));
|
---|
93 | v = ws*setlabels(v,lablist(J,:));
|
---|
94 | |
---|