source: prextra/emparzenc.m @ 153

Last change on this file since 153 was 5, checked in by bduin, 14 years ago
File size: 2.7 KB
RevLine 
[5]1%EMPARZENC EM-algorithm for semi-supervised learning by parzenc
2%
3%   [W,V] = EMPARZENC(A,B,N,FID)
4%   W = A*EMPARZENC([],B,N,FID)
5%
6% INPUT
7%   A       Labeled dataset used for training
8%   B       Additional unlabeled dataset
9%   N       Number of smoothing parameter steps (default 1)
10%   FID     File ID to write progress to (default [], see PRPROGRESS)
11%
12% OUTPUT
13%   W      Trained classifier, based on A and B
14%   V      Trained classifier based on A only
15%
16% DESCRIPTION
17% Using the EM algorithm the PARZENC classifier is used iteratively
18% on the joint dataset [A;B]. In each EM step the labels of A are reset
19% to their initial values. Initial labels in B are neglected. They
20% are iteratively updated as soft labels obtained by classifying B
21% by the actual W. The EM algorithm is run for a fixed smoothing
22% parameter of PARZENC. This is repeated for smaller smoothing
23% parameters in N steps, using geometric (log-scale) interpolation between HL and HU,
24% in which HL is the smoothing parameter estimate obtained from PARZENML
25% applied to A and HU the estimate obtained from PARZENML applied to B.
26% For N = 1, the average of HL and HU is used.
27%
28% SEE ALSO
29% DATASETS, MAPPINGS, EMCLUST, EMC, PARZENC, PARZENML, PRPROGRESS
30
31% Copyright: R.P.W. Duin, r.p.w.duin@prtools.org
32% Faculty EWI, Delft University of Technology
33% P.O. Box 5031, 2600 GA Delft, The Netherlands
34
function [w,v] = emparzenc(a,b,n,fid)
% EMPARZENC  Iterative (EM-style) semi-supervised training of PARZENC.
%
%   a    labeled training dataset
%   b    additional dataset; its own labels are discarded
%   n    number of smoothing-parameter steps (default 1; [] also means 1)
%   fid  file id handed to PRPROGRESS (default [])
%
%   w    classifier obtained after iterating on [a; soft-labeled b]
%   v    classifier returned by the very first PARZENC call (trained on a only)
%
% The declared name matches the file name (it used to read 'emc'); MATLAB
% resolves calls by file name, so existing callers are unaffected.

    if nargin < 4, fid = []; end
    if nargin < 3 || isempty(n), n = 1; end
    if nargin < 2, b = []; end
    if nargin < 1 || isempty(a)
        % Untrained mapping: store exactly the arguments this function takes
        % after A. The former list {b,classf,labtype,fid} referred to
        % variables that do not exist in this function.
        w = mapping(mfilename,'untrained',{b,n,fid});
        w = setname(w,'EMParzen CLassifier');
        return
    end

    % n < 1 used to produce an empty list of smoothing parameters, which
    % left w undefined further down; reject it up front instead.
    if numel(n) ~= 1 || n < 1 || n ~= round(n)
        error('Number of smoothing parameter steps N should be a positive integer')
    end

    if size(a,2) ~= size(b,2)
        error('Datasets should have same number of features')
    end

    nclass = getsize(a,3);      % size of a along its third (class) dimension
    epsilon = 1e-6;             % stop when the mean squared label change drops below this
    nlab = getnlab(a);
    lablist = getlablist(a);    % original label names, restored on the outputs at the end
    a = setlabels(a,nlab);      % train on numeric labels
    a = setlabtype(a,'soft');
    %ws = scalem([+a; +b],'variance');
    ws = unitm;                 % mapping applied to both sets and prepended to w and v below
    a = a*ws;
    b = b*ws;
    lab = zeros(size(b,1),nclass);   % previous soft labels of b, initially all zero

    hl = parzenml(a);
    b = dataset(+b);            % rebuild b from its raw data only, dropping its labels
    hu = parzenml(b);
    % Widen the interval spanned by the two estimates by 5% on both sides.
    % Both bounds are derived from the ORIGINAL estimates; previously hl was
    % overwritten first, so min(hl,hu) could never return the estimate
    % obtained from a when that one was the smaller of the two.
    hmax = max(hl,hu) * 1.05;
    hmin = min(hl,hu) * 0.95;
    if n == 1
        h = (hmax+hmin)/2;
    else
        % n values from hmax down to hmin, equally spaced in log(h).
        % linspace always includes both end points, unlike a colon range
        % with a floating-point step.
        h = exp(linspace(log(hmax),log(hmin),n));
    end

    train = a;                  % current training set; becomes [a; b] after the first pass
    first = 1;
    for j = 1:length(h)
        hh = h(j);
        prprogress(fid,['\nem_classifier optimization, h = ' num2str(hh) '\n'])
        change = 1;
        % NOTE(review): there is no iteration cap; the loop ends only when
        % the label change falls below epsilon.
        while change > epsilon
            w = parzenc(train,hh);
            if first, v = w; first = 0; end   % keep the classifier trained on a alone
            d = b*w;
            labb = d*classc;                  % new soft labels for b
            change = mean(mean((+(labb-lab)).^2));
            lab = labb;
            b = setlabtype(b,'soft',lab);
            train = [a; b];                   % a keeps its initial labels in every step
            prprogress(fid,'  change = %d\n', change)
        end
    end

    % Translate the numeric labels back to the original label list and put
    % the feature mapping in front of both classifiers.
    J = getlabels(w);
    w = ws*setlabels(w,lablist(J,:));
    v = ws*setlabels(v,lablist(J,:));
94       
Note: See TracBrowser for help on using the repository browser.