%GLDBM_TD GLDB using top-down criterion (Fisher separability)
% 
%  W = GLDBM_TD(A)
% 
% INPUT  
%  A    Training dataset
%
% OUTPUT
%  W    best bases mapping
%
% DESCRIPTION 
% Generalised Best Bases mapping is a feature extraction technique for
% spectral datasets with two classes.  This function implements the
% top-down approach starting from a single group with all the wavelengths,
% recursively identifying the best possible split points.  Groups of
% adjacent features (wavelengths) are identified in the input dataset such
% that Fisher criterion is maximized in each group. Output dataset is
% created by projecting wavelengths in each group to the 1D Fisher
% subspace.  The procedure is defined for two class problems, if a
% multi-class dataset is supplied, a cell array of mappings will be
% computed.  Multi-class classifier, based on this routine, is implemented
% as PREXP algorithm ALG_GLDBM
%
% REFERENCE 
% Kumar,Ghosh,Crawford: Classification of Hyperspectral Data using
% Best-bases Feature Extraction Algorithm
%
% SEE ALSO
% DATASETS, MAPPINGS, GLDBM

% Copyright: P. Paclik, pavel@ph.tn.tudelft.nl
% Faculty of Applied Physics, Delft University of Technology
% P.O. Box 5046, 2600 GA Delft, The Netherlands

% $Id: gldbm_td.m,v 1.5 2005/02/10 15:24:07 pavel Exp $

function [w,crit] = gldbm_td(a,arg2)

	% No arguments given: return untrained mapping.

	if (nargin < 1) | (isempty(a))
		w = mapping(mfilename);
		w = setname(w,'Best Bases mapping (top-down)');
		return
	end

	if (~exist('arg2','var'))     % Second argument is not a mapping: training.

		isdataset(a);           % Assert that A is a dataset.

		[m,k,c] = getsize(a); M = classsizes(a);
		
		if c>2
			% for multi-class problem, create all pair-wise extractors
			w={};
			iter=[];
			for i=1:c-1
				for j=i+1:c
					fprintf(1,'(%d-%d',i,j);
					t=seldat(a,[i j]);
					eval(['w{i,j}=' mfilename '(t);']);
					fprintf(1,') ');
				end
			end
			return
		end

		% counter with ten steps
		t=floor(k/10); % usually, only 80% bands is processed
		counts=t*ones(1,10);
		counts(1:mod(m,10))=counts(1:mod(m,10))+1;
		counts=cumsum(counts);
		
		Q=corrcoef(+a);
		[U,G]=meancov(a);

		m=0;              % level
		Nm=size(a,2);
		
		% in top-down approach, each group of bands is defined by its limits (l,u),
		% flag saying if it should be processed further and a criterion value:
		
		l=1;  % lower bounds of group-bands
		u=Nm; % upper bounds of group-bands
		process=1; % should the group be processed?
		I=0;    % criterion values for each group

		% compute crit for each single band group:
%		I=crit_eval(l,u,Q,+U(1,:),+U(2,:),G(:,:,1),G(:,:,2));

		while 1
		    
		    % we'll go over all the groups and process the processable ones:
		    temp=find(process==1);
		    gid=temp(1); % the 1st group in queue to be processed
%		    fprintf(1,' l=%d,u=%d ',l(gid),u(gid));
		    
		    % make possible merges:
		    % pl and pu refer to the possible left subgroup!
		    gc=u(gid)-l(gid)+1; % number of possible left subgroups
		    pl=repmat( l(gid), 1, gc-1 ); % possible beginnings of the left new subgroup
		    pu=pl(1):u(gid)-1;

		    % eval criterium for left subgroups:
		    crit1=crit_eval(pl,pu,Q,+U(1,:),+U(2,:),G(:,:,1),G(:,:,2));
		    
		    % eval criterion for right subgroups:
		    crit2=crit_eval(pu+1,repmat(u(gid),1,gc-1),Q,+U(1,:),+U(2,:),G(:,:,1),G(:,:,2));		    
		    
		    maxval=max([crit1; crit2]);
		    [maxval,ind]=max(maxval);

		    newcrit=[crit1(ind) crit2(ind)];
		    
		    % pu(ind) is the best possible splitting index

		    % decide which of the two new subgroups should be processed further
		    newprocess=[0 0]; % by default: don't process
		    if crit1(ind)>I(gid) & ind>1
			% decompose the left subgroup
			newprocess(1)=1;
		    end
		    if crit2(ind)>I(gid) & gc-ind>1
			% decompose the left subgroup
			newprocess(2)=1;
		    end

		    % update high-level group definition:
		    if gid==1
			% we're splitting the firrst group
			l=[ 1       pu(ind)+1 l(2:end)];
			u=[ pu(ind) u(1)      u(2:end)];
			process=[newprocess process(2:end)];
			I=[newcrit I(2:end)];
		    elseif gid==Nm
			% we're splitting the last group
			l=[ l(1:end-1) l(gid) pu(ind)+1];
			u=[ u(1:end-1) pu(ind) Nm];
			process=[process(1:end-1) newprocess];
			I=[I(1:end-1) newcrit];
		    else
			% we're in the middle
			l=[ l(1:gid-1) l(gid) pu(ind)+1 l(gid+1:end)];
			u=[ u(1:gid-1) pu(ind) u(gid)   u(gid+1:end)];
			process=[process(1:gid-1) newprocess process(gid+1:end)];
			I=[I(1:gid-1) newcrit I(gid+1:end)];			
		    end
		    fprintf(1,'(%d) ', sum(process));
		    
		    if isempty(find(process==1))
			% no candidates for progress left, return what
			% we have at the moment.
			[crit,clf]=crit_eval(l,u,Q,+U(1,:),+U(2,:),G(:,:,1),G(:,:,2));

			% parameters
			pars.clf=clf; pars.l=l; pars.u=u; pars.info_iter=m;
			w = mapping(mfilename,'trained',pars,(1:length(l))',k,length(pars.l));
			w = setname(w,'Best Bases mapping (top-down)');
			
			return
		    end
		end % while
	else % Second argument is a a mapping: testing.

		w = arg2;			
		pars = getdata(w); % Unpack the mapping.
		[m,k] = getsize(a); 

		% create an output dataset
		out=[];
		for i=1:length(pars.clf)
			% get the subset of bands...
			t=+a(:,pars.l(i):pars.u(i));
			% ... and project it to 1D Fisher subspace
			out=[out t*pars.clf(i).w];
		end
	
		w = setdat(a,out);
		w=setfeatlab(w,(1:size(w,2))');
	end

return


function [I,clf]=crit_eval(l,u,Q,mua,mub,sigmaa,sigmab)

	I=zeros(1,length(l));

	clf=[];
	for i=1:length(l)
		
		C=min(min( Q( l(i):u(i), l(i):u(i) ) ));
		
		W=0.5*( sigmaa( l(i):u(i), l(i):u(i) )+sigmab( l(i):u(i), l(i):u(i) ) );
		t=( mua( l(i):u(i) ) - mub( l(i):u(i) ) )';
		B=t*t';
		
		w=pinv(W)*t;
		D=(w'*B*w)/(w'*W*w);
		
		I(i)=C*D;
		
		clf(i).w=w;
	end