Context Navigation

source: distools/pspca.m @ 108

Last change on this file since 108 was 79, checked in by bduin, 11 years ago

File size: 5.1 KB

Rev	Line
[10]	1	%PSPCA Pseudo-Euclidean Principal Component Analysis
	2	%
	3	% [W,SIG,L] = PSPCA(X,XSIG,ALF)
	4	%
	5	% INPUT
	6	% X NxK data
	7	% XSIG Signature of the input pseudo-Euclidean space; (default: [K 0])
	8	% ALF Parameter determining the dimensionality and the mapping (optional, default: Inf)
	9	% (0,1) - fraction of the total (absolute value) preserved variance
	10	% Inf - no dimensionality reduction, keeping all dimensions (it's noisy)
	11	% 'p' - projection into a Euclidean space based on positive eigenvalues only
	12	% 'PARp' - projection into a Euclidean space based on the PAR fraction of
	13	% positive eigenvalues; e.g. ALF = '0.9p'
	14	% 'n' - projection into a Euclidean space based on negative eigenvalues only
	15	% 'PARn' - projection into a (negative) Euclidean space based on the PAR fraction
	16	% of negative eigenvalues; e.g. ALF = '0.7n'
	17	% 'P1pP2n'- projection into a Euclidean space based on the P1 positive eigenvalues
	18	% and P2 negative eigenvalues; e.g. ALF = '0.7p0.1n', ALF = '7p2n'
	19	% 1 .. N - number of dimensions in total
	20	% [P1 P2] - P1 dimensions or preserved fraction of variance in the positive subspace
	21	% and P2 dimensions or preserved fraction of variance in the negative
	22	% subspace; e.g. ALF = [5 10], ALF = [0.9 0.1]
	23	%
	24	% OUTPUT
	25	% W PCA mapping in a pseudo-Euclidean space
	26	% SIG Signature of the output pseudo-Euclidean space
	27	% L List of eigenvalues
	28	%
	29	% DEFAULT
	30	% XSIG = [K 0]
	31	% ALF = INF
	32	%
	33	% DESCRIPTION
	34	% PCA mapping W from a K-dimensional pseudo-Euclidean space to an M-dimensional
	35	% pseudo-Euclidean subspace. M is determined by ALF. The subspace is found, e.g.
	36	% such that at least a fraction ALF of the total variance is preserved for ALF
	37	% in (0,1). The resulting Y is found by X*W. The parameter SIG describes the
	38	% signature of the subspace. L is a sorted list of eigenvalues describing the
	39	% variances in the (pseudo-)Euclidean space.
	40	%
	41	% If X is a labeled dataset, then the averaged covariance matrix is weighted
	42	% by class priors.
	43	%
	44	% Note that a PCA decomposition in a pseudo-Euclidean space is different than in
	45	% a Euclidean space. Namely, CJ = QLinv(Q), where CJ is a pseudo-Euclidean
	46	% covariance matrix computed such that CJ= C*J, where C is a Euclidean covariance
	47	% matrix, J is the fundamental symmetry (taking part in inner products). Q is
	48	% J-orthogonal, i.e. Q'JQ = J, hence inv(Q) = JQ'J.
	49	%
	50	% SEE ALSO
[79]	51	% MAPPINGS, DATASETS, PCAM, KPSEM, PSEM
[10]	52	%
	53	% LITERATURE
	54	% 1. E. Pekalska, R.P.W. Duin, The Dissimilarity representation in Pattern Recognition.
	55	% Foundations and Applications. World Scientific, Singapore, 2005.
	56	% 2. L. Goldfarb, A unified approach to pattern recognition, Pattern Recognition, vol.17, 575-582, 1984.
	57	%
	58
	59	% Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com
	60	% Faculty EWI, Delft University of Technology and
	61	% School of Computer Science, University of Manchester
	62
	63
	64
	65	function [W,outsig,L,Q] = pspca(a,sig,alf,prec)
	66
	67	if nargin < 4 \| isempty(prec), prec = 1e-4; end
	68	if nargin < 3 \| isempty(alf), alf = inf; end
	69	if nargin < 2 \| isempty(sig), sig = [size(a,1) 0]; end
	70	if nargin < 1 \| isempty(a),
[79]	71	W = prmapping(mfilename,sig,alf,prec);
[10]	72	W = setname(W,'Pseudo-Euclidean PCA');
	73	return
	74	end
	75
	76
	77	if (isdataset(a) \| isa(a,'double')),
	78
	79	if ismapping(sig),
	80	% APPLY MAPPING: project new data using the trained mapping.
	81	[m,n] = size(a);
	82	pars = getdata(sig);
	83
	84	% Parameters
	85	v = pars{1}; % Mapping that shifts data to the origin
	86	JQ = pars{2}; % J*Q
	87	sig = pars{3}; % Signature in the output space
	88	W = (av) JQ;
	89	if isdataset(W),
	90	W.user = sig;
	91	W.name = updname(W.name);
	92	end
	93	return;
	94	end
	95	end
	96
	97
	98	% TRAIN THE MAPPING
	99	[m,k] = size(a);
	100	if m < 2,
	101	error('At least two objects are expected.');
	102	end
	103	if sum(sig) ~= k,
	104	error('Signature does not fit the data dimensionality.')
	105	end
	106	isdset = isdataset(a);
	107
	108
	109	% Shift mean of data to the origin
	110	v = scalem(+a);
	111	aa = a*v;
	112
	113	if ~isdset, % Unlabeled data
	114	A = +aa;
	115	else
	116	c = max(getnlab(aa));
	117	if c == 0,
	118	A = +aa;
	119	else
	120	p = getprior(a);
	121	A = [];
	122	for j = 1:c
	123	A = [A; +seldat(aa,j)*p(j)];
	124	end
	125	end
	126	end
	127	G = prcov(A);
	128	G = 0.5*(G+G'); % Make sure G is symmetric
	129	if sig(2) > 0,
	130	J = diag([ones(sig(1),1); -ones(sig(2),1)]);
	131	G = G*J;
	132	end
	133
	134	[Q,L] = eig(G);
	135	Q = real(Q);
	136	l = diag(real(L));
	137	[lm,Z] = sort(-abs(l));
	138	Q = Q(:,Z);
	139	l = l(Z);
	140
	141	[I,outsig] = seleigs(l,alf,prec); % I is the index of selected eigenvalues
	142	L = l(I); % Eigenvalues
	143	Q = Q(:,I); % Eigenvectors
	144
	145
	146	if sig(2) > 0,
	147	% Q is NOT orthogonal, but should be J-orthogonal, i.e. Q'JQ = J
	148	% Normalize Q to be J-orthogonal
	149	Q = Qdiag(1./sqrt(abs(diag(Q'J*Q))));
	150	Q = J*Q;
	151	end
	152
	153
	154	% Determine the mapping
[79]	155	W = prmapping(mfilename,'trained',{v,Q,outsig,sig},[],k,sum(outsig));
[10]	156	W = setname(W,'Pseudo-Euclidean PCA');
	157	return

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: distools/pspca.m @ 108

Download in other formats: