%PSPCA Pseudo-Euclidean Principal Component Analysis
%
%   [W,SIG,L] = PSPCA(X,XSIG,ALF)
%
% INPUT
%   X     NxK data
%   XSIG  Signature of the input pseudo-Euclidean space (default: [K 0])
%   ALF   Parameter determining the dimensionality and the mapping (optional, default: Inf)
%         (0,1)   - fraction of the total preserved variance (in absolute value)
%         Inf     - no dimensionality reduction; all dimensions are kept (noise sensitive)
%         'p'     - projection onto a Euclidean space based on the positive eigenvalues only
%         'PARp'  - projection onto a Euclidean space based on the PAR fraction of
%                   positive eigenvalues; e.g. ALF = '0.9p'
%         'n'     - projection onto a (negative) Euclidean space based on the negative
%                   eigenvalues only
%         'PARn'  - projection onto a (negative) Euclidean space based on the PAR fraction
%                   of negative eigenvalues; e.g. ALF = '0.7n'
%         'P1pP2n'- projection onto a pseudo-Euclidean space based on P1 positive
%                   and P2 negative eigenvalues; e.g. ALF = '0.7p0.1n', ALF = '7p2n'
%         1 .. K  - number of dimensions in total
%         [P1 P2] - P1 dimensions or preserved fraction of variance in the positive subspace
%                   and P2 dimensions or preserved fraction of variance in the negative
%                   subspace; e.g. ALF = [5 10], ALF = [0.9 0.1]
%
% OUTPUT
%   W     PCA mapping in a pseudo-Euclidean space
%   SIG   Signature of the output pseudo-Euclidean space
%   L     List of eigenvalues
%
% DEFAULT
%   XSIG = [K 0]
%   ALF  = INF
%
% DESCRIPTION
% PCA mapping W from a K-dimensional pseudo-Euclidean space to an M-dimensional
% pseudo-Euclidean subspace. M is determined by ALF. For ALF in (0,1), for instance,
% the subspace is chosen such that at least a fraction ALF of the total variance is
% preserved. The projected data Y are found as X*W. SIG describes the signature of
% the subspace. L is a sorted list of eigenvalues describing the variances in the
% (pseudo-)Euclidean space.
%
% If X is a labeled dataset, then the averaged covariance matrix is weighted
% by the class priors.
%
% Note that a PCA decomposition in a pseudo-Euclidean space differs from the one
% in a Euclidean space. Here, CJ = Q*L*inv(Q), where CJ is the pseudo-Euclidean
% covariance matrix computed as CJ = C*J, with C being the Euclidean covariance
% matrix and J the fundamental symmetry (which takes part in the inner products).
% Q is J-orthogonal, i.e. Q'*J*Q = J, hence inv(Q) = J*Q'*J.
%
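% EXAMPLE
% A minimal usage sketch (illustrative only; the random data and the signature
% [3 2] below are assumptions, not taken from the original code):
%
%   X = randn(100,5);                % 100 objects in a 5-dimensional space
%   [W,SIG,L] = pspca(X,[3 2],0.9);  % treat the space as pseudo-Euclidean with
%                                    % signature [3 2]; keep 90% of the variance
%   Y = X*W;                         % project the data onto the found subspace
%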
% SEE ALSO
% MAPPINGS, DATASETS, PCA, KPSEM, PSEM
%
% LITERATURE
% 1. E. Pekalska, R.P.W. Duin, The Dissimilarity Representation for Pattern Recognition.
%    Foundations and Applications. World Scientific, Singapore, 2005.
% 2. L. Goldfarb, A unified approach to pattern recognition, Pattern Recognition,
%    vol. 17, 575-582, 1984.
%

% Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com
% Faculty EWI, Delft University of Technology and
% School of Computer Science, University of Manchester


function [W,outsig,L,Q] = pspca(a,sig,alf,prec)

if nargin < 4 | isempty(prec), prec = 1e-4; end
if nargin < 3 | isempty(alf),  alf  = inf; end
if nargin < 2 | isempty(sig),  sig  = [size(a,2) 0]; end
if nargin < 1 | isempty(a),
  W = mapping(mfilename,sig,alf,prec);
  W = setname(W,'Pseudo-Euclidean PCA');
  return
end


if (isdataset(a) | isa(a,'double')),

  if ismapping(sig),
    % APPLY MAPPING: project new data using the trained mapping.
    [m,n] = size(a);
    pars  = getdata(sig);

    % Parameters
    v   = pars{1};   % Mapping that shifts data to the origin
    JQ  = pars{2};   % J*Q
    sig = pars{3};   % Signature in the output space
    W   = (a*v) * JQ;
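    % The projection of new data is thus Y = (X - mean of the training data)*J*Q,
    % i.e. the centered data mapped onto the selected J-orthogonal eigenvectors.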
    if isdataset(W),
      W.user = sig;
      W.name = updname(W.name);
    end
    return;
  end
end


% TRAIN THE MAPPING
[m,k] = size(a);
if m < 2,
  error('At least two objects are expected.');
end
if sum(sig) ~= k,
  error('Signature does not fit the data dimensionality.')
end
isdset = isdataset(a);


% Shift mean of data to the origin
v  = scalem(+a);
aa = a*v;

if ~isdset,   % Unlabeled data
  A = +aa;
else
  c = max(getnlab(aa));
  if c == 0,
    A = +aa;
  else
    p = getprior(a);
    A = [];
    for j = 1:c
      A = [A; +seldat(aa,j)*p(j)];
    end
  end
end
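
% At this point A holds the mean-centered data; for a labeled dataset each class
% block has additionally been scaled by its class prior. Its covariance is
% computed next.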
G = prcov(A);
G = 0.5*(G+G');   % Make sure G is symmetric
if sig(2) > 0,
  J = diag([ones(sig(1),1); -ones(sig(2),1)]);
  G = G*J;
end
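
% For illustration (not part of the computation): with the signature SIG = [3 2],
% the fundamental symmetry is J = diag([1 1 1 -1 -1]), so G above becomes the
% pseudo-Euclidean covariance CJ = C*J from the DESCRIPTION. For a Euclidean
% signature, SIG(2) = 0, J is the identity and G remains the usual covariance.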

[Q,L] = eig(G);
Q = real(Q);
l = diag(real(L));
[lm,Z] = sort(-abs(l));
Q = Q(:,Z);
l = l(Z);

[I,outsig] = seleigs(l,alf,prec);   % I are the indices of the selected eigenvalues
L = l(I);                           % Eigenvalues
Q = Q(:,I);                         % Eigenvectors


if sig(2) > 0,
  % Q is NOT orthogonal, but should be J-orthogonal, i.e. Q'*J*Q = J.
  % Normalize Q to be J-orthogonal.
  Q = Q*diag(1./sqrt(abs(diag(Q'*J*Q))));
  Q = J*Q;
end
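
% The first line above makes the selected eigenvectors J-orthonormal, i.e.
% abs(diag(Q'*J*Q)) = 1 up to numerical precision; the second premultiplies them
% by J, so the matrix stored in the mapping below is J*Q and new data are
% projected as (X - mean)*J*Q in the APPLY branch above. In the Euclidean case
% (SIG(2) = 0) this block is skipped and plain Q is stored.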


% Determine the mapping
W = mapping(mfilename,'trained',{v,Q,outsig,sig},[],k,sum(outsig));
W = setname(W,'Pseudo-Euclidean PCA');
return