source: distools/pspca.m @ 41

Last change on this file since 41 was 10, checked in by bduin, 14 years ago
File size: 5.0 KB
Line 
1%PSPCA Pseudo-Euclidean Principal Component Analysis
2%
3%     [W,SIG,L] = PSPCA(X,XSIG,ALF)
4%
5% INPUT
6%   X     NxK data
7%   XSIG  Signature of the input pseudo-Euclidean space; (default: [K 0])
8%   ALF   Parameter determining the dimensionality and the mapping (optional, default: Inf)
9%         (0,1)   - fraction of the total (absolute value) preserved variance
10%         Inf     - no dimensionality reduction, keeping all dimensions (it's noisy)
11%         'p'     - projection into a Euclidean space based on positive eigenvalues only
12%         'PARp'  - projection into a Euclidean space based on the PAR fraction of
13%                   positive eigenvalues; e.g. ALF = '0.9p'
14%         'n'     - projection into a Euclidean space based on negative eigenvalues only
15%         'PARn'  - projection into a (negative) Euclidean space based on the PAR fraction
16%                   of negative eigenvalues; e.g. ALF = '0.7n'
17%         'P1pP2n'- projection into a Euclidean space based on the P1 positive eigenvalues
18%                   and P2 negative eigenvalues; e.g. ALF = '0.7p0.1n', ALF = '7p2n'
19%         1 .. N  - number of dimensions in total
20%         [P1 P2] - P1 dimensions or preserved fraction of variance in the positive subspace
21%                   and P2 dimensions or preserved fraction of variance in the negative
22%                   subspace; e.g. ALF = [5 10], ALF = [0.9 0.1]
23%
24% OUTPUT
25%   W     PCA mapping in a pseudo-Euclidean space
26%   SIG   Signature of the output pseudo-Euclidean space
27%   L     List of eigenvalues
28%
29% DEFAULT
30%   XSIG = [K 0]
31%   ALF  = INF
32%
33% DESCRIPTION
34% PCA mapping W from a K-dimensional pseudo-Euclidean space to an M-dimensional
35% pseudo-Euclidean subspace. M is determined by ALF. The subspace is found, e.g.
36% such that at least a fraction ALF of the total variance is preserved for ALF
37% in (0,1). The resulting Y is found by X*W. The parameter SIG describes the
38% signature of the subspace. L is a sorted list of eigenvalues describing the
39% variances in the (pseudo-)Euclidean space.
40%
41% If X is a labeled dataset, then the averaged covariance matrix is weighted
42% by class priors.
43%
44% Note that a PCA decomposition in a pseudo-Euclidean space is different than in
45% a Euclidean space. Namely, CJ = Q*L*inv(Q), where CJ is a pseudo-Euclidean
46% covariance matrix computed such that CJ= C*J, where C is a Euclidean covariance
47% matrix, J is the fundamental symmetry (taking part in inner products). Q is
48% J-orthogonal, i.e. Q'*J*Q = J, hence inv(Q) = J*Q'*J.
49%
50% SEE ALSO
51% MAPPINGS, DATASETS, PCA, KPSEM, PSEM
52%
53% LITERATURE
54% 1. E. Pekalska, R.P.W. Duin, The Dissimilarity representation in Pattern Recognition.
55%    Foundations and Applications. World Scientific, Singapore, 2005.
56% 2. L. Goldfarb, A unified approach to pattern recognition, Pattern Recognition, vol.17, 575-582, 1984.
57%
58
59% Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com
60% Faculty EWI, Delft University of Technology and
61% School of Computer Science, University of Manchester
62
63
64
65function [W,outsig,L,Q] = pspca(a,sig,alf,prec)
66
67if nargin < 4 | isempty(prec), prec = 1e-4; end
68if nargin < 3 | isempty(alf), alf = inf; end
69if nargin < 2 | isempty(sig), sig = [size(a,1) 0]; end
70if nargin < 1 | isempty(a),
71  W = mapping(mfilename,sig,alf,prec);
72  W = setname(W,'Pseudo-Euclidean PCA');
73  return
74end
75
76
77if (isdataset(a) | isa(a,'double')),
78
79  if ismapping(sig),
80    % APPLY MAPPING: project new data using the trained mapping.
81    [m,n] = size(a);
82    pars  = getdata(sig);
83
84    % Parameters
85    v   = pars{1};  % Mapping that shifts data to the origin
86    JQ  = pars{2};  % J*Q
87    sig = pars{3};  % Signature in the output space
88    W = (a*v) * JQ;
89    if isdataset(W),
90      W.user = sig;
91      W.name = updname(W.name);
92    end
93    return;
94  end
95end
96
97
98% TRAIN THE MAPPING
99[m,k] = size(a);
100if m < 2,
101  error('At least two objects are expected.');
102end
103if sum(sig) ~= k,
104  error('Signature does not fit the data dimensionality.')
105end
106isdset = isdataset(a);
107
108
109% Shift mean of data to the origin
110v  = scalem(+a);
111aa = a*v;
112
113if ~isdset,    % Unlabeled data
114  A = +aa;
115else
116  c = max(getnlab(aa));
117  if c == 0,
118    A = +aa;
119  else
120    p = getprior(a);
121    A = [];
122    for j = 1:c
123      A = [A; +seldat(aa,j)*p(j)];
124    end
125  end
126end
127G = prcov(A);
128G = 0.5*(G+G');     % Make sure G is symmetric
129if sig(2) > 0,
130  J = diag([ones(sig(1),1); -ones(sig(2),1)]);
131  G = G*J;
132end
133
134[Q,L]  = eig(G);
135Q      = real(Q);
136l      = diag(real(L));
137[lm,Z] = sort(-abs(l));
138Q      = Q(:,Z);
139l      = l(Z);
140
141[I,outsig] = seleigs(l,alf,prec); % I is the index of selected eigenvalues
142L = l(I);                         % Eigenvalues
143Q = Q(:,I);                       % Eigenvectors
144
145
146if sig(2) > 0,
147  % Q is NOT orthogonal, but should be J-orthogonal, i.e. Q'*J*Q = J
148  % Normalize Q to be J-orthogonal
149  Q = Q*diag(1./sqrt(abs(diag(Q'*J*Q))));
150  Q = J*Q;
151end
152
153
154% Determine the mapping
155W = mapping(mfilename,'trained',{v,Q,outsig,sig},[],k,sum(outsig));
156W = setname(W,'Pseudo-Euclidean PCA');
157return
Note: See TracBrowser for help on using the repository browser.