%PSPCA Pseudo-Euclidean Principal Component Analysis
%
%   [W,SIG,L] = PSPCA(X,XSIG,ALF)
%
% INPUT
%   X     NxK data
%   XSIG  Signature of the input pseudo-Euclidean space (default: [K 0])
%   ALF   Parameter determining the dimensionality and the mapping (optional, default: Inf)
%         (0,1)   - fraction of the total preserved variance (in absolute value)
%         Inf     - no dimensionality reduction; all dimensions are kept (noise sensitive)
%         'p'     - projection onto a Euclidean space based on the positive eigenvalues only
%         'PARp'  - projection onto a Euclidean space based on the PAR fraction of
%                   positive eigenvalues; e.g. ALF = '0.9p'
%         'n'     - projection onto a (negative) Euclidean space based on the negative
%                   eigenvalues only
%         'PARn'  - projection onto a (negative) Euclidean space based on the PAR fraction
%                   of negative eigenvalues; e.g. ALF = '0.7n'
%         'P1pP2n'- projection onto a pseudo-Euclidean space based on P1 positive
%                   and P2 negative eigenvalues; e.g. ALF = '0.7p0.1n', ALF = '7p2n'
%         1 .. K  - number of dimensions in total
%         [P1 P2] - P1 dimensions or preserved fraction of variance in the positive subspace
%                   and P2 dimensions or preserved fraction of variance in the negative
%                   subspace; e.g. ALF = [5 10], ALF = [0.9 0.1]
%
% OUTPUT
%   W     PCA mapping in a pseudo-Euclidean space
%   SIG   Signature of the output pseudo-Euclidean space
%   L     List of eigenvalues
%
% DEFAULT
%   XSIG = [K 0]
%   ALF  = INF
%
% DESCRIPTION
% PCA mapping W from a K-dimensional pseudo-Euclidean space to an M-dimensional
% pseudo-Euclidean subspace. M is determined by ALF. For ALF in (0,1), for instance,
% the subspace is chosen such that at least a fraction ALF of the total variance is
% preserved. The projected data Y are found as X*W. SIG describes the signature of
% the subspace. L is a sorted list of eigenvalues describing the variances in the
% (pseudo-)Euclidean space.
%
% If X is a labeled dataset, then the averaged covariance matrix is weighted
% by the class priors.
%
% Note that a PCA decomposition in a pseudo-Euclidean space differs from the one
% in a Euclidean space. Here, CJ = Q*L*inv(Q), where CJ is the pseudo-Euclidean
% covariance matrix computed as CJ = C*J, with C being the Euclidean covariance
% matrix and J the fundamental symmetry (which takes part in the inner products).
% Q is J-orthogonal, i.e. Q'*J*Q = J, hence inv(Q) = J*Q'*J.
%
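% EXAMPLE
% A minimal usage sketch (illustrative only; the random data and the signature
% [3 2] below are assumptions, not taken from the original code):
%
%   X = randn(100,5);                % 100 objects in a 5-dimensional space
%   [W,SIG,L] = pspca(X,[3 2],0.9);  % treat the space as pseudo-Euclidean with
%                                    % signature [3 2]; keep 90% of the variance
%   Y = X*W;                         % project the data onto the found subspace
%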
% SEE ALSO
% MAPPINGS, DATASETS, PCA, KPSEM, PSEM
%
% LITERATURE
% 1. E. Pekalska, R.P.W. Duin, The Dissimilarity Representation for Pattern Recognition.
%    Foundations and Applications. World Scientific, Singapore, 2005.
% 2. L. Goldfarb, A unified approach to pattern recognition, Pattern Recognition,
%    vol. 17, 575-582, 1984.
%

% Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com
% Faculty EWI, Delft University of Technology and
% School of Computer Science, University of Manchester


function [W,outsig,L,Q] = pspca(a,sig,alf,prec)

if nargin < 4 | isempty(prec), prec = 1e-4; end
if nargin < 3 | isempty(alf),  alf  = inf; end
if nargin < 2 | isempty(sig),  sig  = [size(a,2) 0]; end
if nargin < 1 | isempty(a),
  W = mapping(mfilename,sig,alf,prec);
  W = setname(W,'Pseudo-Euclidean PCA');
  return
end


if (isdataset(a) | isa(a,'double')),

  if ismapping(sig),
    % APPLY MAPPING: project new data using the trained mapping.
    [m,n] = size(a);
    pars  = getdata(sig);

    % Parameters
    v   = pars{1};   % Mapping that shifts data to the origin
    JQ  = pars{2};   % J*Q
    sig = pars{3};   % Signature in the output space
    W   = (a*v) * JQ;
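    % The projection of new data is thus Y = (X - mean of the training data)*J*Q,
    % i.e. the centered data mapped onto the selected J-orthogonal eigenvectors.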
    if isdataset(W),
      W.user = sig;
      W.name = updname(W.name);
    end
    return;
  end
end


% TRAIN THE MAPPING
[m,k] = size(a);
if m < 2,
  error('At least two objects are expected.');
end
if sum(sig) ~= k,
  error('Signature does not fit the data dimensionality.')
end
isdset = isdataset(a);


% Shift mean of data to the origin
v  = scalem(+a);
aa = a*v;

if ~isdset,   % Unlabeled data
  A = +aa;
else
  c = max(getnlab(aa));
  if c == 0,
    A = +aa;
  else
    p = getprior(a);
    A = [];
    for j = 1:c
      A = [A; +seldat(aa,j)*p(j)];
    end
  end
end
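
% At this point A holds the mean-centered data; for a labeled dataset each class
% block has additionally been scaled by its class prior. Its covariance is
% computed next.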
G = prcov(A);
G = 0.5*(G+G');   % Make sure G is symmetric
if sig(2) > 0,
  J = diag([ones(sig(1),1); -ones(sig(2),1)]);
  G = G*J;
end
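
% For illustration (not part of the computation): with the signature SIG = [3 2],
% the fundamental symmetry is J = diag([1 1 1 -1 -1]), so G above becomes the
% pseudo-Euclidean covariance CJ = C*J from the DESCRIPTION. For a Euclidean
% signature, SIG(2) = 0, J is the identity and G remains the usual covariance.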

[Q,L] = eig(G);
Q = real(Q);
l = diag(real(L));
[lm,Z] = sort(-abs(l));
Q = Q(:,Z);
l = l(Z);

[I,outsig] = seleigs(l,alf,prec);   % I are the indices of the selected eigenvalues
L = l(I);                           % Eigenvalues
Q = Q(:,I);                         % Eigenvectors


if sig(2) > 0,
  % Q is NOT orthogonal, but should be J-orthogonal, i.e. Q'*J*Q = J.
  % Normalize Q to be J-orthogonal.
  Q = Q*diag(1./sqrt(abs(diag(Q'*J*Q))));
  Q = J*Q;
end
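
% The first line above makes the selected eigenvectors J-orthonormal, i.e.
% abs(diag(Q'*J*Q)) = 1 up to numerical precision; the second premultiplies them
% by J, so the matrix stored in the mapping below is J*Q and new data are
% projected as (X - mean)*J*Q in the APPLY branch above. In the Euclidean case
% (SIG(2) = 0) this block is skipped and plain Q is stored.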


% Determine the mapping
W = mapping(mfilename,'trained',{v,Q,outsig,sig},[],k,sum(outsig));
W = setname(W,'Pseudo-Euclidean PCA');
return