[10] | 1 | %PSPCA Pseudo-Euclidean Principal Component Analysis
|
---|
| 2 | %
|
---|
| 3 | % [W,SIG,L] = PSPCA(X,XSIG,ALF)
|
---|
| 4 | %
|
---|
| 5 | % INPUT
|
---|
| 6 | % X NxK data
|
---|
| 7 | % XSIG Signature of the input pseudo-Euclidean space; (default: [K 0])
|
---|
| 8 | % ALF Parameter determining the dimensionality and the mapping (optional, default: Inf)
|
---|
| 9 | % (0,1) - fraction of the total (absolute value) preserved variance
|
---|
| 10 | % Inf - no dimensionality reduction, keeping all dimensions (it's noisy)
|
---|
| 11 | % 'p' - projection into a Euclidean space based on positive eigenvalues only
|
---|
| 12 | % 'PARp' - projection into a Euclidean space based on the PAR fraction of
|
---|
| 13 | % positive eigenvalues; e.g. ALF = '0.9p'
|
---|
| 14 | % 'n' - projection into a Euclidean space based on negative eigenvalues only
|
---|
| 15 | % 'PARn' - projection into a (negative) Euclidean space based on the PAR fraction
|
---|
| 16 | % of negative eigenvalues; e.g. ALF = '0.7n'
|
---|
| 17 | % 'P1pP2n'- projection into a Euclidean space based on the P1 positive eigenvalues
|
---|
| 18 | % and P2 negative eigenvalues; e.g. ALF = '0.7p0.1n', ALF = '7p2n'
|
---|
| 19 | % 1 .. N - number of dimensions in total
|
---|
| 20 | % [P1 P2] - P1 dimensions or preserved fraction of variance in the positive subspace
|
---|
| 21 | % and P2 dimensions or preserved fraction of variance in the negative
|
---|
| 22 | % subspace; e.g. ALF = [5 10], ALF = [0.9 0.1]
|
---|
| 23 | %
|
---|
| 24 | % OUTPUT
|
---|
| 25 | % W PCA mapping in a pseudo-Euclidean space
|
---|
| 26 | % SIG Signature of the output pseudo-Euclidean space
|
---|
| 27 | % L List of eigenvalues
|
---|
| 28 | %
|
---|
| 29 | % DEFAULT
|
---|
| 30 | % XSIG = [K 0]
|
---|
| 31 | % ALF = INF
|
---|
| 32 | %
|
---|
| 33 | % DESCRIPTION
|
---|
| 34 | % PCA mapping W from a K-dimensional pseudo-Euclidean space to an M-dimensional
|
---|
| 35 | % pseudo-Euclidean subspace. M is determined by ALF. The subspace is found, e.g.
|
---|
| 36 | % such that at least a fraction ALF of the total variance is preserved for ALF
|
---|
| 37 | % in (0,1). The resulting Y is found by X*W. The parameter SIG describes the
|
---|
| 38 | % signature of the subspace. L is a sorted list of eigenvalues describing the
|
---|
| 39 | % variances in the (pseudo-)Euclidean space.
|
---|
| 40 | %
|
---|
| 41 | % If X is a labeled dataset, then the averaged covariance matrix is weighted
|
---|
| 42 | % by class priors.
|
---|
| 43 | %
|
---|
| 44 | % Note that a PCA decomposition in a pseudo-Euclidean space is different than in
|
---|
| 45 | % a Euclidean space. Namely, CJ = Q*L*inv(Q), where CJ is a pseudo-Euclidean
|
---|
| 46 | % covariance matrix computed such that CJ= C*J, where C is a Euclidean covariance
|
---|
| 47 | % matrix, J is the fundamental symmetry (taking part in inner products). Q is
|
---|
| 48 | % J-orthogonal, i.e. Q'*J*Q = J, hence inv(Q) = J*Q'*J.
|
---|
| 49 | %
|
---|
| 50 | % SEE ALSO
|
---|
[79] | 51 | % MAPPINGS, DATASETS, PCAM, KPSEM, PSEM
|
---|
[10] | 52 | %
|
---|
| 53 | % LITERATURE
|
---|
| 54 | % 1. E. Pekalska, R.P.W. Duin, The Dissimilarity representation in Pattern Recognition.
|
---|
| 55 | % Foundations and Applications. World Scientific, Singapore, 2005.
|
---|
| 56 | % 2. L. Goldfarb, A unified approach to pattern recognition, Pattern Recognition, vol.17, 575-582, 1984.
|
---|
| 57 | %
|
---|
| 58 |
|
---|
| 59 | % Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com
|
---|
| 60 | % Faculty EWI, Delft University of Technology and
|
---|
| 61 | % School of Computer Science, University of Manchester
|
---|
| 62 |
|
---|
| 63 |
|
---|
| 64 |
|
---|
| 65 | function [W,outsig,L,Q] = pspca(a,sig,alf,prec)
|
---|
| 66 |
|
---|
| 67 | if nargin < 4 | isempty(prec), prec = 1e-4; end
|
---|
| 68 | if nargin < 3 | isempty(alf), alf = inf; end
|
---|
| 69 | if nargin < 2 | isempty(sig), sig = [size(a,1) 0]; end
|
---|
| 70 | if nargin < 1 | isempty(a),
|
---|
[79] | 71 | W = prmapping(mfilename,sig,alf,prec);
|
---|
[10] | 72 | W = setname(W,'Pseudo-Euclidean PCA');
|
---|
| 73 | return
|
---|
| 74 | end
|
---|
| 75 |
|
---|
| 76 |
|
---|
| 77 | if (isdataset(a) | isa(a,'double')),
|
---|
| 78 |
|
---|
| 79 | if ismapping(sig),
|
---|
| 80 | % APPLY MAPPING: project new data using the trained mapping.
|
---|
| 81 | [m,n] = size(a);
|
---|
| 82 | pars = getdata(sig);
|
---|
| 83 |
|
---|
| 84 | % Parameters
|
---|
| 85 | v = pars{1}; % Mapping that shifts data to the origin
|
---|
| 86 | JQ = pars{2}; % J*Q
|
---|
| 87 | sig = pars{3}; % Signature in the output space
|
---|
| 88 | W = (a*v) * JQ;
|
---|
| 89 | if isdataset(W),
|
---|
| 90 | W.user = sig;
|
---|
| 91 | W.name = updname(W.name);
|
---|
| 92 | end
|
---|
| 93 | return;
|
---|
| 94 | end
|
---|
| 95 | end
|
---|
| 96 |
|
---|
| 97 |
|
---|
| 98 | % TRAIN THE MAPPING
|
---|
| 99 | [m,k] = size(a);
|
---|
| 100 | if m < 2,
|
---|
| 101 | error('At least two objects are expected.');
|
---|
| 102 | end
|
---|
| 103 | if sum(sig) ~= k,
|
---|
| 104 | error('Signature does not fit the data dimensionality.')
|
---|
| 105 | end
|
---|
| 106 | isdset = isdataset(a);
|
---|
| 107 |
|
---|
| 108 |
|
---|
| 109 | % Shift mean of data to the origin
|
---|
| 110 | v = scalem(+a);
|
---|
| 111 | aa = a*v;
|
---|
| 112 |
|
---|
| 113 | if ~isdset, % Unlabeled data
|
---|
| 114 | A = +aa;
|
---|
| 115 | else
|
---|
| 116 | c = max(getnlab(aa));
|
---|
| 117 | if c == 0,
|
---|
| 118 | A = +aa;
|
---|
| 119 | else
|
---|
| 120 | p = getprior(a);
|
---|
| 121 | A = [];
|
---|
| 122 | for j = 1:c
|
---|
| 123 | A = [A; +seldat(aa,j)*p(j)];
|
---|
| 124 | end
|
---|
| 125 | end
|
---|
| 126 | end
|
---|
| 127 | G = prcov(A);
|
---|
| 128 | G = 0.5*(G+G'); % Make sure G is symmetric
|
---|
| 129 | if sig(2) > 0,
|
---|
| 130 | J = diag([ones(sig(1),1); -ones(sig(2),1)]);
|
---|
| 131 | G = G*J;
|
---|
| 132 | end
|
---|
| 133 |
|
---|
| 134 | [Q,L] = eig(G);
|
---|
| 135 | Q = real(Q);
|
---|
| 136 | l = diag(real(L));
|
---|
| 137 | [lm,Z] = sort(-abs(l));
|
---|
| 138 | Q = Q(:,Z);
|
---|
| 139 | l = l(Z);
|
---|
| 140 |
|
---|
| 141 | [I,outsig] = seleigs(l,alf,prec); % I is the index of selected eigenvalues
|
---|
| 142 | L = l(I); % Eigenvalues
|
---|
| 143 | Q = Q(:,I); % Eigenvectors
|
---|
| 144 |
|
---|
| 145 |
|
---|
| 146 | if sig(2) > 0,
|
---|
| 147 | % Q is NOT orthogonal, but should be J-orthogonal, i.e. Q'*J*Q = J
|
---|
| 148 | % Normalize Q to be J-orthogonal
|
---|
| 149 | Q = Q*diag(1./sqrt(abs(diag(Q'*J*Q))));
|
---|
| 150 | Q = J*Q;
|
---|
| 151 | end
|
---|
| 152 |
|
---|
| 153 |
|
---|
| 154 | % Determine the mapping
|
---|
[79] | 155 | W = prmapping(mfilename,'trained',{v,Q,outsig,sig},[],k,sum(outsig));
|
---|
[10] | 156 | W = setname(W,'Pseudo-Euclidean PCA');
|
---|
| 157 | return
|
---|