source: distools/distools/augpsem.m @ 1

Last change on this file since 1 was 1, checked in by dtax, 15 years ago

Dissimilarity toolbox, start!

File size: 6.6 KB
Line 
1%AUGPSEM Augmented Pseudo-Euclidean Linear Embedding
2%
3% --- THIS IS A TEST VERSION! DO NOT RELY ON IT. ----
4%
5%   [W,SIG,L] = AUGPSEM(D,ALF,P)
6%
7% INPUT
8%   D   NxN symmetric dissimilarity matrix (dataset)
9%   ALF Parameter determining the dimensionality and the mapping (optional, defaulf: Inf)
10%       (0,1)   - fraction of the total (absolute value) preserved variance
11%        Inf    - no dimensionality reduction, keeping all dimensions (it's noisy)
12%       'p'     - projection into a Euclidean space based on positive eigenvalues only
13%       'PARp'  - projection into a Euclidean space based on the PAR fraction of
14%                 positive eigenvalues; e.g. ALF = '0.9p'
15%       'n'     - projection into a Euclidean space based on negative eigenvalues only
16%       'PARn'  - projection into a Euclidean space based on the PAR fraction of
17%                 positive eigenvalues; e.g. ALF = '0.7n'
18%       1 .. N  - number of dimensions <= N
19%   P   Integer between 0 and N specifying which object is mapped at the origin;
20%       0 stands for the mean; (optional, default: 0)
21%
22% OUTPUT
23%   W   Augmented pseudo-Euclidean embedding
24%   SIG Signature of the space
25%   L   List of eigenvalues
26%
27% DEFAULT
28%   ALF = INF
29%   P   = 0
30%
31% DESCRIPTION
32% Linear mapping W onto an M-dimensional pseudo-Euclidean subspace from a symmetric,
33% square dissimilarity matrix D such that the dissimilarities are preserved.
34% M is determined by ALF. E.g., the subspace is found such that at least a fraction
35% ALF of the total variance is preserved for ALF in (0,1). The resulting X is found
36% by D*W.
37% This is an augmented embedding of the standard pseudo-Euclidean embedding (PSEM)
38% such that the dissimilairites are perfectly preserved. The augmentation is by one
39% (Euclidean case) or two (pseudo-Euclidean case) dimensions. This has no effect
40% on the original data D(R,R); the additional dimension(s) are zeros. The effect
41% is apparent when test data D(Te,R) are projected. Due to the projection error,
42% the new dissimilarities are not preserved by PSEM, while they are preserved by
43% the augmented embedding.
44%
45% The parameter SIG describes the signature of the subspace. L is a sorted list
46% of eigenvalues describing the variances in the (pseudo-)Euclidean space.
47%
48% SEE ALSO
49% PSEM, MAPPINGS, DATASETS
50%
51% LITERATURE
52% 1. A. Harol, E.Pekalska, S.Verzakov, R.P.W. Duin, "Augmented embedding of
53% dissimilarity data into (pseudo-)Euclidean spaces", Joint Workshops on S+SSPR
54% Lecture Notes in Computer Science, vol. 4109, 613-621, 2006.
55% 2. L. Goldfarb, A unified approach to pattern recognition, Pattern Recognition,
56% vol.17, 575-582, 1984.
57
58% Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com
59% Faculty EWI, Delft University of Technology and
60% School of Computer Science, University of Manchester
61
62
63
64function [W,sig,L] = augpsem(d,alf,pzero)
65
66if nargin < 3 | isempty(pzero), pzero = 0; end
67if nargin < 2 | isempty(alf), alf = inf; end
68if nargin < 1 | isempty(d)
69   W = mapping(mfilename,alf,pzero);
70   W = setname(W,'Augmented PE embedding');
71   return
72end
73
74
75
76if (isdataset(d) | isa(d,'double'))
77  if ismapping(alf)
78
79    % APPLY THE MAPPING
80    pars = getdata(alf);
81    [m,n] = size(d);
82    ds    = sum(diag(+d));
83
84    Wp    = pars{1};   % projection onto positive Euclidean subspace
85    Wn    = pars{2};   % projection onto negative Euclidean subspace
86    dme   = pars{3};   % average square distance of the training disssimilarities
87    pzero = pars{4};   % pzero=0 -> the mean of the embedded configuration lies at 0, otherwise the mean lies at pzero
88    tsig  = pars{5};   % true signature of PE embedding
89    sig   = pars{6};   % signature of the augmented PE embedding
90    tol   = pars{7};
91
92    Xp = d*Wp;
93    if tsig(2) > 0,
94      Xn = d*Wn;
95      J  = diag([ones(tsig(1),1); -ones(tsig(2),1)]);
96    else
97      Xn = [];
98      J = diag(ones(tsig(1),1));
99    end
100    XX = [+Xp +Xn];
101
102    % Compute the true PE norm depending on pzero
103    if pzero > 0,
104      Xtnorm   = +d(:,pzero).^2;
105    else
106      Xtnorm   = mean(+d.^2,2) - 0.5*dme;
107    end
108
109    % Compute the PE norm of the projected data
110    Xpnorm = diag(XX*J*XX');
111
112    % Compute the square projection error
113    % In a perfect Euclidean embedding, the true norm should >= the projected norm.
114    perr   = (Xtnorm - Xpnorm);  % negative signs of perr indicate PE space
115
116    % To remove noise, set very small values to zero
117    P = find(abs(perr) < tol);
118    perr(P) = 0;
119
120    % Additional 'positive' dimension is extablish by positive projection error
121    Ip =  find(perr >= 0);
122    Zp = zeros(m,1);
123    Zp(Ip,1) = sqrt(perr(Ip));
124
125    W = Xp;
126    if sig(2) > 0,
127      % Additional 'negative' dimension is extablish by negative projection error
128      In =  find(perr < 0);
129      Zn = zeros(m,1);
130      Zn(In,1) = -sqrt(abs(perr(In)));
131      W = setdat(W, [Xp Zp Xn Zn]);
132      if max(abs([Zp; Zn])) < tol & ds > tol & ~issym(d),
133         prwarning(1,'Augmented dimensions are close to zero. Perform PSEM, instead.');
134      end
135    else
136      W = setdat(W, [Xp Zp]);
137      if max(abs(Zp)) < tol & ds > tol & ~issym(d),
138         prwarning(1,'Augmented dimension is close to zero. Perform PSEM, instead.');
139      end
140    end
141    W.user = sig;
142    return
143  end
144end
145
146
147% TRAIN THE MAPPING
148% Tolerance value used in comparisons
149if mean(+d(:)) < 1,
150  tol = 1e-12;
151else
152  tol = 1e-10;
153end
154
155[n,m] = size(d);
156if ~issym(d,tol),
157  prwarning(1,'Matrix should be symmetric. It is made symmetric by averaging.')
158  d = 0.5*(d+d');
159end
160
161if pzero > n,
162  error('Wrong third parameter.');
163end
164
165[W,tsig,L,Q] = psem(d,alf,pzero);
166Wp  = psem(W,'p');
167Wn  = [];
168sig = tsig;   % this is the signature describing augmented mapping
169I   = tsig > 0;
170sig (I) = sig(I) + 1;
171if tsig(2) > 0,
172  Wn  = psem(W,'n');
173end
174dme = mean(+d(:).^2);
175
176
177%%%%% ------------ THIS IS NOT USED now ------------ %%%%%
178%% Compute the true PE norm depending on pzero
179%if pzero > 0,
180%  Xtnorm = +d(:,pzero).^2;
181%else
182%  Xtnorm = mean(+d.^2,2) - 0.5*dme;
183%end
184%
185%% Compute the PE norm of the projected data
186%% Note that X = Q * diag(sqrt(abs(L))), so
187%% Xpnorm = diag (X*J*X') = diag(Q * diag(L) * Q')
188%Xpnorm = diag(Q * diag(L) * Q');
189%
190%% Compute the square projection error
191%% It is different from zero if ALF is not 0.99999999 or Inf
192%perr   = Xtnorm - Xpnorm;  % negative values indicate PE space
193%if sum(perr) < tol,
194%  perr = zeros(size(d,1),1);
195%end
196%%%%% ---------------------------------------------- %%%%%
197
198W = mapping(mfilename,'trained',{Wp,Wn,dme,pzero,tsig,sig,tol},[],m,sum(sig));
199W = setname(W,'Augmented PE embedding');
200return
Note: See TracBrowser for help on using the repository browser.