source: distools/proxxm.m @ 27

Last change on this file since 27 was 10, checked in by bduin, 14 years ago
File size: 9.7 KB
Line 
1%PROXXM Proximity Mapping
2%
3%   W = PROXXM(A,TYPE,P,WEIGHTS)
4%   W = A*PROXXM([],TYPE,P,WEIGHTS)
5%
6% INPUT
7%   A       MxK Dataset
8%   TYPE    Type of the proximity (optional; default: 'distance')
9%   P       Parameter of the proximity (optional; default: 1)
10%   WEIGHTS Weights (optional; default: all 1)
11%
12%   OUTPUT
13%   W       Proximity mapping
14%
15% DESCRIPTION
16% Computation of the KxM proximity mapping (or kernel) defined by the
17% MxK dataset A. Unlabeled objects in A are neglected. If B is an NxK
18% dataset, then D=B*W is the NxM proximity matrix between B and A. The
19% proximities can be defined by the following types:
20%
21%   'POLYNOMIAL'   | 'P':  SIGN(A*B').*(A*B').^P
22%   'HOMOGENEOUS'  | 'H':  SIGN(A*B').*(A*B').^P
23%   'EXP'          | 'E':  EXP(-(||A-B||)/P)
24%   'EXP-SUM'      | 'ES': SUM_Z SIGN(P(Z)) * EXP(-(||A-B||)/P(Z))
25%   'RBF'          | 'R':  EXP(-(||A-B||.^2)/(P*P))
26%   'RBF-SUM'      | 'RS': SUM_Z SIGN(P(Z)) * EXP(-(||A-B||.^2)/(P(Z)^2))
27%   'SIGMOID'      | 'S':  SIGM(A*B'/P)
28%   'DSIGMOID'     | 'DS': SIGM(||A-B||^(2P(1))/P(2))
29%   'DISTANCE'     | 'D':  ||A-B||.^P
30%   'MULTIQUADRIC' | 'MQ': SQRT(||A-B||.^2/P(1) + P(2))
31%   'THIN-PLATE'   | 'TP': ||A-B||.^(2*P(1))/P(2) * LOG(1+||A-B||.^(2*P(1))/P(2))
32%   'N-THIN-PLATE' | 'NTP': -||A-B||.^(2*P(1))/P(2) * LOG(1+||A-B||.^(2*P(1))/P(2))
33%   'MINKOWSKI'    | 'M':  SUM(|A-B|^P).^(1/P)
34%   'CITY-BLOCK'   | 'C':  SUM(|A-B|)
35%   'COSINE'       | 'O':  1 - (A*B')/||A||*||B||
36%   'FOURIER'      | 'F'
37%   'TANH'         | 'T':  TANH(P*(A*B')/LENGTH(A) + P);
38%   'ANOVA'        | 'A':  ANOVA MODEL
39%   'BSPLINE'      | 'B':  BSPLINE MODEL, ORDER P
40%   'ANOVABSPLINE' | 'AB': ANOVA-BSPLINE MODEL, ORDER P
41%   'ANOVASPLINE1' | 'AS1':ANOVA-SPLINE MODEL, ORDER 1
42%   'ANOVASPLINE2' | 'AS2':ANOVA-SPLINE MODEL, ORDER 2
43%   'ANOVASPLINE3' | 'AS3':ANOVA-SPLINE MODEL, ORDER 3
44%
45% In the polynomial case for a non-integer P, the proximity is computed
46% as D = SIGN(S+1).*ABS(S+1).^P, in order to avoid problems with negative
47% inner products S = A*B'. The features of the objects in A and B may be
48% weighted by the weights in the vector WEIGHTS.
49%
50% SEE ALSO
51% PROXM, MAPPINGS, DATASETS
52
53% Copyright: Robert P.W. Duin, r.p.w.duin@prtools.org, Elzbieta Pekalska, ela.pekalska@googlemail.com
54% Faculty EWI, Delft University of Technology and
55% School of Computer Science, University of Manchester
56
57
58
59function W = proxxm(A,type,s,weights)
60
61prtrace(mfilename);
62
63if (nargin < 4)
64  weights = [];
65  prwarning(4,'Weights are not supplied, assuming all 1.');
66end
67
68if (nargin < 3) | (isempty(s))
69  s = [1 1];
70  prwarning(4,'The proximity parameter is not supplied, assuming 1.');
71end
72if length(s) < 2, s(2) = 1; end
73
74if (nargin < 2) | (isempty(type))
75  type = 'd';
76  prwarning(4,'Type of the proximity is not supplied, assuming ''DISTANCE''.');
77end
78
79
80% No data, return an UNTRAINED mapping
81if (nargin < 1) | (isempty(A))
82  W = mapping(mfilename,{type,s,weights});
83  W = setname(W,'Proximity mapping');
84  return;
85end
86
87
88[m,k] = size(A);
89
90if (isstr(type))
91  % Definition of the mapping, just store parameters.
92  all = char('polynomial','p','homogeneous','h','exp','e','exp-sum','es','rbf','r',...
93             'rbf-sum','rs','sigmoid','s','distance','d','thin-plate','tp',...
94             'multiquadric','mq','dsigmoid','ds','minkowski','m','city-block','c',...
95             'cosine','o','uniform','u','anova','a','anovaspline1','as1',...
96             'anovaspline2','as2','anovaspline3','as3','anovabspline',...
97             'ab','bspline','b','spline','fourier','f','tanh','t',...
98             'n-thin-plate','ntp','shiftdist');
99  if (~any(strcmp(cellstr(all),type)))
100    error(['Unknown proximity type: ' type])
101  end
102
103  A     = cdats(A,1);
104  [m,k] = size(A);
105  if isdataset(A)
106    W = mapping(mfilename,'trained',{A,type,s,weights},getlab(A),getfeatsize(A),getobjsize(A));
107  else
108    W = mapping(mfilename,'trained',{A,type,s,weights},[],k,m);
109  end
110  W = setname(W,'Proximity mapping');
111
112
113elseif ismapping(type)
114
115  % Execution of the mapping: compute a proximity matrix
116  % between the input data A and B; output stored in W.
117
118  w = type;
119  [B,type,s,weights] = deal(w.data{1},w.data{2},w.data{3},w.data{4});
120  [kk,n] = size(w);
121  if (k ~= kk)
122    error('Mismatch in sizes between the mapping and its data stored internally.');
123  end
124  if (~isempty(weights))
125    if (length(weights) ~= k),
126      error('Weight vector has a wrong length.');
127    end
128    A = A.*(ones(m,1)*weights(:)');
129    B = B.*(ones(n,1)*weights(:)');
130  end
131  AA = A;
132  A  = +A;
133  B  = +B;
134
135  switch type
136    case {'polynomial','p'}
137      D = A*B' + ones(m,n);
138      if (s ~= round(s(1)))
139        D = sign(D).*abs(D).^s(1);
140      elseif (s(1) ~= 1)
141        D = D.^s(1);
142      end
143
144    case {'homogeneous','h'}
145      D = (A*B');
146      if (s ~= round(s(1)))
147        D = sign(D).*abs(D).^s(1);
148      elseif (s(1) ~= 1)
149        D = D.^s(1);
150      end
151
152    case {'sigmoid','s'}
153      D = (A*B');
154      D = sigm(D/s(1));
155
156    case {'dsigmoid','ds'}
157      D = dist2(B,A).^s(1);
158      D = sigm(D/s(2));
159
160    case {'city-block','c'}
161      D = zeros(m,n);
162      for j=1:n
163        D(:,j) = sum(abs(A - repmat(+B(j,:),m,1)),2);
164      end
165
166    case {'minkowski','m'}
167      D = zeros(m,n);
168      for j=1:n
169        D(:,j) = sum(abs(A - repmat(+B(j,:),m,1)).^s(1),2).^(1/s(1));
170      end
171
172    case {'exp','e'}
173      D = dist2(B,A);
174      D = exp(-sqrt(D)/s(1));
175
176    case {'exp-sum','es'}
177      d = sqrt(dist2(B,A));
178      D = sign(s(1))*exp(-d/s(1));
179      for z=2:length(s)
180        D = D + sign(s(z))*exp(-d/s(z));
181      end
182
183      case {'rbf','r'}
184        D = dist2(B,A);
185        D = exp(-D/(s(1)*s(1)));
186
187      case {'rbf-sum','rs'}
188        d = dist2(B,A);
189        D = sign(s(1))*exp(-d/(s(1)^2));;
190        for z=2:length(s)
191          D = D + sign(s(z))*exp(-d/(s(z)^2));
192        end
193
194      case {'distance','d'}
195        D = dist2(B,A);
196        if s(1) ~= 2
197          D = sqrt(D).^s(1);
198                                end
199
200     case {'shiftdist'} % not properly implemented for test objects
201        D = dist2(B,A);
202        if s(1) ~= 2
203          D = sqrt(D).^s(1);
204        end
205        D = -D;
206                                n = size(D,1);
207        % shift data such that mean lies in the origin
208        H = -repmat(1/n,n,n);
209        H(1:n+1:end) = H(1:n+1:end) + 1;  % H = eye(n) - ones(n,n)/n
210                                size(D)
211                                size(H)
212        D = -0.5 * H * D * H;             % D is now the matrix of inner products
213                               
214      case {'multiquadric','mq'}
215        D = sqrt(dist2(B,A)/s(1) +s(2).^2);
216
217      case {'thin-plate','tp'}
218        d = dist2(B,A).^s(1)/s(2);
219        D = d.*log(1+d);
220
221      case {'n-thin-plate','ntp'}
222        d = dist2(B,A).^s(1)/s(2);
223        D = -d.*log(1+d);
224
225      case {'cosine','o'}
226        D  = (A*B');
227        lA = sqrt(sum(A.*A,2));
228        lB = sqrt(sum(B.*B,2));
229        D  = 1 - D./(lA*lB');
230
231      case {'tanh','t'}
232        D = tanh(s(1)*(A*B')/k + s(1));
233
234      case {'fourier','f'}
235        zz = sin(s(1) + 1/2)*2*ones(k,1);
236        D  = zeros(m,n);
237        for oo=1:m
238          for o=1:n
239            z = zz;
240            I = find(A(oo,:)-B(o,:));
241            z(I) = sin(s(1) + 1/2)*(A(oo,I)-B(o,I))./sin((A(oo,I)-B(o,I))/2);
242            D(oo,o) = prod(z);
243          end
244        end
245
246      case {'spline','anovaspline1','as1'}
247        D = zeros(m,n);
248        for oo=1:m
249          for o=1:n
250            mAB = min(A(oo,:),B(o,:));
251            z   = 1 + A(oo,:).*B(o,:).*(1 + mAB) - ((A(oo,:)+B(o,:))/2).*mAB.^2 + mAB.^3/3;
252            D(oo,o) = prod(z);
253          end
254        end
255
256      case {'anova','a'}
257        D = zeros(m,n);
258        for oo=1:m
259          for o=1:n
260            mAB = min(A(oo,:),B(o,:));
261            z   = 1 + A(oo,:).*B(o,:).*(1 + 0.5*mAB) - mAB.^3/6;
262            D(oo,o) = prod(z);
263          end
264        end
265
266      case {'bspline','b'}
267        D = zeros(m,n);
268        for oo=1:m
269          for o=1:n
270            z = 0;
271            AmB = A(oo,:)- B(o,:);
272            for r = 0:2*(s(1)+1)
273              z = z + (-1)^r*binomial(2*(s(1)+1),r)*(max(0,AmB + s(1)+1 - r)).^(2*s(1) + 1);
274            end
275            D(oo,o) = prod(z);
276          end
277        end
278
279      case {'anovaspline2','as2'}
280        D = zeros(m,n);
281        for oo=1:m
282          for o=1:n
283            mAB = min(A(oo,:),B(o,:));
284            AmB = A(oo,:) - B(o,:);
285            AB  = A(oo,:) .* B(o,:);
286            ApB = A(oo,:) + B(o,:);
287            z   = 1 + AB + AB.^2.*(1 + mAB) - AB.*(ApB).*mAB.^2 + ...
288                  (ApB.^2+2*AB).*mAB.^3/3 - 0.5*ApB.*mAB.^4 + 0.2*mAB.^5;
289            D(oo,o) = prod(z);
290          end
291        end
292
293      case {'anovaspline3','as3'}
294        D = zeros(m,n);
295        for oo=1:m
296          for o=1:n
297            mAB = min(A(oo,:),B(o,:));
298            AmB = A(oo,:)- B(o,:);
299            AB  = A(oo,:).*B(o,:);
300            ApB = A(oo,:)+B(o,:);
301            z   = 1 + AB + AB.^2 + AB.^3.*(1 + mAB) - 1.5 * AB.^2 .*(ApB).*mAB.^2 + ...
302                  AB.*(ApB.^2+AB).*mAB.^3 - 0.25 * (3*ApB.^3- 2*B(oo,:).^2 - ...
303                  2*A(o,:).^2).*mAB.^4 + 0.6 * (ApB.^2+AB).*mAB.^5 - 0.5 * ApB .*mAB.^6+mAB.^7/7;
304            D(oo,o) = prod(z);
305          end
306        end
307
308      case {'anovabspline','ab'}
309        D = zeros(m,n);
310        for oo=1:m
311          for o=1:n
312            z = 0;
313            AmB = A(oo,:)- B(o,:);
314            for r = 0:2*(s(1)+1)
315              z = z + (-1)^r*binomial(2*(s(1)+1),r)*(max(0,BmA + s(1)+1 - r)).^(2*s(1) + 1);
316            end
317          D(oo,o) = prod(1+z);
318        end
319      end
320
321      otherwise
322        error('Unknown proximity type')
323    end
324    W = setdat(AA,D,w);
325
326  else
327      error('Illegal TYPE argument.')
328  end
329return;
330
331
332
333function D = dist2(B,A)
334% Computes square Euclidean distance, avoiding large matrices for a high
335% dimensionality data
336    m = size(A,1);
337    n = size(B,1);
338    D = ones(m,1)*sum(B'.*B',1);
339    D = D + sum(A'.*A',1)'*ones(1,n);
340    D = D - 2 .* (+A)*(+B)';
341    % Clean result.
342    J = find(D<0);
343    D(J) = zeros(size(J));
344return
Note: See TracBrowser for help on using the repository browser.