1 | %PROXXM Proximity Mapping |
---|
2 | % |
---|
3 | % W = PROXXM(A,TYPE,P,WEIGHTS) |
---|
4 | % W = A*PROXXM([],TYPE,P,WEIGHTS) |
---|
5 | % |
---|
6 | % INPUT |
---|
7 | % A MxK Dataset |
---|
8 | % TYPE Type of the proximity (optional; default: 'distance') |
---|
9 | % P Parameter of the proximity (optional; default: 1) |
---|
10 | % WEIGHTS Weights (optional; default: all 1) |
---|
11 | % |
---|
12 | % OUTPUT |
---|
13 | % W Proximity mapping |
---|
14 | % |
---|
15 | % DESCRIPTION |
---|
16 | % Computation of the KxM proximity mapping (or kernel) defined by the |
---|
17 | % MxK dataset A. Unlabeled objects in A are neglected. If B is an NxK |
---|
18 | % dataset, then D=B*W is the NxM proximity matrix between B and A. The |
---|
19 | % proximities can be defined by the following types: |
---|
20 | % |
---|
21 | % 'POLYNOMIAL' | 'P': SIGN(A*B').*(A*B').^P |
---|
22 | % 'HOMOGENEOUS' | 'H': SIGN(A*B').*(A*B').^P |
---|
23 | % 'EXP' | 'E': EXP(-(||A-B||)/P) |
---|
24 | % 'EXP-SUM' | 'ES': SUM_Z SIGN(P(Z)) * EXP(-(||A-B||)/P(Z)) |
---|
25 | % 'RBF' | 'R': EXP(-(||A-B||.^2)/(P*P)) |
---|
26 | % 'RBF-SUM' | 'RS': SUM_Z SIGN(P(Z)) * EXP(-(||A-B||.^2)/(P(Z)^2)) |
---|
27 | % 'SIGMOID' | 'S': SIGM(A*B'/P) |
---|
28 | % 'DSIGMOID' | 'DS': SIGM(||A-B||^(2P(1))/P(2)) |
---|
29 | % 'DISTANCE' | 'D': ||A-B||.^P |
---|
30 | % 'MULTIQUADRIC' | 'MQ': SQRT(||A-B||.^2/P(1) + P(2)) |
---|
31 | % 'THIN-PLATE' | 'TP': ||A-B||.^(2*P(1))/P(2) * LOG(1+||A-B||.^(2*P(1))/P(2)) |
---|
32 | % 'N-THIN-PLATE' | 'NTP': -||A-B||.^(2*P(1))/P(2) * LOG(1+||A-B||.^(2*P(1))/P(2)) |
---|
33 | % 'MINKOWSKI' | 'M': SUM(|A-B|^P).^(1/P) |
---|
34 | % 'CITY-BLOCK' | 'C': SUM(|A-B|) |
---|
35 | % 'COSINE' | 'O': 1 - (A*B')/||A||*||B|| |
---|
36 | % 'FOURIER' | 'F' |
---|
37 | % 'TANH' | 'T': TANH(P*(A*B')/LENGTH(A) + P); |
---|
38 | % 'ANOVA' | 'A': ANOVA MODEL |
---|
39 | % 'BSPLINE' | 'B': BSPLINE MODEL, ORDER P |
---|
40 | % 'ANOVABSPLINE' | 'AB': ANOVA-BSPLINE MODEL, ORDER P |
---|
41 | % 'ANOVASPLINE1' | 'AS1':ANOVA-SPLINE MODEL, ORDER 1 |
---|
42 | % 'ANOVASPLINE2' | 'AS2':ANOVA-SPLINE MODEL, ORDER 2 |
---|
43 | % 'ANOVASPLINE3' | 'AS3':ANOVA-SPLINE MODEL, ORDER 3 |
---|
44 | % |
---|
45 | % In the polynomial case for a non-integer P, the proximity is computed |
---|
46 | % as D = SIGN(S+1).*ABS(S+1).^P, in order to avoid problems with negative |
---|
47 | % inner products S = A*B'. The features of the objects in A and B may be |
---|
48 | % weighted by the weights in the vector WEIGHTS. |
---|
49 | % |
---|
50 | % SEE ALSO |
---|
51 | % PROXM, MAPPINGS, DATASETS |
---|
52 | |
---|
53 | % Copyright: Robert P.W. Duin, r.p.w.duin@prtools.org, Elzbieta Pekalska, ela.pekalska@googlemail.com |
---|
54 | % Faculty EWI, Delft University of Technology and |
---|
55 | % School of Computer Science, University of Manchester |
---|
56 | |
---|
57 | |
---|
58 | |
---|
59 | function W = proxxm(A,type,s,weights) |
---|
60 | |
---|
61 | prtrace(mfilename); |
---|
62 | |
---|
63 | if (nargin < 4) |
---|
64 | weights = []; |
---|
65 | prwarning(4,'Weights are not supplied, assuming all 1.'); |
---|
66 | end |
---|
67 | |
---|
68 | if (nargin < 3) | (isempty(s)) |
---|
69 | s = [1 1]; |
---|
70 | prwarning(4,'The proximity parameter is not supplied, assuming 1.'); |
---|
71 | end |
---|
72 | if length(s) < 2, s(2) = 1; end |
---|
73 | |
---|
74 | if (nargin < 2) | (isempty(type)) |
---|
75 | type = 'd'; |
---|
76 | prwarning(4,'Type of the proximity is not supplied, assuming ''DISTANCE''.'); |
---|
77 | end |
---|
78 | |
---|
79 | |
---|
80 | % No data, return an UNTRAINED mapping |
---|
81 | if (nargin < 1) | (isempty(A)) |
---|
82 | W = mapping(mfilename,{type,s,weights}); |
---|
83 | W = setname(W,'Proximity mapping'); |
---|
84 | return; |
---|
85 | end |
---|
86 | |
---|
87 | |
---|
88 | [m,k] = size(A); |
---|
89 | |
---|
90 | if (isstr(type)) |
---|
91 | % Definition of the mapping, just store parameters. |
---|
92 | all = char('polynomial','p','homogeneous','h','exp','e','exp-sum','es','rbf','r',... |
---|
93 | 'rbf-sum','rs','sigmoid','s','distance','d','thin-plate','tp',... |
---|
94 | 'multiquadric','mq','dsigmoid','ds','minkowski','m','city-block','c',... |
---|
95 | 'cosine','o','uniform','u','anova','a','anovaspline1','as1',... |
---|
96 | 'anovaspline2','as2','anovaspline3','as3','anovabspline',... |
---|
97 | 'ab','bspline','b','spline','fourier','f','tanh','t',... |
---|
98 | 'n-thin-plate','ntp','shiftdist'); |
---|
99 | if (~any(strcmp(cellstr(all),type))) |
---|
100 | error(['Unknown proximity type: ' type]) |
---|
101 | end |
---|
102 | |
---|
103 | A = cdats(A,1); |
---|
104 | [m,k] = size(A); |
---|
105 | if isdataset(A) |
---|
106 | W = mapping(mfilename,'trained',{A,type,s,weights},getlab(A),getfeatsize(A),getobjsize(A)); |
---|
107 | else |
---|
108 | W = mapping(mfilename,'trained',{A,type,s,weights},[],k,m); |
---|
109 | end |
---|
110 | W = setname(W,'Proximity mapping'); |
---|
111 | |
---|
112 | |
---|
113 | elseif ismapping(type) |
---|
114 | |
---|
115 | % Execution of the mapping: compute a proximity matrix |
---|
116 | % between the input data A and B; output stored in W. |
---|
117 | |
---|
118 | w = type; |
---|
119 | [B,type,s,weights] = deal(w.data{1},w.data{2},w.data{3},w.data{4}); |
---|
120 | [kk,n] = size(w); |
---|
121 | if (k ~= kk) |
---|
122 | error('Mismatch in sizes between the mapping and its data stored internally.'); |
---|
123 | end |
---|
124 | if (~isempty(weights)) |
---|
125 | if (length(weights) ~= k), |
---|
126 | error('Weight vector has a wrong length.'); |
---|
127 | end |
---|
128 | A = A.*(ones(m,1)*weights(:)'); |
---|
129 | B = B.*(ones(n,1)*weights(:)'); |
---|
130 | end |
---|
131 | AA = A; |
---|
132 | A = +A; |
---|
133 | B = +B; |
---|
134 | |
---|
135 | switch type |
---|
136 | case {'polynomial','p'} |
---|
137 | D = A*B' + ones(m,n); |
---|
138 | if (s ~= round(s(1))) |
---|
139 | D = sign(D).*abs(D).^s(1); |
---|
140 | elseif (s(1) ~= 1) |
---|
141 | D = D.^s(1); |
---|
142 | end |
---|
143 | |
---|
144 | case {'homogeneous','h'} |
---|
145 | D = (A*B'); |
---|
146 | if (s ~= round(s(1))) |
---|
147 | D = sign(D).*abs(D).^s(1); |
---|
148 | elseif (s(1) ~= 1) |
---|
149 | D = D.^s(1); |
---|
150 | end |
---|
151 | |
---|
152 | case {'sigmoid','s'} |
---|
153 | D = (A*B'); |
---|
154 | D = sigm(D/s(1)); |
---|
155 | |
---|
156 | case {'dsigmoid','ds'} |
---|
157 | D = dist2(B,A).^s(1); |
---|
158 | D = sigm(D/s(2)); |
---|
159 | |
---|
160 | case {'city-block','c'} |
---|
161 | D = zeros(m,n); |
---|
162 | for j=1:n |
---|
163 | D(:,j) = sum(abs(A - repmat(+B(j,:),m,1)),2); |
---|
164 | end |
---|
165 | |
---|
166 | case {'minkowski','m'} |
---|
167 | D = zeros(m,n); |
---|
168 | for j=1:n |
---|
169 | D(:,j) = sum(abs(A - repmat(+B(j,:),m,1)).^s(1),2).^(1/s(1)); |
---|
170 | end |
---|
171 | |
---|
172 | case {'exp','e'} |
---|
173 | D = dist2(B,A); |
---|
174 | D = exp(-sqrt(D)/s(1)); |
---|
175 | |
---|
176 | case {'exp-sum','es'} |
---|
177 | d = sqrt(dist2(B,A)); |
---|
178 | D = sign(s(1))*exp(-d/s(1)); |
---|
179 | for z=2:length(s) |
---|
180 | D = D + sign(s(z))*exp(-d/s(z)); |
---|
181 | end |
---|
182 | |
---|
183 | case {'rbf','r'} |
---|
184 | D = dist2(B,A); |
---|
185 | D = exp(-D/(s(1)*s(1))); |
---|
186 | |
---|
187 | case {'rbf-sum','rs'} |
---|
188 | d = dist2(B,A); |
---|
189 | D = sign(s(1))*exp(-d/(s(1)^2));; |
---|
190 | for z=2:length(s) |
---|
191 | D = D + sign(s(z))*exp(-d/(s(z)^2)); |
---|
192 | end |
---|
193 | |
---|
194 | case {'distance','d'} |
---|
195 | D = dist2(B,A); |
---|
196 | if s(1) ~= 2 |
---|
197 | D = sqrt(D).^s(1); |
---|
198 | end |
---|
199 | |
---|
200 | case {'shiftdist'} % not properly implemented for test objects |
---|
201 | D = dist2(B,A); |
---|
202 | if s(1) ~= 2 |
---|
203 | D = sqrt(D).^s(1); |
---|
204 | end |
---|
205 | D = -D; |
---|
206 | n = size(D,1); |
---|
207 | % shift data such that mean lies in the origin |
---|
208 | H = -repmat(1/n,n,n); |
---|
209 | H(1:n+1:end) = H(1:n+1:end) + 1; % H = eye(n) - ones(n,n)/n |
---|
210 | size(D) |
---|
211 | size(H) |
---|
212 | D = -0.5 * H * D * H; % D is now the matrix of inner products |
---|
213 | |
---|
214 | case {'multiquadric','mq'} |
---|
215 | D = sqrt(dist2(B,A)/s(1) +s(2).^2); |
---|
216 | |
---|
217 | case {'thin-plate','tp'} |
---|
218 | d = dist2(B,A).^s(1)/s(2); |
---|
219 | D = d.*log(1+d); |
---|
220 | |
---|
221 | case {'n-thin-plate','ntp'} |
---|
222 | d = dist2(B,A).^s(1)/s(2); |
---|
223 | D = -d.*log(1+d); |
---|
224 | |
---|
225 | case {'cosine','o'} |
---|
226 | D = (A*B'); |
---|
227 | lA = sqrt(sum(A.*A,2)); |
---|
228 | lB = sqrt(sum(B.*B,2)); |
---|
229 | D = 1 - D./(lA*lB'); |
---|
230 | |
---|
231 | case {'tanh','t'} |
---|
232 | D = tanh(s(1)*(A*B')/k + s(1)); |
---|
233 | |
---|
234 | case {'fourier','f'} |
---|
235 | zz = sin(s(1) + 1/2)*2*ones(k,1); |
---|
236 | D = zeros(m,n); |
---|
237 | for oo=1:m |
---|
238 | for o=1:n |
---|
239 | z = zz; |
---|
240 | I = find(A(oo,:)-B(o,:)); |
---|
241 | z(I) = sin(s(1) + 1/2)*(A(oo,I)-B(o,I))./sin((A(oo,I)-B(o,I))/2); |
---|
242 | D(oo,o) = prod(z); |
---|
243 | end |
---|
244 | end |
---|
245 | |
---|
246 | case {'spline','anovaspline1','as1'} |
---|
247 | D = zeros(m,n); |
---|
248 | for oo=1:m |
---|
249 | for o=1:n |
---|
250 | mAB = min(A(oo,:),B(o,:)); |
---|
251 | z = 1 + A(oo,:).*B(o,:).*(1 + mAB) - ((A(oo,:)+B(o,:))/2).*mAB.^2 + mAB.^3/3; |
---|
252 | D(oo,o) = prod(z); |
---|
253 | end |
---|
254 | end |
---|
255 | |
---|
256 | case {'anova','a'} |
---|
257 | D = zeros(m,n); |
---|
258 | for oo=1:m |
---|
259 | for o=1:n |
---|
260 | mAB = min(A(oo,:),B(o,:)); |
---|
261 | z = 1 + A(oo,:).*B(o,:).*(1 + 0.5*mAB) - mAB.^3/6; |
---|
262 | D(oo,o) = prod(z); |
---|
263 | end |
---|
264 | end |
---|
265 | |
---|
266 | case {'bspline','b'} |
---|
267 | D = zeros(m,n); |
---|
268 | for oo=1:m |
---|
269 | for o=1:n |
---|
270 | z = 0; |
---|
271 | AmB = A(oo,:)- B(o,:); |
---|
272 | for r = 0:2*(s(1)+1) |
---|
273 | z = z + (-1)^r*binomial(2*(s(1)+1),r)*(max(0,AmB + s(1)+1 - r)).^(2*s(1) + 1); |
---|
274 | end |
---|
275 | D(oo,o) = prod(z); |
---|
276 | end |
---|
277 | end |
---|
278 | |
---|
279 | case {'anovaspline2','as2'} |
---|
280 | D = zeros(m,n); |
---|
281 | for oo=1:m |
---|
282 | for o=1:n |
---|
283 | mAB = min(A(oo,:),B(o,:)); |
---|
284 | AmB = A(oo,:) - B(o,:); |
---|
285 | AB = A(oo,:) .* B(o,:); |
---|
286 | ApB = A(oo,:) + B(o,:); |
---|
287 | z = 1 + AB + AB.^2.*(1 + mAB) - AB.*(ApB).*mAB.^2 + ... |
---|
288 | (ApB.^2+2*AB).*mAB.^3/3 - 0.5*ApB.*mAB.^4 + 0.2*mAB.^5; |
---|
289 | D(oo,o) = prod(z); |
---|
290 | end |
---|
291 | end |
---|
292 | |
---|
293 | case {'anovaspline3','as3'} |
---|
294 | D = zeros(m,n); |
---|
295 | for oo=1:m |
---|
296 | for o=1:n |
---|
297 | mAB = min(A(oo,:),B(o,:)); |
---|
298 | AmB = A(oo,:)- B(o,:); |
---|
299 | AB = A(oo,:).*B(o,:); |
---|
300 | ApB = A(oo,:)+B(o,:); |
---|
301 | z = 1 + AB + AB.^2 + AB.^3.*(1 + mAB) - 1.5 * AB.^2 .*(ApB).*mAB.^2 + ... |
---|
302 | AB.*(ApB.^2+AB).*mAB.^3 - 0.25 * (3*ApB.^3- 2*B(oo,:).^2 - ... |
---|
303 | 2*A(o,:).^2).*mAB.^4 + 0.6 * (ApB.^2+AB).*mAB.^5 - 0.5 * ApB .*mAB.^6+mAB.^7/7; |
---|
304 | D(oo,o) = prod(z); |
---|
305 | end |
---|
306 | end |
---|
307 | |
---|
308 | case {'anovabspline','ab'} |
---|
309 | D = zeros(m,n); |
---|
310 | for oo=1:m |
---|
311 | for o=1:n |
---|
312 | z = 0; |
---|
313 | AmB = A(oo,:)- B(o,:); |
---|
314 | for r = 0:2*(s(1)+1) |
---|
315 | z = z + (-1)^r*binomial(2*(s(1)+1),r)*(max(0,BmA + s(1)+1 - r)).^(2*s(1) + 1); |
---|
316 | end |
---|
317 | D(oo,o) = prod(1+z); |
---|
318 | end |
---|
319 | end |
---|
320 | |
---|
321 | otherwise |
---|
322 | error('Unknown proximity type') |
---|
323 | end |
---|
324 | W = setdat(AA,D,w); |
---|
325 | |
---|
326 | else |
---|
327 | error('Illegal TYPE argument.') |
---|
328 | end |
---|
329 | return; |
---|
330 | |
---|
331 | |
---|
332 | |
---|
333 | function D = dist2(B,A) |
---|
334 | % Computes square Euclidean distance, avoiding large matrices for a high |
---|
335 | % dimensionality data |
---|
336 | m = size(A,1); |
---|
337 | n = size(B,1); |
---|
338 | D = ones(m,1)*sum(B'.*B',1); |
---|
339 | D = D + sum(A'.*A',1)'*ones(1,n); |
---|
340 | D = D - 2 .* (+A)*(+B)'; |
---|
341 | % Clean result. |
---|
342 | J = find(D<0); |
---|
343 | D(J) = zeros(size(J)); |
---|
344 | return |
---|