[10] | 1 | %PROXXM Proximity Mapping |
---|
| 2 | % |
---|
| 3 | % W = PROXXM(A,TYPE,P,WEIGHTS) |
---|
| 4 | % W = A*PROXXM([],TYPE,P,WEIGHTS) |
---|
| 5 | % |
---|
| 6 | % INPUT |
---|
| 7 | % A MxK Dataset |
---|
| 8 | % TYPE Type of the proximity (optional; default: 'distance') |
---|
| 9 | % P Parameter of the proximity (optional; default: 1) |
---|
| 10 | % WEIGHTS Weights (optional; default: all 1) |
---|
| 11 | % |
---|
| 12 | % OUTPUT |
---|
| 13 | % W Proximity mapping |
---|
| 14 | % |
---|
| 15 | % DESCRIPTION |
---|
| 16 | % Computation of the KxM proximity mapping (or kernel) defined by the |
---|
| 17 | % MxK dataset A. Unlabeled objects in A are neglected. If B is an NxK |
---|
| 18 | % dataset, then D=B*W is the NxM proximity matrix between B and A. The |
---|
| 19 | % proximities can be defined by the following types: |
---|
| 20 | % |
---|
| 21 | % 'POLYNOMIAL' | 'P': SIGN(A*B').*(A*B').^P |
---|
| 22 | % 'HOMOGENEOUS' | 'H': SIGN(A*B').*(A*B').^P |
---|
| 23 | % 'EXP' | 'E': EXP(-(||A-B||)/P) |
---|
| 24 | % 'EXP-SUM' | 'ES': SUM_Z SIGN(P(Z)) * EXP(-(||A-B||)/P(Z)) |
---|
| 25 | % 'RBF' | 'R': EXP(-(||A-B||.^2)/(P*P)) |
---|
| 26 | % 'RBF-SUM' | 'RS': SUM_Z SIGN(P(Z)) * EXP(-(||A-B||.^2)/(P(Z)^2)) |
---|
| 27 | % 'SIGMOID' | 'S': SIGM(A*B'/P) |
---|
| 28 | % 'DSIGMOID' | 'DS': SIGM(||A-B||^(2P(1))/P(2)) |
---|
| 29 | % 'DISTANCE' | 'D': ||A-B||.^P |
---|
| 30 | % 'MULTIQUADRIC' | 'MQ': SQRT(||A-B||.^2/P(1) + P(2)) |
---|
| 31 | % 'THIN-PLATE' | 'TP': ||A-B||.^(2*P(1))/P(2) * LOG(1+||A-B||.^(2*P(1))/P(2)) |
---|
| 32 | % 'N-THIN-PLATE' | 'NTP': -||A-B||.^(2*P(1))/P(2) * LOG(1+||A-B||.^(2*P(1))/P(2)) |
---|
| 33 | % 'MINKOWSKI' | 'M': SUM(|A-B|^P).^(1/P) |
---|
| 34 | % 'CITY-BLOCK' | 'C': SUM(|A-B|) |
---|
| 35 | % 'COSINE' | 'O': 1 - (A*B')/||A||*||B|| |
---|
| 36 | % 'FOURIER' | 'F' |
---|
| 37 | % 'TANH' | 'T': TANH(P*(A*B')/LENGTH(A) + P); |
---|
| 38 | % 'ANOVA' | 'A': ANOVA MODEL |
---|
| 39 | % 'BSPLINE' | 'B': BSPLINE MODEL, ORDER P |
---|
| 40 | % 'ANOVABSPLINE' | 'AB': ANOVA-BSPLINE MODEL, ORDER P |
---|
| 41 | % 'ANOVASPLINE1' | 'AS1':ANOVA-SPLINE MODEL, ORDER 1 |
---|
| 42 | % 'ANOVASPLINE2' | 'AS2':ANOVA-SPLINE MODEL, ORDER 2 |
---|
| 43 | % 'ANOVASPLINE3' | 'AS3':ANOVA-SPLINE MODEL, ORDER 3 |
---|
| 44 | % |
---|
| 45 | % In the polynomial case for a non-integer P, the proximity is computed |
---|
| 46 | % as D = SIGN(S+1).*ABS(S+1).^P, in order to avoid problems with negative |
---|
| 47 | % inner products S = A*B'. The features of the objects in A and B may be |
---|
| 48 | % weighted by the weights in the vector WEIGHTS. |
---|
| 49 | % |
---|
| 50 | % SEE ALSO |
---|
| 51 | % PROXM, MAPPINGS, DATASETS |
---|
| 52 | |
---|
| 53 | % Copyright: Robert P.W. Duin, r.p.w.duin@prtools.org, Elzbieta Pekalska, ela.pekalska@googlemail.com |
---|
| 54 | % Faculty EWI, Delft University of Technology and |
---|
| 55 | % School of Computer Science, University of Manchester |
---|
| 56 | |
---|
| 57 | |
---|
| 58 | |
---|
| 59 | function W = proxxm(A,type,s,weights) |
---|
| 60 | |
---|
| 61 | prtrace(mfilename); |
---|
| 62 | |
---|
| 63 | if (nargin < 4) |
---|
| 64 | weights = []; |
---|
| 65 | prwarning(4,'Weights are not supplied, assuming all 1.'); |
---|
| 66 | end |
---|
| 67 | |
---|
| 68 | if (nargin < 3) | (isempty(s)) |
---|
| 69 | s = [1 1]; |
---|
| 70 | prwarning(4,'The proximity parameter is not supplied, assuming 1.'); |
---|
| 71 | end |
---|
| 72 | if length(s) < 2, s(2) = 1; end |
---|
| 73 | |
---|
| 74 | if (nargin < 2) | (isempty(type)) |
---|
| 75 | type = 'd'; |
---|
| 76 | prwarning(4,'Type of the proximity is not supplied, assuming ''DISTANCE''.'); |
---|
| 77 | end |
---|
| 78 | |
---|
| 79 | |
---|
| 80 | % No data, return an UNTRAINED mapping |
---|
| 81 | if (nargin < 1) | (isempty(A)) |
---|
| 82 | W = mapping(mfilename,{type,s,weights}); |
---|
| 83 | W = setname(W,'Proximity mapping'); |
---|
| 84 | return; |
---|
| 85 | end |
---|
| 86 | |
---|
| 87 | |
---|
| 88 | [m,k] = size(A); |
---|
| 89 | |
---|
| 90 | if (isstr(type)) |
---|
| 91 | % Definition of the mapping, just store parameters. |
---|
| 92 | all = char('polynomial','p','homogeneous','h','exp','e','exp-sum','es','rbf','r',... |
---|
| 93 | 'rbf-sum','rs','sigmoid','s','distance','d','thin-plate','tp',... |
---|
| 94 | 'multiquadric','mq','dsigmoid','ds','minkowski','m','city-block','c',... |
---|
| 95 | 'cosine','o','uniform','u','anova','a','anovaspline1','as1',... |
---|
| 96 | 'anovaspline2','as2','anovaspline3','as3','anovabspline',... |
---|
| 97 | 'ab','bspline','b','spline','fourier','f','tanh','t',... |
---|
| 98 | 'n-thin-plate','ntp','shiftdist'); |
---|
| 99 | if (~any(strcmp(cellstr(all),type))) |
---|
| 100 | error(['Unknown proximity type: ' type]) |
---|
| 101 | end |
---|
| 102 | |
---|
| 103 | A = cdats(A,1); |
---|
| 104 | [m,k] = size(A); |
---|
| 105 | if isdataset(A) |
---|
| 106 | W = mapping(mfilename,'trained',{A,type,s,weights},getlab(A),getfeatsize(A),getobjsize(A)); |
---|
| 107 | else |
---|
| 108 | W = mapping(mfilename,'trained',{A,type,s,weights},[],k,m); |
---|
| 109 | end |
---|
| 110 | W = setname(W,'Proximity mapping'); |
---|
| 111 | |
---|
| 112 | |
---|
| 113 | elseif ismapping(type) |
---|
| 114 | |
---|
| 115 | % Execution of the mapping: compute a proximity matrix |
---|
| 116 | % between the input data A and B; output stored in W. |
---|
| 117 | |
---|
| 118 | w = type; |
---|
| 119 | [B,type,s,weights] = deal(w.data{1},w.data{2},w.data{3},w.data{4}); |
---|
| 120 | [kk,n] = size(w); |
---|
| 121 | if (k ~= kk) |
---|
| 122 | error('Mismatch in sizes between the mapping and its data stored internally.'); |
---|
| 123 | end |
---|
| 124 | if (~isempty(weights)) |
---|
| 125 | if (length(weights) ~= k), |
---|
| 126 | error('Weight vector has a wrong length.'); |
---|
| 127 | end |
---|
| 128 | A = A.*(ones(m,1)*weights(:)'); |
---|
| 129 | B = B.*(ones(n,1)*weights(:)'); |
---|
| 130 | end |
---|
| 131 | AA = A; |
---|
| 132 | A = +A; |
---|
| 133 | B = +B; |
---|
| 134 | |
---|
| 135 | switch type |
---|
| 136 | case {'polynomial','p'} |
---|
| 137 | D = A*B' + ones(m,n); |
---|
| 138 | if (s ~= round(s(1))) |
---|
| 139 | D = sign(D).*abs(D).^s(1); |
---|
| 140 | elseif (s(1) ~= 1) |
---|
| 141 | D = D.^s(1); |
---|
| 142 | end |
---|
| 143 | |
---|
| 144 | case {'homogeneous','h'} |
---|
| 145 | D = (A*B'); |
---|
| 146 | if (s ~= round(s(1))) |
---|
| 147 | D = sign(D).*abs(D).^s(1); |
---|
| 148 | elseif (s(1) ~= 1) |
---|
| 149 | D = D.^s(1); |
---|
| 150 | end |
---|
| 151 | |
---|
| 152 | case {'sigmoid','s'} |
---|
| 153 | D = (A*B'); |
---|
| 154 | D = sigm(D/s(1)); |
---|
| 155 | |
---|
| 156 | case {'dsigmoid','ds'} |
---|
| 157 | D = dist2(B,A).^s(1); |
---|
| 158 | D = sigm(D/s(2)); |
---|
| 159 | |
---|
| 160 | case {'city-block','c'} |
---|
| 161 | D = zeros(m,n); |
---|
| 162 | for j=1:n |
---|
| 163 | D(:,j) = sum(abs(A - repmat(+B(j,:),m,1)),2); |
---|
| 164 | end |
---|
| 165 | |
---|
| 166 | case {'minkowski','m'} |
---|
| 167 | D = zeros(m,n); |
---|
| 168 | for j=1:n |
---|
| 169 | D(:,j) = sum(abs(A - repmat(+B(j,:),m,1)).^s(1),2).^(1/s(1)); |
---|
| 170 | end |
---|
| 171 | |
---|
| 172 | case {'exp','e'} |
---|
| 173 | D = dist2(B,A); |
---|
| 174 | D = exp(-sqrt(D)/s(1)); |
---|
| 175 | |
---|
| 176 | case {'exp-sum','es'} |
---|
| 177 | d = sqrt(dist2(B,A)); |
---|
| 178 | D = sign(s(1))*exp(-d/s(1)); |
---|
| 179 | for z=2:length(s) |
---|
| 180 | D = D + sign(s(z))*exp(-d/s(z)); |
---|
| 181 | end |
---|
| 182 | |
---|
| 183 | case {'rbf','r'} |
---|
| 184 | D = dist2(B,A); |
---|
| 185 | D = exp(-D/(s(1)*s(1))); |
---|
| 186 | |
---|
| 187 | case {'rbf-sum','rs'} |
---|
| 188 | d = dist2(B,A); |
---|
| 189 | D = sign(s(1))*exp(-d/(s(1)^2));; |
---|
| 190 | for z=2:length(s) |
---|
| 191 | D = D + sign(s(z))*exp(-d/(s(z)^2)); |
---|
| 192 | end |
---|
| 193 | |
---|
| 194 | case {'distance','d'} |
---|
| 195 | D = dist2(B,A); |
---|
| 196 | if s(1) ~= 2 |
---|
| 197 | D = sqrt(D).^s(1); |
---|
| 198 | end |
---|
| 199 | |
---|
| 200 | case {'shiftdist'} % not properly implemented for test objects |
---|
| 201 | D = dist2(B,A); |
---|
| 202 | if s(1) ~= 2 |
---|
| 203 | D = sqrt(D).^s(1); |
---|
| 204 | end |
---|
| 205 | D = -D; |
---|
| 206 | n = size(D,1); |
---|
| 207 | % shift data such that mean lies in the origin |
---|
| 208 | H = -repmat(1/n,n,n); |
---|
| 209 | H(1:n+1:end) = H(1:n+1:end) + 1; % H = eye(n) - ones(n,n)/n |
---|
| 210 | size(D) |
---|
| 211 | size(H) |
---|
| 212 | D = -0.5 * H * D * H; % D is now the matrix of inner products |
---|
| 213 | |
---|
| 214 | case {'multiquadric','mq'} |
---|
| 215 | D = sqrt(dist2(B,A)/s(1) +s(2).^2); |
---|
| 216 | |
---|
| 217 | case {'thin-plate','tp'} |
---|
| 218 | d = dist2(B,A).^s(1)/s(2); |
---|
| 219 | D = d.*log(1+d); |
---|
| 220 | |
---|
| 221 | case {'n-thin-plate','ntp'} |
---|
| 222 | d = dist2(B,A).^s(1)/s(2); |
---|
| 223 | D = -d.*log(1+d); |
---|
| 224 | |
---|
| 225 | case {'cosine','o'} |
---|
| 226 | D = (A*B'); |
---|
| 227 | lA = sqrt(sum(A.*A,2)); |
---|
| 228 | lB = sqrt(sum(B.*B,2)); |
---|
| 229 | D = 1 - D./(lA*lB'); |
---|
| 230 | |
---|
| 231 | case {'tanh','t'} |
---|
| 232 | D = tanh(s(1)*(A*B')/k + s(1)); |
---|
| 233 | |
---|
| 234 | case {'fourier','f'} |
---|
| 235 | zz = sin(s(1) + 1/2)*2*ones(k,1); |
---|
| 236 | D = zeros(m,n); |
---|
| 237 | for oo=1:m |
---|
| 238 | for o=1:n |
---|
| 239 | z = zz; |
---|
| 240 | I = find(A(oo,:)-B(o,:)); |
---|
| 241 | z(I) = sin(s(1) + 1/2)*(A(oo,I)-B(o,I))./sin((A(oo,I)-B(o,I))/2); |
---|
| 242 | D(oo,o) = prod(z); |
---|
| 243 | end |
---|
| 244 | end |
---|
| 245 | |
---|
| 246 | case {'spline','anovaspline1','as1'} |
---|
| 247 | D = zeros(m,n); |
---|
| 248 | for oo=1:m |
---|
| 249 | for o=1:n |
---|
| 250 | mAB = min(A(oo,:),B(o,:)); |
---|
| 251 | z = 1 + A(oo,:).*B(o,:).*(1 + mAB) - ((A(oo,:)+B(o,:))/2).*mAB.^2 + mAB.^3/3; |
---|
| 252 | D(oo,o) = prod(z); |
---|
| 253 | end |
---|
| 254 | end |
---|
| 255 | |
---|
| 256 | case {'anova','a'} |
---|
| 257 | D = zeros(m,n); |
---|
| 258 | for oo=1:m |
---|
| 259 | for o=1:n |
---|
| 260 | mAB = min(A(oo,:),B(o,:)); |
---|
| 261 | z = 1 + A(oo,:).*B(o,:).*(1 + 0.5*mAB) - mAB.^3/6; |
---|
| 262 | D(oo,o) = prod(z); |
---|
| 263 | end |
---|
| 264 | end |
---|
| 265 | |
---|
| 266 | case {'bspline','b'} |
---|
| 267 | D = zeros(m,n); |
---|
| 268 | for oo=1:m |
---|
| 269 | for o=1:n |
---|
| 270 | z = 0; |
---|
| 271 | AmB = A(oo,:)- B(o,:); |
---|
| 272 | for r = 0:2*(s(1)+1) |
---|
| 273 | z = z + (-1)^r*binomial(2*(s(1)+1),r)*(max(0,AmB + s(1)+1 - r)).^(2*s(1) + 1); |
---|
| 274 | end |
---|
| 275 | D(oo,o) = prod(z); |
---|
| 276 | end |
---|
| 277 | end |
---|
| 278 | |
---|
| 279 | case {'anovaspline2','as2'} |
---|
| 280 | D = zeros(m,n); |
---|
| 281 | for oo=1:m |
---|
| 282 | for o=1:n |
---|
| 283 | mAB = min(A(oo,:),B(o,:)); |
---|
| 284 | AmB = A(oo,:) - B(o,:); |
---|
| 285 | AB = A(oo,:) .* B(o,:); |
---|
| 286 | ApB = A(oo,:) + B(o,:); |
---|
| 287 | z = 1 + AB + AB.^2.*(1 + mAB) - AB.*(ApB).*mAB.^2 + ... |
---|
| 288 | (ApB.^2+2*AB).*mAB.^3/3 - 0.5*ApB.*mAB.^4 + 0.2*mAB.^5; |
---|
| 289 | D(oo,o) = prod(z); |
---|
| 290 | end |
---|
| 291 | end |
---|
| 292 | |
---|
| 293 | case {'anovaspline3','as3'} |
---|
| 294 | D = zeros(m,n); |
---|
| 295 | for oo=1:m |
---|
| 296 | for o=1:n |
---|
| 297 | mAB = min(A(oo,:),B(o,:)); |
---|
| 298 | AmB = A(oo,:)- B(o,:); |
---|
| 299 | AB = A(oo,:).*B(o,:); |
---|
| 300 | ApB = A(oo,:)+B(o,:); |
---|
| 301 | z = 1 + AB + AB.^2 + AB.^3.*(1 + mAB) - 1.5 * AB.^2 .*(ApB).*mAB.^2 + ... |
---|
| 302 | AB.*(ApB.^2+AB).*mAB.^3 - 0.25 * (3*ApB.^3- 2*B(oo,:).^2 - ... |
---|
| 303 | 2*A(o,:).^2).*mAB.^4 + 0.6 * (ApB.^2+AB).*mAB.^5 - 0.5 * ApB .*mAB.^6+mAB.^7/7; |
---|
| 304 | D(oo,o) = prod(z); |
---|
| 305 | end |
---|
| 306 | end |
---|
| 307 | |
---|
| 308 | case {'anovabspline','ab'} |
---|
| 309 | D = zeros(m,n); |
---|
| 310 | for oo=1:m |
---|
| 311 | for o=1:n |
---|
| 312 | z = 0; |
---|
| 313 | AmB = A(oo,:)- B(o,:); |
---|
| 314 | for r = 0:2*(s(1)+1) |
---|
| 315 | z = z + (-1)^r*binomial(2*(s(1)+1),r)*(max(0,BmA + s(1)+1 - r)).^(2*s(1) + 1); |
---|
| 316 | end |
---|
| 317 | D(oo,o) = prod(1+z); |
---|
| 318 | end |
---|
| 319 | end |
---|
| 320 | |
---|
| 321 | otherwise |
---|
| 322 | error('Unknown proximity type') |
---|
| 323 | end |
---|
| 324 | W = setdat(AA,D,w); |
---|
| 325 | |
---|
| 326 | else |
---|
| 327 | error('Illegal TYPE argument.') |
---|
| 328 | end |
---|
| 329 | return; |
---|
| 330 | |
---|
| 331 | |
---|
| 332 | |
---|
| 333 | function D = dist2(B,A) |
---|
| 334 | % Computes square Euclidean distance, avoiding large matrices for a high |
---|
| 335 | % dimensionality data |
---|
| 336 | m = size(A,1); |
---|
| 337 | n = size(B,1); |
---|
| 338 | D = ones(m,1)*sum(B'.*B',1); |
---|
| 339 | D = D + sum(A'.*A',1)'*ones(1,n); |
---|
| 340 | D = D - 2 .* (+A)*(+B)'; |
---|
| 341 | % Clean result. |
---|
| 342 | J = find(D<0); |
---|
| 343 | D(J) = zeros(size(J)); |
---|
| 344 | return |
---|