[1] | 1 | %FASTMAPD Fast Linear Map of Euclidean distances |
---|
| 2 | % |
---|
| 3 | % W = FASTMAPD(D,K) |
---|
| 4 | % |
---|
| 5 | % INPUT |
---|
| 6 | % D NxN symmetric dissimilarity matrix (dataset) |
---|
| 7 | % K Chosen dimensionality (optional; K is automatically detected) |
---|
| 8 | % |
---|
| 9 | % OUTPUT |
---|
| 10 | % W Linear map of Euclidean distances D into a Euclidean space |
---|
| 11 | % |
---|
| 12 | % DEFAULT |
---|
| 13 | % K = [] |
---|
| 14 | % |
---|
| 15 | % DESCRIPTION |
---|
| 16 | % This is an implementation of the FastMap algorithm. D is assumed to be |
---|
| 17 | % a square Euclidean distance matrix and K is the desired dimensionality of |
---|
| 18 | % the projection. However, if the dimensionality M of the original data is |
---|
| 19 | % smaller than K, then only a mapping to a M-dimensional space is returned. |
---|
| 20 | % |
---|
| 21 | % If D does not have a Euclidean behavior, a projection is done to the fewest |
---|
| 22 | % dimensions that explain the Euclidean part the best, i.e. do not deal with |
---|
| 23 | % negative squared distances. |
---|
| 24 | % |
---|
| 25 | % LITERATURE |
---|
| 26 | % 1. C. Faloutsos and K.-I. Lin, FastMap: A Fast Algorithm for Indexing, |
---|
| 27 | % Data-Mining and Visualization of Traditional and Multimedia Datasets, |
---|
| 28 | % ACM SIGMOD, International Conference on Management of Data, 163-174, 1995. |
---|
| 29 | % |
---|
| 30 | |
---|
| 31 | % Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com |
---|
| 32 | % Faculty EWI, Delft University of Technology and |
---|
| 33 | % School of Computer Science, University of Manchester |
---|
| 34 | |
---|
| 35 | |
---|
| 36 | function [W,P] = fastmapd(D,K) |
---|
| 37 | |
---|
| 38 | if nargin < 2, |
---|
| 39 | K = []; |
---|
| 40 | end |
---|
| 41 | if nargin < 1 | isempty(D) |
---|
| 42 | W = mapping(mfilename,K); |
---|
| 43 | W = setname(W,'Fastmapd'); |
---|
| 44 | return |
---|
| 45 | end |
---|
| 46 | |
---|
| 47 | |
---|
| 48 | if (isdataset(D) | isa(D,'double')) |
---|
| 49 | if ismapping(K), |
---|
| 50 | % APPLY THE MAPPING |
---|
| 51 | pars = getdata(K); |
---|
| 52 | P = pars{1}; |
---|
| 53 | dd = pars{2}; |
---|
| 54 | X = pars{3}; |
---|
| 55 | K = pars{4}; |
---|
| 56 | |
---|
| 57 | nlab = getnlab(D); |
---|
| 58 | n = size(D,1); |
---|
| 59 | D = +D.^2; |
---|
| 60 | if all(diag(D) == 0) & issym(D) & length(X) == n, |
---|
| 61 | Y = X; |
---|
| 62 | else |
---|
| 63 | for k=1:K |
---|
| 64 | Y(:,k) = (D(:,P(k,1)) + dd(k)*ones(n,1) - D(:,P(k,2)))./(2*sqrt(dd(k))); |
---|
| 65 | D = (D - distm(Y(:,k),X(:,k))); |
---|
| 66 | end |
---|
| 67 | end |
---|
| 68 | W = dataset(Y,nlab); |
---|
| 69 | return |
---|
| 70 | end |
---|
| 71 | end |
---|
| 72 | |
---|
| 73 | |
---|
| 74 | |
---|
| 75 | |
---|
| 76 | |
---|
| 77 | % TRAIN THE MAPPING |
---|
| 78 | if isdataset(D) |
---|
| 79 | nlab = getnlab(D); |
---|
| 80 | end |
---|
| 81 | discheck(D); |
---|
| 82 | |
---|
| 83 | n = size(D,1); |
---|
| 84 | D = +D.^2; |
---|
| 85 | if isempty(K), |
---|
| 86 | K = n-1; |
---|
| 87 | end |
---|
| 88 | |
---|
| 89 | k = 0; % Current dimensionality |
---|
| 90 | Z = 1:n; % Index of points |
---|
| 91 | tol = 1e-12; % Tolerance when the distances become close to zero |
---|
| 92 | tolNE = 1e-10; % Tolerance for the non-Euclidean behavior |
---|
| 93 | NEucl = 0; % Non-Euclidean behavior |
---|
| 94 | finito = 0; |
---|
| 95 | |
---|
| 96 | while ~finito, |
---|
| 97 | k = k+1; |
---|
| 98 | [mm,O] = max((D(Z,Z)),[],2); |
---|
| 99 | [mm,i] = max(mm); |
---|
| 100 | P(k,1) = Z(O(i)); % P is the index of pivots |
---|
| 101 | P(k,2) = Z(i); |
---|
| 102 | Z([O(i) i]) = []; |
---|
| 103 | dd(k) = D(P(k,1),P(k,2)); |
---|
| 104 | |
---|
| 105 | if dd(k) >= tol, |
---|
| 106 | X(:,k) = (D(:,P(k,1)) + dd(k)*ones(n,1) - D(:,P(k,2)))./(2*sqrt(dd(k))); |
---|
| 107 | D = (D - distm(X(:,k))); |
---|
| 108 | else |
---|
| 109 | P(k,:) = []; |
---|
| 110 | end |
---|
| 111 | |
---|
| 112 | if ~NEucl, |
---|
| 113 | NEucl = NEucl | any(D(:) < -tolNE); |
---|
| 114 | if NEucl, |
---|
| 115 | KKe = k; |
---|
| 116 | KK = k; |
---|
| 117 | end |
---|
| 118 | end |
---|
| 119 | |
---|
| 120 | finito = (k >= K) | (dd(k) <= tol) | NEucl; |
---|
| 121 | end |
---|
| 122 | if ~NEucl, |
---|
| 123 | KK = k; |
---|
| 124 | end |
---|
| 125 | |
---|
| 126 | if NEucl, |
---|
| 127 | disp('Distances do not have a Euclidean behavior!'); |
---|
| 128 | disp(['The Euclidean part of the distances is explained in ' num2str(KKe) 'D.']); |
---|
| 129 | else |
---|
| 130 | if KK < K, |
---|
| 131 | disp(['The distances are perfectly explained in ' num2str(KK) 'D.']); |
---|
| 132 | end |
---|
| 133 | end |
---|
| 134 | |
---|
| 135 | W = mapping(mfilename,'trained',{P,dd,X,KK},[],n,KK); |
---|
| 136 | W = setname(W,'Fastmapd'); |
---|
| 137 | return |
---|
| 138 | |
---|
| 139 | |
---|
| 140 | |
---|