1 | %FASTMAPD Fast Linear Map of Euclidean distances |
---|
2 | % |
---|
3 | % W = FASTMAPD(D,K) |
---|
4 | % |
---|
5 | % INPUT |
---|
6 | % D NxN symmetric dissimilarity matrix (dataset) |
---|
7 | % K Chosen dimensionality (optional; K is automatically detected) |
---|
8 | % |
---|
9 | % OUTPUT |
---|
10 | % W Linear map of Euclidean distances D into a Euclidean space |
---|
11 | % |
---|
12 | % DEFAULT |
---|
13 | % K = [] |
---|
14 | % |
---|
15 | % DESCRIPTION |
---|
16 | % This is an implementation of the FastMap algorithm. D is assumed to be |
---|
17 | % a square Euclidean distance matrix and K is the desired dimensionality of |
---|
18 | % the projection. However, if the dimensionality M of the original data is |
---|
19 | % smaller than K, then only a mapping to a M-dimensional space is returned. |
---|
20 | % |
---|
21 | % If D does not have a Euclidean behavior, a projection is done to the fewest |
---|
22 | % dimensions that explain the Euclidean part the best, i.e. do not deal with |
---|
23 | % negative squared distances. |
---|
24 | % |
---|
25 | % LITERATURE |
---|
26 | % 1. C. Faloutsos and K.-I. Lin, FastMap: A Fast Algorithm for Indexing, |
---|
27 | % Data-Mining and Visualization of Traditional and Multimedia Datasets, |
---|
28 | % ACM SIGMOD, International Conference on Management of Data, 163-174, 1995. |
---|
29 | % |
---|
30 | |
---|
31 | % Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com |
---|
32 | % Faculty EWI, Delft University of Technology and |
---|
33 | % School of Computer Science, University of Manchester |
---|
34 | |
---|
35 | |
---|
36 | function [W,P] = fastmapd(D,K) |
---|
37 | |
---|
38 | if nargin < 2, |
---|
39 | K = []; |
---|
40 | end |
---|
41 | if nargin < 1 | isempty(D) |
---|
42 | W = prmapping(mfilename,K); |
---|
43 | W = setname(W,'Fastmapd'); |
---|
44 | return |
---|
45 | end |
---|
46 | |
---|
47 | |
---|
48 | if (isdataset(D) | isa(D,'double')) |
---|
49 | if ismapping(K), |
---|
50 | % APPLY THE MAPPING |
---|
51 | pars = getdata(K); |
---|
52 | P = pars{1}; |
---|
53 | dd = pars{2}; |
---|
54 | X = pars{3}; |
---|
55 | K = pars{4}; |
---|
56 | |
---|
57 | nlab = getnlab(D); |
---|
58 | n = size(D,1); |
---|
59 | D = +D.^2; |
---|
60 | if all(diag(D) == 0) & issym(D) & length(X) == n, |
---|
61 | Y = X; |
---|
62 | else |
---|
63 | for k=1:K |
---|
64 | Y(:,k) = (D(:,P(k,1)) + dd(k)*ones(n,1) - D(:,P(k,2)))./(2*sqrt(dd(k))); |
---|
65 | D = (D - distm(Y(:,k),X(:,k))); |
---|
66 | end |
---|
67 | end |
---|
68 | W = prdataset(Y,nlab); |
---|
69 | return |
---|
70 | end |
---|
71 | end |
---|
72 | |
---|
73 | |
---|
74 | |
---|
75 | |
---|
76 | |
---|
77 | % TRAIN THE MAPPING |
---|
78 | if isdataset(D) |
---|
79 | nlab = getnlab(D); |
---|
80 | end |
---|
81 | discheck(D); |
---|
82 | |
---|
83 | n = size(D,1); |
---|
84 | D = +D.^2; |
---|
85 | if isempty(K), |
---|
86 | K = n-1; |
---|
87 | end |
---|
88 | |
---|
89 | k = 0; % Current dimensionality |
---|
90 | Z = 1:n; % Index of points |
---|
91 | tol = 1e-12; % Tolerance when the distances become close to zero |
---|
92 | tolNE = 1e-10; % Tolerance for the non-Euclidean behavior |
---|
93 | NEucl = 0; % Non-Euclidean behavior |
---|
94 | finito = 0; |
---|
95 | |
---|
96 | while ~finito, |
---|
97 | k = k+1; |
---|
98 | [mm,O] = max((D(Z,Z)),[],2); |
---|
99 | [mm,i] = max(mm); |
---|
100 | P(k,1) = Z(O(i)); % P is the index of pivots |
---|
101 | P(k,2) = Z(i); |
---|
102 | Z([O(i) i]) = []; |
---|
103 | dd(k) = D(P(k,1),P(k,2)); |
---|
104 | |
---|
105 | if dd(k) >= tol, |
---|
106 | X(:,k) = (D(:,P(k,1)) + dd(k)*ones(n,1) - D(:,P(k,2)))./(2*sqrt(dd(k))); |
---|
107 | D = (D - distm(X(:,k))); |
---|
108 | else |
---|
109 | P(k,:) = []; |
---|
110 | end |
---|
111 | |
---|
112 | if ~NEucl, |
---|
113 | NEucl = NEucl | any(D(:) < -tolNE); |
---|
114 | if NEucl, |
---|
115 | KKe = k; |
---|
116 | KK = k; |
---|
117 | end |
---|
118 | end |
---|
119 | |
---|
120 | finito = (k >= K) | (dd(k) <= tol) | NEucl; |
---|
121 | end |
---|
122 | if ~NEucl, |
---|
123 | KK = k; |
---|
124 | end |
---|
125 | |
---|
126 | if NEucl, |
---|
127 | disp('Distances do not have a Euclidean behavior!'); |
---|
128 | disp(['The Euclidean part of the distances is explained in ' num2str(KKe) 'D.']); |
---|
129 | else |
---|
130 | if KK < K, |
---|
131 | disp(['The distances are perfectly explained in ' num2str(KK) 'D.']); |
---|
132 | end |
---|
133 | end |
---|
134 | |
---|
135 | W = prmapping(mfilename,'trained',{P,dd,X,KK},[],n,KK); |
---|
136 | W = setname(W,'Fastmapd'); |
---|
137 | return |
---|
138 | |
---|
139 | |
---|
140 | |
---|