source: distools/qdistm.m @ 12

Last change on this file since 12 was 10, checked in by bduin, 14 years ago
File size: 4.6 KB
RevLine 
[10]1%QDISTM  Distance Matrix for Quantitative Variables
2%
3%     D = QDISTM (A,B,TYPE,P)
4%       OR
5%     D = QDISTM (A,B)
6%       OR
7%     D = QDISTM (A,TYPE,P)
8%       OR
9%     D = QDISTM (A,TYPE)
10%
11% INPUT
12%   A   NxK Matrix or dataset
13%   B   MxK Matrix or dataset
14%   TYPE  Type of the dissimilarity D (optional; default: 'E'):
15%           'E',  'Euclidean'
16%           'SQE','Square-Euclidean'
17%           'LP', 'LP-distance'
18%           'BC', 'Bray-Curtis'
19%           'CAN','Canberra'
20%           'COR','Correlation'
21%           'COS','Cosine'
22%           'DIV','Divergence'
23%           'EXP','Exponent'
24%           'S',  'Soergel'
25%           'SAM','Spectral-Angular-Mapper'
26%           'TAX','Taxonomic'
27%           'WS', 'Ware-Hedges'
28%   P   Parameter, P > 0 (optional, default: 1)
29%
30% OUTPUT
31%   D   NxM Dissimilarity matrix or dataset
32%
33% DESCRIPTION
34% Computation of the distance matrix D between two sets of vectors, A and B.
35% Distances between vectors X and Y are computed as:
36%   'E':   d(X,Y) = (sum_i (|X_i - Y_i|^2))^(1/2)
37%   'SQE': d(X,Y) =  sum_i (|X_i - Y_i|^2)
38%   'LP':  d(X,Y) = (sum_i (|X_i - Y_i|^P))^(1/P)
39%   'BC':  d(X,Y) =  sum_i (|X_i - Y_i|)/sum_i (X_i + Y_i)
40%   'CAN': d(X,Y) =  sum_i {|X_i - Y_i|/(|X_i| + |Y_i|)}
41%   'COR': d(X,Y) =  (1 - COV(X,Y) / sqrt(Var(X) * VAR(Y)))/2
42%   'COS': d(X,Y) =  (1 - X'*Y/(||X||*||Y||))
43%   'DIV': d(X,Y) = (sum_i {|X_i - Y_i|^P/(X_i + Y_i)^P})^(1/P)
44%   'EXP': d(X,Y) =  1 - exp (-(X-Y)'(X-Y)/P^2)
45%   'S':   d(X,Y) =  sum_i (|X_i - Y_i|)/sum_i max{X_i,Y_i}
46%   'SAM': d(X,Y) =  P arccos (X'Y/P^2)
47%   'TAX': d(X,Y) = (sum_i |X_i - Y_i|^P/r_i^P)^(1/P), r_i = range of feature i in B
48%   'WS':  d(X,Y) =  sum_i {1 - min{X_i,Y_i}/max{X_i,Y_i}}
49%
50% If A and B are datasets, then D is a dataset as well with the labels defined
51% by the labels of A and the feature labels defined by the labels of B. If A is
52% not a dataset, but a matrix of doubles, then D is also a matrix of doubles.
53%
54% DEFAULT
55%   B    = A
56%   TYPE = 'E'
57%   P    = 1
58%
59% SEE ALSO
60%   CORRDISTM, COSDISTM, DISTM, EXPDISTM, EUDISTM, LPDISTM, SAMDISTM
61%
62
63% Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com
64% Faculty EWI, Delft University of Technology and
65% School of Computer Science, University of Manchester
66
67
function D = qdistm (A,B,type,p)
% Compute the RA x RB dissimilarity matrix D between the rows of A and the
% rows of B for the requested dissimilarity TYPE with parameter P.
%
% Accepted call forms (resolved below from NARGIN and the argument classes):
%   qdistm(A)             B = A, TYPE = 'E', P = 1
%   qdistm(A,B)           TYPE = 'E', P = 1
%   qdistm(A,TYPE)        B = A, P = 1
%   qdistm(A,TYPE,P)      B = A
%   qdistm(A,B,TYPE)      P = 1
%   qdistm(A,B,TYPE,P)
%
% Errors are raised when TYPE is not a string, when P <= 0, when A and B
% differ in their number of columns, or when TYPE is not a known name.

% bisa flags that B was defaulted to A, so D is symmetrized at the end.
bisa = 0;
if nargin < 2
  p    = 1;
  B    = A;
  type = 'E';
  bisa = 1;
elseif nargin < 3
  p = 1;
  if ischar(B)
    % qdistm(A,TYPE): the second argument is the type, not a data matrix.
    type = B;
    B    = A;
    bisa = 1;
  else
    type = 'E';
  end
elseif nargin < 4
  if ~ischar(type)
    % qdistm(A,TYPE,P): shift the arguments one position to the right.
    p    = type;
    type = B;
    B    = A;
    bisa = 1;
  else
    p = 1;
  end
end

if ~ischar(type)
  error ('TYPE is a string.');
end

if p <= 0
  error ('The parameter P must be positive.');
end

isda = isdataset(A);
isdb = isdataset(B);
% Unary plus: presumably the PRTools convention for extracting the raw data
% of a dataset (a no-op on plain matrices) -- confirm against PRTools.
a    = +A;
b    = +B;
[ra,ca] = size(a);
[rb,cb] = size(b);

if ca ~= cb
  error ('The matrices should have the same number of columns.');
end

% Each loop below fills one column of D: the dissimilarities between all
% rows of a and the i-th row of b, replicated to the size of a as "bi".
D = zeros(ra,rb);
switch lower(type)
  case {'e','euclidean'}
    % The square root is taken here, so distm is expected to return
    % squared Euclidean distances (it is used as-is for 'sqe').
    D = sqrt(distm(a,b));
  case {'sqe','square-euclidean'}
    D = distm(a,b);
  case {'lp','lp-distance'}
    D = lpdistm(a,b,p);
  case {'bc','bray-curtis'}
    for i=1:rb
      bi = repmat(b(i,:),ra,1);
      D(:,i) = sum(abs(bi - a),2) ./ sum(bi + a,2);
    end
  case {'can','canberra'}
    % NOTE(review): a feature that is zero in both vectors contributes 0/0
    % (NaN) to the sum; behaviour intentionally left unchanged.
    for i=1:rb
      bi = repmat(b(i,:),ra,1);
      D(:,i) = sum(abs(bi - a) ./ (abs(bi) + abs(a)), 2);
    end
  case {'cor','correlation'}
    D = corrdistm(a,b);
  case {'cos','cosine'}
    D = cosdistm(a,b);
  case {'div','divergence'}
    for i=1:rb
      bi = repmat(b(i,:),ra,1);
      D(:,i) = sum(abs(bi - a).^p ./ (bi + a).^p, 2).^(1/p);
    end
  case {'exp','exponent'}
    D = expdistm(a,b,p);
  case {'sam','spectral-angular-mapper'}
    D = samdistm(a,b,p);
  case {'s','soergel'}
    for i=1:rb
      bi = repmat(b(i,:),ra,1);
      D(:,i) = sum(abs(bi - a),2) ./ sum(max(bi,a),2);
    end
  case {'tax','taxonomic'}
    % Per-feature range of b. The dimension is given explicitly because
    % max(b)/min(b) reduce a single-row b along its columns to a scalar,
    % which would discard the per-feature ranges.
    rr = repmat(max(b,[],1) - min(b,[],1),ra,1);
    for i=1:rb
      bi = repmat(b(i,:),ra,1);
      D(:,i) = sum((abs(bi - a) ./ rr).^p, 2).^(1/p);
    end
  case {'ws','ware-hedges'}
    for i=1:rb
      bi = repmat(b(i,:),ra,1);
      D(:,i) = sum(1 - min(bi,a) ./ max(bi,a), 2);
    end
  otherwise
    error('Wrong dissimilarity type.');
end

if bisa
  D = 0.5*(D+D');         % Make sure that distances are symmetric for D(A,A)
end

% Set object labels and feature labels
if xor(isda, isdb)
  prwarning(1,'One matrix is a dataset and the other is not. ')
end
if isda
  if isdb
    % Both inputs are datasets: the labels of B become the feature labels.
    D = setdata(A,D,getlab(B));
  else
    D = setdata(A,D);
  end
  D.name = 'Distance matrix';
  if ~isempty(A.name)
    D.name = [D.name ' for ' A.name];
  end
end
return
Note: See TracBrowser for help on using the repository browser.