source: distools/flpdistm.m @ 128

Last change on this file since 128 was 10, checked in by bduin, 14 years ago
File size: 2.6 KB
RevLine 
[10]1%FLPDISTM  lp (p > 0) (Non)-Metric Distance Matrix
2%
3%     D = FLPDISTM (A,B,P)
4%       OR
5%     D = FLPDISTM (A,B)
6%       OR
7%     D = FLPDISTM (A,P)
8%       OR
9%     D = FLPDISTM (A)
10%
11% INPUT
12%   A   NxK Matrix or dataset
13%   B   MxK Matrix or dataset
14%   P   Parameter; P > 0
15%
16% OUTPUT
17%   D   NxM Dissimilarity matrix or dataset
18%
19% DEFAULT
20%   P = 1
21%   B = A
22%
23% DESCRIPTION
24% Fast computation of the distance matrix D between two sets of vectors, A and B.
25% This can ONLY be used for small sets A and B, because the computation builds
26% 3D matrices of size N x M x K and is therefore memory-intensive.
27% Distances between vectors X and Y are computed using the lp distance:
28%     d(X,Y) = (sum (|X_i - Y_i|.^P))^(1/P)
29%                i
30% If P = Inf, then the max norm distance is computed:
31%     d(X,Y) = max (|X_i - Y_i|)
32%
33% If A and B are datasets, then D is a dataset as well with the labels defined
34% by the labels of A and the feature labels defined by the labels of B. If A is
35% not a dataset, but a matrix of doubles, then D is also a matrix of doubles.
36%
37% DEFAULT
38%   P = 1
39%   B = A
40%
41% REMARKS
42%   P >= 1      => D is metric
43%   P in (0,1)  => D is non-metric; D.^P is metric and l1-embeddable
44%   P = 1 or 2  => D is the city block or the Euclidean distance, respectively
45%
46% SEE ALSO
47%   LPDISTM, EUDISTM
48%
49
50% Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com
51% Faculty EWI, Delft University of Technology and
52% School of Computer Science, University of Manchester
53
54
55
function D = flpdistm (A,B,p)
% D = FLPDISTM(A,B,p) returns the ra-by-rb matrix of lp distances between the
% rows of A and the rows of B; both must have the same number of columns.
%   FLPDISTM(A)    uses B = A and p = 1.
%   FLPDISTM(A,p)  uses B = A; a 1x1 second argument is always read as p.
%   FLPDISTM(A,B)  uses p = 1.
% p must be a positive scalar; p = Inf selects the max norm.
% An error is raised if p is not a positive scalar or if the column counts
% of A and B differ.

% Resolve the optional arguments. bisa records that B was defaulted to A, so
% that the result can be symmetrized at the end.
bisa = 0;
if nargin < 2,
  p = 1;
  B = A;
  bisa = 1;
elseif nargin < 3,
  if max (size(B)) == 1,
    % A 1x1 second argument is interpreted as p, not as a one-point set B.
    % Unary plus is applied as for A and B below (presumably it strips a
    % dataset wrapper; it is a no-op on plain doubles).
    p = +B;
    B = A;
    bisa = 1;
  else
    p = 1;
  end
end

% Written as ~(p > 0) rather than p <= 0 so that NaN is rejected as well;
% otherwise a NaN p would fail both p <= 0 and p < Inf and silently fall
% through to the max-norm branch.
if ~isscalar(p) || ~(p > 0),
  error ('The parameter p must be a positive scalar.');
end

isda = isdataset(A);
isdb = isdataset(B);
a    = +A;
b    = +B;
[ra,ca] = size(a);
[rb,cb] = size(b);

if ca ~= cb,
  error ('The matrices should have the same number of columns.');
end

% Build the ra x rb x ca array of absolute coordinate differences once:
% a is replicated along dimension 2, b along dimension 1, and the original
% columns are moved to dimension 3.
delta = abs (repmat(permute(a,[1 3 2]), [1 rb 1]) - ...
             repmat(permute(b,[3 1 2]), [ra 1 1]));

if p < Inf,
  D = sum (delta.^p,3).^(1/p);   % lp distance
else
  D = max (delta,[],3);          % max norm distance
end

% Check numerical inaccuracy
D (D < eps) = 0;          % Flush values below eps to exactly zero
if bisa,
  D = 0.5*(D+D');         % Make sure that distances are symmetric for D(A,A)
end

% Only when both inputs are datasets is the result wrapped as a dataset,
% built from A with the labels of B passed as the third argument of setdata.
if xor(isda, isdb),
  prwarning(1,'One matrix is a dataset and the other not. The result is a matrix.')
elseif isda && isdb,
  D = setdata(A,D,getlab(B));
  D.name = 'Distance matrix';
  if ~isempty(A.name)
    D.name = [D.name ' for ' A.name];
  end
end
return
Note: See TracBrowser for help on using the repository browser.