source: distools/jacsimdistm.m @ 100

Last change on this file since 100 was 79, checked in by bduin, 11 years ago
File size: 2.0 KB
Line 
1%JACSIMDISTM  Jaccard-like Distance Matrix based on Similarities;
2%
3%   D = JACSIMDISTM (A,B)
4%       OR
5%   D = JACSIMDISTM (A)
6%
7% INPUT
8%   A   NxK Matrix or dataset
9%   B   MxK Matrix or dataset (optional; default: B=A)
10%
11% OUTPUT
12%   D   NxM Dissimilarity matrix or prdataset; D in [0,1] for nonnegative data
13%
14% DESCRIPTION
15% Computes the distance matrix D between two sets of vectors, A and B.
16% Distances between vectors X and Y are computed based on the similarity
17% formula:
18%     SIM(X,Y) = (X'Y) / (||X||^2 + ||Y||^2 - ||X||*||Y||)
19%     D(X,Y)   = SQRT(1 - SIM(X,Y))
20% This is an extension of the binary Jaccard distance.
21%
22% If A and B are datasets, then D is a dataset as well with the labels defined
23% by the labels of A and the feature labels defined by the labels of B. If A is
24% not a dataset, but a matrix of doubles, then D is also a matrix of doubles.
25%
26% DEFAULT
27%   B = A
28%
29% SEE ALSO
30% SIMDISTM, CORRDISTM, COSDISTM, LPDISTM, EUDISTM
31
32% Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com
33% Faculty EWI, Delft University of Technology and
34% School of Computer Science, University of Manchester
35
36
37
function D = jacsimdistm(A,B)
% Jaccard-like distance matrix between the rows of A and the rows of B.
%
%   SIM(X,Y) = (X'Y) / (||X||^2 + ||Y||^2 - ||X||*||Y||)
%   D(X,Y)   = SQRT(1 - SIM(X,Y))
%
% A is NxK and B is MxK (B defaults to A); D is NxM. An error is raised
% when A and B have different numbers of columns. If A is a dataset, the
% result is wrapped into a dataset via SETDATA, otherwise D is a plain
% matrix.

bisa = nargin < 2;
if bisa
  B = A;
end

isda = isdataset(A);
isdb = isdataset(B);
% Work on the raw numeric data (unary plus; for a dataset this is
% presumably the overloaded operator returning the data matrix).
a = +A;
b = +B;

[ra,ca] = size(a);
[rb,cb] = size(b);

if ca ~= cb
  error ('Matrices should have equal numbers of columns');
end

% Squared norms of all rows, replicated to ra x rb matrices.
aa = sum(a.*a,2);
bb = sum(b.*b,2)';
AA = aa(:,ones(rb,1));
BB = bb(ones(ra,1),:);

% Similarity. The denominator is >= (||X||^2 + ||Y||^2)/2, so it is exactly
% zero only when both vectors are all-zero; such identical vectors get
% similarity 1 (distance 0) instead of 0/0 = NaN.
den = AA + BB - sqrt(AA .* BB);
S   = (a*b') ./ den;
S(den == 0) = 1;

% Check numerical inaccuracy BEFORE taking the square root: round-off can
% push the similarity slightly above 1, and the square root of a negative
% number would be complex. Logical indexing (rather than MAX) is used so
% that NaNs coming from the input data are still propagated.
R = 1 - S;
R(R < 0) = 0;
D = sqrt(R);

D(D < eps) = 0;           % Make sure that tiny distances are exactly zero
if bisa
  D = 0.5*(D+D');         % Make sure that distances are symmetric for D(A,A)
end

% Set object labels and feature labels
if xor(isda, isdb)
  prwarning(1,'One matrix is a dataset and the other not. ')
end
if isda
  if isdb
    % Feature labels of D are the object labels of B.
    D = setdata(A,D,getlab(B));
  else
    D = setdata(A,D);
  end
  D.name = 'Distance matrix';
  if ~isempty(A.name)
    D.name = [D.name ' for ' A.name];
  end
end
return
Note: See TracBrowser for help on using the repository browser.