source: distools/checkeucl.m @ 52

Last change on this file since 52 was 10, checked in by bduin, 14 years ago
File size: 3.4 KB
Line 
1%CHECKEUCL Check whether a square dissimilarity matrix has a Euclidean behavior
2%
3%   [NEF,NER,W] = CHECKEUCL(D)
4%   [NEF,NER]   = CHECKEUCL(D,K)
5%   [NEF,NER,K] = CHECKEUCL(D,'all')
6%
7% INPUT
8%   D     NxN dissimilarity matrix or dataset
9%   K     Vector with desired subset sizes
10%
11% OUTPUT
12%   NEF   Index of non-Euclidean behavior; negative eigen-fraction NEF in [0,1)
13%   NER   Index of non-Euclidean behavior; negative eigen-ratio NER >= 0
14%   W     Pseudo-Euclidean embedding
15%
16% DESCRIPTION
17% Computes how well the square dissimilarity matrix D can be embedded
18% in a Euclidean space.
19% NER and NEF are computed by performing the Pseudo-Euclidean embedding.
20% NEF is a ratio expressing the sum of magnitudes of all negative
21% eigenvalues divided by the sum of magnitudes of all eigenvalues. NER is
22% a ratio of the magnitude of the lowest negative eigenvalue to the largest
23% positive eigenvalue.
24%
25% D is Euclidean if D.^2 is isometrically embeddable into a Euclidean space.
26% Ideally, both NEF and NER are zero. Note that due to numerical inaccuracies
27% of the emebdding procedure, both NEF and NER might be very small, e.g.
28% in the order of ~1e-10, for perfect Euclidean distance data.
29%
30% In case a set of subset sizes K is given as many random subsets of K(i)
31% objects are selected that are needed to estimate the expected NEF for
32% matrices of K(i)*K(i) dissimilarities with a standard deviation of 5%.
33%
34% SEE ALSO
35% PE_EM
36
37% Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com
38% Faculty EWI, Delft University of Technology and
39% School of Computer Science, University of Manchester
40%
41
42function [nef, ner, W] = checkeucl(D,N)
43
44        if nargin < 2, N = []; end
45        alf = 0.05;
46        [m,k] = size(D);
47        if m ~= k
48                error('Dissimilarity matrix D should be square.')
49        end
50
51        D = dataset(D,1);  % we are not interested in labels here.
52        D = setfeatlab(D,ones(m,1));
53       
54        if isempty(N)
55                W    = pe_em(D);
56    L    = getdata(W,'eval');
57                nef  = sum(abs(L(find(L < 0))))/sum(abs(L));
58                ner  = max(abs(L(find(L < 0)))) / max(L(find(L > 0)));
59    if isempty(ner), ner = 0; end
60  else
61    if ischar(N) & strcmp(N,'all')
62      N = [3,4,5,7,10,15,20,30,50,70,100,150,200,300,500,700,1000,1500,2000,3000];
63      N = [N(N<m) m];
64    end
65    npoints = length(N);
66                nef = zeros(1,npoints);
67                ner = zeros(1,npoints);
68    if npoints > 1
69      s = sprintf('checkeucl: NEF on %i points: ',npoints);
70      prwaitbar(npoints,s);
71    end
72                for j = 1:npoints
73      if npoints > 1, prwaitbar(npoints,j,[s int2str(j)]); end
74                        n = N(j);
75      if n==m
76        [nef(j) ner(j)] = feval(mfilename,D);
77      else
78        nfe = 0;
79        nre = 0;
80        nfv  = 0;
81        for i=1:5
82          [nf nr] = feval(mfilename,genddat(D,n));
83          nfe = nfe+nf;    nfee = nfe/i;
84          nre = nre+nr;   
85          nfv = nfv+nf*nf; nfvv = nfv/i;
86                                end
87                                if nfee == 0
88                                        acc = 0;
89                                else
90                                        acc = sqrt(nfvv-nfee^2)/(sqrt(i)*nfee);
91                                end
92        while acc > alf & i < 1000
93          i = i+1;
94          [nf nr] = feval(mfilename,genddat(D,n));
95          nfe = nfe+nf;    nfee = nfe/i;
96          nre = nre+nr;   
97          nfv = nfv+nf*nf; nfvv = nfv/i;
98          acc = sqrt(nfvv-nfee^2)/(sqrt(i)*nfee);
99          if nfee < 0.001, nfee = 0; acc = 0; end
100          %disp([i,round(10000*acc)])
101        end
102        nef(j) = nfee;
103        ner(j) = nre/i;
104      end
105                end
106    if npoints > 1, prwaitbar(0); end
107    W = N;
108        end
109
110return;
Note: See TracBrowser for help on using the repository browser.