[10] | 1 | %CHECKEUCL Check whether a square dissimilarity matrix has a Euclidean behavior
|
---|
| 2 | %
|
---|
| 3 | % [NEF,NER,W] = CHECKEUCL(D)
|
---|
| 4 | % [NEF,NER] = CHECKEUCL(D,K)
|
---|
| 5 | % [NEF,NER,K] = CHECKEUCL(D,'all')
|
---|
| 6 | %
|
---|
| 7 | % INPUT
|
---|
| 8 | % D NxN dissimilarity matrix or dataset
|
---|
| 9 | % K Vector with desired subset sizes
|
---|
| 10 | %
|
---|
| 11 | % OUTPUT
|
---|
| 12 | % NEF Index of non-Euclidean behavior; negative eigen-fraction NEF in [0,1)
|
---|
| 13 | % NER Index of non-Euclidean behavior; negative eigen-ratio NER >= 0
|
---|
| 14 | % W Pseudo-Euclidean embedding
|
---|
| 15 | %
|
---|
| 16 | % DESCRIPTION
|
---|
| 17 | % Computes how well the square dissimilarity matrix D can be embedded
|
---|
| 18 | % in a Euclidean space.
|
---|
| 19 | % NER and NEF are computed by performing the Pseudo-Euclidean embedding.
|
---|
| 20 | % NEF is a ratio expressing the sum of magnitudes of all negative
|
---|
| 21 | % eigenvalues divided by the sum of magnitudes of all eigenvalues. NER is
|
---|
| 22 | % a ratio of the magnitude of the lowest negative eigenvalue to the largest
|
---|
| 23 | % positive eigenvalue.
|
---|
| 24 | %
|
---|
| 25 | % D is Euclidean if D.^2 is isometrically embeddable into a Euclidean space.
|
---|
| 26 | % Ideally, both NEF and NER are zero. Note that due to numerical inaccuracies
|
---|
| 27 | % of the emebdding procedure, both NEF and NER might be very small, e.g.
|
---|
| 28 | % in the order of ~1e-10, for perfect Euclidean distance data.
|
---|
| 29 | %
|
---|
| 30 | % In case a set of subset sizes K is given as many random subsets of K(i)
|
---|
| 31 | % objects are selected that are needed to estimate the expected NEF for
|
---|
| 32 | % matrices of K(i)*K(i) dissimilarities with a standard deviation of 5%.
|
---|
| 33 | %
|
---|
| 34 | % SEE ALSO
|
---|
| 35 | % PE_EM
|
---|
| 36 |
|
---|
| 37 | % Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com
|
---|
| 38 | % Faculty EWI, Delft University of Technology and
|
---|
| 39 | % School of Computer Science, University of Manchester
|
---|
| 40 | %
|
---|
| 41 |
|
---|
| 42 | function [nef, ner, W] = checkeucl(D,N)
|
---|
| 43 |
|
---|
| 44 | if nargin < 2, N = []; end
|
---|
| 45 | alf = 0.05;
|
---|
| 46 | [m,k] = size(D);
|
---|
| 47 | if m ~= k
|
---|
| 48 | error('Dissimilarity matrix D should be square.')
|
---|
| 49 | end
|
---|
| 50 |
|
---|
| 51 | D = dataset(D,1); % we are not interested in labels here.
|
---|
| 52 | D = setfeatlab(D,ones(m,1));
|
---|
| 53 |
|
---|
| 54 | if isempty(N)
|
---|
| 55 | W = pe_em(D);
|
---|
| 56 | L = getdata(W,'eval');
|
---|
| 57 | nef = sum(abs(L(find(L < 0))))/sum(abs(L));
|
---|
| 58 | ner = max(abs(L(find(L < 0)))) / max(L(find(L > 0)));
|
---|
| 59 | if isempty(ner), ner = 0; end
|
---|
| 60 | else
|
---|
| 61 | if ischar(N) & strcmp(N,'all')
|
---|
| 62 | N = [3,4,5,7,10,15,20,30,50,70,100,150,200,300,500,700,1000,1500,2000,3000];
|
---|
| 63 | N = [N(N<m) m];
|
---|
| 64 | end
|
---|
| 65 | npoints = length(N);
|
---|
| 66 | nef = zeros(1,npoints);
|
---|
| 67 | ner = zeros(1,npoints);
|
---|
| 68 | if npoints > 1
|
---|
| 69 | s = sprintf('checkeucl: NEF on %i points: ',npoints);
|
---|
| 70 | prwaitbar(npoints,s);
|
---|
| 71 | end
|
---|
| 72 | for j = 1:npoints
|
---|
| 73 | if npoints > 1, prwaitbar(npoints,j,[s int2str(j)]); end
|
---|
| 74 | n = N(j);
|
---|
| 75 | if n==m
|
---|
| 76 | [nef(j) ner(j)] = feval(mfilename,D);
|
---|
| 77 | else
|
---|
| 78 | nfe = 0;
|
---|
| 79 | nre = 0;
|
---|
| 80 | nfv = 0;
|
---|
| 81 | for i=1:5
|
---|
| 82 | [nf nr] = feval(mfilename,genddat(D,n));
|
---|
| 83 | nfe = nfe+nf; nfee = nfe/i;
|
---|
| 84 | nre = nre+nr;
|
---|
| 85 | nfv = nfv+nf*nf; nfvv = nfv/i;
|
---|
| 86 | end
|
---|
| 87 | if nfee == 0
|
---|
| 88 | acc = 0;
|
---|
| 89 | else
|
---|
| 90 | acc = sqrt(nfvv-nfee^2)/(sqrt(i)*nfee);
|
---|
| 91 | end
|
---|
| 92 | while acc > alf & i < 1000
|
---|
| 93 | i = i+1;
|
---|
| 94 | [nf nr] = feval(mfilename,genddat(D,n));
|
---|
| 95 | nfe = nfe+nf; nfee = nfe/i;
|
---|
| 96 | nre = nre+nr;
|
---|
| 97 | nfv = nfv+nf*nf; nfvv = nfv/i;
|
---|
| 98 | acc = sqrt(nfvv-nfee^2)/(sqrt(i)*nfee);
|
---|
| 99 | if nfee < 0.001, nfee = 0; acc = 0; end
|
---|
| 100 | %disp([i,round(10000*acc)])
|
---|
| 101 | end
|
---|
| 102 | nef(j) = nfee;
|
---|
| 103 | ner(j) = nre/i;
|
---|
| 104 | end
|
---|
| 105 | end
|
---|
| 106 | if npoints > 1, prwaitbar(0); end
|
---|
| 107 | W = N;
|
---|
| 108 | end
|
---|
| 109 |
|
---|
| 110 | return;
|
---|