1 | %CHARDMAT Characterization of a square, labeled dissimilarity matrix
|
---|
2 | %
|
---|
3 | % [C,D_OUT] = CHARDMAT(D,Ntr,Nsubeucl)
|
---|
4 | %
|
---|
5 | % Characterizes a square (dis)similarity dataset D.
|
---|
6 | % D_OUT is the symmetric, normalized dissimilarity dataset D. If D is
|
---|
7 | % a similarity dataset it is converted to dissimilarities first.
|
---|
8 | % The following fields are returned in the structure C.
|
---|
9 | % name - dataset name as stored by PRTools or read command
|
---|
10 | % desc - dataset description as stored by read command
|
---|
11 | % link - web links as stored by read command
|
---|
12 | % ref - references as stored by read command
|
---|
13 | % asym - asymmetry, 2*|D-D'|./(|D|+|D'|)
|
---|
14 | % size - number of objects
|
---|
15 | % classes - number of classes
|
---|
16 | % clsizes - vector with class sizes
|
---|
17 | % type - 'dis' for dissimilarities, 'sim' for similarities
|
---|
18 | %
|
---|
19 | % all following items are computed for a transformation
|
---|
20 | % of D by MAKESYM and DISNORM (make average distance 1),
|
---|
21 | % similarities are first transformed into dissimilarities
|
---|
22 | % by d(i,j) = sqrt(d(i,i) + d(j,j) - d(i,j) - d(j,i))
|
---|
23 | %
|
---|
24 | % within_mean- average within class dissimilarity
|
---|
25 | % between_mean- average between class dissimilarity
|
---|
26 | % pe_mapping - Pseudo-Euclidean mapping as computed by PE_EM
|
---|
27 | % signature - 2 component vector with # of positive and negative
|
---|
28 | % eigenvalues obtained during the PE embedding
|
---|
29 | % eigenvalues- the eigenvalues obtained during the PE embedding,
|
---|
30 | % see PE_EM for their ranking
|
---|
31 | % nef - Negative Eigen Fraction (sum of absolute negative
|
---|
32 | % eigenvalues divided by sum of all absolute eigenvalues)
|
---|
33 | % ner - Negative Eigen Ratio (- largest negative eigenvalue
|
---|
34 | % divided by largest positive eigenvalue)
|
---|
35 | % trineq - fraction of triangle inequality violations
|
---|
36 | %
|
---|
37 | % the following characteristics refer to a set of five spaces:
|
---|
38 | % - Pseudo-Euclidean space based on a full embedding. Distances
|
---|
39 | % in this space are identical to D.
|
---|
40 | % - Associated space, the same vector spaces, but now treated as
|
---|
41 | % an Euclidean space
|
---|
42 | % - Positive space based on the positive eigenvalues only
|
---|
43 | % - Negative space based on the negative eigenvalues only
|
---|
44 | % - Corrected space based on an embedding of sqrt(D.^2+2*Lmin)
|
---|
45 | % in which Lmin is the absolute value of the largest negative
|
---|
46 | % eigenvalue. The result is a proper Euclidean space.
|
---|
47 | %
|
---|
48 | % loo_a - leave-one-out nearest neighbor errors for all five
|
---|
49 | % embedded spaces
|
---|
50 | % loo_d - leave-one-out nearest neighbor errors for the dissimilarity
|
---|
51 | % spaces related to the above five embedded spaces
|
---|
52 | % lcurve_a - nearest neighbor learning curves for the five embedded
|
---|
53 | % spaces
|
---|
54 | % lcurve_d - nearest neighbor learning curves for the five dissimilarity
|
---|
55 | % spaces
|
---|
56 | % anames - names of the five embedded spaces, useful for annotation
|
---|
57 | % dnames - names of the five dissimilarity spaces
|
---|
58 | %
|
---|
59 | % Ntr (default 200) and Nsubeucl (default 50) control numbers of trials to
|
---|
60 | % estimate the fraction of triangle violations and the accuracy of the
|
---|
61 | % subeuclidean curves.
|
---|
62 |
|
---|
63 |
|
---|
function [c,d] = chardmat(d,Ntr,Nsubeucl,makefigs)
% Characterize the square, labeled (dis)similarity dataset D.
%
%   d        - square, labeled (dis)similarity dataset; on return the
%              symmetrized and normalized dissimilarity dataset
%   Ntr      - number of trials handed to CHECKTR for estimating the
%              fraction of triangle inequality violations (default 200)
%   Nsubeucl - forwarded to SHOW_FIGS, which hands it to CHECKSUBEUCL
%              (default 50)
%   makefigs - when true MAKE_FIGS(d,c) is called, otherwise
%              SHOW_FIGS(c,d,Nsubeucl) (default 0)
%
%   c        - structure collecting all computed characteristics

if nargin < 4, makefigs = 0; end
if nargin < 3 || isempty(Nsubeucl), Nsubeucl = 50; end
if nargin < 2 || isempty(Ntr), Ntr = 200; end

% Called without output argument.
% NOTE(review): presumably raises an error for non-dataset input - confirm.
isdataset(d);

datname = getname(d);
discheck(d,[],1);
m = size(d,1);
nclass = getsize(d,3);

% Descriptive fields, taken from the dataset before any transformation
c.name    = datname;
c.desc    = getuser(d,'desc');
c.link    = getuser(d,'link');
c.ref     = getuser(d,'ref');
c.asym    = asymmetry(d);
c.size    = m;
c.classes = nclass;
c.clsizes = classsizes(d);

if discheck(d)
    c.type = 'dis';
else
    c.type = 'sim';
    d = dissimt(d,'sim2dis');   % convert similarities to dissimilarities
end

% we now have a dissimilarity matrix with positive distances

d = makesym(d);       % make it symmetric now
d = d*disnorm(d);     % normalize

% Average within-class dissimilarity per class (diagonal excluded from
% the count of pairs).
uc = zeros(1,nclass);
for j = 1:c.classes
    nj = c.clsizes(j);
    if nj > 1
        dj = +selcdat(d,j);
        uc(j) = sum(dj(:))/(nj*(nj-1));
    end
    % A class with a single object has no within-class pairs. uc(j) is
    % left at 0 so that the 0/0 = NaN of the plain formula cannot
    % propagate into within_mean and between_mean (its weight is 0).
end

% Number of ordered within-class pairs per class, used as weights
npairs = c.clsizes'.^2 - c.clsizes';
c.within_mean = uc*npairs/(sum(c.clsizes.^2) - m);

% NOTE(review): m*(m-1) is used as the total off-diagonal sum, which
% presumably relies on DISNORM scaling the average distance to 1.
ud = (m*(m-1) - uc*npairs) / (m*(m-1) - sum(c.clsizes.^2) + m);
c.between_mean = ud;

[nef,ner,w]   = checkeucl(d);
c.pe_mapping  = w;
c.signature   = getsig(w);
c.eigenvalues = getdata(w,'eval');
c.nef         = nef;
c.ner         = ner;
c.trineq      = checktr(d,Ntr);   % honour the caller supplied number of trials

[A,D] = disspaces(d,w);
nspaces = length(A);
c.loo_a    = zeros(1,nspaces);  % LOO NN errors embedding spaces
c.loo_d    = zeros(1,nspaces);  % LOO NN errors dis spaces
c.lcurve_a = cell(1,nspaces);   % NN Learning curves embedding spaces
c.lcurve_d = cell(1,nspaces);   % NN Learning curves dis spaces
c.anames   = cell(1,nspaces);   % names embedded spaces
c.dnames   = cell(1,nspaces);   % names dis spaces
t = sprintf('Compute %i learning curves: ',nspaces*2);
prwaitbar(nspaces*2,t);
for j = 1:nspaces
    c.loo_a(j) = nne(D{j});
    ddj = distm(D{j});
    c.loo_d(j) = nne(ddj);
    prwaitbar(nspaces*2,j*2-1,[t int2str(j*2-1)]);
    c.lcurve_a{j} = nnerr(D{j});
    c.lcurve_a{j}.names = getname(A{j});
    c.anames{j} = getname(A{j});
    prwaitbar(nspaces*2,j*2,[t int2str(j*2)]);
    c.lcurve_d{j} = nnerr(ddj);
    c.lcurve_d{j}.names = getname(D{j});
    c.dnames{j} = getname(D{j});
end
prwaitbar(0);

if makefigs
    make_figs(d,c);
else
    show_figs(c,d,Nsubeucl)
end

return

function show_figs(c,d,Nsubeucl)
% Create the characterization figures for the dissimilarity dataset D,
% using the results stored in the structure C.
%
%   c        - structure with the fields pe_mapping, eigenvalues,
%              lcurve_a, lcurve_d and clsizes
%   d        - dissimilarity dataset the figures are based on
%   Nsubeucl - second argument of CHECKSUBEUCL (default 50)

if nargin < 3 || isempty(Nsubeucl)
    Nsubeucl = 50;
end

fonts  = 12;
m      = size(d,1);
nclass = getsize(d,3);
delfigs

% Dissimilarity matrix as a gray value image
figure; imagesc(+d);
colormap gray
axis off;
axis square
title('Dissimilarity Matrix');
fontsize(fonts);

% Objects projected by the first two columns of the PE mapping
figure; scatterd(d*c.pe_mapping(:,1:2));
title('Scatterplot on first two positive eigenvectors')
xlabel('Eigenvector 1');
ylabel('Eigenvector 2');
fontsize(fonts);

figure; plotspectrum(c.eigenvalues);
fontsize(fonts);

% Learning curves of the first four embedded spaces plus the first
% dissimilarity space as reference
figure; plote([c.lcurve_a(1:4) c.lcurve_d(1)],[],char('k-','r-','b-','m-','k--'));
limit_xaxis(m);
fontsize(fonts);

% Learning curves of the first four dissimilarity spaces plus the first
% embedded space as reference
if isfield(c,'lcurve_d')
    figure; plote([c.lcurve_d(1:4) c.lcurve_a(1)],[],char('k--','r--','b--','m--','k-'));
    limit_xaxis(m);
    fontsize(fonts);
end

[nef,ner,N] = checkeucl(d,'all');
figure;
semilogx(N,nef);
linewidth(2); fontsize(fonts);
limit_xaxis(m);
xlabel('Subset size')
ylabel('Fraction')
title('Negative Eigen Fraction')

figure; semilogx(N,ner);
linewidth(2); fontsize(fonts);
limit_xaxis(m);
xlabel('Subset size')
ylabel('Fraction')
title('Negative Eigen Ratio')

figure;
nep = checksubeucl(d,Nsubeucl);
n = find(nep==1,1);         % first entry that equals 1
if isempty(n), n = 0; end
% NOTE(review): when nep never reaches 1 only nep(1) is plotted; confirm
% whether the complete curve was intended in that case.
last = min(n+1,numel(nep)); % do not index beyond the end of nep
plot(nep(1:last));
linewidth(2); fontsize(fonts);
xlabel('Subset size')
ylabel('Fraction')
title('Fraction of Non-Euclidean Subsets')

% Histogram of all distances, overlaid with the within-class distances
figure;
dall = +d(:);
bins = 0:0.05:ceil(max(dall*20))/20;
hist(dall,bins);
dc = zeros(sum(c.clsizes.^2),1);
n = 0;
for j = 1:nclass
    nn = c.clsizes(j)^2;
    dj = +selcdat(d,j);
    dc(n+1:n+nn) = dj(:);
    n = n+nn;
end
hold on
hist(+dc(:),bins);
h = get(gca,'Children');
set(h(1),'facecolor',[1 0 0]);  % h(1) is recolored red
V = axis;
axis([-0.1 max(dall) 0 V(4)]);
fontsize(fonts)
legend('between class','within class');
title('Histogram of normalized distances')

showfigs

return

function limit_xaxis(m)
% Set the upper x-limit of the current axes to m+1 and place ticks at
% those values of 1, 10, 100, 1000 that do not exceed m.

V = axis; V(2) = m+1; axis(V);
ticks = [1 10 100 1000];
ticks = ticks(ticks <= m);
set(gca,'xtick',ticks);
set(gca,'xticklabel',ticks);

return

253 | %FSAVE Save current figure as eps and fig
|
---|
254 | %
|
---|
255 | % FSAVE <dir,fig_name>
|
---|
256 |
|
---|
function fsave(datdir,file)
% Write the current figure to FULLFILE(datdir,file), once through
% EXPORTFIG as eps and once through SAVEAS as fig. The figure is set to
% [1 1 900 600] pixels for the export; its units and position are put
% back afterwards.

target = fullfile(datdir,file);
hfig   = gcf;

% Remember the present geometry, read before the units are changed
old_units = get(hfig,'units');
old_pos   = get(hfig,'position');

set(hfig,'units','pixels');
set(hfig,'position',[1 1 900 600]);

exportfig(hfig,target,'format','eps','preview','tiff','color','cmyk')
% alternative export formats, currently disabled:
%exportfig(hfig,target,'format','png','color','cmyk')
%exportfig(hfig,target,'format','jpeg100','color','cmyk')
saveas(hfig,target,'fig')

% Units first, so that the stored position is interpreted in them
set(hfig,'units',old_units);
set(hfig,'position',old_pos);

return