%GENARCHE Generate classifier archetypical data
%
%   A = GENARCHE(CLASSF,N,SEED,VERSION)
%
% INPUT
%   CLASSF : Classifier (untrained mapping or name in string)
%   N      : Number of objects per class (scalar), default 50
%   SEED   : Initial state for random generator, default as it is
%   VERSION: Version, default is most recent one (currently not used
%            by the code)
%
% OUTPUT
%   A      : Dataset

function a = genarche(classf,n,seed,version)
% Build a two-class dataset that is "archetypical" for the classifier
% CLASSF, i.e. a problem selected by the classifier name (or by the name
% of an untrained mapping). N is the scalar number of objects per class,
% SEED (if not empty) resets the state of RAND and RANDN first.
% VERSION is accepted for interface compatibility only; it is not used.
% Raises an error for unsupported CLASSF types and unknown names.

if nargin < 4, version = 0; end
if nargin < 3, seed = []; end
if nargin < 2, n = 50; end

% Reproducible data on request: both generators get the same state.
if ~isempty(seed)
    rand('state',seed);
    randn('state',seed);
end

% Resolve the name that selects the problem.
if ismapping(classf) && isuntrained(classf)
    classname = getname(classf);
elseif ischar(classf)
    classname = classf;
else
    error('Classifier should be given by string or by untrained mapping')
end

switch classname
    case {'qdc','QDA'}
        a = gendath([n n]);
        a = a * [4 1; -1 4] ./ sqrt(17);
    case {'udc','UDA'}
        a = gendath([n n]);
    case {'ldc','fisherc','LDA'}
        a = gendatd([n n]);
    case {'nmc','Nearest-Mean'}
        a = gendats([n n]);
    case {'treec','Dec-Tree'}
        a = dataset(arche_tree(n),genlab([n n]),'prior',0);
    case {'LM-Neural-Net'}
        % same rectangles as for the decision tree, but rotated
        a = arche_tree(n)*[1 1; -1 1];
        a = dataset(a,genlab([n n]),'prior',0);
    case {'lmnc','neurc'}
        % Train the reference network with a fixed generator state and
        % restore the caller's state afterwards.
        state1 = rand('state');
        state2 = randn('state');
        rand('state',0);
        randn('state',0);
        a = gendatb([200,200],0.5);
        w = lmnc(a,5,40);        % find a feasible LM network with 5 hu
        rand('state',state1);
        randn('state',state2);
        b = gauss(4*n,[10 10],[9 0; 0 9]);
        b = exp(b/5);
        b = b - repmat(mean(b)+[1 1],4*n,1);
        %b = gendatb([2*n 2*n],1); % generate a too large dataset
        d = +(b*w);              % classify
        u = rand(4*n,1);         % reverse labels at random according posteriors
        J = find(u < min(d,[],2));
        [dd,lab0] = max(d,[],2);
        lab = lab0;
        lab(J) = 3-lab0(J);
        idx1 = find(lab == 1);
        idx2 = find(lab == 2);
        % 4*n candidates are drawn; make a shortage of one class explicit
        % instead of failing on an out-of-range index.
        if numel(idx1) < n || numel(idx2) < n
            error('Too few objects generated for one of the classes of %s, try another seed',classname)
        end
        a = dataset([b(idx1(1:n),:);b(idx2(1:n),:)],genlab([n n]),'prior',0);
    case {'parzenc','Parzen'}
        a = gendatb([n n],2);
    case {'knnc','K-NN'}
        a = gendatb([n n],2);
        a = exp(a/7);
    case {'nnc','1nnc','spirals','1-NN'}
        x = rand(n,1)*4*pi+0.25*pi;
        a = [x.*x.*sin(x),x.*x.*cos(x)];
        y = rand(n,1)*4*pi+0.25*pi;
        b = [-y.*y.*sin(y),-y.*y.*cos(y)];
        a = dataset([a;b],genlab([n n]),'prior',0);
    case {'svc_nu'}
        % make a linearly separable problem, where one of the classes has
        % a terribly long moon-shape tail
        n = round(n/2);
        S = cat(3,5*eye(2),7*eye(2));
        extrablob = gauss([n n],[-35 35; 15 0],S);
        a = [gendatd([n n],2,3.5); extrablob];
    case {'asym-linear'}
        % The base set below was commented out while the final line still
        % used it, which made this branch fail on an undefined variable.
        % 'asym-linear4' used to be listed here as well, which made the
        % 'SVM-1' branch unreachable under that name.
        a = gendatd([2*n 2*n],2,3);
        b = gendats([n n]);
        b1 = seldat(b,1) + repmat([0 10],n,1);
        b2 = seldat(b,2) + repmat([0 -10],n,1);
        a = gendat([a; b1; b2],[n n]);
    case {'asym-linear2'}
        a1 = [rand(n,1) rand(n,1)*10];
        a2 = [rand(20*n,1)*0.1-1 rand(20*n,1)*10];
        b1 = [rand(n,1) rand(n,1)*10];
        b2 = [rand(2*n,1)*100+1 rand(2*n,1)*10];
        %b3 = [rand(18*n,1)+100 rand(18*n,1)*10];
        a = [a1;a2;b1;b2]*[1 1;-1 1];
        a = [a; +gauss(18*n,[40,80],(10*eye(2)))];
        a = dataset(a,genlab([21*n 21*n]));
        a = setprior(a,0);
        a = gendat(a,[n n]);
    case {'asym-linear3'}
        a1 = [rand(n,1)*10 rand(n,1)*100];
        b1 = [rand(n,1)*10+9 rand(n,1)*100+80];
        a1 = dataset([a1;b1]*[1 1; -1 1],genlab([n n]));
        b2 = gauss(n,[-100 150],10*eye(2));
        a2 = gauss(n,[-50 25],10*eye(2));
        a2 = dataset([a2;b2],genlab([n n]));
        a = setprior([a1;a2],0);
        a = gendat(a,[n n]);
    case {'SVM-1','asym-linear4'}
        a1 = [[rand(20*n,1)*5 rand(20*n,1)*100]; +gauss(2*n,[8,95],4*eye(2))];
        b1 = [[rand(20*n,1)*5+11 rand(20*n,1)*100]; +gauss(2*n,[8,5],4*eye(2))];
        a = [a1;b1]*[2 0; 0 1];
        a = dataset(a*[1 1; -1 1],genlab([22*n 22*n]));
        a = setprior(a,0);
        a = gendat(a,[n n]);
    case {'Logistic','asym-linear5'}
        a1 = [gauss(n,[5,50],[1 0; 0 100]); +gauss(n,[-3,25],4*eye(2))];
        b1 = [gauss(n,[7.5,50],[1 0; 0 100]); +gauss(n,[16,75],4*eye(2))];
        a = dataset([a1;b1]*[1 1; -1 1],genlab([2*n 2*n]));
        a = setprior(a,0);
        a = gendat(a,[n n]);
    case('line-plane')
        a = [rand(n,1) 0.25*((rand(n,1)*2).^2)];
        a = [a; rand(n,2)*[1 0; 0 0.01]+repmat([0 -0.01],n,1)]*[1 1; -1 1];
        a = dataset(a,genlab([n n]));
    case {'rbnc','RB-Neural-Net'}
        a = gendatb([n n]);
    case 'River'
        a = genriver([n n],1,0.3);
    case {'diamonds','Dis-Rep-LC'}
        a = rand(n,2);
        m = genclass(n,0.25*ones(1,4));
        d = (sqrt(2)-1)/2;
        b = [];
        b = [b; randxy(m(1),[-d -d],[0 1])];
        b = [b; randxy(m(2),[-d 1],[1 d+1])];
        b = [b; randxy(m(3),[1 0],[1+d 1+d])];
        b = [b; randxy(m(4),[0 -d],[1+d 0])];
        a = dataset([a;b],genlab([n n]),'prior',0);
        a = a*[1 1; 1 -1];
    case {'rbsvc','RB-SVM'}
        a = dataset(arche_rings(n),genlab([n n]),'prior',0);
    case 'circ2'
        % ring problem, second feature stretched and the result rotated
        a = dataset(arche_rings(n),genlab([n n]),'prior',0);
        a(:,2) = 3*a(:,2);
        a = a*[1 1; 1 -1];
    case 'chess4'
        a = dataset(arche_chess(n),genlab([n n]),'prior',0);
        a = a*[1 1; 1 -1];
    case 'chess41'
        % chessboard, second feature stretched before the rotation
        a = dataset(arche_chess(n),genlab([n n]),'prior',0);
        a(:,2) = 3*a(:,2);
        a = a*[1 1; 1 -1];
    case {'Naive-Bayes'}
        a = [[randn(n,1)/6-0.5; randn(n,1)/6+0.5] 2*rand(2*n,1)-1];
        b = [2*rand(2*n,1)-1 [randn(n,1)/6-0.5; randn(n,1)/6+0.5]];
        a = dataset([a;b],genlab([2*n 2*n]));
        a = setprior(a,0);
        a = gendat(a,[n n]);
    otherwise
        error('%s is not implemented',classname)
end

% The dataset is named after the classifier it was generated for.
a = setname(a,classname);

function x = arche_tree(n)
% Raw 2*N-by-2 data: N points uniform on [2,5]x[0,5] followed by N points
% divided (by GENCLASS, priors 2/3 and 1/3) over [0,2]x[0,5] and [0,5]x[5,6].

m = genclass(n,[2/3 1/3]);
x = [rand(n,1)*3+2 rand(n,1)*5];
x = [x; rand(m(1),1)*2 rand(m(1),1)*5; rand(m(2),1)*5 rand(m(2),1)+5];

function x = arche_rings(n)
% Raw 2*N-by-2 data: N points in the ring 0.5 < radius < 1 followed by
% N points divided over the ring 1 < radius < sqrt(1.5) and the disc of
% radius 0.5.

a = circ(n,1,0.5);
m = genclass(n,[2/3 1/3]);
b = circ(m(1),sqrt(1.5),1);
c = circ(m(2),0.5);
x = [a;b;c];

function x = arche_chess(n)
% Raw 2*N-by-2 data on a 4x4 chessboard of unit cells: N points divided
% over the eight cells of one colour followed by N points divided over
% the eight cells of the other colour.

ma = genclass(n,ones(1,8)/8);
mb = genclass(n,ones(1,8)/8);
% lower-left corners of the unit cells, in the order they are filled
ca = [0 0; 2 0; 1 1; 3 1; 0 2; 2 2; 1 3; 3 3];
cb = [1 0; 3 0; 0 1; 2 1; 1 2; 3 2; 0 3; 2 3];
a = zeros(0,2);
for k = 1:8
    a = [a; randxy(ma(k),ca(k,:),ca(k,:)+[1 1])];
end
b = zeros(0,2);
for k = 1:8
    b = [b; randxy(mb(k),cb(k,:),cb(k,:)+[1 1])];
end
x = [a;b];

function r = randxy(n,x,y)
%RANDXY Uniform random points in an axis-parallel rectangle
%
%   R = RANDXY(N,X,Y) returns an N-by-2 matrix of points drawn uniformly
%   between the corner X (added as offset) and the corner Y.
%   Defaults: N = 100, X = [0 0], Y = X + [1 1].

if nargin < 3
    if nargin < 2
        if nargin < 1
            n = 100;
        end
        x = [0 0];
    end
    y = x + [1 1];
end

% scale unit-square samples to the rectangle size, then move to corner X
extent = repmat(y-x,n,1);
offset = repmat(x,n,1);
r = rand(n,2) .* extent + offset;

function x = circ(n,r1,r2)
%CIRC Uniform random points in a disc or ring around the origin
%
%   X = CIRC(N,R1,R2) returns an N-by-2 matrix of points with
%   R2 < distance to origin < R1, found by rejection sampling from the
%   enclosing square. Defaults: R1 = 1, R2 = 0.
%
%   The first batch is drawn exactly as before; further batches are drawn
%   only when it holds fewer than N accepted points. A single batch used
%   to be indexed unconditionally, which failed at random for small N.
%   An error is raised if the region is empty (R1 <= 0 or R2 >= R1).

if nargin < 3, r2 = 0; end
if nargin < 2, r1 = 1; end

if ~(r1 > 0 && r2 < r1)
    error('circ: the outer radius should be positive and larger than the inner radius')
end

% Batch size: twice the number wanted, corrected for the removed inner disc.
gap = r1*r1 - r2*r2;
if gap <= 0
    % only reachable for r2 <= -r1, where the inner bound excludes nothing
    gap = r1*r1;
end
m = ceil(2*n*(r1*r1)/gap);

x = zeros(0,2);
while size(x,1) < n
    c = rand(m,2) - repmat([0.5 0.5],m,1);   % unit square centred at 0
    c = c*r1*2;                              % square enclosing the outer circle
    d = sqrt(sum(c.*c,2));
    x = [x; c(d < r1 & d > r2,:)];
end
x = x(1:n,:);

function x = gausst(n,u,s,t)
%GAUSST Warped one-dimensional normal sample
%
%   X = GAUSST(N,U,S,T) draws N standard normal values z, warps them to
%   T*z.*exp(abs(T)*z), shifts the result to sample mean U and finally
%   multiplies it by S and divides it by its sample standard deviation.
%
%   NOTE(review): the rescaling is applied after the shift to U, so the
%   final sample mean is U*S/std rather than U - confirm this is intended.
%   This function is not called anywhere in the visible part of the file.

z = randn(n,1);
warped = t*z.*exp(abs(t)*z);
shifted = warped - mean(warped) + u;
x = s * shifted ./ std(shifted);
