[5] | 1 | %GENARCHE Generate classifier archetypical data
|
---|
| 2 | %
|
---|
| 3 | % A = GENARCHE(CLASSF,N,SEED,VERSION)
|
---|
| 4 | %
|
---|
| 5 | % INPUT
|
---|
| 6 | % CLASSF : Classifier (untrained mapping or its name as a string)
|
---|
| 7 | % N : Number of objects per class (scalar, used for both classes), default 50
|
---|
| 8 | % SEED : Initial state for the random generators; default: leave their current state unchanged
|
---|
| 9 | % VERSION: Version, default is most recent one
|
---|
| 10 | %
|
---|
| 11 | % OUTPUT
|
---|
| 12 | % A : Dataset
|
---|
| 13 |
|
---|
function a = genarche(classf,n,seed,version)
% Generate the archetypical dataset selected by CLASSF.
%
%   CLASSF  : untrained mapping, or a character string naming the problem
%   N       : scalar number of objects per class (default 50, also used
%             when N is empty)
%   SEED    : when non-empty, assigned as state to both RAND and RANDN
%   VERSION : accepted for interface compatibility; not used in this code
%
% Returns the generated dataset A, named after CLASSF.
% Raises an error when CLASSF is neither a string nor an untrained
% mapping, or when no problem is implemented for the resulting name.

if nargin < 4, version = 0; end
if nargin < 3, seed = []; end
if nargin < 2 || isempty(n), n = 50; end

% Optional reproducibility: seed both generators with the same state.
if ~isempty(seed)
    rand('state',seed);
    randn('state',seed);
end

% Resolve the name that selects the problem below.
if ismapping(classf) && isuntrained(classf)
    classname = getname(classf);
elseif ischar(classf)
    classname = classf;
else
    error('Classifier should be given by string or by untrained mapping')
end

switch classname
    case {'qdc','QDA'}
        a = gendath([n n]);
        a = a * [4 1; -1 4] ./ sqrt(17);
    case {'udc','UDA'}
        a = gendath([n n]);
    case {'ldc','fisherc','LDA'}
        a = gendatd([n n]);
    case {'nmc','Nearest-Mean'}
        a = gendats([n n]);
    case {'treec','Dec-Tree'}
        a = dataset(gentreedata(n),genlab([n n]),'prior',0);
    case {'LM-Neural-Net'}
        % same point cloud as the tree problem, mapped by [1 1; -1 1]
        a = gentreedata(n);
        a = a*[1 1; -1 1];
        a = dataset(a,genlab([n n]),'prior',0);
    case {'lmnc','neurc'}
        % Train the reference network under a fixed random state, then
        % restore the caller's state so the data below still follows SEED.
        state1 = rand('state');
        state2 = randn('state');
        rand('state',0);
        randn('state',0);
        a = gendatb([200,200],0.5);
        w = lmnc(a,5,40);            % find a feasible LM network with 5 hu
        rand('state',state1);
        randn('state',state2);
        % generate a too large candidate set (4*n objects)
        b = gauss(4*n,[10 10],[9 0; 0 9]);
        b = exp(b/5);
        b = b - repmat(mean(b)+[1 1],4*n,1);
        d = +(b*w);                  % classify
        u = rand(4*n,1);             % reverse labels at random according posteriors
        J = find(u < min(+d,[],2));
        [dd,lab0] = max(+d,[],2);
        lab = lab0;
        lab(J) = 3-lab0(J);
        L1 = find(lab == 1);
        L2 = find(lab == 2);
        % Guard the 1:n selections: the class split of the candidates is
        % random, so one class may hold fewer than n objects.
        if numel(L1) < n || numel(L2) < n
            error('genarche: fewer than N candidate objects in one of the classes')
        end
        a = dataset([b(L1(1:n),:);b(L2(1:n),:)],genlab([n n]),'prior',0);
    case {'parzenc','Parzen'}
        a = gendatb([n n],2);
    case {'knnc','K-NN'}
        a = gendatb([n n],2);
        a = exp(a/7);
    case {'nnc','1nnc','spirals','1-NN'}
        x = rand(n,1)*4*pi+0.25*pi;
        a = [x.*x.*sin(x),x.*x.*cos(x)];
        y = rand(n,1)*4*pi+0.25*pi;
        b = [-y.*y.*sin(y),-y.*y.*cos(y)];
        a = dataset([a;b],genlab([n n]),'prior',0);
    case {'svc_nu'}
        % make a linearly separable problem, where one of the classes has
        % a terribly long moon-shape tail
        n = round(n/2);
        S = cat(3,5*eye(2),7*eye(2));
        extrablob = gauss([n n],[-35 35; 15 0],S);
        a = [gendatd([n n],2,3.5); extrablob];
    case {'asym-linear'}
        % The statement creating the base set had been commented out, so
        % A was read before being assigned. It is restored here.
        % NOTE(review): confirm that this base set is the intended one.
        a = gendatd([2*n 2*n],2,3);
        b = gendats([n n]);
        b1 = seldat(b,1) + repmat([0 10],n,1);
        b2 = seldat(b,2) + repmat([0 -10],n,1);
        a = gendat([a; b1; b2],[n n]);
    case {'asym-linear2'}
        a1 = [rand(n,1) rand(n,1)*10];
        a2 = [rand(20*n,1)*0.1-1 rand(20*n,1)*10];
        b1 = [rand(n,1) rand(n,1)*10];
        b2 = [rand(2*n,1)*100+1 rand(2*n,1)*10];
        a = [a1;a2;b1;b2]*[1 1;-1 1];
        a = [a; +gauss(18*n,[40,80],(10*eye(2)))];
        a = dataset(a,genlab([21*n 21*n]));
        a = setprior(a,0);
        a = gendat(a,[n n]);
    case {'asym-linear3'}
        a1 = [rand(n,1)*10 rand(n,1)*100];
        b1 = [rand(n,1)*10+9 rand(n,1)*100+80];
        a1 = dataset([a1;b1]*[1 1; -1 1],genlab([n n]));
        b2 = gauss(n,[-100 150],10*eye(2));
        a2 = gauss(n,[-50 25],10*eye(2));
        a2 = dataset([a2;b2],genlab([n n]));
        a = setprior([a1;a2],0);
        a = gendat(a,[n n]);
    case {'SVM-1','asym-linear4'}
        % 'asym-linear4' was also listed in the 'asym-linear' case above,
        % which shadowed this branch; it is now listed only here.
        a1 = [[rand(20*n,1)*5 rand(20*n,1)*100]; +gauss(2*n,[8,95],4*eye(2))];
        b1 = [[rand(20*n,1)*5+11 rand(20*n,1)*100]; +gauss(2*n,[8,5],4*eye(2))];
        a = [a1;b1]*[2 0; 0 1];
        a = dataset(a*[1 1; -1 1],genlab([22*n 22*n]));
        a = setprior(a,0);
        a = gendat(a,[n n]);
    case {'Logistic','asym-linear5'}
        a1 = [gauss(n,[5,50],[1 0; 0 100]); +gauss(n,[-3,25],4*eye(2))];
        b1 = [gauss(n,[7.5,50],[1 0; 0 100]); +gauss(n,[16,75],4*eye(2))];
        a = dataset([a1;b1]*[1 1; -1 1],genlab([2*n 2*n]));
        a = setprior(a,0);
        a = gendat(a,[n n]);
    case('line-plane')
        a = [rand(n,1) 0.25*((rand(n,1)*2).^2)];
        a = [a; rand(n,2)*[1 0; 0 0.01]+repmat([0 -0.01],n,1)]*[1 1; -1 1];
        a = dataset(a,genlab([n n]));
    case {'rbnc','RB-Neural-Net'}
        a = gendatb([n n]);
    case 'River'
        a = genriver([n n],1,0.3);
    case {'diamonds','Dis-Rep-LC'}
        % class 1: unit square; class 2: four rectangles around it
        a = rand(n,2);
        m = genclass(n,0.25*ones(1,4));
        d = (sqrt(2)-1)/2;
        b = [];
        b = [b; randxy(m(1),[-d -d],[0 1])];
        b = [b; randxy(m(2),[-d 1],[1 d+1])];
        b = [b; randxy(m(3),[1 0],[1+d 1+d])];
        b = [b; randxy(m(4),[0 -d],[1+d 0])];
        a = dataset([a;b],genlab([n n]),'prior',0);
        a = a*[1 1; 1 -1];
    case {'rbsvc','RB-SVM'}
        a = dataset(genringdata(n),genlab([n n]),'prior',0);
    case 'circ2'
        % ring problem with the second feature stretched, then mapped
        a = dataset(genringdata(n),genlab([n n]),'prior',0);
        a(:,2) = 3*a(:,2);
        a = a*[1 1; 1 -1];
    case 'chess4'
        a = dataset(genchessdata(n),genlab([n n]),'prior',0);
        a = a*[1 1; 1 -1];
    case 'chess41'
        % chessboard with the second feature stretched, then mapped
        a = dataset(genchessdata(n),genlab([n n]),'prior',0);
        a(:,2) = 3*a(:,2);
        a = a*[1 1; 1 -1];
    case {'Naive-Bayes'}
        a = [[randn(n,1)/6-0.5; randn(n,1)/6+0.5] 2*rand(2*n,1)-1];
        b = [2*rand(2*n,1)-1 [randn(n,1)/6-0.5; randn(n,1)/6+0.5]];
        a = dataset([a;b],genlab([2*n 2*n]));
        a = setprior(a,0);
        a = gendat(a,[n n]);
    otherwise
        error(sprintf('%s is not implemented',classname))
end

% Name the dataset after the classifier it was generated for.
if ismapping(classf)
    a = setname(a,[getname(classf) '']);
else
    a = setname(a,classf);
end

function x = gentreedata(n)
% Raw 2n-by-2 data shared by the 'treec' and 'LM-Neural-Net' problems:
% n uniform points in [2,5]x[0,5], followed by n points that GENCLASS
% splits (2/3, 1/3) over [0,2]x[0,5] and [0,5]x[5,6].

m = genclass(n,[2/3 1/3]);
x = [rand(n,1)*3+2 rand(n,1)*5];
x = [x; rand(m(1),1)*2 rand(m(1),1)*5; rand(m(2),1)*5 rand(m(2),1)+5];

function x = genringdata(n)
% Raw 2n-by-2 data shared by the 'rbsvc' and 'circ2' problems:
% n points in the ring 0.5 < r < 1, followed by n points that GENCLASS
% splits (2/3, 1/3) over the ring 1 < r < sqrt(1.5) and the disc r < 0.5.

inner = circ(n,1,0.5);
m = genclass(n,[2/3 1/3]);
outer = circ(m(1),sqrt(1.5),1);
core = circ(m(2),0.5);
x = [inner; outer; core];

function x = genchessdata(n)
% Raw 2n-by-2 data shared by the 'chess4' and 'chess41' problems: a 4x4
% chessboard of unit squares; each class occupies eight squares and its
% n points are divided over them by GENCLASS.

ma = genclass(n,ones(1,8)/8);
mb = genclass(n,ones(1,8)/8);
% lower-left corners of the squares, in the order they are filled
ca = [0 0; 2 0; 1 1; 3 1; 0 2; 2 2; 1 3; 3 3];
cb = [1 0; 3 0; 0 1; 2 1; 1 2; 3 2; 0 3; 2 3];
xa = zeros(0,2);
for k = 1:8
    xa = [xa; randxy(ma(k),ca(k,:),ca(k,:)+[1 1])];
end
xb = zeros(0,2);
for k = 1:8
    xb = [xb; randxy(mb(k),cb(k,:),cb(k,:)+[1 1])];
end
x = [xa; xb];
|
---|
| 234 |
|
---|
function r = randxy(n,x,y)
% RANDXY  Uniform random points in an axis-aligned rectangle.
%
%   R = RANDXY(N,X,Y) returns an N-by-2 matrix of points drawn with RAND
%   and scaled so that each row lies between the corner X and the
%   opposite corner Y.
%
%   Defaults: N = 100, X = [0 0], Y = X + [1 1].

if nargin < 3
    if nargin < 2
        if nargin < 1
            n = 100;
        end
        x = [0 0];
    end
    y = x + [1 1];
end

% stretch the unit square to the rectangle's extent, then move it to X
extent = repmat(y-x,n,1);
origin = repmat(x,n,1);
r = rand(n,2) .* extent + origin;
|
---|
| 243 |
|
---|
function x = circ(n,r1,r2)
% CIRC  Uniform random points in a ring (annulus) around the origin.
%
%   X = CIRC(N,R1,R2) returns an N-by-2 matrix of points whose distance D
%   to the origin satisfies R2 < D < R1. Points are drawn uniformly from
%   the square [-R1,R1]^2 and those outside the ring are rejected.
%
%   Defaults: R1 = 1, R2 = 0 (a full disc).
%
%   An error is raised when N > 0 and R2 >= R1, since no point can then
%   be accepted.

if nargin < 3, r2 = 0; end
if nargin < 2, r1 = 1; end

% Nothing to draw: return an empty point set without touching RAND.
if n <= 0
    x = zeros(0,2);
    return
end
if r2 >= r1
    error('circ: inner radius R2 should be smaller than outer radius R1')
end

% Batch size as in the original single-shot draw.
m = ceil(2*n*(r1*r1)/(r1*r1 - r2*r2));

% A single batch may, by chance, hold fewer than n accepted points, which
% used to make the final selection fail. Keep drawing batches until
% enough points are available; the first batch is unchanged.
x = zeros(0,2);
while size(x,1) < n
    c = rand(m,2) - repmat([0.5 0.5],m,1);
    c = c*r1*2;
    d = sqrt(sum(c.*c,2));
    x = [x; c(d < r1 & d > r2,:)];
end
x = x(1:n,:);
|
---|
| 255 |
|
---|
function x = gausst(n,u,s,t)
% GAUSST  Skewed one-dimensional random sample.
%
%   X = GAUSST(N,U,S,T) draws N standard normal values, warps each value
%   z to T*z.*exp(abs(T)*z), shifts the sample so its mean equals U and
%   finally multiplies it by S/std.
%
%   NOTE(review): the rescaling is applied after the shift, so the final
%   mean is U*S/std rather than U - confirm whether this is intended.

z = randn(n,1);
warped = t*z.*exp(abs(t)*z);
shifted = warped - mean(warped) + u;
x = s * shifted ./ std(shifted);
|
---|
| 261 |
|
---|