Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: prextra/createA.m @ 5

Last change on this file since 5 was 5, checked in by bduin, 14 years ago

File size: 5.0 KB

Line
function [A,Nxi,A2] = createA(X,y,rtype,par,seed)
% [A,Nxi,A2] = CREATEA(X,Y,RTYPE,PAR,SEED)
%
% Create the data matrix A containing pairwise difference vectors
% (negative object minus positive object) of the rows of data matrix X,
% using the labels Y (-1/+1) to separate the two classes.
% Because the number of ALL pairwise difference vectors is Nneg*Npos and
% can become huge, the set can be reduced by choosing an appropriate
% RTYPE.
%
%  RTYPE  'full'   use all Nneg*Npos constraints
%         'subs'   randomly sample (with replacement) PAR constraints
%         'subk'   randomly sample (with replacement)
%                  ceil(PAR*size(X,1)) constraints
%         'knn'    for each object, use the PAR nearest neighbors in
%                  the other class
%         'xval'   build all constraints, keep PAR random ones in A and
%                  return (at most 100000 of) the remaining ones in A2
%         'xvalk'  as 'xval', but keep ceil(PAR*size(X,1)) constraints
%         'kmeans' use k-means clustering with k=PAR per class and take
%                  all differences between the cluster centers
%         'randk'  subsample objects per class to get roughly
%                  PAR*(Npos+Nneg) constraints
%
% Input:
%   X      data matrix, one object per row
%   Y      labels, -1 for negative and +1 for positive objects
%   RTYPE  reduction type (string, see above)
%   PAR    parameter of the reduction type (see above)
%   SEED   optional; when given and not empty, the random generator is
%          seeded with rand('state',SEED) before sampling
%
% Output:
%   A      matrix with one difference vector per row
%   NXI    number of rows in A
%   A2     held-out difference vectors ('xval' and 'xvalk' only,
%          otherwise empty)
%
% An error is raised when RTYPE is not one of the types listed above.
%
if nargin<5
	seed = [];
end
% If a seed is defined, set it:
if ~isempty(seed)
	rand('state',seed);
end

A2 = [];
%---create A for optauc

k = size(X,2);

% find the objects of both classes:
Ineg = find(y==-1);
Ipos = find(y==+1);
Nneg = length(Ineg);
Npos = length(Ipos);

% depending on the reduction type
switch rtype
case 'full' % take all the possibilities:
	A = createA_pairdiffs(X(Ineg,:),X(Ipos,:));
	Nxi = Nneg*Npos;
case 'subk' % subsample the possibilities, but now not a fixed number,
	% but PAR times the number of training objects:
	Nxi = ceil(par*size(X,1));
	A = zeros(Nxi,k);
	% draw the positive indices first, then the negative ones (the
	% order matters for reproducibility with a given SEED):
	Ip = floor(Npos*rand(Nxi,1))+1;
	In = floor(Nneg*rand(Nxi,1))+1;
	for i=1:Nxi
		A(i,:) = X(Ineg(In(i)),:) - X(Ipos(Ip(i)),:);
	end
case 'subs' % subsample the possibilities:
	Nxi = par;
	A = zeros(Nxi,k);
	% draw the positive indices first, then the negative ones (the
	% order matters for reproducibility with a given SEED):
	Ip = floor(Npos*rand(Nxi,1))+1;
	In = floor(Nneg*rand(Nxi,1))+1;
	for i=1:Nxi
		A(i,:) = X(Ineg(In(i)),:) - X(Ipos(Ip(i)),:);
	end
case 'knn' % only use the PAR nearest neighbors
	Nxi = ceil((Nneg+Npos)*par);
	A = zeros(Nxi,k);
	% first process all the neg. examples:
	D = sqeucldistm(X(Ineg,:),X(Ipos,:));
	[dummy,I] = sort(D,2);
	dummyk = 0;
	for i=1:Nneg
		for j=1:par
			thispos = I(i,j);
			dummyk = dummyk+1;
			A(dummyk,:) = X(Ineg(i),:)-X(Ipos(thispos),:);
		end
	end
	% then do all the pos. examples:
	D = D'; % (no need to recompute D)
	[dummy,I] = sort(D,2);
	for i=1:Npos
		for j=1:par
			thisneg = I(i,j);
			dummyk = dummyk+1;
			% still negative minus positive:
			A(dummyk,:) = -X(Ipos(i),:)+X(Ineg(thisneg),:);
		end
	end
case 'randk' % randomly chosen objs such that you have k(Npos+Nneg)
	% constraints.
	% Keep a fraction q of each class such that
	% (q*Npos)*(q*Nneg) = par*(Npos+Nneg):
	q = sqrt(par*(Npos+Nneg)/(Npos*Nneg));
	% never ask for more objects than a class contains:
	qpos = min(ceil(q*Npos),Npos);
	qneg = min(ceil(q*Nneg),Nneg);
	Nxi = qpos*qneg;

	% first select the neg. examples:
	I = randperm(Nneg); In = Ineg(I(1:qneg));
	% then select the pos. examples:
	I = randperm(Npos); Ip = Ipos(I(1:qpos));
	% run over all possibilities:
	A = createA_pairdiffs(X(In,:),X(Ip,:));
case 'xval' % take all the possibilities and use part for testing:
	A = createA_pairdiffs(X(Ineg,:),X(Ipos,:));
	[A,A2,Nxi] = createA_holdout(A,par);
case 'xvalk' % take all the possibilities and use part for testing:
	% the number of constraints has to be an integer, otherwise the
	% indexing of the held-out part fails:
	par = ceil(par*size(X,1));
	A = createA_pairdiffs(X(Ineg,:),X(Ipos,:));
	[A,A2,Nxi] = createA_holdout(A,par);
case 'kmeans'
	wp = kmeans_dd(X(Ipos,:),0.1,par);
	wn = kmeans_dd(X(Ineg,:),0.1,par);
	Xp = wp.data.w;
	Xn = wn.data.w;
	Nxi = par*par;
	A = createA_pairdiffs(Xn(1:par,:),Xp(1:par,:));
otherwise
	error(sprintf('Type %s is not defined',rtype));
end

return

function A = createA_pairdiffs(Xn,Xp)
% All pairwise differences Xn(i,:)-Xp(j,:); the rows are ordered with i
% (rows of Xn) as the slow index and j (rows of Xp) as the fast index.
nn = size(Xn,1);
np = size(Xp,1);
A = zeros(nn*np,size(Xn,2));
for i=1:nn
	A((i-1)*np+(1:np),:) = repmat(Xn(i,:),np,1) - Xp;
end
return

function [A,A2,par] = createA_holdout(A,par)
% Randomly keep PAR rows of A as constraints and return (at most 100000
% of) the remaining rows in A2 for evaluation. The third output is the
% number of rows that is actually kept.
I = randperm(size(A,1));
if par>=size(A,1)
	warning(sprintf('More constraints requested than available (%d and %d)',par,size(A,1)));
	disp('Now using half for training and testing');
	par = ceil(size(A,1)/2);
end
% if data is really really huge, then subsample more...
Mega=100000;
if length(I)-par>Mega
	A2 = A(I((par+1):(par+Mega)),:);
else
	A2 = A(I((par+1):end),:);
end
A = A(I(1:par),:);
return

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: prextra/createA.m @ 5

Download in other formats: