1 | %OPAM Orthogonal projection approach: selects "pure" features
|
---|
2 | %
|
---|
3 | %
|
---|
4 | % U = OPAM
|
---|
5 | % U = OPAM([], OPT)
|
---|
6 | %
|
---|
7 | % W = A*U
|
---|
8 | % [W, SEL_VAR_DIS, DIS] = MAP(A, U)
|
---|
9 | % [W, SEL_VAR_DIS, DIS, ERR] = OPAM(A, OPT)
|
---|
10 | %
|
---|
11 | % C = B*W
|
---|
12 | % C = MAP(B, W)
|
---|
13 | % C = OPAM(B, W)
|
---|
14 | %
|
---|
15 | %
|
---|
16 | % INPUT
|
---|
17 | % A [M -by- K] training dataset
|
---|
18 | % B [N -by- K] test dataset
|
---|
19 | %
|
---|
20 | % U [K -by- ?] untrained OPAM mapping
|
---|
21 | % W [K -by- K_SEL] trained OPAM feature selection mapping;
|
---|
22 | % indices of selected variables stored in W.DATA.IND
|
---|
23 | %
|
---|
24 | % OPT. optional OPA settings
|
---|
25 | % MAX_NUM the max number of variables to select, INF means selecting as many variables as possible,
|
---|
26 | % until the rank of the dataset is exhausted; at least one variable will always be selected; by default = INF
|
---|
27 | % EPS stopping criterion; the algorithm stops when ERR < EPS, and such features are not included in the set; by default = 1e-2
|
---|
28 | % VERBOSE force/disable messages: PRPROGRESS(VERBOSE,['%s\n'],MSG) will be run;
|
---|
29 | % by default = []
|
---|
30 | %
|
---|
31 | % OUTPUT
|
---|
32 | % U [K -by- ?] untrained OPAM mapping
|
---|
33 | % W [K -by- K_SEL] trained OPAM feature selection mapping;
|
---|
34 | % indices of selected variables stored in W.DATA.IND
|
---|
35 | % SEL_VAR_IND [K_SEL -by- 1] vector of selected variables indices (not a separate output; stored in W.DATA.IND)
|
---|
36 | % SEL_VAR_DIS [K_SEL -by- 2] two vectors of selected variables dissimilarities
|
---|
37 | % the first column holds the absolute dissimilarities, the second column the relative dissimilarities
|
---|
38 | % DIS [K_SEL -by- K -by- 2] two matrices whose rows are dissimilarity spectra;
|
---|
39 | % each row contains dissim of all variables at particular selection step;
|
---|
40 | % the first matrix contains absolute dissimilarities, the second one contains relative dissimilarities
|
---|
41 | % ERR [K_SEL -by- 1] relative squared error of the data projection onto the subspace spanned by the selected features
|
---|
42 | % C [N -by- K_SEL] dataset, result of mapping execution on dataset B
|
---|
43 | %
|
---|
44 | % DESCRIPTION
|
---|
45 | % Implementation of the OPA algorithm as described in
|
---|
46 | % "Orthogonal Projection Approach Applied to Peak Purity Assessment"
|
---|
47 | % by F. C. Sanchez, J. Toft, B. van den Bogaert, D. L. Massart, Anal. Chem. 68, 1, 79-85, 1996
|
---|
48 | % (applied to the transposed data matrix)
|
---|
49 | %
|
---|
50 | % Absolute dissim measure abs_diss_(n+1) = ||dx||^2*det(Y_n^T Y_n)
|
---|
51 | % Relative dissim measure rel_diss_(n+1) = ||dx||^2
|
---|
52 | %
|
---|
53 | % Here, Y_n is the set of n selected features (columns), which are normalized to have a unit length, dx is the part of
|
---|
54 | % a feature x which is orthogonal to span(Y_n). If n=0 (diss(1)), the dissimilarity to the mean feature is measured.
|
---|
55 | %
|
---|
56 | % SEE ALSO
|
---|
57 | % SIMPLISMAM, VARIMAXFM, SIMPLISMAPS, OPAPS, VARIMAXOM, ALS, OPA
|
---|
58 |
|
---|
59 | % Copyright: S.Verzakov, s.verzakov@ewi.tudelft.nl
|
---|
60 | % Faculty EWI, Delft University of Technology
|
---|
61 | % P.O. Box 5031, 2600 GA Delft, The Netherlands
|
---|
62 |
|
---|
63 | % $Id: opam.m,v 1.7 2007/01/31 21:52:19 serguei Exp $
|
---|
64 |
|
---|
function [w, sel_var_dis, dis, err] = opam(a,w)
% Entry point with three modes, selected by the arguments:
%   * A missing or empty      -> return an untrained mapping holding W (options or []).
%   * W missing/not a mapping -> train: run OPA on the transposed data of A and
%                                return a trained feature-selection mapping.
%   * W is a mapping          -> apply: return dataset A restricted to the
%                                features stored in the mapping.
%
% SEL_VAR_DIS, DIS and ERR are produced by the training mode only; in the other
% two modes they are returned as [].

% Give the diagnostic outputs a defined value up front, so that requesting them
% in the untrained/applying modes does not fail with "output argument not assigned".
sel_var_dis = [];
dis = [];
err = [];

% No dataset given: return untrained mapping.
% (|| short-circuits, so A is never touched when it was not supplied.)
if (nargin < 1) || isempty(a)
    if nargin < 2
        w = [];
    end
    w = mapping(mfilename,'untrained',{w});
    w = setname(w,'OPA Mapping');
    return
end

isdataset(a);   % Assert that A is a dataset.

if (nargin < 2) || ~isa(w,'mapping')
    % ---- training ----
    if nargin < 2
        w = [];   % no options supplied: pass [] through to OPA
    end

    % OPA is run on the transposed raw data matrix; W carries the options.
    % The first output of OPA is not used here (kept as a named placeholder
    % instead of '~' for compatibility with older MATLAB releases).
    [z, sel_var_ind, sel_var_dis, dis, err] = opa((+a)',w); %#ok<ASGLU>

    % Swap the first two dimensions of DIS to undo the transposition above.
    dis = permute(dis,[2 1 3]);

    % The trained mapping only needs to remember which features were selected.
    w_data.ind = sel_var_ind;

    % Output feature labels are the labels of the selected input features.
    labels = a.featlab(sel_var_ind,:);

    % Save all useful data
    w = mapping(mfilename,'trained',w_data,labels,size(a,2),length(sel_var_ind));
    w = setname(w,'OPA Feature Selection');
else
    % ---- applying ----
    w_data = +w;              % Unpack the mapping.
    a = a(:,w_data.ind);      % Keep the selected features only.

    % Remember the spectral units, strip the spectra info from the reduced
    % dataset and, if units were present, re-attach type and units.
    units = specunits(a);
    a = remove_spectra_info(a);
    if units
        user = a.user;
        user.type = 'spectra';
        user.units = units;
        a.user = user;
    end

    a.name = [a.name ': selected channels'];
    w = a;                    % In applying mode the first output is the mapped dataset.
end

return
118 |
|
---|