function [x,msg] = prmissingvalues(x,val)
%PRMISSINGVALUES Fix the missing values in a dataset
%
%    [X,MSG] = PRMISSINGVALUES(X,VAL)
%
% Fix the missing values (represented by NaN's) in dataset X. String MSG
% gives a text message of what has been done. MSG is the empty string
% when X does not contain any missing values (X is then returned as is).
%
% The following values VAL are possible:
%    'remove'    remove the rows (objects) that contain missing values
%    'delete'    same as 'remove'
%    'mean'      fill the entries with the mean of the non-missing
%                values of their respective column
%    <value>     fill the entries with a fixed constant, which has to
%                be a scalar of class double
%
% The keywords are matched case-insensitively. VAL is only inspected when
% X actually contains missing values.
%
% An error is raised when
%    - 'mean' is requested and a column consists of NaN's only,
%    - VAL is neither one of the keywords above nor a double scalar.
%
% See also datasets

% Copyright: D.M.J. Tax, D.M.J.Tax@prtools.org
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA  Delft, The Netherlands

% Make sure the second output always exists, also when there is nothing
% to fix; otherwise [x,msg] = prmissingvalues(...) fails on clean data.
msg = '';

% Where are the offenders?
I = isnan(x);

% Nothing is missing: return the data untouched.
if ~any(I(:))
	return
end

% Turn VAL into a keyword. Anything that is not text gets an empty
% keyword, so it ends up in the constant-fill branch below.
if ischar(val)
	mode = lower(val);
elseif isa(val,'string') && numel(val)==1
	mode = lower(char(val));
else
	mode = '';
end

switch mode
case {'remove' 'delete'}
	% Keep only the rows without a single NaN.
	% NOTE(review): find() is kept from the original in case X is a
	% dataset object that only accepts numeric indices - confirm.
	keep = find(~any(I,2));
	x = x(keep,:);
	msg = 'Entries with missing values have been removed.';
case 'mean'
	k = size(x,2);
	for i=1:k
		bad = I(:,i);
		if any(bad) % is there a missing value in this feature?
			good = ~bad;
			if ~any(good)
				error('Missing value cannot be filled: all values are NaN.');
			end
			% Mean over the observed values of this feature only.
			mn = mean(x(good,i));
			x(find(bad),i) = mn;
		end
	end
	msg = 'Missing values have been replaced by the mean of the feature.';
otherwise
	% Constant fill. The scalar check matters: x(I) = [] would DELETE
	% elements instead of filling them, and a vector VAL would garble
	% the sprintf message below.
	if ~isa(val,'double') || numel(val)~=1
		error('Missing values can only be filled by scalars.');
	end
	x(I) = val;
	msg = sprintf('Missing values have been replaced by %f.',val);
end

return