function [x,msg] = prmissingvalues(x,val)
%PRMISSINGVALUES Fix the missing values in a dataset
%
%    [X,MSG] = PRMISSINGVALUES(X,VAL)
%
% Fix the missing values (represented by NaN's) in dataset X. String MSG
% gives a text message of what has been done. When X contains no missing
% values, X is returned unchanged and MSG is the empty string.
%
% The following values VAL are possible:
%   'remove'   remove the rows (entries) that contain missing values;
%              'delete' is accepted as a synonym
%   'mean'     fill the entries with the mean of the non-missing values
%              of their respective column
%   <value>    fill the entries with a fixed constant (a double)
%
% Errors:
%   - with 'mean', when a column consists of NaN's only, so that no mean
%     can be computed;
%   - when VAL is neither one of the strings above nor of class double.
%
% See also datasets

% Copyright: D.M.J. Tax, D.M.J.Tax@prtools.org
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands

% Make sure MSG is always defined: without this default, requesting the
% second output for data without NaN's fails with an unassigned-output
% error, because all other assignments to MSG are inside the IF below.
msg = '';

% Where are the offenders?
I = isnan(x);

% If there are missing values, go:
if any(I(:))
	switch val
	case {'remove' 'delete'}
		% keep only the rows in which no NaN occurs
		I = find(sum(I,2)==0);
		x = x(I,:);
		msg = 'Entries with missing values have been removed.';
	case 'mean'
		k = size(x,2);
		for i=1:k
			J = ~I(:,i);   % rows with a valid value in column i
			if any(I(:,i)) % is there a missing value in this feature?
				if ~any(J)
					error('Missing value cannot be filled: all values are NaN.');
				end
				% the mean is computed over the valid values only
				mn = mean(x(J,i));
				x(find(I(:,i)),i) = mn;
			end
		end
		msg = 'Missing values have been replaced by the mean of the feature.';
	otherwise
		% anything else has to be a numeric (double) fill value
		if ~isa(val,'double')
			error('Missing values can only be filled by scalars.');
		end
		x(I) = val;
		msg = sprintf('Missing values have been replaced by %f.',val);
	end
end
return