1 | %CENSUS Weighted census income data
|
---|
2 | %PRTools UCI dataset import, 199523+99762 objects, 41 mixed features, 2 classes
|
---|
3 | %
|
---|
4 | % [TRAIN,TEST] = CENSUS(VAL)
|
---|
5 | % TRAIN_TEST = CENSUS(VAL)
|
---|
6 | %
|
---|
7 | %DESCRIPTION
|
---|
8 | %This command downloads one of the UCI data sets, converts it into PRTools
|
---|
9 | %format and stores it locally for future use. Consult the <a href="http://archive.ics.uci.edu/ml/datasets/Census-Income+(KDD)">related website</a>
|
---|
10 | %for further information. Please make the appropriate references in
|
---|
11 | %publications that make use of this dataset.
|
---|
12 | %
|
---|
13 | %This dataset contains a number of categorical features with N > 2
|
---|
14 | %categories. They may be converted to N new real features by CAT2REAL.
|
---|
15 | %
|
---|
16 | %Dataset has missing values. By default all objects with missing values are
|
---|
17 | %removed. Use VAL=NaN to avoid this. For other options see MISVAL.
|
---|
18 | %
|
---|
19 | %SEE ALSO <a href="http://prtools.tudelft.nl/prtools/">PRTools Guide</a>, <a href="http://archive.ics.uci.edu/ml/">UCI Website</a>
|
---|
20 | %PRTOOLS, DATASETS, SETFEATDOM, FEATTYPES, CAT2REAL, MISVAL, ADULT
|
---|
21 |
|
---|
22 | % Copyright: R.P.W. Duin
|
---|
23 |
|
---|
function varargout = census(val)

% Returns the census data from pr_loadmatfile when that yields something;
% otherwise obtains it through pr_download_uci. In both cases the outputs
% are finally multiplied by MISVAL(VAL).

% VAL falls back to 'remove' when the caller omits it.
if nargin == 0
  val = 'remove';
end

% Request exactly as many outputs from the loader as the caller asked for.
% Keep this preallocate-then-expand form as is: the zero-output call relies on it.
varargout = cell(1,nargout);
[varargout{:}] = pr_loadmatfile;

if isempty(varargout{1})
  % no matfiles found, create them
  uciName  = 'Census-Income+(KDD)';
  uciFiles = {'census-income.data.gz','census-income.test.gz'};

  % Three option structs handed to pr_download_uci as one cell array.
  % NOTE(review): the meaning of 'size' and of the per-character codes in
  % 'format' is defined by pr_download_uci, not here - confirm there.
  firstOpt  = struct('size',6);
  secondOpt = struct('size',3);
  thirdOpt  = struct('format','ncnncnccccccccccnnncccccncccccncccccncnnnc', ...
                     'dsetname','Census Income KDD');

  [varargout{:}] = pr_download_uci(uciName,uciFiles,{firstOpt,secondOpt,thirdOpt});
end

% Apply the missing-value handling selected by VAL to the returned outputs.
varargout = varargout*misval(val);
|
---|
41 |
|
---|
42 |
|
---|
43 |
|
---|