source: prdatasets/census.m @ 159

Last change on this file since 159 was 150, checked in by bduin, 5 years ago
File size: 1.5 KB
RevLine 
[142]1%CENSUS Weighted census income data
2%PRTools UCI dataset import, 199523+99762 objects, 41 mixed features, 2 classes
3%
4%  [TRAIN,TEST] = CENSUS(VAL)
5%   TRAIN_TEST  = CENSUS(VAL)
6%
7%DESCRIPTION
8%This command downloads one of the UCI data sets, converts it into PRTools
9%format and stores it locally for future use. Consult the <a href="http://archive.ics.uci.edu/ml/datasets/Census-Income+(KDD)">related website</a>
10%for further information. Please make the appropriate references in
11%publications that make use of this dataset.
12%
13%This dataset contains a number of categorical features with N > 2
14%categories. They may be converted to N new real features by CAT2REAL.
15%
16%Dataset has missing values. By default all objects with missing values are
17%removed. Use VAL=NaN to avoid this. For other options see MISVAL.
18%
[150]19%SEE ALSO <a href="http://prtools.tudelft.nl/prtools/">PRTools Guide</a>, <a href="http://archive.ics.uci.edu/ml/">UCI Website</a>
[142]20%PRTOOLS, DATASETS, SETFEATDOM, FEATTYPES, CAT2REAL, MISVAL, ADULT
21
[150]22% Copyright: R.P.W. Duin
[142]23
function varargout = census(val)
% CENSUS  Load the Census-Income (KDD) data, downloading it when no local
% copy is available, and apply the requested missing-value treatment.
%
%   VAL  Option forwarded to MISVAL. Defaults to 'remove' when omitted.
%
% The number of outputs requested by the caller is forwarded unchanged to
% the loader / downloader.

% Fall back to the default missing-value option.
if nargin == 0
  val = 'remove';
end

% Try the locally stored mat-file(s) first.
varargout = cell(1,nargout);
[varargout{:}] = pr_loadmatfile;

if isempty(varargout{1})
  % no matfiles found, create them

  % Options for the two downloaded files ...
  opt1 = struct('size',6);
  opt2 = struct('size',3);
  % ... and the options shared by the import: per-column format string
  % and the dataset name.
  opt  = struct('format','ncnncnccccccccccnnncccccncccccncccccncnnnc', ...
                'dsetname','Census Income KDD');

  uciName  = 'Census-Income+(KDD)';
  archives = {'census-income.data.gz','census-income.test.gz'};
  loadOpts = {opt1,opt2,opt};

  [varargout{:}] = pr_download_uci(uciName,archives,loadOpts);
end

% Apply the missing-value handling to every returned output at once.
missingHandler = misval(val);
varargout = varargout*missingHandler;
41
42
43
Note: See TracBrowser for help on using the repository browser.