source: prdatasets/breast.m @ 80

Last change on this file since 80 was 80, checked in by dtax, 11 years ago

Creator files for prtools datasets.

File size: 1.4 KB
Line 
%BREAST 699 objects with 9 features in 2 classes
%
%       X = BREAST
%       X = BREAST(VAL)
%
% Breast cancer Wisconsin dataset obtained from the University of Wisconsin
% Hospitals, Madison from Dr. William H. Wolberg.
%
% INPUT
%   VAL   Missing-value treatment, handed unchanged to PRMISSINGVALUES
%         (optional; default: -1, i.e. missing values are replaced by -1).
%         To do something else, pass one of the options of PRMISSINGVALUES.
%
% OUTPUT
%   X     Dataset built by PRDATASET, with feature labels, name and user
%         field set
%
% REFERENCE
% O. L. Mangasarian and W. H. Wolberg: "Cancer diagnosis via linear
% programming", SIAM News, Volume 23, Number 5, September 1990, pp 1 & 18.
function x = breast(val)

% Fall back to replacing missing values by -1 when no treatment is given.
if nargin == 0
    val = -1;
end

% NOTE(review): presumably this makes sure breastorg.dat is available
% locally (fetching it from the given URL when needed) -- confirm in
% PRDATASETS.
prdatasets(mfilename,1,'http://prtools.org/prdatasets/breastorg.dat');

% Descriptive information stored in the user field of the dataset.
info = struct( ...
    'desc', 'The original database of the Wisconsin Breast Cancer Databases from UCI, containing 699 instances, collected between 1989 and 1991. ', ...
    'link', 'ftp://ftp.ics.uci.edu/pub/machine-learning-databases/breast-cancer-wisconsin/');

classNames = {'benign' 'malignant'};
featNames  = {'Clump Thickness' 'Uniformity of Cell Size' ...
              'Uniformity of Cell Shape' 'Marginal Adhesion' ...
              'Single Epithelial Cell Size' 'Bare Nuclei' 'Bland Chromatin' ...
              'Normal Nucleoli' 'Mitoses'};

% Read the raw table and turn every entry equal to -1 into NaN, so that
% PRMISSINGVALUES below can treat those entries as missing.
raw = load('breastorg.dat');
raw(raw == -1) = NaN;

% The last column holds the class, stored as 2 or 4; halving it yields the
% index (1 or 2) into classNames.
classIdx = raw(:,end) ./ 2;
labels   = classNames(classIdx);

% Features are all columns between the first and the last one.
% NOTE(review): the first column is dropped -- presumably a sample id; confirm.
features = raw(:,2:end-1);

x = prdataset(features, labels);
x = setfeatlab(x, featNames);
x = setname(x, 'Breast Wisconsin');

% Apply the requested missing-value treatment and record the message it
% returns at the end of the dataset description.
[x, note] = prmissingvalues(x, val);
info.desc = [info.desc note];
x = setuser(x, info);
Note: See TracBrowser for help on using the repository browser.