%BREAST 699 objects with 9 features in 2 classes
%
% X = BREAST
%
% Breast cancer Wisconsin dataset obtained from the University of Wisconsin
% Hospitals, Madison from Dr. William H. Wolberg.
%
% REFERENCE
% O. L. Mangasarian and W. H. Wolberg: "Cancer diagnosis via linear
% programming", SIAM News, Volume 23, Number 5, September 1990, pp 1 & 18.
%
% X = BREAST(VAL)
%
% By default, objects with missing values are removed. To treat them
% differently, set VAL to one of the options of MISVAL.
%
% SEE ALSO PRTools Guide, UCI Website
% PRTOOLS, DATASETS, MISVAL
% Copyright: R.P.W. Duin
function x = breast(val)

% When no treatment is requested, incomplete objects are removed.
if nargin < 1
  val = 'remove';
end

% Names for the two classes and for the nine retained feature columns.
classNames = {'benign' 'malignant'};
featNames = {'Clump Thickness' 'Uniformity of Cell Size' ...
  'Uniformity of Cell Shape' 'Marginal Adhesion' ...
  'Single Epithelial Cell Size' 'Bare Nuclei' 'Bland Chromatin' ...
  'Normal Nucleoli' 'Mitoses'};

% Descriptive information stored in the user field of the dataset.
info = struct( ...
  'desc', 'The original database of the Wisconsin Breast Cancer Databases from UCI, containing 699 instances, collected between 1989 and 1991. ', ...
  'link', 'ftp://ftp.ics.uci.edu/pub/machine-learning-databases/breast-cancer-wisconsin/');

% Obtain the raw numeric table through pr_getdata.
% Earlier loading variants, kept for reference:
%prdatasets(mfilename,1,'http://prtools.tudelft.nl/prdatasets/breastorg.dat');
%raw = load('breastorg.dat'); % Octave cannot find it
%raw = load(fullfile(fileparts(which(mfilename)),'breastorg.dat'));
raw = pr_getdata('http://prtools.tudelft.nl/prdatasets/breastorg.dat',1);

% Entries equal to -1 become NaN (presumably the file's code for a
% missing value -- confirm against the data file).
raw(raw == -1) = NaN;

% The last column holds the class labels, coded as 2 and 4; halving them
% gives the indices 1 and 2 into classNames.
classIdx = raw(:,end)/2;

% The first column and the label column are not used as features.
features = raw(:,2:(end-1));

x = pr_dataset(features, classNames(classIdx));
x = setfeatlab(x,featNames);
x = setname(x,'Breast Wisconsin');
x = misval(x,val);   % apply the requested missing-value treatment
x = setuser(x,info);