source: prdatasets/pr_readdataset.m @ 144

Last change on this file since 144 was 137, checked in by bduin, 5 years ago
File size: 1.9 KB
RevLine 
[97]1%PR_READDATASET Convert text file into PRTools dataset
[92]2%
[97]3%   A = PR_READDATASET(FILE,NHEAD,DELIM,MISVAL,FORMAT,NLAB)
[92]4%
[97]5%INPUT
6%  FILE       - filename
7%  NHEAD      - number of header lines to be skipped, default 0
8%  DELIM      - delimiter characters, default ' '
9%  MISVAL     - character used for missing values, default '?'
10%  FORMAT     - format needed for interpreting feature types of columns.
11%               default is determined from first line, e.g. 'nncc' for two
12%               numeric and two categorical features, see SETFEATDOM and
[137]13%               PR_CELL2DSET
[97]14%  NLAB       - feature to be interpreted as class label, default [].
[92]15%
[97]16%OUTPUT
17%  A          - PRTools dataset
[92]18%
[97]19%SEE ALSO
[137]20%DATASETS, SETFEATDOM, PR_CELL2DSET
[92]21
[132]22% Copyright: R.P.W. Duin
[92]23
function a = pr_readdataset(file,varargin)

  % Read the delimited text file FILE and convert it into a dataset.
  % Optional arguments, in order: NHEAD, DELIM, MISVAL, FORMAT, NLAB.
  % Missing or empty ones are filled in by SETDEFAULTS with 0, ' ', '?', []
  % and [] respectively. An error is raised when FILE cannot be opened or
  % when it holds no data line after the NHEAD header lines.
  [nhead,del,misval,form,flab] = setdefaults(varargin,0,' ','?',[],[]);

  [fid,msg] = fopen(file);
  if fid < 1
    error(msg)
  end
  % MYTEXTSCAN closes the file once the final scan succeeds. This guard
  % closes it on every other exit path (e.g. an error while deriving the
  % format), so the file identifier is never leaked.
  guard = onCleanup(@() close_if_open(fid)); %#ok<NASGU>

  if isempty(form)        % if no format given ...
    for j=1:nhead+1
      s = fgetl(fid);     % derive it from the first nonheader line
    end
    if ~ischar(s)         % FGETL returns -1 when the end of file is hit
      error('pr_readdataset:noData', ...
        'File %s contains no data line after skipping %d header line(s).', ...
        file,nhead);
    end
    s = mytextscan(s,'c',del,0); % split that line into text fields
    form = getform(s);    % mark each field as numeric 'n' or categorical 'c'
    fseek(fid,0,'bof');   % restart, the full scan below skips the header itself
  end
  % All columns are read as text; PR_CELL2DSET receives the real format
  % together with the missing value character.
  c = mytextscan(fid,strrep(form,'n','s'),del,nhead);
  a = pr_cell2dset(c,form,misval);
  if ~isempty(flab)
    a = feat2lab(a,flab); % use the requested feature as class label
  end

return

function close_if_open(fid)
  % Close FID only when it is still among the currently open files.
  if any(fopen('all') == fid)
    fclose(fid);
  end
return
[92]47
function s = mytextscan(fid,forms,del,nhead)
  % Run TEXTSCAN on FID, which is either a file identifier or a character
  % array holding the text itself. FORMS has one character per column; each
  % is turned into a conversion specifier by prefixing '%', with 'c' being
  % read as '%s'. DEL is the delimiter and NHEAD the number of header lines
  % to skip. A FID that is not a character array is closed after scanning.

  % Interleave a '%' with every format character: 'cs' -> '%s%s'.
  percents = repmat('%',1,numel(forms));
  spec = reshape([percents; strrep(forms,'c','s')],1,[]);

  options = {'Delimiter',del,'Headerlines',nhead};
  if del == ' '
    options = options(3:end);  % no explicit delimiter for a blank
  end
  s = textscan(fid,spec,options{:});

  if ischar(fid)
    return                     % text input: nothing to close
  end
  fclose(fid);
return
[92]61
function form = getform(s)
  % Derive a format string from the text fields in the cell array S{1}:
  % one character per field, 'n' when the field consists only of digits,
  % signs, decimal points, the exponent letters e/E and blanks, and 'c'
  % (categorical) as soon as it contains any other character.
  fields = char(s{1});               % one field per row, blank padded
  nfields = size(fields,1);
  form = repmat('n',1,nfields);
  for j=1:nfields
    % The '-' is escaped on purpose: an unescaped '+-.' is the character
    % range from '+' to '.', which would also accept ',' as numeric.
    if ~isempty(regexp(fields(j,:),'[^0-9+\-.eE ]','once'))
      form(j) = 'c';
    end
  end
return
71
Note: See TracBrowser for help on using the repository browser.