source: prdatasets/pr_readdataset.m

Last change on this file was 137, checked in by bduin, 5 years ago
File size: 1.9 KB
Line 
1%PR_READDATASET Convert text file into PRTools dataset
2%
3%   A = PR_READDATASET(FILE,NHEAD,DELIM,MISVAL,FORMAT,NLAB)
4%
5%INPUT
6%  FILE       - filename
7%  NHEAD      - number of headerlines to be skipped, default 0
8%  DELIM      - delimiter characters, default ' '
9%  MISVAL     - character used for missing values, default '?'
10%  FORMAT     - format needed for interpreting feature types of columns.
11%               By default it is derived from the first non-header line,
12%               e.g. 'nncc' for two numeric and two categorical features,
13%               see SETFEATDOM and PR_CELL2DSET
14%  NLAB       - feature to be interpreted as class label, default [].
15%
16%OUTPUT
17%  A          - PRTools dataset
18%
19%SEE ALSO
20%DATASETS, SETFEATDOM, PR_CELL2DSET
21
22% Copyright: R.P.W. Duin
23
function a = pr_readdataset(file,varargin)
  % Read a delimited text file and convert it into a dataset.
  % Optional arguments, in order: NHEAD, DELIM, MISVAL, FORMAT, NLAB.
  % SETDEFAULTS is called with the defaults 0, ' ', '?', [] and [].

  [nhead,del,misval,form,flab] = setdefaults(varargin,0,' ','?',[],[]);

  [fid,msg] = fopen(file);
  if fid < 1
    error(msg)
  end

  if isempty(form)
    % No format supplied: derive it from the first non-header line.
    for j = 1:nhead+1
      s = fgetl(fid);
      if ~ischar(s)
        % fgetl returns -1 at end of file: the file holds fewer than
        % nhead+1 lines. Release the handle and report it clearly instead
        % of passing -1 on to textscan.
        fclose(fid);
        error('PR_READDATASET: no data line found after %d header line(s) in %s', ...
              nhead,file);
      end
    end
    s = mytextscan(s,'c',del,0);  % split the line into text fields
    form = getform(s);            % 'n' for numeric fields, 'c' otherwise
    fseek(fid,0,-1);              % rewind to the start of the file
  end

  % Read every column as text ('n' is replaced by 's' here); the original
  % FORM is passed to PR_CELL2DSET together with the missing-value
  % character. MYTEXTSCAN closes fid.
  c = mytextscan(fid,strrep(form,'n','s'),del,nhead);
  a = pr_cell2dset(c,form,misval);

  if ~isempty(flab)
    % Per the help text: use feature FLAB as class label.
    a = feat2lab(a,flab);
  end

return
47
function s = mytextscan(fid,forms,del,nhead)
  % Wrapper around TEXTSCAN that reads every column as text.
  %   FID   - file identifier, or a character array to be scanned directly
  %   FORMS - one character per column; every character C becomes the
  %           conversion '%C', and 'c' is replaced by 's'
  %   DEL   - delimiter(s); a char array of blanks selects TEXTSCAN's
  %           default delimiter handling, anything else is passed on as
  %           'Delimiter' (a cell array of delimiters is accepted)
  %   NHEAD - number of header lines to skip
  % If FID is not a character array it is closed before returning, also
  % when TEXTSCAN fails.

  % Build '%a%b...' from 'ab...': fill the even positions of '%%%%...'.
  spec = repmat('%%',1,numel(forms));
  spec(2:2:end) = forms;
  spec = strrep(spec,'c','s');

  % Same outcome as the former "if del == ' '" for char input, but does
  % not error on a cell array of delimiters.
  blanksonly = ischar(del) && ~isempty(del) && all(del(:) == ' ');

  try
    if blanksonly
      s = textscan(fid,spec,'Headerlines',nhead);
    else
      s = textscan(fid,spec,'Delimiter',del,'Headerlines',nhead);
    end
  catch err
    if ~ischar(fid)
      % Best effort: do not leak the handle, do not mask the real error.
      try
        fclose(fid);
      catch
      end
    end
    rethrow(err);
  end

  if ~ischar(fid)
    fclose(fid);
  end
return
61
function form = getform(s)
  % Derive a format string from the text fields of one line.
  %   S    - cell whose first element holds the fields of the line, as
  %          returned by MYTEXTSCAN with a single text column
  %   FORM - char row with one character per field: 'n' if the field is a
  %          numeric literal (or blank), 'c' otherwise
  %
  % A field counts as numeric only if it matches a complete number:
  % optional sign, digits with optional decimal point, optional exponent.
  % A plain character-class test would also accept fields such as 'e',
  % '-', '1-2' or '1,2' (inside [...] the sequence '+-.' is a range that
  % includes the comma).

  fields = cellstr(s{1});
  numpat = '^\s*[+-]?(\d+\.?\d*|\.\d+)([eE][+-]?\d+)?\s*$';

  form = repmat('n',1,numel(fields));
  for j = 1:numel(fields)
    f = fields{j};
    % Blank fields keep the 'n' default.
    if ~isempty(strtrim(f)) && isempty(regexp(f,numpat,'once'))
      form(j) = 'c';
    end
  end
return
71
Note: See TracBrowser for help on using the repository browser.