[136] | 1 | %PR_DOWNLOAD Load or download data and create dataset
|
---|
| 2 | %
|
---|
| 3 | % [A,NEW] = PR_DOWNLOAD(URL,DATFILE,OPTIONS)
|
---|
| 4 | %
|
---|
| 5 | % INPUT
|
---|
| 6 | % URL URL of character file to be downloaded
|
---|
| 7 | % DATFILE Desired name of downloaded and uncompressed file
|
---|
| 8 | % Default: name of the url-file, extended by .dat
|
---|
| 9 | % OPTIONS Structure with options used for parsing and constructing
|
---|
| 10 | % a PRTools dataset
|
---|
| 11 | %
|
---|
| 12 | % OUTPUT
|
---|
| 13 | % A Dataset
|
---|
| 14 | % NEW Logical, TRUE if a new dataset has been created, FALSE if an
|
---|
| 15 | % existing mat-file has been found and used.
|
---|
| 16 | %
|
---|
| 17 | % DESCRIPTION
|
---|
| 18 | % This routine facilitates downloading of character based datasets. DATFILE
|
---|
| 19 | % will be the name (or path with name) in which the URL is downloaded. If
|
---|
| 20 | % needed the URL file is unzipped and/or untarred first. After parsing a
|
---|
| 21 | % PRTools dataset is constructed, stored in a mat-file (optional) and
|
---|
| 22 | % returned. The name of the mat-file is DATFILE extended by .mat.
|
---|
| 23 | %
|
---|
| 24 | % The directory specified in DATFILE, or if not supplied, the directory and
|
---|
| 25 | % the name of the calling routine, will be used for storing files in a
|
---|
| 26 | % subdirectory 'data'. If the mat-file already exists it will be loaded and
|
---|
| 27 | % returned in A (no new download and parsing). If DATFILE already exists it
|
---|
| 28 | % will be used (no new download).
|
---|
| 29 | %
|
---|
| 30 | % OPTIONS should be a structure with the below fields, to be supplied in
|
---|
| 31 | % lower case. Missing fields are replaced by the given defaults.
|
---|
| 32 | %
|
---|
| 33 | % SIZE = []; Size of data to be downloaded, in MB. Not needed,
|
---|
| 34 | % just used to warn the user.
|
---|
| 35 | % PARSE = TRUE; If FALSE, parsing is skipped. Just downloading and
|
---|
| 36 | % uncompression. A will be empty.
|
---|
| 37 | % PARSEFUN = []; A handle of a user supplied parsing function. This
|
---|
| 38 | % function should operate on DATFILE (first parameter,
|
---|
| 39 | % substituted by PR_DOWNLOAD) and return a PRTools
|
---|
| 40 | % dataset. If PARSEFUN is not given, default parsing
|
---|
| 41 | % using PR_READDATASET will be used.
|
---|
| 42 | % PARSEPARS = {}; Cell array with additional parameters for PARSEFUN.
|
---|
| 43 | % FORMAT = []; Needed for default parsing, see PR_READDATASET.
|
---|
| 44 | % NHEADLINES = 0; Needed for default parsing, see PR_READDATASET.
|
---|
| 45 | % MISVALCHAR = '?'; Data characters to be replaced by NaN
|
---|
| 46 | % MISVALUE = []; Data values to be replaced by NaN
|
---|
| 47 | % DELIMETER = ' '; Needed for default parsing, see PR_READDATASET.
|
---|
| 48 | % EXTENSION = 'dat'; Extension to be used for downloaded DATFILE.
|
---|
| 49 | % MATFILE = TRUE; If FALSE, the dataset A will not be saved.
|
---|
| 50 | % LABFEAT = []; Feature found in DATFILE and to be used as class
|
---|
| 51 | % label, see FEAT2LAB.
|
---|
| 52 | % FEATS = []; Columns of dataset to be used as features.
|
---|
| 53 | % FEATNAMES = []; Desired feature names of dataset A, see SETFEATLAB.
|
---|
| 54 | % CLASSNAMES = []; Class names to be stored in A, see SETLABLIST.
|
---|
| 55 | % USER = []; Additional information to be stored in the
|
---|
| 56 | % user-field of A, see SETUSER.
|
---|
| 57 | % LINK = []; Link for more information on the dataset.
|
---|
| 58 | % DESC = []; Short description of the dataset.
|
---|
| 59 | % DSETNAME = []; Desired name of the dataset A.
|
---|
| 60 | %
|
---|
| 61 | %
|
---|
| 62 | % EXAMPLE
|
---|
| 63 | % url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data';
|
---|
| 64 | % opt.extension = 'dat'; % create iris.dat
|
---|
| 65 | % opt.labfeat = 5; % use feature 5 for labeling
|
---|
| 66 | % opt.matfile = false; % don't create a mat-file
|
---|
| 67 | % c = pr_download(url,[],opt) % load Iris dataset from UCI and parse
|
---|
| 68 | %
|
---|
| 69 | % SEE ALSO
|
---|
| 70 | % DATASETS, SETFEATDOM, GETFEATDOM, FEAT2LAB
|
---|
| 71 |
|
---|
| 72 | % Copyright: R.P.W. Duin
|
---|
| 73 |
|
---|
| 74 | %%
|
---|
function [a,new] = pr_download(url,datname,varargin)
% Main routine. The meaning of URL, DATFILE (here: datname) and OPTIONS is
% described in the help text at the top of this file.

%% make sure there is a data subdir
persistent DATADIREXISTS
if isempty(DATADIREXISTS)
  datasubdir = fullfile(fileparts(which(mfilename)),'data');
  if exist(datasubdir,'dir') ~= 7
    mkdir(datasubdir);
  end
  DATADIREXISTS = true;
end

%% outputs must be defined on every return path
% (with opt.parse == false nothing is parsed and A has to be empty)
a   = [];
new = false;

%% old style (positional) call or new style (OPTIONS structure) call?
if nargin >= 3
  % this can be removed when all mfiles in prdatasets call the new version
  % of pr_download_uci
  if ~isstruct(varargin{1}) && ~isempty(varargin{1}) && isnumeric(varargin{1})
    [a,new] = pr_download_old(url,datname,varargin{:});
    return
  else
    opt = varargin{1};
  end
end

if nargin < 3, opt = []; end
if nargin < 2, datname = []; end

opt = download_opt(opt); % set defaults where necessary

%% find directory to be used
if isempty(datname)
  datname = pr_callername;
  dirname = fullfile(fileparts(which(datname)),'data');
else
  [dirname,datname] = fileparts(datname);
end

%% set all necessary filenames
[~,urlname,urlext] = fileparts(url);
if isempty(datname)
  % will only be empty if called from command line
  datname = urlname;
  dirname = pwd;
end
urlname = [urlname urlext];             % name of file to be downloaded
matname = [datname '.mat'];             % name of mat-file to be created
datname = [datname '.' opt.extension];  % name of datfile to be created
urlfile = fullfile(dirname,urlname);    % temp file for download
datfile = fullfile(dirname,datname);    % unpacked urlfile
matfile = fullfile(dirname,matname);    % final matfile

%% load mat-file if it exists
if exist(matfile,'file') == 2
  s = prload(matfile);
  f = fieldnames(s);
  a = s.(f{1});    % the first variable stored in the mat-file is returned
  return           % we are done!!
end

%% download the data file if it doesn't exist
if exist(datfile,'file') ~= 2
  ask_download(urlname,opt.size);

  % without a JVM the Matlab download / unzip commands are not available,
  % on unix the corresponding shell tools are used instead
  useshell = ~usejava('jvm') && isunix;

  if useshell
    % quote file name and url: they may contain blanks or characters
    % like & and ? that are special to the shell
    stat = unix(['wget -q -O "' urlfile '" "' url '"']);
    status = (stat == 0);
  else
    [~,status] = urlwrite(url,urlfile);
  end
  if status == 0
    error(['Server unreachable or file not found: ' url])
  end

  % assume file is created, uncompress if needed
  % FILEPARTS returns the last extension only, so 'x.tar.gz' is reported
  % as '.gz'. Such files have to be recognised from the full name.
  istargz = strcmp(urlext,'.gz') && ...
            ~isempty(regexp(urlname,'\.tar\.gz$','once'));
  if strcmp(urlext,'.zip')
    disp('Decompression ....')
    if useshell
      unix(['unzip "' urlfile '" -d "' datfile '"']);
    else
      unzip(urlfile,datfile);
    end
  elseif istargz || strcmp(urlext,'.tar') || strcmp(urlext,'.tgz')
    disp('Decompression ....')
    untar(urlfile,datfile);
  elseif strcmp(urlext,'.gz')
    disp('Decompression ....')
    gunzip(urlfile,datfile);
  elseif ~strcmp(urlfile,datfile)
    copyfile(urlfile,datfile)
  end

  % the decompression routines create a folder named datfile; replace it
  % by its first entry so that datfile becomes the data file itself
  if exist(datfile,'dir') == 7
    dirn = dir(datfile);
    % '.' and '..' are not guaranteed to be the first two entries
    dirn = dirn(~ismember({dirn.name},{'.','..'}));
    if isempty(dirn)
      error(['No file found after decompression of ' urlname])
    end
    tmpfile = [datfile 'tmp'];
    copyfile(fullfile(datfile,dirn(1).name),tmpfile);
    rmdir(datfile,'s');          % also removes possible sub-folders
    movefile(tmpfile,datfile);
  end

  % delete the downloaded (compressed) file
  if ~strcmp(urlfile,datfile)
    delete(urlfile);
  end
end

if ~opt.parse
  % no parsing desired, we are done (A is empty)
  return
end

%% datfile should now be there, read and convert to dataset
disp('Parsing ...')
if isempty(opt.parsefun)
  a = pr_readdataset(datfile,opt.nheadlines,opt.delimeter, ...
                     opt.misvalchar,opt.format);
else
  % user defined parsing
  a = opt.parsefun(datfile,opt.parsepars{:});
end

%% set dataset fields
if ~isempty(opt.labfeat) && opt.labfeat > 0
  a = feat2lab(a,opt.labfeat);
end
if ~isempty(opt.classnames)
  a = setlablist(a,opt.classnames);
end
if ~isempty(opt.feats)
  a = a(:,opt.feats);
end
if ~isempty(opt.featnames)
  a = setfeatlab(a,opt.featnames);
end
if ~isempty(opt.misvalue)
  J = find(a==opt.misvalue);
  a(J) = NaN;
end
if ~isempty(opt.user)
  a = setuser(a,opt.user);
end
if ~isempty(opt.link)
  a = setuser(a,opt.link,'link');
end
if ~isempty(opt.desc)
  a = setuser(a,opt.desc,'desc');
end
if ~isempty(opt.dsetname)
  a = setname(a,opt.dsetname);
else
  a = setname(a,pr_callername);
end

%% save if desired
if opt.matfile
  save(matfile,'a');
  new = true;    % NEW is only set when a mat-file has been written
end

return
| 235 |
|
---|
| 236 |
|
---|
function ask_download(urlname,datsize)
%ASK_DOWNLOAD User controlled downloading
%
%   ASK_DOWNLOAD(URLNAME,DATSIZE)
%
% URLNAME  Name of the file to be downloaded, used in the messages only
% DATSIZE  Size of the download in MB, [] if unknown
%
% If the global ASK is true and DATSIZE is given and larger than 1, the
% user is asked for permission. Any answer other than 'y' or just <enter>
% aborts with an error. In all other cases the download is just announced.

global ASK
if isempty(ASK)
  % a global that has never been set is empty and cannot be used in &&;
  % use the same default as ask_download_old
  ASK = true;
end

if ASK && ~isempty(datsize) && datsize > 1 % ask only if datsize has been set
  siz = ['(' num2str(datsize) ' MB)'];
  q = input(['Dataset is not available, OK to download ' siz ' [y]/n ?'],'s');
  if ~isempty(q) && ~strcmp(q,'y')
    error('No dataset')
  end
  % siz is already the formatted text '(.. MB)'
  disp(['Downloading ' urlname ' ' siz ' ....'])
else
  disp(['Downloading ' urlname ' ....'])
end

return
| 253 |
|
---|
function opt = download_opt(opt_given)
%DOWNLOAD_OPT Merge user supplied options with the defaults
%
%   OPT = DOWNLOAD_OPT(OPT_GIVEN)
%
% OPT_GIVEN  Structure with a subset of the fields listed below, or empty
% OPT        Structure with all fields; fields not given in OPT_GIVEN
%            have their default value
%
% An error is generated if OPT_GIVEN is not empty and not a structure, or
% if it contains fields that are not listed below.

%% defaults
opt.size       = [];
opt.parse      = true;
opt.parsefun   = [];
opt.parsepars  = {};
opt.format     = [];
opt.nheadlines = 0;
opt.misvalchar = '?';
opt.misvalue   = [];
opt.delimeter  = ' ';
opt.extension  = 'dat';
opt.matfile    = true;
opt.labfeat    = [];
opt.feats      = [];
opt.featnames  = '';
opt.classnames = '';
opt.user       = [];
opt.dsetname   = '';
opt.link       = '';
opt.desc       = '';

if isempty(opt_given)
  return
end

%% comma separated list of valid field names, used in error messages
fall  = fieldnames(opt);
valid = sprintf('%s, ',fall{:});
valid = valid(1:end-2);              % remove the trailing ', '

if ~isstruct(opt_given)
  error(['OPTIONS should be a structure with one or more of the ' ...
         'following fields: ' valid '.']);
end

%% refuse unknown fields (e.g. misspelled or not in lower case)
fn      = fieldnames(opt_given);
unknown = fn(~ismember(fn,fall));
if ~isempty(unknown)
  bad = sprintf('%s, ',unknown{:});
  bad = bad(1:end-2);
  error(['Wrong field names (' bad '); valid field names are: ' valid])
end

%% overwrite defaults by the given values
for i = 1:length(fn)
  opt.(fn{i}) = opt_given.(fn{i});
end

return
| 297 |
|
---|
function [a,new] = pr_download_old(url,varargin)
%% This is the old version of pr_download, to be called from the old
% version of pr_download_uci only (inside it). It can be removed when all
% mfiles in prdataset make the new call to pr_download_uci
%
%PR_DOWNLOAD Load or download data and create dataset
%
%   A = PR_DOWNLOAD(URL,FILE,SIZE,NHEAD,FORMAT,MISVALCHAR,DELCHAR,NOSAVE)
%
% INPUT
%   URL        URL of character file to be downloaded
%   FILE       Filename to download
%   SIZE       Size of data to be downloaded in Mbytes
%   NHEAD      # of headerlines to skip
%   FORMAT     String or cell array defining the format
%              (default, automatic)
%   MISVALCHAR Character used for missing values
%   DEL        Character delimiter used in the file (default ',')
%   NOSAVE     Logical, if TRUE A will not be saved, default FALSE
%
% OUTPUT
%   A          Unlabeled dataset
%
% DESCRIPTION
% This routine facilitates downloading of character based datasets. FILE
% should be the name (or path with name) in which the URL is downloaded. If
% needed the URL file is unzipped and/or untarred first. If FILE already
% exists it is used (no downloading). The file is parsed by TEXTSCAN using
% the format given in FORMAT (see TEXTSCAN) and the delimiter specified in
% DEL. If FORMAT is not given an attempt is made to derive it
% automatically.
%
% In case a mat-file name [FILE '.mat'] is found it will be used instead of
% downloading.
%
% Columns (features) given as characters (the '%s' fields in FORMAT) will
% be stored as text based features. They will be replaced by indices to a
% set of strings stored in the corresponding feature domain (see
% SETFEATDOM). Use FEAT2LAB to use such a feature for labeling the dataset,
% see the below example.
%
% EXAMPLE
% url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data';
% c = pr_download(url,'iris.dat',[]); % load Iris dataset from UCI
% % the labels are set as string (char) features in c(:,5)
% a = feat2lab(c,5);                  % use feature 5 for labeling
%
% SEE ALSO
% DATASETS, SETFEATDOM, GETFEATDOM, FEAT2LAB

% Copyright: R.P.W. Duin
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands


[~,urlname,urlext] = fileparts(url);
[datname,siz,nhead,form,misval,del,nosave] = setdefaults(varargin,urlname,1,0,[],'?',',',false);

[dirname,datname] = fileparts(datname);
if isempty(dirname)
  dirname = fileparts(which(mfilename));
end
urlname = [urlname urlext];           % name of file to be downloaded
matname = [datname '.mat'];           % name of mat-file to be created
datname = [datname '.dat'];           % name of datfile to be created
urlfile = fullfile(dirname,urlname);  % temp file for download
datfile = fullfile(dirname,datname);  % unpacked urlfile
matfile = fullfile(dirname,matname);  % final matfile

new = true;
if exist(matfile,'file') == 2         % if matfile exists, use it
  s = load(matfile);
  f = fieldnames(s);
  a = s.(f{1});                       % first variable stored in the mat-file
  new = false;
  return
end

if exist(datfile,'file') ~= 2         % if datfile does not exist ...
  ask_download_old(siz);
  if isempty(siz) || siz == 0
    disp(['Downloading ' urlname ' ....'])
  else
    disp(['Downloading ' urlname ' (' num2str(siz) ' MB) ....'])
  end

  % without a JVM the Matlab download / unzip commands are not available,
  % on unix the corresponding shell tools are used instead
  useshell = ~usejava('jvm') && isunix;

  if useshell
    % quote file name and url: they may contain blanks or characters
    % like & and ? that are special to the shell
    stat = unix(['wget -q -O "' urlfile '" "' url '"']);
    status = (stat == 0);
  else
    [~,status] = urlwrite(url,urlfile);
  end
  if status == 0
    error(['Server unreachable or file not found: ' url])
  end

  % assume file is created, uncompress if needed
  % FILEPARTS returns the last extension only, so 'x.tar.gz' is reported
  % as '.gz'. Such files have to be recognised from the full name.
  istargz = strcmp(urlext,'.gz') && ...
            ~isempty(regexp(urlname,'\.tar\.gz$','once'));
  if strcmp(urlext,'.zip')
    disp('Decompression ....')
    if useshell
      unix(['unzip "' urlfile '" -d "' datfile '"']);
    else
      unzip(urlfile,datfile);
    end
  elseif istargz || strcmp(urlext,'.tar') || strcmp(urlext,'.tgz')
    disp('Decompression ....')
    untar(urlfile,datfile);
  elseif strcmp(urlext,'.gz')
    disp('Decompression ....')
    gunzip(urlfile,datfile);
  elseif ~strcmp(urlfile,datfile)
    copyfile(urlfile,datfile)
  end

  % the decompression routines create a folder named datfile; replace it
  % by its first entry so that datfile becomes the data file itself
  if exist(datfile,'dir') == 7
    dirn = dir(datfile);
    % '.' and '..' are not guaranteed to be the first two entries
    dirn = dirn(~ismember({dirn.name},{'.','..'}));
    if isempty(dirn)
      error(['No file found after decompression of ' urlname])
    end
    tmpfile = [datfile 'tmp'];
    copyfile(fullfile(datfile,dirn(1).name),tmpfile);
    rmdir(datfile,'s');          % also removes possible sub-folders
    movefile(tmpfile,datfile);
  end

  % delete the downloaded (compressed) file
  if ~strcmp(urlfile,datfile)
    delete(urlfile);
  end
end

% datfile should now be there, read and parse it
fid = fopen(datfile);
if fid < 0
  error(['Unable to open ' datfile])
end
if isempty(form)                 % if no format given ...
  for j=1:nhead+1
    s = fgetl(fid);              % derive it from the first nonheader line
  end
  if ~ischar(s)
    % FGETL returns -1 at end of file: there is no data line to inspect
    fclose(fid);
    error(['No data found in ' datfile])
  end
  s = mytextscan(s,'c',del,0);   % use all %s for time being
  form = getform(s);             % convert fields to %n where appropriate
  fseek(fid,0,-1);               % restart
end

disp('Parsing ...')
c = mytextscan(fid,strrep(form,'n','s'),del,nhead);  % this call also closes fid
a = pr_cell2dset(c,form,misval);

if ~nosave  % don't save if not needed (e.g. called by pr_download_uci)
  save(matfile,'a');
end

return
| 447 |
|
---|
function ask_download_old(datsize)
%ASK_DOWNLOAD_OLD User controlled downloading (old calling sequence)
%
%   ASK_DOWNLOAD_OLD(DATSIZE)
%
% DATSIZE  Size of the download in MB, [] if unknown
%
% If the global ASK is true (its default when not set) and DATSIZE is given
% and larger than 1, the user is asked for permission. Any answer other
% than 'y' or just <enter> aborts with an error.

% the input was called SIZE before, which hides the Matlab function SIZE
% inside this routine; arguments are positional, so callers are not affected

global ASK
if isempty(ASK)
  ASK = true;
end

if ASK && ~isempty(datsize) && datsize > 1
  siz = ['(' num2str(datsize) ' MB)'];
  q = input(['Dataset is not available, OK to download ' siz ' [y]/n ?'],'s');
  if ~isempty(q) && ~strcmp(q,'y')
    error('Dataset not found')
  end
end

return
| 466 |
|
---|
function form = getform(s)
%GETFORM Derive a format string from the fields of a single data line
%
%   FORM = GETFORM(S)
%
% S     Result of MYTEXTSCAN on one line: a cell array of which the first
%       element holds the fields of that line as strings
% FORM  Character row with one character per field: 'n' for fields that
%       only contain digits, sign, decimal point, exponent character or
%       blanks, 'c' (character) for all other fields

s = char(s{1});                  % one field per row, padded by blanks
nfields = size(s,1);
form = repmat('n',1,nfields);
for j=1:nfields
  % The hyphen has to be escaped. An unescaped '+-.' is the range from
  % '+' to '.', which includes the comma, so '1,5' would count as numeric.
  if ~isempty(regexp(s(j,:),'[^0-9+\-.eE ]','once'))
    form(j) = 'c';
  end
end
| 476 |
|
---|
function s = mytextscan(fid,forms,del,nhead)
%MYTEXTSCAN Call TEXTSCAN with a compact format description
%
%   S = MYTEXTSCAN(FID,FORMS,DEL,NHEAD)
%
% FID    File identifier of an open file, or a character string to be
%        scanned. A file is closed by this routine.
% FORMS  Character row with one format character per field, e.g. 'nnc'.
%        A 'c' is read as string (%s).
% DEL    Delimiter; for a single blank the TEXTSCAN default is used
% NHEAD  Number of header lines to be skipped
% S      Cell array as returned by TEXTSCAN

% close the file when this function is left, also if TEXTSCAN fails
if ~ischar(fid)
  closer = onCleanup(@() fclose(fid)); %#ok<NASGU>
end

% 'nnc' --> '%n%n%c' --> '%n%n%s'
form = repmat('%%',1,numel(forms));
form(2:2:end) = forms;
forms = strrep(form,'c','s');

% STRCMP is also safe for empty and multi-character delimiters
if strcmp(del,' ')
  s = textscan(fid,forms,'Headerlines',nhead);
else
  s = textscan(fid,forms,'Delimiter',del,'Headerlines',nhead);
end