[136] | 1 | %PR_DOWNLOAD Load or download data and create dataset
|
---|
| 2 | %
|
---|
| 3 | % [A,NEW] = PR_DOWNLOAD(URL,DATFILE,OPTIONS)
|
---|
| 4 | %
|
---|
| 5 | % INPUT
|
---|
| 6 | % URL URL of character file to be downloaded
|
---|
| 7 | % DATFILE Desired name of downloaded and uncompressed file
|
---|
| 8 | % Default: name of the url-file, extended by .dat
|
---|
| 9 | % OPTIONS Structure with options used for parsing and constructing
|
---|
| 10 | % a PRTools dataset
|
---|
| 11 | %
|
---|
| 12 | % OUTPUT
|
---|
| 13 | % A Dataset
|
---|
| 14 | % NEW Logical, TRUE if a new dataset has been created, FALSE if an
|
---|
| 15 | % existing mat-file has been found and used.
|
---|
| 16 | %
|
---|
| 17 | % DESCRIPTION
|
---|
| 18 | % This routine facilitates downloading of character based datasets. DATFILE
|
---|
| 19 | % will be the name (or path with name) in which the URL is downloaded. If
|
---|
| 20 | % needed the URL file is unzipped and/or untarred first. After parsing a
|
---|
| 21 | % PRTools dataset is constructed, stored in a mat-file (optional) and
|
---|
| 22 | % returned. The name of the mat-file is DATFILE extended by .mat.
|
---|
| 23 | %
|
---|
| 24 | % The directory specified in DATFILE, or if not supplied, the directory and
|
---|
| 25 | % the name of the calling routine, will be used for storing files in a
|
---|
| 26 | % subdirectory 'data'. If the mat-file already exists it will be loaded and
|
---|
| 27 | % returned in A (no new download and parsing). If DATFILE already exists it
|
---|
| 28 | % will be used (no new download).
|
---|
| 29 | %
|
---|
| 30 | % OPTIONS should be a structure with the below fields, to be supplied in
|
---|
| 31 | % lower case. Missing fields are replaced by the given defaults.
|
---|
| 32 | %
|
---|
| 33 | % SIZE = []; Size of data to be downloaded, in MB. Not needed,
|
---|
| 34 | % just used to warn the user.
|
---|
| 35 | % PARSE = TRUE; If FALSE, parsing is skipped. Just downloading and
|
---|
| 36 | % uncompression. A will be empty.
|
---|
| 37 | % PARSEFUN = []; A handle of a user supplied parsing function. This
|
---|
| 38 | % function should operate on DATFILE (first parameter,
|
---|
| 39 | % substituted by PR_DOWNLOAD) and return a PRTools
|
---|
| 40 | % dataset. If PARSEFUN is not given, default parsing
|
---|
| 41 | % using PR_READDATASET will be used.
|
---|
| 42 | % PARSEPARS = {}; Cell array with additional parameters for PARSEFUN.
|
---|
| 43 | % FORMAT = []; Needed for default parsing, see PR_READDATASET.
|
---|
| 44 | % NHEADLINES = 0; Needed for default parsing, see PR_READDATASET.
|
---|
| 45 | % MISVALCHAR = '?'; Data characters to be replaced by NaN
|
---|
| 46 | % MISVALUE = []; Data values to be replaced by NaN
|
---|
| 47 | % DELIMETER = ' '; Needed for default parsing, see PR_READDATASET.
|
---|
| 48 | % EXTENSION = 'dat'; Extension to be used for downloaded DATFILE.
|
---|
| 49 | % MATFILE = TRUE; If FALSE, the dataset A will not be saved.
|
---|
| 50 | % LABFEAT = []; Feature found in DATFILE and to be used as class
|
---|
| 51 | % label, see FEAT2LAB.
|
---|
| 52 | % FEATS = []; Columns of dataset to be used as features.
|
---|
| 53 | % FEATNAMES = []; Desired feature names of dataset A, see SETFEATLAB.
|
---|
| 54 | % CLASSNAMES = []; Class names to be stored in A, see SETLABLIST.
|
---|
| 55 | % USER = []; Additional information to be stored in the
|
---|
| 56 | % user-field of A, see SETUSER.
|
---|
| 57 | % LINK = []; Link for more information on the dataset.
|
---|
| 58 | % DESC = []; Short description of the dataset.
|
---|
| 59 | % DSETNAME = []; Desired name of the dataset A.
|
---|
| 60 | %
|
---|
| 61 | %
|
---|
| 62 | % EXAMPLE
|
---|
| 63 | % url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data';
|
---|
| 64 | % opt.extension = 'dat'; % create iris.dat
|
---|
| 65 | % opt.labfeat = 5; % use feature 5 for labeling
|
---|
| 66 | % opt.matfile = false; % don't create a mat-file
|
---|
| 67 | % c = pr_download(url,[],opt) % load Iris dataset from UCI and parse
|
---|
| 68 | %
|
---|
| 69 | % SEE ALSO
|
---|
| 70 | % DATASETS, SETFEATDOM, GETFEATDOM, FEAT2LAB
|
---|
| 71 |
|
---|
| 72 | % Copyright: R.P.W. Duin
|
---|
| 73 |
|
---|
| 74 | %%
|
---|
function [a,new] = pr_download(url,datname,varargin)
% Load a cached dataset or download URL, unpack it and parse it.
%
%   url      - location of the (possibly compressed) character file
%   datname  - desired name (optionally with path) of the unpacked file;
%              empty: derived from the calling routine or from the url
%   varargin - either an OPTIONS structure (see DOWNLOAD_OPT for the
%              fields) or the legacy positional arguments, recognised by
%              a non-empty numeric third argument
%
%   a        - the dataset; [] when OPTIONS.parse is false
%   new      - true when the dataset was created by this call, false when
%              an existing mat-file was loaded or nothing was parsed

% Defaults for the outputs, so every early return leaves them defined.
a = [];
new = false;

if nargin >= 3
  % this can be removed when all mfiles in prdatasets call the new version
  % of pr_download_uci
  if ~isstruct(varargin{1}) && ~isempty(varargin{1}) && isnumeric(varargin{1})
    [a,new] = pr_download_old(url,datname,varargin{:});
    return
  else
    opt = varargin{1};
  end
end

if nargin < 3, opt = []; end
if nargin < 2, datname = []; end

opt = download_opt(opt); % set defaults where necessary

%% find directory to be used
if isempty(datname)
  datname = pr_callername;
  dirname = fullfile(fileparts(which(datname)),'data');
else
  [dirname,datname] = fileparts(datname);
end

%% set all necessary filenames
[~,urlname,urlext] = fileparts(url);
if isempty(datname)
  % will only be empty if called from command line
  datname = urlname;
  dirname = pwd;
end
% fileparts returns the last extension only ('.gz' for 'x.tar.gz'), so a
% gzipped tar archive has to be recognised from the remaining stem.
istarball = any(strcmp(urlext,{'.tar','.tgz'})) || ...
    (strcmp(urlext,'.gz') && numel(urlname) >= 4 && ...
     strcmp(urlname(end-3:end),'.tar'));
urlname = [urlname urlext];            % name of file to be downloaded
matname = [datname '.mat'];            % name of mat-file to be created
datname = [datname '.' opt.extension]; % name of datfile to be created
urlfile = fullfile(dirname,urlname);   % temp file for download
datfile = fullfile(dirname,datname);   % unpacked urlfile
matfile = fullfile(dirname,matname);   % final matfile

%% load the mat-file if it exists
if exist(matfile,'file') == 2
  s = prload(matfile);
  f = fieldnames(s);
  a = s.(f{1});
  return % we are done!!
end

%% download the data file if it doesn't exist
if exist(datfile,'file') ~= 2
  ask_download(urlname,opt.size);

  % The target folder (e.g. the 'data' subdirectory) may not exist yet.
  if ~isempty(dirname) && exist(dirname,'dir') ~= 7
    mkdir(dirname);
  end

  % Single-quote arguments handed to the shell so that blanks or shell
  % metacharacters in paths and urls are taken literally.
  shq = @(str) ['''' strrep(str,'''','''\''''') ''''];

  if ~usejava('jvm') && isunix
    stat = unix(['wget -q -O ' shq(urlfile) ' ' shq(url)]);
    status = (stat == 0);
  else
    [~,status] = urlwrite(url,urlfile);
  end
  if status == 0
    error(['Server unreachable or file not found: ' url])
  end

  % assume file is created, uncompress if needed
  if strcmp(urlext,'.zip')
    disp('Decompression ....')
    if ~usejava('jvm') && isunix
      unix(['unzip ' shq(urlfile) ' -d ' shq(datfile)]);
    else
      unzip(urlfile,datfile);
    end
  elseif istarball
    % tested before the plain '.gz' case: untar also handles gzipped tars
    disp('Decompression ....')
    untar(urlfile,datfile);
  elseif strcmp(urlext,'.gz')
    disp('Decompression ....')
    gunzip(urlfile,datfile);
  elseif ~strcmp(urlfile,datfile)
    copyfile(urlfile,datfile)
  end

  % The unpackers create a folder named DATFILE; replace that folder by
  % the first plain file found in it.
  if exist(datfile,'dir') == 7
    dirn = dir(datfile);
    dirn = dirn(~[dirn.isdir]); % drops '.', '..' and subfolders
    if isempty(dirn)
      error(['No data file found after decompression of ' urlname])
    end
    tmpfile = [datfile 'tmp'];
    copyfile(fullfile(datfile,dirn(1).name),tmpfile);
    rmdir(datfile,'s');
    movefile(tmpfile,datfile);
  end

  % delete the downloaded (compressed) file
  if ~strcmp(urlfile,datfile)
    delete(urlfile);
  end
end

if ~opt.parse
  % no parsing desired, we are done; a is returned empty
  return
end

%% datfile should now be there, read and convert to dataset
disp('Parsing ...')
if isempty(opt.parsefun)
  a = pr_readdataset(datfile,opt.nheadlines,opt.delimeter, ...
                     opt.misvalchar,opt.format);
else
  % user defined parsing
  a = opt.parsefun(datfile,opt.parsepars{:});
end

%% set dataset fields
if ~isempty(opt.labfeat) && opt.labfeat > 0
  a = feat2lab(a,opt.labfeat);
end
if ~isempty(opt.classnames)
  a = setlablist(a,opt.classnames);
end
if ~isempty(opt.feats)
  a = a(:,opt.feats);
end
if ~isempty(opt.featnames)
  a = setfeatlab(a,opt.featnames);
end
if ~isempty(opt.misvalue)
  J = find(a==opt.misvalue);
  a(J) = NaN;
end
if ~isempty(opt.user)
  a = setuser(a,opt.user);
end
if ~isempty(opt.link)
  a = setuser(a,opt.link,'link');
end
if ~isempty(opt.desc)
  a = setuser(a,opt.desc,'desc');
end
if ~isempty(opt.dsetname)
  a = setname(a,opt.dsetname);
else
  a = setname(a,pr_callername);
end

% A dataset has been created by this call, whether or not it is saved.
new = true;

%% save if desired
if opt.matfile
  save(matfile,'a');
end

return
| 224 |
|
---|
| 225 |
|
---|
function ask_download(urlname,datsize)
%% user controlled downloading
% Announces the download of URLNAME. When the global ASK is true and
% DATSIZE (in MB) has been set, the user is asked for confirmation first;
% any answer other than an empty line or 'y' raises an error.
global ASK
if isempty(ASK)
  % not initialised yet: use the same default as ask_download_old
  ASK = true;
end

% size label shared by the prompt and the progress message
if ~isempty(datsize) && datsize ~= 0
  siz = ['(' num2str(datsize) ' MB)'];
else
  siz = '';
end

if ASK && ~isempty(datsize) % ask only if datsize has been set
  q = input(['Dataset is not available, OK to download ' siz ' [y]/n ?'],'s');
  if ~isempty(q) && ~strcmp(q,'y')
    error('No dataset')
  end
end

if isempty(siz)
  disp(['Downloading ' urlname ' ....'])
else
  disp(['Downloading ' urlname ' ' siz ' ....'])
end

return
| 251 |
|
---|
function opt = download_opt(opt_given)
%% Merge user supplied options with the defaults of PR_DOWNLOAD
%   opt_given - empty, or a structure holding a subset of the fields below
%   opt       - structure with all fields, given values overriding defaults
% An error is raised when opt_given is not a structure or when it contains
% a field name that is not listed here.
opt.size       = [];
opt.parse      = true;
opt.parsefun   = [];
opt.parsepars  = {};
opt.format     = [];
opt.nheadlines = 0;
opt.misvalchar = '?';
opt.misvalue   = [];
opt.delimeter  = ' ';
opt.extension  = 'dat';
opt.matfile    = true;
opt.labfeat    = [];
opt.feats      = [];
opt.featnames  = '';
opt.classnames = '';
opt.user       = [];
opt.dsetname   = '';
opt.link       = '';
opt.desc       = '';

if (~isempty(opt_given))
  fall = fieldnames(opt);
  % comma separated list of the valid field names, for the error messages
  ff = sprintf('%s, ',fall{:});
  ff = ff(1:end-2);
  if (~isstruct(opt_given))
    error(['OPTIONS should be a structure with at least one of the following fields: ' ff '.']);
  end
  fn = fieldnames(opt_given);
  if (~all(ismember(fn,fall)))
    error(['Wrong field names; valid field names are: ' ff])
  end
  for i = 1:length(fn)
    opt.(fn{i}) = opt_given.(fn{i});
  end
end

return
| 295 |
|
---|
function [a,new] = pr_download_old(url,varargin)
%% This is the old version of pr_download, to be called from the old
% version of pr_download_uci only (inside it). It can be removed when all
% mfiles in prdataset make the new call to pr_download_uci
%
%PR_DOWNLOAD Load or download data and create dataset
%
%   A = PR_DOWNLOAD(URL,FILE,SIZE,NHEAD,FORMAT,MISVALCHAR,DELCHAR,NOSAVE)
%
% INPUT
%   URL        URL of character file to be downloaded
%   FILE       Filename to download
%   SIZE       Size of data to be downloaded in Mbytes
%   NHEAD      # of headerlines to skip
%   FORMAT     String or cell array defining the format
%              (default, automatic)
%   MISVALCHAR Character used for missing values
%   DEL        Character delimiter used in the file (default ',')
%   NOSAVE     Logical, if TRUE A will not be saved, default FALSE
%
% OUTPUT
%   A          Unlabeled dataset
%   NEW        Logical, FALSE if an existing mat-file has been loaded,
%              TRUE otherwise
%
% DESCRIPTION
% This routine facilitates downloading of character based datasets. FILE
% should be the name (or path with name) in which the URL is downloaded. If
% needed the URL file is unzipped and/or untarred first. If FILE already
% exists it is used (no downloading). The file is parsed by TEXTSCAN using
% the format given in FORMAT (see TEXTSCAN) and the delimiter specified in
% DEL. If FORMAT is not given an attempt is made to derive it
% automatically.
%
% In case a mat-file name [FILE '.mat'] is found it will be used instead of
% downloading.
%
% Columns (features) given as characters (the '%s' fields in FORMAT) will
% be stored as text based features. They will be replaced by indices to a
% set of strings stored in the corresponding feature domain (see
% SETFEATDOM). Use FEAT2LAB to use such a feature for labeling the dataset,
% see the below example.
%
% EXAMPLE
% url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data';
% c = pr_download(url,'iris.dat',[]); % load Iris dataset from UCI
% % the labels are set as string (char) features in c(:,5)
% a = feat2lab(c,5);                  % use feature 5 for labeling
%
% SEE ALSO
% DATASETS, SETFEATDOM, GETFEATDOM, FEAT2LAB

% Copyright: R.P.W. Duin
% Faculty EWI, Delft University of Technology
% P.O. Box 5031, 2600 GA Delft, The Netherlands

[~,urlname,urlext] = fileparts(url);
[datname,siz,nhead,form,misval,del,nosave] = setdefaults(varargin,urlname,1,0,[],'?',',',false);

% fileparts returns the last extension only ('.gz' for 'x.tar.gz'), so a
% gzipped tar archive has to be recognised from the remaining stem.
istarball = any(strcmp(urlext,{'.tar','.tgz'})) || ...
    (strcmp(urlext,'.gz') && numel(urlname) >= 4 && ...
     strcmp(urlname(end-3:end),'.tar'));

[dirname,datname] = fileparts(datname);
if isempty(dirname)
  dirname = fileparts(which(mfilename));
end
urlname = [urlname urlext];          % name of file to be downloaded
matname = [datname '.mat'];          % name of mat-file to be created
datname = [datname '.dat'];          % name of datfile to be created
urlfile = fullfile(dirname,urlname); % temp file for download
datfile = fullfile(dirname,datname); % unpacked urlfile
matfile = fullfile(dirname,matname); % final matfile

new = true; % if matfile exists, use it
if exist(matfile,'file') == 2
  s = load(matfile);
  f = fieldnames(s);
  a = s.(f{1});
  new = false;
  return
end

if exist(datfile,'file') ~= 2 % if datfile does not exist ...
  ask_download_old(siz);
  if isempty(siz) || siz == 0
    disp(['Downloading ' urlname ' ....'])
  else
    disp(['Downloading ' urlname ' (' num2str(siz) ' MB) ....'])
  end

  % Single-quote arguments handed to the shell so that blanks or shell
  % metacharacters in paths and urls are taken literally.
  shq = @(str) ['''' strrep(str,'''','''\''''') ''''];

  % download into urlfile
  if ~usejava('jvm') && isunix
    stat = unix(['wget -q -O ' shq(urlfile) ' ' shq(url)]);
    status = (stat == 0);
  else
    [~,status] = urlwrite(url,urlfile);
  end
  if status == 0
    error(['Server unreachable or file not found: ' url])
  end

  % assume file is created, uncompress if needed
  if strcmp(urlext,'.zip')
    disp('Decompression ....')
    if ~usejava('jvm') && isunix
      unix(['unzip ' shq(urlfile) ' -d ' shq(datfile)]);
    else
      unzip(urlfile,datfile);
    end
  elseif istarball
    % tested before the plain '.gz' case: untar also handles gzipped tars
    disp('Decompression ....')
    untar(urlfile,datfile);
  elseif strcmp(urlext,'.gz')
    disp('Decompression ....')
    gunzip(urlfile,datfile);
  elseif ~strcmp(urlfile,datfile)
    copyfile(urlfile,datfile)
  end

  % The unpackers create a folder named DATFILE; replace that folder by
  % the first plain file found in it.
  if exist(datfile,'dir') == 7
    dirn = dir(datfile);
    dirn = dirn(~[dirn.isdir]); % drops '.', '..' and subfolders
    if isempty(dirn)
      error(['No data file found after decompression of ' urlname])
    end
    tmpfile = [datfile 'tmp'];
    copyfile(fullfile(datfile,dirn(1).name),tmpfile);
    rmdir(datfile,'s');
    movefile(tmpfile,datfile);
  end

  % delete the downloaded (compressed) file
  if ~strcmp(urlfile,datfile)
    delete(urlfile);
  end
end

% datfile should now be there, read and parse it
fid = fopen(datfile);
if fid < 0
  error(['Unable to open data file: ' datfile])
end
if isempty(form) % if no format given ...
  for j=1:nhead+1
    s = fgetl(fid); % derive it from the first nonheader line
  end
  if ~ischar(s)
    % fgetl returns -1 at end of file: there is no data line to inspect
    fclose(fid);
    error(['No data lines found in ' datfile])
  end
  s = mytextscan(s,'c',del,0); % use all %s for time being
  form = getform(s);           % convert fields to %n where appropriate
  fseek(fid,0,-1);             % restart
end

disp('Parsing ...')
% mytextscan closes fid after reading
c = mytextscan(fid,strrep(form,'n','s'),del,nhead);
a = pr_cell2dset(c,form,misval);

if ~nosave % don't save if not needed (e.g. called by pr_download_uci)
  save(matfile,'a');
end

return
| 445 |
|
---|
function ask_download_old(datsize)
% Legacy confirmation prompt used by pr_download_old.
% When the global ASK is true (its default when unset) the user is asked
% whether the download may proceed; DATSIZE (MB), if given, is shown in
% the question. Any answer other than an empty line or 'y' is an error.

global ASK
if isempty(ASK)
  ASK = true;
end

if ASK
  if isempty(datsize)
    label = '';
  else
    label = ['(' num2str(datsize) ' MB)'];
  end
  answer = input(['Dataset is not available, OK to download ' label ' [y]/n ?'],'s');
  accepted = isempty(answer) || strcmp(answer,'y');
  if ~accepted
    error('Dataset not found')
  end
end

return
| 466 |
|
---|
function form = getform(s)
% Derive a format string from the fields of one data line.
%   s    - output of mytextscan for a single line: s{1} holds the fields
%   form - row of characters, one per field: 'n' for a field that only
%          contains digits, sign, decimal point, exponent letters or
%          blanks, 'c' for every other field
fields = char(s{1}); % one field per row, padded with blanks
nfields = size(fields,1);
form = repmat('n',1,nfields);
for j = 1:nfields
  % The hyphen is placed last in the class so it is a literal '-'. Written
  % as '+-.' it is a range that also admits ',', which would classify a
  % field like '1,5' as numeric.
  if ~isempty(regexp(fields(j,:),'[^0-9+.eE -]','once'))
    form(j) = 'c';
  end
end
| 476 |
|
---|
function s = mytextscan(fid,forms,del,nhead)
% Read delimited text with TEXTSCAN, treating 'c' fields as strings.
%   fid   - file identifier, or a character string holding the text
%   forms - one conversion character per field, e.g. 'ssc'
%   del   - field delimiter; a blank selects TEXTSCAN's default
%   nhead - number of header lines to skip
%   s     - cell array as returned by TEXTSCAN
% A file identifier is closed after reading.

forms(forms == 'c') = 's';                   % 'c' fields are read as strings
pct = repmat('%',1,numel(forms));
fmt = reshape([pct; forms(:).'],1,[]);       % interleave: '%s%s...'
if del == ' '
  s = textscan(fid,fmt,'Headerlines',nhead);
else
  s = textscan(fid,fmt,'Delimiter',del,'Headerlines',nhead);
end
if ~ischar(fid)
  fclose(fid);
end