% Create a MIL dataset from the original WAV files: segment the
% spectrograms and compute features on the segmented regions.
%
% This is the new 19 species dataset from the MLSP competition.

% some settings:
windowlen = 512;   % spectrogram window length (samples)
fmax = 256;        % used as the 4th (FFT length) argument of spectrogram below
intens_thr = 0.8;  % passed to dd_threshold (original note: "remove 80% of the signal??")
f_min = 2000;      % frequency threshold (everything below is removed)
% blurring of the spectrogram: 5x5 Gaussian kernel with sigma 2
G = fspecial('gaussian',[5 5],2);

% load the 'meta' data like labels and filenames
%dpath = '../birds_mlsp2013/mlsp_contest_dataset/essential_data/';
dpath = '/data/birds_mlsp2013/mlsp_contest_dataset2/essential_data';

% load the filenames (one header line, then "<rec_id><name>" per line)
metafile = fullfile(dpath,'rec_id2filename.txt');
fid = fopen(metafile);
if fid < 0
    error('Cannot open %s.', metafile);
end
data = textscan(fid,'%n%s','headerlines',1);
fclose(fid);
bagid = data{1};
names = data{2};

% next load the labels:
metafile = fullfile(dpath,'rec_labels_test_hidden.txt');
fid = fopen(metafile);
if fid < 0
    error('Cannot open %s.', metafile);
end
data = textscan(fid,'%n%s','headerlines',1);
fclose(fid);   % the handle was previously left open
bagid2 = data{1};
labstr = data{2};

% load the indices for the training and test objects:
N = length(bagid2);
metafile = fullfile(dpath,'CVfolds_2.txt');
CVfile = fopen(metafile);
if CVfile < 0
    error('Cannot open %s.', metafile);
end
CVdata = textscan(CVfile, '%f,%f', N, 'headerlines',1);
fclose(CVfile);
bagid3 = CVdata{1};
Itst = CVdata{2};   % nonzero marks a test recording (see its use as ~Itst(i) / Itst(bagid))

% some checking: all three files must list the same recordings in the same
% order, because names, labstr and Itst are indexed by position later on.
% isequal also copes with lists of different length.
if ~isequal(bagid,bagid2)
    error('Bagid''s do not match.');
end
if ~isequal(bagid,bagid3)
    error('Bagid''s in CVfolds_2.txt do not match.');
end

% run over the files, and get the features:
%   x{i}    - one row of features per segmented blob of recording i
%   baglab  - B x nspecies 0/1 matrix with the labels of the training bags
%   bagid   - (rebuilt here) for every blob, the index i of its recording
B = size(bagid,1);
x = cell(B,1);
nspecies = 19;               % labels in the label file are 0..18
baglab = zeros(B,nspecies);
instlab = '';
bagid = [];
nmoment = 0;                 % width of the moment features, known after the first blob
quants = [0 0.25 0.5 0.75 1];
for i=1:B
    fprintf('%d/%d\n',i,B);

    % load the signal; the first character of the name is skipped
    % (presumably the ',' separator that the '%n%s' format leaves in).
    wavfile = fullfile(dpath,'src_wavs',names{i}(2:end));
    if exist('audioread','file')
        % wavread has been removed from recent MATLAB releases; unlike
        % wavread, audioread does not append the extension itself.
        if numel(wavfile) < 4 || ~strcmpi(wavfile(end-3:end),'.wav')
            wavfile = [wavfile '.wav'];
        end
        [signal,fs] = audioread(wavfile);
    else
        [signal,fs] = wavread(wavfile);
    end
    [S,f,t] = spectrogram(signal,windowlen,windowlen/2,fmax,fs);
    absim = abs(S);
    realim = real(S);
    imagim = imag(S);

    % smooth and threshold the spectrogram:
    I = imfilter(absim,G,'same');
    mask = (I>dd_threshold(I(:),intens_thr));
    % Remove all frequency rows below f_min. The row index needs the
    % explicit ',:' -- mask(f<f_min) would be a linear index and only
    % clear the first time frame.
    mask(f<f_min,:) = false;

    % find interesting regions (connected components of the mask):
    [bloblab,Nseg] = bwlabel(mask);

    maskfeats = nan(Nseg,3);
    absfeats = nan(Nseg,7);
    realfeats = nan(Nseg,7);
    imagfeats = nan(Nseg,7);
    momentrows = cell(Nseg,1);

    % run over blobs:
    for j=1:Nseg
        ix = (bloblab==j);

        % blob size: pixel count, max. extent along dim 1 and along dim 2
        pixtotal = sum(ix(:));
        pixheight = max(sum(ix,1));
        pixwidth = max(sum(ix,2));
        maskfeats(j,:) = [pixtotal pixheight pixwidth];

        % mean, std and min/quartiles/max of the magnitude, real and
        % imaginary part of the spectrogram inside the blob
        seg = absim(ix);
        absfeats(j,:) = [mean(seg) std(seg) reshape(quantile(seg(:),quants),1,[])];
        seg = realim(ix);
        realfeats(j,:) = [mean(seg) std(seg) reshape(quantile(seg(:),quants),1,[])];
        seg = imagim(ix);
        imagfeats(j,:) = [mean(seg) std(seg) reshape(quantile(seg(:),quants),1,[])];

        % moment features of the masked magnitude image
        seg = absim.*ix;
        momentfeat = [moments(seg,[1;0],[0;1],1,0) ...
            moments(seg,[2;1;0],[0;1;2],1,0) ...
            moments(seg,[2,1,0],[0,1,2],1,1) ...
            hu_moments(seg) zernike_moments(seg)];
        % keep one row per blob (previously only the last blob survived
        % and the concatenation below failed for Nseg > 1)
        momentrows{j} = reshape(momentfeat,1,[]);

        % don't forget: remember to which bag this instance belongs
        bagid(end+1) = i;
    end

    if Nseg > 0
        momentfeats = vertcat(momentrows{:});
        nmoment = size(momentfeats,2);
    else
        % no blob found: empty bag with the width seen so far
        % NOTE(review): confirm that genmil accepts empty bags
        momentfeats = zeros(0,nmoment);
    end
    x{i} = [maskfeats absfeats realfeats imagfeats momentfeats];

    % Get the labels right for the training bags: the label string is a
    % comma-separated list of 0-based species indices (first char skipped).
    if ~Itst(i)
        labs = sscanf(labstr{i}(2:end),'%d,');
        if any(labs < 0 | labs >= nspecies)
            error('Label out of range for bag %d.',i);
        end
        baglab(i,labs+1) = 1;
    end
end

% create a dataset from the per-recording feature matrices
a = genmil(x);

% Species names, one per column of baglab. char() pads all rows with
% trailing blanks to the same length, so the rows need no manual padding.
ll = char({ ...
    'BRCR-Brown Creeper'; ...
    'PAWR-Pacific Wren'; ...
    'PSFL-Pacific-slope Flycatcher'; ...
    'RBNU-Red-breasted Nuthatch'; ...
    'DEJU-Dark-eyed Junco'; ...
    'OSFL-Olive-sided Flycatcher'; ...
    'HETH-Hermit Thrush'; ...
    'CBCH-Chestnut-backed Chickadee'; ...
    'VATH-Varied Thrush'; ...
    'HEWA-Hermit Warbler'; ...
    'SWTH-Swainsons Thrush'; ...
    'HAFL-Hammonds Flycatcher'; ...
    'WETA-Western Tanager'; ...
    'BHGB-Black-headed Grosbeak'; ...
    'GCKI-Golden Crowned Kinglet'; ...
    'WAVI-Warbling Vireo'; ...
    'MGWA-MacGillivrays Warbler'; ...
    'STJA-Stellars Jay'; ...
    'CONI-Common Nighthawk'});

% add the labels one by one: every instance gets the label of its bag
for i=1:size(baglab,2)
    I = ismember(bagid,find(baglab(:,i)));
    a = addlabels(a,genmillabels(I',1),ll(i,:));
end

% set it to the first bird (label list 2; presumably list 1 is the
% dataset's default list -- verify against the toolbox) and name the
% dataset after the active label list
a = changelablist(a,2);
thisll = getlablistnames(a);
a = setname(a,strtrim(thisll(curlablist(a),:)));

% split the instances into training (x) and test (z) by their bag's flag
J = Itst(bagid);
x = a(~J,:);
z = a(logical(J),:);
save('birds20130710.mat', 'a', 'x', 'z', 'Itst', 'J');