% Create a MIL dataset from the original WAV-files: segmentation of the
% spectrograms, and computation of features on the segmented regions.
%
% This is the new 19 species dataset from the MLSP competition.
%
% This first part defines the settings and loads the metadata
% (filenames, bag labels and the train/test split).

% some settings:
windowlen = 512;    % spectrogram window length (in samples)
fmax = 256;         % passed to spectrogram() as its FFT-length argument
intens_thr = 0.8;   % fraction handed to dd_threshold to pick the intensity
                    % threshold (presumably removes 80% of the signal --
                    % TODO confirm against dd_threshold)
f_min = 2000;       % frequency threshold (everything below is removed)
% blurring of the spectrogram:
G = fspecial('gaussian',[5 5],2);

% load the 'meta' data like labels and filenames
%dpath = '../birds_mlsp2013/mlsp_contest_dataset/essential_data/';
dpath = '/data/birds_mlsp2013/mlsp_contest_dataset2/essential_data';

% load the filenames
% Each line is read as a number followed by the rest of the token; the
% string part still carries its leading separator character, which is
% stripped later with (2:end).
fid = fopen(fullfile(dpath,'rec_id2filename.txt'));
if fid < 0
    error('Cannot open rec_id2filename.txt in %s.',dpath);
end
data = textscan(fid,'%n%s','headerlines',1);
fclose(fid);
bagid = data{1};
names = data{2};

% next load the labels:
% NOTE(review): '%n%s' assumes every line has a non-empty second field;
% verify this holds for the label file.
fid = fopen(fullfile(dpath,'rec_labels_test_hidden.txt'));
if fid < 0
    error('Cannot open rec_labels_test_hidden.txt in %s.',dpath);
end
data = textscan(fid,'%n%s','headerlines',1);
fclose(fid);    % this handle was previously left open
bagid2 = data{1};
labstr = data{2};

% load the indices for the training and test objects:
N = length(bagid2);
CVfile = fopen(fullfile(dpath,'CVfolds_2.txt'));
if CVfile < 0
    error('Cannot open CVfolds_2.txt in %s.',dpath);
end
CVdata = textscan(CVfile, '%f,%f', N, 'headerlines',1);
fclose(CVfile);
bagid3 = CVdata{1};
Itst = CVdata{2};   % nonzero marks a test bag (used as ~Itst(i) for training)

% some checking
% The three files are combined by row position, so their id columns have
% to agree exactly (isequal also catches a difference in length).
if ~isequal(bagid,bagid2)
    error('Bagid''s do not match.');
end
if ~isequal(bagid,bagid3)
    error('Bagid''s of the CV folds do not match.');
end

% run over the files, and get the features:
%
% For every recording: compute the spectrogram, smooth and threshold it,
% label the connected regions ('blobs') and describe each blob by 7 numbers
% [Area, Centroid(1:2), BoundingBox(1:4)]. The blobs of one recording form
% one bag.
nspecies = 19;      % 19 species dataset; the label codes are shifted by +1
                    % below, so they are presumably 0-based (0..18)
B = size(bagid,1);
x = cell(B,1);
baglab = zeros(B,nspecies);
instlab = '';
% NOTE: from here on bagid is reused: it no longer holds the record ids
% from the file, but for each instance (blob) the index of its bag.
bagid = [];
% wavread has been removed from recent MATLAB releases; use audioread when
% it is available and fall back to wavread otherwise.
have_audioread = exist('audioread','file') ~= 0;
for i=1:B
    %load the signal;
    wavfile = fullfile(dpath,'src_wavs',names{i}(2:end));
    if have_audioread
        % wavread appends '.wav' to names without extension, audioread
        % does not, so do it here to keep the same behaviour
        [~,~,ext] = fileparts(wavfile);
        if isempty(ext)
            wavfile = [wavfile '.wav'];
        end
        [signal,fs] = audioread(wavfile);
    else
        [signal,fs] = wavread(wavfile);
    end
    [S,f,t] = spectrogram(signal,windowlen,windowlen/2,fmax,fs);

    % smooth and threshold the spectrogram:
    I = imfilter(abs(S),G,'same');
    mask = (I>dd_threshold(I(:),intens_thr));
    % Remove all frequency rows below f_min. The row index needs the
    % explicit ',:' : mask(f<f_min) would be a linear index that only
    % touches the first column (time frame) of the mask.
    mask(f<f_min,:) = false;

    % find interesting regions (label once, reuse the label image):
    [bloblab,Nseg] = bwlabel(mask);
    props = regionprops(bloblab,'Area','Centroid','BoundingBox');

    % run over blobs:
    thisx = zeros(Nseg,7);
    for j=1:Nseg
        % compute/add some blob-properties:
        thisx(j,:) = [props(j).Area, props(j).Centroid, props(j).BoundingBox];
    end
    % don't forget: every blob of this recording belongs to bag i
    bagid = [bagid, repmat(i,1,Nseg)];
    x{i} = thisx;

    %Get the labels right for the training bags:
    if ~Itst(i)
        % labstr{i} is the label field with its leading separator, e.g.
        % ',11,12'; parse the comma separated codes without using eval
        % on text that comes from a file.
        species = sscanf(labstr{i}(2:end),'%d,');
        baglab(i,species+1) = 1;
    end
end

% create a dataset
% x is the cell array with one feature matrix per bag.
a = genmil(x);

% add the labels one by one:
% One name per species, in the order of the columns of baglab. char() pads
% all rows with trailing blanks to the length of the longest name, so the
% rows do not have to be padded by hand to build the character matrix.
ll = char( ...
    'BRCR-Brown Creeper', ...
    'PAWR-Pacific Wren', ...
    'PSFL-Pacific-slope Flycatcher', ...
    'RBNU-Red-breasted Nuthatch', ...
    'DEJU-Dark-eyed Junco', ...
    'OSFL-Olive-sided Flycatcher', ...
    'HETH-Hermit Thrush', ...
    'CBCH-Chestnut-backed Chickadee', ...
    'VATH-Varied Thrush', ...
    'HEWA-Hermit Warbler', ...
    'SWTH-Swainsons Thrush', ...
    'HAFL-Hammonds Flycatcher', ...
    'WETA-Western Tanager', ...
    'BHGB-Black-headed Grosbeak', ...
    'GCKI-Golden Crowned Kinglet', ...
    'WAVI-Warbling Vireo', ...
    'MGWA-MacGillivrays Warbler', ...
    'STJA-Stellars Jay', ...
    'CONI-Common Nighthawk');

for i=1:size(baglab,2)
    % instances (blobs) whose bag is marked positive for species i;
    % bagid holds the bag index of every instance
    I = ismember(bagid,find(baglab(:,i)));
    a = addlabels(a,genmillabels(I',1),ll(i,:));
end
% set it to the first bird:
% (label list 2 is selected; presumably list 1 is the one that genmil
% creates itself -- TODO confirm)
a = changelablist(a,2);
thisll = getlablistnames(a);
a = setname(a,strtrim(thisll(curlablist(a),:)));

% split the instances in a training set (x) and a test set (z), using the
% test indicator of the bag each instance belongs to. Note that x is
% overwritten here: it no longer is the cell array of features.
J = Itst(bagid);
x = a(~J,:);
z = a(logical(J),:);
save('birds20130709.mat', 'a', 'x', 'z', 'Itst', 'J');