%HCLUSTC Hierarchical clustering classifier
%
%    W = HCLUSTC(A,CTYPE,K)
%    W = HCLUSTC(A,CTYPE,K,U_D)
%
% INPUT
%   A      Dataset used to train the clustering
%   CTYPE  Between-cluster distance or clustering criterion, passed on
%          to HCLUST (default = 's')
%   K      Number of clusters (default = 10)
%   U_D    Untrained proximity mapping (default = proxm([],'d',2))
%
% OUTPUT
%   W      Hierarchical clustering mapping
%
% DESCRIPTION
% It returns a hierarchical clustering classifier W, trained on dataset A.
% It uses CTYPE as the between-cluster distance or clustering
% criterion (see hclust.m for more details), and uses K clusters.
% This mapping generalizes to new, unseen data. This is done
% by computing the cluster distance between the new point and all the
% clusters obtained during training. The object is then assigned to the
% closest cluster.
% By default the (squared) Euclidean distance is used, but
% alternatively other proximity measures can be supplied in U_D.
%
% When an empty A is supplied, an untrained mapping is returned.
% Evaluation on new data supports the criteria 's'/'single',
% 'c'/'complete' and 'a'/'average'; any other criterion raises an error.
%
% SEE ALSO
% HCLUST, PROXM

function w = hclustc(a,ctype,k,u_d)

% Fill in defaults for omitted (or empty) arguments.
if nargin<4
    u_d = proxm([],'d',2);
end
if nargin<3 || isempty(k)
    k = 10;
end
if nargin<2 || isempty(ctype)
    ctype = 's';
end
if nargin<1 || isempty(a)
    % No data given: return the untrained mapping.
    w = mapping(mfilename,{ctype,k,u_d});
    w = setname(w,'Hierarchical clustering (k=%d)',k);
    return
end

if ~ismapping(ctype)
    % Training: cluster the proximities between all training objects.
    w = a*u_d;
    D = a*w;
    % Only the labels are used; the second output is requested as in the
    % original call so HCLUST is invoked with the same number of outputs.
    [lab, dendr] = hclust(D,ctype,k);
    x = dataset(+a,lab);

    % Store everything that is needed to evaluate new objects.
    W.u = u_d;
    W.x = x;
    W.k = k;
    W.ctype = ctype;
    w = mapping(mfilename,'trained',W,[],size(a,2),k);
    w = setname(w,'Hierarchical clustering (k=%d)',k);

else
    % Evaluate the clustering on new data; the second argument is the
    % trained mapping.
    W = getdata(ctype);
    n = size(a,1);
    out = zeros(n,W.k);
    % Compute the distance to each cluster. The number of clusters must
    % come from the trained mapping (W.k): the input argument K is not
    % the trained value here, it may just hold the default.
    for i=1:W.k
        % Proximity mapping trained on the objects selected for label i
        % (assumes the labels produced by HCLUST are 1..W.k).
        wi = seldat(W.x,i)*W.u;
        d = a*wi;
        switch W.ctype
            case {'s','single'}
                out(:,i) = min(d,[],2);
            case {'c','complete'}
                out(:,i) = max(d,[],2);
            case {'a','average'}
                out(:,i) = mean(d,2);
            otherwise
                % Without this branch the output would silently stay all
                % zeros, which is a meaningless result.
                error('hclustc:unknownCriterion', ...
                    'Evaluating new data is not supported for this clustering criterion.');
        end
    end

    % Distances are negated, so a smaller distance gives a larger output.
    w = setdat(a,-out,ctype);

end