source: distools/strkerm.m @ 41

Last change on this file since 41 was 10, checked in by bduin, 14 years ago
File size: 3.0 KB
Line 
1%STRKERM  String Kernel Matrix by Lodhi et al
2%
3%       [K,KK] = STRKERM (A,B,W,LAM,KNORM)
4%
5% INPUT
6%   A     Cell structure of N strings or a single string
7%   B     Cell structure of M strings or a single string (optional; default: B = A)
8%   W     Vector of K weights (optional; default: [1 1])
9%   LAM   Decay factor Weight parameter in (0,1] used to scale the intermediate kernels
10%         (optional; default: 1)
11%   KNORM Parameter (0/1) indicating whether the kernel should be normalized
12%         to remove bias wrt to text length (optional; default: 1)
13%
14% OUTPUT
15%   K           NxM kernel matrix
16%   KK          NxMxK matrix of intermediate kernels
17%
18% DESCRIPTION
19% Derives a string kernel matrix K following the idea of Lodhi et al [2002]. A and B are
20% either individual strings or cell structures of strings. The basic idea is to compute
21% the kernel as an inner product of the feature vectors for two strings and give a sum
22% over all common subsequences weighted according to their frequency of occurrence and
23% lengths. Dynamic programming is used to compute the kernel values between strings.
24% If dataset is needed, it should be created afterwards.
25%
26% DEFAULT
27%   B     = A
28%   W     = [1 1]
29%   LAM   = 1
30%   KNORM = 1
31%
32% REFERENCE
33% H.Lodhi, C.Saunders, J. Shawe-Taylor, N.Cristianini, C.Watkins, "Text
34% Classification using String Kernels", J. of Machine Learning Research 2,
35% 419-444, 2002.
36%
37
38% Copyright: Elzbieta Pekalska, ela.pekalska@googlemail.com
39% EWI Faculty, Delft University of Technology and
40% School of Computer Science, University of Manchester
41
42
43function [K,KK] = strkerm (a,b,w,lam,Knorm)
44if nargin < 5,
45  Knorm = 1;
46end
47if nargin < 4 | isempty(lam),
48  lam = 1;
49end
50if nargin < 3 | isempty(w),
51  w = [1 1];
52end
53if nargin < 2 | isempty(b),
54  b = a;
55end
56
57if any(w) < 0,
58  error('Weights should be nonnegative.');
59end
60
61if ~iscell(a),
62  if isstr(a) | (~isstr(a) & min(size(a)) == 1)
63    a = {a};
64  else
65    error('A is improper.');
66  end
67end
68if ~iscell(b),
69  if isstr(b) | (~isstr(b) & min(size(b)) == 1)
70    b = {b};
71  else
72    error('B is improper.');
73  end
74end
75
76
77m = length(a);
78n = length(b);
79
80
81K = zeros(m,n);
82for i=1:m
83  for j=1:n
84    K(i,j) = strkernel(a{i},b{j},w,lam);
85  end
86end
87
88if Knorm,
89  for i=1:m
90    Kaa(i,1) = strkernel(a{i},a{i},w,lam);
91  end
92  for j=1:n
93    Kbb(1,j) = strkernel(b{j},b{j},w,lam);
94  end
95  K = K ./ sqrt(Kaa*Kbb);
96end
97return;
98
99
100
101
102
103
104function K = strkernel(s,t,w,lam)
105
106if isstr(s),
107  s = ['a',s];
108  t = ['a',t];
109else
110  if size(s,1) > size(s,2)
111    s = [s(1); s];
112    t = [s(1); t];
113  else
114    s = [s(1) s];
115    t = [s(1) t];
116  end
117end
118
119ns = length(s);
120nt = length(t);
121n  = length(w);
122
123K = 0;
124KK(:,:,1) = ones(ns+1,nt+1);
125for i=1:n
126  KK(:,:,i+11) = zeros(ns+1,nt+1);
127  for j=1:ns
128    ss = 0;
129    for k=1:nt
130      if t(k) == s(j),
131        ss = ss + KK(j,k,i);
132      end
133      KK(j+1,k+1,i+1) = KK(j,k+1,i+1) +ss;
134    end
135  end
136  K = K + w(i)*KK(ns+1,nt+1,i+1)*lam^(2*i);
137end
Note: See TracBrowser for help on using the repository browser.