Claw 1.7.3
kmp.tpp
1/*
2 CLAW - a C++ Library Absolutely Wonderful
3
4 CLAW is a free library without any particular aim but being useful to
5 anyone.
6
7 Copyright (C) 2005-2011 Julien Jorge
8
9 This library is free software; you can redistribute it and/or
10 modify it under the terms of the GNU Lesser General Public
11 License as published by the Free Software Foundation; either
12 version 2.1 of the License, or (at your option) any later version.
13
14 This library is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 Lesser General Public License for more details.
18
19 You should have received a copy of the GNU Lesser General Public
20 License along with this library; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22
23 contact: julien.jorge@gamned.org
24*/
25/**
26 * \file kmp.tpp
27 * \brief Implementation of the kmp class.
28 * \author Julien Jorge
29 */
30#include <map>
31#include <assert.h>
32#include <claw/it_index.hpp>
33
34/*---------------------------------------------------------------------------*/
35/**
36 * \brief Calculate the length of the longest common prefix between two
37 * patterns.
38 * \param begin_1 Iterator on the first item in the first pattern.
39 * \param begin_2 Iterator on the first item in the second pattern.
40 * \param end_1 Iterator after the last item in the first pattern.
41 * \param end_2 Iterator after the last item in the second pattern.
42 */
43template<class RandomIterator>
44unsigned int
45claw::text::kmp<RandomIterator>::
46common_prefix_length( const RandomIterator begin_1,
47 const RandomIterator begin_2,
48 const RandomIterator end_1, const RandomIterator end_2
49 ) const
50{
51 unsigned int count = 0;
52 RandomIterator it_1 = begin_1, it_2 = begin_2;
53 bool quit = false;
54
55 while ( (it_1 != end_1) && (it_2 != end_2) && ! quit )
56 if ( *it_1 == *it_2 )
57 {
58 ++it_1;
59 ++it_2;
60 ++count;
61 }
62 else
63 quit = true;
64
65 return count;
66} // common_prefix_length()
67
68/*---------------------------------------------------------------------------*/
69/**
70 * \brief Preprocessing. Calculate the pattern's z-boxes.
71 * \param begin Iterator on the first item in the pattern.
72 * \param end Iterator after the last item in the pattern.
73 * \param out (out) Calculated z-boxes.
74 * \remark out contains only non-zero z-boxes.
75 */
76template<class RandomIterator>
77void claw::text::kmp<RandomIterator>::z_boxes(const RandomIterator begin,
78 const RandomIterator end,
79 std::map<unsigned int,unsigned int>& out) const
80{
81 // right and left bounds of the current item's Z-box.
82 claw::it_index<RandomIterator> it_r(begin);
83 claw::it_index<RandomIterator> it_l(begin);
84 claw::it_index<RandomIterator> it_k(begin); // increment on the items
85 unsigned int z; // le Zi of the current position
86
87 for (++it_k; it_k!=end; ++it_k)
88 {
89 if (it_k > it_r)
90 {
91 z = common_prefix_length(begin, it_k, end, end);
92
93 if ( z > 0 ) // set the Z-box
94 {
95 out[it_k] = z;
96 it_l = it_k;
97 it_r = it_k.operator+(z) - 1;
98 }
99 }
100 else /* k <= r */
101 {
102 unsigned int k_bis = it_k - it_l;
103 claw::it_index<RandomIterator> it_b(it_r - it_k);
104
105 if ( out.find(k_bis) == out.end() )
106 z = 0;
107 else
108 z = out[k_bis];
109
110 if ( z <= (unsigned int)it_b )
111 {
112 if ( z > 0 )
113 out[it_k] = z;
114 }
115 else
116 {
117 claw::it_index<RandomIterator> it_q = it_r + 1;
118
119 it_q += common_prefix_length(it_q, it_b+1, end, end);
120
121 out[it_k] = it_q - it_k;
122 it_r = it_q - 1;
123 it_l = it_k;
124 }
125 }
126 }
127} // z_boxes()
128
129/*---------------------------------------------------------------------------*/
130/**
131 * \brief Preprocessing. Calculate the pattern's spi' values.
132 * \param begin Iterator on the first item in the pattern.
133 * \param end Iterator after the last item in the pattern.
134 * \param out (out) Calculated spi'.
135 * \remark out contains only non-zero spi'.
136 */
137template<class RandomIterator>
138void claw::text::kmp<RandomIterator>::spi_prime(const RandomIterator begin,
139 const RandomIterator end,
140 std::map<unsigned int, unsigned int>& out) const
141{
142 std::map<unsigned int, unsigned int> z; // pattern's Z-boxes.
143 unsigned int j; // loop index.
144
145 // set Z-boxes
146 z_boxes(begin, end, z);
147
148 // calculates spi' (from end to begining)
149 j=end-begin;
150
151 do
152 {
153 --j;
154 if (z.find(j) != z.end())
155 out[j + z[j] - 1] = z[j];
156 }
157 while (j!=0);
158} // spi_prime()
159
160/*---------------------------------------------------------------------------*/
161/**
162 * \brief Pattern matching with the Knuth-Morris-Pratt's algorithm.
163 * \param pattern_begin Iterator on the first item in the pattern.
164 * \param pattern_end Iterator after the last item in the pattern.
165 * \param text_begin Iterator on the first item in the text.
166 * \param text_end Iterator after the last item in the text.
167 * \param action Predicate called with the last found position for the pattern.
168 * \remark Exits if action return false.
169 * \pre pattern_begin != pattern_end
170 */
171template<class RandomIterator>
172template<class UnaryPredicate>
173void claw::text::kmp<RandomIterator>::operator()
174 (const RandomIterator pattern_begin, const RandomIterator pattern_end,
175 const RandomIterator text_begin, const RandomIterator text_end,
176 UnaryPredicate& action ) const
177{
178 std::map<unsigned int, unsigned int> spi; // pattern's spi'
179 claw::it_index<RandomIterator> it_p(pattern_begin,1);
180 claw::it_index<RandomIterator> it_t(text_begin,1);
181 bool stop = false; // result of the last call to the predicate action
182
183 const int pattern_length = pattern_end - pattern_begin;
184 const int text_length = text_end - text_begin;
185
186 assert(pattern_begin != pattern_end);
187
188 spi_prime(pattern_begin, pattern_end, spi);
189
190 unsigned int i = 0;
191
192 while ( ((int)it_t <= text_length - (pattern_length - it_p)) && !stop )
193 {
194 unsigned int common;
195
196 common = common_prefix_length(it_p, it_t, pattern_end, text_end);
197 i += common;
198 it_p += common;
199 it_t += common;
200
201 if (it_p == 1)
202 ++it_t;
203 else
204 {
205 if ( (int)it_p == pattern_length+1 )
206 stop = !action( (int)it_t - pattern_length-1 );
207
208 i = spi[i-1];
209 it_p.set(pattern_begin+i, i+1);
210 }
211 }
212} // operator()