Algorithms Library Toolkit
A toolkit for algorithms, especially for algorithms on formal languages
ExactSubtreeRepeats.h
Go to the documentation of this file.
1
6#pragma once
7
8#include <alib/deque>
9#include <alib/map>
10#include <queue>
11#include <stack>
12#include <alib/tree>
13#include <alib/tuple>
14#include <alib/vector>
16
17#include "SubtreeJumpTable.h"
18
19#include <global/GlobalData.h>
21
22namespace tree {
23
24namespace properties {
25
30
34 class ExactSubtreeRepeatsAux {
35
41 template < class SymbolType >
42 void buildMu ( const ext::vector < common::ranked_symbol < SymbolType > > & symbols );
43
49 template < class SymbolType >
50 void buildP ( const ext::vector < common::ranked_symbol < SymbolType > > & symbols );
51
57 template < class SymbolType >
58 void buildH ( const ext::vector < common::ranked_symbol < SymbolType > > & symbols );
59
65 template < class SymbolType >
66 void buildFC ( const ext::vector < common::ranked_symbol < SymbolType > > & symbols );
67
68 public:
69 template < class SymbolType > ExactSubtreeRepeatsAux ( const ext::vector < common::ranked_symbol < SymbolType > > & symbols );
77 ext::list < ext::pair < ext::deque < unsigned >, unsigned > > found_repeats;
78 unsigned alphabetSize;
79 unsigned treeSize;
80 unsigned sc;
81 };
82
89 template < class SymbolType >
91
99 static void assignLevel ( ext::deque < unsigned > S, unsigned l, ExactSubtreeRepeats::ExactSubtreeRepeatsAux & aux );
100
110 template < class SymbolType >
111 static void partition ( ext::deque < unsigned > S, unsigned l, int ac, const ext::vector < common::ranked_symbol < SymbolType > > & symbols, ExactSubtreeRepeats::ExactSubtreeRepeatsAux & aux );
112
113public:
118 template < class SymbolType >
120};
121
122template < class SymbolType >
123ExactSubtreeRepeats::ExactSubtreeRepeatsAux::ExactSubtreeRepeatsAux ( const ext::vector < common::ranked_symbol < SymbolType > > & symbols ) {
124
125 sc = 0;
126 this->treeSize = symbols.size ( );
127 buildMu ( symbols );
128 buildP ( symbols );
129 buildH ( symbols );
130 buildFC ( symbols );
131 this->T = ext::vector < unsigned > ( symbols.size ( ) );
132 this->TL = ext::vector < unsigned > ( symbols.size ( ) );
133 this->LA = ext::vector < std::queue < ext::pair < ext::deque < unsigned >, unsigned > > > ( this->H.back ( ) + 1 );
134
136 common::Streams::log << "Alphabet size set to " << alphabetSize << std::endl;
137 common::Streams::log << "Tree size set to " << this->treeSize << std::endl;
138 common::Streams::log << "Auxiliary structures computed ! " << std::endl;
139 }
140}
141
142template < class SymbolType >
143void ExactSubtreeRepeats::ExactSubtreeRepeatsAux::buildMu ( const ext::vector < common::ranked_symbol < SymbolType > > & symbols ) {
144 // Build mapping mu_map((Symb, Rank) -> Number) and construct array mu from it
146 this->alphabetSize = 0;
147
148 for ( auto it = symbols.begin ( ); it != symbols.end ( ); it++ ) {
149 auto search = mu_map.find ( ext::make_pair ( it->getSymbol ( ), it->getRank ( ) ) );
150
151 if ( search == mu_map.end ( ) ) {
152 mu_map.insert ( std::make_pair ( ext::make_pair ( it->getSymbol ( ), it->getRank ( ) ), this->alphabetSize ) );
153 mu.push_back ( this->alphabetSize );
154 this->alphabetSize += 1;
155 } else {
156 mu.push_back ( search->second );
157 }
158 }
159
160 // Test mu_map
162 for ( auto it = mu_map.begin ( ); it != mu_map.end ( ); it++ )
163 common::Streams::log << "map: " << it->first << " -> " << it->second << std::endl;
164
165 common::Streams::log << "mu : ";
166
167 for ( auto it : mu )
168 common::Streams::log << it << " ";
169
170 common::Streams::log << std::endl;
171 }
172}
173
174template < class SymbolType >
175void ExactSubtreeRepeats::ExactSubtreeRepeatsAux::buildP ( const ext::vector < common::ranked_symbol < SymbolType > > & symbols ) {
176 // Build parent array
177 P = ext::vector < unsigned > ( this->treeSize - 1 );
178 std::stack < unsigned > RP;
179
180 for ( unsigned i = 0; i < symbols.size ( ); ++i ) {
181 for ( unsigned j = 0; j < symbols[i].getRank ( ); ++j ) {
182 P[RP.top ( )] = i;
183 RP.pop ( );
184 }
185
186 RP.push ( i );
187 }
188
189 // Test parents
191 common::Streams::log << " P : ";
192
193 for ( unsigned parent : P )
194 common::Streams::log << parent << " ";
195
196 common::Streams::log << std::endl;
197 }
198}
199
200template < class SymbolType >
201void ExactSubtreeRepeats::ExactSubtreeRepeatsAux::buildH ( const ext::vector < common::ranked_symbol < SymbolType > > & symbols ) {
202 // Build height array
203 H = ext::vector < unsigned > ( this->treeSize );
204 std::stack < unsigned > RH;
205
206 for ( unsigned i = 0; i < symbols.size ( ); ++i ) {
207 if ( symbols[i].getRank ( ) == 0 ) {
208 RH.push ( 0 );
209 H[i] = 0;
210 } else {
211 unsigned r = 0;
212
213 for ( unsigned j = 0; j < symbols[i].getRank ( ); ++j ) {
214 unsigned p = RH.top ( );
215
216 if ( p > r )
217 r = p;
218
219 RH.pop ( );
220 }
221
222 H[i] = r + 1;
223 RH.push ( r + 1 );
224 }
225 }
226
227 // Test heights
229 common::Streams::log << " H : ";
230
231 for ( unsigned height : H )
232 common::Streams::log << height << " ";
233
234 common::Streams::log << std::endl;
235 }
236}
237
238template < class SymbolType >
239void ExactSubtreeRepeats::ExactSubtreeRepeatsAux::buildFC ( const ext::vector < common::ranked_symbol < SymbolType > > & symbols ) {
240 // Build First child array
241 FC = ext::vector < bool > ( this->treeSize - 1 );
242 std::stack < unsigned > RFC;
243
244 for ( unsigned i = 0; i < symbols.size ( ); ++i ) {
245 if ( symbols[i].getRank ( ) == 0 ) {
246 RFC.push ( i );
247 } else {
248 for ( unsigned j = 0; j < symbols[i].getRank ( ) - 1; ++j ) {
249 unsigned r = RFC.top ( );
250 RFC.pop ( );
251 FC[r] = false;
252 }
253
254 unsigned r = RFC.top ( );
255 RFC.pop ( );
256 FC[r] = true;
257 RFC.push ( i );
258 }
259 }
260
261 // Test First child
263 common::Streams::log << "FC : ";
264
265 for ( bool firstChild : FC )
266 common::Streams::log << firstChild << " ";
267
268 common::Streams::log << std::endl;
269 }
270}
271
272template < class SymbolType >
273void ExactSubtreeRepeats::partition ( ext::deque < unsigned > S, unsigned l, int ac, const ext::vector < common::ranked_symbol < SymbolType > > & symbols, ExactSubtreeRepeats::ExactSubtreeRepeatsAux & aux ) {
274
275 std::queue < unsigned > Q1;
276 std::queue < unsigned > Q2;
277 std::queue < ext::tuple < ext::deque < unsigned >, unsigned, int > > Q3;
278
279 ext::vector < bool > Bn ( symbols.size ( ) );
280 ext::vector < bool > Bs ( aux.alphabetSize );
281 ext::vector < ext::tuple < ext::deque < unsigned >, unsigned, int > > En ( symbols.size ( ) );
282 ext::vector < ext::tuple < ext::deque < unsigned >, unsigned, int > > Es ( aux.alphabetSize );
283
284 while ( !S.empty ( ) ) {
285 unsigned i = S.front ( );
286 S.pop_front ( );
287 unsigned r = i + l;
288
289 if ( aux.T[r] != 0 ) {
290 std::get < 0 > ( En[aux.T[r]] ).push_back ( i );
291
292 if ( ! Bn [ aux.T [ r ] ] ) {
293 Bn[aux.T[r]] = true;
294 std::get < 1 > ( En[aux.T[r]] ) = l + aux.TL[r];
295 std::get < 2 > ( En[aux.T[r]] ) = ac - 1;
296 Q1.push ( aux.T[r] );
297 }
298 } else {
299 unsigned v = aux.mu[r];
300 std::get < 0 > ( Es[v] ).push_back ( i );
301
302 if ( ! Bs [ v ] ) {
303 Bs[v] = true;
304 std::get < 1 > ( Es[v] ) = l + 1;
305 std::get < 2 > ( Es[v] ) = ac + symbols[r].getRank ( ) - 1;
306 Q2.push ( v );
307 }
308 }
309 }
310
311 while ( !Q1.empty ( ) ) {
312 unsigned k = Q1.front ( );
313 Q1.pop ( );
314 Q3.push ( std::move ( En[k] ) );
315
316 /* The next two lines can be safely removed (Partition(), lines 23-24 in the paper)
317 * The variables are local and won't be used in the function again.
318 * Leaving them here to show every step of the algorithm.
319 */
320 /* En[k] = ext::tuple < ext::deque < unsigned >, unsigned, int > ( );
321 * Bn[k] = false;
322 */
323 }
324
325 while ( !Q2.empty ( ) ) {
326 unsigned k = Q2.front ( );
327 Q2.pop ( );
328 Q3.push ( std::move ( Es[k] ) );
329
330 /* The next two lines can be safely removed (Partition(), lines 28-29 in the paper)
331 * The variables are local and won't be used in the function again.
332 * Leaving them here to show every step of the algorithm.
333 */
334 /* Es[k] = ext::tuple < ext::deque < unsigned >, unsigned, int > ( );
335 * Bs[k] = false;
336 */
337 }
338
339 while ( !Q3.empty ( ) ) {
340 ext::tie ( S, l, ac ) = std::move ( Q3.front ( ) );
341 Q3.pop ( );
342
343 if ( ac == 0 ) {
345 common::Streams::log << " ! Repeat : " << S << " " << l << std::endl;
346
347 aux.found_repeats.push_back ( ext::make_pair ( S, l ) );
348 aux.sc += 1;
349
350 for ( unsigned j : S ) {
351 aux.T[j] = aux.sc;
352 aux.TL[j] = l;
353 }
354
355 ExactSubtreeRepeats::assignLevel ( std::move ( S ), l, aux );
356 } else {
357 ExactSubtreeRepeats::partition ( std::move ( S ), l, ac, symbols, aux );
358 }
359 }
360}
361
362template < class SymbolType >
363ext::vector < common::ranked_symbol < unsigned > > ExactSubtreeRepeats::repeatsPostfixRanked ( const ext::vector < common::ranked_symbol < SymbolType > > & symbols ) {
364
365 ExactSubtreeRepeats::ExactSubtreeRepeatsAux aux = ExactSubtreeRepeats::ExactSubtreeRepeatsAux ( symbols );
366
367 ext::vector < ext::deque < unsigned > > As ( aux.alphabetSize );
368 ext::vector < bool > Bs ( aux.alphabetSize );
369 ext::vector < unsigned > Cs ( aux.alphabetSize );
370 std::queue < unsigned > Q5;
371
372 for ( unsigned i = 0; i < aux.treeSize; ++i ) {
373 if ( symbols[i].getRank ( ) == 0 ) {
374 unsigned k = aux.mu[i];
375
376 if ( ! Bs[k] ) {
377 Bs[k] = true;
378 Q5.push ( k );
379 }
380
381 As[k].push_back ( i );
382
383 if ( Cs[k] == 0 ) {
384 aux.sc += 1;
385 Cs[k] = aux.sc;
386 }
387
388 aux.T[i] = Cs[k];
389 aux.TL[i] = 1;
390 } else {
391 aux.T[i] = 0;
392 aux.TL[i] = 0;
393 }
394 }
395
396 // Check As contents
398 common::Streams::log << "One node repeats (As): ";
399
400 for ( unsigned i = 0; i < As.size ( ); ++i )
401 if ( !As[i].empty ( ) )
402 common::Streams::log << i << ":" << As[i] << " ";
403
404 common::Streams::log << std::endl;
405 }
406
407 while ( !Q5.empty ( ) ) {
408 unsigned k = Q5.front ( );
409 Q5.pop ( );
410 Bs[k] = false;
411
413 common::Streams::log << " ! Repeat : " << As[k] << " " << 1 << std::endl;
414
415 aux.found_repeats.push_back ( ext::make_pair ( As[k], 1 ) );
416 ExactSubtreeRepeats::assignLevel ( std::move ( As[k] ), 1, aux );
417 }
418
419 for ( unsigned i = 1; i <= aux.H.back ( ); i++ )
420 while ( !aux.LA[i].empty ( ) ) {
421 ExactSubtreeRepeats::partition ( std::move ( aux.LA[i].front ( ).first ), aux.LA[i].front ( ).second, 0, symbols, aux );
422 aux.LA[i].pop ( );
423 }
424
425 /* Prepare result :
426 * we have collected the triplets (ac part of the triplet from the paper is always 0, so implementation reduces it to pairs) at this point and
427 * need to build a postfix representation of a tree from them.
428 * S-repeats IDs will be used as node-labels for the root (index_from_S + l)
429 * of each subtree.
430 */
431 ext::vector < unsigned > post_repeats ( aux.treeSize );
432
433 unsigned curr_repeat = 0;
434
435 for ( const auto & repeat : aux.found_repeats ) {
436 for ( unsigned s : repeat.first )
437 post_repeats[s + repeat.second - 1] = curr_repeat;
438
439 ++ curr_repeat;
440 }
441
443 common::Streams::log << "Repeat postfix string : ";
444
445 for ( unsigned post_repeat : post_repeats )
446 common::Streams::log << post_repeat << " ";
447
448 common::Streams::log << std::endl;
449 }
450
452
453 for ( unsigned i = 0; i < aux.treeSize; i++ )
454 res.push_back ( common::ranked_symbol < unsigned > ( post_repeats[i], symbols[i].getRank ( ) ) );
455
456 return res;
457}
458
459template < class SymbolType >
461 ext::vector < common::ranked_symbol < unsigned > > res = repeatsPostfixRanked ( tree.getContent ( ) );
462
463 return tree::PostfixRankedTree < unsigned > ( std::move ( res ) );
464}
465
466} /* namespace properties */
467
468} /* namespace tree */
469
static bool verbose
Verbose flag. Some algorithms print additional runtime information about internal datastructures or t...
Definition: GlobalData.h:24
static ext::reference_wrapper< ext::ostream > log
Standard loging stream. Mapped to descriptor 4.
Definition: GlobalData.h:78
Definition: ranked_symbol.hpp:20
Class extending the deque class from the standard library. Original reason is to allow printing of th...
Definition: deque.hpp:44
Class extending the list class from the standard library. Original reason is to allow printing of the...
Definition: list.hpp:44
Class extending the map class from the standard library. Original reason is to allow printing of the ...
Definition: map.hpp:48
auto begin() &
Inherited behavior of begin for non-const instance.
Definition: map.hpp:185
std::pair< iterator, bool > insert(const T &key, const R &value)
Insert variant with explicit key and value parameters.
Definition: map.hpp:118
auto end() &
Inherited behavior of end for non-const instance.
Definition: map.hpp:215
Class extending the vector class from the standard library. Original reason is to allow printing of t...
Definition: vector.hpp:45
Tree structure represented as linear sequece as result of postorder traversal. The representation is ...
Definition: PostfixRankedTree.h:73
Definition: ExactSubtreeRepeats.h:29
static tree::PostfixRankedTree< unsigned > repeats(const tree::PostfixRankedTree< SymbolType > &tree)
Definition: ExactSubtreeRepeats.h:460
int i
Definition: AllEpsilonClosure.h:118
return res
Definition: MinimizeByPartitioning.h:145
constexpr tuple< Elements &... > tie(Elements &... args) noexcept
Helper of extended tuple of references construction. The tuple is constructed to reffer to values in ...
Definition: tuple.hpp:218
constexpr auto make_pair(T1 &&x, T2 &&y)
Definition: pair.hpp:79
Definition: BackwardOccurrenceTest.h:17