Algorithms Library Toolkit
A toolkit for algorithms, especially for algorithms on formal languages
RegExpOptimizeUnboundedPart.hpp
Go to the documentation of this file.
1
6#include <iostream>
7
8namespace regexp::simplify {
9
10template < class SymbolType >
12 while ( Unbounded < SymbolType >::A1( alt ) || Unbounded < SymbolType >::A2( alt ) || Unbounded < SymbolType >::A3( alt ) || Unbounded < SymbolType >::A4( alt ) );
13
14 while ( Unbounded < SymbolType >::A8( alt ) || Unbounded < SymbolType >::A9( alt ) || Unbounded < SymbolType >::A10 ( alt ) || Unbounded < SymbolType >::V2 ( alt ) || Unbounded < SymbolType >::V5 ( alt ) || Unbounded < SymbolType >::V6 ( alt ) );
15
16 for( size_t i = 0; i < alt.getChildren ( ).size ( ); i++ )
17 alt.setChild ( std::move ( alt.getChild ( i ) ).template accept < ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > >, RegExpOptimize::Unbounded < SymbolType > > ( recursive ), i );
18
19 while ( Unbounded < SymbolType >::A1( alt ) || Unbounded < SymbolType >::A2( alt ) || Unbounded < SymbolType >::A3( alt ) || Unbounded < SymbolType >::A4( alt ) || Unbounded < SymbolType >::A8( alt ) || Unbounded < SymbolType >::A9( alt ) || Unbounded < SymbolType >::A10( alt ) || Unbounded < SymbolType >::V2( alt ) || Unbounded < SymbolType >::V5( alt ) || Unbounded < SymbolType >::V6( alt ) || Unbounded < SymbolType >::X1( alt ) );
20
21 for( size_t i = 0; i < alt.getChildren ( ).size ( ); i++ )
22 alt.setChild ( std::move ( alt.getChild ( i ) ).template accept < ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > >, RegExpOptimize::Unbounded < SymbolType > > ( false ), i );
23}
24
25template < class SymbolType >
27 while ( Unbounded < SymbolType >::A5( concat ) || Unbounded < SymbolType >::A6( concat ) || Unbounded < SymbolType >::A7( concat ) );
28
29 while ( Unbounded < SymbolType >::V8 ( concat ) || Unbounded < SymbolType >::V8R ( concat ) || Unbounded < SymbolType >::V9( concat ) );
30
31 for( size_t i = 0; i < concat.getChildren ( ).size ( ); i++ )
32 concat.setChild ( std::move ( concat.getChild ( i ) ).template accept < ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > >, RegExpOptimize::Unbounded < SymbolType > > ( recursive ), i );
33
34 while ( Unbounded < SymbolType >::A5( concat ) || Unbounded < SymbolType >::A6( concat ) || Unbounded < SymbolType >::A7( concat ) || Unbounded < SymbolType >::V8( concat ) || Unbounded < SymbolType >::V8R( concat ) || Unbounded < SymbolType >::V9( concat ) );
35
36 for( size_t i = 0; i < concat.getChildren ( ).size ( ); i++ )
37 concat.setChild ( std::move ( concat.getChild ( i ) ).template accept < ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > >, RegExpOptimize::Unbounded < SymbolType > > ( false ), i );
38}
39
40template < class SymbolType >
42 iter.setChild ( std::move ( iter.getChild ( ) ).template accept < ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > >, RegExpOptimize::Unbounded < SymbolType > > ( recursive ) );
43
44 while ( Unbounded < SymbolType >::A11( iter ) || Unbounded < SymbolType >::V1( iter ) || Unbounded < SymbolType >::V3( iter ) || Unbounded < SymbolType >::V4( iter ) || Unbounded < SymbolType >::V10( iter ) );
45
46 iter.setChild ( std::move ( iter.getChild ( ) ).template accept < ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > >, RegExpOptimize::Unbounded < SymbolType > > ( false ) );
47}
48
49template < class SymbolType >
51 optimize ( alt, recursive );
52
53 if( alt.getElements ( ).size( ) == 1 )
54 return ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > ( std::move ( alt.getChildren ( ).front ( ) ) );
55
56 if( alt.getElements ( ).empty ( ) )
58
60}
61
62template < class SymbolType >
64 optimize ( concat, recursive );
65
66 if( concat.getElements ( ).size( ) == 1 )
67 return ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > ( std::move ( concat.getChildren ( ).front ( ) ) );
68
69 if( concat.getElements ( ).empty ( ) )
71
73}
74
75template < class SymbolType >
76ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > RegExpOptimize::Unbounded < SymbolType >::visit( UnboundedRegExpIteration < SymbolType > && iter, bool recursive ) {
77 optimize ( iter, recursive );
78
79 // V1 is implemented right here
80 if( dynamic_cast < UnboundedRegExpEmpty < SymbolType > * > ( & iter.getChild ( ) ) )
81 return ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > ( UnboundedRegExpEpsilon < SymbolType > ( ) );
82
83 // T1 is implemented right here \e* = \e
84 if ( dynamic_cast < UnboundedRegExpEpsilon < SymbolType > * > ( & iter.getChild ( ) ) )
85 return ext::ptr_value < regexp::UnboundedRegExpElement < SymbolType > > ( UnboundedRegExpEpsilon < SymbolType > ( ) );
86
88}
89
90template < class SymbolType >
93}
94
95template < class SymbolType >
98}
99
100template < class SymbolType >
103}
104
110template < class SymbolType >
111bool RegExpOptimize::Unbounded < SymbolType >::A1( UnboundedRegExpAlternation < SymbolType > & node ) {
112 bool optimized = false;
113
114 for( auto it = node.begin( ); it != node.end( ); ) {
115 if( dynamic_cast < UnboundedRegExpAlternation < SymbolType > * > ( & * it ) ) {
116 UnboundedRegExpAlternation < SymbolType > childAlt ( static_cast < UnboundedRegExpAlternation < SymbolType > && > ( std::move ( * it ) ) );
117 it = node.erase( it );
118
119 it = node.insert ( it, std::make_move_iterator ( childAlt.begin ( ) ), std::make_move_iterator ( childAlt.end ( ) ) );
120
121 optimized = true;
122 } else
123 it ++;
124 }
125
126 return optimized;
127}
128
134template < class SymbolType >
135bool RegExpOptimize::Unbounded < SymbolType >::A2( UnboundedRegExpAlternation < SymbolType > & node ) {
136
137 auto cmp = [ ]( const UnboundedRegExpElement < SymbolType > * a, const UnboundedRegExpElement < SymbolType > * b ) -> bool { return * a < * b; };
138
139 if ( std::is_sorted ( node.begin( ).base ( ), node.end( ).base ( ), cmp ) )
140 return false;
141
142 std::sort ( node.begin ( ).base ( ), node.end ( ).base ( ), cmp );
143 return true;
144}
145
151template < class SymbolType >
152bool RegExpOptimize::Unbounded < SymbolType >::A3( UnboundedRegExpAlternation < SymbolType > & node ) {
153 bool optimized = false;
154
155 // alternation with no children is efectively \0
156
157 for( auto it = node.begin ( ); it != node.end ( ); ) {
158 if( dynamic_cast < UnboundedRegExpEmpty < SymbolType > * >( & * it ) ) {
159 it = node.erase ( it );
160
161 optimized = true;
162 } else
163 it ++;
164 }
165
166 return optimized;
167}
168
174template < class SymbolType >
175bool RegExpOptimize::Unbounded < SymbolType >::A4( UnboundedRegExpAlternation < SymbolType > & node ) {
176
177 /*
178 * two ways of implementing this opitimization:
179 * - sort and call std::unique ( O(n lg n) + O(n) ), but it also sorts...
180 * - check every element against other ( O(n*n) )
181 *
182 * As we always sort in optimization, we can use the first version, but A4 must be __always__ called __after__ A2
183 */
184 auto cmp = [ ]( const UnboundedRegExpElement < SymbolType > * a, const UnboundedRegExpElement < SymbolType > * b ) -> bool { return * a == * b; };
185
186 size_t size = node.getChildren ( ).size ( );
187 node.erase ( dereferencer ( ext::unique ( node.begin ( ).base ( ), node.end ( ).base ( ), cmp ) ), node.end( ) );
188
189 return size != node.getChildren ( ).size ( );
190}
191
197template < class SymbolType >
198bool RegExpOptimize::Unbounded < SymbolType >::A5( UnboundedRegExpConcatenation < SymbolType > & node ) {
199 bool optimized = false;
200
201 for( auto it = node.begin( ); it != node.end( ); ) {
202 if( dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & * it ) ) {
203
204 UnboundedRegExpConcatenation < SymbolType > childConcat = static_cast < UnboundedRegExpConcatenation < SymbolType > && > ( std::move ( * it ) );
205 it = node.erase ( it );
206
207 it = node.insert ( it, std::make_move_iterator ( childConcat.begin( ) ), std::make_move_iterator ( childConcat.end( ) ) );
208
209 optimized = true;
210 } else
211 ++ it;
212 }
213
214 return optimized;
215}
216
222template < class SymbolType >
223bool RegExpOptimize::Unbounded < SymbolType >::A6( UnboundedRegExpConcatenation < SymbolType > & node ) {
224 bool optimized = false;
225
226 // concatenation with no children is efectively \e
227
228 for( auto it = node.begin( ); it != node.end( ); ) {
229 if ( dynamic_cast < UnboundedRegExpEpsilon < SymbolType > * > ( & * it ) ) {
230 it = node.erase ( it );
231
232 optimized = true;
233 } else
234 ++ it;
235 }
236
237 return optimized;
238}
239
245template < class SymbolType >
246bool RegExpOptimize::Unbounded < SymbolType >::A7( UnboundedRegExpConcatenation < SymbolType > & node ) {
247
248 if(node.getChildren ( ).size() == 1) return false;
249
250 if( std::any_of ( node.begin( ), node.end( ), [ ] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool { return dynamic_cast < const UnboundedRegExpEmpty < SymbolType > * > ( & a ); } ) ) {
251 node.clear( );
252 node.pushBackChild ( UnboundedRegExpEmpty < SymbolType > ( ) );
253
254 return true;
255 }
256
257 return false;
258}
259
265template < class SymbolType >
266bool RegExpOptimize::Unbounded < SymbolType >::A8( UnboundedRegExpAlternation < SymbolType > & node ) {
268
269 for ( UnboundedRegExpElement < SymbolType > & element : node ) {
270 if ( UnboundedRegExpConcatenation < SymbolType > * childConcat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & element ); childConcat ) {
271 data [ ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( childConcat->getChild ( 0 ) ) ].push_back ( ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( element ) );
272 } else {
273 data [ ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( element ) ].push_back ( ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( element ) );
274 }
275 }
276
277 if ( data.size ( ) == node.getChildren ( ).size ( ) )
278 return false;
279
280 UnboundedRegExpAlternation < SymbolType > res;
281 for ( std::pair < ext::reference_wrapper < UnboundedRegExpElement < SymbolType > >, ext::vector < ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > > > && entry : ext::make_mover ( data ) ) {
282 if ( entry.second.size ( ) == 1 ) {
283 res.appendElement ( std::move ( entry.second.front ( ).get ( ) ) );
284 } else {
285 UnboundedRegExpConcatenation < SymbolType > innerConcat;
286 innerConcat.appendElement ( std::move ( entry.first.get ( ) ) );
287 UnboundedRegExpAlternation < SymbolType > innerAlt;
288 for ( ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > & innerEntry : entry.second ) {
289 UnboundedRegExpElement < SymbolType > & innerEntryElement = innerEntry.get ( );
290 if ( UnboundedRegExpConcatenation < SymbolType > * innerEntryConcat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & innerEntryElement ); innerEntryConcat ) {
291 if ( innerEntryConcat->getElements ( ).size ( ) == 1 ) {
292 innerAlt.appendElement ( UnboundedRegExpEpsilon < SymbolType > ( ) );
293 } else {
294 innerEntryConcat->erase ( innerEntryConcat->begin ( ) );
295 innerAlt.appendElement ( std::move ( * innerEntryConcat ) );
296 }
297 } else {
298 innerAlt.appendElement ( UnboundedRegExpEpsilon < SymbolType > ( ) );
299 }
300 }
301 innerConcat.insert ( innerConcat.end ( ), std::move ( innerAlt ) );
302 res.appendElement ( Unbounded < SymbolType >::visit ( std::move ( innerConcat ), true ) );
303 }
304 }
305
306 node = std::move ( res );
307
308 return true;
309}
310
316template < class SymbolType >
317bool RegExpOptimize::Unbounded < SymbolType >::A9( UnboundedRegExpAlternation < SymbolType > & node ) {
319
320 for ( UnboundedRegExpElement < SymbolType > & element : node ) {
321 if ( UnboundedRegExpConcatenation < SymbolType > * childConcat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & element ); childConcat ) {
322 data [ ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( childConcat->getChild ( childConcat->getElements ( ).size ( ) - 1 ) ) ].push_back ( ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( element ) );
323 } else {
324 data [ ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( element ) ].push_back ( ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > ( element ) );
325 }
326 }
327
328 if ( data.size ( ) == node.getChildren ( ).size ( ) )
329 return false;
330
331 UnboundedRegExpAlternation < SymbolType > res;
332 for ( std::pair < ext::reference_wrapper < UnboundedRegExpElement < SymbolType > >, ext::vector < ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > > > && entry : ext::make_mover ( data ) ) {
333 if ( entry.second.size ( ) == 1 ) {
334 res.appendElement ( std::move ( entry.second.front ( ).get ( ) ) );
335 } else {
336 UnboundedRegExpConcatenation < SymbolType > innerConcat;
337 innerConcat.appendElement ( std::move ( entry.first.get ( ) ) );
338 UnboundedRegExpAlternation < SymbolType > innerAlt;
339 for ( ext::reference_wrapper < UnboundedRegExpElement < SymbolType > > & innerEntry : entry.second ) {
340 UnboundedRegExpElement < SymbolType > & innerEntryElement = innerEntry.get ( );
341 if ( UnboundedRegExpConcatenation < SymbolType > * innerEntryConcat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & innerEntryElement ); innerEntryConcat ) {
342 if ( innerEntryConcat->getElements ( ).size ( ) == 1 ) {
343 innerAlt.appendElement ( UnboundedRegExpEpsilon < SymbolType > ( ) );
344 } else {
345 innerEntryConcat->erase ( innerEntryConcat->rbegin ( ) );
346 innerAlt.appendElement ( std::move ( * innerEntryConcat ) );
347 }
348 } else {
349 innerAlt.appendElement ( UnboundedRegExpEpsilon < SymbolType > ( ) );
350 }
351 }
352 innerConcat.insert ( innerConcat.begin ( ), std::move ( innerAlt ) );
353 res.appendElement ( Unbounded < SymbolType >::visit ( std::move ( innerConcat ), true ) );
354 }
355 }
356
357 node = std::move ( res );
358
359 return true;
360}
361
367template < class SymbolType >
368bool RegExpOptimize::Unbounded < SymbolType >::A10( UnboundedRegExpAlternation < SymbolType > & node ) {
369 bool optimized = false;
370
371 /*
372 * problem:
373 * - \e + x*x = x*
374 * - but if we do not have the eps, but we do have node that generates epsilon... It can actually be the x*x itself...
375 */
376
377 // check if we have some epsilon or iteration left, else nothing to do
378 auto eps = find_if( node.getElements().begin( ), node.getElements().end( ), [ ]( const UnboundedRegExpElement < SymbolType > & a ) -> bool {
379 return regexp::properties::RegExpEpsilon::languageContainsEpsilon ( a );
380 });
381
382 if( eps == node.getElements().end( ) )
383 return false;
384
385 for( size_t i = 0; i < node.getChildren ( ).size ( ); i++ ) {
386 const UnboundedRegExpConcatenation < SymbolType > * childConcat = dynamic_cast < const UnboundedRegExpConcatenation < SymbolType > * > ( & node.getChildren ( ) [ i ] );
387 if( ! childConcat || childConcat->getElements ( ).size ( ) < 2 )
388 continue;
389
390 // if iteration is first element of concatenation
391 const UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast < const UnboundedRegExpIteration < SymbolType > * > ( & childConcat->getElements ( ).front( ) );
392 if( ! iter )
393 continue;
394
395 // if element of iteration is concatenation, we need to check this specially
396 const UnboundedRegExpConcatenation < SymbolType > * concat = dynamic_cast < const UnboundedRegExpConcatenation < SymbolType > * > ( & iter->getChild ( ) );
397
398 if ( ( concat && equal ( concat->getChildren ( ).begin( ), concat->getChildren ( ).end ( ), childConcat->begin ( ) + 1 , childConcat->end ( ) ) ) || ( childConcat->getElements ( ).size ( ) == 2 && iter->getChild ( ) == childConcat->getElements ( ) [ 1 ] ) ) {
399 optimized = true;
400
401 node.setChild ( std::move ( childConcat->getElements().front() ), i );
402 }
403 }
404
405 return optimized;
406}
407
413template < class SymbolType >
414bool RegExpOptimize::Unbounded < SymbolType >::A11( UnboundedRegExpIteration < SymbolType > & node ) {
415
416 UnboundedRegExpAlternation < SymbolType > * childAlt = dynamic_cast<UnboundedRegExpAlternation < SymbolType > * >( & node.getChild ( ) );
417
418 if( childAlt ) {
419 // check if eps inside iteration's alternation
420 auto eps = find_if ( childAlt->begin( ), childAlt->end( ), [ ] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool {
421 return dynamic_cast < const UnboundedRegExpEpsilon < SymbolType > * > ( & a );
422 });
423
424 // if no eps
425 if ( eps == childAlt->end( ) )
426 return false;
427
428 // remove eps from alternation
429 childAlt->erase ( eps );
430 return true;
431 }
432
433 return false;
434}
435
441template < class SymbolType >
442bool RegExpOptimize::Unbounded < SymbolType >::V1( UnboundedRegExpIteration < SymbolType > &) {
443 // implemented in optimize( UnboundedRegExpIteration < SymbolType > )
444
445 return false;
446}
447
453template < class SymbolType >
454bool RegExpOptimize::Unbounded < SymbolType >::V2( UnboundedRegExpAlternation < SymbolType > & node ) {
455 bool optimized = false;
456
457 /*
458 * Bit tricky
459 * We need also to cover the cases like ( a + b + d )* + ( e )* + a + b + c + e = ( a + b + d )* + ( e )* + c
460 */
461
463 // cache iter elements because of operator invalidation after erase only cache nodes that are not iterations (the nodes omitted may only come from situation like x** where the double iteration wil get optimized out elsewhere).
464 for( const UnboundedRegExpElement < SymbolType > & n : node.getElements ( ) ) {
465 if ( const UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast < const UnboundedRegExpIteration < SymbolType > * > ( & n ); iter ) {
466 if ( const UnboundedRegExpAlternation < SymbolType > * inner = dynamic_cast < const UnboundedRegExpAlternation < SymbolType > * > ( & iter->getChild ( ) ); inner ) {
467 for ( const UnboundedRegExpElement < SymbolType > & innerElement : inner->getElements ( ) ) {
468 if ( ! dynamic_cast < const UnboundedRegExpIteration < SymbolType > * > ( & innerElement ) )
469 iterElements.push_back ( & innerElement );
470 }
471 }
472 else if ( ! dynamic_cast < const UnboundedRegExpIteration < SymbolType > * > ( & iter->getChild ( ) ) ) {
473 iterElements.push_back ( & iter->getChild ( ) );
474 }
475 }
476 }
477
478 for( const UnboundedRegExpElement < SymbolType > * n : iterElements ) {
479 auto it = find_if ( node.getChildren ( ).begin ( ), node.getChildren ( ).end ( ), [ n ] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool {
480 return a == *n;
481 });
482
483 if( it == node.getChildren ( ).end ( ) ) {
484 continue;
485 }
486
487 optimized = true;
488 node.erase( it );
489 }
490
491 return optimized;
492}
493
499template < class SymbolType >
500bool RegExpOptimize::Unbounded < SymbolType >::V3 ( UnboundedRegExpIteration < SymbolType > & node ) {
501
502 if ( UnboundedRegExpIteration < SymbolType > * childIter = dynamic_cast < UnboundedRegExpIteration < SymbolType > * > ( & node.getChild ( ) ); childIter ) {
503 node.setChild ( std::move ( childIter->getChild ( ) ) );
504
505 return true;
506 }
507
508 return false;
509}
510
516template < class SymbolType >
517bool RegExpOptimize::Unbounded < SymbolType >::V4( UnboundedRegExpIteration < SymbolType > & node ) {
518
519 // interpretation: if iteration's element is concat and every concat's element is iteration
520 // or if iteration's element is alternation and inside it, there is a concat wher every concat's element is iteration
521 auto areAllItersInConcat = [] ( UnboundedRegExpElement < SymbolType > & testedNode ) {
522 UnboundedRegExpConcatenation < SymbolType > * cont = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & testedNode );
523 return cont && all_of ( cont->begin( ), cont->end ( ), [ ] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool { return dynamic_cast < const UnboundedRegExpIteration < SymbolType > * > ( & a ); } );
524 };
525
526 auto toAlternationOfChildren = [] ( UnboundedRegExpConcatenation < SymbolType > & cont ) {
527 UnboundedRegExpAlternation < SymbolType > newAlt;
528
529 for ( UnboundedRegExpElement < SymbolType > & n : cont )
530 newAlt.pushBackChild ( std::move ( static_cast < UnboundedRegExpIteration < SymbolType > & > ( n ).getChild ( ) ) );
531
532 return Unbounded < SymbolType >::visit ( std::move ( newAlt ), true );
533 };
534
535 if ( areAllItersInConcat ( node.getChild ( ) ) ) {
536 node.setChild ( toAlternationOfChildren ( static_cast < UnboundedRegExpConcatenation < SymbolType > & > ( node.getChild ( ) ) ) );
537 return true;
538 } else if ( dynamic_cast < UnboundedRegExpAlternation < SymbolType > * > ( & node.getChild ( ) ) ) {
539 UnboundedRegExpAlternation < SymbolType > & alt = static_cast < UnboundedRegExpAlternation < SymbolType > & > ( node.getChild ( ) );
540 bool res = false;
541 for ( size_t i = 0; i < alt.getChildren ( ).size ( ); ++ i ) {
542 if ( areAllItersInConcat ( alt.getChild ( i ) ) ) {
543 alt.setChild ( toAlternationOfChildren ( static_cast < UnboundedRegExpConcatenation < SymbolType > & > ( alt.getChild ( i ) ) ), i );
544 res = true;
545 }
546 }
547 return res;
548 }
549
550 return false;
551}
552
558template < class SymbolType >
559bool RegExpOptimize::Unbounded < SymbolType >::V5( UnboundedRegExpAlternation < SymbolType > & /* node */ ) {
560 // implemented by combination of A9 and A10
561
562 return false;
563}
564
570template < class SymbolType >
571bool RegExpOptimize::Unbounded < SymbolType >::V6( UnboundedRegExpAlternation < SymbolType > & /* node */ ) {
572 // implemented by combination of A9 and A10R
573
574 return false;
575}
576
582template < class SymbolType >
583bool RegExpOptimize::Unbounded < SymbolType >::V8( UnboundedRegExpConcatenation < SymbolType > & node ) {
584 bool optimized = false;
585
586 // interpretation: if there is iteration in concatenation node, and element of iteration contains eps and is straight before this iteration, then this element can be omitted
587
588 if ( node.getChildren ( ).empty ( ) )
589 return false;
590
591 for( auto it = node.getChildren ( ).begin( ); it != node.getChildren ( ).end( ); ) {
592 const UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast < const UnboundedRegExpIteration < SymbolType > * > ( & * it );
593
594 if( ! iter ) {
595 ++ it;
596 continue;
597 }
598
599 // if element of iteration is concatenation, we need to check this specially
600 const UnboundedRegExpConcatenation < SymbolType > * concat = dynamic_cast < const UnboundedRegExpConcatenation < SymbolType > * > ( & iter->getChild ( ) );
601
602 if( concat ) {
603 // check if not out of bounds
604 if( static_cast < size_t > ( distance( node.getChildren ( ).begin( ), it ) ) < concat->getChildren ( ).size ( ) ) {
605 it ++;
606 continue;
607 }
608
609 auto it2 = it;
610 ext::retract ( it2, concat->getChildren ( ).size ( ) );
611
613 concat->getChildren().size ( ) == static_cast < size_t > ( distance ( it2, node.getChildren ( ).end ( ) ) ) &&
614 equal ( concat->getChildren ( ).begin( ), concat->getChildren ( ).end( ), it2 ) ) {
615 optimized = true;
616
617 it = node.erase ( it2, it );
618 } else
619 ++ it;
620 } else {
621 // check if not at the first node
622 if ( it == node.getChildren ( ).begin ( ) ) {
623 it ++;
624 continue;
625 }
626
627 auto prev = std::prev ( it );
628
629 if ( regexp::properties::RegExpEpsilon::languageContainsEpsilon ( iter->getElement ( ) ) && iter->getElement ( ) == * prev ) {
630 it = node.erase ( prev );
631 optimized = true;
632 } else
633 ++ it;
634 }
635 }
636
637 return optimized;
638}
639
645template < class SymbolType >
646bool RegExpOptimize::Unbounded < SymbolType >::V8R( UnboundedRegExpConcatenation < SymbolType > & node ) {
647 bool optimized = false;
648
649 // interpretation: if there is iteration in concatenation node, and element of iteration contains eps and is straight before this iteration, then this element can be omitted
650
651 if ( node.getChildren ( ).empty ( ) )
652 return false;
653
654 for( auto it = node.getChildren ( ).rbegin( ); it != node.getChildren ( ).rend( ); ) {
655 const UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast < const UnboundedRegExpIteration < SymbolType > * > ( & * it );
656
657 if( ! iter ) {
658 ++ it;
659 continue;
660 }
661
662 // if element of iteration is concatenation, we need to check this specially
663 if ( const UnboundedRegExpConcatenation < SymbolType > * concat = dynamic_cast < const UnboundedRegExpConcatenation < SymbolType > * > ( & iter->getChild ( ) ); concat ) {
664 // check if not out of bounds
665 if( static_cast < size_t > ( distance( node.getChildren ( ).rbegin( ), it ) ) < concat->getChildren ( ).size ( ) ) {
666 it ++;
667 continue;
668 }
669
670 auto it2 = it;
671 ext::retract ( it2, concat->getChildren ( ).size ( ) );
672
674 concat->getChildren().size ( ) == static_cast < size_t > ( distance ( it2, node.getChildren ( ).rend ( ) ) ) &&
675 equal ( concat->getChildren ( ).rbegin( ), concat->getChildren ( ).rend( ), it2 ) ) {
676 optimized = true;
677
678 it = node.erase ( it2, it );
679 } else
680 ++ it;
681 } else {
682 // check if not at the first node
683 if ( it == node.getChildren ( ).rbegin ( ) ) {
684 it ++;
685 continue;
686 }
687
688 auto prev = std::prev ( it );
689
690 if ( regexp::properties::RegExpEpsilon::languageContainsEpsilon ( iter->getElement ( ) ) && iter->getElement ( ) == * prev ) {
691 it = node.erase ( prev );
692 optimized = true;
693 } else
694 ++ it;
695 }
696 }
697
698 return optimized;
699}
700
706template < class SymbolType >
707bool RegExpOptimize::Unbounded < SymbolType >::V9( UnboundedRegExpConcatenation < SymbolType > & node ) {
708 bool optimized = false;
709
710 // interpretation: if concat (C1) with iter && iteration's element is concat (C2), then:
711 // simultaneously iterate through C1 and C2. (axy)*axz=ax(yax)*z -> get ax that is same and relocate them...
712
713 for( auto it = node.begin( ) ; it != node.end( ) ; ) {
714 UnboundedRegExpIteration < SymbolType > * iter = dynamic_cast < UnboundedRegExpIteration < SymbolType > * > ( & * it );
715 if ( ! iter ) {
716 ++ it;
717 continue;
718 }
719 UnboundedRegExpConcatenation < SymbolType > * concat = dynamic_cast < UnboundedRegExpConcatenation < SymbolType > * > ( & iter->getChild( ) );
720 if( ! concat ) {
721 auto cIter = std::next ( it );
722 if ( cIter == node.end ( ) || * cIter != iter->getChild ( ) ) {
723 ++ it;
724 continue;
725 }
726
727 it = node.insert ( it, std::move ( * std::next ( it ) ) );
728 it = std::next ( it );
729 it = node.erase ( std::next ( it ) );
730 it = std::prev ( it );
731 } else {
732 // find range from <it+1;sth> and <concat.begin;sth> that is equal
733 auto c1Iter = std::next( it );
734 auto c2Iter = concat->begin( );
735 while( c1Iter != node.end() && c2Iter != concat->end( ) && *c1Iter == * c2Iter ) {
736 ++ c1Iter;
737 ++ c2Iter;
738 }
739
740 if( c1Iter == std::next( it ) ) {
741 ++ it;
742 continue;
743 }
744
745 // common::Streams::out << "xy" << std::endl;
746 // UnboundedRegExpConcatenation < SymbolType >* tmp = new UnboundedRegExpConcatenation < SymbolType >( );
747 // tmp->insert( tmp->getChildren().end( ), std::next( it ), c1Iter );
748 // common::Streams::out << RegExp( tmp ) << std::endl;
749
750 // copy the range <it;sth>, delete it and go back to the iter node
752 copyRange.insert ( copyRange.end(), std::make_move_iterator ( std::next( it ) ), std::make_move_iterator ( c1Iter ) );
753 it = node.erase ( std::next ( it ), c1Iter );
754 it = std::prev( it );
755
756 // insert that range before it position
757 it = node.insert( it, std::make_move_iterator ( copyRange.begin( ) ), std::make_move_iterator ( copyRange.end( ) ) );
758
759 // alter the iteration's concat node
760 copyRange.clear( );
761 copyRange.insert ( copyRange.end(), std::make_move_iterator ( concat->begin( ) ), std::make_move_iterator ( c2Iter ) );
762 concat->erase ( concat->begin( ), c2Iter );
763 concat->insert ( concat->end(), std::make_move_iterator ( copyRange.begin( ) ), std::make_move_iterator ( copyRange.end( ) ) );
764 }
765 }
766
767 return optimized;
768}
769
776template < class SymbolType >
777bool RegExpOptimize::Unbounded < SymbolType >::V10( UnboundedRegExpIteration < SymbolType > & node ) {
778
779 // interpretation: if iter's child is alternation where some of its children are iteration, then they do not have to be iterations
780 UnboundedRegExpAlternation < SymbolType > * alt = dynamic_cast < UnboundedRegExpAlternation < SymbolType > * > ( & node.getChild ( ) );
781 if ( ! alt || ! any_of ( alt->begin( ), alt->end( ), [] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool { return dynamic_cast < const UnboundedRegExpIteration < SymbolType > * > ( & a ); } ) )
782 return false;
783
784 for ( auto it = alt->begin( ); it != alt->end ( ); ++ it ) {
785 if ( dynamic_cast < UnboundedRegExpIteration < SymbolType > * > ( & * it ) )
786 alt->setChild ( std::move ( static_cast < UnboundedRegExpIteration < SymbolType > & > ( * it ).getChild ( ) ), it );
787 }
788
789 node.setChild ( Unbounded < SymbolType >::visit ( std::move ( * alt ), false ) );
790
791 return true;
792}
793
799template < class SymbolType >
800bool RegExpOptimize::Unbounded < SymbolType >::X1( UnboundedRegExpAlternation < SymbolType > & node ) {
801
802 // theorem: In regexp like a* + \e, \e is described twice, first in a*, second in \e.
803 // therefore we can delete the \e as it is redundant
804
805 auto iter = find_if ( node.begin( ), node.end( ), [] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool { return dynamic_cast < const UnboundedRegExpIteration < SymbolType > * > ( & a );} );
806 auto eps = find_if ( node.begin( ), node.end( ), [] ( const UnboundedRegExpElement < SymbolType > & a ) -> bool { return dynamic_cast < const UnboundedRegExpEpsilon < SymbolType > * > ( & a );} );
807
808 if( iter != node.end( ) && eps != node.end( ) ) {
809 node.erase( eps );
810 return true;
811 }
812
813 return false;
814}
815
816} /* namespace regexp::simplify */
Data & getChild()
Getter of the child of the node.
Definition: tree_base.hpp:403
void setChild(const Data &c)
Setter of the child of the node.
Definition: tree_base.hpp:429
const ext::ptr_vector< Data > & getChildren() &
Getter of the vector of children.
Definition: tree_base.hpp:1071
Data & getChild(size_t index)
Getter of the child at given index.
Definition: tree_base.hpp:1116
void setChild(const Data &d, PositionIterator it)
Setter of the single child of the node.
Definition: tree_base.hpp:1140
Class extending the map class from the standard library. Original reason is to allow printing of the ...
Definition: map.hpp:48
Class representing wrapper of dynamically allocated object behaving like rvalue reference.
Definition: ptr_value.hpp:40
Implementation of vector storing dynamicaly allocated instances of given type. The class mimicks the ...
Definition: ptr_vector.hpp:44
iterator end() &noexcept
Iterator one past the last element of the values range in the vector.
Definition: ptr_vector.hpp:422
iterator begin() &noexcept
Iterator to the begining of the values range in the vector.
Definition: ptr_vector.hpp:382
void clear() noexcept
Removes all values from the vector.
Definition: ptr_vector.hpp:614
iterator insert(iterator pos, R &&value)
Inserts the value on position given by iterator pos.
Definition: ptr_vector.hpp:775
Class extending the reference wrapper class from the standard library. Original reason is to allow it...
Definition: functional.hpp:108
Class extending the vector class from the standard library. Original reason is to allow printing of t...
Definition: vector.hpp:45
Represents the alternation operator in the regular expression. The node can have 0 to n children in l...
Definition: UnboundedRegExpAlternation.h:44
Represents the concatenation operator in the regular expression. The node can have 0 to n children in...
Definition: UnboundedRegExpConcatenation.h:44
Definition: UnboundedRegExpElement.h:62
Represents the empty expression in the regular expression. The node can't have any children.
Definition: UnboundedRegExpEmpty.h:41
Represents the epsilon expression in the regular expression. The node can't have any children.
Definition: UnboundedRegExpEpsilon.h:41
Represents the iteration operator in the regular expression. The node has exactly one child.
Definition: UnboundedRegExpIteration.h:43
static bool languageContainsEpsilon(const regexp::FormalRegExpElement< SymbolType > &regexp)
Definition: RegExpEpsilon.h:91
static regexp::UnboundedRegExp< SymbolType > optimize(const regexp::UnboundedRegExp< SymbolType > &regexp)
int i
Definition: AllEpsilonClosure.h:118
return res
Definition: MinimizeByPartitioning.h:145
constexpr void retract(Iterator &i, Distance n)
A generalization of pointer arithmetic.
Definition: iterator.hpp:887
ForwardIterator unique(ForwardIterator first, ForwardIterator last)
Shuffles values in a sequece so that consecutive duplicate values are pushed to the front and others ...
Definition: algorithm.hpp:384
reference_mover< T > make_mover(T &param)
Move adaptor construction function specialized to lvalue reference parameter.
Definition: iterator.hpp:468
dereferencing_iterator< Iterator > dereferencer(Iterator iter)
Dereferencing adaptor construction function.
Definition: iterator.hpp:815
all_of(T &&...) -> all_of< T... >
constexpr auto visit(Visitor &&vis, Variants &&... vars)
Definition: variant.hpp:42
any_of(T &&...) -> any_of< T... >
Definition: Node.cpp:11
Definition: RegExpOptimize.h:22