Algorithms Library Toolkit
A toolkit for algorithms, especially for algorithms on formal languages
RegExpOptimizeFormalPart.hpp
Go to the documentation of this file.
1
6namespace regexp::simplify {
7
8template < class SymbolType >
10 ext::smart_ptr < FormalRegExpElement < SymbolType > > optimized = optimizeInner ( element );
11
12 if ( FormalRegExpAlternation < SymbolType > * alternation = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( & element ) ) {
13 FormalRegExpAlternation < SymbolType > * alternationOptimized = dynamic_cast<FormalRegExpAlternation < SymbolType > * > ( optimized.get ( ) );
14 if( alternationOptimized ) {
15 * alternation = std::move( * alternationOptimized );
16 } else {
18 }
19 } else if ( FormalRegExpConcatenation < SymbolType > * concatenation = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( & element ) ) {
20 FormalRegExpConcatenation < SymbolType > * concatenationOptimized = dynamic_cast<FormalRegExpConcatenation < SymbolType > * > ( optimized.get ( ) );
21 if( concatenationOptimized ) {
22 * concatenation = std::move( * concatenationOptimized );
23 } else {
25 }
26 } else if ( FormalRegExpIteration < SymbolType > * iteration = dynamic_cast<FormalRegExpIteration < SymbolType > *>( & element ) ) {
27 FormalRegExpIteration < SymbolType > * iterationOptimized = dynamic_cast<FormalRegExpIteration < SymbolType > *>( optimized.get ( ) );
28 if( iterationOptimized ) {
29 * iteration = std::move( * iterationOptimized );
30 } else {
31 * iteration = FormalRegExpIteration < SymbolType > { optimized };
32 }
33 }
34
35 // Nothing to optimize original element was FormalRegExpSymbol, FormalRegExpEpsilon, or FormalRegExpEmpty
36}
37
38template < class SymbolType >
41
42 // optimize while you can
43 while( A1( elem ) || A2( elem ) || A3( elem ) || A4( elem ) || A10( elem ) || V2( elem ) || V5( elem ) || V6( elem ) || X1( elem )
44 || A5( elem ) || A6( elem ) || A7( elem ) || A8( elem ) || A9( elem ) || V8( elem ) //|| V9( elem )
45 || A11( elem ) || V1( elem ) || V3( elem ) || V4( elem ) || V10( elem ) || S(elem) );
46
48}
49
50template < class SymbolType >
51bool RegExpOptimize::S( FormalRegExpElement < SymbolType > * & node ) {
52 bool optimized = false;
53 if( FormalRegExpAlternation < SymbolType > * alternation = dynamic_cast<FormalRegExpAlternation < SymbolType >*>( node ); alternation ) {
54 if( ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( alternation->getLeftElement ( ) ); * tmp != alternation->getLeftElement ( ) ) {
55 optimized = true;
56 alternation->setLeftElement ( * tmp );
57 }
58
59 if( ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( alternation->getRightElement ( ) ); * tmp != alternation->getRightElement ( ) ) {
60 optimized = true;
61 alternation->setRightElement ( * tmp );
62 }
63
64 return optimized;
65 }
66
67 if( FormalRegExpConcatenation < SymbolType > * concatenation = dynamic_cast<FormalRegExpConcatenation < SymbolType >*>( node ); concatenation ) {
68 if( ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( concatenation->getLeftElement ( ) ); * tmp != concatenation->getLeftElement ( ) ) {
69 optimized = true;
70 concatenation->setLeftElement ( * tmp );
71 }
72
73 if( ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( concatenation->getRightElement ( ) ); * tmp != concatenation->getRightElement ( ) ) {
74 optimized = true;
75 concatenation->setRightElement ( * tmp );
76 }
77
78 return optimized;
79 }
80
81 if( FormalRegExpIteration < SymbolType > * iteration = dynamic_cast<FormalRegExpIteration < SymbolType >*>( node ); iteration ) {
82 if( ext::smart_ptr < FormalRegExpElement < SymbolType > > tmp = optimizeInner ( iteration->getElement() ); * tmp != iteration->getElement ( ) ) {
83 optimized = true;
84 iteration->setElement ( * tmp );
85 }
86 return optimized;
87 }
88
89 return optimized;
90}
91
92
98template < class SymbolType >
99bool RegExpOptimize::A1( FormalRegExpElement < SymbolType > * & node ) {
100 FormalRegExpAlternation < SymbolType > * n = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( node );
101 if( ! n ) return false;
102
103 if( dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & n->getLeft ( ) ) ) {
104 FormalRegExpAlternation < SymbolType > leftAlt ( std::move ( static_cast < FormalRegExpAlternation < SymbolType > & > ( n->getLeft ( ) ) ) );
105
106 n->setLeft ( std::move ( leftAlt.getLeft ( ) ) );
107 leftAlt.setLeft ( std::move ( leftAlt.getRight ( ) ) );
108 leftAlt.setRight ( std::move ( n->getRight ( ) ) );
109 n->setRight ( std::move ( leftAlt ) );
110
111 return true;
112 }
113
114 return false;
115}
116
122template < class SymbolType >
123bool RegExpOptimize::A2 ( FormalRegExpElement < SymbolType > * & node ) {
124 FormalRegExpAlternation < SymbolType > * n = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( node );
125 if( ! n ) return false;
126
127 if ( dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & n->getRight ( ) ) ) {
128 FormalRegExpAlternation < SymbolType > & rightAlt = static_cast < FormalRegExpAlternation < SymbolType > & > ( n->getRight ( ) );
129
130 if ( n->getLeft ( ) > rightAlt.getLeft ( ) ) {
131 ext::ptr_value < FormalRegExpElement < SymbolType > > tmp ( std::move ( n->getLeft ( ) ) );
132
133 n->setLeft ( std::move ( rightAlt.getLeft ( ) ) );
134 rightAlt.setLeft ( std::move ( tmp ) );
135 return true;
136 } else {
137 return false;
138 }
139 }
140
141 return false;
142}
143
149template < class SymbolType >
150bool RegExpOptimize::A3 ( FormalRegExpElement < SymbolType > * & node ) {
151 FormalRegExpAlternation < SymbolType > * n = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( node );
152 if( ! n ) return false;
153
154 // input can be \0 + \0, so at least one element must be preserved
155
156 if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & n->getRight ( ) ) ) {
157 node = std::move ( n->getLeft ( ) ).clone ( );
158 delete n;
159 return true;
160 }
161
162 if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & n->getLeft ( ) ) ) {
163 node = std::move ( n->getRight ( ) ).clone ( );
164 delete n;
165 return true;
166 }
167
168 return false;
169}
170
176template < class SymbolType >
177bool RegExpOptimize::A4( FormalRegExpElement < SymbolType > * & node ) {
178 /*
179 * two ways of implementing this opitimization:
180 * - sort and call std::unique ( O(n lg n) + O(n) ), but it also sorts...
181 * - check every element against other ( O(n*n) )
182 *
183 * As we always sort in optimization, we can use the first version, but A4 must be __always__ called __after__ A2
184 */
185
186 FormalRegExpAlternation < SymbolType > * n = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( node );
187 if ( ! n ) return false;
188
189 if ( n->getLeftElement() == n->getRightElement() ) {
190 node = std::move ( n->getRight ( ) ).clone ( );
191 delete n;
192 return true;
193 }
194
195 return false;
196}
197
203template < class SymbolType >
204bool RegExpOptimize::A5( FormalRegExpElement < SymbolType > * & node ) {
205 FormalRegExpConcatenation < SymbolType > * n = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( node );
206 if( ! n ) return false;
207
208 if( dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & n->getLeft ( ) ) ) {
209 FormalRegExpConcatenation < SymbolType > leftCon ( std::move ( static_cast < FormalRegExpConcatenation < SymbolType > & > ( n->getLeft ( ) ) ) );
210
211 n->setLeft ( std::move ( leftCon.getLeft ( ) ) );
212 leftCon.setLeft ( std::move ( leftCon.getRight ( ) ) );
213 leftCon.setRight ( std::move ( n->getRight ( ) ) );
214 n->setRight ( std::move ( leftCon ) );
215
216 return true;
217 }
218
219 return false;
220}
221
227template < class SymbolType >
228bool RegExpOptimize::A6( FormalRegExpElement < SymbolType > * & node ) {
229 FormalRegExpConcatenation < SymbolType > * n = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( node );
230 if( ! n ) return false;
231
232 // input can be \e + \e, so at least one element must be preserved
233
234 if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & n->getRight ( ) ) ) {
235 node = std::move ( n->getLeft ( ) ).clone ( );
236 delete n;
237 return true;
238 }
239
240 if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & n->getLeft ( ) ) ) {
241 node = std::move ( n->getRight ( ) ).clone ( );
242 delete n;
243 return true;
244 }
245
246 return false;
247}
248
254template < class SymbolType >
255bool RegExpOptimize::A7( FormalRegExpElement < SymbolType > * & node ) {
256 FormalRegExpConcatenation < SymbolType > * n = dynamic_cast<FormalRegExpConcatenation < SymbolType > *>( node );
257 if( ! n ) return false;
258
259 if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & n->getRight ( ) ) || dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & n->getLeft ( ) ) ) {
260 delete n;
261 node = new FormalRegExpEmpty < SymbolType > { };
262 return true;
263 }
264
265 return false;
266}
267
273template < class SymbolType >
274bool RegExpOptimize::A8( FormalRegExpElement < SymbolType > * & /* node */) {
275 return false; //TODO
276}
277
283template < class SymbolType >
284bool RegExpOptimize::A9( FormalRegExpElement < SymbolType > * & /* node */) {
285 return false; //TODO
286}
287
293template < class SymbolType >
294bool RegExpOptimize::A10( FormalRegExpElement < SymbolType > * & node ) {
295 /*
296 * problem:
297 * - \e + x*x = x*
298 * - but if we do not have the eps, but we do have iteration, then \e \in h(iter), therefore \e in h(node).
299 */
300
301 FormalRegExpAlternation < SymbolType > * n = dynamic_cast<FormalRegExpAlternation < SymbolType > * > ( node );
302 if ( ! n ) return false;
303
304 if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & n->getLeft ( ) ) ) {
305 FormalRegExpConcatenation < SymbolType > * rightCon = dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & n->getRight ( ) );
306 if ( ! rightCon ) return false;
307
308 if ( FormalRegExpIteration < SymbolType > * rightLeftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & rightCon->getLeft ( ) ); rightLeftIte && rightLeftIte->getElement ( ) == rightCon->getRightElement ( ) ) {
309 node = std::move ( rightCon->getLeft ( ) ).clone ( );
310 delete n;
311 return true;
312 }
313
314 if ( FormalRegExpIteration < SymbolType > * rightRightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & rightCon->getRight ( ) ); rightRightIte && rightRightIte->getElement ( ) == rightCon->getLeftElement ( ) ) {
315 node = std::move ( rightCon->getRight ( ) ).clone ( );
316 delete n;
317 return true;
318 }
319 }
320
321 if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & n->getRight ( ) ) ) {
322 FormalRegExpConcatenation < SymbolType > * leftCon = dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & n->getLeft ( ) );
323 if ( ! leftCon ) return false;
324
325 if ( FormalRegExpIteration < SymbolType > * leftLeftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & leftCon->getLeft ( ) ); leftLeftIte && leftLeftIte->getElement ( ) == leftCon->getRightElement ( ) ) {
326 node = std::move ( leftCon->getLeft ( ) ).clone ( );
327 delete n;
328 return true;
329 }
330
331 if ( FormalRegExpIteration < SymbolType > * leftRightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & leftCon->getRight ( ) ); leftRightIte && leftRightIte->getElement ( ) == leftCon->getLeftElement ( ) ) {
332 node = std::move ( leftCon->getRight ( ) ).clone ( );
333 delete n;
334 return true;
335 }
336 }
337
338 return false;
339}
340
346template < class SymbolType >
347bool RegExpOptimize::A11( FormalRegExpElement < SymbolType > * & node ) {
348 FormalRegExpIteration < SymbolType > * n = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node );
349 if( ! n ) return false;
350
351 if ( FormalRegExpAlternation < SymbolType > * childAlt = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & n->getChild ( ) ); childAlt ) {
352 if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & childAlt->getLeft ( ) ) ) {
353 n->setChild ( std::move ( childAlt->getRight ( ) ) );
354 return true;
355 }
356 if ( dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & childAlt->getRight ( ) ) ) {
357 n->setChild ( std::move ( childAlt->getLeft ( ) ) );
358 return true;
359 }
360 }
361
362 return false;
363}
364
371template < class SymbolType >
372bool RegExpOptimize::V1( FormalRegExpElement < SymbolType > * & node ) {
373 FormalRegExpIteration < SymbolType > * n = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node );
374 if( ! n ) return false;
375
376 if ( dynamic_cast < FormalRegExpEmpty < SymbolType > * > ( & n->getChild ( ) ) ) {
377 delete n;
378 node = new FormalRegExpEpsilon < SymbolType > ( );
379 return true;
380 }
381 if ( dynamic_cast<FormalRegExpEpsilon < SymbolType > * > ( & n->getChild ( ) ) ) {
382 delete n;
383 node = new FormalRegExpEpsilon < SymbolType > ( );
384 return true;
385 }
386 return false;
387}
388
394template < class SymbolType >
395bool RegExpOptimize::V2( FormalRegExpElement < SymbolType > * & node ) {
396 FormalRegExpAlternation < SymbolType > * n = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( node );
397 if( ! n ) return false;
398
399 if ( FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & n->getLeft ( ) ); leftIte && leftIte->getElement ( ) == n->getRightElement ( ) ) {
400 node = std::move ( n->getLeft ( ) ).clone ( );
401 delete n;
402 return true;
403 }
404
405 if ( FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & n->getRight ( ) ); rightIte && rightIte->getElement ( ) == n->getLeftElement ( ) ) {
406 node = std::move ( n->getRight ( ) ).clone ( );
407 delete n;
408 return true;
409 }
410
411 return false;
412}
413
419template < class SymbolType >
420bool RegExpOptimize::V3( FormalRegExpElement < SymbolType > * & node ) {
421 FormalRegExpIteration < SymbolType > * n = dynamic_cast<FormalRegExpIteration < SymbolType > *>( node );
422 if( ! n ) return false;
423
424 if( FormalRegExpIteration < SymbolType > * childIter = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & n->getChild ( ) ); childIter ) {
425 n->setChild ( std::move ( childIter->getChild ( ) ) );
426 return true;
427 }
428
429 return false;
430}
431
437template < class SymbolType >
438bool RegExpOptimize::V4( FormalRegExpElement < SymbolType > * & node ) {
439 FormalRegExpIteration < SymbolType > * n = dynamic_cast < FormalRegExpIteration < SymbolType > *>( node );
440 if( ! n ) return false;
441
442 if( FormalRegExpConcatenation < SymbolType > * child = dynamic_cast < FormalRegExpConcatenation < SymbolType > * > ( & n->getChild ( ) ); child ) {
443 if( FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & child->getLeft ( ) ); leftIte ) {
444 if( FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & child->getRight ( ) ); rightIte ) {
445 n->setChild ( FormalRegExpAlternation < SymbolType >( std::move ( leftIte->getElement ( ) ), std::move ( rightIte->getElement ( ) ) ) );
446 }
447 }
448 }
449
450 return true;
451}
452
458template < class SymbolType >
459bool RegExpOptimize::V5( FormalRegExpElement < SymbolType > * & /* node */) {
460 return false; //TODO
461}
462
468template < class SymbolType >
469bool RegExpOptimize::V6( FormalRegExpElement < SymbolType > * & /* node */) {
470 return false; //TODO
471}
472
478template < class SymbolType >
479bool RegExpOptimize::V8( FormalRegExpElement < SymbolType > * & /* node */) {
480 return false; //TODO
481}
482
488template < class SymbolType >
489bool RegExpOptimize::V9( FormalRegExpElement < SymbolType > * & /* node */) {
490 return false; //TODO
491}
492
498template < class SymbolType >
499bool RegExpOptimize::V10( FormalRegExpElement < SymbolType > * & node ) {
500 FormalRegExpIteration < SymbolType > * n = dynamic_cast < FormalRegExpIteration < SymbolType > *>( node );
501 if( ! n ) return false;
502
503 if( FormalRegExpAlternation < SymbolType > * alt = dynamic_cast < FormalRegExpAlternation < SymbolType > * > ( & n->getChild ( ) ); alt ) {
504 if( FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & alt->getLeft ( ) ); leftIte ) {
505 if( FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & alt->getRight ( ) ); rightIte ) {
506 alt->setLeft ( std::move ( leftIte->getChild ( ) ) );
507 alt->setRight ( std::move ( rightIte->getChild ( ) ) );
508 }
509 }
510 }
511
512 return true;
513}
514
520template < class SymbolType >
521bool RegExpOptimize::X1( FormalRegExpElement < SymbolType > * & node ) {
522 FormalRegExpAlternation < SymbolType > * n = dynamic_cast<FormalRegExpAlternation < SymbolType > *>( node );
523 if( ! n ) return false;
524
525 if ( FormalRegExpIteration < SymbolType > * leftIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & n->getLeft ( ) ); leftIte && dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & n->getRight ( ) ) ) {
526 node = std::move ( n->getLeft ( ) ).clone ( );
527 delete n;
528 return true;
529 }
530
531 if ( FormalRegExpIteration < SymbolType > * rightIte = dynamic_cast < FormalRegExpIteration < SymbolType > * > ( & n->getRight ( ) ); rightIte && dynamic_cast < FormalRegExpEpsilon < SymbolType > * > ( & n->getLeft ( ) ) ) {
532 node = std::move ( n->getRight ( ) ).clone ( );
533 delete n;
534 return true;
535 }
536
537 return false;
538
539}
540
541} /* namespace regexp::simplify */
Class representing wrapper of dynamically allocated object behaving like rvalue reference.
Definition: ptr_value.hpp:40
Managed pointer simulating value like behavior.
Definition: memory.hpp:233
T * get()
Getter of the raw managed pointer.
Definition: memory.hpp:367
Represents the alternation operator in the regular expression. The node must have exactly two childre...
Definition: FormalRegExpAlternation.h:44
Represents the concatenation operator in the regular expression. The node must have exactly two child...
Definition: FormalRegExpConcatenation.h:44
Definition: FormalRegExpElement.h:62
Represents the empty expression in the regular expression. The node can't have any children.
Definition: FormalRegExpEmpty.h:41
Represents the epsilon expression in the regular expression. The node can't have any children.
Definition: FormalRegExpEpsilon.h:41
Represents the iteration operator in the regular expression. The node has exactly one child.
Definition: FormalRegExpIteration.h:44
static regexp::UnboundedRegExp< SymbolType > optimize(const regexp::UnboundedRegExp< SymbolType > &regexp)
Definition: Node.cpp:11
Definition: RegExpOptimize.h:22