Algorithms Library Toolkit
A toolkit for algorithms, especially for algorithms on formal languages
ApproximateCoversComputation.h
Go to the documentation of this file.
1
6#pragma once
7
8#include <string/LinearString.h>
9#include <automaton/FSM/NFA.h>
11#include <numeric>
12
13namespace stringology::cover {
14
16public:
26 template < class SymbolType >
27 static ext::set < ext::pair < string::LinearString < SymbolType >, unsigned > > compute ( const string::LinearString < SymbolType > & pattern, unsigned k, bool restricted = false );
28
32 template < class SymbolType >
34
35private:
36 template < class SymbolType >
37 static unsigned smallestDistanceCover ( ext::set < ext::pair < unsigned, unsigned > > subset, const ext::vector < SymbolType > & coverCandidate );
38
39 template < class SymbolType >
40 static ext::set < ext::pair < string::LinearString < SymbolType >, unsigned > > processState ( const ext::set < ext::pair < unsigned, unsigned > > & previousState, ext::vector < SymbolType > & lfactor, unsigned previousDepth, unsigned k, const automaton::NFA<SymbolType, ext::pair<unsigned, unsigned> > & approximateSuffixNDA, const string::LinearString < SymbolType > & pattern, bool restricted );
41};
42
43template < class SymbolType >
45 return ApproximateCoversComputation::compute ( pattern, k, false );
46}
47
48template < class SymbolType >
51
52 ext::set < ext::pair < unsigned, unsigned > > initialState ( { approximateSuffixNDA.getInitialState ( ) } );
54 unsigned depth = 0;
55
56 return processState ( initialState, lfactor, depth, k, approximateSuffixNDA, pattern, restricted );
57}
58
59template < class SymbolType >
60ext::set < ext::pair < string::LinearString < SymbolType >, unsigned > > ApproximateCoversComputation::processState ( const ext::set < ext::pair < unsigned, unsigned > > & previousState, ext::vector < SymbolType > & lfactor, unsigned previousDepth, unsigned k, const automaton::NFA < SymbolType, ext::pair < unsigned, unsigned > > & approximateSuffixNDA, const string::LinearString < SymbolType > & pattern, bool restricted ) {
62
63 for ( const SymbolType & symbol : pattern.getAlphabet ( ) ) {
65 unsigned depth = previousDepth + 1;
66
67 for ( const auto & state : previousState ) {
68 auto transition_range = approximateSuffixNDA.getTransitions ( ).equal_range ( ext::make_pair ( state, symbol ) );
69 for ( const auto & it : transition_range ) {
70 newState.insert ( it.second );
71 }
72 }
73
74 bool levelZero = restricted && std::any_of ( newState.begin ( ), newState.end ( ), [] ( const ext::pair < unsigned, unsigned > & state ) { return state.second == 0; } );
75
76 if ( newState.size ( ) > 1 && ( restricted == levelZero ) ) { // lfactor(newState) is a k-approximately repeating factor
77 if ( newState.begin ( ) -> first == depth ) { // newState is k-approximate prefix
78 lfactor.push_back ( symbol );
79
80 if ( newState.rbegin ( ) -> first == pattern.getContent ( ).size ( ) ) { // The state is final ; safe (at least 2 elements)
81 bool isCover = true;
82 auto it = std::next ( newState.rbegin ( ) );
83 auto prev = newState.rbegin ( );
84
85 for ( ; it != newState.rend ( ); ++ it, ++ prev ) {
86 if ( prev -> first - it -> first > lfactor.size ( ) ) {
87 isCover = false;
88 break;
89 }
90 }
91
92 if ( isCover ) {
93 unsigned l = smallestDistanceCover ( newState, lfactor );
94 if ( lfactor.size ( ) > l ) {
95 resultSet.insert ( ext::make_pair ( string::LinearString < SymbolType > ( lfactor ), l ) );
96 }
97 }
98 }
99
100 ext::set < ext::pair < string::LinearString < SymbolType >, unsigned > > resultSet2 = processState ( newState, lfactor, depth, k, approximateSuffixNDA, pattern, restricted );
101 resultSet.insert ( resultSet2.begin ( ), resultSet2.end ( ) );
102
103 lfactor.pop_back( );
104 }
105 }
106 }
107 return resultSet;
108}
109
110template < class SymbolType >
111unsigned ApproximateCoversComputation::smallestDistanceCover ( ext::set < ext::pair < unsigned, unsigned > > subset, const ext::vector < SymbolType > & coverCandidate ) {
112 unsigned lmin;
113 unsigned lmax;
114 unsigned l;
115
116 lmin = std::max ( subset.begin ( ) -> second, subset.rbegin ( ) -> second );
117 lmax = std::accumulate ( subset.begin ( ), subset.end ( ), subset.begin ( ) -> second, [ ] ( unsigned cMax, const ext::pair < unsigned, unsigned > & e ) { return std::max ( cMax, e.second ); } );
118 l = lmax;
119
120 while ( l > lmin ) {
121 auto it = std::next ( subset.begin ( ) );
122 auto prev = subset.begin ( );
123 auto last = std::prev ( subset.end ( ) );
124 while ( it != subset.end ( ) ) {
125 // erase elements with level(e) = l, but not first nor last element
126 if ( it != last && it -> second == l ) {
127 it = subset.erase ( it );
128 } else {
129 if ( it -> first - prev -> first > coverCandidate.size ( ) ) // neighbouring depth is not <= coverCandidate.size
130 return l;
131
132 prev = it ++;
133 }
134 }
135
136 l = l - 1;
137 }
138
139 return l;
140}
141
142} /* namespace stringology::cover */
143
Nondeterministic finite automaton. Accepts regular languages.
Definition: NFA.h:66
const StateType & getInitialState() const &
Definition: NFA.h:107
Class extending the pair class from the standard library. Original reason is to allow printing of the...
Definition: pair.hpp:43
Definition: set.hpp:44
auto begin() &
Inherited behavior of begin for non-const instance.
Definition: set.hpp:99
auto end() &
Inherited behavior of end for non-const instance.
Definition: set.hpp:129
auto equal_range(K &&key) const &
Make range of elements with key equal to the key.
Definition: set.hpp:200
Class extending the vector class from the standard library. Original reason is to allow printing of t...
Definition: vector.hpp:45
Linear string.
Definition: LinearString.h:57
const ext::set< SymbolType > & getAlphabet() const &
Definition: LinearString.h:103
const ext::vector< SymbolType > & getContent() const &
Definition: LinearString.h:238
Definition: ApproximateCoversComputation.h:15
static ext::set< ext::pair< string::LinearString< SymbolType >, unsigned > > compute(const string::LinearString< SymbolType > &pattern, unsigned k, bool restricted=false)
Definition: ApproximateCoversComputation.h:49
static automaton::NFA< SymbolType, ext::pair< unsigned int, unsigned int > > construct(const string::LinearString< SymbolType > &pattern, unsigned int k)
Definition: NondeterministicApproximateSuffixAutomatonForHammingDistance.h:24
p second
Definition: ToRegExpAlgebraic.h:126
typename T::SymbolType SymbolType
Definition: ReachableStates.h:176
constexpr const T & max(const T &a)
Root case of maximum computation. The maximum from one value is the value itself.
Definition: algorithm.hpp:278
constexpr auto make_pair(T1 &&x, T2 &&y)
Definition: pair.hpp:79
any_of(T &&...) -> any_of< T... >
Definition: ApproximateCoversComputation.cpp:9