Algorithms Library Toolkit
A toolkit for algorithms, especially for algorithms on formal languages
ApproximateSeedComputation.h
Go to the documentation of this file.
1
6#pragma once
7
8#include <alib/set>
9#include <alib/pair>
11#include <string/LinearString.h>
13
14namespace stringology::seed
15{
17
24 {
25 private:
37 template<class SymbolType>
38 static bool existsPrefix( const string::LinearString < SymbolType > & potentialSeed, const State & newState, unsigned distance,
41 {
42 if ( dSuffix.getFinalStates().count( newState ) && newState.rbegin()->second <= distance )
43 return true;
44
45 auto statePtr = & dSuffix.getInitialState();
46 for ( size_t pi = 0; pi < potentialSeed.size(); ++pi )
47 {
48 if ( pi >= pattern.getContent().size() - newState.rbegin()->first
49 && dSuffix.getFinalStates().count( *statePtr )
50 && ( *statePtr ).rbegin()->second <= distance )
51 return true;
52
53 auto it_range = dSuffix.getTransitionsFromState( *statePtr );
54 for ( auto tr = it_range.begin(); tr != it_range.end(); ++tr )
55 {
56 if ( tr->first.second == potentialSeed.getContent().at( pi ) )
57 {
58 statePtr = & tr->second;
59 break;
60 }
61 }
62 }
63 return false;
64 }
65
76 template<class SymbolType>
77 static bool existsSuffix( const string::LinearString < SymbolType > & potentialSeed, const State & newState, unsigned distance,
78 const automaton::DFA < SymbolType, State > & dSuffixReversed )
79 {
80 auto reversedPotentialSeed = string::transform::StringReverse::reverse ( potentialSeed );
81
82 if ( newState.begin()->first == potentialSeed.size() && newState.begin()->second <= distance )
83 return true;
84
85
86 auto statePtr = & dSuffixReversed.getInitialState();
87 for ( size_t si = 0; si < reversedPotentialSeed.size(); ++si )
88 {
89 if( si >= newState.begin()->first - potentialSeed.size()
90 && dSuffixReversed.getFinalStates().count( *statePtr )
91 && ( *statePtr ).rbegin()->second <= distance )
92 return true;
93
94 auto it_range = dSuffixReversed.getTransitionsFromState( *statePtr );
95 for ( auto tr = it_range.begin(); tr != it_range.end(); ++tr )
96 {
97 if ( tr->first.second == reversedPotentialSeed.getContent().at( si ) )
98 {
99 statePtr = & tr->second;
100 break;
101 }
102 }
103 }
104 return false;
105 }
106
115 template<class SymbolType>
116 static bool coversMiddle( const string::LinearString < SymbolType > & potentialSeed, const State & newState )
117 {
118 auto firstIt = newState.begin();
119 firstIt++;
120 for ( auto secondIt = newState.begin(); firstIt != newState.end(); ++firstIt, ++secondIt )
121 {
122 if ( firstIt->first - secondIt->first > potentialSeed.size() )
123 return false;
124 }
125 return true;
126 }
127
140 template<class SymbolType>
141 static bool isSeed( const string::LinearString < SymbolType > & potentialSeed, const State & newState, unsigned distance,
143 const automaton::DFA < SymbolType, State > & dSuffixReversed,
145 {
146 if ( ! potentialSeed.empty()
147 && coversMiddle( potentialSeed, newState )
148 && existsPrefix( potentialSeed, newState, distance, dSuffix, pattern )
149 && existsSuffix( potentialSeed, newState, distance, dSuffixReversed ) )
150 return true;
151 else
152 return false;
153 }
154
167 template<class SymbolType>
168 static unsigned
169 smallestDistanceSeed( const string::LinearString < SymbolType > & seed, const State & newState, unsigned distance,
171 const automaton::DFA < SymbolType, State > & dSuffixReversed,
173 {
174 if ( distance == 0 )
175 return distance;
176
177 unsigned dist = distance - 1;
178 auto state = newState;
179 for ( auto it = state.begin(); it != state.end(); )
180 {
181 if ( it->second > dist )
182 it = state.erase( it );
183 else
184 it++;
185 }
186 while ( ! state.empty() && isSeed( seed, state, dist, dSuffix, dSuffixReversed, pattern ) )
187 {
188 if ( dist == 0 )
189 return dist;
190
191 dist = dist - 1;
192 for ( auto it = state.begin(); it != state.end(); )
193 {
194 if ( it->second > dist )
195 it = state.erase(it);
196 else
197 it++;
198 }
199 }
200 return dist + 1;
201 }
202
216 template<class SymbolType>
217 static void DFS ( const State & state,
220 const automaton::DFA < SymbolType, State > & dSuffixReversed,
223 unsigned distance,
224 bool restricted )
225 {
226 if ( ! restricted || std::any_of( state.begin(), state.end(), []( const auto & x ) -> bool { return x.second == 0; } ) )
227 {
228 /* check if lfactor is seed, add to result if so */
229 if ( ( lfactor.size() < pattern.getContent().size() )
230 && isSeed( lfactor, state, distance, dSuffix, dSuffixReversed, pattern ) )
231 {
232 unsigned d = smallestDistanceSeed( lfactor, state, distance, dSuffix, dSuffixReversed, pattern );
233 if ( lfactor.size() > d )
235 }
236 /* going to next states */
237 auto transitions = dSuffix.getTransitionsFromState( state );
238 for ( const auto & tr : transitions )
239 {
240 string::LinearString < SymbolType > lfactorCopy = lfactor;
241 lfactorCopy.appendSymbol( tr.first.second );
242 DFS( tr.second, lfactorCopy, dSuffix, dSuffixReversed, result, pattern, distance, restricted );
243 }
244 }
245 }
246
247
248 public:
258 template<class SymbolType>
261 unsigned distance,
262 bool restricted )
263 {
265
267 auto dSuffixReversed = DeterministicApproximateSuffixAutomatonForHammingDistanceFactory::construct( reversedPattern, distance );
268
270
271 DFS( dSuffix.getInitialState(), string::LinearString < SymbolType > (dSuffix.getInputAlphabet(), {} ), dSuffix, dSuffixReversed, result, pattern, distance, restricted );
272
273 return result;
274 }
275
284 template<class SymbolType>
287 unsigned distance )
288 {
289 return ApproximateSeedComputation::compute( pattern, distance, false );
290 }
291 };
292}
293
Deterministic finite automaton. Accepts regular languages.
Definition: DFA.h:71
const ext::set< StateType > & getFinalStates() const &
Definition: DFA.h:183
const StateType & getInitialState() const &
Definition: DFA.h:105
const ext::set< SymbolType > & getInputAlphabet() const &
Definition: DFA.h:232
ext::iterator_range< typename ext::map< ext::pair< StateType, SymbolType >, StateType >::const_iterator > getTransitionsFromState(const StateType &from) const
Definition: DFA.h:483
Class extending the pair class from the standard library. Original reason is to allow printing of the...
Definition: pair.hpp:43
Definition: set.hpp:44
auto begin() &
Inherited behavior of begin for non-const instance.
Definition: set.hpp:99
auto end() &
Inherited behavior of end for non-const instance.
Definition: set.hpp:129
Linear string.
Definition: LinearString.h:57
bool empty() const
Definition: LinearString.h:258
const ext::vector< SymbolType > & getContent() const &
Definition: LinearString.h:238
void appendSymbol(SymbolType symbol)
Definition: LinearString.h:230
size_t size() const
Definition: LinearString.h:263
static string::LinearString< SymbolType > reverse(const string::LinearString< SymbolType > &arg)
Definition: StringReverse.h:35
Definition: ApproximateSeedComputation.h:24
static ext::set< ext::pair< string::LinearString< SymbolType >, unsigned > > compute(const string::LinearString< SymbolType > &pattern, unsigned distance)
Definition: ApproximateSeedComputation.h:285
static ext::set< ext::pair< string::LinearString< SymbolType >, unsigned > > compute(const string::LinearString< SymbolType > &pattern, unsigned distance, bool restricted)
Definition: ApproximateSeedComputation.h:259
static automaton::DFA< SymbolType, StateType > construct(const string::LinearString< SymbolType > &pattern, unsigned k)
Definition: DeterministicApproximateSuffixAutomatonForHammingDistanceFactory.h:58
for(const StateType &state :fsm.getStates()) renamingData.insert(std Rename::RenamedAutomaton< T > result(renamingData.at(fsm.getInitialState()))
Definition: Rename.h:253
constexpr auto make_pair(T1 &&x, T2 &&y)
Definition: pair.hpp:79
any_of(T &&...) -> any_of< T... >
Definition: ApproximateSeedComputation.cpp:10