Algorithms Library Toolkit
A toolkit for algorithms, especially for algorithms on formal languages
ExactSuffixAutomaton.h
Go to the documentation of this file.
1
6/*
7 * Author: Radovan Cerveny
8 */
9
10#pragma once
11
12#include <limits>
13
15#include <string/LinearString.h>
16
17#include <global/GlobalData.h>
18
19namespace stringology {
20
21namespace indexing {
22
24private:
25 static constexpr unsigned infinity = std::numeric_limits < unsigned >::max ( );
26
27 template < class SymbolType >
28 static void suffixAutomatonAddSymbol ( automaton::DFA < SymbolType, unsigned > & suffixAutomaton, const SymbolType & symbol, std::vector < std::pair < unsigned, int > > & suffixLinks, unsigned & lastState );
29
30public:
31 template < class SymbolType >
33
34};
35
36template < class SymbolType >
38 automaton::DFA < SymbolType, unsigned > suffixAutomaton ( 0 );
39
40 suffixAutomaton.setInputAlphabet ( pattern.getAlphabet ( ) );
41
42 std::vector < std::pair < unsigned, int > > suffixLinks = { { infinity, 0 } }; //vector is fine, the state number is exactly the index to the vector
43 unsigned lastState = 0;
44
46 common::Streams::log << "String size " << pattern.getContent ( ).size ( ) << std::endl;
47
48 for ( const SymbolType & symbol : pattern.getContent ( ) ) {
49 if ( common::GlobalData::verbose && lastState % 1000 == 0 )
50 common::Streams::log << "Progress " << lastState << std::endl;
51
52 suffixAutomatonAddSymbol ( suffixAutomaton, symbol, suffixLinks, lastState );
53 }
54
55 while ( lastState != infinity ) {
56 suffixAutomaton.addFinalState ( lastState );
57 lastState = suffixLinks [ lastState ].first;
58 }
59
60 return indexes::stringology::SuffixAutomaton < SymbolType > ( std::move ( suffixAutomaton ), pattern.getContent ( ).size ( ) );
61}
62
63template < class SymbolType >
64void ExactSuffixAutomaton::suffixAutomatonAddSymbol ( automaton::DFA < SymbolType, unsigned > & suffixAutomaton, const SymbolType & symbol, std::vector < std::pair < unsigned, int > > & suffixLinks, unsigned & lastState ) {
65 unsigned newState = suffixAutomaton.getStates ( ).size ( );
66
67 suffixAutomaton.addState ( newState );
68
69 int lastSuffixLength = suffixLinks [ lastState ].second;
70
71 suffixLinks.emplace_back ( infinity, lastSuffixLength + 1 );
72
73 unsigned kState = lastState;
74
75 while ( kState != infinity && suffixAutomaton.getTransitions ( ).find ( { kState, symbol } ) == suffixAutomaton.getTransitions ( ).end ( ) ) {
76 suffixAutomaton.addTransition ( kState, symbol, newState );
77 kState = suffixLinks [ kState ].first;
78 }
79
80 if ( kState == infinity ) {
81 suffixLinks [ newState ].first = 0;
82 } else {
83 unsigned qState = suffixAutomaton.getTransitions ( ).find ( { kState, symbol } )->second;
84
85 int kSuffixLength = suffixLinks [ kState ].second;
86 int qSuffixLength = suffixLinks [ qState ].second;
87
88 if ( kSuffixLength + 1 == qSuffixLength ) {
89 suffixLinks [ newState ].first = qState;
90 } else {
91 unsigned cloneState = suffixAutomaton.getStates ( ).size ( );
92 suffixAutomaton.addState ( cloneState );
93
94 suffixLinks.emplace_back ( suffixLinks [ qState ].first, kSuffixLength + 1 );
95
96 for ( const auto & transition : suffixAutomaton.getTransitionsFromState ( qState ) )
97 suffixAutomaton.addTransition ( cloneState, transition.first.second, transition.second );
98
99 while ( kState != infinity && suffixAutomaton.removeTransition ( kState, symbol, qState ) ) {
100 suffixAutomaton.addTransition ( kState, symbol, cloneState );
101 kState = suffixLinks [ kState ].first;
102 }
103
104 suffixLinks [ qState ].first = cloneState;
105 suffixLinks [ newState ].first = cloneState;
106 }
107 }
108 lastState = newState;
109}
110
111} /* namespace indexing */
112
113} /* namespace stringology */
114
Deterministic finite automaton. Accepts regular languages.
Definition: DFA.h:71
bool removeTransition(const StateType &from, const SymbolType &input, const StateType &to)
Removes a transition from the automaton.
Definition: DFA.h:457
const ext::map< ext::pair< StateType, SymbolType >, StateType > & getTransitions() const &
Definition: DFA.h:473
bool addFinalState(StateType state)
Definition: DFA.h:203
const ext::set< StateType > & getStates() const &
Definition: DFA.h:134
bool addTransition(StateType from, SymbolType input, StateType to)
Add a transition to the automaton.
Definition: DFA.h:432
bool addState(StateType state)
Definition: DFA.h:154
void setInputAlphabet(ext::set< SymbolType > symbols)
Definition: DFA.h:270
ext::iterator_range< typename ext::map< ext::pair< StateType, SymbolType >, StateType >::const_iterator > getTransitionsFromState(const StateType &from) const
Definition: DFA.h:483
static bool verbose
Verbose flag. Some algorithms print additional runtime information about internal datastructures or t...
Definition: GlobalData.h:24
static ext::reference_wrapper< ext::ostream > log
Standard loging stream. Mapped to descriptor 4.
Definition: GlobalData.h:78
Suffix automaton string index. Automaton representation of all suffixes. The automaton is general det...
Definition: SuffixAutomaton.h:50
Linear string.
Definition: LinearString.h:57
const ext::set< SymbolType > & getAlphabet() const &
Definition: LinearString.h:103
const ext::vector< SymbolType > & getContent() const &
Definition: LinearString.h:238
Definition: ExactSuffixAutomaton.h:23
static indexes::stringology::SuffixAutomaton< SymbolType > construct(const string::LinearString< SymbolType > &pattern)
Definition: ExactSuffixAutomaton.h:37
p second
Definition: ToRegExpAlgebraic.h:126
typename T::SymbolType SymbolType
Definition: ReachableStates.h:176
constexpr const T & max(const T &a)
Root case of maximum computation. The maximum from one value is the value itself.
Definition: algorithm.hpp:278
Definition: ArithmeticCompression.h:18