Algorithms Library Toolkit
A toolkit for algorithms, especially for algorithms on formal languages
SequentialSampling.h
Go to the documentation of this file.
1
6#pragma once
7
8#include <alib/measure>
9
10#include <alib/set>
11#include <alib/vector>
12
14#include <string/LinearString.h>
15
16
17namespace stringology {
18
19 namespace exact {
20
21 namespace {
22 size_t div_up (const size_t &x ,const size_t &y ) {
23 return ( x % y == 1 ) ? x / y + 1 : x / y ;
24 }
25
26 template <class SymbolType>
27 ext::set < unsigned > SimpleTextSearching ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern ) {
29 const auto & text = subject.getContent();
30 const auto & pat = pattern.getContent();
31 size_t n = text.size(), m = pat.size();
32
33 size_t i = 0 ;
34 while ( i <= n - m ) {
35 size_t j = 0 ;
36 while ( j < m && text[ i + j ] == pat[j] ) ++ j ;
37 if ( j == m ) occ.insert(i) ;
38 i += div_up( j + 1 , 2 ) ;
39 }
40 return occ ;
41 }
42
43 // find first occurence i and return it, or -1
44 template <class SymbolType>
45 ssize_t SimpleTextSearchingFirst ( const ext::vector< SymbolType > & text, const ext::vector< SymbolType > & pat, const size_t & start) {
46 size_t n = text.size(), m = pat.size();
47
48 size_t i = start ;
49 while ( i <= n - m ) {
50 size_t j = 0 ;
51 while ( j < m && text[ i + j ] == pat[j] ) ++ j ;
52 if ( j == m ) return i ;
53 i += div_up( j + 1 , 2 ) ;
54 }
55 return -1;
56 }
57
58 template <class SymbolType>
59 ext::set < unsigned > SeqSampling ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern, const size_t& p , const size_t& q ){
61 const auto & text = subject.getContent();
62 const auto & pat = pattern.getContent();
63 size_t n = text.size(), m = pat.size();
64
65 size_t i = 0 ;
66 while ( i <= n - m ) {
67 if ( text[i + p] != pat[p] || text[i + q] != pat[q] ) ++ i ;
68 else {
69 size_t j = 0 ;
70 while ( j < m && text[i + j] == pat[j] ) ++ j ;
71 if ( j == m ) occ.insert( i ) ;
72 if ( j < q - 1 ) i += p ;
73 else i += div_up( j + 1 , 2 ) ;
74 }
75 }
76 return occ ;
77 }
78
79 // finds the first occurence and return it
80 template <class SymbolType>
81 size_t SeqSamplingFirst ( const ext::vector< SymbolType > & text, const ext::vector < SymbolType > & pat, const size_t& p , const size_t& q , const size_t& start){
82 size_t n = text.size(), m = pat.size();
83
84 size_t i = start ;
85 while ( i <= n - m ) {
86 if ( text[i + p] != pat[p] || text[i + q] != pat[q] ) ++ i ;
87 else {
88 size_t j = 0 ;
89 while ( j < m && text[i + j] == pat[j] ) ++ j ;
90 if ( j == m ) return i ;
91 if ( j < q - 1 ) i += p ;
92 else i += div_up(j + 1 , 2 );
93 }
94 }
95 return -1 ;
96 }
97
98 template <class SymbolType>
99 ext::set < unsigned > Mix ( const string::LinearString < SymbolType > & subject, const string::LinearString < SymbolType > & pattern, const size_t& per) {
101 const auto & text = subject.getContent();
102 const auto & pat = pattern.getContent();
103 size_t n = text.size(), m = pat.size();
104
105 const ext::vector vv_ = ext::vector<SymbolType>( pat.begin() , pat.begin() + 2 * per - 1 ) ;
106 ssize_t periodic_prefix , period ;
108
109 ssize_t i = 0 ;
110 while ( static_cast < size_t > ( i ) <= n - m ) {
111 if (periodic_prefix == -1) {
112 i = SimpleTextSearchingFirst(text, vv_ , i );
113 } else { i = SeqSamplingFirst(text, vv_ , periodic_prefix - period , periodic_prefix ,i ); }
114 if ( i == -1 || static_cast < size_t > ( i ) > n - m ) return occ ;
115 size_t j = 2 * per - 1;
116 while (j < m && text[i + j] == pat[j]) ++j;
117 if (j == m) {
118 occ.insert(i);
119 i += per;
120 } else {
121 i += std::max ( j - per , static_cast < size_t > ( 1 ) ) ;
122 }
123
124 }
125 return occ ;
126 }
127 }
128
134 public:
139 template < class SymbolType >
141
142 };
143
144 template < class SymbolType >
146 ssize_t periodic_prefix , period ;
148 ext::tie(periodic_prefix , period ) = string::properties::PeriodicPrefix::construct(pattern);
150
153 if ( periodic_prefix == -1 ) {
154 res = SimpleTextSearching( subject , pattern) ;
155 } else if ( static_cast < size_t > ( periodic_prefix ) < pattern.getContent().size() ) {
156 res = SeqSampling( subject, pattern , periodic_prefix - period, periodic_prefix ) ;
157 } else res = Mix( subject, pattern, period ) ;
158
160 return res ;
161 }
162
163
164 } /* namespace exact */
165
166} /* namespace stringology */
167
Definition: set.hpp:44
Class extending the vector class from the standard library. Original reason is to allow printing of t...
Definition: vector.hpp:45
auto begin() &
Inherited behavior of begin for non-const instance.
Definition: vector.hpp:125
Linear string.
Definition: LinearString.h:57
const ext::vector< SymbolType > & getContent() const &
Definition: LinearString.h:238
static ext::pair< ssize_t, ssize_t > construct(const string::LinearString< SymbolType > &string)
Definition: PeriodicPrefix.h:43
Definition: SequentialSampling.h:133
static ext::set< unsigned > match(const string::LinearString< SymbolType > &subject, const string::LinearString< SymbolType > &pattern)
Definition: SequentialSampling.h:145
int i
Definition: AllEpsilonClosure.h:118
return res
Definition: MinimizeByPartitioning.h:145
q
Definition: SingleInitialStateEpsilonTransition.h:85
constexpr tuple< Elements &... > tie(Elements &... args) noexcept
Helper of extended tuple of references construction. The tuple is constructed to reffer to values in ...
Definition: tuple.hpp:218
constexpr const T & max(const T &a)
Root case of maximum computation. The maximum from one value is the value itself.
Definition: algorithm.hpp:278
void start(measurements::stealth_string name, measurements::Type type)
Definition: measurements.cpp:14
void end()
Definition: measurements.cpp:19
size_t div_up(const size_t &x, const size_t &y)
Definition: GalilSeiferas.h:39
Definition: ArithmeticCompression.h:18