Algorithms Library Toolkit
A toolkit for algorithms, especially for algorithms on formal languages
BNDMOccurrences.h
Go to the documentation of this file.
1
6#pragma once
7
9#include <string/LinearString.h>
10#include <global/GlobalData.h>
11
12#include <ext/foreach>
13
14namespace stringology {
15
16namespace query {
17
24
25public:
32 template < class SymbolType, size_t BitmaskBitCount >
34
35};
36
37template < class SymbolType, size_t BitmaskBitCount >
39
41
42 size_t patternLength = pattern.getString ( ).getContent ( ).size ( );
43 size_t subjectLength = subject.getContent ( ).size ( );
44 size_t posInSubject = 0;
45 size_t bitmaskLength = std::min ( BitmaskBitCount, patternLength );
46
48
49 while ( posInSubject + patternLength <= subjectLength ) {
50 size_t posInPattern = bitmaskLength;
51 size_t lastPosOfFactor = bitmaskLength;
52
53 // Set the bitmask to all ones
54 currentBitmask.set ( );
55
56 while ( posInPattern > 0 && currentBitmask.any ( ) ) {
57 typename ext::map < SymbolType, ext::bitset < BitmaskBitCount > >::const_iterator symbolVectorIter = pattern.getData ( ).find ( subject.getContent ( ).at ( posInSubject + posInPattern - 1 ) );
58 if ( symbolVectorIter == pattern.getData ( ).end ( ) )
59 break;
60
61 currentBitmask &= symbolVectorIter->second;
62 posInPattern--;
63
64 // Test whether the most significant bit is set
65 if ( currentBitmask.test ( bitmaskLength - 1 ) ) {
66 // and we didn't process all symbols of the pattern
67 if ( posInPattern > 0 )
68 lastPosOfFactor = posInPattern;
69 else {
70 size_t k = bitmaskLength;
71
72 // out of bitset fallback to naive checking of occurrence here
73 while ( k < patternLength && pattern.getString ( ).getContent ( ).at ( k ) == subject.getContent ( ).at ( posInSubject + k ) ) k++;
74
75 if ( k == patternLength )
76 // Yay, there is match!!!
77 occ.insert ( posInSubject );
78 }
79 }
80
81 currentBitmask <<= 1;
82 }
83
84 posInSubject += lastPosOfFactor;
85 }
86
87 return occ;
88}
89
90} /* namespace query */
91
92} /* namespace stringology */
93
Class extending the bitset class from the standard library. Original reason is to allow printing of t...
Definition: bitset.hpp:42
Class extending the map class from the standard library. Original reason is to allow printing of the ...
Definition: map.hpp:48
Definition: set.hpp:44
Bit set string index. Stores a bit set for each symbol of the alphabet. The bit set of symbol a conta...
Definition: BitSetIndex.h:54
const ext::map< SymbolType, ext::bitset< BitmaskBitCount > > & getData() const &
Definition: BitSetIndex.h:185
const string::LinearString< SymbolType > & getString() const &
Definition: BitSetIndex.h:195
Linear string.
Definition: LinearString.h:57
const ext::vector< SymbolType > & getContent() const &
Definition: LinearString.h:238
Definition: BNDMOccurrences.h:23
static ext::set< unsigned > query(const indexes::stringology::BitSetIndex< SymbolType, BitmaskBitCount > &pattern, const string::LinearString< SymbolType > &subject)
Definition: BNDMOccurrences.h:38
constexpr const T & min(const T &a)
Definition: algorithm.hpp:310
Definition: ArithmeticCompression.h:18