Algorithms Library Toolkit
A toolkit for algorithms, especially for algorithms on formal languages
LZ77Compression.h
Go to the documentation of this file.
1
6#pragma once
7
8#include <tuple>
9#include <vector>
10
11#include <string/LinearString.h>
12
14
15namespace stringology {
16
17namespace compression {
18
20public:
21 template < class SymbolType >
22 static std::vector < std::tuple < unsigned, unsigned, SymbolType > > compress ( const string::LinearString < SymbolType > & string, unsigned searchBufferSize, unsigned lookaheadBufferSize );
23
24private:
25 template < class SymbolType >
26 static int equal ( const string::LinearString < SymbolType > & string, unsigned first1, unsigned last1, unsigned first2 );
27};
28
29// Main method that handle compress
30template < class SymbolType >
31std::vector < std::tuple < unsigned, unsigned, SymbolType > > LZ77Compression::compress ( const string::LinearString < SymbolType > & string, unsigned searchBufferSize, unsigned lookaheadBufferSize ) {
32
33 if(searchBufferSize == 0)
34 throw exception::CommonException("LZ77: search buffer size must be greater than 0");
35
36 if(lookaheadBufferSize == 0)
37 throw exception::CommonException("LZ77: lookahead buffer size must be greater than 0");
38
39 size_t pointer = 0;
40
41 std::vector < std::tuple < unsigned, unsigned, SymbolType > > output;
42
43 while ( pointer < string.getContent ( ).size ( ) ) {
44 unsigned lookaheadBufferRealSize = lookaheadBufferSize;
45 if ( pointer + lookaheadBufferSize + 1 > string.getContent ( ).size ( ) )
46 lookaheadBufferRealSize = string.getContent ( ).size ( ) - pointer - 1;
47
48 unsigned searchBufferRealSize = searchBufferSize;
49 if ( pointer < searchBufferSize )
50 searchBufferRealSize = pointer;
51
52 unsigned maxMatch = 0;
53 unsigned sbPointer = pointer;
54 for ( unsigned j = pointer - searchBufferRealSize; j < pointer; j ++ ) {
55 unsigned match = equal ( string, pointer, pointer + lookaheadBufferRealSize, j );
56
57 if ( maxMatch <= match ) {
58 maxMatch = match;
59 sbPointer = j;
60 }
61 }
62
63 output.push_back ( std::tuple < unsigned, unsigned, SymbolType > ( pointer - sbPointer, maxMatch, string.getContent ( ) [ pointer + maxMatch ] ) );
64 pointer = pointer + maxMatch + 1;
65 }
66
67 return output;
68}
69
70// Method that return longest match of two substrings that are defined as incoming string and index of first staring letter
71template < class SymbolType >
72int LZ77Compression::equal ( const string::LinearString < SymbolType > & string, unsigned first1, unsigned last1, unsigned first2 ) {
73
74 int steps = 0;
75
76 while ( first1 < last1 ) {
77 if ( string.getContent ( )[first1] != string.getContent ( )[first2] )
78 return steps;
79
80 first1++;
81 first2++;
82 steps++;
83 }
84
85 return steps;
86}
87
88} /* namespace compression */
89
90} /* namespace stringology */
91
Basic exception from which all other exceptions are derived.
Definition: CommonException.h:21
Linear string.
Definition: LinearString.h:57
Definition: LZ77Compression.h:19
static std::vector< std::tuple< unsigned, unsigned, SymbolType > > compress(const string::LinearString< SymbolType > &string, unsigned searchBufferSize, unsigned lookaheadBufferSize)
Definition: LZ77Compression.h:31
Definition: ArithmeticCompression.h:18