Algorithms Library Toolkit
A toolkit for algorithms, especially for algorithms on formal languages
Lexer.h
Go to the documentation of this file.
1
6#pragma once
7
8#include <memory>
9#include <sstream>
10
11#include <ext/iostream>
12
13#include <alib/string>
14
16
17#include <lexer/CharSequence.h>
18
19namespace cli {
20
21class Lexer {
22public:
/**
 * Hint kept by the lexer in m_hint. The constructor starts with NONE and
 * setHint() replaces the value.
 *
 * NOTE(review): presumably TYPE and FILE ask nextToken() to read the upcoming
 * input as a type name or a file name (there are matching TokenType::TYPE and
 * TokenType::FILE enumerators) - confirm against Lexer.cpp.
 */
23 enum class Hint {
24 NONE,
25 TYPE,
26 FILE
27 };
28
29private:
// Character source owned by the lexer; the constructor advances it once and putback() returns raw text to it.
30 CharSequence m_source;
// Current hint; Hint::NONE after construction, overwritten by setHint().
31 Hint m_hint;
32
33public:
/**
 * Kinds of token the lexer can report. The underlying type is unsigned and no
 * enumerator has an explicit value, so values follow declaration order; the
 * friend operator< on TokenType compares exactly those values.
 *
 * NOTE(review): this listing skips source lines inside the enum (the embedded
 * line numbers jump, e.g. 34 -> 37, 39 -> 44, 55 -> 69), so a number of
 * enumerators are not shown here. tokenTypeToString() returns names such as
 * "identifier", "unsigned", "less_than" or "left_paren" that have no visible
 * enumerator. Restore them from the original header before relying on this text.
 */
34 enum class TokenType : unsigned {
37 DOUBLE,
38 STRING,
39
44 EQUAL,
46
53
55 AT_SIGN,
69
70 AND,
71 OR,
72 DEC,
73 INC,
74 ASSIGN,
75
76 COMMA,
77 DOT,
78
79 FILE,
80 TYPE,
81 ERROR,
// NOTE(review): EOT / EOS presumably stand for "end of text" / "end of stream" - confirm in Lexer.cpp.
82 EOT,
83 EOS
84 };
85
86 friend bool operator < ( TokenType first, TokenType second ) {
87 return static_cast < unsigned > ( first ) < static_cast < unsigned > ( second );
88 }
89
/**
 * Gives the lower-case textual name of a token type.
 *
 * @param type token type to name
 * @return the name as a std::string, e.g. "equal", "at_sign", "eos"
 * @throws exception::CommonException when no case matches the value
 *
 * NOTE(review): this listing drops most of the `case` labels (the embedded
 * line numbers skip the line before many `return` statements), so several
 * returns below appear without a label. The code is incomplete as shown;
 * restore the labels from the original header.
 *
 * NOTE(review): some returned names are spelled "dolar_sign",
 * "exclemation_sign" and "more_or_equal_operator". They are runtime strings
 * that other code may compare against, so do not correct them without
 * checking every consumer.
 */
90 static std::string tokenTypeToString ( TokenType type ) {
91 switch ( type ) {
93 return "identifier";
95 return "unsigned";
97 return "double";
99 return "string";
100
102 return "less_than";
104 return "less_than_or_equal";
106 return "more_than";
108 return "more_or_equal_operator";
109 case TokenType::EQUAL :
110 return "equal";
112 return "not_equal";
113
115 return "left_paren";
117 return "right_paren";
119 return "left_brace";
121 return "right_brace";
123 return "left_bracket";
125 return "right_bracket";
126
128 return "dolar_sign";
129 case TokenType::AT_SIGN :
130 return "at_sign";
132 return "ampersand_sign";
134 return "pipe_sign";
136 return "caret_sign";
138 return "colon_sign";
140 return "semicolon_sign";
142 return "minus_sign";
144 return "plus_sign";
146 return "slash_sign";
148 return "asterisk_sign";
150 return "tilde_sign";
152 return "exclemation_sign";
154 return "percentage_sign";
156 return "hash_sign";
157
158 case TokenType::AND:
159 return "and";
160 case TokenType::OR:
161 return "or";
162 case TokenType::DEC:
163 return "dec";
164 case TokenType::INC:
165 return "inc";
167 return "assign";
168
169 case TokenType::COMMA :
170 return "comma";
171 case TokenType::DOT :
172 return "dot";
173
174 case TokenType::FILE :
175 return "file";
176 case TokenType::TYPE :
177 return "type";
178 case TokenType::ERROR :
179 return "error";
180 case TokenType::EOT :
181 return "eot";
182 case TokenType::EOS :
183 return "eos";
// Any value without a case above is reported as an error instead of returning an empty name.
184 default:
185 throw exception::CommonException ( "Unhandled case in Lexer::tokenTypeToString" );
186 }
187 }
188
/**
 * Takes an identifier string and yields a TokenType. The parameter is left
 * unnamed (its name is only given in a comment), so the visible signature
 * does not use the argument.
 *
 * NOTE(review): presumably this is the keyword lookup for identifiers - confirm.
 * NOTE(review): the function body (source line 190) is missing from this
 * listing; as shown the function has no return statement. Restore the body
 * from the original header.
 */
189 static TokenType is_kw ( const std::string & /* identifier */ ) {
191 }
192
/**
 * A single token: its type, its value text, and the raw text and coordinates
 * needed to hand it back to the character source.
 *
 * NOTE(review): the member `m_type` is used below but its declaration (source
 * line 196 according to the listing's cross-reference) is missing from this
 * listing.
 */
193 struct Token {
// Value text; printed after the type name by operator<< for the token types handled there.
194 std::string m_value;
// Raw text; Lexer::putback() moves it back into the character source.
195 std::string m_raw;
197
// NOTE(review): presumably the line and position of the token in the input - not read by any code visible here.
198 size_t m_line = 0;
199 size_t m_position = 0;
200
// Line and position passed together with m_raw to CharSequence::putback() by Lexer::putback().
201 size_t m_raw_line = 0;
202 size_t m_raw_position = 0;
203
/**
 * Writes the token type name and, for selected types with a non-empty
 * value, the value itself: `: value` for DOUBLE, `: "value"` for STRING.
 * All other types print the name only.
 *
 * NOTE(review): source lines 211-212 are missing from this listing;
 * presumably further `case` labels sharing the DOUBLE branch - confirm.
 */
204 friend std::ostream & operator << ( std::ostream & out, const Token & token ) {
205 out << Lexer::tokenTypeToString ( token.m_type );
206
// Nothing to append when the token carries no value.
207 if ( token.m_value.empty ( ) )
208 return out;
209
210 switch ( token.m_type ) {
213 case TokenType::DOUBLE :
214 return out << ": " << token.m_value;
215 case TokenType::STRING :
216 return out << ": \"" << token.m_value << "\"";
217 default:
218 return out;
219 }
220 }
221
/**
 * Orders tokens by type first and by value second. Raw text and the
 * line/position members do not take part in the comparison.
 */
222 bool operator < ( const Token & token ) const {
223 return std::tie ( m_type, m_value ) < std::tie ( token.m_type, token.m_value ); // m_raw omitted intentionally
224 }
225
226 };
227
228 explicit Lexer ( CharSequence source ) : m_source ( std::move ( source ) ), m_hint ( Hint::NONE ) {
229 m_source.advance ( true );
230 }
231
/**
 * Produces the next token. Only declared in this header; the listing's
 * cross-reference places the definition in Lexer.cpp, which is not shown here.
 *
 * @param readNextLine defaults to false.
 *        NOTE(review): presumably forwarded to CharSequence::advance(), which
 *        takes a parameter of the same name - confirm in Lexer.cpp.
 * @return the token that was read
 */
232 Token nextToken ( bool readNextLine = false );
233
234 void putback ( Token && token ) {
235 m_source.putback ( std::move ( token.m_raw ), token.m_raw_line, token.m_raw_position );
236 }
237
238 void setHint ( Hint hint ) {
239 m_hint = hint;
240 }
241};
242
243} /* namespace cli */
244
Definition: CharSequence.h:16
void putback(std::string string, size_t line, size_t position)
Definition: CharSequence.h:54
void advance(bool readNextLine)
Definition: CharSequence.cpp:54
Definition: Lexer.h:21
Hint
Definition: Lexer.h:23
void setHint(Hint hint)
Definition: Lexer.h:238
static TokenType is_kw(const std::string &)
Definition: Lexer.h:189
static std::string tokenTypeToString(TokenType type)
Definition: Lexer.h:90
void putback(Token &&token)
Definition: Lexer.h:234
Lexer(CharSequence source)
Definition: Lexer.h:228
Token nextToken(bool readNextLine=false)
Definition: Lexer.cpp:12
TokenType
Definition: Lexer.h:34
friend bool operator<(TokenType first, TokenType second)
Definition: Lexer.h:86
Basic exception from which all other exceptions are derived.
Definition: CommonException.h:21
p second
Definition: ToRegExpAlgebraic.h:126
Definition: Arg.h:11
constexpr tuple< Elements &... > tie(Elements &... args) noexcept
Helper for constructing an extended tuple of references. The tuple is constructed to refer to values in ...
Definition: tuple.hpp:218
void hint(Hint hint)
Definition: measurements.cpp:37
Definition: FordFulkerson.hpp:16
Definition: Lexer.h:193
size_t m_raw_line
Definition: Lexer.h:201
TokenType m_type
Definition: Lexer.h:196
size_t m_raw_position
Definition: Lexer.h:202
size_t m_line
Definition: Lexer.h:198
size_t m_position
Definition: Lexer.h:199
std::string m_raw
Definition: Lexer.h:195
bool operator<(const Token &token) const
Definition: Lexer.h:222
friend std::ostream & operator<<(std::ostream &out, const Token &token)
Definition: Lexer.h:204
std::string m_value
Definition: Lexer.h:194