Class for lexing an inja Template. More...

#include <inja.hpp>

Collaboration diagram for inja::Lexer:

Public Member Functions
	Lexer (const LexerConfig &config)
SourceLocation	current_position () const
void	start (std::string_view input)
Token	scan ()
const LexerConfig &	get_config () const

Private Types
enum class	State { Text , ExpressionStart , ExpressionStartForceLstrip , ExpressionBody , LineStart , LineBody , StatementStart , StatementStartNoLstrip , StatementStartForceLstrip , StatementBody , CommentStart , CommentStartForceLstrip , CommentBody }
enum class	MinusState { Operator , Number }

Private Member Functions
Token	scan_body (std::string_view close, Token::Kind closeKind, std::string_view close_trim=std::string_view(), bool trim=false)
Token	scan_id ()
Token	scan_number ()
Token	scan_string ()
Token	make_token (Token::Kind kind) const
void	skip_whitespaces_and_newlines ()
void	skip_whitespaces_and_first_newline ()

Static Private Member Functions
static std::string_view	clear_final_line_if_whitespace (std::string_view text)

Private Attributes
const LexerConfig &	config
State	state
MinusState	minus_state
std::string_view	m_in
size_t	tok_start
size_t	pos

Detailed Description

Class for lexing an inja Template.

Definition at line 1097 of file inja.hpp.

Member Enumeration Documentation

◆ MinusState

enum class inja::Lexer::MinusState

strong, private

Enumerator
Operator
Number

Definition at line 1114 of file inja.hpp.

                              {
            // A '-' met by scan_body() is returned as the binary Minus operator token.
            Operator,
            // A '-' met by scan_body() starts a number literal (handled by scan_number()).
            Number,
        };

◆ State

enum class inja::Lexer::State

strong, private

Enumerator
Text
ExpressionStart
ExpressionStartForceLstrip
ExpressionBody
LineStart
LineBody
StatementStart
StatementStartNoLstrip
StatementStartForceLstrip
StatementBody
CommentStart
CommentStartForceLstrip
CommentBody

Definition at line 1098 of file inja.hpp.

                         {
            // Scanning literal template text up to the next opening sequence.
            Text,
            // expression_open was found; the next scan() emits ExpressionOpen.
            ExpressionStart,
            // expression_open_force_lstrip was found; the next scan() emits ExpressionOpen.
            ExpressionStartForceLstrip,
            // Inside an expression; tokens are produced by scan_body() until expression_close.
            ExpressionBody,
            // line_statement was found at the start of a line; the next scan() emits LineStatementOpen.
            LineStart,
            // Inside a line statement; tokens are produced by scan_body() until "\n".
            LineBody,
            // statement_open was found; the next scan() emits StatementOpen.
            StatementStart,
            // statement_open_no_lstrip was found; the next scan() emits StatementOpen.
            StatementStartNoLstrip,
            // statement_open_force_lstrip was found; the next scan() emits StatementOpen.
            StatementStartForceLstrip,
            // Inside a statement; tokens are produced by scan_body() until statement_close.
            StatementBody,
            // comment_open was found; the next scan() emits CommentOpen.
            CommentStart,
            // comment_open_force_lstrip was found; the next scan() emits CommentOpen.
            CommentStartForceLstrip,
            // Inside a comment; scan() skips ahead to comment_close.
            CommentBody,
        };

Constructor & Destructor Documentation

◆ Lexer()

inja::Lexer::Lexer ( const LexerConfig & config )

inline, explicit

Definition at line 1358 of file inja.hpp.

            : config(config)
            , state(State::Text)
            , minus_state(MinusState::Number)
            , tok_start(0)
            , pos(0) {
            // The configuration is held by reference, so the LexerConfig passed in must outlive this Lexer.
            // tok_start and pos are zeroed so that a scan() issued before start() sees an empty input and
            // yields Eof instead of reading indeterminate offsets.
        }

References config, minus_state, Number, state, and Text.

Referenced by inja::Parser::Parser().

Here is the caller graph for this function:

Member Function Documentation

◆ clear_final_line_if_whitespace()

std::string_view inja::Lexer::clear_final_line_if_whitespace ( std::string_view text )

inline, static, private

Definition at line 1342 of file inja.hpp.

                                                                                  {
            std::string_view result = text;
            while (!result.empty()) {
                const char ch = result.back();
                if (ch == ' ' || ch == '\t') {
                    result.remove_suffix(1);
                } else if (ch == '\n' || ch == '\r') {
                    break;
                } else {
                    return text;
                }
            }
            return result;
        }

Referenced by scan().

Here is the caller graph for this function:

◆ current_position()

SourceLocation inja::Lexer::current_position ( ) const

inline

Definition at line 1364 of file inja.hpp.

                                                {
            // Source location of the start of the token currently being lexed (tok_start), not of pos.
            SourceLocation location = get_source_location(m_in, tok_start);
            return location;
        }

References inja::get_source_location(), m_in, and tok_start.

Referenced by inja::Parser::throw_parser_error().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ get_config()

const LexerConfig & inja::Lexer::get_config ( ) const

inline

Definition at line 1517 of file inja.hpp.

                                              {
            // Read-only access to the configuration this lexer was constructed with.
            return this->config;
        }

References config.

Referenced by inja::Parser::parse_into_template().

Here is the caller graph for this function:

◆ make_token()

Token inja::Lexer::make_token ( Token::Kind kind ) const

inline, private

Definition at line 1310 of file inja.hpp.

                                               {
            return Token(kind, string_view::slice(m_in, tok_start, pos));
        }

References m_in, pos, inja::string_view::slice(), tok_start, and inja::Token::Token().

Referenced by scan(), scan_body(), scan_id(), scan_number(), and scan_string().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ scan()

Token inja::Lexer::scan ( )

inline

Definition at line 1381 of file inja.hpp.

                     {
            // Returns the next token of the input set by start(). The lexer is a state machine:
            //  - Text looks for the next opening sequence and returns the text in front of it,
            //  - the *Start states emit the matching open token,
            //  - the *Body states lex the block content until its closing sequence.
            tok_start = pos;
 
        again:
            // Nothing left to lex once the token start has reached the end of the input.
            if (tok_start >= m_in.size()) {
                return make_token(Token::Kind::Eof);
            }
 
            switch (state) {
                default:
                case State::Text: {
                    // fast-scan to first open character
                    // NOTE(review): open_chars presumably holds the first character of every opening
                    // sequence - confirm against LexerConfig.
                    const size_t open_start = m_in.substr(pos).find_first_of(config.open_chars);
                    if (open_start == std::string_view::npos) {
                        // didn't find open, return remaining text as text token
                        pos = m_in.size();
                        return make_token(Token::Kind::Text);
                    }
                    pos += open_start;
 
                    // try to match one of the opening sequences, and get the close
                    // The lstrip/no-lstrip variants are only tested after the plain opening sequence matched,
                    // so they are expected to begin with it (TODO confirm against LexerConfig).
                    std::string_view open_str = m_in.substr(pos);
                    bool must_lstrip = false;
                    if (inja::string_view::starts_with(open_str, config.expression_open)) {
                        if (inja::string_view::starts_with(open_str, config.expression_open_force_lstrip)) {
                            state = State::ExpressionStartForceLstrip;
                            must_lstrip = true;
                        } else {
                            state = State::ExpressionStart;
                        }
                    } else if (inja::string_view::starts_with(open_str, config.statement_open)) {
                        if (inja::string_view::starts_with(open_str, config.statement_open_no_lstrip)) {
                            state = State::StatementStartNoLstrip;
                        } else if (inja::string_view::starts_with(open_str, config.statement_open_force_lstrip)) {
                            state = State::StatementStartForceLstrip;
                            must_lstrip = true;
                        } else {
                            state = State::StatementStart;
                            must_lstrip = config.lstrip_blocks;
                        }
                    } else if (inja::string_view::starts_with(open_str, config.comment_open)) {
                        if (inja::string_view::starts_with(open_str, config.comment_open_force_lstrip)) {
                            state = State::CommentStartForceLstrip;
                            must_lstrip = true;
                        } else {
                            state = State::CommentStart;
                            must_lstrip = config.lstrip_blocks;
                        }
                    // Line statements are only recognised at the start of the input or right after a '\n'.
                    } else if ((pos == 0 || m_in[pos - 1] == '\n') && inja::string_view::starts_with(open_str, config.line_statement)) {
                        state = State::LineStart;
                    } else {
                        // tok_start is left unchanged, so the skipped character stays part of the text token.
                        pos += 1; // wasn't actually an opening sequence
                        goto again;
                    }
 
                    // Text in front of the opening sequence; pos stays on the opening sequence itself.
                    std::string_view text = string_view::slice(m_in, tok_start, pos);
                    if (must_lstrip) {
                        // Drop a whitespace-only final line in front of the block.
                        text = clear_final_line_if_whitespace(text);
                    }
 
                    if (text.empty()) {
                        // state already points at the *Start state, so the open token is produced right away.
                        goto again; // don't generate empty token
                    }
                    return Token(Token::Kind::Text, text);
                }
                // Each *Start state consumes its own opening sequence and switches to the matching body state.
                case State::ExpressionStart: {
                    state = State::ExpressionBody;
                    pos += config.expression_open.size();
                    return make_token(Token::Kind::ExpressionOpen);
                }
                case State::ExpressionStartForceLstrip: {
                    state = State::ExpressionBody;
                    pos += config.expression_open_force_lstrip.size();
                    return make_token(Token::Kind::ExpressionOpen);
                }
                case State::LineStart: {
                    state = State::LineBody;
                    pos += config.line_statement.size();
                    return make_token(Token::Kind::LineStatementOpen);
                }
                case State::StatementStart: {
                    state = State::StatementBody;
                    pos += config.statement_open.size();
                    return make_token(Token::Kind::StatementOpen);
                }
                case State::StatementStartNoLstrip: {
                    state = State::StatementBody;
                    pos += config.statement_open_no_lstrip.size();
                    return make_token(Token::Kind::StatementOpen);
                }
                case State::StatementStartForceLstrip: {
                    state = State::StatementBody;
                    pos += config.statement_open_force_lstrip.size();
                    return make_token(Token::Kind::StatementOpen);
                }
                case State::CommentStart: {
                    state = State::CommentBody;
                    pos += config.comment_open.size();
                    return make_token(Token::Kind::CommentOpen);
                }
                case State::CommentStartForceLstrip: {
                    state = State::CommentBody;
                    pos += config.comment_open_force_lstrip.size();
                    return make_token(Token::Kind::CommentOpen);
                }
                // The body states delegate to scan_body() with the closing sequence of their block.
                case State::ExpressionBody:
                    return scan_body(config.expression_close, Token::Kind::ExpressionClose, config.expression_close_force_rstrip);
                case State::LineBody:
                    return scan_body("\n", Token::Kind::LineStatementClose);
                case State::StatementBody:
                    return scan_body(
                        config.statement_close, Token::Kind::StatementClose, config.statement_close_force_rstrip, config.trim_blocks);
                case State::CommentBody: {
                    // fast-scan to comment close
                    const size_t end = m_in.substr(pos).find(config.comment_close);
                    if (end == std::string_view::npos) {
                        // Unterminated comment: the rest of the input is consumed and Eof is reported.
                        pos = m_in.size();
                        return make_token(Token::Kind::Eof);
                    }
 
                    // Check for trim pattern
                    // NOTE(review): looks one character in front of comment_close, i.e. this assumes that
                    // comment_close_force_rstrip is comment_close preceded by exactly one character - confirm.
                    const bool must_rstrip = inja::string_view::starts_with(m_in.substr(pos + end - 1), config.comment_close_force_rstrip);
 
                    // return the entire comment in the close token
                    state = State::Text;
                    pos += end + config.comment_close.size();
                    Token tok = make_token(Token::Kind::CommentClose);
 
                    // The token is built before skipping, so the skipped whitespace is not part of it.
                    if (must_rstrip || config.trim_blocks) {
                        skip_whitespaces_and_first_newline();
                    }
                    return tok;
                }
            }
        }

References clear_final_line_if_whitespace(), inja::LexerConfig::comment_close, inja::LexerConfig::comment_close_force_rstrip, inja::LexerConfig::comment_open, inja::LexerConfig::comment_open_force_lstrip, CommentBody, inja::Token::CommentClose, inja::Token::CommentOpen, CommentStart, CommentStartForceLstrip, config, inja::Token::Eof, inja::LexerConfig::expression_close, inja::LexerConfig::expression_close_force_rstrip, inja::LexerConfig::expression_open, inja::LexerConfig::expression_open_force_lstrip, ExpressionBody, inja::Token::ExpressionClose, inja::Token::ExpressionOpen, ExpressionStart, ExpressionStartForceLstrip, inja::LexerConfig::line_statement, LineBody, LineStart, inja::Token::LineStatementClose, inja::Token::LineStatementOpen, inja::LexerConfig::lstrip_blocks, m_in, make_token(), inja::LexerConfig::open_chars, pos, scan_body(), skip_whitespaces_and_first_newline(), inja::string_view::slice(), inja::string_view::starts_with(), state, inja::LexerConfig::statement_close, inja::LexerConfig::statement_close_force_rstrip, inja::LexerConfig::statement_open, inja::LexerConfig::statement_open_force_lstrip, inja::LexerConfig::statement_open_no_lstrip, StatementBody, inja::Token::StatementClose, inja::Token::StatementOpen, StatementStart, StatementStartForceLstrip, StatementStartNoLstrip, Text, inja::Token::Text, tok_start, inja::Token::Token(), and inja::LexerConfig::trim_blocks.

Referenced by inja::Parser::get_next_token(), and inja::Parser::get_peek_token().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ scan_body()

Token inja::Lexer::scan_body	(	std::string_view	close,
		Token::Kind	closeKind,
		std::string_view	close_trim = std::string_view(),
		bool	trim = false )

inline, private

Definition at line 1128 of file inja.hpp.

                                                                                                                              {
        again:
            // skip whitespace (except for \n as it might be a close)
            if (tok_start >= m_in.size()) {
                return make_token(Token::Kind::Eof);
            }
            const char ch = m_in[tok_start];
            if (ch == ' ' || ch == '\t' || ch == '\r') {
                tok_start += 1;
                goto again;
            }
 
            // check for close
            if (!close_trim.empty() && inja::string_view::starts_with(m_in.substr(tok_start), close_trim)) {
                state = State::Text;
                pos = tok_start + close_trim.size();
                const Token tok = make_token(closeKind);
                skip_whitespaces_and_newlines();
                return tok;
            }
 
            if (inja::string_view::starts_with(m_in.substr(tok_start), close)) {
                state = State::Text;
                pos = tok_start + close.size();
                const Token tok = make_token(closeKind);
                if (trim) {
                    skip_whitespaces_and_first_newline();
                }
                return tok;
            }
 
            // skip \n
            if (ch == '\n') {
                tok_start += 1;
                goto again;
            }
 
            pos = tok_start + 1;
            if (std::isalpha(ch)) {
                minus_state = MinusState::Operator;
                return scan_id();
            }
 
            const MinusState current_minus_state = minus_state;
            if (minus_state == MinusState::Operator) {
                minus_state = MinusState::Number;
            }
 
            switch (ch) {
                case '+':
                    return make_token(Token::Kind::Plus);
                case '-':
                    if (current_minus_state == MinusState::Operator) {
                        return make_token(Token::Kind::Minus);
                    }
                    return scan_number();
                case '*':
                    return make_token(Token::Kind::Times);
                case '/':
                    return make_token(Token::Kind::Slash);
                case '^':
                    return make_token(Token::Kind::Power);
                case '%':
                    return make_token(Token::Kind::Percent);
                case '.':
                    return make_token(Token::Kind::Dot);
                case ',':
                    return make_token(Token::Kind::Comma);
                case ':':
                    return make_token(Token::Kind::Colon);
                case '(':
                    return make_token(Token::Kind::LeftParen);
                case ')':
                    minus_state = MinusState::Operator;
                    return make_token(Token::Kind::RightParen);
                case '[':
                    return make_token(Token::Kind::LeftBracket);
                case ']':
                    minus_state = MinusState::Operator;
                    return make_token(Token::Kind::RightBracket);
                case '{':
                    return make_token(Token::Kind::LeftBrace);
                case '}':
                    minus_state = MinusState::Operator;
                    return make_token(Token::Kind::RightBrace);
                case '>':
                    if (pos < m_in.size() && m_in[pos] == '=') {
                        pos += 1;
                        return make_token(Token::Kind::GreaterEqual);
                    }
                    return make_token(Token::Kind::GreaterThan);
                case '<':
                    if (pos < m_in.size() && m_in[pos] == '=') {
                        pos += 1;
                        return make_token(Token::Kind::LessEqual);
                    }
                    return make_token(Token::Kind::LessThan);
                case '=':
                    if (pos < m_in.size() && m_in[pos] == '=') {
                        pos += 1;
                        return make_token(Token::Kind::Equal);
                    }
                    return make_token(Token::Kind::Unknown);
                case '!':
                    if (pos < m_in.size() && m_in[pos] == '=') {
                        pos += 1;
                        return make_token(Token::Kind::NotEqual);
                    }
                    return make_token(Token::Kind::Unknown);
                case '\"':
                    return scan_string();
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    minus_state = MinusState::Operator;
                    return scan_number();
                case '_':
                case '@':
                case '$':
                    minus_state = MinusState::Operator;
                    return scan_id();
                default:
                    return make_token(Token::Kind::Unknown);
            }
        }

References inja::Token::Colon, inja::Token::Comma, inja::Token::Dot, inja::Token::Eof, inja::Token::Equal, inja::Token::GreaterEqual, inja::Token::GreaterThan, inja::Token::LeftBrace, inja::Token::LeftBracket, inja::Token::LeftParen, inja::Token::LessEqual, inja::Token::LessThan, m_in, make_token(), inja::Token::Minus, minus_state, inja::Token::NotEqual, Number, Operator, inja::Token::Percent, inja::Token::Plus, pos, inja::Token::Power, inja::Token::RightBrace, inja::Token::RightBracket, inja::Token::RightParen, scan_id(), scan_number(), scan_string(), skip_whitespaces_and_first_newline(), skip_whitespaces_and_newlines(), inja::Token::Slash, inja::string_view::starts_with(), state, Text, inja::Token::Times, tok_start, and inja::Token::Unknown.

Referenced by scan().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ scan_id()

Token inja::Lexer::scan_id ( )

inline, private

Definition at line 1261 of file inja.hpp.

                        {
            // Extends the current token over every following identifier character: alphanumerics plus
            // '.', '/', '_' and '-'. The first character was already consumed by the caller.
            while (pos < m_in.size()) {
                const char ch = m_in[pos];
                // std::isalnum has undefined behaviour for negative arguments other than EOF, so the
                // character (possibly a UTF-8 byte with the high bit set) is converted to unsigned char first.
                const bool is_id_char =
                    std::isalnum(static_cast<unsigned char>(ch)) != 0 || ch == '.' || ch == '/' || ch == '_' || ch == '-';
                if (!is_id_char) {
                    break;
                }
                pos += 1;
            }
            return make_token(Token::Kind::Id);
        }

References inja::Token::Id, m_in, make_token(), and pos.

Referenced by scan_body().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ scan_number()

Token inja::Lexer::scan_number ( )

inline, private

Definition at line 1275 of file inja.hpp.

                            {
            // Extends the current token over everything that can belong to a number literal. The first
            // character (a digit or a leading '-') was already consumed by the caller.
            while (pos < m_in.size()) {
                const char ch = m_in[pos];
                // A sign only belongs to the number directly after an exponent marker (or at offset 0).
                const bool sign_allowed = pos == 0 || m_in[pos - 1] == 'e' || m_in[pos - 1] == 'E';
                // be very permissive in lexer (we'll catch errors when conversion happens)
                // std::isdigit has undefined behaviour for negative arguments other than EOF, so the
                // character is converted to unsigned char first.
                const bool is_number_char = std::isdigit(static_cast<unsigned char>(ch)) != 0 || ch == '.' || ch == 'e' ||
                                            ch == 'E' || ((ch == '+' || ch == '-') && sign_allowed);
                if (!is_number_char) {
                    break;
                }
                pos += 1;
            }
            return make_token(Token::Kind::Number);
        }

References m_in, make_token(), inja::Token::Number, and pos.

Referenced by scan_body().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ scan_string()

Token inja::Lexer::scan_string ( )

inline, private

Definition at line 1292 of file inja.hpp.

                            {
            // Extends the current token up to and including the closing quote. The opening quote sits at
            // tok_start and was already consumed by the caller. An unterminated string runs to the end of
            // the input.
            const char quote = m_in[tok_start];
            bool escape{false};
            while (pos < m_in.size()) {
                const char ch = m_in[pos++];
                if (escape) {
                    // The character after a backslash is taken literally. This includes a second backslash,
                    // which must end the escape: otherwise the closing quote of a string ending in an escaped
                    // backslash ("a\\") would be mistaken for an escaped quote.
                    escape = false;
                } else if (ch == '\\') {
                    escape = true;
                } else if (ch == quote) {
                    break;
                }
            }
            return make_token(Token::Kind::String);
        }

References m_in, make_token(), pos, inja::Token::String, and tok_start.

Referenced by scan_body().

Here is the call graph for this function:

Here is the caller graph for this function:

◆ skip_whitespaces_and_first_newline()

void inja::Lexer::skip_whitespaces_and_first_newline ( )

inline, private

Definition at line 1322 of file inja.hpp.

                                                  {
            // Advances pos over spaces and tabs and then over at most one line break
            // ("\n", "\r" or "\r\n").
            const std::size_t length = m_in.size();
            while (pos < length && (m_in[pos] == ' ' || m_in[pos] == '\t')) {
                ++pos;
            }
            if (pos >= length) {
                return;
            }

            const char first = m_in[pos];
            if (first == '\n') {
                ++pos;
                return;
            }
            if (first == '\r') {
                ++pos;
                // Treat "\r\n" as a single line break.
                if (pos < length && m_in[pos] == '\n') {
                    ++pos;
                }
            }
        }

References m_in, and pos.

Referenced by scan(), and scan_body().

Here is the caller graph for this function:

◆ skip_whitespaces_and_newlines()

void inja::Lexer::skip_whitespaces_and_newlines ( )

inline, private

Definition at line 1314 of file inja.hpp.

                                             {
            // Advances pos to the next character that is neither a space, tab, '\n' nor '\r', or to the end
            // of the input when only such characters remain.
            if (pos >= m_in.size()) {
                return;
            }
            const std::size_t next = m_in.find_first_not_of(" \t\n\r", pos);
            pos = (next == std::string_view::npos) ? m_in.size() : next;
        }

References m_in, and pos.

Referenced by scan_body().

Here is the caller graph for this function:

◆ start()

void inja::Lexer::start ( std::string_view input )

inline

Definition at line 1368 of file inja.hpp.

                                         {
            m_in = input;
            tok_start = 0;
            pos = 0;
            state = State::Text;
            minus_state = MinusState::Number;
 
            // Consume byte order mark (BOM) for UTF-8
            if (inja::string_view::starts_with(m_in, "\xEF\xBB\xBF")) {
                m_in = m_in.substr(3);
            }
        }

References m_in, minus_state, Number, pos, inja::string_view::starts_with(), state, Text, and tok_start.

Referenced by inja::Parser::parse_into().

Here is the call graph for this function:

Here is the caller graph for this function:

Member Data Documentation

◆ config

const LexerConfig& inja::Lexer::config

private

Definition at line 1119 of file inja.hpp.

Referenced by get_config(), Lexer(), and scan().

◆ m_in

std::string_view inja::Lexer::m_in

private

Definition at line 1123 of file inja.hpp.

Referenced by current_position(), make_token(), scan(), scan_body(), scan_id(), scan_number(), scan_string(), skip_whitespaces_and_first_newline(), skip_whitespaces_and_newlines(), and start().

◆ minus_state

MinusState inja::Lexer::minus_state

private

Definition at line 1122 of file inja.hpp.

Referenced by Lexer(), scan_body(), and start().

◆ pos

size_t inja::Lexer::pos

private

Definition at line 1125 of file inja.hpp.

Referenced by make_token(), scan(), scan_body(), scan_id(), scan_number(), scan_string(), skip_whitespaces_and_first_newline(), skip_whitespaces_and_newlines(), and start().

◆ state

State inja::Lexer::state

private

Definition at line 1121 of file inja.hpp.

Referenced by Lexer(), scan(), scan_body(), and start().

◆ tok_start

size_t inja::Lexer::tok_start

private

Definition at line 1124 of file inja.hpp.

Referenced by current_position(), make_token(), scan(), scan_body(), scan_string(), and start().

The documentation for this class was generated from the following file:

lib/inja.hpp

MQTTSuite

Public Member Functions

Private Types

Private Member Functions

Static Private Member Functions

Private Attributes

Detailed Description

Member Enumeration Documentation

◆ MinusState

◆ State

Constructor & Destructor Documentation

◆ Lexer()

Member Function Documentation

◆ clear_final_line_if_whitespace()

◆ current_position()

◆ get_config()

◆ make_token()

◆ scan()

◆ scan_body()

◆ scan_id()

◆ scan_number()

◆ scan_string()

◆ skip_whitespaces_and_first_newline()

◆ skip_whitespaces_and_newlines()

◆ start()

Member Data Documentation

◆ config

◆ m_in

◆ minus_state

◆ pos

◆ state

◆ tok_start