From 7559cd2b8e2c25b2809635c47eb8310aff088f1c Mon Sep 17 00:00:00 2001 From: alpha Date: Mon, 18 Jul 2022 02:29:36 +0000 Subject: [PATCH] simplify token scanning Stole this idea from @tenderlove FossilOrigin-Name: 3d144d88cb447e1f5d4c79c0a3185cd80fe879b3ab8d039d20bec255db7bbbb5 --- ruby/lox.rb | 89 ++++++++++++++++++++++++++---------------------- ruby/test_lox.rb | 2 +- 2 files changed, 49 insertions(+), 42 deletions(-) diff --git a/ruby/lox.rb b/ruby/lox.rb index 9ac7357..f82ba5d 100755 --- a/ruby/lox.rb +++ b/ruby/lox.rb @@ -54,24 +54,47 @@ module Lox end class Scanner - KEYWORDS = { - and: :AND, - class: :CLASS, - else: :ELSE, - false: :FALSE, - for: :FOR, - fun: :FUN, - if: :IF, - nil: :NIL, - or: :OR, - print: :PRINT, - return: :RETURN, - super: :SUPER, - this: :THIS, - true: :TRUE, - var: :VAR, - while: :WHILE, - }.transform_keys(&:to_s) + TOKENS = %w[ + ( LEFT_PAREN + ) RIGHT_PAREN + { LEFT_BRACE + } RIGHT_BRACE + , COMMA + . DOT + - MINUS + + PLUS + ; SEMICOLON + * STAR + != BANG_EQUAL + ! BANG + == EQUAL_EQUAL + = EQUAL + <= LESS_EQUAL + < LESS + >= GREATER_EQUAL + > GREATER + / SLASH + ].each_slice(2).to_h {|k,v| [k, v.to_sym] } + TOKENS_RE = Regexp.union(TOKENS.keys) + + KEYWORDS = %w[ + and AND + class CLASS + else ELSE + false FALSE + for FOR + fun FUN + if IF + nil NIL + or OR + print PRINT + return RETURN + super SUPER + this THIS + true TRUE + var VAR + while WHILE + ].each_slice(2).to_h.transform_values(&:to_sym) State = Struct.new(:ss, :tokens, :errors, :line) do def eos? = ss.eos? @@ -89,30 +112,14 @@ module Lox until state.eos? 
case - when state.scan(/\(/) then state.add_token(:LEFT_PAREN) - when state.scan(/\)/) then state.add_token(:RIGHT_PAREN) - when state.scan(/\{/) then state.add_token(:LEFT_BRACE) - when state.scan(/}/) then state.add_token(:RIGHT_BRACE) - when state.scan(/,/) then state.add_token(:COMMA) - when state.scan(/\./) then state.add_token(:DOT) - when state.scan(/-/) then state.add_token(:MINUS) - when state.scan(/\+/) then state.add_token(:PLUS) - when state.scan(/;/) then state.add_token(:SEMICOLON) - when state.scan(/\*/) then state.add_token(:STAR) - when state.scan(/!=/) then state.add_token(:BANG_EQUAL) - when state.scan(/!/) then state.add_token(:BANG) - when state.scan(/==/) then state.add_token(:EQUAL_EQUAL) - when state.scan(/=/) then state.add_token(:EQUAL) - when state.scan(/<=/) then state.add_token(:LESS_EQUAL) - when state.scan(/</) then state.add_token(:LESS) - when state.scan(/>=/) then state.add_token(:GREATER_EQUAL) - when state.scan(/>/) then state.add_token(:GREATER) - when state.scan(/\/\/(?~\n)+/) # ignore line comment + when state.scan(/\/\/(?~\n)/) # ignore line comment when state.scan(/\/\*/) scan_block_comment(state) - when state.scan(/\//) then state.add_token(:SLASH) + when matched = state.scan(TOKENS_RE) + state.add_token(TOKENS.fetch(matched)) when state.scan(/[ \r\t]/) # ignore whitespace - when state.scan(/\n/) then state.line += 1 + when state.scan(/\n/) + state.line += 1 when state.scan(/"/) scan_str(state) when number = state.scan(/\d+(\.\d+)?/) @@ -145,7 +152,7 @@ module Lox when state.eos? state.errors << Error.new(line: state.line, message: "Unterminated string.") return - when c = state.scan(/./) + when c = state.scan(/(?~"|\n)/) text << c else fail "unreachable!" @@ -165,7 +172,7 @@ module Lox when state.eos? state.errors << Error.new(line: state.line, message: "Unterminated block comment.") return - when c = state.scan(/./) + when state.scan(/./) # no-op else fail "unreachable!" 
diff --git a/ruby/test_lox.rb b/ruby/test_lox.rb index 6114659..464361e 100644 --- a/ruby/test_lox.rb +++ b/ruby/test_lox.rb @@ -65,7 +65,7 @@ class TestScanner < Minitest::Test def test_comments_and_whitespace tokens = @scanner.scan(<<~SRC) (\t) // here lies a comment - . + . // SRC assert_equal %i[LEFT_PAREN RIGHT_PAREN DOT EOF], tokens.map(&:type)