simplify token scanning

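Stole this idea from @tenderlove: drive the scanner from a token table (`TOKENS`) matched with a single `Regexp.union`, instead of one `when` branch per token.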

FossilOrigin-Name: 3d144d88cb447e1f5d4c79c0a3185cd80fe879b3ab8d039d20bec255db7bbbb5
private
alpha 2 years ago
parent b4e4edfe8c
commit 7559cd2b8e

@ -54,24 +54,47 @@ module Lox
end end
class Scanner class Scanner
KEYWORDS = { TOKENS = %w[
and: :AND, ( LEFT_PAREN
class: :CLASS, ) RIGHT_PAREN
else: :ELSE, { LEFT_BRACE
false: :FALSE, } RIGHT_BRACE
for: :FOR, , COMMA
fun: :FUN, . DOT
if: :IF, - MINUS
nil: :NIL, + PLUS
or: :OR, ; SEMICOLON
print: :PRINT, * STAR
return: :RETURN, != BANG_EQUAL
super: :SUPER, ! BANG
this: :THIS, == EQUAL_EQUAL
true: :TRUE, = EQUAL
var: :VAR, <= LESS_EQUAL
while: :WHILE, < LESS
}.transform_keys(&:to_s) >= GREATER_EQUAL
> GREATER
/ SLASH
].each_slice(2).to_h {|k,v| [k, v.to_sym] }
TOKENS_RE = Regexp.union(TOKENS.keys)
KEYWORDS = %w[
and AND
class CLASS
else ELSE
false FALSE
for FOR
fun FUN
if IF
nil NIL
or OR
print PRINT
return RETURN
super SUPER
this THIS
true TRUE
var VAR
while WHILE
].each_slice(2).to_h.transform_values(&:to_sym)
State = Struct.new(:ss, :tokens, :errors, :line) do State = Struct.new(:ss, :tokens, :errors, :line) do
def eos? = ss.eos? def eos? = ss.eos?
@ -89,30 +112,14 @@ module Lox
until state.eos? until state.eos?
case case
when state.scan(/\(/) then state.add_token(:LEFT_PAREN) when state.scan(/\/\/(?~\n)/) # ignore line comment
when state.scan(/\)/) then state.add_token(:RIGHT_PAREN)
when state.scan(/\{/) then state.add_token(:LEFT_BRACE)
when state.scan(/}/) then state.add_token(:RIGHT_BRACE)
when state.scan(/,/) then state.add_token(:COMMA)
when state.scan(/\./) then state.add_token(:DOT)
when state.scan(/-/) then state.add_token(:MINUS)
when state.scan(/\+/) then state.add_token(:PLUS)
when state.scan(/;/) then state.add_token(:SEMICOLON)
when state.scan(/\*/) then state.add_token(:STAR)
when state.scan(/!=/) then state.add_token(:BANG_EQUAL)
when state.scan(/!/) then state.add_token(:BANG)
when state.scan(/==/) then state.add_token(:EQUAL_EQUAL)
when state.scan(/=/) then state.add_token(:EQUAL)
when state.scan(/<=/) then state.add_token(:LESS_EQUAL)
when state.scan(/</) then state.add_token(:LESS)
when state.scan(/>=/) then state.add_token(:GREATER_EQUAL)
when state.scan(/>/) then state.add_token(:GREATER)
when state.scan(/\/\/(?~\n)+/) # ignore line comment
when state.scan(/\/\*/) when state.scan(/\/\*/)
scan_block_comment(state) scan_block_comment(state)
when state.scan(/\//) then state.add_token(:SLASH) when matched = state.scan(TOKENS_RE)
state.add_token(TOKENS.fetch(matched))
when state.scan(/[ \r\t]/) # ignore whitespace when state.scan(/[ \r\t]/) # ignore whitespace
when state.scan(/\n/) then state.line += 1 when state.scan(/\n/)
state.line += 1
when state.scan(/"/) when state.scan(/"/)
scan_str(state) scan_str(state)
when number = state.scan(/\d+(\.\d+)?/) when number = state.scan(/\d+(\.\d+)?/)
@ -145,7 +152,7 @@ module Lox
when state.eos? when state.eos?
state.errors << Error.new(line: state.line, message: "Unterminated string.") state.errors << Error.new(line: state.line, message: "Unterminated string.")
return return
when c = state.scan(/./) when c = state.scan(/(?~"|\n)/)
text << c text << c
else else
fail "unreachable!" fail "unreachable!"
@ -165,7 +172,7 @@ module Lox
when state.eos? when state.eos?
state.errors << Error.new(line: state.line, message: "Unterminated block comment.") state.errors << Error.new(line: state.line, message: "Unterminated block comment.")
return return
when c = state.scan(/./) when state.scan(/./)
# no-op # no-op
else else
fail "unreachable!" fail "unreachable!"

@ -65,7 +65,7 @@ class TestScanner < Minitest::Test
def test_comments_and_whitespace def test_comments_and_whitespace
tokens = @scanner.scan(<<~SRC) tokens = @scanner.scan(<<~SRC)
(\t) // here lies a comment (\t) // here lies a comment
. . //
SRC SRC
assert_equal %i[LEFT_PAREN RIGHT_PAREN DOT EOF], tokens.map(&:type) assert_equal %i[LEFT_PAREN RIGHT_PAREN DOT EOF], tokens.map(&:type)

Loading…
Cancel
Save