diff --git a/ruby/lox.rb b/ruby/lox.rb
index e2b1c4a..e1dde32 100755
--- a/ruby/lox.rb
+++ b/ruby/lox.rb
@@ -1,5 +1,7 @@
 #!/usr/bin/env ruby -w
 
+require "strscan"
+
 module Lox
   class Error < StandardError
     def initialize(line:, where: "", message:)
@@ -32,7 +34,7 @@ module Lox
   end
 
   def self.run(src)
-    Runner.new(src).run
+    Runner.new.run(src)
   end
 
   def self.error(line, msg)
@@ -40,17 +42,133 @@ module Lox
   end
 
   class Runner
-    def initialize(scanner:)
+    def initialize(scanner: Scanner.new)
       @scanner = scanner
     end
 
     def run(src)
-      @scanner.scan(src)
+      @scanner.scan(src).each do |token|
+        puts token
+      end
     end
   end
 
   class Scanner
+    # Reserved words mapped to their token types, keyed by their source text.
+    KEYWORDS = {
+      and: :AND,
+      class: :CLASS,
+      else: :ELSE,
+      false: :FALSE,
+      for: :FOR,
+      fun: :FUN,
+      if: :IF,
+      nil: :NIL,
+      or: :OR,
+      print: :PRINT,
+      return: :RETURN,
+      super: :SUPER,
+      this: :THIS,
+      true: :TRUE,
+      var: :VAR,
+      while: :WHILE,
+    }.transform_keys(&:to_s).freeze
+
+    # Mutable scanning state: the StringScanner cursor (+ss+), the tokens and
+    # errors collected so far, and the current line number (starting at 1).
+    State = Struct.new(:ss, :tokens, :errors, :line) do
+      def eos? = ss.eos?
+      def scan(re) = ss.scan(re)
+      def pos = ss.pos
+
+      # Appends a token of +type+ tagged with the current line. The lexeme
+      # defaults to the text of the most recent successful match.
+      def add_token(type, text: nil, literal: nil)
+        text ||= ss.matched
+        self.tokens << Token.new(type, text, literal, line)
+      end
+    end
+
+    # Splits +src+ into an array of Tokens. Comments and whitespace produce no
+    # tokens. Unrecognized characters are recorded as errors on the internal
+    # state and skipped; only the tokens are returned.
     def scan(src)
+      state = State.new(StringScanner.new(src), [], [], 1)
+
+      until state.eos?
+        case
+        # Two-character operators must be tried before their one-character prefixes.
+        when state.scan(/\(/) then state.add_token(:LEFT_PAREN)
+        when state.scan(/\)/) then state.add_token(:RIGHT_PAREN)
+        when state.scan(/\{/) then state.add_token(:LEFT_BRACE)
+        when state.scan(/}/) then state.add_token(:RIGHT_BRACE)
+        when state.scan(/,/) then state.add_token(:COMMA)
+        when state.scan(/\./) then state.add_token(:DOT)
+        when state.scan(/-/) then state.add_token(:MINUS)
+        when state.scan(/\+/) then state.add_token(:PLUS)
+        when state.scan(/;/) then state.add_token(:SEMICOLON)
+        when state.scan(/\*/) then state.add_token(:STAR)
+        when state.scan(/!=/) then state.add_token(:BANG_EQUAL)
+        when state.scan(/!/) then state.add_token(:BANG)
+        when state.scan(/==/) then state.add_token(:EQUAL_EQUAL)
+        when state.scan(/=/) then state.add_token(:EQUAL)
+        when state.scan(/<=/) then state.add_token(:LESS_EQUAL)
+        when state.scan(/</) then state.add_token(:LESS)
+        when state.scan(/>=/) then state.add_token(:GREATER_EQUAL)
+        when state.scan(/>/) then state.add_token(:GREATER)
+        when state.scan(/\/\/(?~\n)+/) # ignore comment
+        when state.scan(/\//) then state.add_token(:SLASH)
+        when state.scan(/[ \r\t]/) # ignore whitespace
+        when state.scan(/\n/) then state.line += 1
+        when state.scan(/"/)
+          scan_str(state)
+        when number = state.scan(/\d+(\.\d+)?/)
+          state.add_token(:NUMBER, literal: number.to_f)
+        when identifier = state.scan(/[a-zA-Z_]\w*/)
+          type = KEYWORDS.fetch(identifier, :IDENTIFIER)
+          state.add_token(type)
+        else
+          state.errors << Error.new(line: state.line, message: "Unexpected character.")
+          state.scan(/./m) # consume the offending character so the loop advances
+        end
+      end
+
+      state.tokens
+    end
+
+    private
+
+    # Scans the rest of a string literal whose opening quote has already been
+    # consumed. Adds a STRING token whose lexeme includes both quotes, or
+    # records an "Unterminated string." error if the input ends first.
+    def scan_str(state)
+      text = ?"
+      loop do
+        case
+        when state.scan(/"/)
+          text << ?"
+          state.add_token(:STRING, text:, literal: text[1..-2])
+          return
+        when state.scan(/\n/)
+          text << ?\n
+          state.line += 1
+        when state.eos?
+          state.errors << Error.new(line: state.line, message: "Unterminated string.")
+          return
+        when c = state.scan(/./)
+          text << c
+        else
+          fail "unreachable!"
+        end
+      end
+    end
+  end
+
+  # A lexical token: its type, the source text it came from, an optional
+  # literal value, and the scanner's line number at the time it was added.
+  Token = Struct.new(:type, :lexeme, :literal, :line) do
+    def to_s
+      "#{type} #{lexeme} #{literal}"
     end
   end
 end
diff --git a/ruby/test_lox.rb b/ruby/test_lox.rb
index b0f493a..b9aee89 100644
--- a/ruby/test_lox.rb
+++ b/ruby/test_lox.rb
@@ -31,3 +31,77 @@ class TestRunner < Minitest::Test
     assert_equal %w[ some tokens ], tokens
   end
 end
+
+class TestScanner < Minitest::Test
+  def setup
+    @scanner = Lox::Scanner.new
+  end
+
+  def test_basic_tokens
+    %w[( LEFT_PAREN
+       ) RIGHT_PAREN
+       { LEFT_BRACE
+       } RIGHT_BRACE
+       , COMMA
+       . DOT
+       - MINUS
+       + PLUS
+       ; SEMICOLON
+       * STAR
+       != BANG_EQUAL
+       ! BANG
+       == EQUAL_EQUAL
+       = EQUAL
+       <= LESS_EQUAL
+       < LESS
+       >= GREATER_EQUAL
+       > GREATER
+       / SLASH].each_slice(2).to_h.transform_values(&:to_sym).each do |str, token_type|
+      assert_equal [token_type.to_sym], @scanner.scan(str).map(&:type)
+    end
+  end
+
+  def test_comments_and_whitespace
+    tokens = @scanner.scan(<<~SRC)
+      (\t) // here lies a comment
+      .
+    SRC
+
+    assert_equal %i[LEFT_PAREN RIGHT_PAREN DOT], tokens.map(&:type)
+  end
+
+  def test_line_numbers
+    tokens = @scanner.scan(<<~SRC)
+      (
+      )
+    SRC
+
+    assert_equal [1, 2], tokens.map(&:line)
+  end
+
+  def test_strings
+    assert_equal [Lox::Token.new(:STRING, '""', "", 1)], @scanner.scan('""')
+    assert_equal [], @scanner.scan('"') # TODO test the error once it's exposed
+    assert_equal [Lox::Token.new(:STRING, '"foo"', "foo", 1)], @scanner.scan('"foo"')
+    assert_equal [Lox::Token.new(:STRING, "\"foo\nbar\"", "foo\nbar", 2)], @scanner.scan("\"foo\nbar\"")
+  end
+
+  def test_numbers
+    assert_equal [
+      Lox::Token.new(:NUMBER, "123", 123.0, 1),
+      Lox::Token.new(:NUMBER, "123.4", 123.4, 1),
+    ], @scanner.scan("123 123.4")
+  end
+
+  def test_identifiers
+    assert_equal [
+      Lox::Token.new(:OR, "or", nil, 1),
+      Lox::Token.new(:IDENTIFIER, "orchid", nil, 1),
+      Lox::Token.new(:IDENTIFIER, "x", nil, 1),
+    ], @scanner.scan("or orchid x")
+  end
+
+  def test_unexpected_character
+    assert_equal %i[LEFT_PAREN RIGHT_PAREN], @scanner.scan("(@)").map(&:type)
+  end
+end