diff --git a/rust/src/compiler.rs b/rust/src/compiler.rs new file mode 100644 index 0000000..0899064 --- /dev/null +++ b/rust/src/compiler.rs @@ -0,0 +1,22 @@ +use crate::scanner::{Scanner, TokenKind}; +use color_eyre::eyre::Result; + +pub fn compile(source: &str) -> Result<()> { + let mut scanner = Scanner::new(source); + + let mut line = None; + loop { + let token = scanner.scan()?; + if Some(token.line) != line { + print!("{:4} ", token.line); + line = Some(token.line); + } else { + print!(" | "); + } + println!("{:2?} '{}'", token.kind, token.value); + + if token.kind == TokenKind::Eof { + return Ok(()); + } + } +} diff --git a/rust/src/error.rs b/rust/src/error.rs index 4cf7d4c..60cfbe9 100644 --- a/rust/src/error.rs +++ b/rust/src/error.rs @@ -1,5 +1,5 @@ use std::io; -use std::process::{ExitCode, Termination}; +// use std::process::{ExitCode, Termination}; use thiserror::Error; @@ -22,13 +22,13 @@ pub enum Error { // Doesn't actually work with eyre... yet? But I'm // willing to give up "nice" exit status codes for // eyre's error handling. -impl Termination for Error { - fn report(self) -> ExitCode { - ExitCode::from(match self { - Error::Compile => 65, - Error::Runtime => 70, - Error::ReadFile { path, source } => 74, - Error::Usage => 64, - }) - } -} +// impl Termination for Error { +// fn report(self) -> ExitCode { +// ExitCode::from(match self { +// Error::Compile => 65, +// Error::Runtime => 70, +// Error::ReadFile { path, source } => 74, +// Error::Usage => 64, +// }) +// } +// } diff --git a/rust/src/main.rs b/rust/src/main.rs index dd2616b..c16b643 100644 --- a/rust/src/main.rs +++ b/rust/src/main.rs @@ -11,7 +11,9 @@ use tracing::Level; use tracing_subscriber::FmtSubscriber; mod chunk; +mod compiler; mod error; +mod scanner; mod value; mod vm; diff --git a/rust/src/scanner.rs b/rust/src/scanner.rs new file mode 100644 index 0000000..c3c9a44 --- /dev/null +++ b/rust/src/scanner.rs @@ -0,0 +1,353 @@ +use color_eyre::eyre::Result; + +pub struct Scanner<'a> { + source: &'a [u8], + start: usize, + current: usize, + line: usize, +} + +impl<'a> Scanner<'a> { + pub fn new(source: &'a str) -> Self { + Self { + source: source.as_bytes(), + start: 0, + current: 0, + line: 1, + } + } + + pub fn scan(&mut self) -> Result { + self.skip_whitespace(); + + self.start = self.current; + + if self.is_at_end() { + return self.make_token(TokenKind::Eof); + } + + let c = self.advance(); + if is_alpha(c) { + return self.identifier(); + } + if c.is_digit(10) { + return self.number(); + } + + return match c { + '(' => self.make_token(TokenKind::LeftParen), + ')' => self.make_token(TokenKind::RightParen), + '{' => self.make_token(TokenKind::LeftBrace), + '}' => self.make_token(TokenKind::RightBrace), + ';' => self.make_token(TokenKind::Semicolon), + ',' => self.make_token(TokenKind::Comma), + '.' => self.make_token(TokenKind::Dot), + '-' => self.make_token(TokenKind::Minus), + '+' => self.make_token(TokenKind::Plus), + '/' => self.make_token(TokenKind::Slash), + '*' => self.make_token(TokenKind::Star), + '!' => { + if self.is_match('=') { + self.make_token(TokenKind::BangEqual) + } else { + self.make_token(TokenKind::Bang) + } + } + '=' => { + if self.is_match('=') { + self.make_token(TokenKind::EqualEqual) + } else { + self.make_token(TokenKind::Equal) + } + } + '<' => { + if self.is_match('=') { + self.make_token(TokenKind::LessEqual) + } else { + self.make_token(TokenKind::Less) + } + } + '>' => { + if self.is_match('=') { + self.make_token(TokenKind::GreaterEqual) + } else { + self.make_token(TokenKind::Greater) + } + } + '"' => self.string(), + _ => self.error_token("Unexpected character."), + }; + } + + fn is_at_end(&self) -> bool { + self.source[self.current] == 0 + } + + fn make_token(&self, kind: TokenKind) -> Result { + Ok(Token { + kind, + value: std::str::from_utf8(&self.source[self.start..self.current])?, + line: self.line, + }) + } + + fn error_token(&self, message: &'static str) -> Result { + Ok(Token { + kind: TokenKind::Error, + value: message, + line: self.line, + }) + } + + fn advance(&mut self) -> char { + self.current += 1; + self.source[self.current - 1] as char + } + + fn is_match(&mut self, expected: char) -> bool { + if self.is_at_end() { + return false; + } + + if self.source[self.current] as char != expected { + return false; + } + + self.current += 1; + true + } + + fn skip_whitespace(&mut self) { + loop { + let c = self.peek(); + match c { + ' ' | '\r' | '\t' => { + self.advance(); + break; + } + '\n' => { + self.line += 1; + self.advance(); + break; + } + '/' => { + if self.peek_next() == '/' { + while self.peek() != '\n' && !self.is_at_end() { + self.advance(); + } + } else { + return; + } + } + _ => { + return; + } + } + } + } + + fn peek(&self) -> char { + self.source[self.current] as char + } + + fn peek_next(&mut self) -> char { + if self.is_at_end() { + 0 as char + } else { + self.source[self.current + 1] as char + } + } + + fn string(&mut self) -> Result { + while self.peek() != '"' && !self.is_at_end() { + if self.peek() == '\n' { + self.line += 1; + } + self.advance(); + } + + if self.is_at_end() { + return self.error_token("Unterminated string."); + } + + self.advance(); + self.make_token(TokenKind::String) + } + + fn number(&mut self) -> Result { + while self.peek().is_digit(10) { + self.advance(); + } + + if self.peek() == '.' && self.peek_next().is_digit(10) { + self.advance(); + + while self.peek().is_digit(10) { + self.advance(); + } + } + + self.make_token(TokenKind::Number) + } + + fn identifier(&mut self) -> Result { + while is_alpha(self.peek()) || self.peek().is_digit(10) { + self.advance(); + } + + self.make_token(self.identifier_type()) + } + + fn identifier_type(&self) -> TokenKind { + match self.source[self.start] as char { + 'a' => self.check_keyword(1, "nd", TokenKind::And), + 'c' => self.check_keyword(1, "lass", TokenKind::Class), + 'e' => self.check_keyword(1, "lse", TokenKind::Else), + 'i' => self.check_keyword(1, "f", TokenKind::If), + 'n' => self.check_keyword(1, "il", TokenKind::Nil), + 'o' => self.check_keyword(1, "r", TokenKind::Or), + 'p' => self.check_keyword(1, "rint", TokenKind::Print), + 'r' => self.check_keyword(1, "eturn", TokenKind::Return), + 's' => self.check_keyword(1, "uper", TokenKind::Super), + 'v' => self.check_keyword(1, "ar", TokenKind::Var), + 'w' => self.check_keyword(1, "hile", TokenKind::While), + 'f' => { + if self.current - self.start > 1 { + match self.source[self.start + 1] as char { + 'a' => self.check_keyword(2, "lse", TokenKind::False), + 'o' => self.check_keyword(2, "r", TokenKind::For), + 'u' => self.check_keyword(2, "n", TokenKind::Fun), + _ => TokenKind::Identifier + } + } else { + TokenKind::Identifier + } + } + 't' => { + if self.current - self.start > 1 { + match self.source[self.start + 1] as char { + 'h' => self.check_keyword(2, "is", TokenKind::This), + 'r' => self.check_keyword(2, "ue", TokenKind::True), + _ => TokenKind::Identifier + } + } else { + TokenKind::Identifier + } + } + _ => TokenKind::Identifier, + } + } + + fn check_keyword(&self, start: usize, rest: &str, kind: TokenKind) -> TokenKind { + // Do I have an off-by-one error here? + if &self.source[self.start + start..self.current] == rest.as_bytes() { + kind + } else { + TokenKind::Identifier + } + } +} + +fn is_alpha(c: char) -> bool { + c.is_ascii_alphabetic() || c == '_' +} + +pub struct Token<'a> { + pub kind: TokenKind, + pub value: &'a str, + pub line: usize, +} + +#[derive(Debug, PartialEq)] +pub enum TokenKind { + // Single-character tokens. + LeftParen, + RightParen, + LeftBrace, + RightBrace, + Comma, + Dot, + Minus, + Plus, + Semicolon, + Slash, + Star, + // One or two character tokens. + Bang, + BangEqual, + Equal, + EqualEqual, + Greater, + GreaterEqual, + Less, + LessEqual, + // Literals. + Identifier, + String, + Number, + // Keywords. + And, + Class, + Else, + False, + For, + Fun, + If, + Nil, + Or, + Print, + Return, + Super, + This, + True, + Var, + While, + // + Error, + Eof, +} + +#[cfg(test)] +mod tests { + // Note this useful idiom: importing names from outer (for mod tests) scope. + use super::*; + + #[test] + fn test_scan() { + assert_scan_token("(", TokenKind::LeftParen); + + assert_scan_token("!", TokenKind::Bang); + assert_scan_token("!=", TokenKind::BangEqual); + assert_scan_token("!a", TokenKind::Bang); + + assert_scan_token("a", TokenKind::Identifier); + assert_scan_token("_a", TokenKind::Identifier); + assert_scan_token("a1", TokenKind::Identifier); + + assert_scan_token("\"foo\"", TokenKind::String); + + assert_scan_token("1a", TokenKind::Number); + assert_scan_token("1.1", TokenKind::Number); + + assert_scan_token("and", TokenKind::And); + assert_scan_token("andy", TokenKind::Identifier); + + assert_scan_token("@", TokenKind::Error); + assert_scan_token("", TokenKind::Eof); + } + + #[test] + fn test_whitespace() { + assert_scan_token(" foo", TokenKind::Identifier); + assert_scan_token("\tfoo", TokenKind::Identifier); + assert_scan_token("// \n", TokenKind::Eof); + } + + fn assert_scan_token(source: &str, kind: TokenKind) { + let mut s = source.to_string(); + s.push(0 as char); + let mut scanner = Scanner::new(&s); + let token = scanner.scan().unwrap(); + assert_eq!(token.kind, kind); + } +} diff --git a/rust/src/vm.rs b/rust/src/vm.rs index e70d147..a4de50d 100644 --- a/rust/src/vm.rs +++ b/rust/src/vm.rs @@ -2,10 +2,9 @@ use std::ops::{Add, Div, Mul, Sub}; use color_eyre::eyre::Result; -use crate::{ - chunk::{Chunk, DisassembledInstruction, OpCode}, - value::Value, -}; +use crate::chunk::{Chunk, DisassembledInstruction, OpCode}; +use crate::compiler::compile; +use crate::value::Value; use tracing::debug; pub struct VM { @@ -16,6 +15,11 @@ pub struct VM { } impl VM { + pub fn interpret(source: &str) -> Result<()> { + compile(source); + Ok(()) + } + pub fn new(chunk: Chunk) -> Self { Self { chunk,