From 1320a0988faab3c5e6c10df4828658f4380bb94a Mon Sep 17 00:00:00 2001
From: Alpha Chen
Date: Sun, 23 Oct 2022 14:58:00 -0700
Subject: [PATCH] mu

---
 rust/src/compiler.rs |   2 +-
 rust/src/scanner.rs  | 103 +++++++++++++++++++++++--------------
 2 files changed, 55 insertions(+), 50 deletions(-)

diff --git a/rust/src/compiler.rs b/rust/src/compiler.rs
index f5a4130..8f81807 100644
--- a/rust/src/compiler.rs
+++ b/rust/src/compiler.rs
@@ -1,4 +1,4 @@
-use crate::scanner::{Scanner, TokenKind};
+use crate::scanner::Scanner;
 use color_eyre::eyre::Result;
 
 pub fn compile(source: &str) -> Result<()> {
diff --git a/rust/src/scanner.rs b/rust/src/scanner.rs
index f0fdc18..abfe0d3 100644
--- a/rust/src/scanner.rs
+++ b/rust/src/scanner.rs
@@ -4,6 +4,10 @@ pub struct Scanner<'a> {
     line: usize,
 }
 
+fn is_alpha(c: char) -> bool {
+    c.is_ascii_alphabetic() || c == '_'
+}
+
 impl<'a> Scanner<'a> {
     pub fn new(source: &'a str) -> Self {
         Self {
@@ -13,22 +17,6 @@ impl<'a> Scanner<'a> {
         }
     }
 
-    fn make_token(&self, kind: TokenKind) -> Token<'a> {
-        Token {
-            kind,
-            value: std::str::from_utf8(&self.source[..self.current]).unwrap(),
-            line: self.line,
-        }
-    }
-
-    fn error_token(&self, message: &'static str) -> Token<'a> {
-        Token {
-            kind: TokenKind::Error,
-            value: message,
-            line: self.line,
-        }
-    }
-
     fn advance(&mut self) -> char {
         self.current += 1;
         self.source[self.current - 1] as char
@@ -75,7 +63,7 @@ impl<'a> Scanner<'a> {
         self.source.get(self.current).map(|&x| x as char)
     }
 
-    fn peek_next(&mut self) -> Option<char> {
+    fn peek_next(&self) -> Option<char> {
         self.source.get(self.current + 1).map(|&x| x as char)
     }
 
@@ -89,11 +77,11 @@ impl<'a> Scanner<'a> {
 
         if self.peek().is_none() {
             // Maybe this should return an Err?
-            return self.error_token("Unterminated string.");
+            return Token::error(&self, "Unterminated string.");
         }
 
         self.advance();
-        self.make_token(TokenKind::String)
+        Token::new(self, TokenKind::String)
     }
 
     fn number(&mut self) -> Token<'a> {
@@ -109,7 +97,7 @@ impl<'a> Scanner<'a> {
             }
         }
 
-        self.make_token(TokenKind::Number)
+        Token::new(self, TokenKind::Number)
     }
 
     fn identifier(&mut self) -> Token<'a> {
@@ -117,7 +105,7 @@ impl<'a> Scanner<'a> {
             self.advance();
         }
 
-        self.make_token(self.identifier_type())
+        Token::new(self, self.identifier_type())
     }
 
     fn identifier_type(&self) -> TokenKind {
@@ -154,66 +142,83 @@ impl<'a> Iterator for Scanner<'a> {
 
     fn next(&mut self) -> Option<Self::Item> {
         self.skip_whitespace();
-        self.peek()?;
+
+        if self.peek().is_none() {
+            return Some(Token::new(self, TokenKind::Eof));
+        }
 
         Some(match self.advance() {
             c if is_alpha(c) => self.identifier(),
             c if c.is_ascii_digit() => self.number(),
-            '(' => self.make_token(TokenKind::LeftParen),
-            ')' => self.make_token(TokenKind::RightParen),
-            '{' => self.make_token(TokenKind::LeftBrace),
-            '}' => self.make_token(TokenKind::RightBrace),
-            ';' => self.make_token(TokenKind::Semicolon),
-            ',' => self.make_token(TokenKind::Comma),
-            '.' => self.make_token(TokenKind::Dot),
-            '-' => self.make_token(TokenKind::Minus),
-            '+' => self.make_token(TokenKind::Plus),
-            '/' => self.make_token(TokenKind::Slash),
-            '*' => self.make_token(TokenKind::Star),
+            '(' => Token::new(self, TokenKind::LeftParen),
+            ')' => Token::new(self, TokenKind::RightParen),
+            '{' => Token::new(self, TokenKind::LeftBrace),
+            '}' => Token::new(self, TokenKind::RightBrace),
+            ';' => Token::new(self, TokenKind::Semicolon),
+            ',' => Token::new(self, TokenKind::Comma),
+            '.' => Token::new(self, TokenKind::Dot),
+            '-' => Token::new(self, TokenKind::Minus),
+            '+' => Token::new(self, TokenKind::Plus),
+            '/' => Token::new(self, TokenKind::Slash),
+            '*' => Token::new(self, TokenKind::Star),
             '!' => {
                 if self.is_match('=') {
-                    self.make_token(TokenKind::BangEqual)
+                    Token::new(self, TokenKind::BangEqual)
                 } else {
-                    self.make_token(TokenKind::Bang)
+                    Token::new(self, TokenKind::Bang)
                 }
             }
             '=' => {
                 if self.is_match('=') {
-                    self.make_token(TokenKind::EqualEqual)
+                    Token::new(self, TokenKind::EqualEqual)
                 } else {
-                    self.make_token(TokenKind::Equal)
+                    Token::new(self, TokenKind::Equal)
                 }
             }
            '<' => {
                 if self.is_match('=') {
-                    self.make_token(TokenKind::LessEqual)
+                    Token::new(self, TokenKind::LessEqual)
                 } else {
-                    self.make_token(TokenKind::Less)
+                    Token::new(self, TokenKind::Less)
                 }
             }
             '>' => {
                 if self.is_match('=') {
-                    self.make_token(TokenKind::GreaterEqual)
+                    Token::new(self, TokenKind::GreaterEqual)
                 } else {
-                    self.make_token(TokenKind::Greater)
+                    Token::new(self, TokenKind::Greater)
                }
             }
             '"' => self.string(),
-            _ => self.error_token("Unexpected character."),
+            _ => Token::error(self, "Unexpected character."),
         })
     }
 }
 
-fn is_alpha(c: char) -> bool {
-    c.is_ascii_alphabetic() || c == '_'
-}
-
 pub struct Token<'a> {
     pub kind: TokenKind,
     pub value: &'a str,
     pub line: usize,
 }
 
+impl<'a> Token<'a> {
+    fn new(scanner: &Scanner<'a>, kind: TokenKind) -> Self {
+        Token {
+            kind,
+            value: std::str::from_utf8(&scanner.source[..scanner.current]).unwrap(),
+            line: scanner.line,
+        }
+    }
+
+    fn error(scanner: &Scanner, message: &'static str) -> Self {
+        Token {
+            kind: TokenKind::Error,
+            value: message,
+            line: scanner.line,
+        }
+    }
+}
+
 #[derive(Debug, PartialEq)]
 pub enum TokenKind {
     // Single-character tokens.
@@ -293,7 +298,7 @@ mod tests {
         assert_source_scan("fa", Some(TokenKind::Identifier));
 
         assert_source_scan("@", Some(TokenKind::Error));
-        assert_source_scan("", None);
+        assert_source_scan("", Some(TokenKind::Eof));
     }
 
     #[test]
@@ -301,14 +306,14 @@ mod tests {
         let mut scanner = Scanner::new("()");
         assert_scan(&mut scanner, Some(TokenKind::LeftParen));
         assert_scan(&mut scanner, Some(TokenKind::RightParen));
-        assert_scan(&mut scanner, None);
+        assert_scan(&mut scanner, Some(TokenKind::Eof));
     }
 
     #[test]
     fn test_whitespace() {
        assert_source_scan(" foo", Some(TokenKind::Identifier));
         assert_source_scan("\tfoo", Some(TokenKind::Identifier));
-        assert_source_scan("// \n", None);
+        assert_source_scan("// \n", Some(TokenKind::Eof));
     }
 
     fn assert_source_scan(source: &str, kind: Option<TokenKind>) {
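
Usage sketch (not part of the patch): after this change the scanner's iterator
never returns None; it yields a TokenKind::Eof token at end of input, so a
caller breaks on Eof explicitly. Only Scanner and TokenKind below come from
the patch; the dump_tokens helper and its output format are illustrative.

use crate::scanner::{Scanner, TokenKind};

// Hypothetical helper, not in the repository: prints every token in `source`,
// stopping at the Eof token the scanner now produces instead of None.
pub fn dump_tokens(source: &str) {
    let mut scanner = Scanner::new(source);
    loop {
        // `next()` always yields Some(token) after this patch.
        let token = scanner.next().expect("scanner always yields a token");
        println!("{:4} {:?} {:?}", token.line, token.kind, token.value);
        if token.kind == TokenKind::Eof {
            break;
        }
    }
}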