From 5d49f5ceea46c12402f10ceea46669d14175af56 Mon Sep 17 00:00:00 2001 From: Alpha Chen Date: Mon, 24 Oct 2022 15:28:12 -0700 Subject: [PATCH] refactoring Scanner --- rust/src/scanner.rs | 176 ++++++++++++++++++++------------------------ 1 file changed, 80 insertions(+), 96 deletions(-) diff --git a/rust/src/scanner.rs b/rust/src/scanner.rs index abfe0d3..9a897d8 100644 --- a/rust/src/scanner.rs +++ b/rust/src/scanner.rs @@ -4,8 +4,8 @@ pub struct Scanner<'a> { line: usize, } -fn is_alpha(c: char) -> bool { - c.is_ascii_alphabetic() || c == '_' +fn is_alpha(c: &char) -> bool { + c.is_ascii_alphabetic() || *c == '_' } impl<'a> Scanner<'a> { @@ -17,44 +17,32 @@ impl<'a> Scanner<'a> { } } - fn advance(&mut self) -> char { + fn advance(&mut self) -> Option<char> { self.current += 1; - self.source[self.current - 1] as char + self.source.get(self.current - 1).map(|&x| x as char) } - fn is_match(&mut self, expected: char) -> bool { - if self.peek() != Some(expected) { - return false; + fn advance_if<F: Fn(&char) -> bool>(&mut self, f: F) -> bool { + if matches!(self.peek(), Some(c) if f(&c)) { + self.current += 1; + true + } else { + false } - - self.current += 1; - true } fn skip_whitespace(&mut self) { loop { - match self.peek() { - Some(' ') | Some('\r') | Some('\t') => { - self.advance(); - break; - } - Some('\n') => { - self.line += 1; - self.advance(); - break; - } - Some('/') => { - if matches!(self.peek_next(), Some(c) if c == '/') { - while matches!(self.peek(), Some(c) if c != '\n') { - self.advance(); - } - } else { - return; - } - } - _ => { - return; - } + while self.advance_if(|&c| c == ' ' || c == '\r' || c == '\t') {} + + if self.peek() == Some('/') && self.peek_next() == Some('/') { + while self.advance_if(|&c| c != '\n') {} + } + + if self.advance_if(|&c| c == '\n') { + self.line += 1; + } else { + return; } } } @@ -68,42 +56,34 @@ impl<'a> Scanner<'a> { } fn string(&mut self) -> Token<'a> { - while matches!(self.peek(), Some(c) if c != '"') { - if self.peek() == Some('\n') { 
- self.line += 1; + loop { + match self.advance() { + Some('"') => break, + Some('\n') => { + self.line += 1; + } + Some(_) => {} + None => return Token::error(self, "Unterminated string."), } - self.advance(); } - if self.peek().is_none() { - // Maybe this should return an Err? - return Token::error(&self, "Unterminated string."); - } - - self.advance(); Token::new(self, TokenKind::String) } fn number(&mut self) -> Token<'a> { - while matches!(self.peek(), Some(c) if c.is_ascii_digit()) { - self.advance(); - } + while self.advance_if(char::is_ascii_digit) {} if self.peek() == Some('.') && matches!(self.peek_next(), Some(c) if c.is_ascii_digit()) { self.advance(); - while matches!(self.peek(), Some(c) if c.is_ascii_digit()) { - self.advance(); - } + while self.advance_if(char::is_ascii_digit) {} } Token::new(self, TokenKind::Number) } fn identifier(&mut self) -> Token<'a> { - while matches!(self.peek(), Some(c) if is_alpha(c) || c.is_ascii_digit()) { - self.advance(); - } + while self.advance_if(|c| is_alpha(c) || c.is_ascii_digit()) {} Token::new(self, self.identifier_type()) } @@ -143,55 +123,59 @@ impl<'a> Iterator for Scanner<'a> { fn next(&mut self) -> Option<Self::Item> { self.skip_whitespace(); - if self.peek().is_none() { - return Some(Token::new(self, TokenKind::Eof)); - } - - Some(match self.advance() { - c if is_alpha(c) => self.identifier(), - c if c.is_ascii_digit() => self.number(), - '(' => Token::new(self, TokenKind::LeftParen), - ')' => Token::new(self, TokenKind::RightParen), - '{' => Token::new(self, TokenKind::LeftBrace), - '}' => Token::new(self, TokenKind::RightBrace), - ';' => Token::new(self, TokenKind::Semicolon), - ',' => Token::new(self, TokenKind::Comma), - '.' => Token::new(self, TokenKind::Dot), - '-' => Token::new(self, TokenKind::Minus), - '+' => Token::new(self, TokenKind::Plus), - '/' => Token::new(self, TokenKind::Slash), - '*' => Token::new(self, TokenKind::Star), - '!' 
=> { - if self.is_match('=') { - Token::new(self, TokenKind::BangEqual) - } else { - Token::new(self, TokenKind::Bang) + self.advance() + .map(|c| match c { + c if is_alpha(&c) => self.identifier(), + c if c.is_ascii_digit() => self.number(), + '(' => Token::new(self, TokenKind::LeftParen), + ')' => Token::new(self, TokenKind::RightParen), + '{' => Token::new(self, TokenKind::LeftBrace), + '}' => Token::new(self, TokenKind::RightBrace), + ';' => Token::new(self, TokenKind::Semicolon), + ',' => Token::new(self, TokenKind::Comma), + '.' => Token::new(self, TokenKind::Dot), + '-' => Token::new(self, TokenKind::Minus), + '+' => Token::new(self, TokenKind::Plus), + '/' => Token::new(self, TokenKind::Slash), + '*' => Token::new(self, TokenKind::Star), + '!' => { + if self.advance_if(|&c| c == '=') { + Token::new(self, TokenKind::BangEqual) + } else { + Token::new(self, TokenKind::Bang) + } } - } - '=' => { - if self.is_match('=') { - Token::new(self, TokenKind::EqualEqual) - } else { - Token::new(self, TokenKind::Equal) + '=' => { + if self.advance_if(|&c| c == '=') { + Token::new(self, TokenKind::EqualEqual) + } else { + Token::new(self, TokenKind::Equal) + } } - } - '<' => { - if self.is_match('=') { - Token::new(self, TokenKind::LessEqual) - } else { - Token::new(self, TokenKind::Less) + '<' => { + if self.advance_if(|&c| c == '=') { + Token::new(self, TokenKind::LessEqual) + } else { + Token::new(self, TokenKind::Less) + } } - } - '>' => { - if self.is_match('=') { - Token::new(self, TokenKind::GreaterEqual) - } else { - Token::new(self, TokenKind::Greater) + '>' => { + if self.advance_if(|&c| c == '=') { + Token::new(self, TokenKind::GreaterEqual) + } else { + Token::new(self, TokenKind::Greater) + } } - } - '"' => self.string(), - _ => Token::error(self, "Unexpected character."), - }) + '"' => self.string(), + _ => Token::error(self, "Unexpected character."), + }) + .or({ + Some(Token { + kind: TokenKind::Eof, + value: "", + line: self.line, + }) + }) } }