From 9b69d3db207d027bbe171f1fe5014fd416fee045 Mon Sep 17 00:00:00 2001 From: Alpha Chen Date: Fri, 21 Oct 2022 20:03:45 -0700 Subject: [PATCH] iterator all the things --- rust/src/compiler.rs | 9 +-- rust/src/scanner.rs | 175 ++++++++++++++++++++++--------------------- 2 files changed, 91 insertions(+), 93 deletions(-) diff --git a/rust/src/compiler.rs b/rust/src/compiler.rs index 71ceefe..f5a4130 100644 --- a/rust/src/compiler.rs +++ b/rust/src/compiler.rs @@ -5,8 +5,7 @@ pub fn compile(source: &str) -> Result<()> { let mut scanner = Scanner::new(source); let mut line = None; - loop { - let token = scanner.scan(); + for token in &mut scanner { if Some(token.line) != line { print!("{:4} ", token.line); line = Some(token.line); @@ -14,9 +13,7 @@ pub fn compile(source: &str) -> Result<()> { print!(" | "); } println!("{:2?} '{}'", token.kind, token.value); - - if token.kind == TokenKind::Eof { - return Ok(()); - } } + + Ok(()) } diff --git a/rust/src/scanner.rs b/rust/src/scanner.rs index 6017646..2ed7a35 100644 --- a/rust/src/scanner.rs +++ b/rust/src/scanner.rs @@ -15,61 +15,7 @@ impl<'a> Scanner<'a> { } } - pub fn scan(&mut self) -> Token { - self.skip_whitespace(); - - if self.peek().is_none() { - return self.make_token(TokenKind::Eof); - } - - match self.advance() { - c if is_alpha(c) => self.identifier(), - c if c.is_ascii_digit() => self.number(), - '(' => self.make_token(TokenKind::LeftParen), - ')' => self.make_token(TokenKind::RightParen), - '{' => self.make_token(TokenKind::LeftBrace), - '}' => self.make_token(TokenKind::RightBrace), - ';' => self.make_token(TokenKind::Semicolon), - ',' => self.make_token(TokenKind::Comma), - '.' => self.make_token(TokenKind::Dot), - '-' => self.make_token(TokenKind::Minus), - '+' => self.make_token(TokenKind::Plus), - '/' => self.make_token(TokenKind::Slash), - '*' => self.make_token(TokenKind::Star), - '!' 
=> { - if self.is_match('=') { - self.make_token(TokenKind::BangEqual) - } else { - self.make_token(TokenKind::Bang) - } - } - '=' => { - if self.is_match('=') { - self.make_token(TokenKind::EqualEqual) - } else { - self.make_token(TokenKind::Equal) - } - } - '<' => { - if self.is_match('=') { - self.make_token(TokenKind::LessEqual) - } else { - self.make_token(TokenKind::Less) - } - } - '>' => { - if self.is_match('=') { - self.make_token(TokenKind::GreaterEqual) - } else { - self.make_token(TokenKind::Greater) - } - } - '"' => self.string(), - _ => self.error_token("Unexpected character."), - } - } - - fn make_token(&self, kind: TokenKind) -> Token { + fn make_token(&self, kind: TokenKind) -> Token<'a> { Token { kind, value: std::str::from_utf8(&self.source[..self.current]).unwrap(), @@ -77,7 +23,7 @@ impl<'a> Scanner<'a> { } } - fn error_token(&self, message: &'static str) -> Token { + fn error_token(&self, message: &'static str) -> Token<'a> { Token { kind: TokenKind::Error, value: message, @@ -135,7 +81,7 @@ impl<'a> Scanner<'a> { self.source.get(self.current + 1).map(|&x| x as char) } - fn string(&mut self) -> Token { + fn string(&mut self) -> Token<'a> { while matches!(self.peek(), Some(c) if c != '"') { if self.peek() == Some('\n') { self.line += 1; @@ -152,7 +98,7 @@ impl<'a> Scanner<'a> { self.make_token(TokenKind::String) } - fn number(&mut self) -> Token { + fn number(&mut self) -> Token<'a> { while matches!(self.peek(), Some(c) if c.is_ascii_digit()) { self.advance(); } @@ -168,7 +114,7 @@ impl<'a> Scanner<'a> { self.make_token(TokenKind::Number) } - fn identifier(&mut self) -> Token { + fn identifier(&mut self) -> Token<'a> { while matches!(self.peek(), Some(c) if is_alpha(c) || c.is_ascii_digit()) { self.advance(); } @@ -205,6 +151,61 @@ impl<'a> Scanner<'a> { } } +impl<'a> Iterator for Scanner<'a> { + type Item = Token<'a>; + + fn next(&mut self) -> Option<Self::Item> { + self.skip_whitespace(); + self.peek()?; + + Some(match self.advance() { + c if is_alpha(c) 
=> self.identifier(), + c if c.is_ascii_digit() => self.number(), + '(' => self.make_token(TokenKind::LeftParen), + ')' => self.make_token(TokenKind::RightParen), + '{' => self.make_token(TokenKind::LeftBrace), + '}' => self.make_token(TokenKind::RightBrace), + ';' => self.make_token(TokenKind::Semicolon), + ',' => self.make_token(TokenKind::Comma), + '.' => self.make_token(TokenKind::Dot), + '-' => self.make_token(TokenKind::Minus), + '+' => self.make_token(TokenKind::Plus), + '/' => self.make_token(TokenKind::Slash), + '*' => self.make_token(TokenKind::Star), + '!' => { + if self.is_match('=') { + self.make_token(TokenKind::BangEqual) + } else { + self.make_token(TokenKind::Bang) + } + } + '=' => { + if self.is_match('=') { + self.make_token(TokenKind::EqualEqual) + } else { + self.make_token(TokenKind::Equal) + } + } + '<' => { + if self.is_match('=') { + self.make_token(TokenKind::LessEqual) + } else { + self.make_token(TokenKind::Less) + } + } + '>' => { + if self.is_match('=') { + self.make_token(TokenKind::GreaterEqual) + } else { + self.make_token(TokenKind::Greater) + } + } + '"' => self.string(), + _ => self.error_token("Unexpected character."), + }) + } +} + fn is_alpha(c: char) -> bool { c.is_ascii_alphabetic() || c == '_' } @@ -271,54 +272,54 @@ mod tests { #[test] fn test_scan() { - assert_source_scan("(", TokenKind::LeftParen); + assert_source_scan("(", Some(TokenKind::LeftParen)); - assert_source_scan("!", TokenKind::Bang); - assert_source_scan("!=", TokenKind::BangEqual); - assert_source_scan("!a", TokenKind::Bang); + assert_source_scan("!", Some(TokenKind::Bang)); + assert_source_scan("!=", Some(TokenKind::BangEqual)); + assert_source_scan("!a", Some(TokenKind::Bang)); - assert_source_scan("a", TokenKind::Identifier); - assert_source_scan("_a", TokenKind::Identifier); - assert_source_scan("a1", TokenKind::Identifier); + assert_source_scan("a", Some(TokenKind::Identifier)); + assert_source_scan("_a", Some(TokenKind::Identifier)); + 
assert_source_scan("a1", Some(TokenKind::Identifier)); - assert_source_scan("\"foo\"", TokenKind::String); + assert_source_scan("\"foo\"", Some(TokenKind::String)); - assert_source_scan("1a", TokenKind::Number); - assert_source_scan("1.1", TokenKind::Number); + assert_source_scan("1a", Some(TokenKind::Number)); + assert_source_scan("1.1", Some(TokenKind::Number)); - assert_source_scan("a", TokenKind::Identifier); - assert_source_scan("an", TokenKind::Identifier); - assert_source_scan("and", TokenKind::And); - assert_source_scan("andy", TokenKind::Identifier); - assert_source_scan("false", TokenKind::False); - assert_source_scan("fa", TokenKind::Identifier); + assert_source_scan("a", Some(TokenKind::Identifier)); + assert_source_scan("an", Some(TokenKind::Identifier)); + assert_source_scan("and", Some(TokenKind::And)); + assert_source_scan("andy", Some(TokenKind::Identifier)); + assert_source_scan("false", Some(TokenKind::False)); + assert_source_scan("fa", Some(TokenKind::Identifier)); - assert_source_scan("@", TokenKind::Error); - assert_source_scan("", TokenKind::Eof); + assert_source_scan("@", Some(TokenKind::Error)); + assert_source_scan("", None); } #[test] fn test_multi_scan() { let mut scanner = Scanner::new("()"); - assert_scan(&mut scanner, TokenKind::LeftParen); - assert_scan(&mut scanner, TokenKind::RightParen); - assert_scan(&mut scanner, TokenKind::Eof); + assert_scan(&mut scanner, Some(TokenKind::LeftParen)); + assert_scan(&mut scanner, Some(TokenKind::RightParen)); + assert_scan(&mut scanner, None); } #[test] fn test_whitespace() { - assert_source_scan(" foo", TokenKind::Identifier); - assert_source_scan("\tfoo", TokenKind::Identifier); - assert_source_scan("// \n", TokenKind::Eof); + assert_source_scan(" foo", Some(TokenKind::Identifier)); + assert_source_scan("\tfoo", Some(TokenKind::Identifier)); + assert_source_scan("// \n", None); } - fn assert_source_scan(source: &str, kind: TokenKind) { + fn assert_source_scan(source: &str, kind: Option<TokenKind>) { let 
mut scanner = Scanner::new(source); assert_scan(&mut scanner, kind); } - fn assert_scan(scanner: &mut Scanner, kind: TokenKind) { - let token = scanner.scan(); - assert_eq!(token.kind, kind); + fn assert_scan(scanner: &mut Scanner, kind: Option<TokenKind>) { + let token = scanner.next(); + assert_eq!(token.map(|x| x.kind), kind); } }