Compare commits

...

2 Commits
andres ... main

Author SHA1 Message Date
Alpha Chen 5d49f5ceea
refactoring Scanner
2 years ago
Alpha Chen 1320a0988f
mu
2 years ago

@ -1,4 +1,4 @@
use crate::scanner::{Scanner, TokenKind};
use crate::scanner::Scanner;
use color_eyre::eyre::Result;
pub fn compile(source: &str) -> Result<()> {

@ -4,6 +4,10 @@ pub struct Scanner<'a> {
line: usize,
}
/// Reports whether `c` may appear in an identifier as a non-digit character:
/// an ASCII letter (`a`-`z`, `A`-`Z`) or an underscore.
///
/// Takes a reference so the function can be passed directly where a
/// `Fn(&char) -> bool` predicate is expected.
fn is_alpha(c: &char) -> bool {
    matches!(*c, 'a'..='z' | 'A'..='Z' | '_')
}
impl<'a> Scanner<'a> {
pub fn new(source: &'a str) -> Self {
Self {
@ -13,111 +17,75 @@ impl<'a> Scanner<'a> {
}
}
/// Builds a token of the given `kind` from the scanner's current state.
///
/// The token text is the source bytes that precede `self.current`, and the
/// token is stamped with the scanner's current line.
///
/// # Panics
///
/// Panics if `self.current` is past the end of the source or if the selected
/// bytes are not valid UTF-8.
fn make_token(&self, kind: TokenKind) -> Token<'a> {
    let consumed = &self.source[..self.current];
    let value = std::str::from_utf8(consumed).unwrap();
    let line = self.line;
    Token { kind, value, line }
}
/// Builds a `TokenKind::Error` token whose text is `message` rather than a
/// slice of the source, stamped with the scanner's current line.
fn error_token(&self, message: &'static str) -> Token<'a> {
    let line = self.line;
    Token {
        line,
        value: message,
        kind: TokenKind::Error,
    }
}
fn advance(&mut self) -> char {
fn advance(&mut self) -> Option<char> {
self.current += 1;
self.source[self.current - 1] as char
}
fn is_match(&mut self, expected: char) -> bool {
if self.peek() != Some(expected) {
return false;
self.source.get(self.current - 1).map(|&x| x as char)
}
/// Consumes the next character only when the predicate `f` accepts it.
///
/// Returns `true` if the scanner moved forward by one position, and `false`
/// (leaving the position untouched) when there is no next character or `f`
/// rejects it.
fn advance_if<F: Fn(&char) -> bool>(&mut self, f: F) -> bool {
    let accepted = self.peek().filter(|c| f(c)).is_some();
    if accepted {
        self.current += 1;
    }
    accepted
}
fn skip_whitespace(&mut self) {
loop {
match self.peek() {
Some(' ') | Some('\r') | Some('\t') => {
self.advance();
break;
while self.advance_if(|&c| c == ' ' || c == '\r' || c == '\t') {}
if self.peek() == Some('/') && self.peek_next() == Some('/') {
while self.advance_if(|&c| c != '\n') {}
}
Some('\n') => {
if self.advance_if(|&c| c == '\n') {
self.line += 1;
self.advance();
break;
}
Some('/') => {
if matches!(self.peek_next(), Some(c) if c == '/') {
while matches!(self.peek(), Some(c) if c != '\n') {
self.advance();
}
} else {
return;
}
}
_ => {
return;
}
}
}
}
/// Returns the element of the source at the current position, converted to a
/// `char`, without advancing. Yields `None` once the position is past the end.
fn peek(&self) -> Option<char> {
    let byte = self.source.get(self.current).copied()?;
    Some(char::from(byte))
}
fn peek_next(&mut self) -> Option<char> {
fn peek_next(&self) -> Option<char> {
self.source.get(self.current + 1).map(|&x| x as char)
}
fn string(&mut self) -> Token<'a> {
while matches!(self.peek(), Some(c) if c != '"') {
if self.peek() == Some('\n') {
loop {
match self.advance() {
Some('"') => break,
Some('\n') => {
self.line += 1;
}
self.advance();
Some(_) => {}
None => return Token::error(self, "Unterminated string."),
}
if self.peek().is_none() {
// Maybe this should return an Err?
return self.error_token("Unterminated string.");
}
self.advance();
self.make_token(TokenKind::String)
Token::new(self, TokenKind::String)
}
fn number(&mut self) -> Token<'a> {
while matches!(self.peek(), Some(c) if c.is_ascii_digit()) {
self.advance();
}
while self.advance_if(char::is_ascii_digit) {}
if self.peek() == Some('.') && matches!(self.peek_next(), Some(c) if c.is_ascii_digit()) {
self.advance();
while matches!(self.peek(), Some(c) if c.is_ascii_digit()) {
self.advance();
}
while self.advance_if(char::is_ascii_digit) {}
}
self.make_token(TokenKind::Number)
Token::new(self, TokenKind::Number)
}
fn identifier(&mut self) -> Token<'a> {
while matches!(self.peek(), Some(c) if is_alpha(c) || c.is_ascii_digit()) {
self.advance();
}
while self.advance_if(|c| is_alpha(c) || c.is_ascii_digit()) {}
self.make_token(self.identifier_type())
Token::new(self, self.identifier_type())
}
fn identifier_type(&self) -> TokenKind {
@ -154,66 +122,87 @@ impl<'a> Iterator for Scanner<'a> {
fn next(&mut self) -> Option<Self::Item> {
self.skip_whitespace();
self.peek()?;
Some(match self.advance() {
c if is_alpha(c) => self.identifier(),
self.advance()
.map(|c| match c {
c if is_alpha(&c) => self.identifier(),
c if c.is_ascii_digit() => self.number(),
'(' => self.make_token(TokenKind::LeftParen),
')' => self.make_token(TokenKind::RightParen),
'{' => self.make_token(TokenKind::LeftBrace),
'}' => self.make_token(TokenKind::RightBrace),
';' => self.make_token(TokenKind::Semicolon),
',' => self.make_token(TokenKind::Comma),
'.' => self.make_token(TokenKind::Dot),
'-' => self.make_token(TokenKind::Minus),
'+' => self.make_token(TokenKind::Plus),
'/' => self.make_token(TokenKind::Slash),
'*' => self.make_token(TokenKind::Star),
'(' => Token::new(self, TokenKind::LeftParen),
')' => Token::new(self, TokenKind::RightParen),
'{' => Token::new(self, TokenKind::LeftBrace),
'}' => Token::new(self, TokenKind::RightBrace),
';' => Token::new(self, TokenKind::Semicolon),
',' => Token::new(self, TokenKind::Comma),
'.' => Token::new(self, TokenKind::Dot),
'-' => Token::new(self, TokenKind::Minus),
'+' => Token::new(self, TokenKind::Plus),
'/' => Token::new(self, TokenKind::Slash),
'*' => Token::new(self, TokenKind::Star),
'!' => {
if self.is_match('=') {
self.make_token(TokenKind::BangEqual)
if self.advance_if(|&c| c == '=') {
Token::new(self, TokenKind::BangEqual)
} else {
self.make_token(TokenKind::Bang)
Token::new(self, TokenKind::Bang)
}
}
'=' => {
if self.is_match('=') {
self.make_token(TokenKind::EqualEqual)
if self.advance_if(|&c| c == '=') {
Token::new(self, TokenKind::EqualEqual)
} else {
self.make_token(TokenKind::Equal)
Token::new(self, TokenKind::Equal)
}
}
'<' => {
if self.is_match('=') {
self.make_token(TokenKind::LessEqual)
if self.advance_if(|&c| c == '=') {
Token::new(self, TokenKind::LessEqual)
} else {
self.make_token(TokenKind::Less)
Token::new(self, TokenKind::Less)
}
}
'>' => {
if self.is_match('=') {
self.make_token(TokenKind::GreaterEqual)
if self.advance_if(|&c| c == '=') {
Token::new(self, TokenKind::GreaterEqual)
} else {
self.make_token(TokenKind::Greater)
Token::new(self, TokenKind::Greater)
}
}
'"' => self.string(),
_ => self.error_token("Unexpected character."),
_ => Token::error(self, "Unexpected character."),
})
.or({
Some(Token {
kind: TokenKind::Eof,
value: "",
line: self.line,
})
})
}
}
/// Reports whether `c` is an ASCII letter or an underscore.
fn is_alpha(c: char) -> bool {
    match c {
        '_' => true,
        other => other.is_ascii_alphabetic(),
    }
}
/// A single token produced by the scanner.
pub struct Token<'a> {
/// The category of this token.
pub kind: TokenKind,
/// Text carried by the token: a slice of the scanned source for tokens built
/// by `Token::new`, or a static message for tokens built by `Token::error`.
pub value: &'a str,
/// The scanner's line counter at the moment the token was built.
pub line: usize,
}
impl<'a> Token<'a> {
    /// Builds a token of the given `kind` from the scanner's current state.
    ///
    /// The token text is the scanner's source bytes that precede
    /// `scanner.current`; the line is copied from the scanner.
    ///
    /// # Panics
    ///
    /// Panics if `scanner.current` is past the end of the source or if the
    /// selected bytes are not valid UTF-8.
    fn new(scanner: &Scanner<'a>, kind: TokenKind) -> Self {
        let consumed = &scanner.source[..scanner.current];
        let value = std::str::from_utf8(consumed).unwrap();
        let line = scanner.line;
        Self { kind, value, line }
    }

    /// Builds a `TokenKind::Error` token whose text is `message` instead of a
    /// slice of the source, stamped with the scanner's current line.
    fn error(scanner: &Scanner, message: &'static str) -> Self {
        let line = scanner.line;
        Self {
            line,
            value: message,
            kind: TokenKind::Error,
        }
    }
}
#[derive(Debug, PartialEq)]
pub enum TokenKind {
// Single-character tokens.
@ -293,7 +282,7 @@ mod tests {
assert_source_scan("fa", Some(TokenKind::Identifier));
assert_source_scan("@", Some(TokenKind::Error));
assert_source_scan("", None);
assert_source_scan("", Some(TokenKind::Eof));
}
#[test]
@ -301,14 +290,14 @@ mod tests {
let mut scanner = Scanner::new("()");
assert_scan(&mut scanner, Some(TokenKind::LeftParen));
assert_scan(&mut scanner, Some(TokenKind::RightParen));
assert_scan(&mut scanner, None);
assert_scan(&mut scanner, Some(TokenKind::Eof));
}
#[test]
fn test_whitespace() {
assert_source_scan(" foo", Some(TokenKind::Identifier));
assert_source_scan("\tfoo", Some(TokenKind::Identifier));
assert_source_scan("// \n", None);
assert_source_scan("// \n", Some(TokenKind::Eof));
}
fn assert_source_scan(source: &str, kind: Option<TokenKind>) {

Loading…
Cancel
Save