|
|
@ -4,8 +4,8 @@ pub struct Scanner<'a> {
|
|
|
|
line: usize,
|
|
|
|
line: usize,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn is_alpha(c: &char) -> bool {
|
|
|
|
fn is_alpha(c: char) -> bool {
|
|
|
|
c.is_ascii_alphabetic() || *c == '_'
|
|
|
|
c.is_ascii_alphabetic() || c == '_'
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl<'a> Scanner<'a> {
|
|
|
|
impl<'a> Scanner<'a> {
|
|
|
@ -17,34 +17,46 @@ impl<'a> Scanner<'a> {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn advance(&mut self) -> Option<char> {
|
|
|
|
fn advance(&mut self) -> char {
|
|
|
|
self.current += 1;
|
|
|
|
self.current += 1;
|
|
|
|
self.source.get(self.current - 1).map(|&x| x as char)
|
|
|
|
self.source[self.current - 1] as char
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fn is_match(&mut self, expected: char) -> bool {
|
|
|
|
|
|
|
|
if self.peek() != Some(expected) {
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn advance_if<F: Fn(&char) -> bool>(&mut self, f: F) -> bool {
|
|
|
|
|
|
|
|
if matches!(self.peek(), Some(c) if f(&c)) {
|
|
|
|
|
|
|
|
self.current += 1;
|
|
|
|
self.current += 1;
|
|
|
|
true
|
|
|
|
true
|
|
|
|
} else {
|
|
|
|
|
|
|
|
false
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn skip_whitespace(&mut self) {
|
|
|
|
fn skip_whitespace(&mut self) {
|
|
|
|
loop {
|
|
|
|
loop {
|
|
|
|
while self.advance_if(|&c| c == ' ' || c == '\r' || c == '\t') {}
|
|
|
|
match self.peek() {
|
|
|
|
|
|
|
|
Some(' ') | Some('\r') | Some('\t') => {
|
|
|
|
if self.peek() == Some('/') && self.peek_next() == Some('/') {
|
|
|
|
self.advance();
|
|
|
|
while self.advance_if(|&c| c != '\n') {}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Some('\n') => {
|
|
|
|
if self.advance_if(|&c| c == '\n') {
|
|
|
|
|
|
|
|
self.line += 1;
|
|
|
|
self.line += 1;
|
|
|
|
|
|
|
|
self.advance();
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
Some('/') => {
|
|
|
|
|
|
|
|
if matches!(self.peek_next(), Some(c) if c == '/') {
|
|
|
|
|
|
|
|
while matches!(self.peek(), Some(c) if c != '\n') {
|
|
|
|
|
|
|
|
self.advance();
|
|
|
|
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
return;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
_ => {
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn peek(&self) -> Option<char> {
|
|
|
|
fn peek(&self) -> Option<char> {
|
|
|
@ -56,36 +68,47 @@ impl<'a> Scanner<'a> {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn string(&mut self) -> Token<'a> {
|
|
|
|
fn string(&mut self) -> Token<'a> {
|
|
|
|
loop {
|
|
|
|
while matches!(self.peek(), Some(c) if c != '"') {
|
|
|
|
match self.advance() {
|
|
|
|
if self.peek() == Some('\n') {
|
|
|
|
Some('"') => break,
|
|
|
|
|
|
|
|
Some('\n') => {
|
|
|
|
|
|
|
|
self.line += 1;
|
|
|
|
self.line += 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Some(_) => {}
|
|
|
|
self.advance();
|
|
|
|
None => return Token::error(self, "Unterminated string."),
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if self.peek().is_none() {
|
|
|
|
|
|
|
|
// Maybe this should return an Err?
|
|
|
|
|
|
|
|
return Token::error(&self, "Unterminated string.");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Token::new(self, TokenKind::String)
|
|
|
|
self.advance();
|
|
|
|
|
|
|
|
// Token::new(&self, TokenKind::String)
|
|
|
|
|
|
|
|
self.make_token(TokenKind::String)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn number(&mut self) -> Token<'a> {
|
|
|
|
fn number(&mut self) -> Token<'a> {
|
|
|
|
while self.advance_if(char::is_ascii_digit) {}
|
|
|
|
while matches!(self.peek(), Some(c) if c.is_ascii_digit()) {
|
|
|
|
|
|
|
|
self.advance();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if self.peek() == Some('.') && matches!(self.peek_next(), Some(c) if c.is_ascii_digit()) {
|
|
|
|
if self.peek() == Some('.') && matches!(self.peek_next(), Some(c) if c.is_ascii_digit()) {
|
|
|
|
self.advance();
|
|
|
|
self.advance();
|
|
|
|
|
|
|
|
|
|
|
|
while self.advance_if(char::is_ascii_digit) {}
|
|
|
|
while matches!(self.peek(), Some(c) if c.is_ascii_digit()) {
|
|
|
|
|
|
|
|
self.advance();
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Token::new(self, TokenKind::Number)
|
|
|
|
// Token::new(&self, TokenKind::Number)
|
|
|
|
|
|
|
|
self.make_token(TokenKind::Number)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn identifier(&mut self) -> Token<'a> {
|
|
|
|
fn identifier(&mut self) -> Token<'a> {
|
|
|
|
while self.advance_if(|c| is_alpha(c) || c.is_ascii_digit()) {}
|
|
|
|
while matches!(self.peek(), Some(c) if is_alpha(c) || c.is_ascii_digit()) {
|
|
|
|
|
|
|
|
self.advance();
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Token::new(self, self.identifier_type())
|
|
|
|
// Token::new(&self, self.identifier_type())
|
|
|
|
|
|
|
|
self.make_token(self.identifier_type())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn identifier_type(&self) -> TokenKind {
|
|
|
|
fn identifier_type(&self) -> TokenKind {
|
|
|
@ -115,6 +138,14 @@ impl<'a> Scanner<'a> {
|
|
|
|
_ => TokenKind::Identifier,
|
|
|
|
_ => TokenKind::Identifier,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fn make_token(&self, kind: TokenKind) -> Token<'a> {
|
|
|
|
|
|
|
|
Token {
|
|
|
|
|
|
|
|
kind: TokenKind::And,
|
|
|
|
|
|
|
|
value: std::str::from_utf8(&self.source[..self.current]).unwrap(),
|
|
|
|
|
|
|
|
line: self.line,
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl<'a> Iterator for Scanner<'a> {
|
|
|
|
impl<'a> Iterator for Scanner<'a> {
|
|
|
@ -122,60 +153,54 @@ impl<'a> Iterator for Scanner<'a> {
|
|
|
|
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
self.skip_whitespace();
|
|
|
|
self.skip_whitespace();
|
|
|
|
|
|
|
|
self.peek()?;
|
|
|
|
|
|
|
|
|
|
|
|
self.advance()
|
|
|
|
Some(match self.advance() {
|
|
|
|
.map(|c| match c {
|
|
|
|
c if is_alpha(c) => self.identifier(),
|
|
|
|
c if is_alpha(&c) => self.identifier(),
|
|
|
|
|
|
|
|
c if c.is_ascii_digit() => self.number(),
|
|
|
|
c if c.is_ascii_digit() => self.number(),
|
|
|
|
'(' => Token::new(self, TokenKind::LeftParen),
|
|
|
|
'(' => Token::new(self, TokenKind::LeftParen),
|
|
|
|
')' => Token::new(self, TokenKind::RightParen),
|
|
|
|
// '(' => self.make_token(TokenKind::LeftParen),
|
|
|
|
'{' => Token::new(self, TokenKind::LeftBrace),
|
|
|
|
// ')' => self.make_token(TokenKind::RightParen),
|
|
|
|
'}' => Token::new(self, TokenKind::RightBrace),
|
|
|
|
// '{' => self.make_token(TokenKind::LeftBrace),
|
|
|
|
';' => Token::new(self, TokenKind::Semicolon),
|
|
|
|
// '}' => self.make_token(TokenKind::RightBrace),
|
|
|
|
',' => Token::new(self, TokenKind::Comma),
|
|
|
|
// ';' => self.make_token(TokenKind::Semicolon),
|
|
|
|
'.' => Token::new(self, TokenKind::Dot),
|
|
|
|
// ',' => self.make_token(TokenKind::Comma),
|
|
|
|
'-' => Token::new(self, TokenKind::Minus),
|
|
|
|
// '.' => self.make_token(TokenKind::Dot),
|
|
|
|
'+' => Token::new(self, TokenKind::Plus),
|
|
|
|
// '-' => self.make_token(TokenKind::Minus),
|
|
|
|
'/' => Token::new(self, TokenKind::Slash),
|
|
|
|
// '+' => self.make_token(TokenKind::Plus),
|
|
|
|
'*' => Token::new(self, TokenKind::Star),
|
|
|
|
// '/' => self.make_token(TokenKind::Slash),
|
|
|
|
'!' => {
|
|
|
|
// '*' => self.make_token(TokenKind::Star),
|
|
|
|
if self.advance_if(|&c| c == '=') {
|
|
|
|
// '!' => {
|
|
|
|
Token::new(self, TokenKind::BangEqual)
|
|
|
|
// if self.is_match('=') {
|
|
|
|
} else {
|
|
|
|
// self.make_token(TokenKind::BangEqual)
|
|
|
|
Token::new(self, TokenKind::Bang)
|
|
|
|
// } else {
|
|
|
|
}
|
|
|
|
// self.make_token(TokenKind::Bang)
|
|
|
|
}
|
|
|
|
// }
|
|
|
|
'=' => {
|
|
|
|
// }
|
|
|
|
if self.advance_if(|&c| c == '=') {
|
|
|
|
// '=' => {
|
|
|
|
Token::new(self, TokenKind::EqualEqual)
|
|
|
|
// if self.is_match('=') {
|
|
|
|
} else {
|
|
|
|
// self.make_token(TokenKind::EqualEqual)
|
|
|
|
Token::new(self, TokenKind::Equal)
|
|
|
|
// } else {
|
|
|
|
}
|
|
|
|
// self.make_token(TokenKind::Equal)
|
|
|
|
}
|
|
|
|
// }
|
|
|
|
'<' => {
|
|
|
|
// }
|
|
|
|
if self.advance_if(|&c| c == '=') {
|
|
|
|
// '<' => {
|
|
|
|
Token::new(self, TokenKind::LessEqual)
|
|
|
|
// if self.is_match('=') {
|
|
|
|
} else {
|
|
|
|
// self.make_token(TokenKind::LessEqual)
|
|
|
|
Token::new(self, TokenKind::Less)
|
|
|
|
// } else {
|
|
|
|
}
|
|
|
|
// self.make_token(TokenKind::Less)
|
|
|
|
}
|
|
|
|
// }
|
|
|
|
'>' => {
|
|
|
|
// }
|
|
|
|
if self.advance_if(|&c| c == '=') {
|
|
|
|
// '>' => {
|
|
|
|
Token::new(self, TokenKind::GreaterEqual)
|
|
|
|
// if self.is_match('=') {
|
|
|
|
} else {
|
|
|
|
// self.make_token(TokenKind::GreaterEqual)
|
|
|
|
Token::new(self, TokenKind::Greater)
|
|
|
|
// } else {
|
|
|
|
}
|
|
|
|
// self.make_token(TokenKind::Greater)
|
|
|
|
}
|
|
|
|
// }
|
|
|
|
'"' => self.string(),
|
|
|
|
// }
|
|
|
|
|
|
|
|
// '"' => self.string(),
|
|
|
|
_ => Token::error(self, "Unexpected character."),
|
|
|
|
_ => Token::error(self, "Unexpected character."),
|
|
|
|
})
|
|
|
|
})
|
|
|
|
.or({
|
|
|
|
|
|
|
|
Some(Token {
|
|
|
|
|
|
|
|
kind: TokenKind::Eof,
|
|
|
|
|
|
|
|
value: "",
|
|
|
|
|
|
|
|
line: self.line,
|
|
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -282,7 +307,7 @@ mod tests {
|
|
|
|
assert_source_scan("fa", Some(TokenKind::Identifier));
|
|
|
|
assert_source_scan("fa", Some(TokenKind::Identifier));
|
|
|
|
|
|
|
|
|
|
|
|
assert_source_scan("@", Some(TokenKind::Error));
|
|
|
|
assert_source_scan("@", Some(TokenKind::Error));
|
|
|
|
assert_source_scan("", Some(TokenKind::Eof));
|
|
|
|
assert_source_scan("", None);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
#[test]
|
|
|
@ -290,14 +315,14 @@ mod tests {
|
|
|
|
let mut scanner = Scanner::new("()");
|
|
|
|
let mut scanner = Scanner::new("()");
|
|
|
|
assert_scan(&mut scanner, Some(TokenKind::LeftParen));
|
|
|
|
assert_scan(&mut scanner, Some(TokenKind::LeftParen));
|
|
|
|
assert_scan(&mut scanner, Some(TokenKind::RightParen));
|
|
|
|
assert_scan(&mut scanner, Some(TokenKind::RightParen));
|
|
|
|
assert_scan(&mut scanner, Some(TokenKind::Eof));
|
|
|
|
assert_scan(&mut scanner, None);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
#[test]
|
|
|
|
fn test_whitespace() {
|
|
|
|
fn test_whitespace() {
|
|
|
|
assert_source_scan(" foo", Some(TokenKind::Identifier));
|
|
|
|
assert_source_scan(" foo", Some(TokenKind::Identifier));
|
|
|
|
assert_source_scan("\tfoo", Some(TokenKind::Identifier));
|
|
|
|
assert_source_scan("\tfoo", Some(TokenKind::Identifier));
|
|
|
|
assert_source_scan("// \n", Some(TokenKind::Eof));
|
|
|
|
assert_source_scan("// \n", None);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn assert_source_scan(source: &str, kind: Option<TokenKind>) {
|
|
|
|
fn assert_source_scan(source: &str, kind: Option<TokenKind>) {
|
|
|
|