|
|
@ -4,6 +4,10 @@ pub struct Scanner<'a> {
|
|
|
|
line: usize,
|
|
|
|
line: usize,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Reports whether `c` can start or continue an identifier:
/// an ASCII letter or an underscore.
fn is_alpha(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '_')
}
|
|
|
|
|
|
|
|
|
|
|
|
impl<'a> Scanner<'a> {
|
|
|
|
impl<'a> Scanner<'a> {
|
|
|
|
pub fn new(source: &'a str) -> Self {
|
|
|
|
pub fn new(source: &'a str) -> Self {
|
|
|
|
Self {
|
|
|
|
Self {
|
|
|
@ -13,22 +17,6 @@ impl<'a> Scanner<'a> {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn make_token(&self, kind: TokenKind) -> Token<'a> {
|
|
|
|
|
|
|
|
Token {
|
|
|
|
|
|
|
|
kind,
|
|
|
|
|
|
|
|
value: std::str::from_utf8(&self.source[..self.current]).unwrap(),
|
|
|
|
|
|
|
|
line: self.line,
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fn error_token(&self, message: &'static str) -> Token<'a> {
|
|
|
|
|
|
|
|
Token {
|
|
|
|
|
|
|
|
kind: TokenKind::Error,
|
|
|
|
|
|
|
|
value: message,
|
|
|
|
|
|
|
|
line: self.line,
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fn advance(&mut self) -> char {
|
|
|
|
fn advance(&mut self) -> char {
|
|
|
|
self.current += 1;
|
|
|
|
self.current += 1;
|
|
|
|
self.source[self.current - 1] as char
|
|
|
|
self.source[self.current - 1] as char
|
|
|
@ -75,7 +63,7 @@ impl<'a> Scanner<'a> {
|
|
|
|
self.source.get(self.current).map(|&x| x as char)
|
|
|
|
self.source.get(self.current).map(|&x| x as char)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn peek_next(&mut self) -> Option<char> {
|
|
|
|
fn peek_next(&self) -> Option<char> {
|
|
|
|
self.source.get(self.current + 1).map(|&x| x as char)
|
|
|
|
self.source.get(self.current + 1).map(|&x| x as char)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -89,11 +77,11 @@ impl<'a> Scanner<'a> {
|
|
|
|
|
|
|
|
|
|
|
|
if self.peek().is_none() {
|
|
|
|
if self.peek().is_none() {
|
|
|
|
// Maybe this should return an Err?
|
|
|
|
// Maybe this should return an Err?
|
|
|
|
return self.error_token("Unterminated string.");
|
|
|
|
return Token::error(&self, "Unterminated string.");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
self.advance();
|
|
|
|
self.advance();
|
|
|
|
self.make_token(TokenKind::String)
|
|
|
|
Token::new(self, TokenKind::String)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn number(&mut self) -> Token<'a> {
|
|
|
|
fn number(&mut self) -> Token<'a> {
|
|
|
@ -109,7 +97,7 @@ impl<'a> Scanner<'a> {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
self.make_token(TokenKind::Number)
|
|
|
|
Token::new(self, TokenKind::Number)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn identifier(&mut self) -> Token<'a> {
|
|
|
|
fn identifier(&mut self) -> Token<'a> {
|
|
|
@ -117,7 +105,7 @@ impl<'a> Scanner<'a> {
|
|
|
|
self.advance();
|
|
|
|
self.advance();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
self.make_token(self.identifier_type())
|
|
|
|
Token::new(self, self.identifier_type())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
fn identifier_type(&self) -> TokenKind {
|
|
|
|
fn identifier_type(&self) -> TokenKind {
|
|
|
@ -154,66 +142,83 @@ impl<'a> Iterator for Scanner<'a> {
|
|
|
|
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
self.skip_whitespace();
|
|
|
|
self.skip_whitespace();
|
|
|
|
self.peek()?;
|
|
|
|
|
|
|
|
|
|
|
|
if self.peek().is_none() {
|
|
|
|
|
|
|
|
return Some(Token::new(self, TokenKind::Eof));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Some(match self.advance() {
|
|
|
|
Some(match self.advance() {
|
|
|
|
c if is_alpha(c) => self.identifier(),
|
|
|
|
c if is_alpha(c) => self.identifier(),
|
|
|
|
c if c.is_ascii_digit() => self.number(),
|
|
|
|
c if c.is_ascii_digit() => self.number(),
|
|
|
|
'(' => self.make_token(TokenKind::LeftParen),
|
|
|
|
'(' => Token::new(self, TokenKind::LeftParen),
|
|
|
|
')' => self.make_token(TokenKind::RightParen),
|
|
|
|
')' => Token::new(self, TokenKind::RightParen),
|
|
|
|
'{' => self.make_token(TokenKind::LeftBrace),
|
|
|
|
'{' => Token::new(self, TokenKind::LeftBrace),
|
|
|
|
'}' => self.make_token(TokenKind::RightBrace),
|
|
|
|
'}' => Token::new(self, TokenKind::RightBrace),
|
|
|
|
';' => self.make_token(TokenKind::Semicolon),
|
|
|
|
';' => Token::new(self, TokenKind::Semicolon),
|
|
|
|
',' => self.make_token(TokenKind::Comma),
|
|
|
|
',' => Token::new(self, TokenKind::Comma),
|
|
|
|
'.' => self.make_token(TokenKind::Dot),
|
|
|
|
'.' => Token::new(self, TokenKind::Dot),
|
|
|
|
'-' => self.make_token(TokenKind::Minus),
|
|
|
|
'-' => Token::new(self, TokenKind::Minus),
|
|
|
|
'+' => self.make_token(TokenKind::Plus),
|
|
|
|
'+' => Token::new(self, TokenKind::Plus),
|
|
|
|
'/' => self.make_token(TokenKind::Slash),
|
|
|
|
'/' => Token::new(self, TokenKind::Slash),
|
|
|
|
'*' => self.make_token(TokenKind::Star),
|
|
|
|
'*' => Token::new(self, TokenKind::Star),
|
|
|
|
'!' => {
|
|
|
|
'!' => {
|
|
|
|
if self.is_match('=') {
|
|
|
|
if self.is_match('=') {
|
|
|
|
self.make_token(TokenKind::BangEqual)
|
|
|
|
Token::new(self, TokenKind::BangEqual)
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
self.make_token(TokenKind::Bang)
|
|
|
|
Token::new(self, TokenKind::Bang)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
'=' => {
|
|
|
|
'=' => {
|
|
|
|
if self.is_match('=') {
|
|
|
|
if self.is_match('=') {
|
|
|
|
self.make_token(TokenKind::EqualEqual)
|
|
|
|
Token::new(self, TokenKind::EqualEqual)
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
self.make_token(TokenKind::Equal)
|
|
|
|
Token::new(self, TokenKind::Equal)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
'<' => {
|
|
|
|
'<' => {
|
|
|
|
if self.is_match('=') {
|
|
|
|
if self.is_match('=') {
|
|
|
|
self.make_token(TokenKind::LessEqual)
|
|
|
|
Token::new(self, TokenKind::LessEqual)
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
self.make_token(TokenKind::Less)
|
|
|
|
Token::new(self, TokenKind::Less)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
'>' => {
|
|
|
|
'>' => {
|
|
|
|
if self.is_match('=') {
|
|
|
|
if self.is_match('=') {
|
|
|
|
self.make_token(TokenKind::GreaterEqual)
|
|
|
|
Token::new(self, TokenKind::GreaterEqual)
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
self.make_token(TokenKind::Greater)
|
|
|
|
Token::new(self, TokenKind::Greater)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
'"' => self.string(),
|
|
|
|
'"' => self.string(),
|
|
|
|
_ => self.error_token("Unexpected character."),
|
|
|
|
_ => Token::error(self, "Unexpected character."),
|
|
|
|
})
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// True when `c` is legal in an identifier head position:
/// an underscore or an ASCII letter.
fn is_alpha(c: char) -> bool {
    c == '_' || c.is_ascii_alphabetic()
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pub struct Token<'a> {
|
|
|
|
pub struct Token<'a> {
|
|
|
|
pub kind: TokenKind,
|
|
|
|
pub kind: TokenKind,
|
|
|
|
pub value: &'a str,
|
|
|
|
pub value: &'a str,
|
|
|
|
pub line: usize,
|
|
|
|
pub line: usize,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
impl<'a> Token<'a> {
|
|
|
|
|
|
|
|
fn new(scanner: &Scanner<'a>, kind: TokenKind) -> Self {
|
|
|
|
|
|
|
|
Token {
|
|
|
|
|
|
|
|
kind,
|
|
|
|
|
|
|
|
value: std::str::from_utf8(&scanner.source[..scanner.current]).unwrap(),
|
|
|
|
|
|
|
|
line: scanner.line,
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fn error(scanner: &Scanner, message: &'static str) -> Self {
|
|
|
|
|
|
|
|
Token {
|
|
|
|
|
|
|
|
kind: TokenKind::Error,
|
|
|
|
|
|
|
|
value: message,
|
|
|
|
|
|
|
|
line: scanner.line,
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
|
|
pub enum TokenKind {
|
|
|
|
pub enum TokenKind {
|
|
|
|
// Single-character tokens.
|
|
|
|
// Single-character tokens.
|
|
|
@ -293,7 +298,7 @@ mod tests {
|
|
|
|
assert_source_scan("fa", Some(TokenKind::Identifier));
|
|
|
|
assert_source_scan("fa", Some(TokenKind::Identifier));
|
|
|
|
|
|
|
|
|
|
|
|
assert_source_scan("@", Some(TokenKind::Error));
|
|
|
|
assert_source_scan("@", Some(TokenKind::Error));
|
|
|
|
assert_source_scan("", None);
|
|
|
|
assert_source_scan("", Some(TokenKind::Eof));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
#[test]
|
|
|
@ -301,14 +306,14 @@ mod tests {
|
|
|
|
let mut scanner = Scanner::new("()");
|
|
|
|
let mut scanner = Scanner::new("()");
|
|
|
|
assert_scan(&mut scanner, Some(TokenKind::LeftParen));
|
|
|
|
assert_scan(&mut scanner, Some(TokenKind::LeftParen));
|
|
|
|
assert_scan(&mut scanner, Some(TokenKind::RightParen));
|
|
|
|
assert_scan(&mut scanner, Some(TokenKind::RightParen));
|
|
|
|
assert_scan(&mut scanner, None);
|
|
|
|
assert_scan(&mut scanner, Some(TokenKind::Eof));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[test]
fn test_whitespace() {
    // Leading blanks and tabs are discarded before the identifier.
    assert_source_scan(" foo", Some(TokenKind::Identifier));
    assert_source_scan("\tfoo", Some(TokenKind::Identifier));
    // A `//` comment consumes the rest of the line, so only Eof
    // remains (the post-refactor scanner yields Eof instead of None).
    assert_source_scan("// \n", Some(TokenKind::Eof));
}
|
|
|
|
|
|
|
|
|
|
|
|
fn assert_source_scan(source: &str, kind: Option<TokenKind>) {
|
|
|
|
fn assert_source_scan(source: &str, kind: Option<TokenKind>) {
|
|
|
|