add control sequences to lexer
This commit is contained in:
parent
2c4dbe0971
commit
2533cf75e3
89
src/lexer.rs
89
src/lexer.rs
|
@ -1,9 +1,17 @@
|
||||||
use std::iter::Peekable;
|
use std::iter::Peekable;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum CharacterClass {
|
||||||
|
Digit,
|
||||||
|
Whitespace,
|
||||||
|
WordCharacter
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum Token {
|
pub enum Token {
|
||||||
Character(char),
|
Character(char),
|
||||||
CharacterRange(char, char)
|
CharacterRange(char, char),
|
||||||
|
CharacterClass(CharacterClass, bool),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
@ -24,6 +32,32 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn read_integer(&mut self, radix: u32) -> Option<u32> {
|
||||||
|
match u32::from_str_radix(self.scanner.by_ref().take_while(|c| c.is_digit(radix)).collect::<String>().to_owned().as_str(), radix) {
|
||||||
|
Ok(result) => Some(result),
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("Failed to read DecimalIntgegerLiteral: {e}");
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_hex_number(&mut self, hex_digits: usize) -> Option<u32> {
|
||||||
|
let number_string = self.scanner.by_ref().take(hex_digits).collect::<String>();
|
||||||
|
|
||||||
|
if number_string.len() != hex_digits {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
match u32::from_str_radix(number_string.as_str(), 16) {
|
||||||
|
Ok(result) => Some(result),
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("Failed to read DecimalIntgegerLiteral: {e}");
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn handle_character(&mut self) -> Option<Token> {
|
fn handle_character(&mut self) -> Option<Token> {
|
||||||
let character = self.scanner.next()?;
|
let character = self.scanner.next()?;
|
||||||
|
|
||||||
|
@ -35,6 +69,57 @@ where
|
||||||
|
|
||||||
Some(Token::Character(character))
|
Some(Token::Character(character))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn handle_escape(&mut self) -> Option<Token> {
|
||||||
|
// discard leading backslash
|
||||||
|
self.scanner.next()?;
|
||||||
|
|
||||||
|
let escapee = self.scanner.peek()?.to_ascii_lowercase();
|
||||||
|
|
||||||
|
if "dsw".contains(escapee) {
|
||||||
|
return self.handle_character_class_escape();
|
||||||
|
}
|
||||||
|
|
||||||
|
if escapee.is_digit(10) {
|
||||||
|
return Some(Token::Character(char::from_u32(self.read_integer(10)?)?))
|
||||||
|
}
|
||||||
|
|
||||||
|
self.handle_character_escape()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle_character_class_escape(&mut self) -> Option<Token> {
|
||||||
|
match self.scanner.next()? {
|
||||||
|
'd' => Some(Token::CharacterClass(CharacterClass::Digit, false)),
|
||||||
|
'D' => Some(Token::CharacterClass(CharacterClass::Digit, true)),
|
||||||
|
's' => Some(Token::CharacterClass(CharacterClass::Whitespace, false)),
|
||||||
|
'S' => Some(Token::CharacterClass(CharacterClass::Whitespace, true)),
|
||||||
|
'w' => Some(Token::CharacterClass(CharacterClass::WordCharacter, false)),
|
||||||
|
'W' => Some(Token::CharacterClass(CharacterClass::WordCharacter, true)),
|
||||||
|
_ => panic!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn handle_character_escape(&mut self) -> Option<Token> {
|
||||||
|
match self.scanner.next()? {
|
||||||
|
'f' => Some(Token::Character(char::from_u32(12)?)),
|
||||||
|
'n' => Some(Token::Character('\n')),
|
||||||
|
'r' => Some(Token::Character('\r')),
|
||||||
|
't' => Some(Token::Character('\t')),
|
||||||
|
'v' => Some(Token::Character(char::from_u32(11)?)),
|
||||||
|
|
||||||
|
'c' => {
|
||||||
|
eprintln!("Control sequences are not supported yet");
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
'x' => Some(Token::Character(char::from_u32(self.read_hex_number(2)?)?)),
|
||||||
|
'u' => Some(Token::Character(char::from_u32(self.read_hex_number(4)?)?)),
|
||||||
|
c => {
|
||||||
|
eprintln!("Invalid escape sequence '\\{c}'");
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<I> Iterator for Lexer<I>
|
impl<I> Iterator for Lexer<I>
|
||||||
|
@ -50,7 +135,7 @@ where
|
||||||
'-' => todo!(),
|
'-' => todo!(),
|
||||||
'^' => todo!(),
|
'^' => todo!(),
|
||||||
'$' => todo!(),
|
'$' => todo!(),
|
||||||
'\\' => todo!(),
|
'\\' => self.handle_escape(),
|
||||||
'.' => todo!(),
|
'.' => todo!(),
|
||||||
'*' => todo!(),
|
'*' => todo!(),
|
||||||
'+' => todo!(),
|
'+' => todo!(),
|
||||||
|
|
|
@ -2,7 +2,7 @@ mod parser;
|
||||||
mod lexer;
|
mod lexer;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let output = match parser::parse_string("Hello A-Ztesting!") {
|
let output = match parser::parse_string(r"Hello A-Ztesting! \s\D \228 \xAF \u2F55 \o") {
|
||||||
Ok(val) => val,
|
Ok(val) => val,
|
||||||
Err(e) => panic!("{e}")
|
Err(e) => panic!("{e}")
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in a new issue