start working on lexing brackets
This commit is contained in:
parent
2533cf75e3
commit
2cf7b979d9
59
src/lexer.rs
59
src/lexer.rs
|
@ -7,11 +7,28 @@ pub enum CharacterClass {
|
|||
WordCharacter
|
||||
}
|
||||
|
||||
/// The flavour of group introduced by an opening `(`.
///
/// `Clone`, `PartialEq` and `Eq` are derived so that tokens carrying a group
/// type can be duplicated and compared (e.g. in parser logic and tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CaptureGroupType {
    /// A plain `(`, i.e. one that is not followed by `?`.
    Normal,
    /// A group opened with `(?:`.
    Anonymous,
    /// A group that carries a name; the `String` is that name.
    Named(String),
    /// A group opened with `(?=`.
    PositiveLookahead,
    /// A group opened with `(?!`.
    NegativeLookahead,
    /// Positive lookbehind group (conventionally written `(?<=`).
    PositiveLookbehind,
    /// Negative lookbehind group (conventionally written `(?<!`).
    NegativeLookbehind,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Token {
|
||||
Character(char),
|
||||
CharacterRange(char, char),
|
||||
CharacterClass(CharacterClass, bool),
|
||||
|
||||
CharacterClassStart(bool),
|
||||
CharacterClassEnd,
|
||||
|
||||
CaptureGroupStart(CaptureGroupType),
|
||||
CaptureGroupEnd
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
@ -120,6 +137,40 @@ where
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_character_class_start(&mut self) -> Option<Token> {
|
||||
// consume [ character
|
||||
self.scanner.next();
|
||||
|
||||
// see if there is a ^ following the [
|
||||
let negate = self.scanner.next_if(|&c| c == '^').is_some();
|
||||
Some(Token::CharacterClassStart(negate))
|
||||
}
|
||||
|
||||
fn handle_capture_group_start(&mut self) -> Option<Token> {
|
||||
// consume ( character
|
||||
self.scanner.next();
|
||||
|
||||
Some(Token::CaptureGroupStart(
|
||||
match self.scanner.next_if(|&c| c == '?') {
|
||||
None => CaptureGroupType::Normal,
|
||||
Some(_) => self.get_capture_group_type()?
|
||||
}
|
||||
))
|
||||
}
|
||||
|
||||
fn get_capture_group_type(&mut self) -> Option<CaptureGroupType> {
|
||||
Some(match self.scanner.next()? {
|
||||
':' => CaptureGroupType::Anonymous,
|
||||
'!' => CaptureGroupType::NegativeLookahead,
|
||||
'=' => CaptureGroupType::PositiveLookahead,
|
||||
|
||||
_ => {
|
||||
eprintln!("Unexpected token after ?");
|
||||
return None;
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<I> Iterator for Lexer<I>
|
||||
|
@ -140,10 +191,10 @@ where
|
|||
'*' => todo!(),
|
||||
'+' => todo!(),
|
||||
'?' => todo!(),
|
||||
'(' => todo!(),
|
||||
')' => todo!(),
|
||||
'[' => todo!(),
|
||||
']' => todo!(),
|
||||
'(' => self.handle_capture_group_start(),
|
||||
')' => { self.scanner.next(); Some(Token::CaptureGroupEnd)},
|
||||
'[' => self.handle_character_class_start(),
|
||||
']' => { self.scanner.next(); Some(Token::CharacterClassEnd) },
|
||||
'{' => todo!(),
|
||||
'}' => todo!(),
|
||||
'|' => todo!(),
|
||||
|
|
|
@ -2,7 +2,7 @@ mod parser;
|
|||
mod lexer;
|
||||
|
||||
fn main() {
|
||||
let output = match parser::parse_string(r"Hello A-Ztesting! \s\D \228 \xAF \u2F55 \o") {
|
||||
let output = match parser::parse_string(r"[^A-Za-z]") {
|
||||
Ok(val) => val,
|
||||
Err(e) => panic!("{e}")
|
||||
};
|
||||
|
|
Loading…
Reference in a new issue