From 2cf7b979d962c4133b72d2bf665d7e2b1a5df0e9 Mon Sep 17 00:00:00 2001
From: Robert
Date: Sun, 23 Apr 2023 19:30:22 +0200
Subject: [PATCH] start working on lexing brackets

---
 src/lexer.rs | 59 ++++++++++++++++++++++++++++++++++++++++++++++++----
 src/main.rs  |  2 +-
 2 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/src/lexer.rs b/src/lexer.rs
index d0f5c71..44f8c88 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -7,11 +7,28 @@ pub enum CharacterClass {
     WordCharacter
 }
 
+#[derive(Debug)]
+pub enum CaptureGroupType {
+    Normal,
+    Anonymous,
+    Named(String),
+    PositiveLookahead,
+    NegativeLookahead,
+    PositiveLookbehind,
+    NegativeLookbehind
+}
+
 #[derive(Debug)]
 pub enum Token {
     Character(char),
     CharacterRange(char, char),
     CharacterClass(CharacterClass, bool),
+
+    CharacterClassStart(bool),
+    CharacterClassEnd,
+
+    CaptureGroupStart(CaptureGroupType),
+    CaptureGroupEnd
 }
 
 #[derive(Debug)]
@@ -120,6 +137,40 @@ where
             }
         }
     }
+
+    fn handle_character_class_start(&mut self) -> Option<Token> {
+        // consume [ character
+        self.scanner.next();
+
+        // see if there is a ^ following the [
+        let negate = self.scanner.next_if(|&c| c == '^').is_some();
+        Some(Token::CharacterClassStart(negate))
+    }
+
+    fn handle_capture_group_start(&mut self) -> Option<Token> {
+        // consume ( character
+        self.scanner.next();
+
+        Some(Token::CaptureGroupStart(
+            match self.scanner.next_if(|&c| c == '?') {
+                None => CaptureGroupType::Normal,
+                Some(_) => self.get_capture_group_type()?
+            }
+        ))
+    }
+
+    fn get_capture_group_type(&mut self) -> Option<CaptureGroupType> {
+        Some(match self.scanner.next()? {
+            ':' => CaptureGroupType::Anonymous,
+            '!' => CaptureGroupType::NegativeLookahead,
+            '=' => CaptureGroupType::PositiveLookahead,
+
+            _ => {
+                eprintln!("Unexpected token after ?");
+                return None;
+            }
+        })
+    }
 }
 
 impl Iterator for Lexer
@@ -140,10 +191,10 @@ where
             '*' => todo!(),
             '+' => todo!(),
             '?' => todo!(),
-            '(' => todo!(),
-            ')' => todo!(),
-            '[' => todo!(),
-            ']' => todo!(),
+            '(' => self.handle_capture_group_start(),
+            ')' => { self.scanner.next(); Some(Token::CaptureGroupEnd)},
+            '[' => self.handle_character_class_start(),
+            ']' => { self.scanner.next(); Some(Token::CharacterClassEnd) },
             '{' => todo!(),
             '}' => todo!(),
             '|' => todo!(),
diff --git a/src/main.rs b/src/main.rs
index 46801d9..d501726 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -2,7 +2,7 @@ mod parser;
 mod lexer;
 
 fn main() {
-    let output = match parser::parse_string(r"Hello A-Ztesting! \s\D \228 \xAF \u2F55 \o") {
+    let output = match parser::parse_string(r"[^A-Za-z]") {
         Ok(val) => val,
         Err(e) => panic!("{e}")
     };