From c0c8153cbe26693699fb89516cac05d22c15d9ae Mon Sep 17 00:00:00 2001 From: ElnuDev Date: Sat, 27 May 2023 14:20:11 -0700 Subject: [PATCH] Simplify parsing --- renrs/src/lib.rs | 183 +++++++++++++++++---------------------------- renrs/src/rpy.pest | 12 +-- 2 files changed, 76 insertions(+), 119 deletions(-) diff --git a/renrs/src/lib.rs b/renrs/src/lib.rs index 02e146f..4351603 100644 --- a/renrs/src/lib.rs +++ b/renrs/src/lib.rs @@ -32,19 +32,15 @@ impl Token { use Token::*; -// Parsing types - -type Line = Vec; - // Indented command block #[derive(Debug)] -struct Block { - elements: Vec>, +struct CommandBlock { + elements: Vec, next: Option, } -impl Block { - fn next(&mut self) -> Option<&T> { +impl CommandBlock { + fn next(&mut self) -> Option<&Command> { let mut next = match self.next { Some(next) => next, None => return None, @@ -80,7 +76,7 @@ impl Block { } } -impl Default for Block { +impl Default for CommandBlock { fn default() -> Self { Self { elements: Vec::new(), @@ -90,13 +86,11 @@ impl Default for Block { } #[derive(Debug)] -enum BlockElement { - Command(T), - Block(Block), +enum BlockElement { + Command(Command), + Block(CommandBlock), } -type LineBlock = Block; -type CommandBlock = Block; type Script = CommandBlock; // Parsed script commands @@ -123,116 +117,44 @@ pub enum Event { Say { name: Option, text: String }, } -// ========================================== -// Step 1 parsing -// converting from pest pairs to Token blocks -// ========================================== - -// Tokenize raw script string -fn parse(script: &str) -> LineBlock { - let file = RpyParser::parse(Rule::file, script) - .expect("unsuccessful parse") - .next() - .unwrap(); - parse_block(file) -} +// ====== +// Parser +// ====== type Pair<'a> = pest::iterators::Pair<'a, Rule>; -// Tokenize block -fn parse_block(pair: Pair) -> LineBlock { - let mut block = LineBlock::default(); - for element in pair.into_inner() { - block.elements.push(match element.as_rule() { - Rule::block => BlockElement::Block(parse_block(element)), - Rule::line => { - let line = parse_line(element); - // TODO: For some reason a blank final line is always parsed - if line.len() == 0 { - continue; - } - BlockElement::Command(line) - }, - Rule::EOI => break, // end - _ => unreachable!(), - }); - } - block -} - -// Tokenize line -fn parse_line(pair: Pair) -> Line { - let mut tokens = Vec::new(); - for token in pair.into_inner() { - tokens.push(parse_token(token)); - } - tokens -} - -// Tokenize token -fn parse_token(pair: Pair) -> Token { - let token = pair.as_rule(); - macro_rules! contents { - () => { - pair.into_inner().next().unwrap() - }; - } - match token { - Rule::String => { - let contents = contents!(); - Token::Str(match contents.as_rule() { - Rule::SingleQuoteStringData => contents.as_str().replace("\\'", "'"), - Rule::DoubleQuoteStringData => contents.as_str().replace("\\\"", "\""), - _ => unreachable!(), - }) - } - Rule::Array => { - let contents = contents!(); - let mut array = Vec::new(); - for token in contents.into_inner() { - array.push(parse_token(token)); - } - Token::Array(array) - } - Rule::Boolean => Token::Boolean(match pair.as_str() { - "True" => true, - "False" => false, - _ => unreachable!(), - }), - Rule::Number => Token::Number(pair.as_str().parse().unwrap()), - Rule::Keyword => Token::Keyword(pair.as_str().to_owned()), - __ => unreachable!(), - } +// Read file into commands +fn parse_file(file_path: &PathBuf) -> CommandBlock { + let unparsed_file = fs::read_to_string(file_path).expect("cannot find file"); + parse(&unparsed_file) } - -// ============================================== -// Step 2 reading -// converting from Token blocks to Command blocks -// ============================================== - -// Read file into commands -fn read_file(file_path: &PathBuf) -> CommandBlock { - let line_block = { - let unparsed_file = fs::read_to_string(file_path).expect("cannot find file"); - parse(&unparsed_file) - }; - read_block(&line_block) +fn parse(script: &str) -> CommandBlock { + let file = RpyParser::parse(Rule::File, script) + .expect("unsuccessful parse") + .next() + .unwrap(); + parse_block(file) } -// Read line block into command block -fn read_block(block: &LineBlock) -> CommandBlock { +fn parse_block(block: Pair) -> CommandBlock { CommandBlock { - elements: block.elements.iter().map(|element| match element { - BlockElement::Command(line) => BlockElement::Command(read_command(&line)), - BlockElement::Block(block) => BlockElement::Block(read_block(&block)), + elements: block.into_inner().filter_map(|pair| match pair.as_rule() { + Rule::Line => Some(BlockElement::Command(parse_command(pair))), + Rule::Block => Some(BlockElement::Block(parse_block(pair))), + Rule::EOI => None, // end + _ => unreachable!(), }).collect(), ..Default::default() } } -// Read token array to command -fn read_command(line: &Line) -> Command { +fn parse_command(pair: Pair) -> Command { + use Token::*; + let line: Vec = pair + .into_inner() + .map(|pair| parse_token(pair)) + .collect(); macro_rules! unknown { () => { panic!("Unknown command {}", describe_line(&line)) @@ -260,7 +182,7 @@ fn read_command(line: &Line) -> Command { // Line description e.g. [String, Keyword, Array] // Used in parse_command as feedback for invalid commands -fn describe_line(line: &Line) -> String { +fn describe_line(line: &[Token]) -> String { let mut description = "[".to_owned(); let mut iter = line.iter(); description.push_str(&format!("{}", iter.next().unwrap().print())); @@ -271,6 +193,41 @@ fn describe_line(line: &Line) -> String { description } +fn parse_token(pair: Pair) -> Token { + let token = pair.as_rule(); + macro_rules! contents { + () => { + pair.into_inner().next().unwrap() + }; + } + match token { + Rule::String => { + let contents = contents!(); + Token::Str(match contents.as_rule() { + Rule::SingleQuoteStringData => contents.as_str().replace("\\'", "'"), + Rule::DoubleQuoteStringData => contents.as_str().replace("\\\"", "\""), + _ => unreachable!(), + }) + } + Rule::Array => { + let contents = contents!(); + let mut array = Vec::new(); + for token in contents.into_inner() { + array.push(parse_token(token)); + } + Token::Array(array) + } + Rule::Boolean => Token::Boolean(match pair.as_str() { + "True" => true, + "False" => false, + _ => unreachable!(), + }), + Rule::Number => Token::Number(pair.as_str().parse().unwrap()), + Rule::Keyword => Token::Keyword(pair.as_str().to_owned()), + __ => unreachable!(), + } +} + // ===== // State // ===== @@ -282,7 +239,7 @@ pub struct State { impl State { pub fn from_file(file: PathBuf) -> State { State { - script: read_file(&file), + script: parse_file(&file), } } diff --git a/renrs/src/rpy.pest b/renrs/src/rpy.pest index a8b69c6..b50e52f 100644 --- a/renrs/src/rpy.pest +++ b/renrs/src/rpy.pest @@ -5,7 +5,7 @@ char = { !NEWLINE ~ ANY } // http://pest.rs/book/grammars/syntax.html#atomic inner = @{ char* } -Token = { String | Array | Boolean | Number | Keyword } +token = { String | Array | Boolean | Number | Keyword } // KEYWORDS // has to be atomic for no implicit separate (spaces) @@ -28,7 +28,7 @@ String = ${ // Array Array = ${ "[" ~ "]" - | "[" ~ whitespace* ~ NEWLINE* ~ whitespace* ~ Token ~ ("," ~ whitespace* ~ NEWLINE* ~ whitespace* ~ Token)* ~ NEWLINE* ~ "]" + | "[" ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token ~ ("," ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token)* ~ NEWLINE* ~ "]" } // BOOLEAN @@ -48,11 +48,11 @@ COMMENT = _{ "#" ~ char* } Colon = { ":" } // lines are comprised of a statement -line = @{ (Token ~ whitespace+)* ~ Token ~ Colon? } +Line = @{ (token ~ whitespace+)* ~ token ~ Colon? } -file = { SOI ~ NEWLINE* ~ block_content* ~ NEWLINE* ~ EOI } +File = { SOI ~ NEWLINE* ~ block_content* ~ NEWLINE* ~ EOI } -block = { +Block = { // The first line in the block PEEK_ALL ~ PUSH(" "+ | "\t"+) ~ block_content ~ // Subsequent lines in the block @@ -66,5 +66,5 @@ block = { whitespace = _{ " " } block_content = _{ - line ~ (whitespace+ ~ line)* ~ (NEWLINE | EOI) ~ block* + Line ~ (whitespace+ ~ Line)* ~ (NEWLINE | EOI) ~ Block* } \ No newline at end of file