From 6707b97ef5473c6a888870675796b7351757d265 Mon Sep 17 00:00:00 2001
From: ElnuDev
Date: Sat, 27 May 2023 13:19:45 -0700
Subject: [PATCH 1/2] Use PascalCase for exposed pest grammar

---
 renrs/src/lib.rs   | 14 +++++++-------
 renrs/src/rpy.pest | 32 +++++++++++++++++---------------
 2 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/renrs/src/lib.rs b/renrs/src/lib.rs
index 916dd69..02e146f 100644
--- a/renrs/src/lib.rs
+++ b/renrs/src/lib.rs
@@ -178,15 +178,15 @@ fn parse_token(pair: Pair) -> Token {
         };
     }
     match token {
-        Rule::string => {
+        Rule::String => {
             let contents = contents!();
             Token::Str(match contents.as_rule() {
-                Rule::single_quote_string_data => contents.as_str().replace("\\'", "'"),
-                Rule::double_quote_string_data => contents.as_str().replace("\\\"", "\""),
+                Rule::SingleQuoteStringData => contents.as_str().replace("\\'", "'"),
+                Rule::DoubleQuoteStringData => contents.as_str().replace("\\\"", "\""),
                 _ => unreachable!(),
             })
         }
-        Rule::array => {
+        Rule::Array => {
             let contents = contents!();
             let mut array = Vec::new();
             for token in contents.into_inner() {
@@ -194,13 +194,13 @@ fn parse_token(pair: Pair) -> Token {
             }
             Token::Array(array)
         }
-        Rule::boolean => Token::Boolean(match pair.as_str() {
+        Rule::Boolean => Token::Boolean(match pair.as_str() {
             "True" => true,
             "False" => false,
             _ => unreachable!(),
         }),
-        Rule::number => Token::Number(pair.as_str().parse().unwrap()),
-        Rule::keyword => Token::Keyword(pair.as_str().to_owned()),
+        Rule::Number => Token::Number(pair.as_str().parse().unwrap()),
+        Rule::Keyword => Token::Keyword(pair.as_str().to_owned()),
         __ => unreachable!(),
     }
 }
diff --git a/renrs/src/rpy.pest b/renrs/src/rpy.pest
index 0834d09..a8b69c6 100644
--- a/renrs/src/rpy.pest
+++ b/renrs/src/rpy.pest
@@ -1,52 +1,54 @@
 // characters are anything but newlines
 char = { !NEWLINE ~ ANY }

-// token definition
+// Token definition
 // http://pest.rs/book/grammars/syntax.html#atomic
 inner = @{ char* }
-token = { string | array | boolean | number | keyword }
+Token = { String | Array | Boolean | Number | Keyword }

 // KEYWORDS
 // has to be atomic for no implicit separate (spaces)
-keyword = ${ (!(whitespace | NEWLINE) ~ ANY)+ }
+Keyword = ${ (!(whitespace | NEWLINE) ~ ANY)+ }

 // STRING
-single_quote_string_data = @{ (
+SingleQuoteStringData = @{ (
     "\\'" // Escaped single quotes
     | (!"'" ~ ANY)
 )* }
-double_quote_string_data = @{ (
+DoubleQuoteStringData = @{ (
     "\\\"" // Escaped double quotes
     | (!"\"" ~ ANY)
 )* }
-string = ${
-    ("'" ~ single_quote_string_data ~ "'")
-    | ("\"" ~ double_quote_string_data ~ "\"")
+String = ${
+    ("'" ~ SingleQuoteStringData ~ "'")
+    | ("\"" ~ DoubleQuoteStringData ~ "\"")
 }

-// ARRAY
-array = ${
+// Array
+Array = ${
     "[" ~ "]"
-    | "[" ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token ~ ("," ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token)* ~ NEWLINE* ~ "]"
+    | "[" ~ whitespace* ~ NEWLINE* ~ whitespace* ~ Token ~ ("," ~ whitespace* ~ NEWLINE* ~ whitespace* ~ Token)* ~ NEWLINE* ~ "]"
 }

 // BOOLEAN
-boolean = ${ "True" | "False" }
+Boolean = ${ "True" | "False" }

 // NUMBER
-number = @{
+Number = @{
     "-"? ~ ("0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*) ~ ("." ~ ASCII_DIGIT*)?
 }

 // comments are a # followed by
-// any number of non-newline characters
+// any Number of non-newline characters
 COMMENT = _{ "#" ~ char* }

+Colon = { ":" }
+
 // lines are comprised of a statement
-line = @{ (token ~ whitespace+)* ~ token }
+line = @{ (Token ~ whitespace+)* ~ Token ~ Colon? }

 file = { SOI ~ NEWLINE* ~ block_content* ~ NEWLINE* ~ EOI }

From c0c8153cbe26693699fb89516cac05d22c15d9ae Mon Sep 17 00:00:00 2001
From: ElnuDev
Date: Sat, 27 May 2023 14:20:11 -0700
Subject: [PATCH 2/2] Simplify parsing

---
 renrs/src/lib.rs   | 185 +++++++++++++++++----------------------------
 renrs/src/rpy.pest |  12 +--
 2 files changed, 77 insertions(+), 120 deletions(-)

diff --git a/renrs/src/lib.rs b/renrs/src/lib.rs
index 02e146f..4351603 100644
--- a/renrs/src/lib.rs
+++ b/renrs/src/lib.rs
@@ -32,19 +32,15 @@ impl Token {

 use Token::*;

-// Parsing types
-
-type Line = Vec<Token>;
-
 // Indented command block
 #[derive(Debug)]
-struct Block<T> {
-    elements: Vec<BlockElement<T>>,
+struct CommandBlock {
+    elements: Vec<BlockElement>,
     next: Option<usize>,
 }

-impl<T> Block<T> {
-    fn next(&mut self) -> Option<&T> {
+impl CommandBlock {
+    fn next(&mut self) -> Option<&Command> {
         let mut next = match self.next {
             Some(next) => next,
             None => return None,
@@ -80,7 +76,7 @@ impl<T> Block<T> {
     }
 }

-impl<T> Default for Block<T> {
+impl Default for CommandBlock {
     fn default() -> Self {
         Self {
             elements: Vec::new(),
@@ -90,13 +86,11 @@ impl<T> Default for Block<T> {
 }

 #[derive(Debug)]
-enum BlockElement<T> {
-    Command(T),
-    Block(Block<T>),
+enum BlockElement {
+    Command(Command),
+    Block(CommandBlock),
 }

-type LineBlock = Block<Line>;
-type CommandBlock = Block<Command>;
 type Script = CommandBlock;

 // Parsed script commands
@@ -123,53 +117,82 @@ pub enum Event {
     Say { name: Option<String>, text: String },
 }

-// ==========================================
-// Step 1 parsing
-// converting from pest pairs to Token blocks
-// ==========================================
+// ======
+// Parser
+// ======

-// Tokenize raw script string
-fn parse(script: &str) -> LineBlock {
-    let file = RpyParser::parse(Rule::file, script)
+type Pair<'a> = pest::iterators::Pair<'a, Rule>;
+
+// Read file into commands
+fn parse_file(file_path: &PathBuf) -> CommandBlock {
+    let unparsed_file = fs::read_to_string(file_path).expect("cannot find file");
+    parse(&unparsed_file)
+}
+
+fn parse(script: &str) -> CommandBlock {
+    let file = RpyParser::parse(Rule::File, script)
         .expect("unsuccessful parse")
         .next()
         .unwrap();
     parse_block(file)
 }

-type Pair<'a> = pest::iterators::Pair<'a, Rule>;
-
-// Tokenize block
-fn parse_block(pair: Pair) -> LineBlock {
-    let mut block = LineBlock::default();
-    for element in pair.into_inner() {
-        block.elements.push(match element.as_rule() {
-            Rule::block => BlockElement::Block(parse_block(element)),
-            Rule::line => {
-                let line = parse_line(element);
-                // TODO: For some reason a blank final line is always parsed
-                if line.len() == 0 {
-                    continue;
-                }
-                BlockElement::Command(line)
-            },
-            Rule::EOI => break, // end
+fn parse_block(block: Pair) -> CommandBlock {
+    CommandBlock {
+        elements: block.into_inner().filter_map(|pair| match pair.as_rule() {
+            Rule::Line => Some(BlockElement::Command(parse_command(pair))),
+            Rule::Block => Some(BlockElement::Block(parse_block(pair))),
+            Rule::EOI => None, // end
             _ => unreachable!(),
-        });
+        }).collect(),
+        ..Default::default()
     }
-    block
 }

-// Tokenize line
-fn parse_line(pair: Pair) -> Line {
-    let mut tokens = Vec::new();
-    for token in pair.into_inner() {
-        tokens.push(parse_token(token));
+fn parse_command(pair: Pair) -> Command {
+    use Token::*;
+    let line: Vec<Token> = pair
+        .into_inner()
+        .map(|pair| parse_token(pair))
+        .collect();
+    macro_rules! unknown {
+        () => {
+            panic!("Unknown command {}", describe_line(&line))
+        };
+    }
+    match line.as_slice() {
+        [Str(text)] => Say {
+            name: None,
+            text: text.to_owned(),
+        },
+        [Str(name), Str(text)] => Say {
+            name: Some(name.to_owned()),
+            text: text.to_owned(),
+        },
+        [Keyword(keyword), Str(food), tail @ ..] if keyword.eq("eat") => Eat {
+            food: food.to_owned(),
+            politely: match tail {
+                [Boolean(politely)] => *politely,
+                _ => unknown!(),
+            },
+        },
+        _ => unknown!(),
     }
-    tokens
 }

-// Tokenize token
+// Line description e.g. [String, Keyword, Array]
+// Used in parse_command as feedback for invalid commands
+fn describe_line(line: &[Token]) -> String {
+    let mut description = "[".to_owned();
+    let mut iter = line.iter();
+    description.push_str(&format!("{}", iter.next().unwrap().print()));
+    for token in iter {
+        description.push_str(&format!(", {}", token.print()));
+    }
+    description.push_str("]");
+    description
+}
+
 fn parse_token(pair: Pair) -> Token {
     let token = pair.as_rule();
     macro_rules! contents {
@@ -205,72 +228,6 @@ fn parse_token(pair: Pair) -> Token {
     }
 }

-
-// ==============================================
-// Step 2 reading
-// converting from Token blocks to Command blocks
-// ==============================================
-
-// Read file into commands
-fn read_file(file_path: &PathBuf) -> CommandBlock {
-    let line_block = {
-        let unparsed_file = fs::read_to_string(file_path).expect("cannot find file");
-        parse(&unparsed_file)
-    };
-    read_block(&line_block)
-}
-
-// Read line block into command block
-fn read_block(block: &LineBlock) -> CommandBlock {
-    CommandBlock {
-        elements: block.elements.iter().map(|element| match element {
-            BlockElement::Command(line) => BlockElement::Command(read_command(&line)),
-            BlockElement::Block(block) => BlockElement::Block(read_block(&block)),
-        }).collect(),
-        ..Default::default()
-    }
-}
-
-// Read token array to command
-fn read_command(line: &Line) -> Command {
-    macro_rules! unknown {
-        () => {
-            panic!("Unknown command {}", describe_line(&line))
-        };
-    }
-    match line.as_slice() {
-        [Str(text)] => Say {
-            name: None,
-            text: text.to_owned(),
-        },
-        [Str(name), Str(text)] => Say {
-            name: Some(name.to_owned()),
-            text: text.to_owned(),
-        },
-        [Keyword(keyword), Str(food), tail @ ..] if keyword.eq("eat") => Eat {
-            food: food.to_owned(),
-            politely: match tail {
-                [Boolean(politely)] => *politely,
-                _ => unknown!(),
-            },
-        },
-        _ => unknown!(),
-    }
-}
-
-// Line description e.g. [String, Keyword, Array]
-// Used in parse_command as feedback for invalid commands
-fn describe_line(line: &Line) -> String {
-    let mut description = "[".to_owned();
-    let mut iter = line.iter();
-    description.push_str(&format!("{}", iter.next().unwrap().print()));
-    for token in iter {
-        description.push_str(&format!(", {}", token.print()));
-    }
-    description.push_str("]");
-    description
-}
-
 // =====
 // State
 // =====
@@ -282,7 +239,7 @@ pub struct State {
 impl State {
     pub fn from_file(file: PathBuf) -> State {
         State {
-            script: read_file(&file),
+            script: parse_file(&file),
         }
     }

diff --git a/renrs/src/rpy.pest b/renrs/src/rpy.pest
index a8b69c6..b50e52f 100644
--- a/renrs/src/rpy.pest
+++ b/renrs/src/rpy.pest
@@ -5,7 +5,7 @@ char = { !NEWLINE ~ ANY }
 // http://pest.rs/book/grammars/syntax.html#atomic
 inner = @{ char* }

-Token = { String | Array | Boolean | Number | Keyword }
+token = { String | Array | Boolean | Number | Keyword }

 // KEYWORDS
 // has to be atomic for no implicit separate (spaces)
@@ -28,7 +28,7 @@ String = ${
 // Array
 Array = ${
     "[" ~ "]"
-    | "[" ~ whitespace* ~ NEWLINE* ~ whitespace* ~ Token ~ ("," ~ whitespace* ~ NEWLINE* ~ whitespace* ~ Token)* ~ NEWLINE* ~ "]"
+    | "[" ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token ~ ("," ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token)* ~ NEWLINE* ~ "]"
 }

 // BOOLEAN
@@ -48,11 +48,11 @@ COMMENT = _{ "#" ~ char* }
 Colon = { ":" }

 // lines are comprised of a statement
-line = @{ (Token ~ whitespace+)* ~ Token ~ Colon? }
+Line = @{ (token ~ whitespace+)* ~ token ~ Colon? }

-file = { SOI ~ NEWLINE* ~ block_content* ~ NEWLINE* ~ EOI }
+File = { SOI ~ NEWLINE* ~ block_content* ~ NEWLINE* ~ EOI }

-block = {
+Block = {
     // The first line in the block
     PEEK_ALL ~ PUSH(" "+ | "\t"+) ~ block_content ~
     // Subsequent lines in the block
@@ -66,5 +66,5 @@ block = {
 whitespace = _{ " " }

 block_content = _{
-    line ~ (whitespace+ ~ line)* ~ (NEWLINE | EOI) ~ block*
+    Line ~ (whitespace+ ~ Line)* ~ (NEWLINE | EOI) ~ Block*
 }
\ No newline at end of file
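
The sketch below is a self-contained illustration of the slice-pattern dispatch that patch 2 moves into parse_command: a parsed line of tokens is matched directly against shapes such as [Str(text)] or [Keyword, Str, tail @ ..] to build a command. The trimmed-down Token and Command enums, the to_command name, and the sample lines in main are illustrative stand-ins rather than the crate's actual API (the real Token also carries Number and Array variants, and the real code reports bad lines through the unknown!/describe_line helpers shown in the diff).

// Illustrative sketch only; types and names are simplified stand-ins,
// not the renrs crate's public API.

#[derive(Debug)]
enum Token {
    Str(String),
    Keyword(String),
    Boolean(bool),
}

#[derive(Debug)]
enum Command {
    Say { name: Option<String>, text: String },
    Eat { food: String, politely: bool },
}

// Mirrors the match in parse_command: each command is recognized by the
// shape of its token line.
fn to_command(line: &[Token]) -> Command {
    use Token::*;
    match line {
        // "text" -> narration
        [Str(text)] => Command::Say { name: None, text: text.clone() },
        // "name" "text" -> dialogue
        [Str(name), Str(text)] => Command::Say {
            name: Some(name.clone()),
            text: text.clone(),
        },
        // eat "food" True|False -> keyword command with a trailing flag
        [Keyword(keyword), Str(food), tail @ ..] if keyword == "eat" => Command::Eat {
            food: food.clone(),
            politely: match tail {
                [Boolean(politely)] => *politely,
                other => panic!("unexpected arguments {:?}", other),
            },
        },
        other => panic!("unknown command {:?}", other),
    }
}

fn main() {
    // Token lines as the parser would produce them for:
    //   "Elnu" "Hello, world!"
    //   eat "taiyaki" True
    let say = vec![
        Token::Str("Elnu".to_owned()),
        Token::Str("Hello, world!".to_owned()),
    ];
    let eat = vec![
        Token::Keyword("eat".to_owned()),
        Token::Str("taiyaki".to_owned()),
        Token::Boolean(true),
    ];
    println!("{:?}", to_command(&say));
    println!("{:?}", to_command(&eat));
}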