From 6707b97ef5473c6a888870675796b7351757d265 Mon Sep 17 00:00:00 2001
From: ElnuDev
Date: Sat, 27 May 2023 13:19:45 -0700
Subject: [PATCH 1/2] Use PascalCase for exposed pest grammar

---
 renrs/src/lib.rs   | 14 +++++++-------
 renrs/src/rpy.pest | 32 +++++++++++++++++---------------
 2 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/renrs/src/lib.rs b/renrs/src/lib.rs
index 916dd69..02e146f 100644
--- a/renrs/src/lib.rs
+++ b/renrs/src/lib.rs
@@ -178,15 +178,15 @@ fn parse_token(pair: Pair) -> Token {
         };
     }
     match token {
-        Rule::string => {
+        Rule::String => {
             let contents = contents!();
             Token::Str(match contents.as_rule() {
-                Rule::single_quote_string_data => contents.as_str().replace("\\'", "'"),
-                Rule::double_quote_string_data => contents.as_str().replace("\\\"", "\""),
+                Rule::SingleQuoteStringData => contents.as_str().replace("\\'", "'"),
+                Rule::DoubleQuoteStringData => contents.as_str().replace("\\\"", "\""),
                 _ => unreachable!(),
             })
         }
-        Rule::array => {
+        Rule::Array => {
             let contents = contents!();
             let mut array = Vec::new();
             for token in contents.into_inner() {
@@ -194,13 +194,13 @@ fn parse_token(pair: Pair) -> Token {
             }
             Token::Array(array)
         }
-        Rule::boolean => Token::Boolean(match pair.as_str() {
+        Rule::Boolean => Token::Boolean(match pair.as_str() {
             "True" => true,
             "False" => false,
             _ => unreachable!(),
         }),
-        Rule::number => Token::Number(pair.as_str().parse().unwrap()),
-        Rule::keyword => Token::Keyword(pair.as_str().to_owned()),
+        Rule::Number => Token::Number(pair.as_str().parse().unwrap()),
+        Rule::Keyword => Token::Keyword(pair.as_str().to_owned()),
         __ => unreachable!(),
     }
 }
diff --git a/renrs/src/rpy.pest b/renrs/src/rpy.pest
index 0834d09..a8b69c6 100644
--- a/renrs/src/rpy.pest
+++ b/renrs/src/rpy.pest
@@ -1,52 +1,54 @@
 // characters are anything but newlines
 char = { !NEWLINE ~ ANY }

-// token definition
+// Token definition
 // http://pest.rs/book/grammars/syntax.html#atomic
 inner = @{ char* }
-token = { string | array | boolean | number | keyword }
+Token = { String | Array | Boolean | Number | Keyword }

 // KEYWORDS
 // has to be atomic for no implicit separate (spaces)
-keyword = ${ (!(whitespace | NEWLINE) ~ ANY)+ }
+Keyword = ${ (!(whitespace | NEWLINE) ~ ANY)+ }

 // STRING
-single_quote_string_data = @{ (
+SingleQuoteStringData = @{ (
     "\\'" // Escaped single quotes
     | (!"'" ~ ANY)
 )* }
-double_quote_string_data = @{ (
+DoubleQuoteStringData = @{ (
     "\\\"" // Escaped double quotes
     | (!"\"" ~ ANY)
 )* }
-string = ${
-    ("'" ~ single_quote_string_data ~ "'")
-    | ("\"" ~ double_quote_string_data ~ "\"")
+String = ${
+    ("'" ~ SingleQuoteStringData ~ "'")
+    | ("\"" ~ DoubleQuoteStringData ~ "\"")
 }

-// ARRAY
-array = ${
+// Array
+Array = ${
     "[" ~ "]"
-    | "[" ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token ~ ("," ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token)* ~ NEWLINE* ~ "]"
+    | "[" ~ whitespace* ~ NEWLINE* ~ whitespace* ~ Token ~ ("," ~ whitespace* ~ NEWLINE* ~ whitespace* ~ Token)* ~ NEWLINE* ~ "]"
 }

 // BOOLEAN
-boolean = ${ "True" | "False" }
+Boolean = ${ "True" | "False" }

 // NUMBER
-number = @{
+Number = @{
     "-"? ~ ("0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*) ~ ("." ~ ASCII_DIGIT*)?
 }

 // comments are a # followed by
-// any number of non-newline characters
+// any Number of non-newline characters
 COMMENT = _{ "#" ~ char* }

+Colon = { ":" }
+
 // lines are comprised of a statement
-line = @{ (token ~ whitespace+)* ~ token }
+line = @{ (Token ~ whitespace+)* ~ Token ~ Colon? }

 file = { SOI ~ NEWLINE* ~ block_content* ~ NEWLINE* ~ EOI }

From c0c8153cbe26693699fb89516cac05d22c15d9ae Mon Sep 17 00:00:00 2001
From: ElnuDev
Date: Sat, 27 May 2023 14:20:11 -0700
Subject: [PATCH 2/2] Simplify parsing

---
 renrs/src/lib.rs   | 185 +++++++++++++++++----------------------------
 renrs/src/rpy.pest |  12 +--
 2 files changed, 77 insertions(+), 120 deletions(-)

diff --git a/renrs/src/lib.rs b/renrs/src/lib.rs
index 02e146f..4351603 100644
--- a/renrs/src/lib.rs
+++ b/renrs/src/lib.rs
@@ -32,19 +32,15 @@ impl Token {

 use Token::*;

-// Parsing types
-
-type Line = Vec<Token>;
-
 // Indented command block
 #[derive(Debug)]
-struct Block<T> {
-    elements: Vec<BlockElement<T>>,
+struct CommandBlock {
+    elements: Vec<BlockElement>,
     next: Option<usize>,
 }

-impl<T> Block<T> {
-    fn next(&mut self) -> Option<&T> {
+impl CommandBlock {
+    fn next(&mut self) -> Option<&Command> {
         let mut next = match self.next {
             Some(next) => next,
             None => return None,
@@ -80,7 +76,7 @@ impl<T> Block<T> {
     }
 }

-impl<T> Default for Block<T> {
+impl Default for CommandBlock {
     fn default() -> Self {
         Self {
             elements: Vec::new(),
@@ -90,13 +86,11 @@ impl<T> Default for Block<T> {
 }

 #[derive(Debug)]
-enum BlockElement<T> {
-    Command(T),
-    Block(Block<T>),
+enum BlockElement {
+    Command(Command),
+    Block(CommandBlock),
 }

-type LineBlock = Block<Line>;
-type CommandBlock = Block<Command>;
 type Script = CommandBlock;

 // Parsed script commands
@@ -123,53 +117,82 @@ pub enum Event {
     Say { name: Option<String>, text: String },
 }

-// ==========================================
-// Step 1 parsing
-// converting from pest pairs to Token blocks
-// ==========================================
+// ======
+// Parser
+// ======

-// Tokenize raw script string
-fn parse(script: &str) -> LineBlock {
-    let file = RpyParser::parse(Rule::file, script)
+type Pair<'a> = pest::iterators::Pair<'a, Rule>;
+
+// Read file into commands
+fn parse_file(file_path: &PathBuf) -> CommandBlock {
+    let unparsed_file = fs::read_to_string(file_path).expect("cannot find file");
+    parse(&unparsed_file)
+}
+
+fn parse(script: &str) -> CommandBlock {
+    let file = RpyParser::parse(Rule::File, script)
         .expect("unsuccessful parse")
         .next()
         .unwrap();
     parse_block(file)
 }

-type Pair<'a> = pest::iterators::Pair<'a, Rule>;
-
-// Tokenize block
-fn parse_block(pair: Pair) -> LineBlock {
-    let mut block = LineBlock::default();
-    for element in pair.into_inner() {
-        block.elements.push(match element.as_rule() {
-            Rule::block => BlockElement::Block(parse_block(element)),
-            Rule::line => {
-                let line = parse_line(element);
-                // TODO: For some reason a blank final line is always parsed
-                if line.len() == 0 {
-                    continue;
-                }
-                BlockElement::Command(line)
-            },
-            Rule::EOI => break, // end
+fn parse_block(block: Pair) -> CommandBlock {
+    CommandBlock {
+        elements: block.into_inner().filter_map(|pair| match pair.as_rule() {
+            Rule::Line => Some(BlockElement::Command(parse_command(pair))),
+            Rule::Block => Some(BlockElement::Block(parse_block(pair))),
+            Rule::EOI => None, // end
             _ => unreachable!(),
-        });
+        }).collect(),
+        ..Default::default()
     }
-    block
 }

-// Tokenize line
-fn parse_line(pair: Pair) -> Line {
-    let mut tokens = Vec::new();
-    for token in pair.into_inner() {
-        tokens.push(parse_token(token));
+fn parse_command(pair: Pair) -> Command {
+    use Token::*;
+    let line: Vec<Token> = pair
+        .into_inner()
+        .map(|pair| parse_token(pair))
+        .collect();
+    macro_rules! unknown {
+        () => {
+            panic!("Unknown command {}", describe_line(&line))
+        };
+    }
+    match line.as_slice() {
+        [Str(text)] => Say {
+            name: None,
+            text: text.to_owned(),
+        },
+        [Str(name), Str(text)] => Say {
+            name: Some(name.to_owned()),
+            text: text.to_owned(),
+        },
+        [Keyword(keyword), Str(food), tail @ ..] if keyword.eq("eat") => Eat {
+            food: food.to_owned(),
+            politely: match tail {
+                [Boolean(politely)] => *politely,
+                _ => unknown!(),
+            },
+        },
+        _ => unknown!(),
     }
-    tokens
 }

-// Tokenize token
+// Line description e.g. [String, Keyword, Array]
+// Used in parse_command as feedback for invalid commands
+fn describe_line(line: &[Token]) -> String {
+    let mut description = "[".to_owned();
+    let mut iter = line.iter();
+    description.push_str(&format!("{}", iter.next().unwrap().print()));
+    for token in iter {
+        description.push_str(&format!(", {}", token.print()));
+    }
+    description.push_str("]");
+    description
+}
+
 fn parse_token(pair: Pair) -> Token {
     let token = pair.as_rule();
     macro_rules! contents {
@@ -205,72 +228,6 @@ fn parse_token(pair: Pair) -> Token {
     }
 }

-
-// ==============================================
-// Step 2 reading
-// converting from Token blocks to Command blocks
-// ==============================================
-
-// Read file into commands
-fn read_file(file_path: &PathBuf) -> CommandBlock {
-    let line_block = {
-        let unparsed_file = fs::read_to_string(file_path).expect("cannot find file");
-        parse(&unparsed_file)
-    };
-    read_block(&line_block)
-}
-
-// Read line block into command block
-fn read_block(block: &LineBlock) -> CommandBlock {
-    CommandBlock {
-        elements: block.elements.iter().map(|element| match element {
-            BlockElement::Command(line) => BlockElement::Command(read_command(&line)),
-            BlockElement::Block(block) => BlockElement::Block(read_block(&block)),
-        }).collect(),
-        ..Default::default()
-    }
-}
-
-// Read token array to command
-fn read_command(line: &Line) -> Command {
-    macro_rules! unknown {
-        () => {
-            panic!("Unknown command {}", describe_line(&line))
-        };
-    }
-    match line.as_slice() {
-        [Str(text)] => Say {
-            name: None,
-            text: text.to_owned(),
-        },
-        [Str(name), Str(text)] => Say {
-            name: Some(name.to_owned()),
-            text: text.to_owned(),
-        },
-        [Keyword(keyword), Str(food), tail @ ..] if keyword.eq("eat") => Eat {
-            food: food.to_owned(),
-            politely: match tail {
-                [Boolean(politely)] => *politely,
-                _ => unknown!(),
-            },
-        },
-        _ => unknown!(),
-    }
-}
-
-// Line description e.g. [String, Keyword, Array]
-// Used in parse_command as feedback for invalid commands
-fn describe_line(line: &Line) -> String {
-    let mut description = "[".to_owned();
-    let mut iter = line.iter();
-    description.push_str(&format!("{}", iter.next().unwrap().print()));
-    for token in iter {
-        description.push_str(&format!(", {}", token.print()));
-    }
-    description.push_str("]");
-    description
-}
-
 // =====
 // State
 // =====
@@ -282,7 +239,7 @@ pub struct State {
 impl State {
     pub fn from_file(file: PathBuf) -> State {
         State {
-            script: read_file(&file),
+            script: parse_file(&file),
         }
     }

diff --git a/renrs/src/rpy.pest b/renrs/src/rpy.pest
index a8b69c6..b50e52f 100644
--- a/renrs/src/rpy.pest
+++ b/renrs/src/rpy.pest
@@ -5,7 +5,7 @@ char = { !NEWLINE ~ ANY }
 // http://pest.rs/book/grammars/syntax.html#atomic
 inner = @{ char* }

-Token = { String | Array | Boolean | Number | Keyword }
+token = { String | Array | Boolean | Number | Keyword }

 // KEYWORDS
 // has to be atomic for no implicit separate (spaces)
@@ -28,7 +28,7 @@ String = ${
 // Array
 Array = ${
     "[" ~ "]"
-    | "[" ~ whitespace* ~ NEWLINE* ~ whitespace* ~ Token ~ ("," ~ whitespace* ~ NEWLINE* ~ whitespace* ~ Token)* ~ NEWLINE* ~ "]"
+    | "[" ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token ~ ("," ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token)* ~ NEWLINE* ~ "]"
 }

 // BOOLEAN
@@ -48,11 +48,11 @@ COMMENT = _{ "#" ~ char* }
 Colon = { ":" }

 // lines are comprised of a statement
-line = @{ (Token ~ whitespace+)* ~ Token ~ Colon? }
+Line = @{ (token ~ whitespace+)* ~ token ~ Colon? }

-file = { SOI ~ NEWLINE* ~ block_content* ~ NEWLINE* ~ EOI }
+File = { SOI ~ NEWLINE* ~ block_content* ~ NEWLINE* ~ EOI }

-block = {
+Block = {
     // The first line in the block
     PEEK_ALL ~ PUSH(" "+ | "\t"+) ~ block_content ~
     // Subsequent lines in the block
@@ -66,5 +66,5 @@ block = {
 whitespace = _{ " " }

 block_content = _{
-    line ~ (whitespace+ ~ line)* ~ (NEWLINE | EOI) ~ block*
+    Line ~ (whitespace+ ~ Line)* ~ (NEWLINE | EOI) ~ Block*
 }
\ No newline at end of file
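
The sketch below is a self-contained illustration of the slice-pattern dispatch that patch 2 moves into parse_command: a parsed line of tokens is matched directly against shapes such as [Str(text)] or [Keyword, Str, tail @ ..] to build a command. The trimmed-down Token and Command enums, the to_command name, and the sample lines in main are illustrative stand-ins rather than the crate's actual API (the real Token also carries Number and Array variants, and the real code reports bad lines through the unknown!/describe_line helpers shown in the diff).

// Illustrative sketch only; types and names are simplified stand-ins,
// not the renrs crate's public API.

#[derive(Debug)]
enum Token {
    Str(String),
    Keyword(String),
    Boolean(bool),
}

#[derive(Debug)]
enum Command {
    Say { name: Option<String>, text: String },
    Eat { food: String, politely: bool },
}

// Mirrors the match in parse_command: each command is recognized by the
// shape of its token line.
fn to_command(line: &[Token]) -> Command {
    use Token::*;
    match line {
        // "text" -> narration
        [Str(text)] => Command::Say { name: None, text: text.clone() },
        // "name" "text" -> dialogue
        [Str(name), Str(text)] => Command::Say {
            name: Some(name.clone()),
            text: text.clone(),
        },
        // eat "food" True|False -> keyword command with a trailing flag
        [Keyword(keyword), Str(food), tail @ ..] if keyword == "eat" => Command::Eat {
            food: food.clone(),
            politely: match tail {
                [Boolean(politely)] => *politely,
                other => panic!("unexpected arguments {:?}", other),
            },
        },
        other => panic!("unknown command {:?}", other),
    }
}

fn main() {
    // Token lines as the parser would produce them for:
    //   "Elnu" "Hello, world!"
    //   eat "taiyaki" True
    let say = vec![
        Token::Str("Elnu".to_owned()),
        Token::Str("Hello, world!".to_owned()),
    ];
    let eat = vec![
        Token::Keyword("eat".to_owned()),
        Token::Str("taiyaki".to_owned()),
        Token::Boolean(true),
    ];
    println!("{:?}", to_command(&say));
    println!("{:?}", to_command(&eat));
}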