Simplify parsing

2023-05-27 14:20:11 -07:00 · 2023-05-27 14:20:11 -07:00 · c0c8153cbe
commit c0c8153cbe
parent 6707b97ef5
2 changed files with 77 additions and 120 deletions
--- a/renrs/src/lib.rs
+++ b/renrs/src/lib.rs
@ -32,19 +32,15 @@ impl Token {
 use Token::*;
 // Parsing types
 type Line = Vec<Token>;
 // Indented command block
 #[derive(Debug)]
-struct Block<T> {
+struct CommandBlock {
-    elements: Vec<BlockElement<T>>,
+    elements: Vec<BlockElement>,
    next: Option<usize>,
 }
-impl<T> Block<T> {
+impl CommandBlock {
-    fn next(&mut self) -> Option<&T> {
+    fn next(&mut self) -> Option<&Command> {
        let mut next = match self.next {
            Some(next) => next,
            None => return None,
@ -80,7 +76,7 @@ impl<T> Block<T> {
    }
 }
-impl<T> Default for Block<T> {
+impl Default for CommandBlock {
    fn default() -> Self {
        Self {
            elements: Vec::new(),
@ -90,13 +86,11 @@ impl<T> Default for Block<T> {
 }
 #[derive(Debug)]
-enum BlockElement<T> {
+enum BlockElement {
-    Command(T),
+    Command(Command),
-    Block(Block<T>),
+    Block(CommandBlock),
 }
 type LineBlock = Block<Line>;
 type CommandBlock = Block<Command>;
 type Script = CommandBlock;
 // Parsed script commands
@ -123,53 +117,82 @@ pub enum Event {
    Say { name: Option<String>, text: String },
 }
-// ==========================================
+// ======
-// Step 1 parsing
+// Parser
-// converting from pest pairs to Token blocks
+// ======
 // ==========================================
-// Tokenize raw script string
+type Pair<'a> = pest::iterators::Pair<'a, Rule>;
-fn parse(script: &str) -> LineBlock {
+
-    let file = RpyParser::parse(Rule::file, script)
+// Read file into commands
 fn parse_file(file_path: &PathBuf) -> CommandBlock {
    // Load the whole script as text; a failed read panics with "cannot find file".
    let script_text = fs::read_to_string(file_path).expect("cannot find file");
    // Hand the text to the string-based parser.
    parse(&script_text)
 }
 // Parse a script string into its command block
 // Panics with "unsuccessful parse" when the text does not match the File rule
 fn parse(script: &str) -> CommandBlock {
    let mut pairs = RpyParser::parse(Rule::File, script).expect("unsuccessful parse");
    // The first pair yielded by the parser is used as the root of the command tree
    let root = pairs.next().unwrap();
    parse_block(root)
 }
-type Pair<'a> = pest::iterators::Pair<'a, Rule>;
+fn parse_block(block: Pair) -> CommandBlock {
-
+    CommandBlock {
-// Tokenize block
+        elements: block.into_inner().filter_map(|pair| match pair.as_rule() {
-fn parse_block(pair: Pair) -> LineBlock {
+            Rule::Line => Some(BlockElement::Command(parse_command(pair))),
-    let mut block = LineBlock::default();
+            Rule::Block => Some(BlockElement::Block(parse_block(pair))),
-    for element in pair.into_inner() {
+            Rule::EOI => None, // end
        block.elements.push(match element.as_rule() {
            Rule::block => BlockElement::Block(parse_block(element)),
            Rule::line => {
                let line = parse_line(element);
                // TODO: For some reason a blank final line is always parsed
                if line.len() == 0 {
                    continue;
                }
                BlockElement::Command(line)
            },
            Rule::EOI => break, // end
            _ => unreachable!(),
-        });
+        }).collect(),
        ..Default::default()
    }
    block
 }
-// Tokenize line
+fn parse_command(pair: Pair) -> Command {
-fn parse_line(pair: Pair) -> Line {
+    use Token::*;
-    let mut tokens = Vec::new();
+    let line: Vec<Token> = pair
-    for token in pair.into_inner() {
+        .into_inner()
-        tokens.push(parse_token(token));
+        .map(|pair| parse_token(pair))
        .collect();
    macro_rules! unknown {
        () => {
            panic!("Unknown command {}", describe_line(&line))
        };
    }
    match line.as_slice() {
        [Str(text)] => Say {
            name: None,
            text: text.to_owned(),
        },
        [Str(name), Str(text)] => Say {
            name: Some(name.to_owned()),
            text: text.to_owned(),
        },
        [Keyword(keyword), Str(food), tail @ ..] if keyword.eq("eat") => Eat {
            food: food.to_owned(),
            politely: match tail {
                [Boolean(politely)] => *politely,
                _ => unknown!(),
            },
        },
        _ => unknown!(),
    }
    tokens
 }
-// Tokenize token
+// Line description e.g. [String, Keyword, Array]
 // Used in parse_command as feedback for invalid commands
 // An empty line is described as "[]" instead of panicking, so the caller
 // can still report it through its own "Unknown command" message
 fn describe_line(line: &[Token]) -> String {
    // Printed form of every token, in line order
    let names: Vec<String> = line
        .iter()
        .map(|token| token.print().to_string())
        .collect();
    // Comma-separated list wrapped in brackets, e.g. [String, Keyword, Array]
    format!("[{}]", names.join(", "))
 }
 fn parse_token(pair: Pair) -> Token {
    let token = pair.as_rule();
    macro_rules! contents {
@ -205,72 +228,6 @@ fn parse_token(pair: Pair) -> Token {
    }
 }
 // ==============================================
 // Step 2 reading
 // converting from Token blocks to Command blocks
 // ==============================================
 // Load a script file and convert it to commands
 // Panics with "cannot find file" when the file cannot be read
 fn read_file(file_path: &PathBuf) -> CommandBlock {
    let source = fs::read_to_string(file_path).expect("cannot find file");
    // First pass: text -> token lines; second pass: token lines -> commands
    let tokenized = parse(&source);
    read_block(&tokenized)
 }
 // Read line block into command block
 fn read_block(block: &LineBlock) -> CommandBlock {
    CommandBlock {
        elements: block.elements.iter().map(|element| match element {
            BlockElement::Command(line) => BlockElement::Command(read_command(&line)),
            BlockElement::Block(block) => BlockElement::Block(read_block(&block)),
        }).collect(),
        ..Default::default()
    }
 }
 // Turn one line of tokens into a command
 // Panics with "Unknown command [..]" when the tokens match no known shape
 fn read_command(line: &Line) -> Command {
    // Diverging helper: report the token sequence that could not be matched
    fn unknown(line: &Line) -> ! {
        panic!("Unknown command {}", describe_line(line))
    }
    match line.as_slice() {
        // A single string: Say without a name
        [Str(text)] => Say {
            name: None,
            text: text.to_owned(),
        },
        // Two strings: the first is the name, the second the text
        [Str(name), Str(text)] => Say {
            name: Some(name.to_owned()),
            text: text.to_owned(),
        },
        // "eat" needs a string and exactly one trailing boolean;
        // any other tail falls through to the unknown-command panic
        [Keyword(keyword), Str(food), Boolean(politely)] if keyword.eq("eat") => Eat {
            food: food.to_owned(),
            politely: *politely,
        },
        _ => unknown(line),
    }
 }
 // Line description e.g. [String, Keyword, Array]
 // Used as feedback when a line matches no known command
 // An empty line is described as "[]" instead of panicking
 fn describe_line(line: &Line) -> String {
    // Printed form of every token, in line order
    let names: Vec<String> = line
        .iter()
        .map(|token| token.print().to_string())
        .collect();
    // Comma-separated list wrapped in brackets
    format!("[{}]", names.join(", "))
 }
 // =====
 // State
 // =====
@ -282,7 +239,7 @@ pub struct State {
 impl State {
    pub fn from_file(file: PathBuf) -> State {
        State {
-            script: read_file(&file),
+            script: parse_file(&file),
        }
    }
--- a/renrs/src/rpy.pest
+++ b/renrs/src/rpy.pest
@ -5,7 +5,7 @@ char = { !NEWLINE ~ ANY }
 // http://pest.rs/book/grammars/syntax.html#atomic
 inner = @{ char* }
-Token = { String | Array | Boolean | Number | Keyword }
+token = { String | Array | Boolean | Number | Keyword }
 // KEYWORDS
 // has to be atomic so that no implicit separators (spaces) are inserted
@ -28,7 +28,7 @@ String = ${
 // Array
 Array = ${
    "[" ~ "]"
-    | "[" ~ whitespace* ~ NEWLINE* ~ whitespace* ~ Token ~ ("," ~ whitespace* ~ NEWLINE* ~ whitespace* ~ Token)* ~ NEWLINE* ~ "]"
+    | "[" ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token ~ ("," ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token)* ~ NEWLINE* ~ "]"
 }
 // BOOLEAN
@ -48,11 +48,11 @@ COMMENT = _{ "#" ~ char* }
 Colon = { ":" }
 // lines are comprised of a statement
-line = @{ (Token ~ whitespace+)* ~ Token ~ Colon? }
+Line = @{ (token ~ whitespace+)* ~ token ~ Colon? }
-file = { SOI ~ NEWLINE* ~ block_content* ~ NEWLINE* ~ EOI }
+File = { SOI ~ NEWLINE* ~ block_content* ~ NEWLINE* ~ EOI }
-block = {
+Block = {
    // The first line in the block
    PEEK_ALL ~ PUSH("  "+ | "\t"+) ~ block_content ~
    // Subsequent lines in the block
@ -66,5 +66,5 @@ block = {
 whitespace = _{ " " }
 block_content = _{
-    line ~ (whitespace+ ~ line)* ~ (NEWLINE | EOI) ~ block*
+    Line ~ (whitespace+ ~ Line)* ~ (NEWLINE | EOI) ~ Block*
 }