Simplify parsing

This commit is contained in:
Elnu 2023-05-27 14:20:11 -07:00
parent 6707b97ef5
commit c0c8153cbe
2 changed files with 77 additions and 120 deletions

View file

@ -32,19 +32,15 @@ impl Token {
use Token::*;
// Parsing types
type Line = Vec<Token>;
// Indented command block
#[derive(Debug)]
struct Block<T> {
elements: Vec<BlockElement<T>>,
struct CommandBlock {
elements: Vec<BlockElement>,
next: Option<usize>,
}
impl<T> Block<T> {
fn next(&mut self) -> Option<&T> {
impl CommandBlock {
fn next(&mut self) -> Option<&Command> {
let mut next = match self.next {
Some(next) => next,
None => return None,
@ -80,7 +76,7 @@ impl<T> Block<T> {
}
}
impl<T> Default for Block<T> {
impl Default for CommandBlock {
fn default() -> Self {
Self {
elements: Vec::new(),
@ -90,13 +86,11 @@ impl<T> Default for Block<T> {
}
#[derive(Debug)]
enum BlockElement<T> {
Command(T),
Block(Block<T>),
enum BlockElement {
Command(Command),
Block(CommandBlock),
}
type LineBlock = Block<Line>;
type CommandBlock = Block<Command>;
type Script = CommandBlock;
// Parsed script commands
@ -123,53 +117,82 @@ pub enum Event {
Say { name: Option<String>, text: String },
}
// ==========================================
// Step 1 parsing
// converting from pest pairs to Token blocks
// ==========================================
// ======
// Parser
// ======
// Tokenize raw script string
fn parse(script: &str) -> LineBlock {
let file = RpyParser::parse(Rule::file, script)
type Pair<'a> = pest::iterators::Pair<'a, Rule>;
// Read file into commands.
//
// Reads the whole script at `file_path` into memory and hands it to `parse`.
// Panics if the file cannot be read; the message names the path and the
// underlying I/O error, since a read can fail for reasons other than the
// file being missing.
fn parse_file(file_path: &PathBuf) -> CommandBlock {
    let unparsed_file = fs::read_to_string(file_path).unwrap_or_else(|error| {
        panic!("cannot read file {}: {}", file_path.display(), error)
    });
    parse(&unparsed_file)
}
// Parse a raw script string into a command block.
//
// Panics with "unsuccessful parse" when the grammar rejects the input.
fn parse(script: &str) -> CommandBlock {
    let mut pairs = RpyParser::parse(Rule::File, script).expect("unsuccessful parse");
    // The first pair produced for `Rule::File` is handed on as the root block.
    let root = pairs.next().unwrap();
    parse_block(root)
}
type Pair<'a> = pest::iterators::Pair<'a, Rule>;
// Tokenize block
fn parse_block(pair: Pair) -> LineBlock {
let mut block = LineBlock::default();
for element in pair.into_inner() {
block.elements.push(match element.as_rule() {
Rule::block => BlockElement::Block(parse_block(element)),
Rule::line => {
let line = parse_line(element);
// TODO: For some reason a blank final line is always parsed
if line.len() == 0 {
continue;
}
BlockElement::Command(line)
},
Rule::EOI => break, // end
fn parse_block(block: Pair) -> CommandBlock {
CommandBlock {
elements: block.into_inner().filter_map(|pair| match pair.as_rule() {
Rule::Line => Some(BlockElement::Command(parse_command(pair))),
Rule::Block => Some(BlockElement::Block(parse_block(pair))),
Rule::EOI => None, // end
_ => unreachable!(),
});
}).collect(),
..Default::default()
}
block
}
// Tokenize line
fn parse_line(pair: Pair) -> Line {
let mut tokens = Vec::new();
for token in pair.into_inner() {
tokens.push(parse_token(token));
fn parse_command(pair: Pair) -> Command {
use Token::*;
let line: Vec<Token> = pair
.into_inner()
.map(|pair| parse_token(pair))
.collect();
macro_rules! unknown {
() => {
panic!("Unknown command {}", describe_line(&line))
};
}
match line.as_slice() {
[Str(text)] => Say {
name: None,
text: text.to_owned(),
},
[Str(name), Str(text)] => Say {
name: Some(name.to_owned()),
text: text.to_owned(),
},
[Keyword(keyword), Str(food), tail @ ..] if keyword.eq("eat") => Eat {
food: food.to_owned(),
politely: match tail {
[Boolean(politely)] => *politely,
_ => unknown!(),
},
},
_ => unknown!(),
}
tokens
}
// Tokenize token
// Line description e.g. [String, Keyword, Array]
// Used in parse_command as feedback for invalid commands
//
// Each token contributes the text returned by its `print()` method; entries
// are separated by ", " and wrapped in square brackets. An empty line is
// described as "[]" instead of panicking, so the caller's "Unknown command"
// message is still produced for blank lines.
fn describe_line(line: &[Token]) -> String {
    let names: Vec<String> = line
        .iter()
        .map(|token| token.print().to_string())
        .collect();
    format!("[{}]", names.join(", "))
}
fn parse_token(pair: Pair) -> Token {
let token = pair.as_rule();
macro_rules! contents {
@ -205,72 +228,6 @@ fn parse_token(pair: Pair) -> Token {
}
}
// ==============================================
// Step 2 reading
// converting from Token blocks to Command blocks
// ==============================================
// Read file into commands.
//
// Reads the script at `file_path`, tokenizes it with `parse`, then converts
// the resulting line block with `read_block`.
// Panics if the file cannot be read; the message names the path and the
// underlying I/O error, since a read can fail for reasons other than the
// file being missing.
fn read_file(file_path: &PathBuf) -> CommandBlock {
    let line_block = {
        let unparsed_file = fs::read_to_string(file_path).unwrap_or_else(|error| {
            panic!("cannot read file {}: {}", file_path.display(), error)
        });
        parse(&unparsed_file)
    };
    read_block(&line_block)
}
// Read line block into command block
fn read_block(block: &LineBlock) -> CommandBlock {
CommandBlock {
elements: block.elements.iter().map(|element| match element {
BlockElement::Command(line) => BlockElement::Command(read_command(&line)),
BlockElement::Block(block) => BlockElement::Block(read_block(&block)),
}).collect(),
..Default::default()
}
}
// Turn one line of tokens into a command.
//
// Recognised shapes:
//   [Str]                          -> Say without a name
//   [Str, Str]                     -> Say with a name
//   [Keyword("eat"), Str, Boolean] -> Eat
// Any other shape panics with "Unknown command" plus a description of the line.
fn read_command(line: &Line) -> Command {
    // Shared failure path for every unrecognised line shape
    macro_rules! unknown {
        () => {
            panic!("Unknown command {}", describe_line(&line))
        };
    }
    match line.as_slice() {
        [Str(text)] => Say {
            name: None,
            text: text.to_owned(),
        },
        [Str(name), Str(text)] => Say {
            name: Some(name.to_owned()),
            text: text.to_owned(),
        },
        // An "eat" line must carry exactly one trailing Boolean; any other
        // tail falls through to the catch-all below.
        [Keyword(keyword), Str(food), Boolean(politely)] if keyword.eq("eat") => Eat {
            food: food.to_owned(),
            politely: *politely,
        },
        _ => unknown!(),
    }
}
// Line description e.g. [String, Keyword, Array]
// Used as feedback for invalid commands
//
// Each token contributes the text returned by its `print()` method; entries
// are separated by ", " and wrapped in square brackets. An empty line is
// described as "[]" instead of panicking, so the caller's "Unknown command"
// message is still produced for blank lines.
fn describe_line(line: &Line) -> String {
    let names: Vec<String> = line
        .iter()
        .map(|token| token.print().to_string())
        .collect();
    format!("[{}]", names.join(", "))
}
// =====
// State
// =====
@ -282,7 +239,7 @@ pub struct State {
impl State {
pub fn from_file(file: PathBuf) -> State {
State {
script: read_file(&file),
script: parse_file(&file),
}
}

View file

@ -5,7 +5,7 @@ char = { !NEWLINE ~ ANY }
// http://pest.rs/book/grammars/syntax.html#atomic
inner = @{ char* }
Token = { String | Array | Boolean | Number | Keyword }
token = { String | Array | Boolean | Number | Keyword }
// KEYWORDS
// has to be atomic so that no implicit separators (spaces) are inserted
@ -28,7 +28,7 @@ String = ${
// Array
Array = ${
"[" ~ "]"
| "[" ~ whitespace* ~ NEWLINE* ~ whitespace* ~ Token ~ ("," ~ whitespace* ~ NEWLINE* ~ whitespace* ~ Token)* ~ NEWLINE* ~ "]"
| "[" ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token ~ ("," ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token)* ~ NEWLINE* ~ "]"
}
// BOOLEAN
@ -48,11 +48,11 @@ COMMENT = _{ "#" ~ char* }
Colon = { ":" }
// a line is a single statement: whitespace-separated tokens with an optional trailing colon
line = @{ (Token ~ whitespace+)* ~ Token ~ Colon? }
Line = @{ (token ~ whitespace+)* ~ token ~ Colon? }
file = { SOI ~ NEWLINE* ~ block_content* ~ NEWLINE* ~ EOI }
File = { SOI ~ NEWLINE* ~ block_content* ~ NEWLINE* ~ EOI }
block = {
Block = {
// The first line in the block
PEEK_ALL ~ PUSH(" "+ | "\t"+) ~ block_content ~
// Subsequent lines in the block
@ -66,5 +66,5 @@ block = {
whitespace = _{ " " }
block_content = _{
line ~ (whitespace+ ~ line)* ~ (NEWLINE | EOI) ~ block*
Line ~ (whitespace+ ~ Line)* ~ (NEWLINE | EOI) ~ Block*
}