diff --git a/renrs/src/lib.rs b/renrs/src/lib.rs index 916dd69..4351603 100644 --- a/renrs/src/lib.rs +++ b/renrs/src/lib.rs @@ -32,19 +32,15 @@ impl Token { use Token::*; -// Parsing types - -type Line = Vec; - // Indented command block #[derive(Debug)] -struct Block { - elements: Vec>, +struct CommandBlock { + elements: Vec, next: Option, } -impl Block { - fn next(&mut self) -> Option<&T> { +impl CommandBlock { + fn next(&mut self) -> Option<&Command> { let mut next = match self.next { Some(next) => next, None => return None, @@ -80,7 +76,7 @@ impl Block { } } -impl Default for Block { +impl Default for CommandBlock { fn default() -> Self { Self { elements: Vec::new(), @@ -90,13 +86,11 @@ impl Default for Block { } #[derive(Debug)] -enum BlockElement { - Command(T), - Block(Block), +enum BlockElement { + Command(Command), + Block(CommandBlock), } -type LineBlock = Block; -type CommandBlock = Block; type Script = CommandBlock; // Parsed script commands @@ -123,116 +117,44 @@ pub enum Event { Say { name: Option, text: String }, } -// ========================================== -// Step 1 parsing -// converting from pest pairs to Token blocks -// ========================================== +// ====== +// Parser +// ====== -// Tokenize raw script string -fn parse(script: &str) -> LineBlock { - let file = RpyParser::parse(Rule::file, script) +type Pair<'a> = pest::iterators::Pair<'a, Rule>; + +// Read file into commands +fn parse_file(file_path: &PathBuf) -> CommandBlock { + let unparsed_file = fs::read_to_string(file_path).expect("cannot find file"); + parse(&unparsed_file) +} + +fn parse(script: &str) -> CommandBlock { + let file = RpyParser::parse(Rule::File, script) .expect("unsuccessful parse") .next() .unwrap(); parse_block(file) } -type Pair<'a> = pest::iterators::Pair<'a, Rule>; - -// Tokenize block -fn parse_block(pair: Pair) -> LineBlock { - let mut block = LineBlock::default(); - for element in pair.into_inner() { - block.elements.push(match element.as_rule() { - Rule::block => BlockElement::Block(parse_block(element)), - Rule::line => { - let line = parse_line(element); - // TODO: For some reason a blank final line is always parsed - if line.len() == 0 { - continue; - } - BlockElement::Command(line) - }, - Rule::EOI => break, // end - _ => unreachable!(), - }); - } - block -} - -// Tokenize line -fn parse_line(pair: Pair) -> Line { - let mut tokens = Vec::new(); - for token in pair.into_inner() { - tokens.push(parse_token(token)); - } - tokens -} - -// Tokenize token -fn parse_token(pair: Pair) -> Token { - let token = pair.as_rule(); - macro_rules! contents { - () => { - pair.into_inner().next().unwrap() - }; - } - match token { - Rule::string => { - let contents = contents!(); - Token::Str(match contents.as_rule() { - Rule::single_quote_string_data => contents.as_str().replace("\\'", "'"), - Rule::double_quote_string_data => contents.as_str().replace("\\\"", "\""), - _ => unreachable!(), - }) - } - Rule::array => { - let contents = contents!(); - let mut array = Vec::new(); - for token in contents.into_inner() { - array.push(parse_token(token)); - } - Token::Array(array) - } - Rule::boolean => Token::Boolean(match pair.as_str() { - "True" => true, - "False" => false, - _ => unreachable!(), - }), - Rule::number => Token::Number(pair.as_str().parse().unwrap()), - Rule::keyword => Token::Keyword(pair.as_str().to_owned()), - __ => unreachable!(), - } -} - - -// ============================================== -// Step 2 reading -// converting from Token blocks to Command blocks -// ============================================== - -// Read file into commands -fn read_file(file_path: &PathBuf) -> CommandBlock { - let line_block = { - let unparsed_file = fs::read_to_string(file_path).expect("cannot find file"); - parse(&unparsed_file) - }; - read_block(&line_block) -} - -// Read line block into command block -fn read_block(block: &LineBlock) -> CommandBlock { +fn parse_block(block: Pair) -> CommandBlock { CommandBlock { - elements: block.elements.iter().map(|element| match element { - BlockElement::Command(line) => BlockElement::Command(read_command(&line)), - BlockElement::Block(block) => BlockElement::Block(read_block(&block)), + elements: block.into_inner().filter_map(|pair| match pair.as_rule() { + Rule::Line => Some(BlockElement::Command(parse_command(pair))), + Rule::Block => Some(BlockElement::Block(parse_block(pair))), + Rule::EOI => None, // end + _ => unreachable!(), }).collect(), ..Default::default() } } -// Read token array to command -fn read_command(line: &Line) -> Command { +fn parse_command(pair: Pair) -> Command { + use Token::*; + let line: Vec = pair + .into_inner() + .map(|pair| parse_token(pair)) + .collect(); macro_rules! unknown { () => { panic!("Unknown command {}", describe_line(&line)) @@ -260,7 +182,7 @@ fn read_command(line: &Line) -> Command { // Line description e.g. [String, Keyword, Array] // Used in parse_command as feedback for invalid commands -fn describe_line(line: &Line) -> String { +fn describe_line(line: &[Token]) -> String { let mut description = "[".to_owned(); let mut iter = line.iter(); description.push_str(&format!("{}", iter.next().unwrap().print())); @@ -271,6 +193,41 @@ fn describe_line(line: &Line) -> String { description } +fn parse_token(pair: Pair) -> Token { + let token = pair.as_rule(); + macro_rules! contents { + () => { + pair.into_inner().next().unwrap() + }; + } + match token { + Rule::String => { + let contents = contents!(); + Token::Str(match contents.as_rule() { + Rule::SingleQuoteStringData => contents.as_str().replace("\\'", "'"), + Rule::DoubleQuoteStringData => contents.as_str().replace("\\\"", "\""), + _ => unreachable!(), + }) + } + Rule::Array => { + let contents = contents!(); + let mut array = Vec::new(); + for token in contents.into_inner() { + array.push(parse_token(token)); + } + Token::Array(array) + } + Rule::Boolean => Token::Boolean(match pair.as_str() { + "True" => true, + "False" => false, + _ => unreachable!(), + }), + Rule::Number => Token::Number(pair.as_str().parse().unwrap()), + Rule::Keyword => Token::Keyword(pair.as_str().to_owned()), + __ => unreachable!(), + } +} + // ===== // State // ===== @@ -282,7 +239,7 @@ pub struct State { impl State { pub fn from_file(file: PathBuf) -> State { State { - script: read_file(&file), + script: parse_file(&file), } } diff --git a/renrs/src/rpy.pest b/renrs/src/rpy.pest index 0834d09..b50e52f 100644 --- a/renrs/src/rpy.pest +++ b/renrs/src/rpy.pest @@ -1,56 +1,58 @@ // characters are anything but newlines char = { !NEWLINE ~ ANY } -// token definition +// Token definition // http://pest.rs/book/grammars/syntax.html#atomic inner = @{ char* } -token = { string | array | boolean | number | keyword } +token = { String | Array | Boolean | Number | Keyword } // KEYWORDS // has to be atomic for no implicit separate (spaces) -keyword = ${ (!(whitespace | NEWLINE) ~ ANY)+ } +Keyword = ${ (!(whitespace | NEWLINE) ~ ANY)+ } // STRING -single_quote_string_data = @{ ( +SingleQuoteStringData = @{ ( "\\'" // Escaped single quotes | (!"'" ~ ANY) )* } -double_quote_string_data = @{ ( +DoubleQuoteStringData = @{ ( "\\\"" // Escaped double quotes | (!"\"" ~ ANY) )* } -string = ${ - ("'" ~ single_quote_string_data ~ "'") - | ("\"" ~ double_quote_string_data ~ "\"") +String = ${ + ("'" ~ SingleQuoteStringData ~ "'") + | ("\"" ~ DoubleQuoteStringData ~ "\"") } -// ARRAY -array = ${ +// Array +Array = ${ "[" ~ "]" | "[" ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token ~ ("," ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token)* ~ NEWLINE* ~ "]" } // BOOLEAN -boolean = ${ "True" | "False" } +Boolean = ${ "True" | "False" } // NUMBER -number = @{ +Number = @{ "-"? ~ ("0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*) ~ ("." ~ ASCII_DIGIT*)? } // comments are a # followed by -// any number of non-newline characters +// any Number of non-newline characters COMMENT = _{ "#" ~ char* } +Colon = { ":" } + // lines are comprised of a statement -line = @{ (token ~ whitespace+)* ~ token } +Line = @{ (token ~ whitespace+)* ~ token ~ Colon? } -file = { SOI ~ NEWLINE* ~ block_content* ~ NEWLINE* ~ EOI } +File = { SOI ~ NEWLINE* ~ block_content* ~ NEWLINE* ~ EOI } -block = { +Block = { // The first line in the block PEEK_ALL ~ PUSH(" "+ | "\t"+) ~ block_content ~ // Subsequent lines in the block @@ -64,5 +66,5 @@ block = { whitespace = _{ " " } block_content = _{ - line ~ (whitespace+ ~ line)* ~ (NEWLINE | EOI) ~ block* + Line ~ (whitespace+ ~ Line)* ~ (NEWLINE | EOI) ~ Block* } \ No newline at end of file