Compare commits

..

No commits in common. "c0c8153cbe26693699fb89516cac05d22c15d9ae" and "f178baf1e64b64b8c8e54c1af9f39c8621137fc4" have entirely different histories.

2 changed files with 130 additions and 89 deletions

View file

@ -32,15 +32,19 @@ impl Token {
use Token::*; use Token::*;
// Parsing types
type Line = Vec<Token>;
// Indented command block // Indented command block
#[derive(Debug)] #[derive(Debug)]
struct CommandBlock { struct Block<T> {
elements: Vec<BlockElement>, elements: Vec<BlockElement<T>>,
next: Option<usize>, next: Option<usize>,
} }
impl CommandBlock { impl<T> Block<T> {
fn next(&mut self) -> Option<&Command> { fn next(&mut self) -> Option<&T> {
let mut next = match self.next { let mut next = match self.next {
Some(next) => next, Some(next) => next,
None => return None, None => return None,
@ -76,7 +80,7 @@ impl CommandBlock {
} }
} }
impl Default for CommandBlock { impl<T> Default for Block<T> {
fn default() -> Self { fn default() -> Self {
Self { Self {
elements: Vec::new(), elements: Vec::new(),
@ -86,11 +90,13 @@ impl Default for CommandBlock {
} }
#[derive(Debug)] #[derive(Debug)]
enum BlockElement { enum BlockElement<T> {
Command(Command), Command(T),
Block(CommandBlock), Block(Block<T>),
} }
type LineBlock = Block<Line>;
type CommandBlock = Block<Command>;
type Script = CommandBlock; type Script = CommandBlock;
// Parsed script commands // Parsed script commands
@ -117,44 +123,116 @@ pub enum Event {
Say { name: Option<String>, text: String }, Say { name: Option<String>, text: String },
} }
// ====== // ==========================================
// Parser // Step 1 parsing
// ====== // converting from pest pairs to Token blocks
// ==========================================
type Pair<'a> = pest::iterators::Pair<'a, Rule>; // Tokenize raw script string
fn parse(script: &str) -> LineBlock {
// Read file into commands let file = RpyParser::parse(Rule::file, script)
fn parse_file(file_path: &PathBuf) -> CommandBlock {
let unparsed_file = fs::read_to_string(file_path).expect("cannot find file");
parse(&unparsed_file)
}
fn parse(script: &str) -> CommandBlock {
let file = RpyParser::parse(Rule::File, script)
.expect("unsuccessful parse") .expect("unsuccessful parse")
.next() .next()
.unwrap(); .unwrap();
parse_block(file) parse_block(file)
} }
fn parse_block(block: Pair) -> CommandBlock { type Pair<'a> = pest::iterators::Pair<'a, Rule>;
CommandBlock {
elements: block.into_inner().filter_map(|pair| match pair.as_rule() { // Tokenize block
Rule::Line => Some(BlockElement::Command(parse_command(pair))), fn parse_block(pair: Pair) -> LineBlock {
Rule::Block => Some(BlockElement::Block(parse_block(pair))), let mut block = LineBlock::default();
Rule::EOI => None, // end for element in pair.into_inner() {
block.elements.push(match element.as_rule() {
Rule::block => BlockElement::Block(parse_block(element)),
Rule::line => {
let line = parse_line(element);
// TODO: For some reason a blank final line is always parsed
if line.len() == 0 {
continue;
}
BlockElement::Command(line)
},
Rule::EOI => break, // end
_ => unreachable!(), _ => unreachable!(),
});
}
block
}
// Tokenize line
fn parse_line(pair: Pair) -> Line {
let mut tokens = Vec::new();
for token in pair.into_inner() {
tokens.push(parse_token(token));
}
tokens
}
// Tokenize token
//
// Converts a single pest pair into a `Token`, dispatching on the pair's rule:
//   - string:  the inner data pair's text with the escaped quote unescaped
//   - array:   the recursively tokenized children
//   - boolean: `True` / `False`
//   - number:  the matched text parsed into the `Token::Number` payload
//   - keyword: the matched text, copied
//
// Panics if the pair is not one of those five rules, if a string pair has no
// inner data pair, or if the number text fails to parse.
fn parse_token(pair: Pair) -> Token {
    match pair.as_rule() {
        Rule::string => {
            let data = pair.into_inner().next().unwrap();
            let text = match data.as_rule() {
                Rule::single_quote_string_data => data.as_str().replace("\\'", "'"),
                Rule::double_quote_string_data => data.as_str().replace("\\\"", "\""),
                _ => unreachable!(),
            };
            Token::Str(text)
        }
        Rule::array => {
            // An array pair without any child (presumably the literal `[]`)
            // becomes an empty array instead of panicking on `unwrap`.
            // NOTE(review): only the FIRST child pair is descended into, as
            // before; confirm against the grammar that this really visits
            // every element of a multi-element array.
            let elements = pair
                .into_inner()
                .next()
                .map(|first| first.into_inner().map(parse_token).collect())
                .unwrap_or_default();
            Token::Array(elements)
        }
        Rule::boolean => Token::Boolean(match pair.as_str() {
            "True" => true,
            "False" => false,
            _ => unreachable!(),
        }),
        Rule::number => Token::Number(pair.as_str().parse().unwrap()),
        Rule::keyword => Token::Keyword(pair.as_str().to_owned()),
        // Wildcard rather than a `__` binding: no other rule is expected here.
        _ => unreachable!(),
    }
}
// ==============================================
// Step 2 reading
// converting from Token blocks to Command blocks
// ==============================================
// Read file into commands
//
// Loads the script at `file_path` as text, tokenizes it with `parse`, then
// converts the resulting line block with `read_block`.
// Panics with "cannot find file" when the file cannot be read.
fn read_file(file_path: &PathBuf) -> CommandBlock {
    let source = fs::read_to_string(file_path).expect("cannot find file");
    let tokenized = parse(&source);
    read_block(&tokenized)
}
// Read line block into command block
fn read_block(block: &LineBlock) -> CommandBlock {
CommandBlock {
elements: block.elements.iter().map(|element| match element {
BlockElement::Command(line) => BlockElement::Command(read_command(&line)),
BlockElement::Block(block) => BlockElement::Block(read_block(&block)),
}).collect(), }).collect(),
..Default::default() ..Default::default()
} }
} }
fn parse_command(pair: Pair) -> Command { // Read token array to command
use Token::*; fn read_command(line: &Line) -> Command {
let line: Vec<Token> = pair
.into_inner()
.map(|pair| parse_token(pair))
.collect();
macro_rules! unknown { macro_rules! unknown {
() => { () => {
panic!("Unknown command {}", describe_line(&line)) panic!("Unknown command {}", describe_line(&line))
@ -182,7 +260,7 @@ fn parse_command(pair: Pair) -> Command {
// Line description e.g. [String, Keyword, Array] // Line description e.g. [String, Keyword, Array]
// Used in parse_command as feedback for invalid commands // Used in parse_command as feedback for invalid commands
fn describe_line(line: &[Token]) -> String { fn describe_line(line: &Line) -> String {
let mut description = "[".to_owned(); let mut description = "[".to_owned();
let mut iter = line.iter(); let mut iter = line.iter();
description.push_str(&format!("{}", iter.next().unwrap().print())); description.push_str(&format!("{}", iter.next().unwrap().print()));
@ -193,41 +271,6 @@ fn describe_line(line: &[Token]) -> String {
description description
} }
// Converts a single pest pair into a `Token`, dispatching on the pair's rule:
//   - String:  the inner data pair's text with the escaped quote unescaped
//   - Array:   the recursively tokenized children
//   - Boolean: `True` / `False`
//   - Number:  the matched text parsed into the `Token::Number` payload
//   - Keyword: the matched text, copied
//
// Panics if the pair is not one of those five rules, if a string pair has no
// inner data pair, or if the number text fails to parse.
fn parse_token(pair: Pair) -> Token {
    match pair.as_rule() {
        Rule::String => {
            let data = pair.into_inner().next().unwrap();
            let text = match data.as_rule() {
                Rule::SingleQuoteStringData => data.as_str().replace("\\'", "'"),
                Rule::DoubleQuoteStringData => data.as_str().replace("\\\"", "\""),
                _ => unreachable!(),
            };
            Token::Str(text)
        }
        Rule::Array => {
            // An array pair without any child (presumably the literal `[]`)
            // becomes an empty array instead of panicking on `unwrap`.
            // NOTE(review): only the FIRST child pair is descended into, as
            // before; confirm against the grammar that this really visits
            // every element of a multi-element array.
            let elements = pair
                .into_inner()
                .next()
                .map(|first| first.into_inner().map(parse_token).collect())
                .unwrap_or_default();
            Token::Array(elements)
        }
        Rule::Boolean => Token::Boolean(match pair.as_str() {
            "True" => true,
            "False" => false,
            _ => unreachable!(),
        }),
        Rule::Number => Token::Number(pair.as_str().parse().unwrap()),
        Rule::Keyword => Token::Keyword(pair.as_str().to_owned()),
        // Wildcard rather than a `__` binding: no other rule is expected here.
        _ => unreachable!(),
    }
}
// ===== // =====
// State // State
// ===== // =====
@ -239,7 +282,7 @@ pub struct State {
impl State { impl State {
pub fn from_file(file: PathBuf) -> State { pub fn from_file(file: PathBuf) -> State {
State { State {
script: parse_file(&file), script: read_file(&file),
} }
} }

View file

@ -1,58 +1,56 @@
// characters are anything but newlines // characters are anything but newlines
char = { !NEWLINE ~ ANY } char = { !NEWLINE ~ ANY }
// Token definition // token definition
// http://pest.rs/book/grammars/syntax.html#atomic // http://pest.rs/book/grammars/syntax.html#atomic
inner = @{ char* } inner = @{ char* }
token = { String | Array | Boolean | Number | Keyword } token = { string | array | boolean | number | keyword }
// KEYWORDS // KEYWORDS
// has to be atomic for no implicit separate (spaces) // has to be atomic for no implicit separate (spaces)
Keyword = ${ (!(whitespace | NEWLINE) ~ ANY)+ } keyword = ${ (!(whitespace | NEWLINE) ~ ANY)+ }
// STRING // STRING
SingleQuoteStringData = @{ ( single_quote_string_data = @{ (
"\\'" // Escaped single quotes "\\'" // Escaped single quotes
| (!"'" ~ ANY) | (!"'" ~ ANY)
)* } )* }
DoubleQuoteStringData = @{ ( double_quote_string_data = @{ (
"\\\"" // Escaped double quotes "\\\"" // Escaped double quotes
| (!"\"" ~ ANY) | (!"\"" ~ ANY)
)* } )* }
String = ${ string = ${
("'" ~ SingleQuoteStringData ~ "'") ("'" ~ single_quote_string_data ~ "'")
| ("\"" ~ DoubleQuoteStringData ~ "\"") | ("\"" ~ double_quote_string_data ~ "\"")
} }
// Array // ARRAY
Array = ${ array = ${
"[" ~ "]" "[" ~ "]"
| "[" ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token ~ ("," ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token)* ~ NEWLINE* ~ "]" | "[" ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token ~ ("," ~ whitespace* ~ NEWLINE* ~ whitespace* ~ token)* ~ NEWLINE* ~ "]"
} }
// BOOLEAN // BOOLEAN
Boolean = ${ "True" | "False" } boolean = ${ "True" | "False" }
// NUMBER // NUMBER
Number = @{ number = @{
"-"? "-"?
~ ("0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*) ~ ("0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*)
~ ("." ~ ASCII_DIGIT*)? ~ ("." ~ ASCII_DIGIT*)?
} }
// comments are a # followed by // comments are a # followed by
// any Number of non-newline characters // any number of non-newline characters
COMMENT = _{ "#" ~ char* } COMMENT = _{ "#" ~ char* }
Colon = { ":" }
// lines are comprised of a statement // lines are comprised of a statement
Line = @{ (token ~ whitespace+)* ~ token ~ Colon? } line = @{ (token ~ whitespace+)* ~ token }
File = { SOI ~ NEWLINE* ~ block_content* ~ NEWLINE* ~ EOI } file = { SOI ~ NEWLINE* ~ block_content* ~ NEWLINE* ~ EOI }
Block = { block = {
// The first line in the block // The first line in the block
PEEK_ALL ~ PUSH(" "+ | "\t"+) ~ block_content ~ PEEK_ALL ~ PUSH(" "+ | "\t"+) ~ block_content ~
// Subsequent lines in the block // Subsequent lines in the block
@ -66,5 +64,5 @@ Block = {
whitespace = _{ " " } whitespace = _{ " " }
block_content = _{ block_content = _{
Line ~ (whitespace+ ~ Line)* ~ (NEWLINE | EOI) ~ Block* line ~ (whitespace+ ~ line)* ~ (NEWLINE | EOI) ~ block*
} }