From 3794a1d607e945fda11c85746fd658a08dee5207 Mon Sep 17 00:00:00 2001 From: ElnuDev Date: Sat, 20 May 2023 13:00:23 -0700 Subject: [PATCH 1/7] Implement naive arrays, better debugging --- demo/demo.rpy | 1 + src/lib.rs | 28 ++++++++++++++++++++++++++++ src/rpy.pest | 7 ++++++- 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/demo/demo.rpy b/demo/demo.rpy index f41016c..d39ef89 100644 --- a/demo/demo.rpy +++ b/demo/demo.rpy @@ -8,5 +8,6 @@ multiple lines" 'this is a single quote string' 'this also has escaped \'quotes\'' this is cool # comment +[ "this", "is", "an", "array" ] huh \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index d6fb698..5ca3dba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,6 +7,28 @@ use pest_derive::Parser; #[grammar = "rpy.pest"] struct RpyParser; +fn describe_token(pair: pest::iterators::Pair) { + let token = pair.as_rule(); + match token { + Rule::token => {}, + _ => panic!("Not a token!"), + }; + let contents = pair.into_inner().next().unwrap(); + let contents_rule = contents.as_rule(); + let str = match contents_rule { + Rule::string => { + let data = contents.into_inner().next().unwrap(); + match data.as_rule() { + Rule::single_quote_string_data => data.as_str().replace("\\'", "'"), + Rule::double_quote_string_data => data.as_str().replace("\\\"", "\""), + _ => unreachable!(), + } + }, + _ => contents.into_inner().as_str().to_owned(), + }; + println!("{:?}: {}", contents_rule, str); +} + pub fn parse(file_path: &str) { let unparsed_file = fs::read_to_string(file_path).expect("cannot find file"); let file = RpyParser::parse(Rule::file, &unparsed_file) @@ -30,6 +52,12 @@ pub fn parse(file_path: &str) { _ => unreachable!(), }); }, + Rule::array => { + println!("Array:"); + for element in token.into_inner() { + describe_token(element); + } + } Rule::keyword => println!("keyword: {}", token.as_str()), _ => unreachable!(), }; diff --git a/src/rpy.pest b/src/rpy.pest index 6ffbdbf..b7b5bf7 100644 --- a/src/rpy.pest +++ b/src/rpy.pest @@ -8,11 +8,13 @@ char = { !NEWLINE ~ ANY } // http://pest.rs/book/grammars/syntax.html#atomic inner = @{ char* } -token = { string | keyword } +token = { string | array | keyword } +// KEYWORDS // has to be atomic for no implicit separate (spaces) keyword = @{ (!(WHITESPACE | NEWLINE) ~ ANY)+ } +// STRING single_quote_string_data = @{ ( "\\'" // Escaped single quotes | (!"'" ~ ANY) @@ -26,6 +28,9 @@ string = ${ | ("\"" ~ double_quote_string_data ~ "\"") } +// ARRAY +array = { "[" ~ token ~ ("," ~ token)* ~ "]"} + // comments are a # followed by // any number of non-newline characters COMMENT = _{ "#" ~ char* } From 5442a8fc8770a7389fda83e76c7b5850fbbdb266 Mon Sep 17 00:00:00 2001 From: ElnuDev Date: Sat, 20 May 2023 13:19:51 -0700 Subject: [PATCH 2/7] Multiline arrays, better debugging --- demo/demo.rpy | 9 ++++++++- src/lib.rs | 36 +++++++++--------------------------- src/rpy.pest | 5 ++++- 3 files changed, 21 insertions(+), 29 deletions(-) diff --git a/demo/demo.rpy b/demo/demo.rpy index d39ef89..6d2ff52 100644 --- a/demo/demo.rpy +++ b/demo/demo.rpy @@ -8,6 +8,13 @@ multiple lines" 'this is a single quote string' 'this also has escaped \'quotes\'' this is cool # comment -[ "this", "is", "an", "array" ] +any empty array [] +[ + "this", # test + "is", + "an", + "array" +] +["this","is","an","array"] huh \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 5ca3dba..0f42168 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,7 +24,14 @@ fn describe_token(pair: pest::iterators::Pair) { _ => unreachable!(), } }, - _ => contents.into_inner().as_str().to_owned(), + Rule::array => { + println!("array: Start array"); + for token in contents.into_inner() { + describe_token(token); + } + "End array".to_string() + } + _ => contents.as_str().to_owned(), }; println!("{:?}: {}", contents_rule, str); } @@ -37,33 +44,8 @@ pub fn parse(file_path: &str) { for line in file.into_inner() { match line.as_rule() { Rule::line => { - println!("Line:"); for token in line.into_inner() { - match token.as_rule() { - Rule::token => { - let token = token.into_inner().next().unwrap(); - match token.as_rule() { - Rule::string => { - let string_data = token.into_inner().next().unwrap(); - let str = string_data.as_str(); - println!("string: {}", match string_data.as_rule() { - Rule::single_quote_string_data => str.replace("\\'", "'"), - Rule::double_quote_string_data => str.replace("\\\"", "\""), - _ => unreachable!(), - }); - }, - Rule::array => { - println!("Array:"); - for element in token.into_inner() { - describe_token(element); - } - } - Rule::keyword => println!("keyword: {}", token.as_str()), - _ => unreachable!(), - }; - }, - _ => unreachable!(), - } + describe_token(token); } println!() }, diff --git a/src/rpy.pest b/src/rpy.pest index b7b5bf7..060253c 100644 --- a/src/rpy.pest +++ b/src/rpy.pest @@ -29,7 +29,10 @@ string = ${ } // ARRAY -array = { "[" ~ token ~ ("," ~ token)* ~ "]"} +array = { + "[" ~ "]" + | "[" ~ NEWLINE* ~ token ~ ("," ~ NEWLINE* ~ token)* ~ NEWLINE* ~ "]" +} // comments are a # followed by // any number of non-newline characters From d957816dede5f001de03ea90800cd83bb3929e5a Mon Sep 17 00:00:00 2001 From: ElnuDev Date: Sat, 20 May 2023 17:37:53 -0700 Subject: [PATCH 3/7] Convert to internal tokens --- demo/src/main.rs | 5 +++- src/lib.rs | 70 +++++++++++++++++++++++++++++------------------- 2 files changed, 46 insertions(+), 29 deletions(-) diff --git a/demo/src/main.rs b/demo/src/main.rs index 4e2d100..8045c4e 100644 --- a/demo/src/main.rs +++ b/demo/src/main.rs @@ -1,5 +1,8 @@ use renrs; fn main() { - renrs::parse("demo.rpy"); + let tokens = renrs::parse_file("demo.rpy"); + for token in tokens { + println!("{:?}", token); + } } diff --git a/src/lib.rs b/src/lib.rs index 0f42168..0dfeb54 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,7 +7,35 @@ use pest_derive::Parser; #[grammar = "rpy.pest"] struct RpyParser; -fn describe_token(pair: pest::iterators::Pair) { +#[derive(Debug)] +pub enum Token { + Keyword(String), + String(String), + Array(Vec), +} + +pub fn parse(script: &str) -> Vec> { + let file = RpyParser::parse(Rule::file, script) + .expect("unsuccessful parse") + .next().unwrap(); + let mut lines = Vec::new(); + for line in file.into_inner() { + let mut tokens = Vec::new(); + match line.as_rule() { + Rule::line => { + for token in line.into_inner() { + tokens.push(parse_token(token)); + } + }, + Rule::EOI => (), + _ => unreachable!(), + } + lines.push(tokens); + } + lines +} + +fn parse_token(pair: pest::iterators::Pair) -> Token { let token = pair.as_rule(); match token { Rule::token => {}, @@ -15,42 +43,28 @@ fn describe_token(pair: pest::iterators::Pair) { }; let contents = pair.into_inner().next().unwrap(); let contents_rule = contents.as_rule(); - let str = match contents_rule { + match contents_rule { Rule::string => { let data = contents.into_inner().next().unwrap(); - match data.as_rule() { + Token::String(match data.as_rule() { Rule::single_quote_string_data => data.as_str().replace("\\'", "'"), Rule::double_quote_string_data => data.as_str().replace("\\\"", "\""), _ => unreachable!(), - } + }) }, Rule::array => { - println!("array: Start array"); + let mut array = Vec::new(); for token in contents.into_inner() { - describe_token(token); + array.push(parse_token(token)); } - "End array".to_string() - } - _ => contents.as_str().to_owned(), - }; - println!("{:?}: {}", contents_rule, str); -} - -pub fn parse(file_path: &str) { - let unparsed_file = fs::read_to_string(file_path).expect("cannot find file"); - let file = RpyParser::parse(Rule::file, &unparsed_file) - .expect("unsuccessful parse") // unwrap the parse result - .next().unwrap(); // get and unwrap the `file` rule; never fails - for line in file.into_inner() { - match line.as_rule() { - Rule::line => { - for token in line.into_inner() { - describe_token(token); - } - println!() - }, - Rule::EOI => (), - _ => unreachable!(), + Token::Array(array) } + Rule::keyword => Token::Keyword(contents.as_str().to_owned()), + __ => unreachable!(), } } + +pub fn parse_file(file_path: &str) -> Vec> { + let unparsed_file = fs::read_to_string(file_path).expect("cannot find file"); + parse(&unparsed_file) +} From 4113accdaa72dca501908736c1bf6ecff3deabbb Mon Sep 17 00:00:00 2001 From: ElnuDev Date: Sat, 20 May 2023 18:35:02 -0700 Subject: [PATCH 4/7] Cover empty file cases, deal with empty final line --- src/lib.rs | 5 ++++- src/rpy.pest | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0dfeb54..0e8fe86 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,7 +30,10 @@ pub fn parse(script: &str) -> Vec> { Rule::EOI => (), _ => unreachable!(), } - lines.push(tokens); + // TODO: For some a blank final line is always parsed + if tokens.len() > 0 { + lines.push(tokens); + } } lines } diff --git a/src/rpy.pest b/src/rpy.pest index 060253c..5600fa6 100644 --- a/src/rpy.pest +++ b/src/rpy.pest @@ -41,4 +41,4 @@ COMMENT = _{ "#" ~ char* } // lines are comprised of a statement line = { token+ } -file = { SOI ~ line ~ (NEWLINE+ ~ line)* ~ NEWLINE* ~ EOI } \ No newline at end of file +file = { SOI ~ (line ~ (NEWLINE+ ~ line)*)? ~ NEWLINE* ~ EOI } \ No newline at end of file From 10db5e959b9d97d190adcbb3d4c708ed553073dd Mon Sep 17 00:00:00 2001 From: ElnuDev Date: Sat, 20 May 2023 19:37:16 -0700 Subject: [PATCH 5/7] Add command parsing --- demo/demo.rpy | 23 ++------------- demo/src/main.rs | 6 ++-- src/lib.rs | 74 ++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 72 insertions(+), 31 deletions(-) diff --git a/demo/demo.rpy b/demo/demo.rpy index 6d2ff52..54b0d05 100644 --- a/demo/demo.rpy +++ b/demo/demo.rpy @@ -1,20 +1,3 @@ -show black amogus # this is a comment -# this is a full line comment -what the heck -"this is a string with a # comment" -"this is a string over -multiple lines" -"this is \"escaped\"" -'this is a single quote string' -'this also has escaped \'quotes\'' -this is cool # comment -any empty array [] -[ - "this", # test - "is", - "an", - "array" -] -["this","is","an","array"] - -huh \ No newline at end of file +"Bob sat on the bench." +"Bob" "Good morning!" +eat "apple" \ No newline at end of file diff --git a/demo/src/main.rs b/demo/src/main.rs index 8045c4e..08a09bf 100644 --- a/demo/src/main.rs +++ b/demo/src/main.rs @@ -1,8 +1,8 @@ use renrs; fn main() { - let tokens = renrs::parse_file("demo.rpy"); - for token in tokens { - println!("{:?}", token); + let commands = renrs::parse_file("demo.rpy"); + for command in commands { + println!("{:?}", command); } } diff --git a/src/lib.rs b/src/lib.rs index 0e8fe86..a459740 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,14 +7,39 @@ use pest_derive::Parser; #[grammar = "rpy.pest"] struct RpyParser; +// Raw script tokens #[derive(Debug)] pub enum Token { Keyword(String), - String(String), + Str(String), Array(Vec), } -pub fn parse(script: &str) -> Vec> { +impl Token { + fn print(&self) -> String { + match &self { + Keyword(keyword) => keyword.to_owned(), + Str(string) => "String".to_owned(), + Array(tokens) => describe_token_array(&tokens), + } + } +} + +use Token::*; + +// Parsed script commands +#[derive(Debug)] +pub enum Command { + Say { + name: Option, + text: String, + } +} + +use Command::*; + +// Tokenize raw script string +fn tokenize(script: &str) -> Vec> { let file = RpyParser::parse(Rule::file, script) .expect("unsuccessful parse") .next().unwrap(); @@ -24,7 +49,7 @@ pub fn parse(script: &str) -> Vec> { match line.as_rule() { Rule::line => { for token in line.into_inner() { - tokens.push(parse_token(token)); + tokens.push(parse_pair(token)); } }, Rule::EOI => (), @@ -38,7 +63,8 @@ pub fn parse(script: &str) -> Vec> { lines } -fn parse_token(pair: pest::iterators::Pair) -> Token { +// Parse raw pest data into Token +fn parse_pair(pair: pest::iterators::Pair) -> Token { let token = pair.as_rule(); match token { Rule::token => {}, @@ -49,7 +75,7 @@ fn parse_token(pair: pest::iterators::Pair) -> Token { match contents_rule { Rule::string => { let data = contents.into_inner().next().unwrap(); - Token::String(match data.as_rule() { + Token::Str(match data.as_rule() { Rule::single_quote_string_data => data.as_str().replace("\\'", "'"), Rule::double_quote_string_data => data.as_str().replace("\\\"", "\""), _ => unreachable!(), @@ -58,7 +84,7 @@ fn parse_token(pair: pest::iterators::Pair) -> Token { Rule::array => { let mut array = Vec::new(); for token in contents.into_inner() { - array.push(parse_token(token)); + array.push(parse_pair(token)); } Token::Array(array) } @@ -67,7 +93,39 @@ fn parse_token(pair: pest::iterators::Pair) -> Token { } } -pub fn parse_file(file_path: &str) -> Vec> { +// Tokenize file +fn tokenize_file(file_path: &str) -> Vec> { let unparsed_file = fs::read_to_string(file_path).expect("cannot find file"); - parse(&unparsed_file) + tokenize(&unparsed_file) +} + +fn describe_token_array(line: &Vec) -> String { + let mut description = "[".to_owned(); + let mut iter = line.iter(); + description.push_str(&format!("{}", iter.next().unwrap().print())); + for token in iter { + description.push_str(&format!(", {}", token.print())); + } + description.push_str("]"); + description +} + +// Parse file into commands +pub fn parse_file(file_path: &str) -> Vec { + let token_lines = tokenize_file(file_path); + let mut commands = Vec::new(); + for line in token_lines { + commands.push(match line.as_slice() { + [Str(text)] => Say { + name: None, + text: text.to_owned() + }, + [Str(name), Str(text)] => Say { + name: Some(name.to_owned()), + text: text.to_owned() + }, + _ => panic!("Unknown command {}", describe_token_array(&line)), + }); + } + commands } From 59e4c0058b779ddef859c7a0089f457085b8cdd2 Mon Sep 17 00:00:00 2001 From: ElnuDev Date: Sat, 20 May 2023 19:51:03 -0700 Subject: [PATCH 6/7] Small changes --- demo/demo.rpy | 2 +- src/lib.rs | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/demo/demo.rpy b/demo/demo.rpy index 54b0d05..69be8ba 100644 --- a/demo/demo.rpy +++ b/demo/demo.rpy @@ -1,3 +1,3 @@ "Bob sat on the bench." "Bob" "Good morning!" -eat "apple" \ No newline at end of file +eat \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index a459740..775617b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,11 +16,11 @@ pub enum Token { } impl Token { - fn print(&self) -> String { + fn print(&self) -> &str { match &self { - Keyword(keyword) => keyword.to_owned(), - Str(string) => "String".to_owned(), - Array(tokens) => describe_token_array(&tokens), + Keyword(keyword) => keyword, + Str(_) => "String", + Array(_) => "Array", } } } @@ -99,7 +99,7 @@ fn tokenize_file(file_path: &str) -> Vec> { tokenize(&unparsed_file) } -fn describe_token_array(line: &Vec) -> String { +fn describe_line(line: &Vec) -> String { let mut description = "[".to_owned(); let mut iter = line.iter(); description.push_str(&format!("{}", iter.next().unwrap().print())); @@ -115,16 +115,17 @@ pub fn parse_file(file_path: &str) -> Vec { let token_lines = tokenize_file(file_path); let mut commands = Vec::new(); for line in token_lines { + println!("{:?}", line); commands.push(match line.as_slice() { [Str(text)] => Say { name: None, - text: text.to_owned() + text: text.to_owned(), }, [Str(name), Str(text)] => Say { name: Some(name.to_owned()), - text: text.to_owned() + text: text.to_owned(), }, - _ => panic!("Unknown command {}", describe_token_array(&line)), + _ => panic!("Unknown command {}", describe_line(&line)), }); } commands From 10e966ef050359d440eb4cc2e59e51bb0e091afe Mon Sep 17 00:00:00 2001 From: ElnuDev Date: Sat, 20 May 2023 19:52:35 -0700 Subject: [PATCH 7/7] Remove unused renrs import --- demo/src/main.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/demo/src/main.rs b/demo/src/main.rs index 08a09bf..4f9e293 100644 --- a/demo/src/main.rs +++ b/demo/src/main.rs @@ -1,5 +1,3 @@ -use renrs; - fn main() { let commands = renrs::parse_file("demo.rpy"); for command in commands {