From a37c4c0f9520050dd2615ab232efb8769be6e4cd Mon Sep 17 00:00:00 2001 From: Yehowshua Immanuel Date: Fri, 17 Jun 2022 18:16:51 -0400 Subject: [PATCH] now using ParseResult as parser return type exclusively --- README.md | 6 ++- src/vcd/parse.rs | 138 +++++++++++++++++++++++++---------------------- 2 files changed, 78 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index 02e147d..641c3da 100644 --- a/README.md +++ b/README.md @@ -26,14 +26,16 @@ The first build of the program may take some time. # TODO - [x] We need a way to merge lines. + - [ ] We need to start regression testing the parser over all files + - [ ] Decide if I want to return option types + - [ ] Propagate all to question mark unwrap types. + - [ ] Don't want variation in hh:mm:ss - [ ] Consolidate error messages and add cursors. - [ ] Consider what to do with don't care values will probably just convert them to strings for now. - - [ ] Test for speed and see if stream of bytes is helpful - [ ] Split ``parse.rs``. It's getting too large. - [ ] Include line and possible column numbers - [ ] Change states to lowercase - - [ ] We need to start regression testing the parser over all files - [ ] Take a look at GTKWave parser to compare effificiency. - [ ] Send survey to community channel. 
diff --git a/src/vcd/parse.rs b/src/vcd/parse.rs index f4d2755..750b777 100644 --- a/src/vcd/parse.rs +++ b/src/vcd/parse.rs @@ -6,6 +6,8 @@ use ::function_name::named; #[derive(Debug)] pub struct Residual<'a>(&'a str); +#[derive(Debug)] +pub struct ParseResult<'a> {matched : &'a str, residual : &'a str} pub fn digit(chr : u8) -> bool { let zero = b'0' as u8; @@ -16,27 +18,30 @@ pub fn digit(chr : u8) -> bool { return between_zero_and_nine } -pub fn take_until<'a>(word : &'a str, pattern : u8) -> Option<(&'a str, Residual)> { +pub fn take_until<'a>(word : &'a str, pattern : u8) -> ParseResult<'a> { let mut new_start = 0; for chr in word.as_bytes() { if (*chr == pattern) { - return Some((&word[0..new_start], Residual(&word[new_start+1..]))); + break } else { new_start += 1; } } - None + return + ParseResult{ + matched : &word[0..new_start], + residual : &word[new_start..] + }; + } -pub fn take_while<'a>(word : &'a str, cond : fn(u8) -> bool) -> (&'a str, Residual) { +pub fn take_while<'a>(word : &'a str, cond : fn(u8) -> bool) -> ParseResult<'a> { let mut new_start = 0; - dbg!(word); for chr in word.as_bytes() { - dbg!(&chr); if (cond(*chr)) { new_start += 1; } @@ -45,11 +50,15 @@ pub fn take_while<'a>(word : &'a str, cond : fn(u8) -> bool) -> (&'a str, Residu } } - return (&word[0..new_start], Residual(&word[new_start..])); + return + ParseResult{ + matched : &word[0..new_start], + residual : &word[new_start..] 
+ }; } -fn tag<'a>(word : &'a str, pattern : &'a str) -> Option<&'a str> { +fn tag<'a>(word : &'a str, pattern : &'a str) -> ParseResult<'a> { let lhs = word.as_bytes().iter(); let rhs = pattern.as_bytes(); let iter = lhs.zip(rhs); @@ -58,11 +67,44 @@ fn tag<'a>(word : &'a str, pattern : &'a str) -> Option<&'a str> { let mut res = true; for (c_lhs, c_rhs) in iter { res = res && (c_lhs == c_rhs); - if !res {return None} + if !res {break} new_start += 1; } - Some(&word[new_start..]) + return + ParseResult{ + matched : &word[0..new_start], + residual : &word[new_start..] + }; +} + +impl<'a> ParseResult<'a> { + fn match_not_empty(& self) -> Result<(), String> { + if self.matched == "" { + return Err("failed".to_string()) + } + else { + return Ok(()) + } + } + + fn assert_match(& self) -> Result<&str, String> { + if self.matched == "" { + return Err("no match".to_string()) + } + else { + return Ok(self.matched) + } + } + + fn assert_residual(& self) -> Result<&str, String> { + if self.residual == "" { + return Err("no residual".to_string()) + } + else { + return Ok(self.residual) + } + } } #[named] @@ -133,8 +175,9 @@ fn parse_date( // get hour let (word, cursor) = word_and_ctx4; - let (hh, Residual(remainder)) = take_until(word, b':').ok_or("did not find colon")?; - let hh : u8 = hh.to_string() + let res = take_until(word, b':'); + res.assert_match()?; + let hh : u8 = res.matched.to_string() .parse() .map_err(|_| "failed to parse".to_string())?; @@ -146,8 +189,10 @@ fn parse_date( } // get minute - let (mm, Residual(remainder)) = take_until(remainder, b':').ok_or("did not find colon")?; - let mm : u8 = mm.to_string() + let word = &res.residual[1..]; // chop off colon which is at index 0 + let res = take_until(word, b':'); + res.assert_match()?; + let mm : u8 = res.matched.to_string() .parse() .map_err(|_| "failed to parse".to_string())?; @@ -160,7 +205,9 @@ fn parse_date( // get second // let ss : u8 = remainder.to_string().parse().unwrap(); - let ss : u8 = 
remainder.to_string() + res.assert_residual()?; + let residual = &res.residual[1..]; // chop off colon which is at index 0 + let ss : u8 = residual.to_string() .parse() .map_err(|_| "failed to parse".to_string())?; @@ -187,36 +234,6 @@ fn parse_date( return Ok(full_date.unwrap()) } - let full_date = format!("{day} {month} {date} {hh}:{ss}:{mm} {year}"); - let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y"); - if full_date.is_ok() { - return Ok(full_date.unwrap()) - } - - let full_date = format!("{day} {month} {date} {mm}:{hh}:{ss} {year}"); - let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y"); - if full_date.is_ok() { - return Ok(full_date.unwrap()) - } - - let full_date = format!("{day} {month} {date} {mm}:{ss}:{hh} {year}"); - let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y"); - if full_date.is_ok() { - return Ok(full_date.unwrap()) - } - - let full_date = format!("{day} {month} {date} {ss}:{mm}:{hh} {year}"); - let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y"); - if full_date.is_ok() { - return Ok(full_date.unwrap()) - } - - let full_date = format!("{day} {month} {date} {ss}:{hh}:{mm} {year}"); - let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y"); - if full_date.is_ok() { - return Ok(full_date.unwrap()) - } - Err("failed to parse date".to_string()) } @@ -257,15 +274,14 @@ fn parse_timescale(word_reader : &mut WordReader) -> Result<(Option, Timesc // first get timescale let (word, cursor) = word_reader.next_word().ok_or(&err_msg)?; let word = word.to_string(); - dbg!(&word); - let (scalar, Residual(residual)) = take_while(word.as_str(), digit); + let ParseResult{matched, residual} = take_while(word.as_str(), digit); + let scalar = matched; let scalar : u32 = scalar.to_string().parse() .map_err(|_| &err_msg)?; let timescale = { if residual == "" { - dbg!("parse_timescale"); let (word, cursor) = word_reader.next_word().ok_or(&err_msg)?; let 
unit = match word { "ps" => {Ok(Timescale::ps)} @@ -294,7 +310,7 @@ fn parse_timescale(word_reader : &mut WordReader) -> Result<(Option, Timesc // then check for the `$end` keyword let (end, cursor) = word_reader.next_word().ok_or(&err_msg)?; - tag(end, "$end").ok_or(&err_msg)?; + tag(end, "$end").match_not_empty()?; return Ok(timescale); @@ -303,6 +319,8 @@ fn parse_timescale(word_reader : &mut WordReader) -> Result<(Option, Timesc #[named] fn parse_metadata(word_reader : &mut WordReader) -> Result { + let err_msg = format!("reached end of file without parser leaving {}", function_name!()); + let mut metadata = Metadata { date : None, version : None, @@ -311,20 +329,13 @@ fn parse_metadata(word_reader : &mut WordReader) -> Result { loop { // check for another word in the file - let word = word_reader.next_word(); + let (word, cursor) = word_reader.next_word().ok_or(&err_msg)?; - // if there isn't another word left in the file, then we exit - if word.is_none() { - return Err(format!("reached end of file without parser leaving {}", function_name!())) - } - - // destructure - let (word, cursor) = word.unwrap(); - - match tag(word, "$") { + let ParseResult{matched, residual} = tag(word, "$"); + match matched { // we hope that this word stars with a `$` - Some(ident) => { - match ident { + "$" => { + match residual { "date" => { let err_msg = format!("reached end of file without parser leaving {}", function_name!()); // a date is typically composed of the 5 following words which can @@ -400,7 +411,6 @@ fn parse_metadata(word_reader : &mut WordReader) -> Result { } } "timescale" => { - dbg!("here"); let timescale = parse_timescale(word_reader); if timescale.is_ok() { metadata.timescale = timescale.unwrap(); @@ -413,8 +423,8 @@ fn parse_metadata(word_reader : &mut WordReader) -> Result { _ => {} } } - // if not, then we keep looping - None => {} + // if word does not start with `$`, then we keep looping + _ => {} } }