New parser #2
|
@ -26,14 +26,16 @@ The first build of the program may take some time.
|
|||
|
||||
# TODO
|
||||
- [x] We need a way to merge lines.
|
||||
- [ ] We need to start regression testing the parser over all files
|
||||
- [ ] Decide if I want to return option types
|
||||
- [ ] Propagate all to question mark unwrap types.
|
||||
- [ ] Don't want variation in hh:mm:ss
|
||||
- [ ] Consolidate error messages and add cursors.
|
||||
- [ ] Consider what to do with don't care values
|
||||
will probably just convert them to strings for now.
|
||||
- [ ] Test for speed and see if stream of bytes is helpful
|
||||
- [ ] Split ``parse.rs``. It's getting too large.
|
||||
- [ ] Include line and possible column numbers
|
||||
- [ ] Change states to lowercase
|
||||
- [ ] We need to start regression testing the parser over all files
|
||||
- [ ] Take a look at GTKWave parser to compare effificiency.
|
||||
- [ ] Send survey to community channel.
|
||||
|
||||
|
|
138
src/vcd/parse.rs
138
src/vcd/parse.rs
|
@ -6,6 +6,8 @@ use ::function_name::named;
|
|||
|
||||
#[derive(Debug)]
|
||||
pub struct Residual<'a>(&'a str);
|
||||
#[derive(Debug)]
|
||||
pub struct ParseResult<'a> {matched : &'a str, residual : &'a str}
|
||||
|
||||
pub fn digit(chr : u8) -> bool {
|
||||
let zero = b'0' as u8;
|
||||
|
@ -16,27 +18,30 @@ pub fn digit(chr : u8) -> bool {
|
|||
return between_zero_and_nine
|
||||
}
|
||||
|
||||
pub fn take_until<'a>(word : &'a str, pattern : u8) -> Option<(&'a str, Residual)> {
|
||||
pub fn take_until<'a>(word : &'a str, pattern : u8) -> ParseResult<'a> {
|
||||
let mut new_start = 0;
|
||||
|
||||
for chr in word.as_bytes() {
|
||||
if (*chr == pattern) {
|
||||
return Some((&word[0..new_start], Residual(&word[new_start+1..])));
|
||||
break
|
||||
}
|
||||
else {
|
||||
new_start += 1;
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
return
|
||||
ParseResult{
|
||||
matched : &word[0..new_start],
|
||||
residual : &word[new_start..]
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
pub fn take_while<'a>(word : &'a str, cond : fn(u8) -> bool) -> (&'a str, Residual) {
|
||||
pub fn take_while<'a>(word : &'a str, cond : fn(u8) -> bool) -> ParseResult<'a> {
|
||||
let mut new_start = 0;
|
||||
dbg!(word);
|
||||
|
||||
for chr in word.as_bytes() {
|
||||
dbg!(&chr);
|
||||
if (cond(*chr)) {
|
||||
new_start += 1;
|
||||
}
|
||||
|
@ -45,11 +50,15 @@ pub fn take_while<'a>(word : &'a str, cond : fn(u8) -> bool) -> (&'a str, Residu
|
|||
}
|
||||
}
|
||||
|
||||
return (&word[0..new_start], Residual(&word[new_start..]));
|
||||
return
|
||||
ParseResult{
|
||||
matched : &word[0..new_start],
|
||||
residual : &word[new_start..]
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
fn tag<'a>(word : &'a str, pattern : &'a str) -> Option<&'a str> {
|
||||
fn tag<'a>(word : &'a str, pattern : &'a str) -> ParseResult<'a> {
|
||||
let lhs = word.as_bytes().iter();
|
||||
let rhs = pattern.as_bytes();
|
||||
let iter = lhs.zip(rhs);
|
||||
|
@ -58,11 +67,44 @@ fn tag<'a>(word : &'a str, pattern : &'a str) -> Option<&'a str> {
|
|||
let mut res = true;
|
||||
for (c_lhs, c_rhs) in iter {
|
||||
res = res && (c_lhs == c_rhs);
|
||||
if !res {return None}
|
||||
if !res {break}
|
||||
new_start += 1;
|
||||
}
|
||||
|
||||
Some(&word[new_start..])
|
||||
return
|
||||
ParseResult{
|
||||
matched : &word[0..new_start],
|
||||
residual : &word[new_start..]
|
||||
};
|
||||
}
|
||||
|
||||
impl<'a> ParseResult<'a> {
|
||||
fn match_not_empty(& self) -> Result<(), String> {
|
||||
if self.matched == "" {
|
||||
return Err("failed".to_string())
|
||||
}
|
||||
else {
|
||||
return Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn assert_match(& self) -> Result<&str, String> {
|
||||
if self.matched == "" {
|
||||
return Err("no match".to_string())
|
||||
}
|
||||
else {
|
||||
return Ok(self.matched)
|
||||
}
|
||||
}
|
||||
|
||||
fn assert_residual(& self) -> Result<&str, String> {
|
||||
if self.residual == "" {
|
||||
return Err("no residual".to_string())
|
||||
}
|
||||
else {
|
||||
return Ok(self.residual)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[named]
|
||||
|
@ -133,8 +175,9 @@ fn parse_date(
|
|||
// get hour
|
||||
let (word, cursor) = word_and_ctx4;
|
||||
|
||||
let (hh, Residual(remainder)) = take_until(word, b':').ok_or("did not find colon")?;
|
||||
let hh : u8 = hh.to_string()
|
||||
let res = take_until(word, b':');
|
||||
res.assert_match()?;
|
||||
let hh : u8 = res.matched.to_string()
|
||||
.parse()
|
||||
.map_err(|_| "failed to parse".to_string())?;
|
||||
|
||||
|
@ -146,8 +189,10 @@ fn parse_date(
|
|||
}
|
||||
|
||||
// get minute
|
||||
let (mm, Residual(remainder)) = take_until(remainder, b':').ok_or("did not find colon")?;
|
||||
let mm : u8 = mm.to_string()
|
||||
let word = &res.residual[1..]; // chop off colon which is at index 0
|
||||
let res = take_until(word, b':');
|
||||
res.assert_match()?;
|
||||
let mm : u8 = res.matched.to_string()
|
||||
.parse()
|
||||
.map_err(|_| "failed to parse".to_string())?;
|
||||
|
||||
|
@ -160,7 +205,9 @@ fn parse_date(
|
|||
|
||||
// get second
|
||||
// let ss : u8 = remainder.to_string().parse().unwrap();
|
||||
let ss : u8 = remainder.to_string()
|
||||
res.assert_residual()?;
|
||||
let residual = &res.residual[1..]; // chop of colon which is at index 0
|
||||
let ss : u8 = residual.to_string()
|
||||
.parse()
|
||||
.map_err(|_| "failed to parse".to_string())?;
|
||||
|
||||
|
@ -187,36 +234,6 @@ fn parse_date(
|
|||
return Ok(full_date.unwrap())
|
||||
}
|
||||
|
||||
let full_date = format!("{day} {month} {date} {hh}:{ss}:{mm} {year}");
|
||||
let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
|
||||
if full_date.is_ok() {
|
||||
return Ok(full_date.unwrap())
|
||||
}
|
||||
|
||||
let full_date = format!("{day} {month} {date} {mm}:{hh}:{ss} {year}");
|
||||
let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
|
||||
if full_date.is_ok() {
|
||||
return Ok(full_date.unwrap())
|
||||
}
|
||||
|
||||
let full_date = format!("{day} {month} {date} {mm}:{ss}:{hh} {year}");
|
||||
let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
|
||||
if full_date.is_ok() {
|
||||
return Ok(full_date.unwrap())
|
||||
}
|
||||
|
||||
let full_date = format!("{day} {month} {date} {ss}:{mm}:{hh} {year}");
|
||||
let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
|
||||
if full_date.is_ok() {
|
||||
return Ok(full_date.unwrap())
|
||||
}
|
||||
|
||||
let full_date = format!("{day} {month} {date} {ss}:{hh}:{mm} {year}");
|
||||
let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
|
||||
if full_date.is_ok() {
|
||||
return Ok(full_date.unwrap())
|
||||
}
|
||||
|
||||
Err("failed to parse date".to_string())
|
||||
|
||||
}
|
||||
|
@ -257,15 +274,14 @@ fn parse_timescale(word_reader : &mut WordReader) -> Result<(Option<u32>, Timesc
|
|||
// first get timescale
|
||||
let (word, cursor) = word_reader.next_word().ok_or(&err_msg)?;
|
||||
let word = word.to_string();
|
||||
dbg!(&word);
|
||||
let (scalar, Residual(residual)) = take_while(word.as_str(), digit);
|
||||
let ParseResult{matched, residual} = take_while(word.as_str(), digit);
|
||||
let scalar = matched;
|
||||
|
||||
let scalar : u32 = scalar.to_string().parse()
|
||||
.map_err(|_| &err_msg)?;
|
||||
|
||||
let timescale = {
|
||||
if residual == "" {
|
||||
dbg!("parse_timescale");
|
||||
let (word, cursor) = word_reader.next_word().ok_or(&err_msg)?;
|
||||
let unit = match word {
|
||||
"ps" => {Ok(Timescale::ps)}
|
||||
|
@ -294,7 +310,7 @@ fn parse_timescale(word_reader : &mut WordReader) -> Result<(Option<u32>, Timesc
|
|||
|
||||
// then check for the `$end` keyword
|
||||
let (end, cursor) = word_reader.next_word().ok_or(&err_msg)?;
|
||||
tag(end, "$end").ok_or(&err_msg)?;
|
||||
tag(end, "$end").match_not_empty()?;
|
||||
|
||||
return Ok(timescale);
|
||||
|
||||
|
@ -303,6 +319,8 @@ fn parse_timescale(word_reader : &mut WordReader) -> Result<(Option<u32>, Timesc
|
|||
|
||||
#[named]
|
||||
fn parse_metadata(word_reader : &mut WordReader) -> Result<Metadata, String> {
|
||||
let err_msg = format!("reached end of file without parser leaving {}", function_name!());
|
||||
|
||||
let mut metadata = Metadata {
|
||||
date : None,
|
||||
version : None,
|
||||
|
@ -311,20 +329,13 @@ fn parse_metadata(word_reader : &mut WordReader) -> Result<Metadata, String> {
|
|||
|
||||
loop {
|
||||
// check for another word in the file
|
||||
let word = word_reader.next_word();
|
||||
let (word, cursor) = word_reader.next_word().ok_or(&err_msg)?;
|
||||
|
||||
// if there isn't another word left in the file, then we exit
|
||||
if word.is_none() {
|
||||
return Err(format!("reached end of file without parser leaving {}", function_name!()))
|
||||
}
|
||||
|
||||
// destructure
|
||||
let (word, cursor) = word.unwrap();
|
||||
|
||||
match tag(word, "$") {
|
||||
let ParseResult{matched, residual} = tag(word, "$");
|
||||
match matched {
|
||||
// we hope that this word stars with a `$`
|
||||
Some(ident) => {
|
||||
match ident {
|
||||
"$" => {
|
||||
match residual {
|
||||
"date" => {
|
||||
let err_msg = format!("reached end of file without parser leaving {}", function_name!());
|
||||
// a date is typically composed of the 5 following words which can
|
||||
|
@ -400,7 +411,6 @@ fn parse_metadata(word_reader : &mut WordReader) -> Result<Metadata, String> {
|
|||
}
|
||||
}
|
||||
"timescale" => {
|
||||
dbg!("here");
|
||||
let timescale = parse_timescale(word_reader);
|
||||
if timescale.is_ok() {
|
||||
metadata.timescale = timescale.unwrap();
|
||||
|
@ -413,8 +423,8 @@ fn parse_metadata(word_reader : &mut WordReader) -> Result<Metadata, String> {
|
|||
_ => {}
|
||||
}
|
||||
}
|
||||
// if not, then we keep looping
|
||||
None => {}
|
||||
// if word does not start with `$`, then we keep looping
|
||||
_ => {}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue