New parser #2
|
@ -26,14 +26,16 @@ The first build of the program may take some time.
|
||||||
|
|
||||||
# TODO
|
# TODO
|
||||||
- [x] We need a way to merge lines.
|
- [x] We need a way to merge lines.
|
||||||
|
- [ ] We need to start regression testing the parser over all files
|
||||||
|
- [ ] Decide if I want to return option types
|
||||||
|
- [ ] Propagate all to question mark unwrap types.
|
||||||
|
- [ ] Don't want variation in hh:mm:ss
|
||||||
- [ ] Consolidate error messages and add cursors.
|
- [ ] Consolidate error messages and add cursors.
|
||||||
- [ ] Consider what to do with don't care values
|
- [ ] Consider what to do with don't care values
|
||||||
will probably just convert them to strings for now.
|
will probably just convert them to strings for now.
|
||||||
- [ ] Test for speed and see if stream of bytes is helpful
|
|
||||||
- [ ] Split ``parse.rs``. It's getting too large.
|
- [ ] Split ``parse.rs``. It's getting too large.
|
||||||
- [ ] Include line and possible column numbers
|
- [ ] Include line and possible column numbers
|
||||||
- [ ] Change states to lowercase
|
- [ ] Change states to lowercase
|
||||||
- [ ] We need to start regression testing the parser over all files
|
|
||||||
- [ ] Take a look at GTKWave parser to compare efficiency.
|
- [ ] Take a look at GTKWave parser to compare efficiency.
|
||||||
- [ ] Send survey to community channel.
|
- [ ] Send survey to community channel.
|
||||||
|
|
||||||
|
|
138
src/vcd/parse.rs
138
src/vcd/parse.rs
|
@ -6,6 +6,8 @@ use ::function_name::named;
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Residual<'a>(&'a str);
|
pub struct Residual<'a>(&'a str);
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct ParseResult<'a> {matched : &'a str, residual : &'a str}
|
||||||
|
|
||||||
pub fn digit(chr : u8) -> bool {
|
pub fn digit(chr : u8) -> bool {
|
||||||
let zero = b'0' as u8;
|
let zero = b'0' as u8;
|
||||||
|
@ -16,27 +18,30 @@ pub fn digit(chr : u8) -> bool {
|
||||||
return between_zero_and_nine
|
return between_zero_and_nine
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn take_until<'a>(word : &'a str, pattern : u8) -> Option<(&'a str, Residual)> {
|
pub fn take_until<'a>(word : &'a str, pattern : u8) -> ParseResult<'a> {
|
||||||
let mut new_start = 0;
|
let mut new_start = 0;
|
||||||
|
|
||||||
for chr in word.as_bytes() {
|
for chr in word.as_bytes() {
|
||||||
if (*chr == pattern) {
|
if (*chr == pattern) {
|
||||||
return Some((&word[0..new_start], Residual(&word[new_start+1..])));
|
break
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
new_start += 1;
|
new_start += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
None
|
return
|
||||||
|
ParseResult{
|
||||||
|
matched : &word[0..new_start],
|
||||||
|
residual : &word[new_start..]
|
||||||
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn take_while<'a>(word : &'a str, cond : fn(u8) -> bool) -> (&'a str, Residual) {
|
pub fn take_while<'a>(word : &'a str, cond : fn(u8) -> bool) -> ParseResult<'a> {
|
||||||
let mut new_start = 0;
|
let mut new_start = 0;
|
||||||
dbg!(word);
|
|
||||||
|
|
||||||
for chr in word.as_bytes() {
|
for chr in word.as_bytes() {
|
||||||
dbg!(&chr);
|
|
||||||
if (cond(*chr)) {
|
if (cond(*chr)) {
|
||||||
new_start += 1;
|
new_start += 1;
|
||||||
}
|
}
|
||||||
|
@ -45,11 +50,15 @@ pub fn take_while<'a>(word : &'a str, cond : fn(u8) -> bool) -> (&'a str, Residu
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return (&word[0..new_start], Residual(&word[new_start..]));
|
return
|
||||||
|
ParseResult{
|
||||||
|
matched : &word[0..new_start],
|
||||||
|
residual : &word[new_start..]
|
||||||
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn tag<'a>(word : &'a str, pattern : &'a str) -> Option<&'a str> {
|
fn tag<'a>(word : &'a str, pattern : &'a str) -> ParseResult<'a> {
|
||||||
let lhs = word.as_bytes().iter();
|
let lhs = word.as_bytes().iter();
|
||||||
let rhs = pattern.as_bytes();
|
let rhs = pattern.as_bytes();
|
||||||
let iter = lhs.zip(rhs);
|
let iter = lhs.zip(rhs);
|
||||||
|
@ -58,11 +67,44 @@ fn tag<'a>(word : &'a str, pattern : &'a str) -> Option<&'a str> {
|
||||||
let mut res = true;
|
let mut res = true;
|
||||||
for (c_lhs, c_rhs) in iter {
|
for (c_lhs, c_rhs) in iter {
|
||||||
res = res && (c_lhs == c_rhs);
|
res = res && (c_lhs == c_rhs);
|
||||||
if !res {return None}
|
if !res {break}
|
||||||
new_start += 1;
|
new_start += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
Some(&word[new_start..])
|
return
|
||||||
|
ParseResult{
|
||||||
|
matched : &word[0..new_start],
|
||||||
|
residual : &word[new_start..]
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> ParseResult<'a> {
|
||||||
|
fn match_not_empty(& self) -> Result<(), String> {
|
||||||
|
if self.matched == "" {
|
||||||
|
return Err("failed".to_string())
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn assert_match(& self) -> Result<&str, String> {
|
||||||
|
if self.matched == "" {
|
||||||
|
return Err("no match".to_string())
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return Ok(self.matched)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn assert_residual(& self) -> Result<&str, String> {
|
||||||
|
if self.residual == "" {
|
||||||
|
return Err("no residual".to_string())
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return Ok(self.residual)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[named]
|
#[named]
|
||||||
|
@ -133,8 +175,9 @@ fn parse_date(
|
||||||
// get hour
|
// get hour
|
||||||
let (word, cursor) = word_and_ctx4;
|
let (word, cursor) = word_and_ctx4;
|
||||||
|
|
||||||
let (hh, Residual(remainder)) = take_until(word, b':').ok_or("did not find colon")?;
|
let res = take_until(word, b':');
|
||||||
let hh : u8 = hh.to_string()
|
res.assert_match()?;
|
||||||
|
let hh : u8 = res.matched.to_string()
|
||||||
.parse()
|
.parse()
|
||||||
.map_err(|_| "failed to parse".to_string())?;
|
.map_err(|_| "failed to parse".to_string())?;
|
||||||
|
|
||||||
|
@ -146,8 +189,10 @@ fn parse_date(
|
||||||
}
|
}
|
||||||
|
|
||||||
// get minute
|
// get minute
|
||||||
let (mm, Residual(remainder)) = take_until(remainder, b':').ok_or("did not find colon")?;
|
let word = &res.residual[1..]; // chop off colon which is at index 0
|
||||||
let mm : u8 = mm.to_string()
|
let res = take_until(word, b':');
|
||||||
|
res.assert_match()?;
|
||||||
|
let mm : u8 = res.matched.to_string()
|
||||||
.parse()
|
.parse()
|
||||||
.map_err(|_| "failed to parse".to_string())?;
|
.map_err(|_| "failed to parse".to_string())?;
|
||||||
|
|
||||||
|
@ -160,7 +205,9 @@ fn parse_date(
|
||||||
|
|
||||||
// get second
|
// get second
|
||||||
// let ss : u8 = remainder.to_string().parse().unwrap();
|
// let ss : u8 = remainder.to_string().parse().unwrap();
|
||||||
let ss : u8 = remainder.to_string()
|
res.assert_residual()?;
|
||||||
|
let residual = &res.residual[1..]; // chop off colon which is at index 0
|
||||||
|
let ss : u8 = residual.to_string()
|
||||||
.parse()
|
.parse()
|
||||||
.map_err(|_| "failed to parse".to_string())?;
|
.map_err(|_| "failed to parse".to_string())?;
|
||||||
|
|
||||||
|
@ -187,36 +234,6 @@ fn parse_date(
|
||||||
return Ok(full_date.unwrap())
|
return Ok(full_date.unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
let full_date = format!("{day} {month} {date} {hh}:{ss}:{mm} {year}");
|
|
||||||
let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
|
|
||||||
if full_date.is_ok() {
|
|
||||||
return Ok(full_date.unwrap())
|
|
||||||
}
|
|
||||||
|
|
||||||
let full_date = format!("{day} {month} {date} {mm}:{hh}:{ss} {year}");
|
|
||||||
let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
|
|
||||||
if full_date.is_ok() {
|
|
||||||
return Ok(full_date.unwrap())
|
|
||||||
}
|
|
||||||
|
|
||||||
let full_date = format!("{day} {month} {date} {mm}:{ss}:{hh} {year}");
|
|
||||||
let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
|
|
||||||
if full_date.is_ok() {
|
|
||||||
return Ok(full_date.unwrap())
|
|
||||||
}
|
|
||||||
|
|
||||||
let full_date = format!("{day} {month} {date} {ss}:{mm}:{hh} {year}");
|
|
||||||
let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
|
|
||||||
if full_date.is_ok() {
|
|
||||||
return Ok(full_date.unwrap())
|
|
||||||
}
|
|
||||||
|
|
||||||
let full_date = format!("{day} {month} {date} {ss}:{hh}:{mm} {year}");
|
|
||||||
let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
|
|
||||||
if full_date.is_ok() {
|
|
||||||
return Ok(full_date.unwrap())
|
|
||||||
}
|
|
||||||
|
|
||||||
Err("failed to parse date".to_string())
|
Err("failed to parse date".to_string())
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -257,15 +274,14 @@ fn parse_timescale(word_reader : &mut WordReader) -> Result<(Option<u32>, Timesc
|
||||||
// first get timescale
|
// first get timescale
|
||||||
let (word, cursor) = word_reader.next_word().ok_or(&err_msg)?;
|
let (word, cursor) = word_reader.next_word().ok_or(&err_msg)?;
|
||||||
let word = word.to_string();
|
let word = word.to_string();
|
||||||
dbg!(&word);
|
let ParseResult{matched, residual} = take_while(word.as_str(), digit);
|
||||||
let (scalar, Residual(residual)) = take_while(word.as_str(), digit);
|
let scalar = matched;
|
||||||
|
|
||||||
let scalar : u32 = scalar.to_string().parse()
|
let scalar : u32 = scalar.to_string().parse()
|
||||||
.map_err(|_| &err_msg)?;
|
.map_err(|_| &err_msg)?;
|
||||||
|
|
||||||
let timescale = {
|
let timescale = {
|
||||||
if residual == "" {
|
if residual == "" {
|
||||||
dbg!("parse_timescale");
|
|
||||||
let (word, cursor) = word_reader.next_word().ok_or(&err_msg)?;
|
let (word, cursor) = word_reader.next_word().ok_or(&err_msg)?;
|
||||||
let unit = match word {
|
let unit = match word {
|
||||||
"ps" => {Ok(Timescale::ps)}
|
"ps" => {Ok(Timescale::ps)}
|
||||||
|
@ -294,7 +310,7 @@ fn parse_timescale(word_reader : &mut WordReader) -> Result<(Option<u32>, Timesc
|
||||||
|
|
||||||
// then check for the `$end` keyword
|
// then check for the `$end` keyword
|
||||||
let (end, cursor) = word_reader.next_word().ok_or(&err_msg)?;
|
let (end, cursor) = word_reader.next_word().ok_or(&err_msg)?;
|
||||||
tag(end, "$end").ok_or(&err_msg)?;
|
tag(end, "$end").match_not_empty()?;
|
||||||
|
|
||||||
return Ok(timescale);
|
return Ok(timescale);
|
||||||
|
|
||||||
|
@ -303,6 +319,8 @@ fn parse_timescale(word_reader : &mut WordReader) -> Result<(Option<u32>, Timesc
|
||||||
|
|
||||||
#[named]
|
#[named]
|
||||||
fn parse_metadata(word_reader : &mut WordReader) -> Result<Metadata, String> {
|
fn parse_metadata(word_reader : &mut WordReader) -> Result<Metadata, String> {
|
||||||
|
let err_msg = format!("reached end of file without parser leaving {}", function_name!());
|
||||||
|
|
||||||
let mut metadata = Metadata {
|
let mut metadata = Metadata {
|
||||||
date : None,
|
date : None,
|
||||||
version : None,
|
version : None,
|
||||||
|
@ -311,20 +329,13 @@ fn parse_metadata(word_reader : &mut WordReader) -> Result<Metadata, String> {
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
// check for another word in the file
|
// check for another word in the file
|
||||||
let word = word_reader.next_word();
|
let (word, cursor) = word_reader.next_word().ok_or(&err_msg)?;
|
||||||
|
|
||||||
// if there isn't another word left in the file, then we exit
|
let ParseResult{matched, residual} = tag(word, "$");
|
||||||
if word.is_none() {
|
match matched {
|
||||||
return Err(format!("reached end of file without parser leaving {}", function_name!()))
|
|
||||||
}
|
|
||||||
|
|
||||||
// destructure
|
|
||||||
let (word, cursor) = word.unwrap();
|
|
||||||
|
|
||||||
match tag(word, "$") {
|
|
||||||
// we hope that this word starts with a `$`
|
// we hope that this word starts with a `$`
|
||||||
Some(ident) => {
|
"$" => {
|
||||||
match ident {
|
match residual {
|
||||||
"date" => {
|
"date" => {
|
||||||
let err_msg = format!("reached end of file without parser leaving {}", function_name!());
|
let err_msg = format!("reached end of file without parser leaving {}", function_name!());
|
||||||
// a date is typically composed of the 5 following words which can
|
// a date is typically composed of the 5 following words which can
|
||||||
|
@ -400,7 +411,6 @@ fn parse_metadata(word_reader : &mut WordReader) -> Result<Metadata, String> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
"timescale" => {
|
"timescale" => {
|
||||||
dbg!("here");
|
|
||||||
let timescale = parse_timescale(word_reader);
|
let timescale = parse_timescale(word_reader);
|
||||||
if timescale.is_ok() {
|
if timescale.is_ok() {
|
||||||
metadata.timescale = timescale.unwrap();
|
metadata.timescale = timescale.unwrap();
|
||||||
|
@ -413,8 +423,8 @@ fn parse_metadata(word_reader : &mut WordReader) -> Result<Metadata, String> {
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// if not, then we keep looping
|
// if word does not start with `$`, then we keep looping
|
||||||
None => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue