now parsing date more robustly

This commit is contained in:
Yehowshua Immanuel 2022-06-11 00:01:53 -04:00
parent 4c7417c729
commit 14af6e94e3
2 changed files with 107 additions and 28 deletions

View file

@ -29,6 +29,7 @@ The first build of the program may take some time.
- [ ] Consider what to do with don't care values - [ ] Consider what to do with don't care values
will probably just convert them to strings for now. will probably just convert them to strings for now.
- [ ] Test for speed and see if stream of bytes is helpful - [ ] Test for speed and see if stream of bytes is helpful
- [ ] Split ``parse.rs``. It's getting too large.
- [ ] Include line and possible column numbers - [ ] Include line and possible column numbers
- [ ] Change states to lowercase - [ ] Change states to lowercase
- [ ] We need to start regression testing the parser over all files - [ ] We need to start regression testing the parser over all files

View file

@ -40,11 +40,11 @@ fn tag<'a>(word : &'a str, pattern : &'a str) -> Option<&'a str> {
#[named] #[named]
fn parse_date( fn parse_date(
word_and_ctx1 : (&str, Cursor), word_and_ctx1 : (&str, &Cursor),
word_and_ctx2 : (&str, Cursor), word_and_ctx2 : (&str, &Cursor),
word_and_ctx3 : (&str, Cursor), word_and_ctx3 : (&str, &Cursor),
word_and_ctx4 : (&str, Cursor), word_and_ctx4 : (&str, &Cursor),
word_and_ctx5 : (&str, Cursor), word_and_ctx5 : (&str, &Cursor),
) -> Result<DateTime<Utc>, String> { ) -> Result<DateTime<Utc>, String> {
let day = { let day = {
@ -86,7 +86,11 @@ fn parse_date(
// check for another word in the file // check for another word in the file
let (word, cursor) = word_and_ctx3; let (word, cursor) = word_and_ctx3;
let date : u8 = word.to_string().parse().unwrap(); // let date : u8 = word.to_string().parse().unwrap();
let date : u8 = match word.to_string().parse() {
Ok(date) => date,
Err(_) => {return Err("".to_string())}
};
if date > 31 { if date > 31 {
let msg = format!("reached end of file without parser leaving {}\n", function_name!()); let msg = format!("reached end of file without parser leaving {}\n", function_name!());
@ -96,15 +100,17 @@ fn parse_date(
} }
word.to_string() date.to_string()
}; };
let (hh, mm, ss) = { let (hh, mm, ss) = {
// get hour // get hour
let (word, cursor) = word_and_ctx4; let (word, cursor) = word_and_ctx4;
let (hh, Residual(remainder)) = take_until(word, b':').unwrap(); let (hh, Residual(remainder)) = take_until(word, b':').ok_or("did not find colon")?;
let hh : u8 = hh.to_string().parse().unwrap(); let hh : u8 = hh.to_string()
.parse()
.map_err(|_| "failed to parse".to_string())?;
if hh > 23 { if hh > 23 {
let msg = format!("reached end of file without parser leaving {}\n", function_name!()); let msg = format!("reached end of file without parser leaving {}\n", function_name!());
@ -114,8 +120,10 @@ fn parse_date(
} }
// get minute // get minute
let (mm, Residual(remainder)) = take_until(remainder, b':').unwrap(); let (mm, Residual(remainder)) = take_until(remainder, b':').ok_or("did not find colon")?;
let mm : u8 = mm.to_string().parse().unwrap(); let mm : u8 = mm.to_string()
.parse()
.map_err(|_| "failed to parse".to_string())?;
if mm > 60 { if mm > 60 {
let msg = format!("reached end of file without parser leaving {}\n", function_name!()); let msg = format!("reached end of file without parser leaving {}\n", function_name!());
@ -125,7 +133,10 @@ fn parse_date(
} }
// get second // get second
let ss : u8 = remainder.to_string().parse().unwrap(); // let ss : u8 = remainder.to_string().parse().unwrap();
let ss : u8 = remainder.to_string()
.parse()
.map_err(|_| "failed to parse".to_string())?;
if ss > 60 { if ss > 60 {
let msg = format!("reached end of file without parser leaving {}\n", function_name!()); let msg = format!("reached end of file without parser leaving {}\n", function_name!());
@ -142,16 +153,51 @@ fn parse_date(
word.to_string() word.to_string()
}; };
let date = Utc.datetime_from_str( // unfortunately, the minutes, seconds, and hour could occur in an
format!("{day} {month} {date} {mm}:{hh}:{ss} {year}").as_str(), // unexpected order
"%a %b %e %T %Y").unwrap(); let full_date = format!("{day} {month} {date} {mm}:{hh}:{ss} {year}");
let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
if full_date.is_ok() {
return Ok(full_date.unwrap())
}
let full_date = format!("{day} {month} {date} {mm}:{ss}:{hh} {year}");
let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
if full_date.is_ok() {
return Ok(full_date.unwrap())
}
let full_date = format!("{day} {month} {date} {ss}:{mm}:{hh} {year}");
let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
if full_date.is_ok() {
return Ok(full_date.unwrap())
}
let full_date = format!("{day} {month} {date} {ss}:{hh}:{mm} {year}");
let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
if full_date.is_ok() {
return Ok(full_date.unwrap())
}
let full_date = format!("{day} {month} {date} {hh}:{ss}:{mm} {year}");
let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
if full_date.is_ok() {
return Ok(full_date.unwrap())
}
let full_date = format!("{day} {month} {date} {hh}:{mm}:{ss} {year}");
let full_date = Utc.datetime_from_str(full_date.as_str(), "%a %b %e %T %Y");
if full_date.is_ok() {
return Ok(full_date.unwrap())
}
Err("failed to parse dat".to_string())
Ok(date)
} }
#[named] #[named]
fn parse_header(word_reader : &mut WordReader) -> Result<Metadata, String> { fn parse_metadata(word_reader : &mut WordReader) -> Result<Metadata, String> {
let mut header = Metadata { let mut metadata = Metadata {
date : None, date : None,
version : None, version : None,
timescale : (None, Timescale::unit) timescale : (None, Timescale::unit)
@ -168,7 +214,6 @@ fn parse_header(word_reader : &mut WordReader) -> Result<Metadata, String> {
// destructure // destructure
let (word, cursor) = word.unwrap(); let (word, cursor) = word.unwrap();
let ident = tag(word, "$");
match tag(word, "$") { match tag(word, "$") {
// we hope that this word starts with a `$` // we hope that this word starts with a `$`
@ -182,6 +227,11 @@ fn parse_header(word_reader : &mut WordReader) -> Result<Metadata, String> {
// Thus, we must lookahead read the 5 next words, and try our date // Thus, we must lookahead read the 5 next words, and try our date
// parser on 5! = 120 permutations of the 5 words. // parser on 5! = 120 permutations of the 5 words.
// //
// It is also possible that within each permutation, the hours,
// minutes, and seconds could be in an unusual order, which means
// that we may search up to 6 different permutations of hh:mm:ss,
// for an upper bound total of 720 permutations
//
// While looking ahead, if one of the 5 words is `$end`, we have to // While looking ahead, if one of the 5 words is `$end`, we have to
// immediately stop trying to get more words. // immediately stop trying to get more words.
@ -206,15 +256,43 @@ fn parse_header(word_reader : &mut WordReader) -> Result<Metadata, String> {
// words // words
if found_end {continue} if found_end {continue}
let iter = lookahead_5_words let permutations = lookahead_5_words
.iter() .iter()
.permutations(lookahead_5_words.len()); .permutations(lookahead_5_words.len());
// let parsed_date = parse_date(word_reader).unwrap();
// header.date = Some(parsed_date); // go ahead and search for a match amongst permuted date text
for mut permutations in permutations {
let (w1, s1) = permutations.pop().unwrap();
let arg_1 = (&w1[..], s1);
let (w2, s2) = permutations.pop().unwrap();
let arg_2 = (&w2[..], s2);
let (w3, s3) = permutations.pop().unwrap();
let arg_3 = (&w3[..], s3);
let (w4, s4) = permutations.pop().unwrap();
let arg_4 = (&w4[..], s4);
let (w5, s5) = permutations.pop().unwrap();
let arg_5 = (&w5[..], s5);
let parsed_date = parse_date(arg_1, arg_2, arg_3, arg_4, arg_5);
// store date and exit loop if a match is found
if parsed_date.is_ok() {
metadata.date = Some(parsed_date.unwrap());
break
}
}
} }
"version" => {println!("got version")} "version" => {println!("found version")}
"timescale" => {println!("got timescale")} "timescale" => {println!("found timescale")}
// in VCDs, the scope keyword indicates the end of the metadata section
"scope" => {break} "scope" => {break}
// we keep searching for words until we've found one of the following
// keywords, ["version", "timescale", "scope"]
_ => {} _ => {}
} }
} }
@ -223,12 +301,12 @@ fn parse_header(word_reader : &mut WordReader) -> Result<Metadata, String> {
} }
} }
return Ok(header) return Ok(metadata)
} }
pub fn parse_vcd(file : File) { pub fn parse_vcd(file : File) {
let mut word_gen = WordReader::new(file); let mut word_gen = WordReader::new(file);
let header = parse_header(&mut word_gen).unwrap(); let header = parse_metadata(&mut word_gen).unwrap();
dbg!(header); dbg!(header);
} }